author		Sebastian Huber <sebastian.huber@embedded-brains.de>	2013-10-09 22:42:09 +0200
committer	Sebastian Huber <sebastian.huber@embedded-brains.de>	2013-10-10 09:06:58 +0200
commit		bceabc95c1c85d793200446fa85f1ddc6313ea29 (patch)
tree		973c8bd8deca9fd69913f2895cc91e0e6114d46c /freebsd/sys/contrib
parent		Add FreeBSD sources as a submodule (diff)
download	rtems-libbsd-bceabc95c1c85d793200446fa85f1ddc6313ea29.tar.bz2
Move files to match FreeBSD layout
Diffstat (limited to 'freebsd/sys/contrib')
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq.h  204
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_cbq.c  1187
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_cbq.h  221
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_cdnr.c  1393
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_cdnr.h  335
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_classq.h  206
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_hfsc.c  2279
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_hfsc.h  320
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_priq.c  1055
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_priq.h  170
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_red.c  1503
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_red.h  198
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_rio.c  855
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_rio.h  144
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_rmclass.c  1843
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_rmclass.h  266
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_rmclass_debug.h  112
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_subr.c  2042
-rw-r--r--  freebsd/sys/contrib/altq/altq/altq_var.h  265
-rw-r--r--  freebsd/sys/contrib/altq/altq/altqconf.h  29
-rw-r--r--  freebsd/sys/contrib/altq/altq/if_altq.h  191
-rw-r--r--  freebsd/sys/contrib/pf/net/if_pflog.c  438
-rw-r--r--  freebsd/sys/contrib/pf/net/if_pflog.h  103
-rw-r--r--  freebsd/sys/contrib/pf/net/if_pfsync.c  2331
-rw-r--r--  freebsd/sys/contrib/pf/net/if_pfsync.h  375
-rw-r--r--  freebsd/sys/contrib/pf/net/pf.c  7771
-rw-r--r--  freebsd/sys/contrib/pf/net/pf_if.c  950
-rw-r--r--  freebsd/sys/contrib/pf/net/pf_ioctl.c  3896
-rw-r--r--  freebsd/sys/contrib/pf/net/pf_mtag.h  82
-rw-r--r--  freebsd/sys/contrib/pf/net/pf_norm.c  2062
-rw-r--r--  freebsd/sys/contrib/pf/net/pf_osfp.c  640
-rw-r--r--  freebsd/sys/contrib/pf/net/pf_ruleset.c  433
-rw-r--r--  freebsd/sys/contrib/pf/net/pf_subr.c  170
-rw-r--r--  freebsd/sys/contrib/pf/net/pf_table.c  2363
-rw-r--r--  freebsd/sys/contrib/pf/net/pfvar.h  1866
-rw-r--r--  freebsd/sys/contrib/pf/netinet/in4_cksum.c  122
36 files changed, 38420 insertions, 0 deletions
diff --git a/freebsd/sys/contrib/altq/altq/altq.h b/freebsd/sys/contrib/altq/altq/altq.h
new file mode 100644
index 00000000..2c9e3536
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq.h
@@ -0,0 +1,204 @@
+/* $FreeBSD$ */
+/* $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_ALTQ_HH_
+#define _ALTQ_ALTQ_HH_
+
+#if 0
+/*
+ * allow altq-3 (altqd(8) and /dev/altq) to coexist with the new pf-based altq.
+ * altq3 is mainly for research experiments. pf-based altq is for daily use.
+ */
+#define ALTQ3_COMPAT /* for compatibility with altq-3 */
+#define ALTQ3_CLFIER_COMPAT /* for compatibility with altq-3 classifier */
+#endif
+
+#ifdef ALTQ3_COMPAT
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/ioccom.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/netinet/in.h>
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+#endif /* ALTQ3_COMPAT */
+
+/* altq discipline type */
+#define ALTQT_NONE 0 /* reserved */
+#define ALTQT_CBQ 1 /* cbq */
+#define ALTQT_WFQ 2 /* wfq */
+#define ALTQT_AFMAP 3 /* afmap */
+#define ALTQT_FIFOQ 4 /* fifoq */
+#define ALTQT_RED 5 /* red */
+#define ALTQT_RIO 6 /* rio */
+#define ALTQT_LOCALQ 7 /* local use */
+#define ALTQT_HFSC 8 /* hfsc */
+#define ALTQT_CDNR 9 /* traffic conditioner */
+#define ALTQT_BLUE 10 /* blue */
+#define ALTQT_PRIQ 11 /* priority queue */
+#define ALTQT_JOBS 12 /* JoBS */
+#define ALTQT_MAX 13 /* should be max discipline type + 1 */
+
+#ifdef ALTQ3_COMPAT
+struct altqreq {
+ char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ u_long arg; /* request-specific argument */
+};
+#endif
+
+/* simple token bucket meter profile */
+struct	tb_profile {
+	u_int	rate;	/* rate in bits per second */
+	u_int	depth;	/* depth in bytes */
+};
+
+#ifdef ALTQ3_COMPAT
+struct tbrreq {
+ char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct tb_profile tb_prof; /* token bucket profile */
+};
+
+#ifdef ALTQ3_CLFIER_COMPAT
+/*
+ * common network flow info structure
+ */
+struct flowinfo {
+ u_char fi_len; /* total length */
+ u_char fi_family; /* address family */
+ u_int8_t fi_data[46]; /* actually longer; address family
+ specific flow info. */
+};
+
+/*
+ * flow info structure for internet protocol family.
+ * (currently this is the only protocol family supported)
+ */
+struct flowinfo_in {
+ u_char fi_len; /* sizeof(struct flowinfo_in) */
+ u_char fi_family; /* AF_INET */
+ u_int8_t fi_proto; /* IPPROTO_XXX */
+ u_int8_t fi_tos; /* type-of-service */
+ struct in_addr fi_dst; /* dest address */
+ struct in_addr fi_src; /* src address */
+ u_int16_t fi_dport; /* dest port */
+ u_int16_t fi_sport; /* src port */
+ u_int32_t fi_gpi; /* generalized port id for ipsec */
+ u_int8_t _pad[28]; /* make the size equal to
+ flowinfo_in6 */
+};
+
+#ifdef SIN6_LEN
+struct flowinfo_in6 {
+ u_char fi6_len; /* sizeof(struct flowinfo_in6) */
+ u_char fi6_family; /* AF_INET6 */
+ u_int8_t fi6_proto; /* IPPROTO_XXX */
+ u_int8_t fi6_tclass; /* traffic class */
+ u_int32_t fi6_flowlabel; /* ipv6 flowlabel */
+ u_int16_t fi6_dport; /* dest port */
+ u_int16_t fi6_sport; /* src port */
+ u_int32_t fi6_gpi; /* generalized port id */
+ struct in6_addr fi6_dst; /* dest address */
+ struct in6_addr fi6_src; /* src address */
+};
+#endif /* SIN6_LEN */
+
+/*
+ * flow filters for AF_INET and AF_INET6
+ */
+struct flow_filter {
+ int ff_ruleno;
+ struct flowinfo_in ff_flow;
+ struct {
+ struct in_addr mask_dst;
+ struct in_addr mask_src;
+ u_int8_t mask_tos;
+ u_int8_t _pad[3];
+ } ff_mask;
+ u_int8_t _pad2[24]; /* make the size equal to flow_filter6 */
+};
+
+#ifdef SIN6_LEN
+struct flow_filter6 {
+ int ff_ruleno;
+ struct flowinfo_in6 ff_flow6;
+ struct {
+ struct in6_addr mask6_dst;
+ struct in6_addr mask6_src;
+ u_int8_t mask6_tclass;
+ u_int8_t _pad[3];
+ } ff_mask6;
+};
+#endif /* SIN6_LEN */
+#endif /* ALTQ3_CLFIER_COMPAT */
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * generic packet counter
+ */
+struct pktcntr {
+ u_int64_t packets;
+ u_int64_t bytes;
+};
+
+#define PKTCNTR_ADD(cntr, len) \
+ do { (cntr)->packets++; (cntr)->bytes += len; } while (/*CONSTCOND*/ 0)
+
+#ifdef ALTQ3_COMPAT
+/*
+ * altq related ioctls
+ */
+#define ALTQGTYPE _IOWR('q', 0, struct altqreq) /* get queue type */
+#if 0
+/*
+ * these ioctls are currently discipline-specific but could be shared
+ * in the future.
+ */
+#define ALTQATTACH _IOW('q', 1, struct altqreq) /* attach discipline */
+#define ALTQDETACH _IOW('q', 2, struct altqreq) /* detach discipline */
+#define ALTQENABLE _IOW('q', 3, struct altqreq) /* enable discipline */
+#define ALTQDISABLE _IOW('q', 4, struct altqreq) /* disable discipline*/
+#define ALTQCLEAR _IOW('q', 5, struct altqreq) /* (re)initialize */
+#define ALTQCONFIG _IOWR('q', 6, struct altqreq) /* set config params */
+#define ALTQADDCLASS _IOWR('q', 7, struct altqreq) /* add a class */
+#define ALTQMODCLASS _IOWR('q', 8, struct altqreq) /* modify a class */
+#define ALTQDELCLASS _IOWR('q', 9, struct altqreq) /* delete a class */
+#define ALTQADDFILTER _IOWR('q', 10, struct altqreq) /* add a filter */
+#define ALTQDELFILTER _IOWR('q', 11, struct altqreq) /* delete a filter */
+#define ALTQGETSTATS _IOWR('q', 12, struct altqreq) /* get statistics */
+#define ALTQGETCNTR _IOWR('q', 13, struct altqreq) /* get a pkt counter */
+#endif /* 0 */
+#define ALTQTBRSET _IOW('q', 14, struct tbrreq) /* set tb regulator */
+#define ALTQTBRGET _IOWR('q', 15, struct tbrreq) /* get tb regulator */
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+#include <freebsd/altq/altq_var.h>
+#endif
+
+#endif /* _ALTQ_ALTQ_HH_ */
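
The tb_profile and pktcntr definitions above are the pieces of this header that the queueing disciplines use for rate configuration and per-class accounting. As a minimal user-space sketch of how they behave (the struct copies, the 10 Mbit/s profile, and main() are illustrative assumptions, not part of the commit):

#include <stdint.h>
#include <stdio.h>

/* user-space mirrors of the kernel definitions above, for this demo only */
struct tb_profile {
	unsigned int rate;		/* rate in bits per second */
	unsigned int depth;		/* bucket depth in bytes */
};

struct pktcntr {
	uint64_t packets;
	uint64_t bytes;
};

#define PKTCNTR_ADD(cntr, len) \
	do { (cntr)->packets++; (cntr)->bytes += (len); } while (0)

int
main(void)
{
	/* hypothetical profile: 10 Mbit/s with a 64 KB bucket */
	struct tb_profile prof = { 10000000, 65536 };
	struct pktcntr cnt = { 0, 0 };

	/* account for two packets, as the kernel macro does per class */
	PKTCNTR_ADD(&cnt, 1500);
	PKTCNTR_ADD(&cnt, 40);

	printf("rate=%u bps depth=%u B: %llu pkts, %llu bytes\n",
	    prof.rate, prof.depth,
	    (unsigned long long)cnt.packets,
	    (unsigned long long)cnt.bytes);
	return (0);
}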
diff --git a/freebsd/sys/contrib/altq/altq/altq_cbq.c b/freebsd/sys/contrib/altq/altq/altq_cbq.c
new file mode 100644
index 00000000..b5f21fb3
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_cbq.c
@@ -0,0 +1,1187 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include <freebsd/local/opt_altq.h>
+#if (__FreeBSD__ != 2)
+#include <freebsd/local/opt_inet.h>
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet6.h>
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/time.h>
+#ifdef ALTQ3_COMPAT
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/kernel.h>
+#endif
+
+#include <freebsd/net/if.h>
+#include <freebsd/netinet/in.h>
+
+#include <freebsd/net/pfvar.h>
+#include <freebsd/altq/altq.h>
+#include <freebsd/altq/altq_cbq.h>
+#ifdef ALTQ3_COMPAT
+#include <freebsd/altq/altq_conf.h>
+#endif
+
+#ifdef ALTQ3_COMPAT
+/*
+ * Local Data structures.
+ */
+static cbq_state_t *cbq_list = NULL;
+#endif
+
+/*
+ * Forward Declarations.
+ */
+static int cbq_class_destroy(cbq_state_t *, struct rm_class *);
+static struct rm_class *clh_to_clp(cbq_state_t *, u_int32_t);
+static int cbq_clear_interface(cbq_state_t *);
+static int cbq_request(struct ifaltq *, int, void *);
+static int cbq_enqueue(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+static struct mbuf *cbq_dequeue(struct ifaltq *, int);
+static void cbqrestart(struct ifaltq *);
+static void get_class_stats(class_stats_t *, struct rm_class *);
+static void cbq_purge(cbq_state_t *);
+#ifdef ALTQ3_COMPAT
+static int cbq_add_class(struct cbq_add_class *);
+static int cbq_delete_class(struct cbq_delete_class *);
+static int cbq_modify_class(struct cbq_modify_class *);
+static int cbq_class_create(cbq_state_t *, struct cbq_add_class *,
+ struct rm_class *, struct rm_class *);
+static int cbq_clear_hierarchy(struct cbq_interface *);
+static int cbq_set_enable(struct cbq_interface *, int);
+static int cbq_ifattach(struct cbq_interface *);
+static int cbq_ifdetach(struct cbq_interface *);
+static int cbq_getstats(struct cbq_getstats *);
+
+static int cbq_add_filter(struct cbq_add_filter *);
+static int cbq_delete_filter(struct cbq_delete_filter *);
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * int
+ * cbq_class_destroy(cbq_mod_state_t *, struct rm_class *) - This
+ * function destroys a given traffic class. Before destroying
+ * the class, all traffic for that class is released.
+ */
+static int
+cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl)
+{
+ int i;
+
+ /* delete the class */
+ rmc_delete_class(&cbqp->ifnp, cl);
+
+ /*
+ * free the class handle
+ */
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == cl)
+ cbqp->cbq_class_tbl[i] = NULL;
+
+ if (cl == cbqp->ifnp.root_)
+ cbqp->ifnp.root_ = NULL;
+ if (cl == cbqp->ifnp.default_)
+ cbqp->ifnp.default_ = NULL;
+#ifdef ALTQ3_COMPAT
+ if (cl == cbqp->ifnp.ctl_)
+ cbqp->ifnp.ctl_ = NULL;
+#endif
+ return (0);
+}
+
+/* convert class handle to class pointer */
+static struct rm_class *
+clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle)
+{
+ int i;
+ struct rm_class *cl;
+
+ if (chandle == 0)
+ return (NULL);
+ /*
+	 * first, optimistically try the slot matching the lower bits of
+	 * the handle.  if that fails, fall back to the linear table search.
+ */
+ i = chandle % CBQ_MAX_CLASSES;
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL &&
+ cl->stats_.handle == chandle)
+ return (cl);
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL &&
+ cl->stats_.handle == chandle)
+ return (cl);
+ return (NULL);
+}
+
+static int
+cbq_clear_interface(cbq_state_t *cbqp)
+{
+ int again, i;
+ struct rm_class *cl;
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ /* free the filters for this interface */
+ acc_discard_filters(&cbqp->cbq_classifier, NULL, 1);
+#endif
+
+ /* clear out the classes now */
+ do {
+ again = 0;
+ for (i = 0; i < CBQ_MAX_CLASSES; i++) {
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL) {
+ if (is_a_parent_class(cl))
+ again++;
+ else {
+ cbq_class_destroy(cbqp, cl);
+ cbqp->cbq_class_tbl[i] = NULL;
+ if (cl == cbqp->ifnp.root_)
+ cbqp->ifnp.root_ = NULL;
+ if (cl == cbqp->ifnp.default_)
+ cbqp->ifnp.default_ = NULL;
+#ifdef ALTQ3_COMPAT
+ if (cl == cbqp->ifnp.ctl_)
+ cbqp->ifnp.ctl_ = NULL;
+#endif
+ }
+ }
+ }
+ } while (again);
+
+ return (0);
+}
+
+static int
+cbq_request(struct ifaltq *ifq, int req, void *arg)
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ cbq_purge(cbqp);
+ break;
+ }
+ return (0);
+}
+
+/* copy the stats info in rm_class to class_states_t */
+static void
+get_class_stats(class_stats_t *statsp, struct rm_class *cl)
+{
+ statsp->xmit_cnt = cl->stats_.xmit_cnt;
+ statsp->drop_cnt = cl->stats_.drop_cnt;
+ statsp->over = cl->stats_.over;
+ statsp->borrows = cl->stats_.borrows;
+ statsp->overactions = cl->stats_.overactions;
+ statsp->delays = cl->stats_.delays;
+
+ statsp->depth = cl->depth_;
+ statsp->priority = cl->pri_;
+ statsp->maxidle = cl->maxidle_;
+ statsp->minidle = cl->minidle_;
+ statsp->offtime = cl->offtime_;
+ statsp->qmax = qlimit(cl->q_);
+ statsp->ns_per_byte = cl->ns_per_byte_;
+ statsp->wrr_allot = cl->w_allotment_;
+ statsp->qcnt = qlen(cl->q_);
+ statsp->avgidle = cl->avgidle_;
+
+ statsp->qtype = qtype(cl->q_);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ red_getstats(cl->red_, &statsp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ rio_getstats((rio_t *)cl->red_, &statsp->red[0]);
+#endif
+}
+
+int
+cbq_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc,
+ cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL);
+ splx(s);
+ return (error);
+}
+
+int
+cbq_add_altq(struct pf_altq *a)
+{
+ cbq_state_t *cbqp;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ /* allocate and initialize cbq_state_t */
+ cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK);
+ if (cbqp == NULL)
+ return (ENOMEM);
+ bzero(cbqp, sizeof(cbq_state_t));
+ CALLOUT_INIT(&cbqp->cbq_callout);
+ cbqp->cbq_qlen = 0;
+ cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */
+
+ /* keep the state in pf_altq */
+ a->altq_disc = cbqp;
+
+ return (0);
+}
+
+int
+cbq_remove_altq(struct pf_altq *a)
+{
+ cbq_state_t *cbqp;
+
+ if ((cbqp = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ cbq_clear_interface(cbqp);
+
+ if (cbqp->ifnp.default_)
+ cbq_class_destroy(cbqp, cbqp->ifnp.default_);
+ if (cbqp->ifnp.root_)
+ cbq_class_destroy(cbqp, cbqp->ifnp.root_);
+
+ /* deallocate cbq_state_t */
+ free(cbqp, M_DEVBUF);
+
+ return (0);
+}
+
+int
+cbq_add_queue(struct pf_altq *a)
+{
+ struct rm_class *borrow, *parent;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ struct cbq_opts *opts;
+ int i;
+
+ if ((cbqp = a->altq_disc) == NULL)
+ return (EINVAL);
+ if (a->qid == 0)
+ return (EINVAL);
+
+ /*
+ * find a free slot in the class table. if the slot matching
+ * the lower bits of qid is free, use this slot. otherwise,
+ * use the first free slot.
+ */
+ i = a->qid % CBQ_MAX_CLASSES;
+ if (cbqp->cbq_class_tbl[i] != NULL) {
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == NULL)
+ break;
+ if (i == CBQ_MAX_CLASSES)
+ return (EINVAL);
+ }
+
+ opts = &a->pq_u.cbq_opts;
+ /* check parameters */
+ if (a->priority >= CBQ_MAXPRI)
+ return (EINVAL);
+
+ /* Get pointers to parent and borrow classes. */
+ parent = clh_to_clp(cbqp, a->parent_qid);
+ if (opts->flags & CBQCLF_BORROW)
+ borrow = parent;
+ else
+ borrow = NULL;
+
+ /*
+	 * A class must borrow from its parent or it cannot
+	 * borrow at all.  Hence, borrow can be NULL.
+ */
+ if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) {
+ printf("cbq_add_queue: no parent class!\n");
+ return (EINVAL);
+ }
+
+ if ((borrow != parent) && (borrow != NULL)) {
+ printf("cbq_add_class: borrow class != parent\n");
+ return (EINVAL);
+ }
+
+ /*
+ * check parameters
+ */
+ switch (opts->flags & CBQCLF_CLASSMASK) {
+ case CBQCLF_ROOTCLASS:
+ if (parent != NULL)
+ return (EINVAL);
+ if (cbqp->ifnp.root_)
+ return (EINVAL);
+ break;
+ case CBQCLF_DEFCLASS:
+ if (cbqp->ifnp.default_)
+ return (EINVAL);
+ break;
+ case 0:
+ if (a->qid == 0)
+ return (EINVAL);
+ break;
+ default:
+		/* more than one class-kind flag bit set */
+ return (EINVAL);
+ }
+
+ /*
+ * create a class. if this is a root class, initialize the
+ * interface.
+ */
+ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
+ rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte,
+ cbqrestart, a->qlimit, RM_MAXQUEUED,
+ opts->maxidle, opts->minidle, opts->offtime,
+ opts->flags);
+ cl = cbqp->ifnp.root_;
+ } else {
+ cl = rmc_newclass(a->priority,
+ &cbqp->ifnp, opts->ns_per_byte,
+ rmc_delay_action, a->qlimit, parent, borrow,
+ opts->maxidle, opts->minidle, opts->offtime,
+ opts->pktsize, opts->flags);
+ }
+ if (cl == NULL)
+ return (ENOMEM);
+
+ /* return handle to user space. */
+ cl->stats_.handle = a->qid;
+ cl->stats_.depth = cl->depth_;
+
+ /* save the allocated class */
+ cbqp->cbq_class_tbl[i] = cl;
+
+ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
+ cbqp->ifnp.default_ = cl;
+
+ return (0);
+}
+
+int
+cbq_remove_queue(struct pf_altq *a)
+{
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+ int i;
+
+ if ((cbqp = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
+ return (EINVAL);
+
+ /* if we are a parent class, then return an error. */
+ if (is_a_parent_class(cl))
+ return (EINVAL);
+
+ /* delete the class */
+ rmc_delete_class(&cbqp->ifnp, cl);
+
+ /*
+ * free the class handle
+ */
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == cl) {
+ cbqp->cbq_class_tbl[i] = NULL;
+ if (cl == cbqp->ifnp.root_)
+ cbqp->ifnp.root_ = NULL;
+ if (cl == cbqp->ifnp.default_)
+ cbqp->ifnp.default_ = NULL;
+ break;
+ }
+
+ return (0);
+}
+
+int
+cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ class_stats_t stats;
+ int error = 0;
+
+ if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * int
+ * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr)
+ * - Queue data packets.
+ *
+ * cbq_enqueue is set to ifp->if_altqenqueue and called by an upper
+ * layer (e.g. ether_output). cbq_enqueue queues the given packet
+ * to the cbq, then invokes the driver's start routine.
+ *
+ * Assumptions: called in splimp
+ * Returns: 0 if the queueing is successful.
+ * ENOBUFS if a packet dropping occurred as a result of
+ * the queueing.
+ */
+
+static int
+cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+ struct rm_class *cl;
+ struct pf_mtag *t;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+#if defined(__NetBSD__) || defined(__OpenBSD__)\
+ || (defined(__FreeBSD__) && __FreeBSD_version >= 501113)
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+#else
+ printf("altq: packet for %s%d does not have pkthdr\n",
+ ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit);
+#endif
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ if ((t = pf_find_mtag(m)) != NULL)
+ cl = clh_to_clp(cbqp, t->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL) {
+ cl = cbqp->ifnp.default_;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->pktattr_ = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->pktattr_ = NULL;
+ len = m_pktlen(m);
+ if (rmc_queue_packet(cl, m) != 0) {
+ /* drop occurred. some mbuf was freed in rmc_queue_packet. */
+ PKTCNTR_ADD(&cl->stats_.drop_cnt, len);
+ return (ENOBUFS);
+ }
+
+ /* successfully queued. */
+ ++cbqp->cbq_qlen;
+ IFQ_INC_LEN(ifq);
+ return (0);
+}
+
+static struct mbuf *
+cbq_dequeue(struct ifaltq *ifq, int op)
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+ struct mbuf *m;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ m = rmc_dequeue_next(&cbqp->ifnp, op);
+
+ if (m && op == ALTDQ_REMOVE) {
+ --cbqp->cbq_qlen; /* decrement # of packets in cbq */
+ IFQ_DEC_LEN(ifq);
+
+ /* Update the class. */
+ rmc_update_class_util(&cbqp->ifnp);
+ }
+ return (m);
+}
+
+/*
+ * void
+ * cbqrestart(queue_t *) - Restart sending of data.
+ * called from rmc_restart in splimp via timeout after waking up
+ * a suspended class.
+ * Returns: NONE
+ */
+
+static void
+cbqrestart(struct ifaltq *ifq)
+{
+ cbq_state_t *cbqp;
+ struct ifnet *ifp;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (!ALTQ_IS_ENABLED(ifq))
+ /* cbq must have been detached */
+ return;
+
+ if ((cbqp = (cbq_state_t *)ifq->altq_disc) == NULL)
+ /* should not happen */
+ return;
+
+ ifp = ifq->altq_ifp;
+ if (ifp->if_start &&
+ cbqp->cbq_qlen > 0 && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
+ IFQ_UNLOCK(ifq);
+ (*ifp->if_start)(ifp);
+ IFQ_LOCK(ifq);
+ }
+}
+
+static void
+cbq_purge(cbq_state_t *cbqp)
+{
+ struct rm_class *cl;
+ int i;
+
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL)
+ rmc_dropall(cl);
+ if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_))
+ cbqp->ifnp.ifq_->ifq_len = 0;
+}
+#ifdef ALTQ3_COMPAT
+
+static int
+cbq_add_class(acp)
+ struct cbq_add_class *acp;
+{
+ char *ifacename;
+ struct rm_class *borrow, *parent;
+ cbq_state_t *cbqp;
+
+ ifacename = acp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* check parameters */
+ if (acp->cbq_class.priority >= CBQ_MAXPRI ||
+ acp->cbq_class.maxq > CBQ_MAXQSIZE)
+ return (EINVAL);
+
+ /* Get pointers to parent and borrow classes. */
+ parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle);
+ borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle);
+
+ /*
+	 * A class must borrow from its parent or it cannot
+	 * borrow at all.  Hence, borrow can be NULL.
+ */
+ if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) {
+ printf("cbq_add_class: no parent class!\n");
+ return (EINVAL);
+ }
+
+ if ((borrow != parent) && (borrow != NULL)) {
+ printf("cbq_add_class: borrow class != parent\n");
+ return (EINVAL);
+ }
+
+ return cbq_class_create(cbqp, acp, parent, borrow);
+}
+
+static int
+cbq_delete_class(dcp)
+ struct cbq_delete_class *dcp;
+{
+ char *ifacename;
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+
+ ifacename = dcp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(cbqp, dcp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ /* if we are a parent class, then return an error. */
+ if (is_a_parent_class(cl))
+ return (EINVAL);
+
+ /* if a filter has a reference to this class delete the filter */
+ acc_discard_filters(&cbqp->cbq_classifier, cl, 0);
+
+ return cbq_class_destroy(cbqp, cl);
+}
+
+static int
+cbq_modify_class(acp)
+ struct cbq_modify_class *acp;
+{
+ char *ifacename;
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+
+ ifacename = acp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* Get pointer to this class */
+ if ((cl = clh_to_clp(cbqp, acp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte,
+ acp->cbq_class.maxq, acp->cbq_class.maxidle,
+ acp->cbq_class.minidle, acp->cbq_class.offtime,
+ acp->cbq_class.pktsize) < 0)
+ return (EINVAL);
+ return (0);
+}
+
+/*
+ * struct rm_class *
+ * cbq_class_create(cbq_mod_state_t *cbqp, struct cbq_add_class *acp,
+ * struct rm_class *parent, struct rm_class *borrow)
+ *
+ * This function creates a new traffic class in the CBQ class hierarchy with
+ * the given parameters.  The created class is either the root, the default,
+ * or a new dynamic class.  If CBQ is not initialized, the root class
+ * will be created.
+ */
+static int
+cbq_class_create(cbqp, acp, parent, borrow)
+ cbq_state_t *cbqp;
+ struct cbq_add_class *acp;
+ struct rm_class *parent, *borrow;
+{
+ struct rm_class *cl;
+ cbq_class_spec_t *spec = &acp->cbq_class;
+ u_int32_t chandle;
+ int i;
+
+ /*
+ * allocate class handle
+ */
+ for (i = 1; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == NULL)
+ break;
+ if (i == CBQ_MAX_CLASSES)
+ return (EINVAL);
+ chandle = i; /* use the slot number as class handle */
+
+ /*
+ * create a class. if this is a root class, initialize the
+ * interface.
+ */
+ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
+ rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte,
+ cbqrestart, spec->maxq, RM_MAXQUEUED,
+ spec->maxidle, spec->minidle, spec->offtime,
+ spec->flags);
+ cl = cbqp->ifnp.root_;
+ } else {
+ cl = rmc_newclass(spec->priority,
+ &cbqp->ifnp, spec->nano_sec_per_byte,
+ rmc_delay_action, spec->maxq, parent, borrow,
+ spec->maxidle, spec->minidle, spec->offtime,
+ spec->pktsize, spec->flags);
+ }
+ if (cl == NULL)
+ return (ENOMEM);
+
+ /* return handle to user space. */
+ acp->cbq_class_handle = chandle;
+
+ cl->stats_.handle = chandle;
+ cl->stats_.depth = cl->depth_;
+
+ /* save the allocated class */
+ cbqp->cbq_class_tbl[i] = cl;
+
+ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
+ cbqp->ifnp.default_ = cl;
+ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_CTLCLASS)
+ cbqp->ifnp.ctl_ = cl;
+
+ return (0);
+}
+
+static int
+cbq_add_filter(afp)
+ struct cbq_add_filter *afp;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+
+ ifacename = afp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* Get the pointer to class. */
+ if ((cl = clh_to_clp(cbqp, afp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter,
+ cl, &afp->cbq_filter_handle);
+}
+
+static int
+cbq_delete_filter(dfp)
+ struct cbq_delete_filter *dfp;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = dfp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&cbqp->cbq_classifier,
+ dfp->cbq_filter_handle);
+}
+
+/*
+ * cbq_clear_hierarchy deletes all classes and their filters on the
+ * given interface.
+ */
+static int
+cbq_clear_hierarchy(ifacep)
+ struct cbq_interface *ifacep;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ return cbq_clear_interface(cbqp);
+}
+
+/*
+ * static int
+ * cbq_set_enable(struct cbq_interface *ep, int enable) - this function processes the
+ * ioctl request to enable class based queueing. It searches the list
+ * of interfaces for the specified interface and then enables CBQ on
+ * that interface.
+ *
+ * Returns: 0, for no error.
+ *	EBADF, for specified interface not found.
+ */
+
+static int
+cbq_set_enable(ep, enable)
+ struct cbq_interface *ep;
+ int enable;
+{
+ int error = 0;
+ cbq_state_t *cbqp;
+ char *ifacename;
+
+ ifacename = ep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ switch (enable) {
+ case ENABLE:
+ if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL ||
+ cbqp->ifnp.ctl_ == NULL) {
+ if (cbqp->ifnp.root_ == NULL)
+ printf("No Root Class for %s\n", ifacename);
+ if (cbqp->ifnp.default_ == NULL)
+ printf("No Default Class for %s\n", ifacename);
+ if (cbqp->ifnp.ctl_ == NULL)
+ printf("No Control Class for %s\n", ifacename);
+ error = EINVAL;
+ } else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) {
+ cbqp->cbq_qlen = 0;
+ }
+ break;
+
+ case DISABLE:
+ error = altq_disable(cbqp->ifnp.ifq_);
+ break;
+ }
+ return (error);
+}
+
+static int
+cbq_getstats(gsp)
+ struct cbq_getstats *gsp;
+{
+ char *ifacename;
+ int i, n, nclasses;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ class_stats_t stats, *usp;
+ int error = 0;
+
+ ifacename = gsp->iface.cbq_ifacename;
+ nclasses = gsp->nclasses;
+ usp = gsp->stats;
+
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+ if (nclasses <= 0)
+ return (EINVAL);
+
+ for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) {
+ while ((cl = cbqp->cbq_class_tbl[i]) == NULL)
+ if (++i >= CBQ_MAX_CLASSES)
+ goto out;
+
+ get_class_stats(&stats, cl);
+ stats.handle = cl->stats_.handle;
+
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+
+ out:
+ gsp->nclasses = n;
+ return (error);
+}
+
+static int
+cbq_ifattach(ifacep)
+ struct cbq_interface *ifacep;
+{
+ int error = 0;
+ char *ifacename;
+ cbq_state_t *new_cbqp;
+ struct ifnet *ifp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((ifp = ifunit(ifacename)) == NULL)
+ return (ENXIO);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENXIO);
+
+ /* allocate and initialize cbq_state_t */
+ new_cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK);
+ if (new_cbqp == NULL)
+ return (ENOMEM);
+ bzero(new_cbqp, sizeof(cbq_state_t));
+ CALLOUT_INIT(&new_cbqp->cbq_callout);
+
+ new_cbqp->cbq_qlen = 0;
+ new_cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */
+
+ /*
+ * set CBQ to this ifnet structure.
+ */
+ error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp,
+ cbq_enqueue, cbq_dequeue, cbq_request,
+ &new_cbqp->cbq_classifier, acc_classify);
+ if (error) {
+ free(new_cbqp, M_DEVBUF);
+ return (error);
+ }
+
+ /* prepend to the list of cbq_state_t's. */
+ new_cbqp->cbq_next = cbq_list;
+ cbq_list = new_cbqp;
+
+ return (0);
+}
+
+static int
+cbq_ifdetach(ifacep)
+ struct cbq_interface *ifacep;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ (void)cbq_set_enable(ifacep, DISABLE);
+
+ cbq_clear_interface(cbqp);
+
+ /* remove CBQ from the ifnet structure. */
+ (void)altq_detach(cbqp->ifnp.ifq_);
+
+ /* remove from the list of cbq_state_t's. */
+ if (cbq_list == cbqp)
+ cbq_list = cbqp->cbq_next;
+ else {
+ cbq_state_t *cp;
+
+ for (cp = cbq_list; cp != NULL; cp = cp->cbq_next)
+ if (cp->cbq_next == cbqp) {
+ cp->cbq_next = cbqp->cbq_next;
+ break;
+ }
+ ASSERT(cp != NULL);
+ }
+
+ /* deallocate cbq_state_t */
+ free(cbqp, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * cbq device interface
+ */
+
+altqdev_decl(cbq);
+
+int
+cbqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ return (0);
+}
+
+int
+cbqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct ifnet *ifp;
+ struct cbq_interface iface;
+ int err, error = 0;
+
+ while (cbq_list) {
+ ifp = cbq_list->ifnp.ifq_->altq_ifp;
+#if defined(__NetBSD__) || defined(__OpenBSD__)\
+ || (defined(__FreeBSD__) && __FreeBSD_version >= 501113)
+ sprintf(iface.cbq_ifacename, "%s", ifp->if_xname);
+#else
+ sprintf(iface.cbq_ifacename,
+ "%s%d", ifp->if_name, ifp->if_unit);
+#endif
+ err = cbq_ifdetach(&iface);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return (error);
+}
+
+int
+cbqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ int error = 0;
+
+ /* check cmd for superuser only */
+ switch (cmd) {
+ case CBQ_GETSTATS:
+ /* currently only command that an ordinary user can call */
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ error = priv_check(p, PRIV_ALTQ_MANAGE);
+#elif (__FreeBSD_version > 400000)
+ error = suser(p);
+#else
+ error = suser(p->p_ucred, &p->p_acflag);
+#endif
+ if (error)
+ return (error);
+ break;
+ }
+
+ switch (cmd) {
+
+ case CBQ_ENABLE:
+ error = cbq_set_enable((struct cbq_interface *)addr, ENABLE);
+ break;
+
+ case CBQ_DISABLE:
+ error = cbq_set_enable((struct cbq_interface *)addr, DISABLE);
+ break;
+
+ case CBQ_ADD_FILTER:
+ error = cbq_add_filter((struct cbq_add_filter *)addr);
+ break;
+
+ case CBQ_DEL_FILTER:
+ error = cbq_delete_filter((struct cbq_delete_filter *)addr);
+ break;
+
+ case CBQ_ADD_CLASS:
+ error = cbq_add_class((struct cbq_add_class *)addr);
+ break;
+
+ case CBQ_DEL_CLASS:
+ error = cbq_delete_class((struct cbq_delete_class *)addr);
+ break;
+
+ case CBQ_MODIFY_CLASS:
+ error = cbq_modify_class((struct cbq_modify_class *)addr);
+ break;
+
+ case CBQ_CLEAR_HIERARCHY:
+ error = cbq_clear_hierarchy((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_IF_ATTACH:
+ error = cbq_ifattach((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_IF_DETACH:
+ error = cbq_ifdetach((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_GETSTATS:
+ error = cbq_getstats((struct cbq_getstats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+#if 0
+/* for debug */
+static void cbq_class_dump(int);
+
+static void cbq_class_dump(i)
+ int i;
+{
+ struct rm_class *cl;
+ rm_class_stats_t *s;
+ struct _class_queue_ *q;
+
+ if (cbq_list == NULL) {
+ printf("cbq_class_dump: no cbq_state found\n");
+ return;
+ }
+ cl = cbq_list->cbq_class_tbl[i];
+
+ printf("class %d cl=%p\n", i, cl);
+ if (cl != NULL) {
+ s = &cl->stats_;
+ q = cl->q_;
+
+ printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n",
+ cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_);
+ printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n",
+ cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_,
+ cl->maxidle_);
+ printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n",
+ cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_);
+ printf("handle=%d, depth=%d, packets=%d, bytes=%d\n",
+ s->handle, s->depth,
+ (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes);
+	printf("over=%d, borrows=%d, drops=%d, overactions=%d, delays=%d\n",
+ s->over, s->borrows, (int)s->drop_cnt.packets,
+ s->overactions, s->delays);
+ printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n",
+ q->tail_, q->head_, q->qlen_, q->qlim_,
+ q->qthresh_, q->qtype_);
+ }
+}
+#endif /* 0 */
+
+#ifdef KLD_MODULE
+
+static struct altqsw cbq_sw =
+ {"cbq", cbqopen, cbqclose, cbqioctl};
+
+ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw);
+MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1);
+MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_CBQ */
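
One detail worth pulling out of altq_cbq.c above: clh_to_clp() and cbq_add_queue() share a slot convention in which a class handle is first probed at handle % CBQ_MAX_CLASSES and only on a miss does the full linear scan run. A stripped-down, user-space sketch of that lookup strategy (the demo struct, lookup(), and main() are assumptions for illustration):

#include <stddef.h>

#define CBQ_MAX_CLASSES 256

struct rm_class_demo {
	unsigned int handle;
};

/* optimistic home-slot probe, then linear scan -- mirrors clh_to_clp() */
static struct rm_class_demo *
lookup(struct rm_class_demo *tbl[], unsigned int handle)
{
	struct rm_class_demo *cl;
	unsigned int i;

	if (handle == 0)
		return (NULL);		/* handle 0 is reserved */
	i = handle % CBQ_MAX_CLASSES;
	if ((cl = tbl[i]) != NULL && cl->handle == handle)
		return (cl);
	for (i = 0; i < CBQ_MAX_CLASSES; i++)
		if ((cl = tbl[i]) != NULL && cl->handle == handle)
			return (cl);
	return (NULL);
}

int
main(void)
{
	static struct rm_class_demo *tbl[CBQ_MAX_CLASSES];
	struct rm_class_demo a = { 257 };

	tbl[5] = &a;	/* stored away from home slot 257 % 256 == 1 */
	return (lookup(tbl, 257) == &a ? 0 : 1);
}

The optimistic probe keeps the common case O(1) whenever user space picks non-colliding qids, while the fallback scan keeps colliding handles correct.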
diff --git a/freebsd/sys/contrib/altq/altq/altq_cbq.h b/freebsd/sys/contrib/altq/altq/altq_cbq.h
new file mode 100644
index 00000000..4b90beb4
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_cbq.h
@@ -0,0 +1,221 @@
+/* $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#ifndef _ALTQ_ALTQ_CBQ_HH_
+#define _ALTQ_ALTQ_CBQ_HH_
+
+#include <freebsd/altq/altq.h>
+#include <freebsd/altq/altq_rmclass.h>
+#include <freebsd/altq/altq_red.h>
+#include <freebsd/altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NULL_CLASS_HANDLE 0
+
+/* class flags should be same as class flags in rm_class.h */
+#define CBQCLF_RED 0x0001 /* use RED */
+#define CBQCLF_ECN 0x0002 /* use RED/ECN */
+#define CBQCLF_RIO 0x0004 /* use RIO */
+#define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
+#define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define CBQCLF_BORROW 0x0020 /* borrow from parent */
+
+/* class flags only for root class */
+#define CBQCLF_WRR 0x0100 /* weighted-round robin */
+#define CBQCLF_EFFICIENT 0x0200 /* work-conserving */
+
+/* class flags for special classes */
+#define CBQCLF_ROOTCLASS 0x1000 /* root class */
+#define CBQCLF_DEFCLASS 0x2000 /* default class */
+#ifdef ALTQ3_COMPAT
+#define CBQCLF_CTLCLASS 0x4000 /* control class */
+#endif
+#define CBQCLF_CLASSMASK 0xf000 /* class mask */
+
+#define CBQ_MAXQSIZE 200
+#define CBQ_MAXPRI RM_MAXPRIO
+
+typedef struct _cbq_class_stats_ {
+ u_int32_t handle;
+ u_int depth;
+
+ struct pktcntr xmit_cnt; /* packets sent in this class */
+ struct pktcntr drop_cnt; /* dropped packets */
+ u_int over; /* # times went over limit */
+ u_int borrows; /* # times tried to borrow */
+ u_int overactions; /* # times invoked overlimit action */
+ u_int delays; /* # times invoked delay actions */
+
+ /* other static class parameters useful for debugging */
+ int priority;
+ int maxidle;
+ int minidle;
+ int offtime;
+ int qmax;
+ int ns_per_byte;
+ int wrr_allot;
+
+ int qcnt; /* # packets in queue */
+ int avgidle;
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3];
+} class_stats_t;
+
+#ifdef ALTQ3_COMPAT
+/*
+ * Define structures associated with IOCTLS for cbq.
+ */
+
+/*
+ * Define the CBQ interface structure. This must be included in all
+ * IOCTL's such that the CBQ driver may find the appropriate CBQ module
+ * associated with the network interface to be affected.
+ */
+struct cbq_interface {
+ char cbq_ifacename[IFNAMSIZ];
+};
+
+typedef struct cbq_class_spec {
+ u_int priority;
+ u_int nano_sec_per_byte;
+ u_int maxq;
+ u_int maxidle;
+ int minidle;
+ u_int offtime;
+ u_int32_t parent_class_handle;
+ u_int32_t borrow_class_handle;
+
+ u_int pktsize;
+ int flags;
+} cbq_class_spec_t;
+
+struct cbq_add_class {
+ struct cbq_interface cbq_iface;
+
+ cbq_class_spec_t cbq_class;
+ u_int32_t cbq_class_handle;
+};
+
+struct cbq_delete_class {
+ struct cbq_interface cbq_iface;
+ u_int32_t cbq_class_handle;
+};
+
+struct cbq_modify_class {
+ struct cbq_interface cbq_iface;
+
+ cbq_class_spec_t cbq_class;
+ u_int32_t cbq_class_handle;
+};
+
+struct cbq_add_filter {
+ struct cbq_interface cbq_iface;
+ u_int32_t cbq_class_handle;
+ struct flow_filter cbq_filter;
+
+ u_long cbq_filter_handle;
+};
+
+struct cbq_delete_filter {
+ struct cbq_interface cbq_iface;
+ u_long cbq_filter_handle;
+};
+
+/* the number of classes is returned in the nclasses field */
+struct cbq_getstats {
+ struct cbq_interface iface;
+ int nclasses;
+ class_stats_t *stats;
+};
+
+/*
+ * Define IOCTLs for CBQ.
+ */
+#define CBQ_IF_ATTACH _IOW('Q', 1, struct cbq_interface)
+#define CBQ_IF_DETACH _IOW('Q', 2, struct cbq_interface)
+#define CBQ_ENABLE _IOW('Q', 3, struct cbq_interface)
+#define CBQ_DISABLE _IOW('Q', 4, struct cbq_interface)
+#define CBQ_CLEAR_HIERARCHY _IOW('Q', 5, struct cbq_interface)
+#define CBQ_ADD_CLASS _IOWR('Q', 7, struct cbq_add_class)
+#define CBQ_DEL_CLASS _IOW('Q', 8, struct cbq_delete_class)
+#define CBQ_MODIFY_CLASS _IOWR('Q', 9, struct cbq_modify_class)
+#define CBQ_ADD_FILTER _IOWR('Q', 10, struct cbq_add_filter)
+#define CBQ_DEL_FILTER _IOW('Q', 11, struct cbq_delete_filter)
+#define CBQ_GETSTATS _IOWR('Q', 12, struct cbq_getstats)
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+/*
+ * Define macros only good for kernel drivers and modules.
+ */
+#define CBQ_WATCHDOG (hz / 20)
+#define CBQ_TIMEOUT 10
+#define CBQ_LS_TIMEOUT (20 * hz / 1000)
+
+#define CBQ_MAX_CLASSES 256
+
+#ifdef ALTQ3_COMPAT
+#define CBQ_MAX_FILTERS 256
+
+#define DISABLE 0x00
+#define ENABLE 0x01
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * Define State structures.
+ */
+typedef struct cbqstate {
+#ifdef ALTQ3_COMPAT
+ struct cbqstate *cbq_next;
+#endif
+ int cbq_qlen; /* # of packets in cbq */
+ struct rm_class *cbq_class_tbl[CBQ_MAX_CLASSES];
+
+ struct rm_ifdat ifnp;
+ struct callout cbq_callout; /* for timeouts */
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier cbq_classifier;
+#endif
+} cbq_state_t;
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_ALTQ_ALTQ_CBQ_HH_ */
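
A second convention from this header shows up throughout altq_cbq.c: the low CBQCLF_* bits select behaviors (RED, borrowing, and so on), while the bits under CBQCLF_CLASSMASK must name at most one special class kind, which is why cbq_add_queue() switches on flags & CBQCLF_CLASSMASK and returns EINVAL in the default case. A small sketch of that validation (constants copied from the header; classmask_ok() and main() are hypothetical, and the ALTQ3 control class is left out):

#define CBQCLF_BORROW		0x0020	/* borrow from parent */
#define CBQCLF_ROOTCLASS	0x1000	/* root class */
#define CBQCLF_DEFCLASS		0x2000	/* default class */
#define CBQCLF_CLASSMASK	0xf000	/* class mask */

/* returns 0 when the flags name at most one special class kind */
static int
classmask_ok(int flags)
{
	switch (flags & CBQCLF_CLASSMASK) {
	case 0:			/* ordinary class */
	case CBQCLF_ROOTCLASS:	/* exactly the root */
	case CBQCLF_DEFCLASS:	/* exactly the default */
		return (0);
	default:		/* two or more kind bits set */
		return (-1);
	}
}

int
main(void)
{
	/* root and default together are rejected; borrow is orthogonal */
	return (classmask_ok(CBQCLF_ROOTCLASS | CBQCLF_DEFCLASS) == -1 &&
	    classmask_ok(CBQCLF_DEFCLASS | CBQCLF_BORROW) == 0 ? 0 : 1);
}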
diff --git a/freebsd/sys/contrib/altq/altq/altq_cdnr.c b/freebsd/sys/contrib/altq/altq/altq_cdnr.c
new file mode 100644
index 00000000..66095acc
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_cdnr.c
@@ -0,0 +1,1393 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $KAME: altq_cdnr.c,v 1.14 2003/09/05 22:40:36 itojun Exp $ */
+
+/*
+ * Copyright (C) 1999-2002
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include <freebsd/local/opt_altq.h>
+#if (__FreeBSD__ != 2)
+#include <freebsd/local/opt_inet.h>
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet6.h>
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/queue.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+
+#include <freebsd/altq/altq.h>
+#ifdef ALTQ3_COMPAT
+#include <freebsd/altq/altq_conf.h>
+#endif
+#include <freebsd/altq/altq_cdnr.h>
+
+#ifdef ALTQ3_COMPAT
+/*
+ * diffserv traffic conditioning module
+ */
+
+int altq_cdnr_enabled = 0;
+
+/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
+#ifdef ALTQ_CDNR
+
+/* tcb_list keeps all top-level cdnr's allocated. */
+static LIST_HEAD(, top_cdnr) tcb_list;
+
+static int altq_cdnr_input(struct mbuf *, int);
+static struct top_cdnr *tcb_lookup(char *ifname);
+static struct cdnr_block *cdnr_handle2cb(u_long);
+static u_long cdnr_cb2handle(struct cdnr_block *);
+static void *cdnr_cballoc(struct top_cdnr *, int,
+ struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
+static void cdnr_cbdestroy(void *);
+static int tca_verify_action(struct tc_action *);
+static void tca_import_action(struct tc_action *, struct tc_action *);
+static void tca_invalidate_action(struct tc_action *);
+
+static int generic_element_destroy(struct cdnr_block *);
+static struct top_cdnr *top_create(struct ifaltq *);
+static int top_destroy(struct top_cdnr *);
+static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
+static int element_destroy(struct cdnr_block *);
+static void tb_import_profile(struct tbe *, struct tb_profile *);
+static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
+ struct tc_action *, struct tc_action *);
+static int tbm_destroy(struct tbmeter *);
+static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+static struct trtcm *trtcm_create(struct top_cdnr *,
+ struct tb_profile *, struct tb_profile *,
+ struct tc_action *, struct tc_action *, struct tc_action *,
+ int);
+static int trtcm_destroy(struct trtcm *);
+static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+static struct tswtcm *tswtcm_create(struct top_cdnr *,
+ u_int32_t, u_int32_t, u_int32_t,
+ struct tc_action *, struct tc_action *, struct tc_action *);
+static int tswtcm_destroy(struct tswtcm *);
+static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+
+static int cdnrcmd_if_attach(char *);
+static int cdnrcmd_if_detach(char *);
+static int cdnrcmd_add_element(struct cdnr_add_element *);
+static int cdnrcmd_delete_element(struct cdnr_delete_element *);
+static int cdnrcmd_add_filter(struct cdnr_add_filter *);
+static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
+static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
+static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
+static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
+static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
+static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
+static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
+static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
+static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
+static int cdnrcmd_get_stats(struct cdnr_get_stats *);
+
+altqdev_decl(cdnr);
+
+/*
+ * top level input function called from ip_input.
+ * should be called before converting header fields to host-byte-order.
+ */
+int
+altq_cdnr_input(m, af)
+ struct mbuf *m;
+ int af; /* address family */
+{
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct top_cdnr *top;
+ struct tc_action *tca;
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo pktinfo;
+
+ ifp = m->m_pkthdr.rcvif;
+ if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
+ /* traffic conditioner is not enabled on this interface */
+ return (1);
+
+ top = ifp->if_snd.altq_cdnr;
+
+ ip = mtod(m, struct ip *);
+#ifdef INET6
+ if (af == AF_INET6) {
+ u_int32_t flowlabel;
+
+ flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
+ pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
+ } else
+#endif
+ pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
+ pktinfo.pkt_len = m_pktlen(m);
+
+ tca = NULL;
+
+ cb = acc_classify(&top->tc_classifier, m, af);
+ if (cb != NULL)
+ tca = &cb->cb_action;
+
+ if (tca == NULL)
+ tca = &top->tc_block.cb_action;
+
+ while (1) {
+ PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
+
+ switch (tca->tca_code) {
+ case TCACODE_PASS:
+ return (1);
+ case TCACODE_DROP:
+ m_freem(m);
+ return (0);
+ case TCACODE_RETURN:
+ return (0);
+ case TCACODE_MARK:
+#ifdef INET6
+ if (af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ flowlabel = (tca->tca_dscp << 20) |
+ (flowlabel & ~(DSCP_MASK << 20));
+ ip6->ip6_flow = htonl(flowlabel);
+ } else
+#endif
+ ip->ip_tos = tca->tca_dscp |
+ (ip->ip_tos & DSCP_CUMASK);
+ return (1);
+ case TCACODE_NEXT:
+ cb = tca->tca_next;
+ tca = (*cb->cb_input)(cb, &pktinfo);
+ break;
+ case TCACODE_NONE:
+ default:
+ return (1);
+ }
+ }
+}
+
+static struct top_cdnr *
+tcb_lookup(ifname)
+ char *ifname;
+{
+ struct top_cdnr *top;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(ifname)) != NULL)
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (top->tc_ifq->altq_ifp == ifp)
+ return (top);
+ return (NULL);
+}
+
+static struct cdnr_block *
+cdnr_handle2cb(handle)
+ u_long handle;
+{
+ struct cdnr_block *cb;
+
+ cb = (struct cdnr_block *)handle;
+ if (handle != ALIGN(cb))
+ return (NULL);
+
+ if (cb == NULL || cb->cb_handle != handle)
+ return (NULL);
+ return (cb);
+}
+
+static u_long
+cdnr_cb2handle(cb)
+ struct cdnr_block *cb;
+{
+ return (cb->cb_handle);
+}
+
+static void *
+cdnr_cballoc(top, type, input_func)
+ struct top_cdnr *top;
+ int type;
+ struct tc_action *(*input_func)(struct cdnr_block *,
+ struct cdnr_pktinfo *);
+{
+ struct cdnr_block *cb;
+ int size;
+
+ switch (type) {
+ case TCETYPE_TOP:
+ size = sizeof(struct top_cdnr);
+ break;
+ case TCETYPE_ELEMENT:
+ size = sizeof(struct cdnr_block);
+ break;
+ case TCETYPE_TBMETER:
+ size = sizeof(struct tbmeter);
+ break;
+ case TCETYPE_TRTCM:
+ size = sizeof(struct trtcm);
+ break;
+ case TCETYPE_TSWTCM:
+ size = sizeof(struct tswtcm);
+ break;
+ default:
+ return (NULL);
+ }
+
+ cb = malloc(size, M_DEVBUF, M_WAITOK);
+ if (cb == NULL)
+ return (NULL);
+ bzero(cb, size);
+
+ cb->cb_len = size;
+ cb->cb_type = type;
+ cb->cb_ref = 0;
+ cb->cb_handle = (u_long)cb;
+ if (top == NULL)
+ cb->cb_top = (struct top_cdnr *)cb;
+ else
+ cb->cb_top = top;
+
+ if (input_func != NULL) {
+ /*
+ * if this cdnr has an action function,
+		 * make its tc_action call itself.
+ */
+ cb->cb_action.tca_code = TCACODE_NEXT;
+ cb->cb_action.tca_next = cb;
+ cb->cb_input = input_func;
+ } else
+ cb->cb_action.tca_code = TCACODE_NONE;
+
+ /* if this isn't top, register the element to the top level cdnr */
+ if (top != NULL)
+ LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
+
+ return ((void *)cb);
+}
+
+static void
+cdnr_cbdestroy(cblock)
+ void *cblock;
+{
+ struct cdnr_block *cb = cblock;
+
+ /* delete filters belonging to this cdnr */
+ acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
+
+ /* remove from the top level cdnr */
+ if (cb->cb_top != cblock)
+ LIST_REMOVE(cb, cb_next);
+
+ free(cb, M_DEVBUF);
+}
+
+/*
+ * conditioner common destroy routine
+ */
+static int
+generic_element_destroy(cb)
+ struct cdnr_block *cb;
+{
+ int error = 0;
+
+ switch (cb->cb_type) {
+ case TCETYPE_TOP:
+ error = top_destroy((struct top_cdnr *)cb);
+ break;
+ case TCETYPE_ELEMENT:
+ error = element_destroy(cb);
+ break;
+ case TCETYPE_TBMETER:
+ error = tbm_destroy((struct tbmeter *)cb);
+ break;
+ case TCETYPE_TRTCM:
+ error = trtcm_destroy((struct trtcm *)cb);
+ break;
+ case TCETYPE_TSWTCM:
+ error = tswtcm_destroy((struct tswtcm *)cb);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+static int
+tca_verify_action(utca)
+ struct tc_action *utca;
+{
+ switch (utca->tca_code) {
+ case TCACODE_PASS:
+ case TCACODE_DROP:
+ case TCACODE_MARK:
+ /* these are ok */
+ break;
+
+ case TCACODE_HANDLE:
+ /* verify handle value */
+ if (cdnr_handle2cb(utca->tca_handle) == NULL)
+ return (-1);
+ break;
+
+ case TCACODE_NONE:
+ case TCACODE_RETURN:
+ case TCACODE_NEXT:
+ default:
+ /* should not be passed from a user */
+ return (-1);
+ }
+ return (0);
+}
+
+static void
+tca_import_action(ktca, utca)
+ struct tc_action *ktca, *utca;
+{
+ struct cdnr_block *cb;
+
+ *ktca = *utca;
+ if (ktca->tca_code == TCACODE_HANDLE) {
+ cb = cdnr_handle2cb(ktca->tca_handle);
+ if (cb == NULL) {
+ ktca->tca_code = TCACODE_NONE;
+ return;
+ }
+ ktca->tca_code = TCACODE_NEXT;
+ ktca->tca_next = cb;
+ cb->cb_ref++;
+ } else if (ktca->tca_code == TCACODE_MARK) {
+ ktca->tca_dscp &= DSCP_MASK;
+ }
+ return;
+}
+
+static void
+tca_invalidate_action(tca)
+ struct tc_action *tca;
+{
+ struct cdnr_block *cb;
+
+ if (tca->tca_code == TCACODE_NEXT) {
+ cb = tca->tca_next;
+ if (cb == NULL)
+ return;
+ cb->cb_ref--;
+ }
+ tca->tca_code = TCACODE_NONE;
+}
+
+/*
+ * top level traffic conditioner
+ */
+static struct top_cdnr *
+top_create(ifq)
+ struct ifaltq *ifq;
+{
+ struct top_cdnr *top;
+
+ if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
+ return (NULL);
+
+ top->tc_ifq = ifq;
+ /* set default action for the top level conditioner */
+ top->tc_block.cb_action.tca_code = TCACODE_PASS;
+
+ LIST_INSERT_HEAD(&tcb_list, top, tc_next);
+
+ ifq->altq_cdnr = top;
+
+ return (top);
+}
+
+static int
+top_destroy(top)
+ struct top_cdnr *top;
+{
+ struct cdnr_block *cb;
+
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ ALTQ_CLEAR_CNDTNING(top->tc_ifq);
+ top->tc_ifq->altq_cdnr = NULL;
+
+ /*
+ * destroy all the conditioner elements belonging to this interface
+ */
+ while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
+ while (cb != NULL && cb->cb_ref > 0)
+ cb = LIST_NEXT(cb, cb_next);
+ if (cb != NULL)
+ generic_element_destroy(cb);
+ }
+
+ LIST_REMOVE(top, tc_next);
+
+ cdnr_cbdestroy(top);
+
+ /* if there is no active conditioner, remove the input hook */
+ if (altq_input != NULL) {
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ break;
+ if (top == NULL)
+ altq_input = NULL;
+ }
+
+ return (0);
+}
+
+/*
+ * simple tc elements without an input function (e.g., droppers and markers).
+ */
+static struct cdnr_block *
+element_create(top, action)
+ struct top_cdnr *top;
+ struct tc_action *action;
+{
+ struct cdnr_block *cb;
+
+ if (tca_verify_action(action) < 0)
+ return (NULL);
+
+ if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL)
+ return (NULL);
+
+ tca_import_action(&cb->cb_action, action);
+
+ return (cb);
+}
+
+static int
+element_destroy(cb)
+ struct cdnr_block *cb;
+{
+ if (cb->cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&cb->cb_action);
+
+ cdnr_cbdestroy(cb);
+ return (0);
+}
+
+/*
+ * internal representation of token bucket parameters
+ *	rate:	bytes_per_unittime << 32
+ *		= (((bits_per_sec) / 8) << 32) / machclk_freq
+ *	depth:	bytes << 32
+ */
+#define TB_SHIFT 32
+#define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT)
+#define TB_UNSCALE(x) ((x) >> TB_SHIFT)
+
+static void
+tb_import_profile(tb, profile)
+ struct tbe *tb;
+ struct tb_profile *profile;
+{
+ tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
+ tb->depth = TB_SCALE(profile->depth);
+ if (tb->rate > 0)
+ tb->filluptime = tb->depth / tb->rate;
+ else
+ tb->filluptime = 0xffffffffffffffffLL;
+ tb->token = tb->depth;
+ tb->last = read_machclk();
+}
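For reference, the fixed-point conversion above can be exercised in isolation. A minimal userland sketch, assuming an illustrative 1 GHz machine clock and a hypothetical 10 Mbit/s, 15 Kbyte profile:

/*
 * Standalone check of the TB_SCALE arithmetic in tb_import_profile();
 * all numeric inputs below are assumptions for illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define TB_SHIFT	32
#define TB_SCALE(x)	((uint64_t)(x) << TB_SHIFT)

int main(void)
{
	uint64_t machclk_freq = 1000000000ULL;	/* assumed 1 GHz clock */
	uint64_t rate_bps = 10000000ULL;	/* 10 Mbit/s profile */
	uint64_t depth_bytes = 15000ULL;	/* bucket depth in bytes */

	/* bytes per clock tick, scaled by 2^32 to preserve precision */
	uint64_t rate = TB_SCALE(rate_bps / 8) / machclk_freq;
	uint64_t depth = TB_SCALE(depth_bytes);
	uint64_t filluptime = depth / rate;	/* ticks to refill from empty */

	/* 15000 bytes at 1.25 MB/s should take 12 ms to accumulate */
	printf("fill-up time = %.3f ms\n",
	    1000.0 * filluptime / machclk_freq);
	return (0);
}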
+
+/*
+ * simple token bucket meter
+ */
+static struct tbmeter *
+tbm_create(top, profile, in_action, out_action)
+ struct top_cdnr *top;
+ struct tb_profile *profile;
+ struct tc_action *in_action, *out_action;
+{
+ struct tbmeter *tbm = NULL;
+
+ if (tca_verify_action(in_action) < 0
+ || tca_verify_action(out_action) < 0)
+ return (NULL);
+
+ if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER,
+ tbm_input)) == NULL)
+ return (NULL);
+
+ tb_import_profile(&tbm->tb, profile);
+
+ tca_import_action(&tbm->in_action, in_action);
+ tca_import_action(&tbm->out_action, out_action);
+
+ return (tbm);
+}
+
+static int
+tbm_destroy(tbm)
+ struct tbmeter *tbm;
+{
+ if (tbm->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tbm->in_action);
+ tca_invalidate_action(&tbm->out_action);
+
+ cdnr_cbdestroy(tbm);
+ return (0);
+}
+
+static struct tc_action *
+tbm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct tbmeter *tbm = (struct tbmeter *)cb;
+ u_int64_t len;
+ u_int64_t interval, now;
+
+ len = TB_SCALE(pktinfo->pkt_len);
+
+ if (tbm->tb.token < len) {
+ now = read_machclk();
+ interval = now - tbm->tb.last;
+ if (interval >= tbm->tb.filluptime)
+ tbm->tb.token = tbm->tb.depth;
+ else {
+ tbm->tb.token += interval * tbm->tb.rate;
+ if (tbm->tb.token > tbm->tb.depth)
+ tbm->tb.token = tbm->tb.depth;
+ }
+ tbm->tb.last = now;
+ }
+
+ if (tbm->tb.token < len) {
+ PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
+ return (&tbm->out_action);
+ }
+
+ tbm->tb.token -= len;
+ PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
+ return (&tbm->in_action);
+}
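The replenish-then-test pattern in tbm_input() carries over to userland unchanged. A self-contained sketch, with the TB_SCALE shift written inline and the clock value supplied by the caller (the struct mirrors struct tbe; everything else is an assumption):

#include <stdint.h>

struct tb {
	uint64_t rate, depth, token, filluptime, last;
};

/* returns 1 if the packet is in profile, 0 if out of profile */
static int
tb_meter(struct tb *tb, uint64_t now, uint32_t pkt_len)
{
	uint64_t len = (uint64_t)pkt_len << 32;		/* TB_SCALE */

	if (tb->token < len) {
		uint64_t interval = now - tb->last;

		/* lazy refill, capped at the bucket depth */
		if (interval >= tb->filluptime)
			tb->token = tb->depth;
		else {
			tb->token += interval * tb->rate;
			if (tb->token > tb->depth)
				tb->token = tb->depth;
		}
		tb->last = now;
	}
	if (tb->token < len)
		return (0);		/* still short of tokens */
	tb->token -= len;
	return (1);
}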
+
+/*
+ * two rate three color marker
+ * as described in draft-heinanen-diffserv-trtcm-01.txt
+ */
+static struct trtcm *
+trtcm_create(top, cmtd_profile, peak_profile,
+ green_action, yellow_action, red_action, coloraware)
+ struct top_cdnr *top;
+ struct tb_profile *cmtd_profile, *peak_profile;
+ struct tc_action *green_action, *yellow_action, *red_action;
+ int coloraware;
+{
+ struct trtcm *tcm = NULL;
+
+ if (tca_verify_action(green_action) < 0
+ || tca_verify_action(yellow_action) < 0
+ || tca_verify_action(red_action) < 0)
+ return (NULL);
+
+ if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM,
+ trtcm_input)) == NULL)
+ return (NULL);
+
+ tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
+ tb_import_profile(&tcm->peak_tb, peak_profile);
+
+ tca_import_action(&tcm->green_action, green_action);
+ tca_import_action(&tcm->yellow_action, yellow_action);
+ tca_import_action(&tcm->red_action, red_action);
+
+ /* set dscps to use */
+ if (tcm->green_action.tca_code == TCACODE_MARK)
+ tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->green_dscp = DSCP_AF11;
+ if (tcm->yellow_action.tca_code == TCACODE_MARK)
+ tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->yellow_dscp = DSCP_AF12;
+ if (tcm->red_action.tca_code == TCACODE_MARK)
+ tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->red_dscp = DSCP_AF13;
+
+ tcm->coloraware = coloraware;
+
+ return (tcm);
+}
+
+static int
+trtcm_destroy(tcm)
+ struct trtcm *tcm;
+{
+ if (tcm->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tcm->green_action);
+ tca_invalidate_action(&tcm->yellow_action);
+ tca_invalidate_action(&tcm->red_action);
+
+ cdnr_cbdestroy(tcm);
+ return (0);
+}
+
+static struct tc_action *
+trtcm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct trtcm *tcm = (struct trtcm *)cb;
+ u_int64_t len;
+ u_int64_t interval, now;
+ u_int8_t color;
+
+ len = TB_SCALE(pktinfo->pkt_len);
+ if (tcm->coloraware) {
+ color = pktinfo->pkt_dscp;
+ if (color != tcm->yellow_dscp && color != tcm->red_dscp)
+ color = tcm->green_dscp;
+ } else {
+ /* if color-blind, precolor it as green */
+ color = tcm->green_dscp;
+ }
+
+ now = read_machclk();
+ if (tcm->cmtd_tb.token < len) {
+ interval = now - tcm->cmtd_tb.last;
+ if (interval >= tcm->cmtd_tb.filluptime)
+ tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
+ else {
+ tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
+ if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
+ tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
+ }
+ tcm->cmtd_tb.last = now;
+ }
+ if (tcm->peak_tb.token < len) {
+ interval = now - tcm->peak_tb.last;
+ if (interval >= tcm->peak_tb.filluptime)
+ tcm->peak_tb.token = tcm->peak_tb.depth;
+ else {
+ tcm->peak_tb.token += interval * tcm->peak_tb.rate;
+ if (tcm->peak_tb.token > tcm->peak_tb.depth)
+ tcm->peak_tb.token = tcm->peak_tb.depth;
+ }
+ tcm->peak_tb.last = now;
+ }
+
+ if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
+ pktinfo->pkt_dscp = tcm->red_dscp;
+ PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
+ return (&tcm->red_action);
+ }
+
+ if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
+ pktinfo->pkt_dscp = tcm->yellow_dscp;
+ tcm->peak_tb.token -= len;
+ PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
+ return (&tcm->yellow_action);
+ }
+
+ pktinfo->pkt_dscp = tcm->green_dscp;
+ tcm->cmtd_tb.token -= len;
+ tcm->peak_tb.token -= len;
+ PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
+ return (&tcm->green_action);
+}
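Stripped of the token-bucket bookkeeping, the marking policy above reduces to two ordered tests. A sketch of just that decision, with bucket conformance precomputed as booleans (an abstraction of the code above, not the kernel data path):

enum color { GREEN, YELLOW, RED };

static enum color
trtcm_color(enum color precolor, int peak_conforms, int cmtd_conforms)
{
	/* red if precolored red or the peak bucket lacks tokens */
	if (precolor == RED || !peak_conforms)
		return (RED);
	/* yellow if precolored yellow or the committed bucket lacks tokens */
	if (precolor == YELLOW || !cmtd_conforms)
		return (YELLOW);
	return (GREEN);
}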
+
+/*
+ * time sliding window three color marker
+ * as described in draft-fang-diffserv-tc-tswtcm-00.txt
+ */
+static struct tswtcm *
+tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
+ green_action, yellow_action, red_action)
+ struct top_cdnr *top;
+ u_int32_t cmtd_rate, peak_rate, avg_interval;
+ struct tc_action *green_action, *yellow_action, *red_action;
+{
+ struct tswtcm *tsw;
+
+ if (tca_verify_action(green_action) < 0
+ || tca_verify_action(yellow_action) < 0
+ || tca_verify_action(red_action) < 0)
+ return (NULL);
+
+ if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM,
+ tswtcm_input)) == NULL)
+ return (NULL);
+
+ tca_import_action(&tsw->green_action, green_action);
+ tca_import_action(&tsw->yellow_action, yellow_action);
+ tca_import_action(&tsw->red_action, red_action);
+
+ /* set dscps to use */
+ if (tsw->green_action.tca_code == TCACODE_MARK)
+ tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->green_dscp = DSCP_AF11;
+ if (tsw->yellow_action.tca_code == TCACODE_MARK)
+ tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->yellow_dscp = DSCP_AF12;
+ if (tsw->red_action.tca_code == TCACODE_MARK)
+ tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->red_dscp = DSCP_AF13;
+
+ /* convert rates from bits/sec to bytes/sec */
+ tsw->cmtd_rate = cmtd_rate / 8;
+ tsw->peak_rate = peak_rate / 8;
+ tsw->avg_rate = 0;
+
+ /* timewin is converted from msec to machine clock unit */
+ tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
+
+ return (tsw);
+}
+
+static int
+tswtcm_destroy(tsw)
+ struct tswtcm *tsw;
+{
+ if (tsw->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tsw->green_action);
+ tca_invalidate_action(&tsw->yellow_action);
+ tca_invalidate_action(&tsw->red_action);
+
+ cdnr_cbdestroy(tsw);
+ return (0);
+}
+
+static struct tc_action *
+tswtcm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct tswtcm *tsw = (struct tswtcm *)cb;
+ int len;
+ u_int32_t avg_rate;
+ u_int64_t interval, now, tmp;
+
+ /*
+ * rate estimator
+ */
+ len = pktinfo->pkt_len;
+ now = read_machclk();
+
+ interval = now - tsw->t_front;
+ /*
+ * calculate average rate:
+ * avg = (avg * timewin + pkt_len)/(timewin + interval)
+ * pkt_len needs to be multiplied by machclk_freq in order to
+ * get (bytes/sec).
+ * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
+ * less than 32 bits, the following 64-bit operation has enough
+ * precision.
+ */
+ tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
+ + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
+ tsw->avg_rate = avg_rate = (u_int32_t)tmp;
+ tsw->t_front = now;
+
+ /*
+ * marker
+ */
+ if (avg_rate > tsw->cmtd_rate) {
+ u_int32_t randval = arc4random() % avg_rate;
+
+ if (avg_rate > tsw->peak_rate) {
+ if (randval < avg_rate - tsw->peak_rate) {
+ /* mark red */
+ pktinfo->pkt_dscp = tsw->red_dscp;
+ PKTCNTR_ADD(&tsw->red_cnt, len);
+ return (&tsw->red_action);
+ } else if (randval < avg_rate - tsw->cmtd_rate)
+ goto mark_yellow;
+ } else {
+ /* peak_rate >= avg_rate > cmtd_rate */
+ if (randval < avg_rate - tsw->cmtd_rate) {
+ mark_yellow:
+ pktinfo->pkt_dscp = tsw->yellow_dscp;
+ PKTCNTR_ADD(&tsw->yellow_cnt, len);
+ return (&tsw->yellow_action);
+ }
+ }
+ }
+
+ /* mark green */
+ pktinfo->pkt_dscp = tsw->green_dscp;
+ PKTCNTR_ADD(&tsw->green_cnt, len);
+ return (&tsw->green_action);
+}
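The estimator comment above translates directly into a standalone helper, which makes the unit handling easy to verify. A sketch (nothing here beyond the formula already in the code; parameter values are the caller's):

#include <stdint.h>

static uint32_t
tsw_estimate(uint32_t avg_rate, uint64_t timewin, uint64_t interval,
    uint32_t pkt_len, uint64_t machclk_freq)
{
	/*
	 * avg = (avg * timewin + pkt_len * machclk_freq)
	 *	/ (timewin + interval)
	 * multiplying pkt_len by machclk_freq converts bytes per tick
	 * into bytes per second, so avg stays in bytes/sec.
	 */
	uint64_t tmp = ((uint64_t)avg_rate * timewin +
	    (uint64_t)pkt_len * machclk_freq) / (timewin + interval);

	return ((uint32_t)tmp);
}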
+
+/*
+ * ioctl requests
+ */
+static int
+cdnrcmd_if_attach(ifname)
+ char *ifname;
+{
+ struct ifnet *ifp;
+ struct top_cdnr *top;
+
+ if ((ifp = ifunit(ifname)) == NULL)
+ return (EBADF);
+
+ if (ifp->if_snd.altq_cdnr != NULL)
+ return (EBUSY);
+
+ if ((top = top_create(&ifp->if_snd)) == NULL)
+ return (ENOMEM);
+ return (0);
+}
+
+static int
+cdnrcmd_if_detach(ifname)
+ char *ifname;
+{
+ struct top_cdnr *top;
+
+ if ((top = tcb_lookup(ifname)) == NULL)
+ return (EBADF);
+
+ return top_destroy(top);
+}
+
+static int
+cdnrcmd_add_element(ap)
+ struct cdnr_add_element *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ cb = element_create(top, &ap->action);
+ if (cb == NULL)
+ return (EINVAL);
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(cb);
+ return (0);
+}
+
+static int
+cdnrcmd_delete_element(ap)
+ struct cdnr_delete_element *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (cb->cb_type != TCETYPE_ELEMENT)
+ return generic_element_destroy(cb);
+
+ return element_destroy(cb);
+}
+
+static int
+cdnrcmd_add_filter(ap)
+ struct cdnr_add_filter *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&top->tc_classifier, &ap->filter,
+ cb, &ap->filter_handle);
+}
+
+static int
+cdnrcmd_delete_filter(ap)
+ struct cdnr_delete_filter *ap;
+{
+ struct top_cdnr *top;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
+}
+
+static int
+cdnrcmd_add_tbm(ap)
+ struct cdnr_add_tbmeter *ap;
+{
+ struct top_cdnr *top;
+ struct tbmeter *tbm;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
+ if (tbm == NULL)
+ return (EINVAL);
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_tbm(ap)
+ struct cdnr_modify_tbmeter *ap;
+{
+ struct tbmeter *tbm;
+
+ if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ tb_import_profile(&tbm->tb, &ap->profile);
+
+ return (0);
+}
+
+static int
+cdnrcmd_tbm_stats(ap)
+ struct cdnr_tbmeter_stats *ap;
+{
+ struct tbmeter *tbm;
+
+ if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ ap->in_cnt = tbm->in_cnt;
+ ap->out_cnt = tbm->out_cnt;
+
+ return (0);
+}
+
+static int
+cdnrcmd_add_trtcm(ap)
+ struct cdnr_add_trtcm *ap;
+{
+ struct top_cdnr *top;
+ struct trtcm *tcm;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
+ &ap->green_action, &ap->yellow_action,
+ &ap->red_action, ap->coloraware);
+ if (tcm == NULL)
+ return (EINVAL);
+
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_trtcm(ap)
+ struct cdnr_modify_trtcm *ap;
+{
+ struct trtcm *tcm;
+
+ if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
+ tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
+
+ return (0);
+}
+
+static int
+cdnrcmd_tcm_stats(ap)
+ struct cdnr_tcm_stats *ap;
+{
+ struct cdnr_block *cb;
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (cb->cb_type == TCETYPE_TRTCM) {
+ struct trtcm *tcm = (struct trtcm *)cb;
+
+ ap->green_cnt = tcm->green_cnt;
+ ap->yellow_cnt = tcm->yellow_cnt;
+ ap->red_cnt = tcm->red_cnt;
+ } else if (cb->cb_type == TCETYPE_TSWTCM) {
+ struct tswtcm *tsw = (struct tswtcm *)cb;
+
+ ap->green_cnt = tsw->green_cnt;
+ ap->yellow_cnt = tsw->yellow_cnt;
+ ap->red_cnt = tsw->red_cnt;
+ } else
+ return (EINVAL);
+
+ return (0);
+}
+
+static int
+cdnrcmd_add_tswtcm(ap)
+ struct cdnr_add_tswtcm *ap;
+{
+ struct top_cdnr *top;
+ struct tswtcm *tsw;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if (ap->cmtd_rate > ap->peak_rate)
+ return (EINVAL);
+
+ tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
+ ap->avg_interval, &ap->green_action,
+ &ap->yellow_action, &ap->red_action);
+ if (tsw == NULL)
+ return (EINVAL);
+
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_tswtcm(ap)
+ struct cdnr_modify_tswtcm *ap;
+{
+ struct tswtcm *tsw;
+
+ if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (ap->cmtd_rate > ap->peak_rate)
+ return (EINVAL);
+
+ /* convert rates from bits/sec to bytes/sec */
+ tsw->cmtd_rate = ap->cmtd_rate / 8;
+ tsw->peak_rate = ap->peak_rate / 8;
+ tsw->avg_rate = 0;
+
+ /* timewin is converted from msec to machine clock unit */
+ tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
+
+ return (0);
+}
+
+static int
+cdnrcmd_get_stats(ap)
+ struct cdnr_get_stats *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+ struct tbmeter *tbm;
+ struct trtcm *tcm;
+ struct tswtcm *tsw;
+ struct tce_stats tce, *usp;
+ int error, n, nskip, nelements;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ /* copy action stats */
+ bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
+
+ /* stats for each element */
+ nelements = ap->nelements;
+ usp = ap->tce_stats;
+ if (nelements <= 0 || usp == NULL)
+ return (0);
+
+ nskip = ap->nskip;
+ n = 0;
+ LIST_FOREACH(cb, &top->tc_elements, cb_next) {
+ if (nskip > 0) {
+ nskip--;
+ continue;
+ }
+
+ bzero(&tce, sizeof(tce));
+ tce.tce_handle = cb->cb_handle;
+ tce.tce_type = cb->cb_type;
+ switch (cb->cb_type) {
+ case TCETYPE_TBMETER:
+ tbm = (struct tbmeter *)cb;
+ tce.tce_cnts[0] = tbm->in_cnt;
+ tce.tce_cnts[1] = tbm->out_cnt;
+ break;
+ case TCETYPE_TRTCM:
+ tcm = (struct trtcm *)cb;
+ tce.tce_cnts[0] = tcm->green_cnt;
+ tce.tce_cnts[1] = tcm->yellow_cnt;
+ tce.tce_cnts[2] = tcm->red_cnt;
+ break;
+ case TCETYPE_TSWTCM:
+ tsw = (struct tswtcm *)cb;
+ tce.tce_cnts[0] = tsw->green_cnt;
+ tce.tce_cnts[1] = tsw->yellow_cnt;
+ tce.tce_cnts[2] = tsw->red_cnt;
+ break;
+ default:
+ continue;
+ }
+
+ if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
+ sizeof(tce))) != 0)
+ return (error);
+
+ if (++n == nelements)
+ break;
+ }
+ ap->nelements = n;
+
+ return (0);
+}
+
+/*
+ * conditioner device interface
+ */
+int
+cdnropen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ if (machclk_freq == 0)
+ init_machclk();
+
+ if (machclk_freq == 0) {
+ printf("cdnr: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+cdnrclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct top_cdnr *top;
+ int err, error = 0;
+
+ while ((top = LIST_FIRST(&tcb_list)) != NULL) {
+ /* destroy all */
+ err = top_destroy(top);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+ altq_input = NULL;
+
+ return (error);
+}
+
+int
+cdnrioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct top_cdnr *top;
+ struct cdnr_interface *ifacep;
+ int s, error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case CDNR_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ switch (cmd) {
+
+ case CDNR_IF_ATTACH:
+ ifacep = (struct cdnr_interface *)addr;
+ error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
+ break;
+
+ case CDNR_IF_DETACH:
+ ifacep = (struct cdnr_interface *)addr;
+ error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
+ break;
+
+ case CDNR_ENABLE:
+ case CDNR_DISABLE:
+ ifacep = (struct cdnr_interface *)addr;
+ if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+
+ case CDNR_ENABLE:
+ ALTQ_SET_CNDTNING(top->tc_ifq);
+ if (altq_input == NULL)
+ altq_input = altq_cdnr_input;
+ break;
+
+ case CDNR_DISABLE:
+ ALTQ_CLEAR_CNDTNING(top->tc_ifq);
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ break;
+ if (top == NULL)
+ altq_input = NULL;
+ break;
+ }
+ break;
+
+ case CDNR_ADD_ELEM:
+ error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
+ break;
+
+ case CDNR_DEL_ELEM:
+ error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
+ break;
+
+ case CDNR_ADD_TBM:
+ error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
+ break;
+
+ case CDNR_MOD_TBM:
+ error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
+ break;
+
+ case CDNR_TBM_STATS:
+ error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
+ break;
+
+ case CDNR_ADD_TCM:
+ error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
+ break;
+
+ case CDNR_MOD_TCM:
+ error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
+ break;
+
+ case CDNR_TCM_STATS:
+ error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
+ break;
+
+ case CDNR_ADD_FILTER:
+ error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
+ break;
+
+ case CDNR_DEL_FILTER:
+ error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
+ break;
+
+ case CDNR_GETSTATS:
+ error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
+ break;
+
+ case CDNR_ADD_TSW:
+ error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
+ break;
+
+ case CDNR_MOD_TSW:
+ error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ splx(s);
+
+ return error;
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw cdnr_sw =
+ {"cdnr", cdnropen, cdnrclose, cdnrioctl};
+
+ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ3_COMPAT */
+#endif /* ALTQ_CDNR */
diff --git a/freebsd/sys/contrib/altq/altq/altq_cdnr.h b/freebsd/sys/contrib/altq/altq/altq_cdnr.h
new file mode 100644
index 00000000..b765833f
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_cdnr.h
@@ -0,0 +1,335 @@
+/* $KAME: altq_cdnr.h,v 1.9 2003/07/10 12:07:48 kjc Exp $ */
+
+/*
+ * Copyright (C) 1999-2002
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_CDNR_HH_
+#define _ALTQ_ALTQ_CDNR_HH_
+
+#include <freebsd/altq/altq.h>
+
+/*
+ * traffic conditioner element types
+ */
+#define TCETYPE_NONE 0
+#define TCETYPE_TOP 1 /* top level conditioner */
+#define TCETYPE_ELEMENT 2 /* a simple tc element */
+#define TCETYPE_TBMETER 3 /* token bucket meter */
+#define TCETYPE_TRTCM 4 /* (two-rate) three color marker */
+#define	TCETYPE_TSWTCM	5	/* time sliding window 3-color marker */
+
+/*
+ * traffic conditioner action
+ */
+struct cdnr_block;
+
+struct tc_action {
+ int tca_code; /* e.g., TCACODE_PASS */
+ /* tca_code dependent variable */
+ union {
+ u_long un_value; /* template */
+ u_int8_t un_dscp; /* diffserv code point */
+ u_long un_handle; /* tc action handle */
+ struct cdnr_block *un_next; /* next tc element block */
+ } tca_un;
+};
+#define tca_value tca_un.un_value
+#define tca_dscp tca_un.un_dscp
+#define tca_handle tca_un.un_handle
+#define tca_next tca_un.un_next
+
+#define TCACODE_NONE 0 /* action is not set */
+#define TCACODE_PASS 1 /* pass this packet */
+#define TCACODE_DROP 2 /* discard this packet */
+#define TCACODE_RETURN 3 /* do not process this packet */
+#define TCACODE_MARK 4 /* mark dscp */
+#define TCACODE_HANDLE 5 /* take action specified by handle */
+#define TCACODE_NEXT 6 /* take action in the next tc element */
+#define TCACODE_MAX 6
+
+#define CDNR_NULL_HANDLE 0
+
+struct cdnr_interface {
+ char cdnr_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+};
+
+/* simple element operations */
+struct cdnr_add_element {
+ struct cdnr_interface iface;
+ struct tc_action action;
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_delete_element {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+};
+
+/* token-bucket meter operations */
+struct cdnr_add_tbmeter {
+ struct cdnr_interface iface;
+ struct tb_profile profile;
+ struct tc_action in_action;
+ struct tc_action out_action;
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_tbmeter {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct tb_profile profile;
+};
+
+struct cdnr_tbmeter_stats {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct pktcntr in_cnt;
+ struct pktcntr out_cnt;
+};
+
+/* two-rate three-color marker operations */
+struct cdnr_add_trtcm {
+ struct cdnr_interface iface;
+ struct tb_profile cmtd_profile; /* profile for committed tb */
+ struct tb_profile peak_profile; /* profile for peak tb */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+ int coloraware; /* color-aware/color-blind */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_trtcm {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct tb_profile cmtd_profile; /* profile for committed tb */
+ struct tb_profile peak_profile; /* profile for peak tb */
+ int coloraware; /* color-aware/color-blind */
+};
+
+struct cdnr_tcm_stats {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+/* time sliding window three-color marker operations */
+struct cdnr_add_tswtcm {
+ struct cdnr_interface iface;
+ u_int32_t cmtd_rate; /* committed rate (bits/sec) */
+ u_int32_t peak_rate; /* peak rate (bits/sec) */
+ u_int32_t avg_interval; /* averaging interval (msec) */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_tswtcm {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ u_int32_t cmtd_rate; /* committed rate (bits/sec) */
+ u_int32_t peak_rate; /* peak rate (bits/sec) */
+ u_int32_t avg_interval; /* averaging interval (msec) */
+};
+
+struct cdnr_add_filter {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct flow_filter filter;
+#endif
+ u_long filter_handle; /* return value */
+};
+
+struct cdnr_delete_filter {
+ struct cdnr_interface iface;
+ u_long filter_handle;
+};
+
+struct tce_stats {
+ u_long tce_handle; /* tc element handle */
+ int tce_type; /* e.g., TCETYPE_ELEMENT */
+ struct pktcntr tce_cnts[3]; /* tcm returns 3 counters */
+};
+
+struct cdnr_get_stats {
+ struct cdnr_interface iface;
+ struct pktcntr cnts[TCACODE_MAX+1];
+
+ /* element stats */
+ int nskip; /* skip # of elements */
+ int nelements; /* # of element stats (WR) */
+ struct tce_stats *tce_stats; /* pointer to stats array */
+};
+
+#define CDNR_IF_ATTACH _IOW('Q', 1, struct cdnr_interface)
+#define CDNR_IF_DETACH _IOW('Q', 2, struct cdnr_interface)
+#define CDNR_ENABLE _IOW('Q', 3, struct cdnr_interface)
+#define CDNR_DISABLE _IOW('Q', 4, struct cdnr_interface)
+#define CDNR_ADD_FILTER _IOWR('Q', 10, struct cdnr_add_filter)
+#define CDNR_DEL_FILTER _IOW('Q', 11, struct cdnr_delete_filter)
+#define CDNR_GETSTATS _IOWR('Q', 12, struct cdnr_get_stats)
+#define CDNR_ADD_ELEM _IOWR('Q', 30, struct cdnr_add_element)
+#define CDNR_DEL_ELEM _IOW('Q', 31, struct cdnr_delete_element)
+#define CDNR_ADD_TBM _IOWR('Q', 32, struct cdnr_add_tbmeter)
+#define CDNR_MOD_TBM _IOW('Q', 33, struct cdnr_modify_tbmeter)
+#define CDNR_TBM_STATS _IOWR('Q', 34, struct cdnr_tbmeter_stats)
+#define CDNR_ADD_TCM _IOWR('Q', 35, struct cdnr_add_trtcm)
+#define CDNR_MOD_TCM _IOWR('Q', 36, struct cdnr_modify_trtcm)
+#define CDNR_TCM_STATS _IOWR('Q', 37, struct cdnr_tcm_stats)
+#define CDNR_ADD_TSW _IOWR('Q', 38, struct cdnr_add_tswtcm)
+#define CDNR_MOD_TSW _IOWR('Q', 39, struct cdnr_modify_tswtcm)
+
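From userland, these commands are issued with ioctl(2) on the conditioner device. A hedged sketch of attaching an interface; the device path /dev/altq/cdnr follows the usual ALTQ convention but is an assumption here, as is the helper's name:

#include <sys/ioctl.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#include "altq_cdnr.h"		/* struct cdnr_interface, CDNR_IF_ATTACH */

/* returns an open conditioner fd on success, -1 on failure */
int
cdnr_attach_iface(const char *ifname)
{
	struct cdnr_interface iface;
	int fd;

	if ((fd = open("/dev/altq/cdnr", O_RDWR)) < 0)
		return (-1);
	memset(&iface, 0, sizeof(iface));
	strlcpy(iface.cdnr_ifname, ifname, sizeof(iface.cdnr_ifname));
	if (ioctl(fd, CDNR_IF_ATTACH, &iface) < 0) {
		close(fd);
		return (-1);
	}
	return (fd);
}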
+#ifndef DSCP_EF
+/* diffserv code points */
+#define DSCP_MASK 0xfc
+#define DSCP_CUMASK 0x03
+#define DSCP_EF 0xb8
+#define DSCP_AF11 0x28
+#define DSCP_AF12 0x30
+#define DSCP_AF13 0x38
+#define DSCP_AF21 0x48
+#define DSCP_AF22 0x50
+#define DSCP_AF23 0x58
+#define DSCP_AF31 0x68
+#define DSCP_AF32 0x70
+#define DSCP_AF33 0x78
+#define DSCP_AF41 0x88
+#define DSCP_AF42 0x90
+#define DSCP_AF43 0x98
+#define AF_CLASSMASK 0xe0
+#define AF_DROPPRECMASK 0x18
+#endif
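The AF code points above pack a class and a drop precedence into separate bit fields, which the two trailing masks pick apart. A small sketch; the shift amounts are implied by the mask values rather than defined in this header:

#include <stdio.h>

#define DSCP_AF12	0x30
#define AF_CLASSMASK	0xe0
#define AF_DROPPRECMASK	0x18

int main(void)
{
	unsigned int dscp = DSCP_AF12;

	/* AFxy: x = class (top 3 bits), y = drop precedence (bits 3-4) */
	unsigned int af_class = (dscp & AF_CLASSMASK) >> 5;
	unsigned int drop_prec = (dscp & AF_DROPPRECMASK) >> 3;

	printf("AF%u%u\n", af_class, drop_prec);	/* prints AF12 */
	return (0);
}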
+
+#ifdef _KERNEL
+
+/*
+ * packet information passed to the input function of tc elements
+ */
+struct cdnr_pktinfo {
+ int pkt_len; /* packet length */
+ u_int8_t pkt_dscp; /* diffserv code point */
+};
+
+/*
+ * traffic conditioner control block common to all types of tc elements
+ */
+struct cdnr_block {
+ LIST_ENTRY(cdnr_block) cb_next;
+ int cb_len; /* size of this tc element */
+ int cb_type; /* cdnr block type */
+ int cb_ref; /* reference count of this element */
+ u_long cb_handle; /* handle of this tc element */
+ struct top_cdnr *cb_top; /* back pointer to top */
+ struct tc_action cb_action; /* top level action for this tcb */
+ struct tc_action *(*cb_input)(struct cdnr_block *,
+ struct cdnr_pktinfo *);
+};
+
+/*
+ * top level traffic conditioner structure for an interface
+ */
+struct top_cdnr {
+ struct cdnr_block tc_block;
+
+ LIST_ENTRY(top_cdnr) tc_next;
+ struct ifaltq *tc_ifq;
+
+ LIST_HEAD(, cdnr_block) tc_elements;
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier tc_classifier;
+#endif
+ struct pktcntr tc_cnts[TCACODE_MAX+1];
+};
+
+/* token bucket element */
+struct tbe {
+ u_int64_t rate;
+ u_int64_t depth;
+
+ u_int64_t token;
+ u_int64_t filluptime;
+ u_int64_t last;
+};
+
+/* token bucket meter structure */
+struct tbmeter {
+ struct cdnr_block cdnrblk; /* conditioner block */
+ struct tbe tb; /* token bucket */
+ struct tc_action in_action; /* actions for IN/OUT */
+ struct tc_action out_action; /* actions for IN/OUT */
+ struct pktcntr in_cnt; /* statistics for IN/OUT */
+ struct pktcntr out_cnt; /* statistics for IN/OUT */
+};
+
+/* two-rate three-color marker structure */
+struct trtcm {
+ struct cdnr_block cdnrblk; /* conditioner block */
+ struct tbe cmtd_tb; /* committed tb profile */
+ struct tbe peak_tb; /* peak tb profile */
+ struct tc_action green_action;
+ struct tc_action yellow_action;
+ struct tc_action red_action;
+ int coloraware;
+ u_int8_t green_dscp;
+ u_int8_t yellow_dscp;
+ u_int8_t red_dscp;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+/* time sliding window three-color marker structure */
+struct tswtcm {
+ struct cdnr_block cdnrblk; /* conditioner block */
+
+ u_int32_t avg_rate; /* average rate (bytes/sec) */
+ u_int64_t t_front; /* timestamp of last update */
+
+ u_int64_t timewin; /* average interval */
+ u_int32_t cmtd_rate; /* committed target rate */
+ u_int32_t peak_rate; /* peak target rate */
+ struct tc_action green_action;
+ struct tc_action yellow_action;
+ struct tc_action red_action;
+ u_int8_t green_dscp;
+ u_int8_t yellow_dscp;
+ u_int8_t red_dscp;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_CDNR_HH_ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_classq.h b/freebsd/sys/contrib/altq/altq/altq_classq.h
new file mode 100644
index 00000000..c3cfea37
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_classq.h
@@ -0,0 +1,206 @@
+/* $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * class queue definitions extracted from rm_class.h.
+ */
+#ifndef _ALTQ_ALTQ_CLASSQ_HH_
+#define _ALTQ_ALTQ_CLASSQ_HH_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Packet Queue types: DROPHEAD, RED, RIO, or DROPTAIL.
+ */
+#define Q_DROPHEAD 0x00
+#define Q_RED 0x01
+#define Q_RIO 0x02
+#define Q_DROPTAIL 0x03
+
+#ifdef _KERNEL
+
+/*
+ * Packet Queue structures and macros to manipulate them.
+ */
+struct _class_queue_ {
+ struct mbuf *tail_; /* Tail of packet queue */
+ int qlen_; /* Queue length (in number of packets) */
+	int	qlim_;		/* Queue limit (in number of packets) */
+ int qtype_; /* Queue type */
+};
+
+typedef struct _class_queue_ class_queue_t;
+
+#define qtype(q) (q)->qtype_ /* Get queue type */
+#define qlimit(q) (q)->qlim_ /* Max packets to be queued */
+#define qlen(q) (q)->qlen_ /* Current queue length. */
+#define qtail(q) (q)->tail_ /* Tail of the queue */
+#define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL)
+
+#define	qempty(q)	((q)->qlen_ == 0)	/* Is the queue empty? */
+#define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */
+#define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */
+#define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO)
+
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
+extern void _addq(class_queue_t *, struct mbuf *);
+extern struct mbuf *_getq(class_queue_t *);
+extern struct mbuf *_getq_tail(class_queue_t *);
+extern struct mbuf *_getq_random(class_queue_t *);
+extern void _removeq(class_queue_t *, struct mbuf *);
+extern void _flushq(class_queue_t *);
+
+#else /* __GNUC__ && !ALTQ_DEBUG */
+/*
+ * inlined versions
+ */
+static __inline void
+_addq(class_queue_t *q, struct mbuf *m)
+{
+ struct mbuf *m0;
+
+ if ((m0 = qtail(q)) != NULL)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ m0 = m;
+ m0->m_nextpkt = m;
+ qtail(q) = m;
+ qlen(q)++;
+}
+
+static __inline struct mbuf *
+_getq(class_queue_t *q)
+{
+ struct mbuf *m, *m0;
+
+ if ((m = qtail(q)) == NULL)
+ return (NULL);
+ if ((m0 = m->m_nextpkt) != m)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ qtail(q) = NULL;
+ qlen(q)--;
+ m0->m_nextpkt = NULL;
+ return (m0);
+}
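The queue here is a circular singly-linked list threaded through m_nextpkt: tail_ points at the last packet and tail_->m_nextpkt at the head, so both ends are reachable from one pointer. The same invariant with a neutral node type (a sketch, not the mbuf code):

#include <stddef.h>

struct node { struct node *next; };
struct cqueue { struct node *tail; int len; };

static void
cq_add(struct cqueue *q, struct node *n)
{
	struct node *t = q->tail;

	if (t != NULL) {
		n->next = t->next;	/* new tail points back at head */
		t->next = n;
	} else
		n->next = n;		/* single element loops to itself */
	q->tail = n;
	q->len++;
}

static struct node *
cq_get(struct cqueue *q)
{
	struct node *t = q->tail, *head;

	if (t == NULL)
		return (NULL);
	head = t->next;
	if (head != t)
		t->next = head->next;	/* unlink the head */
	else
		q->tail = NULL;		/* queue is now empty */
	q->len--;
	head->next = NULL;
	return (head);
}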
+
+/* drop a packet at the tail of the queue */
+static __inline struct mbuf *
+_getq_tail(class_queue_t *q)
+{
+ struct mbuf *m, *m0, *prev;
+
+ if ((m = m0 = qtail(q)) == NULL)
+ return NULL;
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else
+ qtail(q) = prev;
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+/* randomly select a packet in the queue */
+static __inline struct mbuf *
+_getq_random(class_queue_t *q)
+{
+ struct mbuf *m;
+ int i, n;
+
+ if ((m = qtail(q)) == NULL)
+ return NULL;
+ if (m->m_nextpkt == m)
+ qtail(q) = NULL;
+ else {
+ struct mbuf *prev = NULL;
+
+ n = random() % qlen(q) + 1;
+ for (i = 0; i < n; i++) {
+ prev = m;
+ m = m->m_nextpkt;
+ }
+ prev->m_nextpkt = m->m_nextpkt;
+ if (m == qtail(q))
+ qtail(q) = prev;
+ }
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+static __inline void
+_removeq(class_queue_t *q, struct mbuf *m)
+{
+ struct mbuf *m0, *prev;
+
+ m0 = qtail(q);
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else if (qtail(q) == m)
+ qtail(q) = prev;
+ qlen(q)--;
+}
+
+static __inline void
+_flushq(class_queue_t *q)
+{
+ struct mbuf *m;
+
+ while ((m = _getq(q)) != NULL)
+ m_freem(m);
+}
+
+#endif /* __GNUC__ && !ALTQ_DEBUG */
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_CLASSQ_HH_ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_hfsc.c b/freebsd/sys/contrib/altq/altq/altq_hfsc.c
new file mode 100644
index 00000000..a559f006
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_hfsc.c
@@ -0,0 +1,2279 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+/*
+ * H-FSC is described in Proceedings of SIGCOMM'97,
+ * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
+ * Real-Time and Priority Service"
+ * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
+ *
+ * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
+ * when a class has an upperlimit, the fit-time is computed from the
+ * upperlimit service curve. the link-sharing scheduler does not schedule
+ * a class whose fit-time exceeds the current time.
+ */
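An HFSC service curve is two linear pieces: slope m1 for the first d units of time, then slope m2 afterwards. A sketch of evaluating such a curve, with units simplified to bytes and seconds for illustration (the kernel works on scaled machine-clock values via the internal_sc/runtime_sc machinery instead):

#include <stdint.h>

struct sc { uint64_t m1, d, m2; };	/* m1, m2: bytes/sec; d: sec */

/* cumulative service guaranteed by elapsed time t */
static uint64_t
sc_eval(const struct sc *sc, uint64_t t)
{
	if (t < sc->d)
		return (sc->m1 * t);
	return (sc->m1 * sc->d + sc->m2 * (t - sc->d));
}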
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include <freebsd/local/opt_altq.h>
+#if (__FreeBSD__ != 2)
+#include <freebsd/local/opt_inet.h>
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet6.h>
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/queue.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/kernel.h>
+#endif /* ALTQ3_COMPAT */
+
+#include <freebsd/net/if.h>
+#include <freebsd/netinet/in.h>
+
+#include <freebsd/net/pfvar.h>
+#include <freebsd/altq/altq.h>
+#include <freebsd/altq/altq_hfsc.h>
+#ifdef ALTQ3_COMPAT
+#include <freebsd/altq/altq_conf.h>
+#endif
+
+/*
+ * function prototypes
+ */
+static int hfsc_clear_interface(struct hfsc_if *);
+static int hfsc_request(struct ifaltq *, int, void *);
+static void hfsc_purge(struct hfsc_if *);
+static struct hfsc_class *hfsc_class_create(struct hfsc_if *,
+ struct service_curve *, struct service_curve *, struct service_curve *,
+ struct hfsc_class *, int, int, int);
+static int hfsc_class_destroy(struct hfsc_class *);
+static struct hfsc_class *hfsc_nextclass(struct hfsc_class *);
+static int hfsc_enqueue(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+static struct mbuf *hfsc_dequeue(struct ifaltq *, int);
+
+static int hfsc_addq(struct hfsc_class *, struct mbuf *);
+static struct mbuf *hfsc_getq(struct hfsc_class *);
+static struct mbuf *hfsc_pollq(struct hfsc_class *);
+static void hfsc_purgeq(struct hfsc_class *);
+
+static void update_cfmin(struct hfsc_class *);
+static void set_active(struct hfsc_class *, int);
+static void set_passive(struct hfsc_class *);
+
+static void init_ed(struct hfsc_class *, int);
+static void update_ed(struct hfsc_class *, int);
+static void update_d(struct hfsc_class *, int);
+static void init_vf(struct hfsc_class *, int);
+static void update_vf(struct hfsc_class *, int, u_int64_t);
+static ellist_t *ellist_alloc(void);
+static void ellist_destroy(ellist_t *);
+static void ellist_insert(struct hfsc_class *);
+static void ellist_remove(struct hfsc_class *);
+static void ellist_update(struct hfsc_class *);
+struct hfsc_class *ellist_get_mindl(ellist_t *, u_int64_t);
+static actlist_t *actlist_alloc(void);
+static void actlist_destroy(actlist_t *);
+static void actlist_insert(struct hfsc_class *);
+static void actlist_remove(struct hfsc_class *);
+static void actlist_update(struct hfsc_class *);
+
+static struct hfsc_class *actlist_firstfit(struct hfsc_class *,
+ u_int64_t);
+
+static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t);
+static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t);
+static __inline u_int64_t m2sm(u_int);
+static __inline u_int64_t m2ism(u_int);
+static __inline u_int64_t d2dx(u_int);
+static u_int sm2m(u_int64_t);
+static u_int dx2d(u_int64_t);
+
+static void sc2isc(struct service_curve *, struct internal_sc *);
+static void rtsc_init(struct runtime_sc *, struct internal_sc *,
+ u_int64_t, u_int64_t);
+static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t);
+static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t);
+static void rtsc_min(struct runtime_sc *, struct internal_sc *,
+ u_int64_t, u_int64_t);
+
+static void get_class_stats(struct hfsc_classstats *,
+ struct hfsc_class *);
+static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t);
+
+
+#ifdef ALTQ3_COMPAT
+static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int);
+static int hfsc_detach(struct hfsc_if *);
+static int hfsc_class_modify(struct hfsc_class *, struct service_curve *,
+ struct service_curve *, struct service_curve *);
+
+static int hfsccmd_if_attach(struct hfsc_attach *);
+static int hfsccmd_if_detach(struct hfsc_interface *);
+static int hfsccmd_add_class(struct hfsc_add_class *);
+static int hfsccmd_delete_class(struct hfsc_delete_class *);
+static int hfsccmd_modify_class(struct hfsc_modify_class *);
+static int hfsccmd_add_filter(struct hfsc_add_filter *);
+static int hfsccmd_delete_filter(struct hfsc_delete_filter *);
+static int hfsccmd_class_stats(struct hfsc_class_stats *);
+
+altqdev_decl(hfsc);
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * macros
+ */
+#define is_a_parent_class(cl) ((cl)->cl_children != NULL)
+
+#define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */
+
+#ifdef ALTQ3_COMPAT
+/* hif_list keeps all hfsc_if's allocated. */
+static struct hfsc_if *hif_list = NULL;
+#endif /* ALTQ3_COMPAT */
+
+int
+hfsc_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc,
+ hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL);
+ splx(s);
+ return (error);
+}
+
+int
+hfsc_add_altq(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK);
+ if (hif == NULL)
+ return (ENOMEM);
+ bzero(hif, sizeof(struct hfsc_if));
+
+ hif->hif_eligible = ellist_alloc();
+ if (hif->hif_eligible == NULL) {
+ free(hif, M_DEVBUF);
+ return (ENOMEM);
+ }
+
+ hif->hif_ifq = &ifp->if_snd;
+
+ /* keep the state in pf_altq */
+ a->altq_disc = hif;
+
+ return (0);
+}
+
+int
+hfsc_remove_altq(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+
+ if ((hif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ (void)hfsc_clear_interface(hif);
+ (void)hfsc_class_destroy(hif->hif_rootclass);
+
+ ellist_destroy(hif->hif_eligible);
+
+ free(hif, M_DEVBUF);
+
+ return (0);
+}
+
+int
+hfsc_add_queue(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl, *parent;
+ struct hfsc_opts *opts;
+ struct service_curve rtsc, lssc, ulsc;
+
+ if ((hif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ opts = &a->pq_u.hfsc_opts;
+
+ if (a->parent_qid == HFSC_NULLCLASS_HANDLE &&
+ hif->hif_rootclass == NULL)
+ parent = NULL;
+ else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL)
+ return (EINVAL);
+
+ if (a->qid == 0)
+ return (EINVAL);
+
+ if (clh_to_clp(hif, a->qid) != NULL)
+ return (EBUSY);
+
+ rtsc.m1 = opts->rtsc_m1;
+ rtsc.d = opts->rtsc_d;
+ rtsc.m2 = opts->rtsc_m2;
+ lssc.m1 = opts->lssc_m1;
+ lssc.d = opts->lssc_d;
+ lssc.m2 = opts->lssc_m2;
+ ulsc.m1 = opts->ulsc_m1;
+ ulsc.d = opts->ulsc_d;
+ ulsc.m2 = opts->ulsc_m2;
+
+ cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
+ parent, a->qlimit, opts->flags, a->qid);
+ if (cl == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
+
+int
+hfsc_remove_queue(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(hif, a->qid)) == NULL)
+ return (EINVAL);
+
+ return (hfsc_class_destroy(cl));
+}
+
+int
+hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct hfsc_classstats stats;
+ int error = 0;
+
+ if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes except the root class.
+ */
+static int
+hfsc_clear_interface(struct hfsc_if *hif)
+{
+ struct hfsc_class *cl;
+
+#ifdef ALTQ3_COMPAT
+ /* free the filters for this interface */
+ acc_discard_filters(&hif->hif_classifier, NULL, 1);
+#endif
+
+ /* clear out the classes */
+ while (hif->hif_rootclass != NULL &&
+ (cl = hif->hif_rootclass->cl_children) != NULL) {
+ /*
+ * remove the first leaf class found in the hierarchy
+ * then start over
+ */
+ for (; cl != NULL; cl = hfsc_nextclass(cl)) {
+ if (!is_a_parent_class(cl)) {
+ (void)hfsc_class_destroy(cl);
+ break;
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+hfsc_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ hfsc_purge(hif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+hfsc_purge(struct hfsc_if *hif)
+{
+ struct hfsc_class *cl;
+
+ for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ hif->hif_ifq->ifq_len = 0;
+}
+
+struct hfsc_class *
+hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
+ struct service_curve *fsc, struct service_curve *usc,
+ struct hfsc_class *parent, int qlimit, int flags, int qid)
+{
+ struct hfsc_class *cl, *p;
+ int i, s;
+
+ if (hif->hif_classes >= HFSC_MAX_CLASSES)
+ return (NULL);
+
+#ifndef ALTQ_RED
+ if (flags & HFCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("hfsc_class_create: RED not configured for HFSC!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_WAITOK);
+ if (cl == NULL)
+ return (NULL);
+ bzero(cl, sizeof(struct hfsc_class));
+
+ cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK);
+ if (cl->cl_q == NULL)
+ goto err_ret;
+ bzero(cl->cl_q, sizeof(class_queue_t));
+
+ cl->cl_actc = actlist_alloc();
+ if (cl->cl_actc == NULL)
+ goto err_ret;
+
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ qlimit(cl->cl_q) = qlimit;
+ qtype(cl->cl_q) = Q_DROPTAIL;
+ qlen(cl->cl_q) = 0;
+ cl->cl_flags = flags;
+#ifdef ALTQ_RED
+ if (flags & (HFCF_RED|HFCF_RIO)) {
+ int red_flags, red_pkttime;
+ u_int m2;
+
+ m2 = 0;
+ if (rsc != NULL && rsc->m2 > m2)
+ m2 = rsc->m2;
+ if (fsc != NULL && fsc->m2 > m2)
+ m2 = fsc->m2;
+ if (usc != NULL && usc->m2 > m2)
+ m2 = usc->m2;
+
+ red_flags = 0;
+ if (flags & HFCF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & HFCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (m2 < 8)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (m2 / 8);
+ if (flags & HFCF_RED) {
+ cl->cl_red = red_alloc(0, 0,
+ qlimit(cl->cl_q) * 10/100,
+ qlimit(cl->cl_q) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RED;
+ }
+#ifdef ALTQ_RIO
+ else {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RIO;
+ }
+#endif
+ }
+#endif /* ALTQ_RED */
+
+ if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
+ cl->cl_rsc = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (cl->cl_rsc == NULL)
+ goto err_ret;
+ sc2isc(rsc, cl->cl_rsc);
+ rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
+ rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
+ }
+ if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
+ cl->cl_fsc = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (cl->cl_fsc == NULL)
+ goto err_ret;
+ sc2isc(fsc, cl->cl_fsc);
+ rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
+ }
+ if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
+ cl->cl_usc = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (cl->cl_usc == NULL)
+ goto err_ret;
+ sc2isc(usc, cl->cl_usc);
+ rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
+ }
+
+ cl->cl_id = hif->hif_classid++;
+ cl->cl_handle = qid;
+ cl->cl_hif = hif;
+ cl->cl_parent = parent;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(hif->hif_ifq);
+ hif->hif_classes++;
+
+ /*
+ * find a free slot in the class table. if the slot matching
+ * the lower bits of qid is free, use this slot. otherwise,
+ * use the first free slot.
+ */
+ i = qid % HFSC_MAX_CLASSES;
+ if (hif->hif_class_tbl[i] == NULL)
+ hif->hif_class_tbl[i] = cl;
+ else {
+ for (i = 0; i < HFSC_MAX_CLASSES; i++)
+ if (hif->hif_class_tbl[i] == NULL) {
+ hif->hif_class_tbl[i] = cl;
+ break;
+ }
+ if (i == HFSC_MAX_CLASSES) {
+ IFQ_UNLOCK(hif->hif_ifq);
+ splx(s);
+ goto err_ret;
+ }
+ }
+
+ if (flags & HFCF_DEFAULTCLASS)
+ hif->hif_defaultclass = cl;
+
+ if (parent == NULL) {
+ /* this is root class */
+ hif->hif_rootclass = cl;
+ } else {
+ /* add this class to the children list of the parent */
+ if ((p = parent->cl_children) == NULL)
+ parent->cl_children = cl;
+ else {
+ while (p->cl_siblings != NULL)
+ p = p->cl_siblings;
+ p->cl_siblings = cl;
+ }
+ }
+ IFQ_UNLOCK(hif->hif_ifq);
+ splx(s);
+
+ return (cl);
+
+ err_ret:
+ if (cl->cl_actc != NULL)
+ actlist_destroy(cl->cl_actc);
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ if (cl->cl_fsc != NULL)
+ free(cl->cl_fsc, M_DEVBUF);
+ if (cl->cl_rsc != NULL)
+ free(cl->cl_rsc, M_DEVBUF);
+ if (cl->cl_usc != NULL)
+ free(cl->cl_usc, M_DEVBUF);
+ if (cl->cl_q != NULL)
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+ return (NULL);
+}
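The slot selection above is a hash-by-low-bits placement with a linear-scan fallback, which keeps handle lookups cheap in the common case. The same idea in isolation (TBL_SIZE stands in for HFSC_MAX_CLASSES; a sketch):

#include <stddef.h>

#define TBL_SIZE	64	/* stands in for HFSC_MAX_CLASSES */

static int
tbl_insert(void *tbl[TBL_SIZE], void *cl, unsigned int qid)
{
	unsigned int i = qid % TBL_SIZE;

	if (tbl[i] == NULL) {
		tbl[i] = cl;		/* preferred slot is free */
		return (0);
	}
	for (i = 0; i < TBL_SIZE; i++)
		if (tbl[i] == NULL) {
			tbl[i] = cl;	/* first free slot */
			return (0);
		}
	return (-1);			/* table is full */
}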
+
+static int
+hfsc_class_destroy(struct hfsc_class *cl)
+{
+ int i, s;
+
+ if (cl == NULL)
+ return (0);
+
+ if (is_a_parent_class(cl))
+ return (EBUSY);
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(cl->cl_hif->hif_ifq);
+
+#ifdef ALTQ3_COMPAT
+ /* delete filters referencing to this class */
+ acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0);
+#endif /* ALTQ3_COMPAT */
+
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+
+ if (cl->cl_parent == NULL) {
+ /* this is root class */
+ } else {
+ struct hfsc_class *p = cl->cl_parent->cl_children;
+
+ if (p == cl)
+ cl->cl_parent->cl_children = cl->cl_siblings;
+ else do {
+ if (p->cl_siblings == cl) {
+ p->cl_siblings = cl->cl_siblings;
+ break;
+ }
+ } while ((p = p->cl_siblings) != NULL);
+ ASSERT(p != NULL);
+ }
+
+ for (i = 0; i < HFSC_MAX_CLASSES; i++)
+ if (cl->cl_hif->hif_class_tbl[i] == cl) {
+ cl->cl_hif->hif_class_tbl[i] = NULL;
+ break;
+ }
+
+ cl->cl_hif->hif_classes--;
+ IFQ_UNLOCK(cl->cl_hif->hif_ifq);
+ splx(s);
+
+ actlist_destroy(cl->cl_actc);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+
+ IFQ_LOCK(cl->cl_hif->hif_ifq);
+ if (cl == cl->cl_hif->hif_rootclass)
+ cl->cl_hif->hif_rootclass = NULL;
+ if (cl == cl->cl_hif->hif_defaultclass)
+ cl->cl_hif->hif_defaultclass = NULL;
+ IFQ_UNLOCK(cl->cl_hif->hif_ifq);
+
+ if (cl->cl_usc != NULL)
+ free(cl->cl_usc, M_DEVBUF);
+ if (cl->cl_fsc != NULL)
+ free(cl->cl_fsc, M_DEVBUF);
+ if (cl->cl_rsc != NULL)
+ free(cl->cl_rsc, M_DEVBUF);
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * hfsc_nextclass returns the next class in the tree.
+ * usage:
+ * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+ * do_something;
+ */
+static struct hfsc_class *
+hfsc_nextclass(struct hfsc_class *cl)
+{
+ if (cl->cl_children != NULL)
+ cl = cl->cl_children;
+ else if (cl->cl_siblings != NULL)
+ cl = cl->cl_siblings;
+ else {
+ while ((cl = cl->cl_parent) != NULL)
+ if (cl->cl_siblings) {
+ cl = cl->cl_siblings;
+ break;
+ }
+ }
+
+ return (cl);
+}
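This is a preorder walk: descend to a child first, then move to a sibling, then climb until an unvisited sibling appears. For a tree root -> { A -> { A1, A2 }, B } the usage loop above visits root, A, A1, A2, B. The same pointer logic with a minimal node type (a sketch):

#include <stddef.h>

struct cnode {
	struct cnode *parent, *children, *siblings;
};

static struct cnode *
next_preorder(struct cnode *cl)
{
	if (cl->children != NULL)
		return (cl->children);
	if (cl->siblings != NULL)
		return (cl->siblings);
	while ((cl = cl->parent) != NULL)
		if (cl->siblings != NULL)
			return (cl->siblings);
	return (NULL);
}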
+
+/*
+ * hfsc_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+ struct hfsc_class *cl;
+ struct pf_mtag *t;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+#if defined(__NetBSD__) || defined(__OpenBSD__)\
+ || (defined(__FreeBSD__) && __FreeBSD_version >= 501113)
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+#else
+ printf("altq: packet for %s%d does not have pkthdr\n",
+ ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit);
+#endif
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ if ((t = pf_find_mtag(m)) != NULL)
+ cl = clh_to_clp(hif, t->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL || is_a_parent_class(cl)) {
+ cl = hif->hif_defaultclass;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->cl_pktattr = NULL;
+ len = m_pktlen(m);
+ if (hfsc_addq(cl, m) != 0) {
+ /* drop occurred. mbuf was freed in hfsc_addq. */
+ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+ cl->cl_hif->hif_packets++;
+
+ /* successfully queued. */
+ if (qlen(cl->cl_q) == 1)
+ set_active(cl, m_pktlen(m));
+
+ return (0);
+}
+
+/*
+ * hfsc_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+hfsc_dequeue(struct ifaltq *ifq, int op)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+ struct hfsc_class *cl;
+ struct mbuf *m;
+ int len, next_len;
+ int realtime = 0;
+ u_int64_t cur_time;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (hif->hif_packets == 0)
+ /* no packet in the tree */
+ return (NULL);
+
+ cur_time = read_machclk();
+
+ if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) {
+
+ cl = hif->hif_pollcache;
+ hif->hif_pollcache = NULL;
+ /* check if the class was scheduled by real-time criteria */
+ if (cl->cl_rsc != NULL)
+ realtime = (cl->cl_e <= cur_time);
+ } else {
+ /*
+ * if there are eligible classes, use real-time criteria.
+ * find the class with the minimum deadline among
+ * the eligible classes.
+ */
+ if ((cl = ellist_get_mindl(hif->hif_eligible, cur_time))
+ != NULL) {
+ realtime = 1;
+ } else {
+#ifdef ALTQ_DEBUG
+ int fits = 0;
+#endif
+ /*
+ * use link-sharing criteria
+ * get the class with the minimum vt in the hierarchy
+ */
+ cl = hif->hif_rootclass;
+ while (is_a_parent_class(cl)) {
+
+ cl = actlist_firstfit(cl, cur_time);
+ if (cl == NULL) {
+#ifdef ALTQ_DEBUG
+ if (fits > 0)
+ printf("%d fit but none found\n",fits);
+#endif
+ return (NULL);
+ }
+ /*
+ * update parent's cl_cvtmin.
+ * don't update if the new vt is smaller.
+ */
+ if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
+ cl->cl_parent->cl_cvtmin = cl->cl_vt;
+#ifdef ALTQ_DEBUG
+ fits++;
+#endif
+ }
+ }
+
+ if (op == ALTDQ_POLL) {
+ hif->hif_pollcache = cl;
+ m = hfsc_pollq(cl);
+ return (m);
+ }
+ }
+
+ m = hfsc_getq(cl);
+ if (m == NULL)
+ panic("hfsc_dequeue:");
+ len = m_pktlen(m);
+ cl->cl_hif->hif_packets--;
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len);
+
+ update_vf(cl, len, cur_time);
+ if (realtime)
+ cl->cl_cumul += len;
+
+ if (!qempty(cl->cl_q)) {
+ if (cl->cl_rsc != NULL) {
+ /* update ed */
+ next_len = m_pktlen(qhead(cl->cl_q));
+
+ if (realtime)
+ update_ed(cl, next_len);
+ else
+ update_d(cl, next_len);
+ }
+ } else {
+ /* the class becomes passive */
+ set_passive(cl);
+ }
+
+ return (m);
+}
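+
+/*
+ * Illustrative sketch (editor's addition, not part of the original
+ * code): the ALTDQ_POLL/ALTDQ_REMOVE contract documented above is
+ * what lets a driver peek at the next packet before committing to
+ * transmit it. Assuming a hypothetical driver_has_room() check, a
+ * caller holding the ifq lock would do roughly:
+ *
+ * m = hfsc_dequeue(ifq, ALTDQ_POLL); // peek; class cached
+ * if (m != NULL && driver_has_room(m))
+ * m = hfsc_dequeue(ifq, ALTDQ_REMOVE); // same mbuf returned
+ *
+ * hif_pollcache is what guarantees that the REMOVE right after a
+ * POLL selects the same class even if eligibility changed meanwhile.
+ */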
+
+static int
+hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
+{
+
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_addq((rio_t *)cl->cl_red, cl->cl_q,
+ m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
+#endif
+ if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & HFCF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(cl->cl_q, m);
+
+ return (0);
+}
+
+static struct mbuf *
+hfsc_getq(struct hfsc_class *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_getq(cl->cl_red, cl->cl_q);
+#endif
+ return _getq(cl->cl_q);
+}
+
+static struct mbuf *
+hfsc_pollq(struct hfsc_class *cl)
+{
+ return qhead(cl->cl_q);
+}
+
+static void
+hfsc_purgeq(struct hfsc_class *cl)
+{
+ struct mbuf *m;
+
+ if (qempty(cl->cl_q))
+ return;
+
+ while ((m = _getq(cl->cl_q)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m));
+ m_freem(m);
+ cl->cl_hif->hif_packets--;
+ IFQ_DEC_LEN(cl->cl_hif->hif_ifq);
+ }
+ ASSERT(qlen(cl->cl_q) == 0);
+
+ update_vf(cl, 0, 0); /* remove cl from the actlist */
+ set_passive(cl);
+}
+
+static void
+set_active(struct hfsc_class *cl, int len)
+{
+ if (cl->cl_rsc != NULL)
+ init_ed(cl, len);
+ if (cl->cl_fsc != NULL)
+ init_vf(cl, len);
+
+ cl->cl_stats.period++;
+}
+
+static void
+set_passive(struct hfsc_class *cl)
+{
+ if (cl->cl_rsc != NULL)
+ ellist_remove(cl);
+
+ /*
+ * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
+ * needs to be called explicitly to remove a class from actlist
+ */
+}
+
+static void
+init_ed(struct hfsc_class *cl, int next_len)
+{
+ u_int64_t cur_time;
+
+ cur_time = read_machclk();
+
+ /* update the deadline curve */
+ rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);
+
+ /*
+ * update the eligible curve.
+ * for concave, it is equal to the deadline curve.
+ * for convex, it is a linear curve with slope m2.
+ */
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+
+ /* compute e and d */
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ ellist_insert(cl);
+}
+
+static void
+update_ed(struct hfsc_class *cl, int next_len)
+{
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ ellist_update(cl);
+}
+
+static void
+update_d(struct hfsc_class *cl, int next_len)
+{
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+}
+
+static void
+init_vf(struct hfsc_class *cl, int len)
+{
+ struct hfsc_class *max_cl, *p;
+ u_int64_t vt, f, cur_time;
+ int go_active;
+
+ cur_time = 0;
+ go_active = 1;
+ for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
+
+ if (go_active && cl->cl_nactive++ == 0)
+ go_active = 1;
+ else
+ go_active = 0;
+
+ if (go_active) {
+ max_cl = actlist_last(cl->cl_parent->cl_actc);
+ if (max_cl != NULL) {
+ /*
+ * set vt to the average of the min and max
+ * classes. if the parent's period didn't
+ * change, don't decrease vt of the class.
+ */
+ vt = max_cl->cl_vt;
+ if (cl->cl_parent->cl_cvtmin != 0)
+ vt = (cl->cl_parent->cl_cvtmin + vt)/2;
+
+ if (cl->cl_parent->cl_vtperiod !=
+ cl->cl_parentperiod || vt > cl->cl_vt)
+ cl->cl_vt = vt;
+ } else {
+ /*
+ * first child for a new parent backlog period.
+ * add parent's cvtmax to vtoff of children
+ * to make a new vt (vtoff + vt) larger than
+ * the vt in the last period for all children.
+ */
+ vt = cl->cl_parent->cl_cvtmax;
+ for (p = cl->cl_parent->cl_children; p != NULL;
+ p = p->cl_siblings)
+ p->cl_vtoff += vt;
+ cl->cl_vt = 0;
+ cl->cl_parent->cl_cvtmax = 0;
+ cl->cl_parent->cl_cvtmin = 0;
+ }
+ cl->cl_initvt = cl->cl_vt;
+
+ /* update the virtual curve */
+ vt = cl->cl_vt + cl->cl_vtoff;
+ rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total);
+ if (cl->cl_virtual.x == vt) {
+ cl->cl_virtual.x -= cl->cl_vtoff;
+ cl->cl_vtoff = 0;
+ }
+ cl->cl_vtadj = 0;
+
+ cl->cl_vtperiod++; /* increment vt period */
+ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
+ if (cl->cl_parent->cl_nactive == 0)
+ cl->cl_parentperiod++;
+ cl->cl_f = 0;
+
+ actlist_insert(cl);
+
+ if (cl->cl_usc != NULL) {
+ /* class has upper limit curve */
+ if (cur_time == 0)
+ cur_time = read_machclk();
+
+ /* update the ulimit curve */
+ rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
+ cl->cl_total);
+ /* compute myf */
+ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
+ cl->cl_total);
+ cl->cl_myfadj = 0;
+ }
+ }
+
+ if (cl->cl_myf > cl->cl_cfmin)
+ f = cl->cl_myf;
+ else
+ f = cl->cl_cfmin;
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+static void
+update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
+{
+ u_int64_t f, myf_bound, delta;
+ int go_passive;
+
+ go_passive = qempty(cl->cl_q);
+
+ for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+
+ cl->cl_total += len;
+
+ if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
+ continue;
+
+ if (go_passive && --cl->cl_nactive == 0)
+ go_passive = 1;
+ else
+ go_passive = 0;
+
+ if (go_passive) {
+ /* no more active child, going passive */
+
+ /* update cvtmax of the parent class */
+ if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
+ cl->cl_parent->cl_cvtmax = cl->cl_vt;
+
+ /* remove this class from the vt list */
+ actlist_remove(cl);
+
+ update_cfmin(cl->cl_parent);
+
+ continue;
+ }
+
+ /*
+ * update vt and f
+ */
+ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+ - cl->cl_vtoff + cl->cl_vtadj;
+
+ /*
+ * if vt of the class is smaller than cvtmin,
+ * the class was skipped in the past due to non-fit.
+ * if so, we need to adjust vtadj.
+ */
+ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+ cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+ cl->cl_vt = cl->cl_parent->cl_cvtmin;
+ }
+
+ /* update the vt list */
+ actlist_update(cl);
+
+ if (cl->cl_usc != NULL) {
+ cl->cl_myf = cl->cl_myfadj
+ + rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
+
+ /*
+ * if myf lags behind by more than one clock tick
+ * from the current time, adjust myfadj to prevent
+ * a rate-limited class from going greedy.
+ * in a steady state under rate-limiting, myf
+ * fluctuates within one clock tick.
+ */
+ myf_bound = cur_time - machclk_per_tick;
+ if (cl->cl_myf < myf_bound) {
+ delta = cur_time - cl->cl_myf;
+ cl->cl_myfadj += delta;
+ cl->cl_myf += delta;
+ }
+ }
+
+ /* cl_f is max(cl_myf, cl_cfmin) */
+ if (cl->cl_myf > cl->cl_cfmin)
+ f = cl->cl_myf;
+ else
+ f = cl->cl_cfmin;
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+static void
+update_cfmin(struct hfsc_class *cl)
+{
+ struct hfsc_class *p;
+ u_int64_t cfmin;
+
+ if (TAILQ_EMPTY(cl->cl_actc)) {
+ cl->cl_cfmin = 0;
+ return;
+ }
+ cfmin = HT_INFINITY;
+ TAILQ_FOREACH(p, cl->cl_actc, cl_actlist) {
+ if (p->cl_f == 0) {
+ cl->cl_cfmin = 0;
+ return;
+ }
+ if (p->cl_f < cfmin)
+ cfmin = p->cl_f;
+ }
+ cl->cl_cfmin = cfmin;
+}
+
+/*
+ * TAILQ based ellist and actlist implementation
+ * (ion wanted to make a calendar queue based implementation)
+ */
+/*
+ * eligible list holds backlogged classes being sorted by their eligible times.
+ * there is one eligible list per interface.
+ */
+
+static ellist_t *
+ellist_alloc(void)
+{
+ ellist_t *head;
+
+ head = malloc(sizeof(ellist_t), M_DEVBUF, M_WAITOK);
+ TAILQ_INIT(head);
+ return (head);
+}
+
+static void
+ellist_destroy(ellist_t *head)
+{
+ free(head, M_DEVBUF);
+}
+
+static void
+ellist_insert(struct hfsc_class *cl)
+{
+ struct hfsc_if *hif = cl->cl_hif;
+ struct hfsc_class *p;
+
+ /* check the last entry first */
+ if ((p = TAILQ_LAST(hif->hif_eligible, _eligible)) == NULL ||
+ p->cl_e <= cl->cl_e) {
+ TAILQ_INSERT_TAIL(hif->hif_eligible, cl, cl_ellist);
+ return;
+ }
+
+ TAILQ_FOREACH(p, hif->hif_eligible, cl_ellist) {
+ if (cl->cl_e < p->cl_e) {
+ TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static void
+ellist_remove(struct hfsc_class *cl)
+{
+ struct hfsc_if *hif = cl->cl_hif;
+
+ TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
+}
+
+static void
+ellist_update(struct hfsc_class *cl)
+{
+ struct hfsc_if *hif = cl->cl_hif;
+ struct hfsc_class *p, *last;
+
+ /*
+ * the eligible time of a class increases monotonically.
+ * if the next entry has a larger eligible time, nothing to do.
+ */
+ p = TAILQ_NEXT(cl, cl_ellist);
+ if (p == NULL || cl->cl_e <= p->cl_e)
+ return;
+
+ /* check the last entry */
+ last = TAILQ_LAST(hif->hif_eligible, _eligible);
+ ASSERT(last != NULL);
+ if (last->cl_e <= cl->cl_e) {
+ TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_TAIL(hif->hif_eligible, cl, cl_ellist);
+ return;
+ }
+
+ /*
+ * the new position must be between the next entry
+ * and the last entry
+ */
+ while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
+ if (cl->cl_e < p->cl_e) {
+ TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+/* find the class with the minimum deadline among the eligible classes */
+struct hfsc_class *
+ellist_get_mindl(ellist_t *head, u_int64_t cur_time)
+{
+ struct hfsc_class *p, *cl = NULL;
+
+ TAILQ_FOREACH(p, head, cl_ellist) {
+ if (p->cl_e > cur_time)
+ break;
+ if (cl == NULL || p->cl_d < cl->cl_d)
+ cl = p;
+ }
+ return (cl);
+}
+
+/*
+ * active children list holds backlogged child classes being sorted
+ * by their virtual time.
+ * each intermediate class has one active children list.
+ */
+static actlist_t *
+actlist_alloc(void)
+{
+ actlist_t *head;
+
+ head = malloc(sizeof(actlist_t), M_DEVBUF, M_WAITOK);
+ TAILQ_INIT(head);
+ return (head);
+}
+
+static void
+actlist_destroy(actlist_t *head)
+{
+ free(head, M_DEVBUF);
+}
+
+static void
+actlist_insert(struct hfsc_class *cl)
+{
+ struct hfsc_class *p;
+
+ /* check the last entry first */
+ if ((p = TAILQ_LAST(cl->cl_parent->cl_actc, _active)) == NULL
+ || p->cl_vt <= cl->cl_vt) {
+ TAILQ_INSERT_TAIL(cl->cl_parent->cl_actc, cl, cl_actlist);
+ return;
+ }
+
+ TAILQ_FOREACH(p, cl->cl_parent->cl_actc, cl_actlist) {
+ if (cl->cl_vt < p->cl_vt) {
+ TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static void
+actlist_remove(struct hfsc_class *cl)
+{
+ TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
+}
+
+static void
+actlist_update(struct hfsc_class *cl)
+{
+ struct hfsc_class *p, *last;
+
+ /*
+ * the virtual time of a class increases monotonically during its
+ * backlogged period.
+ * if the next entry has a larger virtual time, nothing to do.
+ */
+ p = TAILQ_NEXT(cl, cl_actlist);
+ if (p == NULL || cl->cl_vt < p->cl_vt)
+ return;
+
+ /* check the last entry */
+ last = TAILQ_LAST(cl->cl_parent->cl_actc, _active);
+ ASSERT(last != NULL);
+ if (last->cl_vt <= cl->cl_vt) {
+ TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_TAIL(cl->cl_parent->cl_actc, cl, cl_actlist);
+ return;
+ }
+
+ /*
+ * the new position must be between the next entry
+ * and the last entry
+ */
+ while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
+ if (cl->cl_vt < p->cl_vt) {
+ TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static struct hfsc_class *
+actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
+{
+ struct hfsc_class *p;
+
+ TAILQ_FOREACH(p, cl->cl_actc, cl_actlist) {
+ if (p->cl_f <= cur_time)
+ return (p);
+ }
+ return (NULL);
+}
+
+/*
+ * service curve support functions
+ *
+ * external service curve parameters
+ * m: bits/sec
+ * d: msec
+ * internal service curve parameters
+ * sm: (bytes/tsc_interval) << SM_SHIFT
+ * ism: (tsc_count/byte) << ISM_SHIFT
+ * dx: tsc_count
+ *
+ * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
+ * we should be able to handle 100K-1Gbps linkspeed with 200MHz-1GHz CPU
+ * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
+ * digits in decimal using the following table.
+ *
+ * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
+ * ----------+-------------------------------------------------------
+ * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6
+ * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6
+ * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6
+ *
+ * nsec/byte 80000 8000 800 80 8
+ * ism(500MHz) 40000 4000 400 40 4
+ * ism(200MHz) 16000 1600 160 16 1.6
+ */
+#define SM_SHIFT 24
+#define ISM_SHIFT 10
+
+#define SM_MASK ((1LL << SM_SHIFT) - 1)
+#define ISM_MASK ((1LL << ISM_SHIFT) - 1)
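+
+/*
+ * Worked example (editor's sketch, assuming machclk_freq = 1GHz and a
+ * 100Mbps slope): 100Mbps is 12.5e6 bytes/sec, i.e. 0.0125 bytes per
+ * clock count, so m2sm(100000000) = (100000000 << 24) / 8 / 10^9
+ * = 209715, and m2ism(100000000) = (10^9 << 10) * 8 / 10^8 = 81920,
+ * i.e. 80 clock counts per byte scaled by 2^10.
+ */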
+
+static __inline u_int64_t
+seg_x2y(u_int64_t x, u_int64_t sm)
+{
+ u_int64_t y;
+
+ /*
+ * compute
+ * y = x * sm >> SM_SHIFT
+ * but divide it for the upper and lower bits to avoid overflow
+ */
+ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
+ return (y);
+}
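+
+/*
+ * Editor's note on the split multiply above: with sm on the order of
+ * 2^18 (e.g. 100Mbps at 1GHz, as in the worked example above), the
+ * naive product x * sm overflows 64 bits once x exceeds about 2^46
+ * clock counts, i.e. after roughly a day of uptime at 1GHz, while the
+ * split form keeps every intermediate result well below 2^64.
+ */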
+
+static __inline u_int64_t
+seg_y2x(u_int64_t y, u_int64_t ism)
+{
+ u_int64_t x;
+
+ if (y == 0)
+ x = 0;
+ else if (ism == HT_INFINITY)
+ x = HT_INFINITY;
+ else {
+ x = (y >> ISM_SHIFT) * ism
+ + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
+ }
+ return (x);
+}
+
+static __inline u_int64_t
+m2sm(u_int m)
+{
+ u_int64_t sm;
+
+ sm = ((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq;
+ return (sm);
+}
+
+static __inline u_int64_t
+m2ism(u_int m)
+{
+ u_int64_t ism;
+
+ if (m == 0)
+ ism = HT_INFINITY;
+ else
+ ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m;
+ return (ism);
+}
+
+static __inline u_int64_t
+d2dx(u_int d)
+{
+ u_int64_t dx;
+
+ dx = ((u_int64_t)d * machclk_freq) / 1000;
+ return (dx);
+}
+
+static u_int
+sm2m(u_int64_t sm)
+{
+ u_int64_t m;
+
+ m = (sm * 8 * machclk_freq) >> SM_SHIFT;
+ return ((u_int)m);
+}
+
+static u_int
+dx2d(u_int64_t dx)
+{
+ u_int64_t d;
+
+ d = dx * 1000 / machclk_freq;
+ return ((u_int)d);
+}
+
+static void
+sc2isc(struct service_curve *sc, struct internal_sc *isc)
+{
+ isc->sm1 = m2sm(sc->m1);
+ isc->ism1 = m2ism(sc->m1);
+ isc->dx = d2dx(sc->d);
+ isc->dy = seg_x2y(isc->dx, isc->sm1);
+ isc->sm2 = m2sm(sc->m2);
+ isc->ism2 = m2ism(sc->m2);
+}
+
+/*
+ * initialize the runtime service curve with the given internal
+ * service curve starting at (x, y).
+ */
+static void
+rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x,
+ u_int64_t y)
+{
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->sm1 = isc->sm1;
+ rtsc->ism1 = isc->ism1;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ rtsc->sm2 = isc->sm2;
+ rtsc->ism2 = isc->ism2;
+}
+
+/*
+ * calculate the y-projection of the runtime service curve by the
+ * given x-projection value
+ */
+static u_int64_t
+rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y)
+{
+ u_int64_t x;
+
+ if (y < rtsc->y)
+ x = rtsc->x;
+ else if (y <= rtsc->y + rtsc->dy) {
+ /* x belongs to the 1st segment */
+ if (rtsc->dy == 0)
+ x = rtsc->x + rtsc->dx;
+ else
+ x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
+ } else {
+ /* x belongs to the 2nd segment */
+ x = rtsc->x + rtsc->dx
+ + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
+ }
+ return (x);
+}
+
+static u_int64_t
+rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x)
+{
+ u_int64_t y;
+
+ if (x <= rtsc->x)
+ y = rtsc->y;
+ else if (x <= rtsc->x + rtsc->dx)
+ /* y belongs to the 1st segment */
+ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
+ else
+ /* y belongs to the 2nd segment */
+ y = rtsc->y + rtsc->dy
+ + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
+ return (y);
+}
+
+/*
+ * update the runtime service curve by taking the minimum of the current
+ * runtime service curve and the service curve starting at (x, y).
+ */
+static void
+rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
+ u_int64_t y)
+{
+ u_int64_t y1, y2, dx, dy;
+
+ if (isc->sm1 <= isc->sm2) {
+ /* service curve is convex */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 < y)
+ /* the current rtsc is smaller */
+ return;
+ rtsc->x = x;
+ rtsc->y = y;
+ return;
+ }
+
+ /*
+ * service curve is concave
+ * compute the two y values of the current rtsc
+ * y1: at x
+ * y2: at (x + dx)
+ */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 <= y) {
+ /* rtsc is below isc, no change to rtsc */
+ return;
+ }
+
+ y2 = rtsc_x2y(rtsc, x + isc->dx);
+ if (y2 >= y + isc->dy) {
+ /* rtsc is above isc, replace rtsc by isc */
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ return;
+ }
+
+ /*
+ * the two curves intersect.
+ * compute the offsets (dx, dy) using the reverse
+ * function of seg_x2y():
+ * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
+ * i.e., sm1 * dx == sm2 * dx + (y1 - y) up to slope scaling,
+ * which solves to dx = (y1 - y) / (sm1 - sm2); the shift by
+ * SM_SHIFT below undoes the scaling of the slopes.
+ */
+ dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
+ /*
+ * check if (x, y1) belongs to the 1st segment of rtsc.
+ * if so, add the offset.
+ */
+ if (rtsc->x + rtsc->dx > x)
+ dx += rtsc->x + rtsc->dx - x;
+ dy = seg_x2y(dx, isc->sm1);
+
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = dx;
+ rtsc->dy = dy;
+ return;
+}
+
+static void
+get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl)
+{
+ sp->class_id = cl->cl_id;
+ sp->class_handle = cl->cl_handle;
+
+ if (cl->cl_rsc != NULL) {
+ sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
+ sp->rsc.d = dx2d(cl->cl_rsc->dx);
+ sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
+ } else {
+ sp->rsc.m1 = 0;
+ sp->rsc.d = 0;
+ sp->rsc.m2 = 0;
+ }
+ if (cl->cl_fsc != NULL) {
+ sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
+ sp->fsc.d = dx2d(cl->cl_fsc->dx);
+ sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
+ } else {
+ sp->fsc.m1 = 0;
+ sp->fsc.d = 0;
+ sp->fsc.m2 = 0;
+ }
+ if (cl->cl_usc != NULL) {
+ sp->usc.m1 = sm2m(cl->cl_usc->sm1);
+ sp->usc.d = dx2d(cl->cl_usc->dx);
+ sp->usc.m2 = sm2m(cl->cl_usc->sm2);
+ } else {
+ sp->usc.m1 = 0;
+ sp->usc.d = 0;
+ sp->usc.m2 = 0;
+ }
+
+ sp->total = cl->cl_total;
+ sp->cumul = cl->cl_cumul;
+
+ sp->d = cl->cl_d;
+ sp->e = cl->cl_e;
+ sp->vt = cl->cl_vt;
+ sp->f = cl->cl_f;
+
+ sp->initvt = cl->cl_initvt;
+ sp->vtperiod = cl->cl_vtperiod;
+ sp->parentperiod = cl->cl_parentperiod;
+ sp->nactive = cl->cl_nactive;
+ sp->vtoff = cl->cl_vtoff;
+ sp->cvtmax = cl->cl_cvtmax;
+ sp->myf = cl->cl_myf;
+ sp->cfmin = cl->cl_cfmin;
+ sp->cvtmin = cl->cl_cvtmin;
+ sp->myfadj = cl->cl_myfadj;
+ sp->vtadj = cl->cl_vtadj;
+
+ sp->cur_time = read_machclk();
+ sp->machclk_freq = machclk_freq;
+
+ sp->qlength = qlen(cl->cl_q);
+ sp->qlimit = qlimit(cl->cl_q);
+ sp->xmit_cnt = cl->cl_stats.xmit_cnt;
+ sp->drop_cnt = cl->cl_stats.drop_cnt;
+ sp->period = cl->cl_stats.period;
+
+ sp->qtype = qtype(cl->cl_q);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct hfsc_class *
+clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
+{
+ int i;
+ struct hfsc_class *cl;
+
+ if (chandle == 0)
+ return (NULL);
+ /*
+ * first, try optimistically the slot matching the lower bits of
+ * the handle. if it fails, do the linear table search.
+ */
+ i = chandle % HFSC_MAX_CLASSES;
+ if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
+ return (cl);
+ for (i = 0; i < HFSC_MAX_CLASSES; i++)
+ if ((cl = hif->hif_class_tbl[i]) != NULL &&
+ cl->cl_handle == chandle)
+ return (cl);
+ return (NULL);
+}
+
+#ifdef ALTQ3_COMPAT
+static struct hfsc_if *
+hfsc_attach(ifq, bandwidth)
+ struct ifaltq *ifq;
+ u_int bandwidth;
+{
+ struct hfsc_if *hif;
+
+ hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK);
+ if (hif == NULL)
+ return (NULL);
+ bzero(hif, sizeof(struct hfsc_if));
+
+ hif->hif_eligible = ellist_alloc();
+ if (hif->hif_eligible == NULL) {
+ free(hif, M_DEVBUF);
+ return NULL;
+ }
+
+ hif->hif_ifq = ifq;
+
+ /* add this state to the hfsc list */
+ hif->hif_next = hif_list;
+ hif_list = hif;
+
+ return (hif);
+}
+
+static int
+hfsc_detach(hif)
+ struct hfsc_if *hif;
+{
+ (void)hfsc_clear_interface(hif);
+ (void)hfsc_class_destroy(hif->hif_rootclass);
+
+ /* remove this interface from the hif list */
+ if (hif_list == hif)
+ hif_list = hif->hif_next;
+ else {
+ struct hfsc_if *h;
+
+ for (h = hif_list; h != NULL; h = h->hif_next)
+ if (h->hif_next == hif) {
+ h->hif_next = hif->hif_next;
+ break;
+ }
+ ASSERT(h != NULL);
+ }
+
+ ellist_destroy(hif->hif_eligible);
+
+ free(hif, M_DEVBUF);
+
+ return (0);
+}
+
+static int
+hfsc_class_modify(cl, rsc, fsc, usc)
+ struct hfsc_class *cl;
+ struct service_curve *rsc, *fsc, *usc;
+{
+ struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp;
+ u_int64_t cur_time;
+ int s;
+
+ rsc_tmp = fsc_tmp = usc_tmp = NULL;
+ if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) &&
+ cl->cl_rsc == NULL) {
+ rsc_tmp = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (rsc_tmp == NULL)
+ return (ENOMEM);
+ }
+ if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) &&
+ cl->cl_fsc == NULL) {
+ fsc_tmp = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (fsc_tmp == NULL) {
+ free(rsc_tmp, M_DEVBUF);
+ return (ENOMEM);
+ }
+ }
+ if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) &&
+ cl->cl_usc == NULL) {
+ usc_tmp = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (usc_tmp == NULL) {
+ free(rsc_tmp, M_DEVBUF);
+ free(fsc_tmp, M_DEVBUF);
+ return (ENOMEM);
+ }
+ }
+
+ cur_time = read_machclk();
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(cl->cl_hif->hif_ifq);
+
+ if (rsc != NULL) {
+ if (rsc->m1 == 0 && rsc->m2 == 0) {
+ if (cl->cl_rsc != NULL) {
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+ free(cl->cl_rsc, M_DEVBUF);
+ cl->cl_rsc = NULL;
+ }
+ } else {
+ if (cl->cl_rsc == NULL)
+ cl->cl_rsc = rsc_tmp;
+ sc2isc(rsc, cl->cl_rsc);
+ rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time,
+ cl->cl_cumul);
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+ }
+ }
+
+ if (fsc != NULL) {
+ if (fsc->m1 == 0 && fsc->m2 == 0) {
+ if (cl->cl_fsc != NULL) {
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+ free(cl->cl_fsc, M_DEVBUF);
+ cl->cl_fsc = NULL;
+ }
+ } else {
+ if (cl->cl_fsc == NULL)
+ cl->cl_fsc = fsc_tmp;
+ sc2isc(fsc, cl->cl_fsc);
+ rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt,
+ cl->cl_total);
+ }
+ }
+
+ if (usc != NULL) {
+ if (usc->m1 == 0 && usc->m2 == 0) {
+ if (cl->cl_usc != NULL) {
+ free(cl->cl_usc, M_DEVBUF);
+ cl->cl_usc = NULL;
+ cl->cl_myf = 0;
+ }
+ } else {
+ if (cl->cl_usc == NULL)
+ cl->cl_usc = usc_tmp;
+ sc2isc(usc, cl->cl_usc);
+ rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time,
+ cl->cl_total);
+ }
+ }
+
+ if (!qempty(cl->cl_q)) {
+ if (cl->cl_rsc != NULL)
+ update_ed(cl, m_pktlen(qhead(cl->cl_q)));
+ if (cl->cl_fsc != NULL)
+ update_vf(cl, 0, cur_time);
+ /* is this enough? */
+ }
+
+ IFQ_UNLOCK(cl->cl_hif->hif_ifq);
+ splx(s);
+
+ return (0);
+}
+
+/*
+ * hfsc device interface
+ */
+int
+hfscopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ if (machclk_freq == 0)
+ init_machclk();
+
+ if (machclk_freq == 0) {
+ printf("hfsc: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+hfscclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct hfsc_if *hif;
+ int err, error = 0;
+
+ while ((hif = hif_list) != NULL) {
+ /* destroy all */
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ altq_disable(hif->hif_ifq);
+
+ err = altq_detach(hif->hif_ifq);
+ if (err == 0)
+ err = hfsc_detach(hif);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+hfscioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct hfsc_if *hif;
+ struct hfsc_interface *ifacep;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case HFSC_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+ return (error);
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case HFSC_IF_ATTACH:
+ error = hfsccmd_if_attach((struct hfsc_attach *)addr);
+ break;
+
+ case HFSC_IF_DETACH:
+ error = hfsccmd_if_detach((struct hfsc_interface *)addr);
+ break;
+
+ case HFSC_ENABLE:
+ case HFSC_DISABLE:
+ case HFSC_CLEAR_HIERARCHY:
+ ifacep = (struct hfsc_interface *)addr;
+ if ((hif = altq_lookup(ifacep->hfsc_ifname,
+ ALTQT_HFSC)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+
+ case HFSC_ENABLE:
+ if (hif->hif_defaultclass == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("hfsc: no default class\n");
+#endif
+ error = EINVAL;
+ break;
+ }
+ error = altq_enable(hif->hif_ifq);
+ break;
+
+ case HFSC_DISABLE:
+ error = altq_disable(hif->hif_ifq);
+ break;
+
+ case HFSC_CLEAR_HIERARCHY:
+ hfsc_clear_interface(hif);
+ break;
+ }
+ break;
+
+ case HFSC_ADD_CLASS:
+ error = hfsccmd_add_class((struct hfsc_add_class *)addr);
+ break;
+
+ case HFSC_DEL_CLASS:
+ error = hfsccmd_delete_class((struct hfsc_delete_class *)addr);
+ break;
+
+ case HFSC_MOD_CLASS:
+ error = hfsccmd_modify_class((struct hfsc_modify_class *)addr);
+ break;
+
+ case HFSC_ADD_FILTER:
+ error = hfsccmd_add_filter((struct hfsc_add_filter *)addr);
+ break;
+
+ case HFSC_DEL_FILTER:
+ error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr);
+ break;
+
+ case HFSC_GETSTATS:
+ error = hfsccmd_class_stats((struct hfsc_class_stats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+hfsccmd_if_attach(ap)
+ struct hfsc_attach *ap;
+{
+ struct hfsc_if *hif;
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(ap->iface.hfsc_ifname)) == NULL)
+ return (ENXIO);
+
+ if ((hif = hfsc_attach(&ifp->if_snd, ap->bandwidth)) == NULL)
+ return (ENOMEM);
+
+ /*
+ * set HFSC to this ifnet structure.
+ */
+ if ((error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif,
+ hfsc_enqueue, hfsc_dequeue, hfsc_request,
+ &hif->hif_classifier, acc_classify)) != 0)
+ (void)hfsc_detach(hif);
+
+ return (error);
+}
+
+static int
+hfsccmd_if_detach(ap)
+ struct hfsc_interface *ap;
+{
+ struct hfsc_if *hif;
+ int error;
+
+ if ((hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ altq_disable(hif->hif_ifq);
+
+ if ((error = altq_detach(hif->hif_ifq)))
+ return (error);
+
+ return hfsc_detach(hif);
+}
+
+static int
+hfsccmd_add_class(ap)
+ struct hfsc_add_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl, *parent;
+ int i;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if (ap->parent_handle == HFSC_NULLCLASS_HANDLE &&
+ hif->hif_rootclass == NULL)
+ parent = NULL;
+ else if ((parent = clh_to_clp(hif, ap->parent_handle)) == NULL)
+ return (EINVAL);
+
+ /* assign a class handle (use a free slot number for now) */
+ for (i = 1; i < HFSC_MAX_CLASSES; i++)
+ if (hif->hif_class_tbl[i] == NULL)
+ break;
+ if (i == HFSC_MAX_CLASSES)
+ return (EBUSY);
+
+ if ((cl = hfsc_class_create(hif, &ap->service_curve, NULL, NULL,
+ parent, ap->qlimit, ap->flags, i)) == NULL)
+ return (ENOMEM);
+
+ /* return a class handle to the user */
+ ap->class_handle = i;
+
+ return (0);
+}
+
+static int
+hfsccmd_delete_class(ap)
+ struct hfsc_delete_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return hfsc_class_destroy(cl);
+}
+
+static int
+hfsccmd_modify_class(ap)
+ struct hfsc_modify_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct service_curve *rsc = NULL;
+ struct service_curve *fsc = NULL;
+ struct service_curve *usc = NULL;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ if (ap->sctype & HFSC_REALTIMESC)
+ rsc = &ap->service_curve;
+ if (ap->sctype & HFSC_LINKSHARINGSC)
+ fsc = &ap->service_curve;
+ if (ap->sctype & HFSC_UPPERLIMITSC)
+ usc = &ap->service_curve;
+
+ return hfsc_class_modify(cl, rsc, fsc, usc);
+}
+
+static int
+hfsccmd_add_filter(ap)
+ struct hfsc_add_filter *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ if (is_a_parent_class(cl)) {
+#ifdef ALTQ_DEBUG
+ printf("hfsccmd_add_filter: not a leaf class!\n");
+#endif
+ return (EINVAL);
+ }
+
+ return acc_add_filter(&hif->hif_classifier, &ap->filter,
+ cl, &ap->filter_handle);
+}
+
+static int
+hfsccmd_delete_filter(ap)
+ struct hfsc_delete_filter *ap;
+{
+ struct hfsc_if *hif;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&hif->hif_classifier,
+ ap->filter_handle);
+}
+
+static int
+hfsccmd_class_stats(ap)
+ struct hfsc_class_stats *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct hfsc_classstats stats, *usp;
+ int n, nclasses, error;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ ap->cur_time = read_machclk();
+ ap->machclk_freq = machclk_freq;
+ ap->hif_classes = hif->hif_classes;
+ ap->hif_packets = hif->hif_packets;
+
+ /* skip the first N classes in the tree */
+ nclasses = ap->nskip;
+ for (cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses;
+ cl = hfsc_nextclass(cl), n++)
+ ;
+ if (n != nclasses)
+ return (EINVAL);
+
+ /* then, read the next N classes in the tree */
+ nclasses = ap->nclasses;
+ usp = ap->stats;
+ for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) {
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+
+ ap->nclasses = n;
+
+ return (0);
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw hfsc_sw =
+ {"hfsc", hfscopen, hfscclose, hfscioctl};
+
+ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw);
+MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1);
+MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_HFSC */
diff --git a/freebsd/sys/contrib/altq/altq/altq_hfsc.h b/freebsd/sys/contrib/altq/altq/altq_hfsc.h
new file mode 100644
index 00000000..fc983586
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_hfsc.h
@@ -0,0 +1,320 @@
+/* $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+#ifndef _ALTQ_ALTQ_HFSC_HH_
+#define _ALTQ_ALTQ_HFSC_HH_
+
+#include <freebsd/altq/altq.h>
+#include <freebsd/altq/altq_classq.h>
+#include <freebsd/altq/altq_red.h>
+#include <freebsd/altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct service_curve {
+ u_int m1; /* slope of the first segment in bits/sec */
+ u_int d; /* the x-projection of the first segment in msec */
+ u_int m2; /* slope of the second segment in bits/sec */
+};
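+
+/*
+ * Example (editor's sketch): a concave two-segment curve granting
+ * 2Mbps for the first 50msec of each backlogged period and 1Mbps
+ * thereafter could be written as
+ *
+ * struct service_curve sc = { 2000000, 50, 1000000 };
+ *
+ * m1 > m2 gives a concave curve (a short-term burst allowance);
+ * m1 < m2 gives a convex one.
+ */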
+
+/* special class handles */
+#define HFSC_NULLCLASS_HANDLE 0
+#define HFSC_MAX_CLASSES 64
+
+/* hfsc class flags */
+#define HFCF_RED 0x0001 /* use RED */
+#define HFCF_ECN 0x0002 /* use RED/ECN */
+#define HFCF_RIO 0x0004 /* use RIO */
+#define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define HFCF_DEFAULTCLASS 0x1000 /* default class */
+
+/* service curve types */
+#define HFSC_REALTIMESC 1
+#define HFSC_LINKSHARINGSC 2
+#define HFSC_UPPERLIMITSC 4
+#define HFSC_DEFAULTSC (HFSC_REALTIMESC|HFSC_LINKSHARINGSC)
+
+struct hfsc_classstats {
+ u_int class_id;
+ u_int32_t class_handle;
+ struct service_curve rsc;
+ struct service_curve fsc;
+ struct service_curve usc; /* upper limit service curve */
+
+ u_int64_t total; /* total work in bytes */
+ u_int64_t cumul; /* cumulative work in bytes
+ done by real-time criteria */
+ u_int64_t d; /* deadline */
+ u_int64_t e; /* eligible time */
+ u_int64_t vt; /* virtual time */
+ u_int64_t f; /* fit time for upper-limit */
+
+ /* info helpful for debugging */
+ u_int64_t initvt; /* init virtual time */
+ u_int64_t vtoff; /* cl_vtoff */
+ u_int64_t cvtmax; /* cl_cvtmax */
+ u_int64_t myf; /* cl_myf */
+ u_int64_t cfmin; /* cl_cfmin */
+ u_int64_t cvtmin; /* cl_cvtmin */
+ u_int64_t myfadj; /* cl_myfadj */
+ u_int64_t vtadj; /* cl_vtadj */
+ u_int64_t cur_time;
+ u_int32_t machclk_freq;
+
+ u_int qlength;
+ u_int qlimit;
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+
+ u_int vtperiod; /* vt period sequence no */
+ u_int parentperiod; /* parent's vt period seqno */
+ int nactive; /* number of active children */
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3];
+};
+
+#ifdef ALTQ3_COMPAT
+struct hfsc_interface {
+ char hfsc_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+};
+
+struct hfsc_attach {
+ struct hfsc_interface iface;
+ u_int bandwidth; /* link bandwidth in bits/sec */
+};
+
+struct hfsc_add_class {
+ struct hfsc_interface iface;
+ u_int32_t parent_handle;
+ struct service_curve service_curve;
+ int qlimit;
+ int flags;
+
+ u_int32_t class_handle; /* return value */
+};
+
+struct hfsc_delete_class {
+ struct hfsc_interface iface;
+ u_int32_t class_handle;
+};
+
+struct hfsc_modify_class {
+ struct hfsc_interface iface;
+ u_int32_t class_handle;
+ struct service_curve service_curve;
+ int sctype;
+};
+
+struct hfsc_add_filter {
+ struct hfsc_interface iface;
+ u_int32_t class_handle;
+ struct flow_filter filter;
+
+ u_long filter_handle; /* return value */
+};
+
+struct hfsc_delete_filter {
+ struct hfsc_interface iface;
+ u_long filter_handle;
+};
+
+struct hfsc_class_stats {
+ struct hfsc_interface iface;
+ int nskip; /* skip # of classes */
+ int nclasses; /* # of class stats (WR) */
+ u_int64_t cur_time; /* current time */
+ u_int32_t machclk_freq; /* machine clock frequency */
+ u_int hif_classes; /* # of classes in the tree */
+ u_int hif_packets; /* # of packets in the tree */
+ struct hfsc_classstats *stats; /* pointer to stats array */
+};
+
+#define HFSC_IF_ATTACH _IOW('Q', 1, struct hfsc_attach)
+#define HFSC_IF_DETACH _IOW('Q', 2, struct hfsc_interface)
+#define HFSC_ENABLE _IOW('Q', 3, struct hfsc_interface)
+#define HFSC_DISABLE _IOW('Q', 4, struct hfsc_interface)
+#define HFSC_CLEAR_HIERARCHY _IOW('Q', 5, struct hfsc_interface)
+#define HFSC_ADD_CLASS _IOWR('Q', 7, struct hfsc_add_class)
+#define HFSC_DEL_CLASS _IOW('Q', 8, struct hfsc_delete_class)
+#define HFSC_MOD_CLASS _IOW('Q', 9, struct hfsc_modify_class)
+#define HFSC_ADD_FILTER _IOWR('Q', 10, struct hfsc_add_filter)
+#define HFSC_DEL_FILTER _IOW('Q', 11, struct hfsc_delete_filter)
+#define HFSC_GETSTATS _IOWR('Q', 12, struct hfsc_class_stats)
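+
+/*
+ * Usage sketch (editor's illustration; the device path and details
+ * are assumptions, not part of this header): a userland tool drives
+ * these ioctls against the hfsc character device, roughly:
+ *
+ * struct hfsc_add_class ac;
+ * int fd = open("/dev/altq/hfsc", O_RDWR);
+ * memset(&ac, 0, sizeof(ac));
+ * strlcpy(ac.iface.hfsc_ifname, "fxp0", IFNAMSIZ);
+ * ac.parent_handle = HFSC_NULLCLASS_HANDLE; // new root class
+ * ac.service_curve.m2 = 10000000; // 10Mbps steady rate
+ * ac.qlimit = 50;
+ * ac.flags = HFCF_DEFAULTCLASS;
+ * if (ioctl(fd, HFSC_ADD_CLASS, &ac) == 0)
+ * printf("handle %u\n", ac.class_handle); // set by the kernel
+ */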
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+/*
+ * kernel internal service curve representation
+ * coordinates are given by 64 bit unsigned integers.
+ * x-axis: unit is clock count. for the intel x86 architecture,
+ * the raw Pentium TSC (Timestamp Counter) value is used.
+ * virtual time is also calculated in this time scale.
+ * y-axis: unit is byte.
+ *
+ * the service curve parameters are converted to the internal
+ * representation.
+ * the slope values are scaled to avoid overflow.
+ * the inverse slope values as well as the y-projection of the 1st
+ * segment are kept in order to avoid 64-bit divide operations
+ * that are expensive on 32-bit architectures.
+ *
+ * note: Intel Pentium TSC never wraps around in several thousands of years.
+ * x-axis doesn't wrap around for 1089 years with 1GHz clock.
+ * y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth.
+ */
+
+/* kernel internal representation of a service curve */
+struct internal_sc {
+ u_int64_t sm1; /* scaled slope of the 1st segment */
+ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
+ u_int64_t dx; /* the x-projection of the 1st segment */
+ u_int64_t dy; /* the y-projection of the 1st segment */
+ u_int64_t sm2; /* scaled slope of the 2nd segment */
+ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+/* runtime service curve */
+struct runtime_sc {
+ u_int64_t x; /* current starting position on x-axis */
+ u_int64_t y; /* current starting position on y-axis */
+ u_int64_t sm1; /* scaled slope of the 1st segment */
+ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
+ u_int64_t dx; /* the x-projection of the 1st segment */
+ u_int64_t dy; /* the y-projection of the 1st segment */
+ u_int64_t sm2; /* scaled slope of the 2nd segment */
+ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+/* for TAILQ based ellist and actlist implementation */
+struct hfsc_class;
+typedef TAILQ_HEAD(_eligible, hfsc_class) ellist_t;
+typedef TAILQ_ENTRY(hfsc_class) elentry_t;
+typedef TAILQ_HEAD(_active, hfsc_class) actlist_t;
+typedef TAILQ_ENTRY(hfsc_class) actentry_t;
+#define ellist_first(s) TAILQ_FIRST(s)
+#define actlist_first(s) TAILQ_FIRST(s)
+#define actlist_last(s) TAILQ_LAST(s, _active)
+
+struct hfsc_class {
+ u_int cl_id; /* class id (just for debug) */
+ u_int32_t cl_handle; /* class handle */
+ struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */
+ int cl_flags; /* misc flags */
+
+ struct hfsc_class *cl_parent; /* parent class */
+ struct hfsc_class *cl_siblings; /* sibling classes */
+ struct hfsc_class *cl_children; /* child classes */
+
+ class_queue_t *cl_q; /* class queue structure */
+ struct red *cl_red; /* RED state */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ u_int64_t cl_total; /* total work in bytes */
+ u_int64_t cl_cumul; /* cumulative work in bytes
+ done by real-time criteria */
+ u_int64_t cl_d; /* deadline */
+ u_int64_t cl_e; /* eligible time */
+ u_int64_t cl_vt; /* virtual time */
+ u_int64_t cl_f; /* time when this class will fit for
+ link-sharing, max(myf, cfmin) */
+ u_int64_t cl_myf; /* my fit-time (as calculated from this
+ class's own upperlimit curve) */
+ u_int64_t cl_myfadj; /* my fit-time adjustment
+ (to cancel history dependence) */
+ u_int64_t cl_cfmin; /* earliest children's fit-time (used
+ with cl_myf to obtain cl_f) */
+ u_int64_t cl_cvtmin; /* minimal virtual time among the
+ children fit for link-sharing
+ (monotonic within a period) */
+ u_int64_t cl_vtadj; /* intra-period cumulative vt
+ adjustment */
+ u_int64_t cl_vtoff; /* inter-period cumulative vt offset */
+ u_int64_t cl_cvtmax; /* max child's vt in the last period */
+
+ u_int64_t cl_initvt; /* init virtual time (for debugging) */
+
+ struct internal_sc *cl_rsc; /* internal real-time service curve */
+ struct internal_sc *cl_fsc; /* internal fair service curve */
+ struct internal_sc *cl_usc; /* internal upperlimit service curve */
+ struct runtime_sc cl_deadline; /* deadline curve */
+ struct runtime_sc cl_eligible; /* eligible curve */
+ struct runtime_sc cl_virtual; /* virtual curve */
+ struct runtime_sc cl_ulimit; /* upperlimit curve */
+
+ u_int cl_vtperiod; /* vt period sequence no */
+ u_int cl_parentperiod; /* parent's vt period seqno */
+ int cl_nactive; /* number of active children */
+ actlist_t *cl_actc; /* active children list */
+
+ actentry_t cl_actlist; /* active children list entry */
+ elentry_t cl_ellist; /* eligible list entry */
+
+ struct {
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+ } cl_stats;
+};
+
+/*
+ * hfsc interface state
+ */
+struct hfsc_if {
+ struct hfsc_if *hif_next; /* interface state list */
+ struct ifaltq *hif_ifq; /* backpointer to ifaltq */
+ struct hfsc_class *hif_rootclass; /* root class */
+ struct hfsc_class *hif_defaultclass; /* default class */
+ struct hfsc_class *hif_class_tbl[HFSC_MAX_CLASSES];
+ struct hfsc_class *hif_pollcache; /* cache for poll operation */
+
+ u_int hif_classes; /* # of classes in the tree */
+ u_int hif_packets; /* # of packets in the tree */
+ u_int hif_classid; /* class id sequence number */
+
+ ellist_t *hif_eligible; /* eligible list */
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier hif_classifier;
+#endif
+};
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_HFSC_HH_ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_priq.c b/freebsd/sys/contrib/altq/altq/altq_priq.c
new file mode 100644
index 00000000..d395a833
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_priq.c
@@ -0,0 +1,1055 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ */
+/*
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * priority queue
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include <freebsd/local/opt_altq.h>
+#if (__FreeBSD__ != 2)
+#include <freebsd/local/opt_inet.h>
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet6.h>
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/queue.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/netinet/in.h>
+
+#include <freebsd/net/pfvar.h>
+#include <freebsd/altq/altq.h>
+#ifdef ALTQ3_COMPAT
+#include <freebsd/altq/altq_conf.h>
+#endif
+#include <freebsd/altq/altq_priq.h>
+
+/*
+ * function prototypes
+ */
+#ifdef ALTQ3_COMPAT
+static struct priq_if *priq_attach(struct ifaltq *, u_int);
+static int priq_detach(struct priq_if *);
+#endif
+static int priq_clear_interface(struct priq_if *);
+static int priq_request(struct ifaltq *, int, void *);
+static void priq_purge(struct priq_if *);
+static struct priq_class *priq_class_create(struct priq_if *, int, int, int,
+ int);
+static int priq_class_destroy(struct priq_class *);
+static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *priq_dequeue(struct ifaltq *, int);
+
+static int priq_addq(struct priq_class *, struct mbuf *);
+static struct mbuf *priq_getq(struct priq_class *);
+static struct mbuf *priq_pollq(struct priq_class *);
+static void priq_purgeq(struct priq_class *);
+
+#ifdef ALTQ3_COMPAT
+static int priqcmd_if_attach(struct priq_interface *);
+static int priqcmd_if_detach(struct priq_interface *);
+static int priqcmd_add_class(struct priq_add_class *);
+static int priqcmd_delete_class(struct priq_delete_class *);
+static int priqcmd_modify_class(struct priq_modify_class *);
+static int priqcmd_add_filter(struct priq_add_filter *);
+static int priqcmd_delete_filter(struct priq_delete_filter *);
+static int priqcmd_class_stats(struct priq_class_stats *);
+#endif /* ALTQ3_COMPAT */
+
+static void get_class_stats(struct priq_classstats *, struct priq_class *);
+static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t);
+
+#ifdef ALTQ3_COMPAT
+altqdev_decl(priq);
+
+/* pif_list keeps all priq_if's allocated. */
+static struct priq_if *pif_list = NULL;
+#endif /* ALTQ3_COMPAT */
+
+int
+priq_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc,
+ priq_enqueue, priq_dequeue, priq_request, NULL, NULL);
+ splx(s);
+ return (error);
+}
+
+int
+priq_add_altq(struct pf_altq *a)
+{
+ struct priq_if *pif;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ pif = malloc(sizeof(struct priq_if),
+ M_DEVBUF, M_WAITOK);
+ if (pif == NULL)
+ return (ENOMEM);
+ bzero(pif, sizeof(struct priq_if));
+ pif->pif_bandwidth = a->ifbandwidth;
+ pif->pif_maxpri = -1;
+ pif->pif_ifq = &ifp->if_snd;
+
+ /* keep the state in pf_altq */
+ a->altq_disc = pif;
+
+ return (0);
+}
+
+int
+priq_remove_altq(struct pf_altq *a)
+{
+ struct priq_if *pif;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ (void)priq_clear_interface(pif);
+
+ free(pif, M_DEVBUF);
+ return (0);
+}
+
+int
+priq_add_queue(struct pf_altq *a)
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ /* check parameters */
+ if (a->priority >= PRIQ_MAXPRI)
+ return (EINVAL);
+ if (a->qid == 0)
+ return (EINVAL);
+ if (pif->pif_classes[a->priority] != NULL)
+ return (EBUSY);
+ if (clh_to_clp(pif, a->qid) != NULL)
+ return (EBUSY);
+
+ cl = priq_class_create(pif, a->priority, a->qlimit,
+ a->pq_u.priq_opts.flags, a->qid);
+ if (cl == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
+
+int
+priq_remove_queue(struct pf_altq *a)
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ return (priq_class_destroy(cl));
+}
+
+int
+priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+ struct priq_classstats stats;
+ int error = 0;
+
+ if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+priq_clear_interface(struct priq_if *pif)
+{
+ struct priq_class *cl;
+ int pri;
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ /* free the filters for this interface */
+ acc_discard_filters(&pif->pif_classifier, NULL, 1);
+#endif
+
+ /* clear out the classes */
+ for (pri = 0; pri <= pif->pif_maxpri; pri++)
+ if ((cl = pif->pif_classes[pri]) != NULL)
+ priq_class_destroy(cl);
+
+ return (0);
+}
+
+static int
+priq_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ priq_purge(pif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+priq_purge(struct priq_if *pif)
+{
+ struct priq_class *cl;
+ int pri;
+
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q))
+ priq_purgeq(cl);
+ }
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ pif->pif_ifq->ifq_len = 0;
+}
+
+static struct priq_class *
+priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
+{
+ struct priq_class *cl;
+ int s;
+
+#ifndef ALTQ_RED
+ if (flags & PRCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("priq_class_create: RED not configured for PRIQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ if ((cl = pif->pif_classes[pri]) != NULL) {
+ /* modify the class instead of creating a new one */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+ if (!qempty(cl->cl_q))
+ priq_purgeq(cl);
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+ splx(s);
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ } else {
+ cl = malloc(sizeof(struct priq_class),
+ M_DEVBUF, M_WAITOK);
+ if (cl == NULL)
+ return (NULL);
+ bzero(cl, sizeof(struct priq_class));
+
+ cl->cl_q = malloc(sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (cl->cl_q == NULL)
+ goto err_ret;
+ bzero(cl->cl_q, sizeof(class_queue_t));
+ }
+
+ pif->pif_classes[pri] = cl;
+ if (flags & PRCF_DEFAULTCLASS)
+ pif->pif_default = cl;
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ qlimit(cl->cl_q) = qlimit;
+ qtype(cl->cl_q) = Q_DROPTAIL;
+ qlen(cl->cl_q) = 0;
+ cl->cl_flags = flags;
+ cl->cl_pri = pri;
+ if (pri > pif->pif_maxpri)
+ pif->pif_maxpri = pri;
+ cl->cl_pif = pif;
+ cl->cl_handle = qid;
+
+#ifdef ALTQ_RED
+ if (flags & (PRCF_RED|PRCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & PRCF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & PRCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (pif->pif_bandwidth < 8)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
+#ifdef ALTQ_RIO
+ if (flags & PRCF_RIO) {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RIO;
+ } else
+#endif
+ if (flags & PRCF_RED) {
+ cl->cl_red = red_alloc(0, 0,
+ qlimit(cl->cl_q) * 10/100,
+ qlimit(cl->cl_q) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RED;
+ }
+ }
+#endif /* ALTQ_RED */
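+
+ /*
+ * Editor's note: red_pkttime above is the nominal per-packet
+ * transmission time in nsec; e.g. a 1500-byte MTU on a 10Mbps
+ * link (pif_bandwidth = 10000000) gives
+ * 1500 * 10^9 / (10^7 / 8) = 1.2e6 nsec, i.e. 1.2msec per packet.
+ */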
+
+ return (cl);
+
+ err_ret:
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ if (cl->cl_q != NULL)
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+ return (NULL);
+}
+
+static int
+priq_class_destroy(struct priq_class *cl)
+{
+ struct priq_if *pif;
+ int s, pri;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ /* delete filters referencing to this class */
+ acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0);
+#endif
+
+ if (!qempty(cl->cl_q))
+ priq_purgeq(cl);
+
+ pif = cl->cl_pif;
+ pif->pif_classes[cl->cl_pri] = NULL;
+ if (pif->pif_maxpri == cl->cl_pri) {
+ for (pri = cl->cl_pri; pri >= 0; pri--)
+ if (pif->pif_classes[pri] != NULL) {
+ pif->pif_maxpri = pri;
+ break;
+ }
+ if (pri < 0)
+ pif->pif_maxpri = -1;
+ }
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+ splx(s);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+ return (0);
+}
+
+/*
+ * priq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+ struct priq_class *cl;
+ struct pf_mtag *t;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+#if defined(__NetBSD__) || defined(__OpenBSD__)\
+ || (defined(__FreeBSD__) && __FreeBSD_version >= 501113)
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+#else
+ printf("altq: packet for %s%d does not have pkthdr\n",
+ ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit);
+#endif
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ if ((t = pf_find_mtag(m)) != NULL)
+ cl = clh_to_clp(pif, t->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL) {
+ cl = pif->pif_default;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->cl_pktattr = NULL;
+ len = m_pktlen(m);
+ if (priq_addq(cl, m) != 0) {
+ /* drop occurred. mbuf was freed in priq_addq. */
+ PKTCNTR_ADD(&cl->cl_dropcnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+
+ /* successfully queued. */
+ return (0);
+}
+
+/*
+ * priq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+priq_dequeue(struct ifaltq *ifq, int op)
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+ struct priq_class *cl;
+ struct mbuf *m;
+ int pri;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (IFQ_IS_EMPTY(ifq))
+ /* no packet in the queue */
+ return (NULL);
+
+ for (pri = pif->pif_maxpri; pri >= 0; pri--) {
+ if ((cl = pif->pif_classes[pri]) != NULL &&
+ !qempty(cl->cl_q)) {
+ if (op == ALTDQ_POLL)
+ return (priq_pollq(cl));
+
+ m = priq_getq(cl);
+ if (m != NULL) {
+ IFQ_DEC_LEN(ifq);
+ if (qempty(cl->cl_q))
+ cl->cl_period++;
+ PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m));
+ }
+ return (m);
+ }
+ }
+ return (NULL);
+}
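+
+/*
+ * a sketch of the driver-side contract described above (not part of
+ * this file): a driver may use IFQ_POLL() to peek at the next mbuf,
+ * reserve transmit resources for it, and then call IFQ_DEQUEUE()
+ * expecting to get that very same mbuf back.
+ */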
+
+static int
+priq_addq(struct priq_class *cl, struct mbuf *m)
+{
+
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m,
+ cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
+#endif
+ if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & PRCF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(cl->cl_q, m);
+
+ return (0);
+}
+
+static struct mbuf *
+priq_getq(struct priq_class *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_getq(cl->cl_red, cl->cl_q);
+#endif
+ return _getq(cl->cl_q);
+}
+
+static struct mbuf *
+priq_pollq(cl)
+ struct priq_class *cl;
+{
+ return qhead(cl->cl_q);
+}
+
+static void
+priq_purgeq(struct priq_class *cl)
+{
+ struct mbuf *m;
+
+ if (qempty(cl->cl_q))
+ return;
+
+ while ((m = _getq(cl->cl_q)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
+ m_freem(m);
+ }
+ ASSERT(qlen(cl->cl_q) == 0);
+}
+
+static void
+get_class_stats(struct priq_classstats *sp, struct priq_class *cl)
+{
+ sp->class_handle = cl->cl_handle;
+ sp->qlength = qlen(cl->cl_q);
+ sp->qlimit = qlimit(cl->cl_q);
+ sp->period = cl->cl_period;
+ sp->xmitcnt = cl->cl_xmitcnt;
+ sp->dropcnt = cl->cl_dropcnt;
+
+ sp->qtype = qtype(cl->cl_q);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct priq_class *
+clh_to_clp(struct priq_if *pif, u_int32_t chandle)
+{
+ struct priq_class *cl;
+ int idx;
+
+ if (chandle == 0)
+ return (NULL);
+
+ for (idx = pif->pif_maxpri; idx >= 0; idx--)
+ if ((cl = pif->pif_classes[idx]) != NULL &&
+ cl->cl_handle == chandle)
+ return (cl);
+
+ return (NULL);
+}
+
+
+#ifdef ALTQ3_COMPAT
+
+static struct priq_if *
+priq_attach(ifq, bandwidth)
+ struct ifaltq *ifq;
+ u_int bandwidth;
+{
+ struct priq_if *pif;
+
+ pif = malloc(sizeof(struct priq_if),
+ M_DEVBUF, M_WAITOK);
+ if (pif == NULL)
+ return (NULL);
+ bzero(pif, sizeof(struct priq_if));
+ pif->pif_bandwidth = bandwidth;
+ pif->pif_maxpri = -1;
+ pif->pif_ifq = ifq;
+
+ /* add this state to the priq list */
+ pif->pif_next = pif_list;
+ pif_list = pif;
+
+ return (pif);
+}
+
+static int
+priq_detach(pif)
+ struct priq_if *pif;
+{
+ (void)priq_clear_interface(pif);
+
+ /* remove this interface from the pif list */
+ if (pif_list == pif)
+ pif_list = pif->pif_next;
+ else {
+ struct priq_if *p;
+
+ for (p = pif_list; p != NULL; p = p->pif_next)
+ if (p->pif_next == pif) {
+ p->pif_next = pif->pif_next;
+ break;
+ }
+ ASSERT(p != NULL);
+ }
+
+ free(pif, M_DEVBUF);
+ return (0);
+}
+
+/*
+ * priq device interface
+ */
+int
+priqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+priqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct priq_if *pif;
+ int err, error = 0;
+
+ while ((pif = pif_list) != NULL) {
+ /* destroy all */
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ altq_disable(pif->pif_ifq);
+
+ err = altq_detach(pif->pif_ifq);
+ if (err == 0)
+ err = priq_detach(pif);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+priqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct priq_if *pif;
+ struct priq_interface *ifacep;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case PRIQ_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+ return (error);
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case PRIQ_IF_ATTACH:
+ error = priqcmd_if_attach((struct priq_interface *)addr);
+ break;
+
+ case PRIQ_IF_DETACH:
+ error = priqcmd_if_detach((struct priq_interface *)addr);
+ break;
+
+ case PRIQ_ENABLE:
+ case PRIQ_DISABLE:
+ case PRIQ_CLEAR:
+ ifacep = (struct priq_interface *)addr;
+ if ((pif = altq_lookup(ifacep->ifname,
+ ALTQT_PRIQ)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+ case PRIQ_ENABLE:
+ if (pif->pif_default == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("priq: no default class\n");
+#endif
+ error = EINVAL;
+ break;
+ }
+ error = altq_enable(pif->pif_ifq);
+ break;
+
+ case PRIQ_DISABLE:
+ error = altq_disable(pif->pif_ifq);
+ break;
+
+ case PRIQ_CLEAR:
+ priq_clear_interface(pif);
+ break;
+ }
+ break;
+
+ case PRIQ_ADD_CLASS:
+ error = priqcmd_add_class((struct priq_add_class *)addr);
+ break;
+
+ case PRIQ_DEL_CLASS:
+ error = priqcmd_delete_class((struct priq_delete_class *)addr);
+ break;
+
+ case PRIQ_MOD_CLASS:
+ error = priqcmd_modify_class((struct priq_modify_class *)addr);
+ break;
+
+ case PRIQ_ADD_FILTER:
+ error = priqcmd_add_filter((struct priq_add_filter *)addr);
+ break;
+
+ case PRIQ_DEL_FILTER:
+ error = priqcmd_delete_filter((struct priq_delete_filter *)addr);
+ break;
+
+ case PRIQ_GETSTATS:
+ error = priqcmd_class_stats((struct priq_class_stats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+priqcmd_if_attach(ap)
+ struct priq_interface *ap;
+{
+ struct priq_if *pif;
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(ap->ifname)) == NULL)
+ return (ENXIO);
+
+ if ((pif = priq_attach(&ifp->if_snd, ap->arg)) == NULL)
+ return (ENOMEM);
+
+ /*
+	 * attach PRIQ to this ifnet structure.
+ */
+ if ((error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif,
+ priq_enqueue, priq_dequeue, priq_request,
+ &pif->pif_classifier, acc_classify)) != 0)
+ (void)priq_detach(pif);
+
+ return (error);
+}
+
+static int
+priqcmd_if_detach(ap)
+ struct priq_interface *ap;
+{
+ struct priq_if *pif;
+ int error;
+
+ if ((pif = altq_lookup(ap->ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ altq_disable(pif->pif_ifq);
+
+ if ((error = altq_detach(pif->pif_ifq)))
+ return (error);
+
+ return priq_detach(pif);
+}
+
+static int
+priqcmd_add_class(ap)
+ struct priq_add_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+ int qid;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
+ return (EINVAL);
+ if (pif->pif_classes[ap->pri] != NULL)
+ return (EBUSY);
+
+ qid = ap->pri + 1;
+ if ((cl = priq_class_create(pif, ap->pri,
+ ap->qlimit, ap->flags, qid)) == NULL)
+ return (ENOMEM);
+
+ /* return a class handle to the user */
+ ap->class_handle = cl->cl_handle;
+
+ return (0);
+}
+
+static int
+priqcmd_delete_class(ap)
+ struct priq_delete_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return priq_class_destroy(cl);
+}
+
+static int
+priqcmd_modify_class(ap)
+ struct priq_modify_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ /*
+ * if priority is changed, move the class to the new priority
+ */
+ if (pif->pif_classes[ap->pri] != cl) {
+ if (pif->pif_classes[ap->pri] != NULL)
+ return (EEXIST);
+ pif->pif_classes[cl->cl_pri] = NULL;
+ pif->pif_classes[ap->pri] = cl;
+ cl->cl_pri = ap->pri;
+ }
+
+ /* call priq_class_create to change class parameters */
+ if ((cl = priq_class_create(pif, ap->pri,
+ ap->qlimit, ap->flags, ap->class_handle)) == NULL)
+ return (ENOMEM);
+ return 0;
+}
+
+static int
+priqcmd_add_filter(ap)
+ struct priq_add_filter *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&pif->pif_classifier, &ap->filter,
+ cl, &ap->filter_handle);
+}
+
+static int
+priqcmd_delete_filter(ap)
+ struct priq_delete_filter *ap;
+{
+ struct priq_if *pif;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&pif->pif_classifier,
+ ap->filter_handle);
+}
+
+static int
+priqcmd_class_stats(ap)
+ struct priq_class_stats *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+ struct priq_classstats stats, *usp;
+ int pri, error;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ ap->maxpri = pif->pif_maxpri;
+
+	/* then, copy out the stats of each priority class */
+ usp = ap->stats;
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ cl = pif->pif_classes[pri];
+ if (cl != NULL)
+ get_class_stats(&stats, cl);
+ else
+ bzero(&stats, sizeof(stats));
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+ return (0);
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw priq_sw =
+ {"priq", priqopen, priqclose, priqioctl};
+
+ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw);
+MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1);
+MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ3_COMPAT */
+#endif /* ALTQ_PRIQ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_priq.h b/freebsd/sys/contrib/altq/altq/altq_priq.h
new file mode 100644
index 00000000..7f553b8a
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_priq.h
@@ -0,0 +1,170 @@
+/* $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $ */
+/*
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_PRIQ_HH_
+#define _ALTQ_ALTQ_PRIQ_HH_
+
+#include <freebsd/altq/altq.h>
+#include <freebsd/altq/altq_classq.h>
+#include <freebsd/altq/altq_red.h>
+#include <freebsd/altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PRIQ_MAXPRI 16 /* upper limit of the number of priorities */
+
+#ifdef ALTQ3_COMPAT
+struct priq_interface {
+ char ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+ u_long arg; /* request-specific argument */
+};
+
+struct priq_add_class {
+ struct priq_interface iface;
+ int pri; /* priority (0 is the lowest) */
+ int qlimit; /* queue size limit */
+ int flags; /* misc flags (see below) */
+
+ u_int32_t class_handle; /* return value */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* priq class flags */
+#define PRCF_RED 0x0001 /* use RED */
+#define PRCF_ECN 0x0002 /* use RED/ECN */
+#define PRCF_RIO 0x0004 /* use RIO */
+#define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define PRCF_DEFAULTCLASS 0x1000 /* default class */
+
+/* special class handles */
+#define PRIQ_NULLCLASS_HANDLE 0
+
+#ifdef ALTQ3_COMPAT
+struct priq_delete_class {
+ struct priq_interface iface;
+ u_int32_t class_handle;
+};
+
+struct priq_modify_class {
+ struct priq_interface iface;
+ u_int32_t class_handle;
+ int pri;
+ int qlimit;
+ int flags;
+};
+
+struct priq_add_filter {
+ struct priq_interface iface;
+ u_int32_t class_handle;
+ struct flow_filter filter;
+
+ u_long filter_handle; /* return value */
+};
+
+struct priq_delete_filter {
+ struct priq_interface iface;
+ u_long filter_handle;
+};
+#endif /* ALTQ3_COMPAT */
+
+struct priq_classstats {
+ u_int32_t class_handle;
+
+ u_int qlength;
+ u_int qlimit;
+ u_int period;
+ struct pktcntr xmitcnt; /* transmitted packet counter */
+ struct pktcntr dropcnt; /* dropped packet counter */
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3]; /* rio has 3 red stats */
+};
+
+#ifdef ALTQ3_COMPAT
+struct priq_class_stats {
+ struct priq_interface iface;
+ int maxpri; /* in/out */
+
+ struct priq_classstats *stats; /* pointer to stats array */
+};
+
+#define PRIQ_IF_ATTACH _IOW('Q', 1, struct priq_interface)
+#define PRIQ_IF_DETACH _IOW('Q', 2, struct priq_interface)
+#define PRIQ_ENABLE _IOW('Q', 3, struct priq_interface)
+#define PRIQ_DISABLE _IOW('Q', 4, struct priq_interface)
+#define PRIQ_CLEAR _IOW('Q', 5, struct priq_interface)
+#define PRIQ_ADD_CLASS _IOWR('Q', 7, struct priq_add_class)
+#define PRIQ_DEL_CLASS _IOW('Q', 8, struct priq_delete_class)
+#define PRIQ_MOD_CLASS _IOW('Q', 9, struct priq_modify_class)
+#define PRIQ_ADD_FILTER _IOWR('Q', 10, struct priq_add_filter)
+#define PRIQ_DEL_FILTER _IOW('Q', 11, struct priq_delete_filter)
+#define PRIQ_GETSTATS _IOWR('Q', 12, struct priq_class_stats)
+
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+struct priq_class {
+ u_int32_t cl_handle; /* class handle */
+ class_queue_t *cl_q; /* class queue structure */
+ struct red *cl_red; /* RED state */
+ int cl_pri; /* priority */
+ int cl_flags; /* class flags */
+ struct priq_if *cl_pif; /* back pointer to pif */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ /* statistics */
+ u_int cl_period; /* backlog period */
+ struct pktcntr cl_xmitcnt; /* transmitted packet counter */
+ struct pktcntr cl_dropcnt; /* dropped packet counter */
+};
+
+/*
+ * priq interface state
+ */
+struct priq_if {
+ struct priq_if *pif_next; /* interface state list */
+ struct ifaltq *pif_ifq; /* backpointer to ifaltq */
+ u_int pif_bandwidth; /* link bandwidth in bps */
+ int pif_maxpri; /* max priority in use */
+ struct priq_class *pif_default; /* default class */
+ struct priq_class *pif_classes[PRIQ_MAXPRI]; /* classes */
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier pif_classifier; /* classifier */
+#endif
+};
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_PRIQ_HH_ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_red.c b/freebsd/sys/contrib/altq/altq/altq_red.c
new file mode 100644
index 00000000..18fa7209
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_red.c
@@ -0,0 +1,1503 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include <freebsd/local/opt_altq.h>
+#if (__FreeBSD__ != 2)
+#include <freebsd/local/opt_inet.h>
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet6.h>
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/errno.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/kernel.h>
+#ifdef ALTQ_FLOWVALVE
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/time.h>
+#endif
+#endif /* ALTQ3_COMPAT */
+
+#include <freebsd/net/if.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+
+#include <freebsd/net/pfvar.h>
+#include <freebsd/altq/altq.h>
+#include <freebsd/altq/altq_red.h>
+#ifdef ALTQ3_COMPAT
+#include <freebsd/altq/altq_conf.h>
+#ifdef ALTQ_FLOWVALVE
+#include <freebsd/altq/altq_flowvalve.h>
+#endif
+#endif
+
+/*
+ * ALTQ/RED (Random Early Detection) implementation using 32-bit
+ * fixed-point calculation.
+ *
+ * written by kjc using the ns code as a reference.
+ * you can learn more about red and ns from Sally's home page at
+ * http://www-nrg.ee.lbl.gov/floyd/
+ *
+ * most of the red parameter values are fixed in this implementation
+ * to prevent fixed-point overflow/underflow.
+ * if you change the parameters, watch out for overflow/underflow!
+ *
+ * the parameters used are the values recommended by Sally.
+ * the corresponding ns config looks like:
+ * q_weight=0.00195
+ * minthresh=5 maxthresh=15 queue-size=60
+ * linterm=30
+ * dropmech=drop-tail
+ * bytes=false (can't be handled by 32-bit fixed-point)
+ * doubleq=false dqthresh=false
+ * wait=true
+ */
+/*
+ * alternative red parameters for a slow link.
+ *
+ * assume the queue length jumps from zero to L and then stays at L;
+ * it takes N packets for q_avg to reach 63% of L.
+ * when q_weight is 0.002, N is about 500 packets.
+ * for a slow link like dial-up, 500 packets take more than 1 minute!
+ * when q_weight is 0.008, N is about 127 packets.
+ * when q_weight is 0.016, N is about 63 packets.
+ * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
+ * are allowed for 0.016.
+ * see Sally's paper for more details.
+ */
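+/*
+ * a rough derivation of the N values above: with weight Wq, the gap
+ * to L decays as (1 - Wq)^N ~ e^(-N*Wq) after N packets, reaching
+ * 1/e (i.e., q_avg at 63% of L) around N = 1/Wq -- which gives the
+ * packet counts quoted above.
+ */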
+/* normal red parameters */
+#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
+ /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
+ /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
+ /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define INV_P_MAX 10 /* inverse of max drop probability */
+#define TH_MIN 5 /* min threshold */
+#define TH_MAX 15 /* max threshold */
+
+#define	RED_LIMIT	60	/* default max queue length */
+#define RED_STATS /* collect statistics */
+
+/*
+ * our default policy for forced-drop is drop-tail.
+ * (in altq-1.1.2 or earlier, the default was random-drop.
+ * but it makes more sense to punish the cause of the surge.)
+ * to switch to the random-drop policy, define "RED_RANDOM_DROP".
+ */
+
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+/*
+ * flow-valve is an extension to protect red from unresponsive flows
+ * and to promote end-to-end congestion control.
+ * flow-valve observes the average drop rates of the flows that have
+ * experienced packet drops in the recent past.
+ * when the average drop rate exceeds the threshold, the flow is
+ * blocked by the flow-valve. the trapped flow should back off
+ * exponentially to escape from the flow-valve.
+ */
+#ifdef RED_RANDOM_DROP
+#error "random-drop can't be used with flow-valve!"
+#endif
+#endif /* ALTQ_FLOWVALVE */
+
+/* red_list keeps all red_queue_t's allocated. */
+static red_queue_t *red_list = NULL;
+
+#endif /* ALTQ3_COMPAT */
+
+/* default red parameter values */
+static int default_th_min = TH_MIN;
+static int default_th_max = TH_MAX;
+static int default_inv_pmax = INV_P_MAX;
+
+#ifdef ALTQ3_COMPAT
+/* internal function prototypes */
+static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *red_dequeue(struct ifaltq *, int);
+static int red_request(struct ifaltq *, int, void *);
+static void red_purgeq(red_queue_t *);
+static int red_detach(red_queue_t *);
+#ifdef ALTQ_FLOWVALVE
+static __inline struct fve *flowlist_lookup(struct flowvalve *,
+ struct altq_pktattr *, struct timeval *);
+static __inline struct fve *flowlist_reclaim(struct flowvalve *,
+ struct altq_pktattr *);
+static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
+static __inline int fv_p2f(struct flowvalve *, int);
+#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
+static struct flowvalve *fv_alloc(struct red *);
+#endif
+static void fv_destroy(struct flowvalve *);
+static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
+ struct fve **);
+static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
+ struct fve *);
+#endif
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * red support routines
+ */
+red_t *
+red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
+ int pkttime)
+{
+ red_t *rp;
+ int w, i;
+ int npkts_per_sec;
+
+ rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK);
+ if (rp == NULL)
+ return (NULL);
+ bzero(rp, sizeof(red_t));
+
+ rp->red_avg = 0;
+ rp->red_idle = 1;
+
+ if (weight == 0)
+ rp->red_weight = W_WEIGHT;
+ else
+ rp->red_weight = weight;
+ if (inv_pmax == 0)
+ rp->red_inv_pmax = default_inv_pmax;
+ else
+ rp->red_inv_pmax = inv_pmax;
+ if (th_min == 0)
+ rp->red_thmin = default_th_min;
+ else
+ rp->red_thmin = th_min;
+ if (th_max == 0)
+ rp->red_thmax = default_th_max;
+ else
+ rp->red_thmax = th_max;
+
+ rp->red_flags = flags;
+
+ if (pkttime == 0)
+ /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
+ rp->red_pkttime = 800;
+ else
+ rp->red_pkttime = pkttime;
+
+ if (weight == 0) {
+ /* when the link is very slow, adjust red parameters */
+ npkts_per_sec = 1000000 / rp->red_pkttime;
+ if (npkts_per_sec < 50) {
+ /* up to about 400Kbps */
+ rp->red_weight = W_WEIGHT_2;
+ } else if (npkts_per_sec < 300) {
+ /* up to about 2.4Mbps */
+ rp->red_weight = W_WEIGHT_1;
+ }
+ }
+
+ /* calculate wshift. weight must be power of 2 */
+ w = rp->red_weight;
+ for (i = 0; w > 1; i++)
+ w = w >> 1;
+ rp->red_wshift = i;
+ w = 1 << rp->red_wshift;
+ if (w != rp->red_weight) {
+ printf("invalid weight value %d for red! use %d\n",
+ rp->red_weight, w);
+ rp->red_weight = w;
+ }
+
+ /*
+ * thmin_s and thmax_s are scaled versions of th_min and th_max
+ * to be compared with avg.
+ */
+ rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
+ rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
+
+ /*
+ * precompute probability denominator
+ * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+ */
+ rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
+ * rp->red_inv_pmax) << FP_SHIFT;
+
+ /* allocate weight table */
+ rp->red_wtab = wtab_alloc(rp->red_weight);
+
+ microtime(&rp->red_last);
+ return (rp);
+}
+
+void
+red_destroy(red_t *rp)
+{
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL)
+ fv_destroy(rp->red_flowvalve);
+#endif
+#endif /* ALTQ3_COMPAT */
+ wtab_destroy(rp->red_wtab);
+ free(rp, M_DEVBUF);
+}
+
+void
+red_getstats(red_t *rp, struct redstats *sp)
+{
+ sp->q_avg = rp->red_avg >> rp->red_wshift;
+ sp->xmit_cnt = rp->red_stats.xmit_cnt;
+ sp->drop_cnt = rp->red_stats.drop_cnt;
+ sp->drop_forced = rp->red_stats.drop_forced;
+ sp->drop_unforced = rp->red_stats.drop_unforced;
+ sp->marked_packets = rp->red_stats.marked_packets;
+}
+
+int
+red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
+ struct altq_pktattr *pktattr)
+{
+ int avg, droptype;
+ int n;
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ struct fve *fve = NULL;
+
+ if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
+ if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
+ m_freem(m);
+ return (-1);
+ }
+#endif
+#endif /* ALTQ3_COMPAT */
+
+ avg = rp->red_avg;
+
+ /*
+ * if we were idle, we pretend that n packets arrived during
+ * the idle period.
+ */
+ if (rp->red_idle) {
+ struct timeval now;
+ int t;
+
+ rp->red_idle = 0;
+ microtime(&now);
+ t = (now.tv_sec - rp->red_last.tv_sec);
+ if (t > 60) {
+ /*
+			 * we have been idle for more than 1 minute,
+			 * so set avg to zero.  this prevents t from
+			 * overflowing.
+ */
+ avg = 0;
+ } else {
+ t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
+ n = t / rp->red_pkttime - 1;
+
+ /* the following line does (avg = (1 - Wq)^n * avg) */
+ if (n > 0)
+ avg = (avg >> FP_SHIFT) *
+ pow_w(rp->red_wtab, n);
+ }
+ }
+
+ /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
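+	/*
+	 * avg is kept scaled by 2^(red_wshift + FP_SHIFT), the same
+	 * scale as red_thmin_s/red_thmax_s; with Wq = 1/2^red_wshift
+	 * the line below is the scaled form of
+	 *	avg = (1 - Wq) * avg + Wq * qlen(q)
+	 */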
+ avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
+ rp->red_avg = avg; /* save the new value */
+
+ /*
+ * red_count keeps a tally of arriving traffic that has not
+ * been dropped.
+ */
+ rp->red_count++;
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (avg >= rp->red_thmin_s && qlen(q) > 1) {
+ if (avg >= rp->red_thmax_s) {
+ /* avg >= th_max: forced drop */
+ droptype = DTYPE_FORCED;
+ } else if (rp->red_old == 0) {
+ /* first exceeds th_min */
+ rp->red_count = 1;
+ rp->red_old = 1;
+ } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
+ rp->red_probd, rp->red_count)) {
+ /* mark or drop by red */
+ if ((rp->red_flags & REDF_ECN) &&
+ mark_ecn(m, pktattr, rp->red_flags)) {
+ /* successfully marked. do not drop. */
+ rp->red_count = 0;
+#ifdef RED_STATS
+ rp->red_stats.marked_packets++;
+#endif
+ } else {
+ /* unforced drop by red */
+ droptype = DTYPE_EARLY;
+ }
+ }
+ } else {
+ /* avg < th_min */
+ rp->red_old = 0;
+ }
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+#ifdef RED_RANDOM_DROP
+	/* unless it is an early drop, enqueue this packet. */
+ if (droptype != DTYPE_EARLY)
+ _addq(q, m);
+#else
+ /* if successful, enqueue this packet. */
+ if (droptype == DTYPE_NODROP)
+ _addq(q, m);
+#endif
+ if (droptype != DTYPE_NODROP) {
+ if (droptype == DTYPE_EARLY) {
+ /* drop the incoming packet */
+#ifdef RED_STATS
+ rp->red_stats.drop_unforced++;
+#endif
+ } else {
+ /* forced drop, select a victim packet in the queue. */
+#ifdef RED_RANDOM_DROP
+ m = _getq_random(q);
+#endif
+#ifdef RED_STATS
+ rp->red_stats.drop_forced++;
+#endif
+ }
+#ifdef RED_STATS
+ PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
+#endif
+ rp->red_count = 0;
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL)
+ fv_dropbyred(rp->red_flowvalve, pktattr, fve);
+#endif
+#endif /* ALTQ3_COMPAT */
+ m_freem(m);
+ return (-1);
+ }
+ /* successfully queued */
+#ifdef RED_STATS
+ PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
+#endif
+ return (0);
+}
+
+/*
+ * early-drop probability is calculated as follows:
+ * prob = p_max * (avg - th_min) / (th_max - th_min)
+ * prob_a = prob / (2 - count*prob)
+ * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
+ * here prob_a increases as successive undrop count increases.
+ * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
+ * becomes 1 when (count >= (2 / prob))).
+ */
+int
+drop_early(int fp_len, int fp_probd, int count)
+{
+ int d; /* denominator of drop-probability */
+
+ d = fp_probd - count * fp_len;
+ if (d <= 0)
+ /* count exceeds the hard limit: drop or mark */
+ return (1);
+
+ /*
+ * now the range of d is [1..600] in fixed-point. (when
+ * th_max-th_min=10 and p_max=1/30)
+ * drop probability = (avg - TH_MIN) / d
+ */
+
+ if ((arc4random() % d) < fp_len) {
+ /* drop or mark */
+ return (1);
+ }
+ /* no drop/mark */
+ return (0);
+}
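+
+/*
+ * a worked example with the compiled-in defaults (th_min=5, th_max=15,
+ * inv_pmax=10, FP_SHIFT=12): red_probd = (2 * 10 * 10) << 12.  when
+ * avg sits midway (avg - th_min = 5 packets, so fp_len = 5 << 12) and
+ * count is 0, d = 200 << 12 and the drop probability is fp_len / d =
+ * 5/200 = 1/40 -- exactly prob/2 = (p_max * 5/10) / 2 as the formula
+ * above predicts.
+ */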
+
+/*
+ * try to mark the CE bit in the packet.
+ * returns 1 if successfully marked, 0 otherwise.
+ */
+int
+mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
+{
+ struct mbuf *m0;
+ struct pf_mtag *at;
+ void *hdr;
+ int af;
+
+ at = pf_find_mtag(m);
+ if (at != NULL) {
+ af = at->af;
+ hdr = at->hdr;
+#ifdef ALTQ3_COMPAT
+ } else if (pktattr != NULL) {
+ af = pktattr->pattr_af;
+ hdr = pktattr->pattr_hdr;
+#endif /* ALTQ3_COMPAT */
+ } else
+ return (0);
+
+ if (af != AF_INET && af != AF_INET6)
+ return (0);
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)hdr >= m0->m_data) &&
+ ((caddr_t)hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, tag info is stale */
+ return (0);
+ }
+
+ switch (af) {
+ case AF_INET:
+ if (flags & REDF_ECN4) {
+ struct ip *ip = hdr;
+ u_int8_t otos;
+ int sum;
+
+ if (ip->ip_v != 4)
+ return (0); /* version mismatch! */
+
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+ return (0); /* not-ECT */
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ return (1); /* already marked */
+
+ /*
+ * ecn-capable but not marked,
+ * mark CE and update checksum
+ */
+ otos = ip->ip_tos;
+ ip->ip_tos |= IPTOS_ECN_CE;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
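+			/*
+			 * here HC is the old checksum, m the old tos
+			 * byte (otos) and m' the new one; the two
+			 * fold steps below carry the sum back into
+			 * 16 bits before the final complement.
+			 */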
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~otos & 0xffff) + ip->ip_tos;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ return (1);
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (flags & REDF_ECN6) {
+ struct ip6_hdr *ip6 = hdr;
+ u_int32_t flowlabel;
+
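+			/*
+			 * in the ip6_flow word the traffic class sits
+			 * in bits 20-27, so the IPTOS_ECN_* values (the
+			 * low two tos bits) are shifted left by 20 below.
+			 */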
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return (0); /* version mismatch! */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_NOTECT << 20))
+ return (0); /* not-ECT */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_CE << 20))
+ return (1); /* already marked */
+ /*
+ * ecn-capable but not marked, mark CE
+ */
+ flowlabel |= (IPTOS_ECN_CE << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ return (1);
+ }
+ break;
+#endif /* INET6 */
+ }
+
+ /* not marked */
+ return (0);
+}
+
+struct mbuf *
+red_getq(rp, q)
+ red_t *rp;
+ class_queue_t *q;
+{
+ struct mbuf *m;
+
+ if ((m = _getq(q)) == NULL) {
+ if (rp->red_idle == 0) {
+ rp->red_idle = 1;
+ microtime(&rp->red_last);
+ }
+ return NULL;
+ }
+
+ rp->red_idle = 0;
+ return (m);
+}
+
+/*
+ * helper routine to calibrate avg during idle.
+ * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
+ * here Wq = 1/weight and the code assumes Wq is close to zero.
+ *
+ * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
+ */
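+/*
+ * pow_w() computes (1 - Wq)^n by square-and-multiply: each set bit i
+ * of n contributes the precomputed factor w_tab[i] = (1 - Wq)^(2^i),
+ * so e.g. n = 5 = 4 + 1 multiplies w_tab[2] by w_tab[0] -- O(log n)
+ * multiplies instead of n.
+ */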
+static struct wtab *wtab_list = NULL; /* pointer to wtab list */
+
+struct wtab *
+wtab_alloc(int weight)
+{
+ struct wtab *w;
+ int i;
+
+ for (w = wtab_list; w != NULL; w = w->w_next)
+ if (w->w_weight == weight) {
+ w->w_refcount++;
+ return (w);
+ }
+
+ w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK);
+ if (w == NULL)
+ panic("wtab_alloc: malloc failed!");
+ bzero(w, sizeof(struct wtab));
+ w->w_weight = weight;
+ w->w_refcount = 1;
+ w->w_next = wtab_list;
+ wtab_list = w;
+
+ /* initialize the weight table */
+ w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
+ for (i = 1; i < 32; i++) {
+ w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
+ if (w->w_tab[i] == 0 && w->w_param_max == 0)
+ w->w_param_max = 1 << i;
+ }
+
+ return (w);
+}
+
+int
+wtab_destroy(struct wtab *w)
+{
+ struct wtab *prev;
+
+ if (--w->w_refcount > 0)
+ return (0);
+
+ if (wtab_list == w)
+ wtab_list = w->w_next;
+ else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
+ if (prev->w_next == w) {
+ prev->w_next = w->w_next;
+ break;
+ }
+
+ free(w, M_DEVBUF);
+ return (0);
+}
+
+int32_t
+pow_w(struct wtab *w, int n)
+{
+ int i, bit;
+ int32_t val;
+
+ if (n >= w->w_param_max)
+ return (0);
+
+ val = 1 << FP_SHIFT;
+ if (n <= 0)
+ return (val);
+
+ bit = 1;
+ i = 0;
+ while (n) {
+ if (n & bit) {
+ val = (val * w->w_tab[i]) >> FP_SHIFT;
+ n &= ~bit;
+ }
+ i++;
+ bit <<= 1;
+ }
+ return (val);
+}
+
+#ifdef ALTQ3_COMPAT
+/*
+ * red device interface
+ */
+altqdev_decl(red);
+
+int
+redopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+redclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ red_queue_t *rqp;
+ int err, error = 0;
+
+ while ((rqp = red_list) != NULL) {
+ /* destroy all */
+ err = red_detach(rqp);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+redioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ red_queue_t *rqp;
+ struct red_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case RED_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+ switch (cmd) {
+
+ case RED_ENABLE:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case RED_DISABLE:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case RED_IF_ATTACH:
+ ifp = ifunit(((struct red_interface *)addr)->red_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize red_queue_t */
+ rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
+ if (rqp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp, sizeof(red_queue_t));
+
+ rqp->rq_q = malloc(sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (rqp->rq_q == NULL) {
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
+ if (rqp->rq_red == NULL) {
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = RED_LIMIT;
+ qtype(rqp->rq_q) = Q_RED;
+
+ /*
+		 * attach RED to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
+ red_enqueue, red_dequeue, red_request,
+ NULL, NULL);
+ if (error) {
+ red_destroy(rqp->rq_red);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the red list */
+ rqp->rq_next = red_list;
+ red_list = rqp;
+ break;
+
+ case RED_IF_DETACH:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = red_detach(rqp);
+ break;
+
+ case RED_GETSTATS:
+ do {
+ struct red_stats *q_stats;
+ red_t *rp;
+
+ q_stats = (struct red_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.red_ifname,
+ ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ q_stats->q_len = qlen(rqp->rq_q);
+ q_stats->q_limit = qlimit(rqp->rq_q);
+
+ rp = rqp->rq_red;
+ q_stats->q_avg = rp->red_avg >> rp->red_wshift;
+ q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
+ q_stats->drop_cnt = rp->red_stats.drop_cnt;
+ q_stats->drop_forced = rp->red_stats.drop_forced;
+ q_stats->drop_unforced = rp->red_stats.drop_unforced;
+ q_stats->marked_packets = rp->red_stats.marked_packets;
+
+ q_stats->weight = rp->red_weight;
+ q_stats->inv_pmax = rp->red_inv_pmax;
+ q_stats->th_min = rp->red_thmin;
+ q_stats->th_max = rp->red_thmax;
+
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL) {
+ struct flowvalve *fv = rp->red_flowvalve;
+ q_stats->fv_flows = fv->fv_flows;
+ q_stats->fv_pass = fv->fv_stats.pass;
+ q_stats->fv_predrop = fv->fv_stats.predrop;
+ q_stats->fv_alloc = fv->fv_stats.alloc;
+ q_stats->fv_escape = fv->fv_stats.escape;
+ } else {
+#endif /* ALTQ_FLOWVALVE */
+ q_stats->fv_flows = 0;
+ q_stats->fv_pass = 0;
+ q_stats->fv_predrop = 0;
+ q_stats->fv_alloc = 0;
+ q_stats->fv_escape = 0;
+#ifdef ALTQ_FLOWVALVE
+ }
+#endif /* ALTQ_FLOWVALVE */
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RED_CONFIG:
+ do {
+ struct red_conf *fc;
+ red_t *new;
+ int s, limit;
+
+ fc = (struct red_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.red_ifname,
+ ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ new = red_alloc(fc->red_weight,
+ fc->red_inv_pmax,
+ fc->red_thmin,
+ fc->red_thmax,
+ fc->red_flags,
+ fc->red_pkttime);
+ if (new == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ red_purgeq(rqp);
+ limit = fc->red_limit;
+ if (limit < fc->red_thmax)
+ limit = fc->red_thmax;
+ qlimit(rqp->rq_q) = limit;
+ fc->red_limit = limit; /* write back the new value */
+
+ red_destroy(rqp->rq_red);
+ rqp->rq_red = new;
+
+ splx(s);
+
+ /* write back new values */
+ fc->red_limit = limit;
+ fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
+ fc->red_thmin = rqp->rq_red->red_thmin;
+ fc->red_thmax = rqp->rq_red->red_thmax;
+
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RED_SETDEFAULTS:
+ do {
+ struct redparams *rp;
+
+ rp = (struct redparams *)addr;
+
+ default_th_min = rp->th_min;
+ default_th_max = rp->th_max;
+ default_inv_pmax = rp->inv_pmax;
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+red_detach(rqp)
+ red_queue_t *rqp;
+{
+ red_queue_t *tmp;
+ int error = 0;
+
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ altq_disable(rqp->rq_ifq);
+
+ if ((error = altq_detach(rqp->rq_ifq)))
+ return (error);
+
+ if (red_list == rqp)
+ red_list = rqp->rq_next;
+ else {
+ for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
+ if (tmp->rq_next == rqp) {
+ tmp->rq_next = rqp->rq_next;
+ break;
+ }
+ if (tmp == NULL)
+ printf("red_detach: no state found in red_list!\n");
+ }
+
+ red_destroy(rqp->rq_red);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+static int
+red_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
+ return ENOBUFS;
+ ifq->ifq_len++;
+ return 0;
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+
+static struct mbuf *
+red_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+ struct mbuf *m;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (op == ALTDQ_POLL)
+ return qhead(rqp->rq_q);
+
+ /* op == ALTDQ_REMOVE */
+ m = red_getq(rqp->rq_red, rqp->rq_q);
+ if (m != NULL)
+ ifq->ifq_len--;
+ return (m);
+}
+
+static int
+red_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ red_purgeq(rqp);
+ break;
+ }
+ return (0);
+}
+
+static void
+red_purgeq(rqp)
+ red_queue_t *rqp;
+{
+ _flushq(rqp->rq_q);
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ rqp->rq_ifq->ifq_len = 0;
+}
+
+#ifdef ALTQ_FLOWVALVE
+
+#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
+#define FV_PSCALE(x) ((x) << FV_PSHIFT)
+#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
+#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
+#define FV_FSCALE(x) ((x) << FV_FSHIFT)
+#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
+
+#define FV_TIMER (3 * hz) /* timer value for garbage collector */
+#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
+
+#define FV_N 10 /* update fve_f every FV_N packets */
+
+#define	FV_BACKOFFTHRESH 1  /* backoff threshold interval in seconds */
+#define FV_TTHRESH 3 /* time threshold to delete fve */
+#define FV_ALPHA 5 /* extra packet count */
+
+#define FV_STATS
+
+#if (__FreeBSD_version > 300000)
+#define FV_TIMESTAMP(tp) getmicrotime(tp)
+#else
+#define FV_TIMESTAMP(tp) { (*(tp)) = time; }
+#endif
+
+/*
+ * Brtt table: a 127-entry table to convert a drop rate (p) to
+ * the corresponding bandwidth fraction (f).
+ * the following equation is implemented using the scaled values
+ * fve_p and fve_f in fixed-point format.
+ *
+ * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
+ * f = Brtt(p) / (max_th + alpha)
+ */
+#define BRTT_SIZE 128
+#define BRTT_SHIFT 12
+#define BRTT_MASK 0x0007f000
+#define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
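+
+/*
+ * fve_p is scaled by 2^(FV_PSHIFT + FP_SHIFT) = BRTT_PMAX, so
+ * BRTT_MASK/BRTT_SHIFT in fv_p2f() extract the top seven bits of
+ * the drop rate as an index into the table below.
+ */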
+
+const int brtt_tab[BRTT_SIZE] = {
+ 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
+ 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
+ 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
+ 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
+ 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
+ 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
+ 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
+ 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
+ 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
+ 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
+ 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
+ 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
+ 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
+ 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
+ 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
+ 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
+};
+
+static __inline struct fve *
+flowlist_lookup(fv, pktattr, now)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct timeval *now;
+{
+ struct fve *fve;
+ int flows;
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ struct timeval tthresh;
+
+ if (pktattr == NULL)
+ return (NULL);
+
+ tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
+ flows = 0;
+ /*
+ * search the flow list
+ */
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ ip = (struct ip *)pktattr->pattr_hdr;
+ TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
+ if (fve->fve_lastdrop.tv_sec == 0)
+ break;
+ if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
+ fve->fve_lastdrop.tv_sec = 0;
+ break;
+ }
+ if (fve->fve_flow.flow_af == AF_INET &&
+ fve->fve_flow.flow_ip.ip_src.s_addr ==
+ ip->ip_src.s_addr &&
+ fve->fve_flow.flow_ip.ip_dst.s_addr ==
+ ip->ip_dst.s_addr)
+ return (fve);
+ flows++;
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
+ if (fve->fve_lastdrop.tv_sec == 0)
+ break;
+ if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
+ fve->fve_lastdrop.tv_sec = 0;
+ break;
+ }
+ if (fve->fve_flow.flow_af == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
+ &ip6->ip6_src) &&
+ IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
+ &ip6->ip6_dst))
+ return (fve);
+ flows++;
+ }
+ break;
+#endif /* INET6 */
+
+ default:
+ /* unknown protocol. no drop. */
+ return (NULL);
+ }
+ fv->fv_flows = flows; /* save the number of active fve's */
+ return (NULL);
+}
+
+static __inline struct fve *
+flowlist_reclaim(fv, pktattr)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+{
+ struct fve *fve;
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+
+ /*
+ * get an entry from the tail of the LRU list.
+ */
+ fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
+
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ ip = (struct ip *)pktattr->pattr_hdr;
+ fve->fve_flow.flow_af = AF_INET;
+ fve->fve_flow.flow_ip.ip_src = ip->ip_src;
+ fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ fve->fve_flow.flow_af = AF_INET6;
+ fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
+ fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
+ break;
+#endif
+ }
+
+ fve->fve_state = Green;
+ fve->fve_p = 0.0;
+ fve->fve_f = 0.0;
+ fve->fve_ifseq = fv->fv_ifseq - 1;
+ fve->fve_count = 0;
+
+ fv->fv_flows++;
+#ifdef FV_STATS
+ fv->fv_stats.alloc++;
+#endif
+ return (fve);
+}
+
+static __inline void
+flowlist_move_to_head(fv, fve)
+ struct flowvalve *fv;
+ struct fve *fve;
+{
+ if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
+ TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
+ TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
+ }
+}
+
+#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
+/*
+ * allocate flowvalve structure
+ */
+static struct flowvalve *
+fv_alloc(rp)
+ struct red *rp;
+{
+ struct flowvalve *fv;
+ struct fve *fve;
+ int i, num;
+
+ num = FV_FLOWLISTSIZE;
+ fv = malloc(sizeof(struct flowvalve),
+ M_DEVBUF, M_WAITOK);
+ if (fv == NULL)
+ return (NULL);
+ bzero(fv, sizeof(struct flowvalve));
+
+ fv->fv_fves = malloc(sizeof(struct fve) * num,
+ M_DEVBUF, M_WAITOK);
+ if (fv->fv_fves == NULL) {
+ free(fv, M_DEVBUF);
+ return (NULL);
+ }
+ bzero(fv->fv_fves, sizeof(struct fve) * num);
+
+ fv->fv_flows = 0;
+ TAILQ_INIT(&fv->fv_flowlist);
+ for (i = 0; i < num; i++) {
+ fve = &fv->fv_fves[i];
+ fve->fve_lastdrop.tv_sec = 0;
+ TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
+ }
+
+ /* initialize drop rate threshold in scaled fixed-point */
+ fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
+
+ /* initialize drop rate to fraction table */
+ fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
+ M_DEVBUF, M_WAITOK);
+ if (fv->fv_p2ftab == NULL) {
+ free(fv->fv_fves, M_DEVBUF);
+ free(fv, M_DEVBUF);
+ return (NULL);
+ }
+ /*
+ * create the p2f table.
+ * (shift is used to keep the precision)
+ */
+ for (i = 1; i < BRTT_SIZE; i++) {
+ int f;
+
+ f = brtt_tab[i] << 8;
+ fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
+ }
+
+ return (fv);
+}
+#endif
+
+static void fv_destroy(fv)
+ struct flowvalve *fv;
+{
+ free(fv->fv_p2ftab, M_DEVBUF);
+ free(fv->fv_fves, M_DEVBUF);
+ free(fv, M_DEVBUF);
+}
+
+static __inline int
+fv_p2f(fv, p)
+ struct flowvalve *fv;
+ int p;
+{
+ int val, f;
+
+ if (p >= BRTT_PMAX)
+ f = fv->fv_p2ftab[BRTT_SIZE-1];
+ else if ((val = (p & BRTT_MASK)))
+ f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
+ else
+ f = fv->fv_p2ftab[1];
+ return (f);
+}
+
+/*
+ * check if an arriving packet should be pre-dropped.
+ * called from red_addq() when a packet arrives.
+ * returns 1 when the packet should be pre-dropped.
+ * should be called in splimp.
+ */
+static int
+fv_checkflow(fv, pktattr, fcache)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct fve **fcache;
+{
+ struct fve *fve;
+ struct timeval now;
+
+ fv->fv_ifseq++;
+ FV_TIMESTAMP(&now);
+
+ if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
+ /* no matching entry in the flowlist */
+ return (0);
+
+ *fcache = fve;
+
+ /* update fraction f for every FV_N packets */
+ if (++fve->fve_count == FV_N) {
+ /*
+ * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
+ */
+ fve->fve_f =
+ (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
+ + fve->fve_f - FV_FUNSCALE(fve->fve_f);
+ fve->fve_ifseq = fv->fv_ifseq;
+ fve->fve_count = 0;
+ }
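+	/*
+	 * FV_N / (fv_ifseq - fve_ifseq) above is the flow's share of
+	 * the packets that arrived since the last update, so fve_f is
+	 * an EWMA of this flow's fraction of the link traffic.
+	 */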
+
+ /*
+ * overpumping test
+ */
+ if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
+ int fthresh;
+
+ /* calculate a threshold */
+ fthresh = fv_p2f(fv, fve->fve_p);
+ if (fve->fve_f > fthresh)
+ fve->fve_state = Red;
+ }
+
+ if (fve->fve_state == Red) {
+ /*
+ * backoff test
+ */
+ if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
+ /* no drop for at least FV_BACKOFFTHRESH sec */
+ fve->fve_p = 0;
+ fve->fve_state = Green;
+#ifdef FV_STATS
+ fv->fv_stats.escape++;
+#endif
+ } else {
+ /* block this flow */
+ flowlist_move_to_head(fv, fve);
+ fve->fve_lastdrop = now;
+#ifdef FV_STATS
+ fv->fv_stats.predrop++;
+#endif
+ return (1);
+ }
+ }
+
+ /*
+ * p = (1 - Wp) * p
+ */
+ fve->fve_p -= FV_PUNSCALE(fve->fve_p);
+ if (fve->fve_p < 0)
+ fve->fve_p = 0;
+#ifdef FV_STATS
+ fv->fv_stats.pass++;
+#endif
+ return (0);
+}
+
+/*
+ * called from red_addq when a packet is dropped by red.
+ * should be called in splimp.
+ */
+static void fv_dropbyred(fv, pktattr, fcache)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct fve *fcache;
+{
+ struct fve *fve;
+ struct timeval now;
+
+ if (pktattr == NULL)
+ return;
+ FV_TIMESTAMP(&now);
+
+ if (fcache != NULL)
+ /* the fve of this packet is already cached */
+ fve = fcache;
+ else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
+ fve = flowlist_reclaim(fv, pktattr);
+
+ flowlist_move_to_head(fv, fve);
+
+ /*
+ * update p: the following line cancels the update
+	 * in fv_checkflow() and calculates
+ * p = Wp + (1 - Wp) * p
+ */
+ fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
+
+ fve->fve_lastdrop = now;
+}
+
+#endif /* ALTQ_FLOWVALVE */
+
+#ifdef KLD_MODULE
+
+static struct altqsw red_sw =
+ {"red", redopen, redclose, redioctl};
+
+ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
+MODULE_VERSION(altq_red, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_RED */
diff --git a/freebsd/sys/contrib/altq/altq/altq_red.h b/freebsd/sys/contrib/altq/altq/altq_red.h
new file mode 100644
index 00000000..2d267101
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_red.h
@@ -0,0 +1,198 @@
+/* $KAME: altq_red.h,v 1.8 2003/07/10 12:07:49 kjc Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_RED_HH_
+#define _ALTQ_ALTQ_RED_HH_
+
+#include <freebsd/altq/altq_classq.h>
+
+#ifdef ALTQ3_COMPAT
+struct red_interface {
+ char red_ifname[IFNAMSIZ];
+};
+
+struct red_stats {
+ struct red_interface iface;
+ int q_len;
+ int q_avg;
+
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+
+ /* static red parameters */
+ int q_limit;
+ int weight;
+ int inv_pmax;
+ int th_min;
+ int th_max;
+
+ /* flowvalve related stuff */
+ u_int fv_flows;
+ u_int fv_pass;
+ u_int fv_predrop;
+ u_int fv_alloc;
+ u_int fv_escape;
+};
+
+struct red_conf {
+ struct red_interface iface;
+ int red_weight; /* weight for EWMA */
+ int red_inv_pmax; /* inverse of max drop probability */
+ int red_thmin; /* red min threshold */
+ int red_thmax; /* red max threshold */
+ int red_limit; /* max queue length */
+ int red_pkttime; /* average packet time in usec */
+ int red_flags; /* see below */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* red flags */
+#define REDF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define REDF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define REDF_ECN (REDF_ECN4 | REDF_ECN6)
+#define REDF_FLOWVALVE 0x04 /* use flowvalve (aka penalty-box) */
+
+/*
+ * simpler versions of red parameters and statistics used by other
+ * disciplines (e.g., CBQ)
+ */
+struct redparams {
+ int th_min; /* red min threshold */
+ int th_max; /* red max threshold */
+ int inv_pmax; /* inverse of max drop probability */
+};
+
+struct redstats {
+ int q_avg;
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+};
+
+#ifdef ALTQ3_COMPAT
+/*
+ * IOCTLs for RED
+ */
+#define RED_IF_ATTACH _IOW('Q', 1, struct red_interface)
+#define RED_IF_DETACH _IOW('Q', 2, struct red_interface)
+#define RED_ENABLE _IOW('Q', 3, struct red_interface)
+#define RED_DISABLE _IOW('Q', 4, struct red_interface)
+#define RED_CONFIG _IOWR('Q', 6, struct red_conf)
+#define RED_GETSTATS _IOWR('Q', 12, struct red_stats)
+#define RED_SETDEFAULTS _IOW('Q', 30, struct redparams)
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+#ifdef ALTQ3_COMPAT
+struct flowvalve;
+#endif
+
+/* weight table structure for idle time calibration */
+struct wtab {
+ struct wtab *w_next;
+ int w_weight;
+ int w_param_max;
+ int w_refcount;
+ int32_t w_tab[32];
+};
+
+typedef struct red {
+ int red_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int red_flags; /* red flags */
+
+ /* red parameters */
+ int red_weight; /* weight for EWMA */
+ int red_inv_pmax; /* inverse of max drop probability */
+ int red_thmin; /* red min threshold */
+ int red_thmax; /* red max threshold */
+
+ /* variables for internal use */
+ int red_wshift; /* log(red_weight) */
+ int red_thmin_s; /* th_min scaled by avgshift */
+ int red_thmax_s; /* th_max scaled by avgshift */
+ int red_probd; /* drop probability denominator */
+
+ int red_avg; /* queue len avg scaled by avgshift */
+ int red_count; /* packet count since last dropped/
+ marked packet */
+ int red_idle; /* queue was empty */
+ int red_old; /* avg is above th_min */
+ struct wtab *red_wtab; /* weight table */
+ struct timeval red_last; /* time when the queue becomes idle */
+
+#ifdef ALTQ3_COMPAT
+ struct flowvalve *red_flowvalve; /* flowvalve state */
+#endif
+
+ struct {
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+ } red_stats;
+} red_t;
+
+#ifdef ALTQ3_COMPAT
+typedef struct red_queue {
+ struct red_queue *rq_next; /* next red_state in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q;
+
+ red_t *rq_red;
+} red_queue_t;
+#endif /* ALTQ3_COMPAT */
+
+/* red drop types */
+#define DTYPE_NODROP 0 /* no drop */
+#define DTYPE_FORCED 1 /* a "forced" drop */
+#define DTYPE_EARLY 2 /* an "unforced" (early) drop */
+
+extern red_t *red_alloc(int, int, int, int, int, int);
+extern void red_destroy(red_t *);
+extern void red_getstats(red_t *, struct redstats *);
+extern int red_addq(red_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *);
+extern struct mbuf *red_getq(red_t *, class_queue_t *);
+extern int drop_early(int, int, int);
+extern int mark_ecn(struct mbuf *, struct altq_pktattr *, int);
+extern struct wtab *wtab_alloc(int);
+extern int wtab_destroy(struct wtab *);
+extern int32_t pow_w(struct wtab *, int);
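+
+/*
+ * typical usage by another discipline (a sketch; cf. the CBQ code in
+ * altq_rmclass.c in this tree): create state with
+ * red_alloc(0, 0, qlimit(q) * 10/100, qlimit(q) * 30/100, flags,
+ * pkttime); call red_addq() on enqueue (it frees the mbuf and returns
+ * -1 on a drop), red_getq() on dequeue, red_getstats() for reporting
+ * and red_destroy() on teardown.
+ */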
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_RED_HH_ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_rio.c b/freebsd/sys/contrib/altq/altq/altq_rio.c
new file mode 100644
index 00000000..33a5fce6
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_rio.c
@@ -0,0 +1,855 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include <freebsd/local/opt_altq.h>
+#if (__FreeBSD__ != 2)
+#include <freebsd/local/opt_inet.h>
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet6.h>
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/errno.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/kernel.h>
+#endif
+
+#include <freebsd/net/if.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+
+#include <freebsd/net/pfvar.h>
+#include <freebsd/altq/altq.h>
+#include <freebsd/altq/altq_cdnr.h>
+#include <freebsd/altq/altq_red.h>
+#include <freebsd/altq/altq_rio.h>
+#ifdef ALTQ3_COMPAT
+#include <freebsd/altq/altq_conf.h>
+#endif
+
+/*
+ * RIO: RED with IN/OUT bit
+ * described in
+ * "Explicit Allocation of Best Effort Packet Delivery Service"
+ * David D. Clark and Wenjia Fang, MIT Lab for Computer Science
+ * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
+ *
+ * this implementation is extended to support more than 2 drop precedence
+ * values as described in RFC2597 (Assured Forwarding PHB Group).
+ *
+ */
+/*
+ * AF DS (differentiated service) codepoints.
+ * (classes can be mapped to CBQ or H-FSC classes.)
+ *
+ * 0 1 2 3 4 5 6 7
+ * +---+---+---+---+---+---+---+---+
+ * | CLASS |DropPre| 0 | CU |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * class 1: 001
+ * class 2: 010
+ * class 3: 011
+ * class 4: 100
+ *
+ * low drop prec: 01
+ * medium drop prec: 10
+ *	high drop prec:		11
+ */
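+
+/*
+ * a worked example of the mapping above (codepoint values as defined
+ * in altq_cdnr.h, where AF_DROPPRECMASK is 0x18): the dsfield byte for
+ * AF12 is 0x30 (class 001, drop prec 10), so dscp2index() below
+ * computes (0x30 & 0x18) == 0x10 and returns (0x10 >> 3) - 1 == 1,
+ * the medium drop precedence index; AF11 (0x28) and AF13 (0x38) map
+ * to indices 0 and 2 the same way.
+ */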
+
+/* normal red parameters */
+#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
+ /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
+ /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
+ /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define INV_P_MAX 10 /* inverse of max drop probability */
+#define TH_MIN 5 /* min threshold */
+#define TH_MAX 15 /* max threshold */
+
+#define	RIO_LIMIT	60	/* default max queue length */
+#define RIO_STATS /* collect statistics */
+
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a)->tv_usec - (b)->tv_usec; \
+ if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \
+ if (xxs < 0) { \
+ delta = 60000000; \
+ } else if (xxs > 4) { \
+ if (xxs > 60) \
+ delta = 60000000; \
+ else \
+ delta += xxs * 1000000; \
+ } else while (xxs > 0) { \
+ delta += 1000000; \
+ xxs--; \
+ } \
+ } \
+}
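+
+/*
+ * in other words, TV_DELTA(&now, &last, delta) leaves the elapsed time
+ * between the two timestamps in delta, in microseconds, clamped to at
+ * most 60 seconds (a negative difference also clamps to the maximum).
+ */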
+
+#ifdef ALTQ3_COMPAT
+/* rio_list keeps all rio_queue_t's allocated. */
+static rio_queue_t *rio_list = NULL;
+#endif
+/* default rio parameter values */
+static struct redparams default_rio_params[RIO_NDROPPREC] = {
+ /* th_min, th_max, inv_pmax */
+ { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
+ { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
+ { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */
+};
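+
+/*
+ * with TH_MIN == 5 and TH_MAX == 15 the table above works out to
+ * (th_min, th_max) pairs of (35, 45), (20, 30) and (5, 15), so the
+ * higher the drop precedence, the shorter the average queue length at
+ * which packets start being dropped.
+ */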
+
+/* internal function prototypes */
+static int dscp2index(u_int8_t);
+#ifdef ALTQ3_COMPAT
+static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *rio_dequeue(struct ifaltq *, int);
+static int rio_request(struct ifaltq *, int, void *);
+static int rio_detach(rio_queue_t *);
+
+/*
+ * rio device interface
+ */
+altqdev_decl(rio);
+
+#endif /* ALTQ3_COMPAT */
+
+rio_t *
+rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
+{
+ rio_t *rp;
+ int w, i;
+ int npkts_per_sec;
+
+ rp = malloc(sizeof(rio_t), M_DEVBUF, M_WAITOK);
+ if (rp == NULL)
+ return (NULL);
+ bzero(rp, sizeof(rio_t));
+
+ rp->rio_flags = flags;
+ if (pkttime == 0)
+ /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
+ rp->rio_pkttime = 800;
+ else
+ rp->rio_pkttime = pkttime;
+
+ if (weight != 0)
+ rp->rio_weight = weight;
+ else {
+ /* use default */
+ rp->rio_weight = W_WEIGHT;
+
+ /* when the link is very slow, adjust red parameters */
+ npkts_per_sec = 1000000 / rp->rio_pkttime;
+ if (npkts_per_sec < 50) {
+ /* up to about 400Kbps */
+ rp->rio_weight = W_WEIGHT_2;
+ } else if (npkts_per_sec < 300) {
+ /* up to about 2.4Mbps */
+ rp->rio_weight = W_WEIGHT_1;
+ }
+ }
+
+ /* calculate wshift. weight must be power of 2 */
+ w = rp->rio_weight;
+ for (i = 0; w > 1; i++)
+ w = w >> 1;
+ rp->rio_wshift = i;
+ w = 1 << rp->rio_wshift;
+ if (w != rp->rio_weight) {
+ printf("invalid weight value %d for red! use %d\n",
+ rp->rio_weight, w);
+ rp->rio_weight = w;
+ }
+
+ /* allocate weight table */
+ rp->rio_wtab = wtab_alloc(rp->rio_weight);
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ struct dropprec_state *prec = &rp->rio_precstate[i];
+
+ prec->avg = 0;
+ prec->idle = 1;
+
+ if (params == NULL || params[i].inv_pmax == 0)
+ prec->inv_pmax = default_rio_params[i].inv_pmax;
+ else
+ prec->inv_pmax = params[i].inv_pmax;
+ if (params == NULL || params[i].th_min == 0)
+ prec->th_min = default_rio_params[i].th_min;
+ else
+ prec->th_min = params[i].th_min;
+ if (params == NULL || params[i].th_max == 0)
+ prec->th_max = default_rio_params[i].th_max;
+ else
+ prec->th_max = params[i].th_max;
+
+ /*
+ * th_min_s and th_max_s are scaled versions of th_min
+ * and th_max to be compared with avg.
+ */
+ prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
+ prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
+
+ /*
+ * precompute probability denominator
+ * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+ */
+ prec->probd = (2 * (prec->th_max - prec->th_min)
+ * prec->inv_pmax) << FP_SHIFT;
+
+ microtime(&prec->last);
+ }
+
+ return (rp);
+}
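+
+/*
+ * a numeric sketch of the fixed-point scaling set up above: the
+ * default W_WEIGHT of 512 gives rio_wshift == 9, so th_min == 5 is
+ * stored as th_min_s = 5 << (9 + 12).  the average maintained in
+ * rio_addq() carries the same 2^(wshift + FP_SHIFT) scale factor, so
+ * the two can be compared directly.
+ */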
+
+void
+rio_destroy(rio_t *rp)
+{
+ wtab_destroy(rp->rio_wtab);
+ free(rp, M_DEVBUF);
+}
+
+void
+rio_getstats(rio_t *rp, struct redstats *sp)
+{
+ int i;
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ bcopy(&rp->q_stats[i], sp, sizeof(struct redstats));
+ sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
+ sp++;
+ }
+}
+
+#if (RIO_NDROPPREC == 3)
+/*
+ * internally, a drop precedence value is converted to an index
+ * starting from 0.
+ */
+static int
+dscp2index(u_int8_t dscp)
+{
+ int dpindex = dscp & AF_DROPPRECMASK;
+
+ if (dpindex == 0)
+ return (0);
+ return ((dpindex >> 3) - 1);
+}
+#endif
+
+#if 1
+/*
+ * kludge: when a packet is dequeued, we need to know its drop precedence
+ * in order to keep track of the queue length of each drop precedence.
+ * use m_pkthdr.rcvif to pass this info.
+ */
+#define RIOM_SET_PRECINDEX(m, idx) \
+ do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0)
+#define RIOM_GET_PRECINDEX(m) \
+ ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
+ (m)->m_pkthdr.rcvif = NULL; idx; })
+#endif
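+
+/*
+ * usage of the kludge above: rio_addq() stores the index with
+ * RIOM_SET_PRECINDEX(m, dpindex) just before queueing the mbuf, and
+ * rio_getq() recovers it with RIOM_GET_PRECINDEX(m), which also
+ * resets m_pkthdr.rcvif to NULL before the mbuf leaves the queue.
+ */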
+
+int
+rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
+ struct altq_pktattr *pktattr)
+{
+ int avg, droptype;
+ u_int8_t dsfield, odsfield;
+ int dpindex, i, n, t;
+ struct timeval now;
+ struct dropprec_state *prec;
+
+ dsfield = odsfield = read_dsfield(m, pktattr);
+ dpindex = dscp2index(dsfield);
+
+ /*
+ * update avg of the precedence states whose drop precedence
+ * is larger than or equal to the drop precedence of the packet
+ */
+ now.tv_sec = 0;
+ for (i = dpindex; i < RIO_NDROPPREC; i++) {
+ prec = &rp->rio_precstate[i];
+ avg = prec->avg;
+ if (prec->idle) {
+ prec->idle = 0;
+ if (now.tv_sec == 0)
+ microtime(&now);
+ t = (now.tv_sec - prec->last.tv_sec);
+ if (t > 60)
+ avg = 0;
+ else {
+ t = t * 1000000 +
+ (now.tv_usec - prec->last.tv_usec);
+ n = t / rp->rio_pkttime;
+ /* calculate (avg = (1 - Wq)^n * avg) */
+ if (n > 0)
+ avg = (avg >> FP_SHIFT) *
+ pow_w(rp->rio_wtab, n);
+ }
+ }
+
+ /* run estimator. (avg is scaled by WEIGHT in fixed-point) */
+ avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
+ prec->avg = avg; /* save the new value */
+ /*
+ * count keeps a tally of arriving traffic that has not
+ * been dropped.
+ */
+ prec->count++;
+ }
+
+ prec = &rp->rio_precstate[dpindex];
+ avg = prec->avg;
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (avg >= prec->th_min_s && prec->qlen > 1) {
+ if (avg >= prec->th_max_s) {
+ /* avg >= th_max: forced drop */
+ droptype = DTYPE_FORCED;
+ } else if (prec->old == 0) {
+ /* first exceeds th_min */
+ prec->count = 1;
+ prec->old = 1;
+ } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
+ prec->probd, prec->count)) {
+ /* unforced drop by red */
+ droptype = DTYPE_EARLY;
+ }
+ } else {
+ /* avg < th_min */
+ prec->old = 0;
+ }
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+ if (droptype != DTYPE_NODROP) {
+ /* always drop incoming packet (as opposed to randomdrop) */
+ for (i = dpindex; i < RIO_NDROPPREC; i++)
+ rp->rio_precstate[i].count = 0;
+#ifdef RIO_STATS
+ if (droptype == DTYPE_EARLY)
+ rp->q_stats[dpindex].drop_unforced++;
+ else
+ rp->q_stats[dpindex].drop_forced++;
+ PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
+#endif
+ m_freem(m);
+ return (-1);
+ }
+
+ for (i = dpindex; i < RIO_NDROPPREC; i++)
+ rp->rio_precstate[i].qlen++;
+
+ /* save drop precedence index in mbuf hdr */
+ RIOM_SET_PRECINDEX(m, dpindex);
+
+ if (rp->rio_flags & RIOF_CLEARDSCP)
+ dsfield &= ~DSCP_MASK;
+
+ if (dsfield != odsfield)
+ write_dsfield(m, pktattr, dsfield);
+
+ _addq(q, m);
+
+#ifdef RIO_STATS
+ PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
+#endif
+ return (0);
+}
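+
+/*
+ * note on the idle-time decay in rio_addq() above: after the queue has
+ * been idle for t usec, n = t / rio_pkttime packets could have been
+ * transmitted, so the average is aged as avg * (1 - 1/weight)^n, with
+ * pow_w() supplying the fixed-point power term from the precomputed
+ * weight table.
+ */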
+
+struct mbuf *
+rio_getq(rio_t *rp, class_queue_t *q)
+{
+ struct mbuf *m;
+ int dpindex, i;
+
+ if ((m = _getq(q)) == NULL)
+ return NULL;
+
+ dpindex = RIOM_GET_PRECINDEX(m);
+ for (i = dpindex; i < RIO_NDROPPREC; i++) {
+ if (--rp->rio_precstate[i].qlen == 0) {
+ if (rp->rio_precstate[i].idle == 0) {
+ rp->rio_precstate[i].idle = 1;
+ microtime(&rp->rio_precstate[i].last);
+ }
+ }
+ }
+ return (m);
+}
+
+#ifdef ALTQ3_COMPAT
+int
+rioopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+rioclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ rio_queue_t *rqp;
+ int err, error = 0;
+
+ while ((rqp = rio_list) != NULL) {
+ /* destroy all */
+ err = rio_detach(rqp);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+rioioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ rio_queue_t *rqp;
+ struct rio_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case RIO_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+ return (error);
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case RIO_ENABLE:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case RIO_DISABLE:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case RIO_IF_ATTACH:
+ ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize rio_queue_t */
+ rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK);
+ if (rqp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp, sizeof(rio_queue_t));
+
+ rqp->rq_q = malloc(sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (rqp->rq_q == NULL) {
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
+ if (rqp->rq_rio == NULL) {
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = RIO_LIMIT;
+ qtype(rqp->rq_q) = Q_RIO;
+
+ /*
+ * set RIO to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
+ rio_enqueue, rio_dequeue, rio_request,
+ NULL, NULL);
+ if (error) {
+ rio_destroy(rqp->rq_rio);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the rio list */
+ rqp->rq_next = rio_list;
+ rio_list = rqp;
+ break;
+
+ case RIO_IF_DETACH:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = rio_detach(rqp);
+ break;
+
+ case RIO_GETSTATS:
+ do {
+ struct rio_stats *q_stats;
+ rio_t *rp;
+ int i;
+
+ q_stats = (struct rio_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
+ ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ rp = rqp->rq_rio;
+
+ q_stats->q_limit = qlimit(rqp->rq_q);
+ q_stats->weight = rp->rio_weight;
+ q_stats->flags = rp->rio_flags;
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ q_stats->q_len[i] = rp->rio_precstate[i].qlen;
+ bcopy(&rp->q_stats[i], &q_stats->q_stats[i],
+ sizeof(struct redstats));
+ q_stats->q_stats[i].q_avg =
+ rp->rio_precstate[i].avg >> rp->rio_wshift;
+
+ q_stats->q_params[i].inv_pmax
+ = rp->rio_precstate[i].inv_pmax;
+ q_stats->q_params[i].th_min
+ = rp->rio_precstate[i].th_min;
+ q_stats->q_params[i].th_max
+ = rp->rio_precstate[i].th_max;
+ }
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RIO_CONFIG:
+ do {
+ struct rio_conf *fc;
+ rio_t *new;
+ int s, limit, i;
+
+ fc = (struct rio_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.rio_ifname,
+ ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ new = rio_alloc(fc->rio_weight, &fc->q_params[0],
+ fc->rio_flags, fc->rio_pkttime);
+ if (new == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ _flushq(rqp->rq_q);
+ limit = fc->rio_limit;
+ if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
+ limit = fc->q_params[RIO_NDROPPREC-1].th_max;
+ qlimit(rqp->rq_q) = limit;
+
+ rio_destroy(rqp->rq_rio);
+ rqp->rq_rio = new;
+
+ splx(s);
+
+ /* write back new values */
+ fc->rio_limit = limit;
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ fc->q_params[i].inv_pmax =
+ rqp->rq_rio->rio_precstate[i].inv_pmax;
+ fc->q_params[i].th_min =
+ rqp->rq_rio->rio_precstate[i].th_min;
+ fc->q_params[i].th_max =
+ rqp->rq_rio->rio_precstate[i].th_max;
+ }
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RIO_SETDEFAULTS:
+ do {
+ struct redparams *rp;
+ int i;
+
+ rp = (struct redparams *)addr;
+ for (i = 0; i < RIO_NDROPPREC; i++)
+ default_rio_params[i] = rp[i];
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+static int
+rio_detach(rqp)
+ rio_queue_t *rqp;
+{
+ rio_queue_t *tmp;
+ int error = 0;
+
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ altq_disable(rqp->rq_ifq);
+
+ if ((error = altq_detach(rqp->rq_ifq)))
+ return (error);
+
+ if (rio_list == rqp)
+ rio_list = rqp->rq_next;
+ else {
+ for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
+ if (tmp->rq_next == rqp) {
+ tmp->rq_next = rqp->rq_next;
+ break;
+ }
+ if (tmp == NULL)
+ printf("rio_detach: no state found in rio_list!\n");
+ }
+
+ rio_destroy(rqp->rq_rio);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * rio support routines
+ */
+static int
+rio_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ _flushq(rqp->rq_q);
+ if (ALTQ_IS_ENABLED(ifq))
+ ifq->ifq_len = 0;
+ break;
+ }
+ return (0);
+}
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+static int
+rio_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+ int error = 0;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0)
+ ifq->ifq_len++;
+ else
+ error = ENOBUFS;
+ return error;
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+
+static struct mbuf *
+rio_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+ struct mbuf *m = NULL;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (op == ALTDQ_POLL)
+ return qhead(rqp->rq_q);
+
+ m = rio_getq(rqp->rq_rio, rqp->rq_q);
+ if (m != NULL)
+ ifq->ifq_len--;
+ return m;
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw rio_sw =
+ {"rio", rioopen, rioclose, rioioctl};
+
+ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
+MODULE_VERSION(altq_rio, 1);
+MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_RIO */
diff --git a/freebsd/sys/contrib/altq/altq/altq_rio.h b/freebsd/sys/contrib/altq/altq/altq_rio.h
new file mode 100644
index 00000000..e32d2ee2
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_rio.h
@@ -0,0 +1,144 @@
+/* $KAME: altq_rio.h,v 1.9 2003/07/10 12:07:49 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_RIO_HH_
+#define _ALTQ_ALTQ_RIO_HH_
+
+#include <freebsd/altq/altq_classq.h>
+
+/*
+ * RIO: RED with IN/OUT bit
+ * (extended to support more than 2 drop precedence values)
+ */
+#define RIO_NDROPPREC 3 /* number of drop precedence values */
+
+#ifdef ALTQ3_COMPAT
+struct rio_interface {
+ char rio_ifname[IFNAMSIZ];
+};
+
+struct rio_stats {
+ struct rio_interface iface;
+ int q_len[RIO_NDROPPREC];
+ struct redstats q_stats[RIO_NDROPPREC];
+
+ /* static red parameters */
+ int q_limit;
+ int weight;
+ int flags;
+ struct redparams q_params[RIO_NDROPPREC];
+};
+
+struct rio_conf {
+ struct rio_interface iface;
+ struct redparams q_params[RIO_NDROPPREC];
+ int rio_weight; /* weight for EWMA */
+ int rio_limit; /* max queue length */
+ int rio_pkttime; /* average packet time in usec */
+ int rio_flags; /* see below */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* rio flags */
+#define RIOF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define RIOF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define RIOF_ECN (RIOF_ECN4 | RIOF_ECN6)
+#define RIOF_CLEARDSCP 0x200 /* clear diffserv codepoint */
+
+#ifdef ALTQ3_COMPAT
+/*
+ * IOCTLs for RIO
+ */
+#define RIO_IF_ATTACH _IOW('Q', 1, struct rio_interface)
+#define RIO_IF_DETACH _IOW('Q', 2, struct rio_interface)
+#define RIO_ENABLE _IOW('Q', 3, struct rio_interface)
+#define RIO_DISABLE _IOW('Q', 4, struct rio_interface)
+#define RIO_CONFIG _IOWR('Q', 6, struct rio_conf)
+#define RIO_GETSTATS _IOWR('Q', 12, struct rio_stats)
+#define RIO_SETDEFAULTS _IOW('Q', 30, struct redparams[RIO_NDROPPREC])
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+typedef struct rio {
+ /* per drop precedence structure */
+ struct dropprec_state {
+ /* red parameters */
+ int inv_pmax; /* inverse of max drop probability */
+ int th_min; /* red min threshold */
+ int th_max; /* red max threshold */
+
+ /* variables for internal use */
+ int th_min_s; /* th_min scaled by avgshift */
+ int th_max_s; /* th_max scaled by avgshift */
+ int probd; /* drop probability denominator */
+
+ int qlen; /* queue length */
+ int avg; /* (scaled) queue length average */
+ int count; /* packet count since the last dropped/
+ marked packet */
+ int idle; /* queue was empty */
+ int old; /* avg is above th_min */
+ struct timeval last; /* timestamp when queue becomes idle */
+ } rio_precstate[RIO_NDROPPREC];
+
+ int rio_wshift; /* log(red_weight) */
+ int rio_weight; /* weight for EWMA */
+ struct wtab *rio_wtab; /* weight table */
+
+ int rio_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int rio_flags; /* rio flags */
+
+ u_int8_t rio_codepoint; /* codepoint value to tag packets */
+ u_int8_t rio_codepointmask; /* codepoint mask bits */
+
+ struct redstats q_stats[RIO_NDROPPREC]; /* statistics */
+} rio_t;
+
+#ifdef ALTQ3_COMPAT
+typedef struct rio_queue {
+ struct rio_queue *rq_next; /* next red_state in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q;
+
+ rio_t *rq_rio;
+} rio_queue_t;
+#endif /* ALTQ3_COMPAT */
+
+extern rio_t *rio_alloc(int, struct redparams *, int, int);
+extern void rio_destroy(rio_t *);
+extern void rio_getstats(rio_t *, struct redstats *);
+extern int rio_addq(rio_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *);
+extern struct mbuf *rio_getq(rio_t *, class_queue_t *);
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_RIO_HH_ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_rmclass.c b/freebsd/sys/contrib/altq/altq/altq_rmclass.c
new file mode 100644
index 00000000..06f61bd3
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_rmclass.c
@@ -0,0 +1,1843 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $KAME: altq_rmclass.c,v 1.18 2003/11/06 06:32:53 kjc Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * LBL code modified by speer@eng.sun.com, May 1997.
+ * For questions and/or comments, please send mail to cbq@ee.lbl.gov
+ */
+
+#ident "@(#)rm_class.c 1.48 97/12/05 SMI"
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include <freebsd/local/opt_altq.h>
+#if (__FreeBSD__ != 2)
+#include <freebsd/local/opt_inet.h>
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet6.h>
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/time.h>
+#ifdef ALTQ3_COMPAT
+#include <freebsd/sys/kernel.h>
+#endif
+
+#include <freebsd/net/if.h>
+#ifdef ALTQ3_COMPAT
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#endif
+
+#include <freebsd/altq/altq.h>
+#include <freebsd/altq/altq_rmclass.h>
+#include <freebsd/altq/altq_rmclass_debug.h>
+#include <freebsd/altq/altq_red.h>
+#include <freebsd/altq/altq_rio.h>
+
+/*
+ * Local Macros
+ */
+
+#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }
+
+/*
+ * Local routines.
+ */
+
+static int rmc_satisfied(struct rm_class *, struct timeval *);
+static void rmc_wrr_set_weights(struct rm_ifdat *);
+static void rmc_depth_compute(struct rm_class *);
+static void rmc_depth_recompute(rm_class_t *);
+
+static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
+static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int);
+
+static int _rmc_addq(rm_class_t *, mbuf_t *);
+static void _rmc_dropq(rm_class_t *);
+static mbuf_t *_rmc_getq(rm_class_t *);
+static mbuf_t *_rmc_pollq(rm_class_t *);
+
+static int rmc_under_limit(struct rm_class *, struct timeval *);
+static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
+static void rmc_drop_action(struct rm_class *);
+static void rmc_restart(struct rm_class *);
+static void rmc_root_overlimit(struct rm_class *, struct rm_class *);
+
+#define BORROW_OFFTIME
+/*
+ * BORROW_OFFTIME (experimental):
+ * borrow the offtime of the class borrowing from.
+ * the reason is that when its own offtime is set, the class is unable
+ * to borrow much, especially when cutoff is taking effect.
+ * but when the borrowed class is overloaded (avgidle is close to minidle),
+ * use the borrowing class's offtime to avoid overload.
+ */
+#define ADJUST_CUTOFF
+/*
+ * ADJUST_CUTOFF (experimental):
+ * if no underlimit class is found due to cutoff, increase cutoff and
+ * retry the scheduling loop.
+ * also, don't invoke delay_actions while cutoff is taking effect,
+ * since a sleeping class won't have a chance to be scheduled in the
+ * next loop.
+ *
+ * now the heuristics for setting the top-level variable (cutoff_) become:
+ * 1. if a packet arrives for a not-overlimit class, set cutoff
+ * to the depth of the class.
+ * 2. if cutoff is i, and a packet arrives for an overlimit class
+ * with an underlimit ancestor at a lower level than i (say j),
+ * then set cutoff to j.
+ * 3. at scheduling a packet, if there is no underlimit class
+ * due to the current cutoff level, increase cutoff by 1 and
+ * then try to schedule again.
+ */
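+
+/*
+ * a concrete case of the heuristics above for a two-level tree (root
+ * at depth 1, leaves at depth 0): a packet arriving for an underlimit
+ * leaf sets cutoff_ to 0 by rule 1, so overlimit leaves cannot borrow
+ * from the root; when the scheduler then finds no underlimit class at
+ * cutoff 0, rule 3 raises cutoff_ back to 1 and borrowing resumes.
+ */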
+
+/*
+ * rm_class_t *
+ * rmc_newclass(...) - Create a new resource management class at priority
+ * 'pri' on the interface given by 'ifd'.
+ *
+ * nsecPerByte is the data rate of the interface in nanoseconds/byte.
+ * E.g., 800 for a 10Mb/s ethernet. If the class gets less
+ * than 100% of the bandwidth, this number should be the
+ * 'effective' rate for the class. Let f be the
+ * bandwidth fraction allocated to this class, and let
+ * nsPerByte be the data rate of the output link in
+ * nanoseconds/byte. Then nsecPerByte is set to
+ * nsPerByte / f. E.g., 1600 (= 800 / .5)
+ * for a class that gets 50% of an ethernet's bandwidth.
+ *
+ * action the routine to call when the class is over limit.
+ *
+ * maxq max allowable queue size for class (in packets).
+ *
+ * parent parent class pointer.
+ *
+ * borrow class to borrow from (should be either 'parent' or null).
+ *
+ * maxidle max value allowed for class 'idle' time estimate (this
+ * parameter determines how large an initial burst of packets
+ *		can be before overlimit action is invoked).
+ *
+ * offtime how long 'delay' action will delay when class goes over
+ * limit (this parameter determines the steady-state burst
+ * size when a class is running over its limit).
+ *
+ * Maxidle and offtime have to be computed from the following: If the
+ * average packet size is s, the bandwidth fraction allocated to this
+ * class is f, we want to allow b packet bursts, and the gain of the
+ * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
+ *
+ * ptime = s * nsPerByte * (1 - f) / f
+ * maxidle = ptime * (1 - g^b) / g^b
+ * minidle = -ptime * (1 / (f - 1))
+ *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
+ *
+ * Operationally, it's convenient to specify maxidle & offtime in units
+ * independent of the link bandwidth so the maxidle & offtime passed to
+ * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
+ * (The constant factor is a scale factor needed to make the parameters
+ * integers. This scaling also means that the 'unscaled' values of
+ * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
+ * not nanoseconds.) Also note that the 'idle' filter computation keeps
+ * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
+ * maxidle also must be scaled upward by this value. Thus, the passed
+ * values for maxidle and offtime can be computed as follows:
+ *
+ * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
+ * offtime = offtime * 8 / (1000 * nsecPerByte)
+ *
+ * When USE_HRTIME is employed, then maxidle and offtime become:
+ *	maxidle = maxidle * (8.0 / nsecPerByte);
+ * offtime = offtime * (8.0 / nsecPerByte);
+ */
+struct rm_class *
+rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
+ void (*action)(rm_class_t *, rm_class_t *), int maxq,
+ struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
+ int minidle, u_int offtime, int pktsize, int flags)
+{
+ struct rm_class *cl;
+ struct rm_class *peer;
+ int s;
+
+ if (pri >= RM_MAXPRIO)
+ return (NULL);
+#ifndef ALTQ_RED
+ if (flags & RMCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: RED not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_RIO
+ if (flags & RMCF_RIO) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: RIO not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ cl = malloc(sizeof(struct rm_class),
+ M_DEVBUF, M_WAITOK);
+ if (cl == NULL)
+ return (NULL);
+ bzero(cl, sizeof(struct rm_class));
+ CALLOUT_INIT(&cl->callout_);
+ cl->q_ = malloc(sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (cl->q_ == NULL) {
+ free(cl, M_DEVBUF);
+ return (NULL);
+ }
+ bzero(cl->q_, sizeof(class_queue_t));
+
+ /*
+ * Class initialization.
+ */
+ cl->children_ = NULL;
+ cl->parent_ = parent;
+ cl->borrow_ = borrow;
+ cl->leaf_ = 1;
+ cl->ifdat_ = ifd;
+ cl->pri_ = pri;
+ cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+ cl->depth_ = 0;
+ cl->qthresh_ = 0;
+ cl->ns_per_byte_ = nsecPerByte;
+
+ qlimit(cl->q_) = maxq;
+ qtype(cl->q_) = Q_DROPHEAD;
+ qlen(cl->q_) = 0;
+ cl->flags_ = flags;
+
+#if 1 /* minidle is also scaled in ALTQ */
+ cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
+ if (cl->minidle_ > 0)
+ cl->minidle_ = 0;
+#else
+ cl->minidle_ = minidle;
+#endif
+ cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+ if (cl->maxidle_ == 0)
+ cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+ cl->avgidle_ = cl->maxidle_;
+ cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+ if (cl->offtime_ == 0)
+ cl->offtime_ = 1;
+#else
+ cl->avgidle_ = 0;
+ cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+ cl->overlimit = action;
+
+#ifdef ALTQ_RED
+ if (flags & (RMCF_RED|RMCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & RMCF_ECN)
+ red_flags |= REDF_ECN;
+ if (flags & RMCF_FLOWVALVE)
+ red_flags |= REDF_FLOWVALVE;
+#ifdef ALTQ_RIO
+ if (flags & RMCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ red_pkttime = nsecPerByte * pktsize / 1000;
+
+ if (flags & RMCF_RED) {
+ cl->red_ = red_alloc(0, 0,
+ qlimit(cl->q_) * 10/100,
+ qlimit(cl->q_) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->red_ != NULL)
+ qtype(cl->q_) = Q_RED;
+ }
+#ifdef ALTQ_RIO
+ else {
+ cl->red_ = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->red_ != NULL)
+ qtype(cl->q_) = Q_RIO;
+ }
+#endif
+ }
+#endif /* ALTQ_RED */
+
+ /*
+ * put the class into the class tree
+ */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(ifd->ifq_);
+ if ((peer = ifd->active_[pri]) != NULL) {
+ /* find the last class at this pri */
+ cl->peer_ = peer;
+ while (peer->peer_ != ifd->active_[pri])
+ peer = peer->peer_;
+ peer->peer_ = cl;
+ } else {
+ ifd->active_[pri] = cl;
+ cl->peer_ = cl;
+ }
+
+ if (cl->parent_) {
+ cl->next_ = parent->children_;
+ parent->children_ = cl;
+ parent->leaf_ = 0;
+ }
+
+ /*
+ * Compute the depth of this class and its ancestors in the class
+ * hierarchy.
+ */
+ rmc_depth_compute(cl);
+
+ /*
+ * If CBQ's WRR is enabled, then initialize the class WRR state.
+ */
+ if (ifd->wrr_) {
+ ifd->num_[pri]++;
+ ifd->alloc_[pri] += cl->allotment_;
+ rmc_wrr_set_weights(ifd);
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+ return (cl);
+}
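+
+/*
+ * a small worked example of the parameter scaling described above
+ * (RM_FILTER_GAIN is 5 in altq_rmclass.h): for a half-share class on
+ * 10Mb/s ethernet, nsecPerByte == 1600, so a caller-supplied maxidle
+ * of 20 is stored as cl->maxidle_ = (20 * 1600) / 8 = 4000, and an
+ * offtime of 100 becomes cl->offtime_ = ((100 * 1600) / 8) >> 5 = 625.
+ */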
+
+int
+rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
+ int minidle, u_int offtime, int pktsize)
+{
+ struct rm_ifdat *ifd;
+ u_int old_allotment;
+ int s;
+
+ ifd = cl->ifdat_;
+ old_allotment = cl->allotment_;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(ifd->ifq_);
+ cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+ cl->qthresh_ = 0;
+ cl->ns_per_byte_ = nsecPerByte;
+
+ qlimit(cl->q_) = maxq;
+
+#if 1 /* minidle is also scaled in ALTQ */
+ cl->minidle_ = (minidle * nsecPerByte) / 8;
+ if (cl->minidle_ > 0)
+ cl->minidle_ = 0;
+#else
+ cl->minidle_ = minidle;
+#endif
+ cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+ if (cl->maxidle_ == 0)
+ cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+ cl->avgidle_ = cl->maxidle_;
+ cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+ if (cl->offtime_ == 0)
+ cl->offtime_ = 1;
+#else
+ cl->avgidle_ = 0;
+ cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+
+ /*
+ * If CBQ's WRR is enabled, then initialize the class WRR state.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
+ rmc_wrr_set_weights(ifd);
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
+ *	the appropriate round-robin weights for the CBQ weighted round-robin
+ * algorithm.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_wrr_set_weights(struct rm_ifdat *ifd)
+{
+ int i;
+ struct rm_class *cl, *clh;
+
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ /*
+ * This is inverted from that of the simulator to
+ * maintain precision.
+ */
+ if (ifd->num_[i] == 0)
+ ifd->M_[i] = 0;
+ else
+ ifd->M_[i] = ifd->alloc_[i] /
+ (ifd->num_[i] * ifd->maxpkt_);
+ /*
+ * Compute the weighted allotment for each class.
+ * This takes the expensive div instruction out
+ * of the main loop for the wrr scheduling path.
+ * These only get recomputed when a class comes or
+ * goes.
+ */
+ if (ifd->active_[i] != NULL) {
+ clh = cl = ifd->active_[i];
+ do {
+ /* safe-guard for slow link or alloc_ == 0 */
+ if (ifd->M_[i] == 0)
+ cl->w_allotment_ = 0;
+ else
+ cl->w_allotment_ = cl->allotment_ /
+ ifd->M_[i];
+ cl = cl->peer_;
+ } while ((cl != NULL) && (cl != clh));
+ }
+ }
+}
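+
+/*
+ * for example, two active classes at one priority with allotments of
+ * 600000 and 300000 bytes/sec on an interface with maxpkt_ == 1500
+ * give M_[i] = 900000 / (2 * 1500) = 300, and hence w_allotment_
+ * values of 600000 / 300 = 2000 and 300000 / 300 = 1000 bytes, i.e.,
+ * per-round byte allowances proportional to the configured shares.
+ */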
+
+int
+rmc_get_weight(struct rm_ifdat *ifd, int pri)
+{
+ if ((pri >= 0) && (pri < RM_MAXPRIO))
+ return (ifd->M_[pri]);
+ else
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_depth_compute(struct rm_class *cl) - This function computes the
+ * appropriate depth of class 'cl' and its ancestors.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_compute(struct rm_class *cl)
+{
+ rm_class_t *t = cl, *p;
+
+ /*
+ * Recompute the depth for the branch of the tree.
+ */
+ while (t != NULL) {
+ p = t->parent_;
+ if (p && (t->depth_ >= p->depth_)) {
+ p->depth_ = t->depth_ + 1;
+ t = p;
+ } else
+ t = NULL;
+ }
+}
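+
+/*
+ * e.g., attaching a new leaf (depth_ == 0) under a parent that was
+ * itself a leaf propagates upward: the parent's depth_ becomes 1, the
+ * grandparent's becomes 2 if it was smaller, and the walk stops at the
+ * first ancestor that is already deep enough.
+ */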
+
+/*
+ * static void
+ * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
+ * the depth of the tree after a class has been deleted.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_recompute(rm_class_t *cl)
+{
+#if 1 /* ALTQ */
+ rm_class_t *p, *t;
+
+ p = cl;
+ while (p != NULL) {
+ if ((t = p->children_) == NULL) {
+ p->depth_ = 0;
+ } else {
+ int cdepth = 0;
+
+ while (t != NULL) {
+ if (t->depth_ > cdepth)
+ cdepth = t->depth_;
+ t = t->next_;
+ }
+
+ if (p->depth_ == cdepth + 1)
+ /* no change to this parent */
+ return;
+
+ p->depth_ = cdepth + 1;
+ }
+
+ p = p->parent_;
+ }
+#else
+ rm_class_t *t;
+
+ if (cl->depth_ >= 1) {
+ if (cl->children_ == NULL) {
+ cl->depth_ = 0;
+ } else if ((t = cl->children_) != NULL) {
+ while (t != NULL) {
+ if (t->children_ != NULL)
+ rmc_depth_recompute(t);
+ t = t->next_;
+ }
+ } else
+ rmc_depth_compute(cl);
+ }
+#endif
+}
+
+/*
+ * void
+ * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
+ * function deletes a class from the link-sharing structure and frees
+ * all resources associated with the class.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
+{
+ struct rm_class *p, *head, *previous;
+ int s;
+
+ ASSERT(cl->children_ == NULL);
+
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(ifd->ifq_);
+ /*
+ * Free packets in the packet queue.
+ * XXX - this may not be a desired behavior. Packets should be
+ * re-queued.
+ */
+ rmc_dropall(cl);
+
+ /*
+	 * If the class has a parent, then remove the class from the
+	 * parent's children chain.
+ */
+ if (cl->parent_ != NULL) {
+ head = cl->parent_->children_;
+ p = previous = head;
+ if (head->next_ == NULL) {
+ ASSERT(head == cl);
+ cl->parent_->children_ = NULL;
+ cl->parent_->leaf_ = 1;
+ } else while (p != NULL) {
+ if (p == cl) {
+ if (cl == head)
+ cl->parent_->children_ = cl->next_;
+ else
+ previous->next_ = cl->next_;
+ cl->next_ = NULL;
+ p = NULL;
+ } else {
+ previous = p;
+ p = p->next_;
+ }
+ }
+ }
+
+ /*
+ * Delete class from class priority peer list.
+ */
+ if ((p = ifd->active_[cl->pri_]) != NULL) {
+ /*
+ * If there is more than one member of this priority
+ * level, then look for class(cl) in the priority level.
+ */
+ if (p != p->peer_) {
+ while (p->peer_ != cl)
+ p = p->peer_;
+ p->peer_ = cl->peer_;
+
+ if (ifd->active_[cl->pri_] == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ } else {
+ ASSERT(p == cl);
+ ifd->active_[cl->pri_] = NULL;
+ }
+ }
+
+ /*
+ * Recompute the WRR weights.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] -= cl->allotment_;
+ ifd->num_[cl->pri_]--;
+ rmc_wrr_set_weights(ifd);
+ }
+
+ /*
+ * Re-compute the depth of the tree.
+ */
+#if 1 /* ALTQ */
+ rmc_depth_recompute(cl->parent_);
+#else
+ rmc_depth_recompute(ifd->root_);
+#endif
+
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+
+ /*
+ * Free the class structure.
+ */
+ if (cl->red_ != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ rio_destroy((rio_t *)cl->red_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ red_destroy(cl->red_);
+#endif
+ }
+ free(cl->q_, M_DEVBUF);
+ free(cl, M_DEVBUF);
+}
+
+
+/*
+ * void
+ * rmc_init(...) - Initialize the resource management data structures
+ * associated with the output portion of interface 'ifp'. 'ifd' is
+ * where the structures will be built (for backwards compatibility, the
+ * structures aren't kept in the ifnet struct). 'nsecPerByte'
+ * gives the link speed (inverse of bandwidth) in nanoseconds/byte.
+ * 'restart' is the driver-specific routine that the generic 'delay
+ * until under limit' action will call to restart output. `maxq'
+ * is the queue size of the 'link' & 'default' classes. 'maxqueued'
+ * is the maximum number of packets that the resource management
+ * code will allow to be queued 'downstream' (this is typically 1).
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
+ void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
+ int minidle, u_int offtime, int flags)
+{
+ int i, mtu;
+
+ /*
+ * Initialize the CBQ tracing/debug facility.
+ */
+ CBQTRACEINIT();
+
+ bzero((char *)ifd, sizeof (*ifd));
+ mtu = ifq->altq_ifp->if_mtu;
+ ifd->ifq_ = ifq;
+ ifd->restart = restart;
+ ifd->maxqueued_ = maxqueued;
+ ifd->ns_per_byte_ = nsecPerByte;
+ ifd->maxpkt_ = mtu;
+ ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
+ ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
+#if 1
+ ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
+ if (mtu * nsecPerByte > 10 * 1000000)
+ ifd->maxiftime_ /= 4;
+#endif
+
+ reset_cutoff(ifd);
+ CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
+
+ /*
+ * Initialize the CBQ's WRR state.
+ */
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ ifd->alloc_[i] = 0;
+ ifd->M_[i] = 0;
+ ifd->num_[i] = 0;
+ ifd->na_[i] = 0;
+ ifd->active_[i] = NULL;
+ }
+
+ /*
+ * Initialize current packet state.
+ */
+ ifd->qi_ = 0;
+ ifd->qo_ = 0;
+ for (i = 0; i < RM_MAXQUEUED; i++) {
+ ifd->class_[i] = NULL;
+ ifd->curlen_[i] = 0;
+ ifd->borrowed_[i] = NULL;
+ }
+
+ /*
+ * Create the root class of the link-sharing structure.
+ */
+ if ((ifd->root_ = rmc_newclass(0, ifd,
+ nsecPerByte,
+ rmc_root_overlimit, maxq, 0, 0,
+ maxidle, minidle, offtime,
+ 0, 0)) == NULL) {
+ printf("rmc_init: root class not allocated\n");
+		return;
+ }
+ ifd->root_->depth_ = 0;
+}
+
+/*
+ * void
+ * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
+ * mbuf 'm' to queue for resource class 'cl'. This routine is called
+ * by a driver's if_output routine. This routine must be called with
+ * output packet completion interrupts locked out (to avoid racing with
+ * rmc_dequeue_next).
+ *
+ * Returns: 0 on successful queueing
+ * -1 when packet drop occurs
+ */
+int
+rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
+{
+ struct timeval now;
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int cpri = cl->pri_;
+ int is_empty = qempty(cl->q_);
+
+ RM_GETTIME(now);
+ if (ifd->cutoff_ > 0) {
+ if (TV_LT(&cl->undertime_, &now)) {
+ if (ifd->cutoff_ > cl->depth_)
+ ifd->cutoff_ = cl->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
+ }
+#if 1 /* ALTQ */
+ else {
+ /*
+ * the class is overlimit. if the class has
+ * underlimit ancestors, set cutoff to the lowest
+ * depth among them.
+ */
+ struct rm_class *borrow = cl->borrow_;
+
+ while (borrow != NULL &&
+ borrow->depth_ < ifd->cutoff_) {
+ if (TV_LT(&borrow->undertime_, &now)) {
+ ifd->cutoff_ = borrow->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
+ break;
+ }
+ borrow = borrow->borrow_;
+ }
+ }
+#else /* !ALTQ */
+ else if ((ifd->cutoff_ > 1) && cl->borrow_) {
+ if (TV_LT(&cl->borrow_->undertime_, &now)) {
+ ifd->cutoff_ = cl->borrow_->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob',
+ cl->borrow_->depth_);
+ }
+ }
+#endif /* !ALTQ */
+ }
+
+ if (_rmc_addq(cl, m) < 0)
+ /* failed */
+ return (-1);
+
+ if (is_empty) {
+ CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
+ ifd->na_[cpri]++;
+ }
+
+ if (qlen(cl->q_) > qlimit(cl->q_)) {
+ /* note: qlimit can be set to 0 or 1 */
+ rmc_drop_action(cl);
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * void
+ * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
+ *	classes to see if they are satisfied.
+ */
+
+static void
+rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
+{
+ int i;
+ rm_class_t *p, *bp;
+
+ for (i = RM_MAXPRIO - 1; i >= 0; i--) {
+ if ((bp = ifd->active_[i]) != NULL) {
+ p = bp;
+ do {
+ if (!rmc_satisfied(p, now)) {
+ ifd->cutoff_ = p->depth_;
+ return;
+ }
+ p = p->peer_;
+ } while (p != bp);
+ }
+ }
+
+ reset_cutoff(ifd);
+}
+
+/*
+ * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
+ */
+
+static int
+rmc_satisfied(struct rm_class *cl, struct timeval *now)
+{
+ rm_class_t *p;
+
+ if (cl == NULL)
+ return (1);
+ if (TV_LT(now, &cl->undertime_))
+ return (1);
+ if (cl->depth_ == 0) {
+ if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
+ return (0);
+ else
+ return (1);
+ }
+ if (cl->children_ != NULL) {
+ p = cl->children_;
+ while (p != NULL) {
+ if (!rmc_satisfied(p, now))
+ return (0);
+ p = p->next_;
+ }
+ }
+
+ return (1);
+}
+
+/*
+ * Return 1 if class 'cl' is under limit or can borrow from a parent,
+ * 0 if overlimit. As a side-effect, this routine will invoke the
+ * class overlimit action if the class is overlimit.
+ */
+
+static int
+rmc_under_limit(struct rm_class *cl, struct timeval *now)
+{
+ rm_class_t *p = cl;
+ rm_class_t *top;
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ifd->borrowed_[ifd->qi_] = NULL;
+ /*
+ * If cl is the root class, then always return that it is
+ * underlimit. Otherwise, check to see if the class is underlimit.
+ */
+ if (cl->parent_ == NULL)
+ return (1);
+
+ if (cl->sleeping_) {
+ if (TV_LT(now, &cl->undertime_))
+ return (0);
+
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+ return (1);
+ }
+
+ top = NULL;
+ while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
+ if (((cl = cl->borrow_) == NULL) ||
+ (cl->depth_ > ifd->cutoff_)) {
+#ifdef ADJUST_CUTOFF
+ if (cl != NULL)
+ /* cutoff is taking effect, just
+ return false without calling
+ the delay action. */
+ return (0);
+#endif
+#ifdef BORROW_OFFTIME
+ /*
+ * check if the class can borrow offtime too.
+ * borrow offtime from the top of the borrow
+ * chain if the top class is not overloaded.
+ */
+ if (cl != NULL) {
+ /* cutoff is taking effect, use this class as top. */
+ top = cl;
+ CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
+ }
+ if (top != NULL && top->avgidle_ == top->minidle_)
+ top = NULL;
+ p->overtime_ = *now;
+ (p->overlimit)(p, top);
+#else
+ p->overtime_ = *now;
+ (p->overlimit)(p, NULL);
+#endif
+ return (0);
+ }
+ top = cl;
+ }
+
+ if (cl != p)
+ ifd->borrowed_[ifd->qi_] = cl;
+ return (1);
+}
+
+/*
+ * _rmc_wrr_dequeue_next() - This is the scheduler for WRR, as opposed
+ * to packet-by-packet round robin.
+ *
+ * The heart of the weighted round-robin scheduler, which decides which
+ * class next gets to send a packet. Highest priority first, then
+ * weighted round-robin within priorities.
+ *
+ * Each able-to-send class gets to send until its byte allocation is
+ * exhausted. Thus, the active pointer is only changed after a class has
+ * exhausted its allocation.
+ *
+ * If the scheduler finds no class that is underlimit or able to borrow,
+ * then the first class found that had a nonzero queue and is allowed to
+ * borrow gets to send.
+ */
+
+static mbuf_t *
+_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
+{
+ struct rm_class *cl = NULL, *first = NULL;
+ u_int deficit;
+ int cpri;
+ mbuf_t *m;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ if (ifd->efficient_) {
+ /* check if this class is overlimit */
+ if (cl->undertime_.tv_sec != 0 &&
+ rmc_under_limit(cl, &now) == 0)
+ first = cl;
+ }
+ ifd->pollcache_ = NULL;
+ goto _wrr_out;
+ }
+ else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ deficit = 0;
+ /*
+ * Loop through twice for a priority level, if some class
+ * was unable to send a packet the first round because
+ * of the weighted round-robin mechanism.
+ * During the second loop at this level, deficit==2.
+ * (This second loop is not needed if for every class,
+ * "M[cl->pri_])" times "cl->allotment" is greater than
+ * the byte size for the largest packet in the class.)
+ */
+ _wrr_loop:
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
+ cl->bytes_alloc_ += cl->w_allotment_;
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now)) {
+ if (cl->bytes_alloc_ > 0 || deficit > 1)
+ goto _wrr_out;
+
+ /* underlimit but no alloc */
+ deficit = 1;
+#if 1
+ ifd->borrowed_[ifd->qi_] = NULL;
+#endif
+ }
+ else if (first == NULL && cl->borrow_ != NULL)
+ first = cl; /* borrowing candidate */
+ }
+
+ cl->bytes_alloc_ = 0;
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+
+ if (deficit == 1) {
+ /* first loop found an underlimit class with deficit */
+ /* Loop on same priority level, with new deficit. */
+ deficit = 2;
+ goto _wrr_loop;
+ }
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect,
+ * increase cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
+
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+	 * Dequeue the packet and do the bookkeeping...
+ */
+ _wrr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_wrr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ /*
+ * Update class statistics and link data.
+ */
+ if (cl->bytes_alloc_ > 0)
+ cl->bytes_alloc_ -= m_pktlen(m);
+
+ if ((cl->bytes_alloc_ <= 0) || first == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ else
+ ifd->active_[cl->pri_] = cl;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+		/* mode == ALTDQ_POLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
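+
+/*
+ * Illustrative sketch (not part of the ALTQ sources): the byte-allotment
+ * round robin used above, reduced to plain arrays.  Each queue is topped
+ * up by its allotment when its balance runs out and may send while the
+ * balance is positive; a second pass (cf. deficit == 2 above) lets a
+ * still-underfunded queue send one packet anyway.  All names here are
+ * hypothetical.
+ */
+#if 0	/* standalone userland example; compile separately */
+#include <stdio.h>
+
+#define NQ	3
+
+struct wq {
+	int	allot;	/* bytes added per round */
+	int	bytes;	/* current byte balance */
+	int	pktlen;	/* head-of-queue packet length (0 = empty) */
+};
+
+/* pick the next queue allowed to send, replenishing balances lazily */
+static int
+wrr_pick(struct wq q[], int *cursor)
+{
+	int pass, i, idx;
+
+	for (pass = 0; pass < 2; pass++) {
+		for (i = 0; i < NQ; i++) {
+			idx = (*cursor + i) % NQ;
+			if (q[idx].pktlen == 0)
+				continue;
+			if (pass == 0 && q[idx].bytes <= 0) {
+				q[idx].bytes += q[idx].allot;
+				if (q[idx].bytes <= 0)
+					continue;	/* still in deficit */
+			}
+			q[idx].bytes -= q[idx].pktlen;
+			/* advance only once the allocation is exhausted */
+			*cursor = (q[idx].bytes > 0) ? idx : (idx + 1) % NQ;
+			return (idx);
+		}
+	}
+	return (-1);	/* nothing eligible */
+}
+
+int
+main(void)
+{
+	struct wq q[NQ] = {
+		{ 300, 0, 1500 }, { 200, 0, 1500 }, { 100, 0, 64 },
+	};
+	int cursor = 0, round;
+
+	for (round = 0; round < 10; round++)
+		printf("round %d: queue %d sends\n", round,
+		    wrr_pick(q, &cursor));
+	return (0);
+}
+#endif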
+
+/*
+ * Dequeue & return next packet from the highest priority class that
+ * has a packet to send & has enough allocation to send it. This
+ * routine is called by a driver whenever it needs a new packet to
+ * output.
+ */
+static mbuf_t *
+_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
+{
+ mbuf_t *m;
+ int cpri;
+ struct rm_class *cl, *first = NULL;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ ifd->pollcache_ = NULL;
+ goto _prr_out;
+ } else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now))
+ goto _prr_out;
+ if (first == NULL && cl->borrow_ != NULL)
+ first = cl;
+ }
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect, increase
+ * cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+	 * Dequeue the packet and do the bookkeeping...
+ */
+ _prr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_prr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ ifd->active_[cpri] = cl->peer_;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+ /* mode == ALTDQ_POLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * mbuf_t *
+ * rmc_dequeue_next(struct rm_ifdat *ifd, int mode) - this function
+ *	is invoked by the packet driver to get the next packet to be
+ *	dequeued and output on the link.  If WRR is enabled, then the
+ *	WRR dequeue next routine will determine the next packet to be sent.
+ *	Otherwise, packet-by-packet round robin is invoked.
+ *
+ * Returns: NULL, if a packet is not available or if all
+ * classes are overlimit.
+ *
+ *			Otherwise, a pointer to the next packet.
+ */
+
+mbuf_t *
+rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
+{
+ if (ifd->queued_ >= ifd->maxqueued_)
+ return (NULL);
+ else if (ifd->wrr_)
+ return (_rmc_wrr_dequeue_next(ifd, mode));
+ else
+ return (_rmc_prr_dequeue_next(ifd, mode));
+}
+
+/*
+ * Update the utilization estimate for the packet that just completed.
+ * The packet's class & the parent(s) of that class all get their
+ * estimators updated. This routine is called by the driver's output-
+ * packet-completion interrupt service routine.
+ */
+
+/*
+ * a macro to approximate "divide by 1000"; it multiplies by roughly
+ * 0.000999 when the value has enough effective digits.
+ * (on pentium, mul takes 9 cycles but div takes 46!)
+ */
+#define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
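+
+/*
+ * Illustrative sketch (not part of the ALTQ sources): the shift/add
+ * approximation above multiplies by 1/1024 + 1/65536 + 1/131072, i.e.,
+ * about 0.000999, so it tracks t / 1000 within roughly 0.05% once the
+ * value has enough significant bits (small inputs round down to 0).
+ */
+#if 0	/* standalone userland example; compile separately */
+#include <stdio.h>
+
+int
+main(void)
+{
+	int t;
+
+	for (t = 1000; t <= 1000000; t *= 10)
+		printf("%8d ns -> approx %6d us, exact %6d us\n",
+		    t, NSEC_TO_USEC(t), t / 1000);
+	return (0);
+}
+#endif
+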
+void
+rmc_update_class_util(struct rm_ifdat *ifd)
+{
+ int idle, avgidle, pktlen;
+ int pkt_time, tidle;
+ rm_class_t *cl, *borrowed;
+ rm_class_t *borrows;
+ struct timeval *nowp;
+
+ /*
+ * Get the most recent completed class.
+ */
+ if ((cl = ifd->class_[ifd->qo_]) == NULL)
+ return;
+
+ pktlen = ifd->curlen_[ifd->qo_];
+ borrowed = ifd->borrowed_[ifd->qo_];
+ borrows = borrowed;
+
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+
+ /*
+ * Run estimator on class and its ancestors.
+ */
+ /*
+	 * rmc_update_class_util is designed to be called from an
+	 * xmit-complete interrupt when the transfer is completed,
+	 * but most drivers don't implement an upcall for that.
+	 * so, just use the estimated completion time.
+ * as a result, ifd->qi_ and ifd->qo_ are always synced.
+ */
+ nowp = &ifd->now_[ifd->qo_];
+ /* get pkt_time (for link) in usec */
+#if 1 /* use approximation */
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
+#endif
+#if 1 /* ALTQ4PPP */
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ int iftime;
+
+ /*
+ * make sure the estimated completion time does not go
+ * too far. it can happen when the link layer supports
+ * data compression or the interface speed is set to
+ * a much lower value.
+ */
+ TV_DELTA(&ifd->ifnow_, nowp, iftime);
+ if (iftime+pkt_time < ifd->maxiftime_) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
+ }
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#else
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#endif
+
+ while (cl != NULL) {
+ TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
+ if (idle >= 2000000)
+ /*
+ * this class is idle enough, reset avgidle.
+ * (TV_DELTA returns 2000000 us when delta is large.)
+ */
+ cl->avgidle_ = cl->maxidle_;
+
+ /* get pkt_time (for class) in usec */
+#if 1 /* use approximation */
+ pkt_time = pktlen * cl->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = pktlen * cl->ns_per_byte_ / 1000;
+#endif
+ idle -= pkt_time;
+
+ avgidle = cl->avgidle_;
+ avgidle += idle - (avgidle >> RM_FILTER_GAIN);
+ cl->avgidle_ = avgidle;
+
+ /* Are we overlimit ? */
+ if (avgidle <= 0) {
+ CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
+#if 1 /* ALTQ */
+ /*
+ * need some lower bound for avgidle, otherwise
+ * a borrowing class gets unbounded penalty.
+ */
+ if (avgidle < cl->minidle_)
+ avgidle = cl->avgidle_ = cl->minidle_;
+#endif
+ /* set next idle to make avgidle 0 */
+ tidle = pkt_time +
+ (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
+ TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
+ ++cl->stats_.over;
+ } else {
+ cl->avgidle_ =
+ (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
+ cl->undertime_.tv_sec = 0;
+ if (cl->sleeping_) {
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ }
+ }
+
+ if (borrows != NULL) {
+ if (borrows != cl)
+ ++cl->stats_.borrows;
+ else
+ borrows = NULL;
+ }
+ cl->last_ = ifd->ifnow_;
+ cl->last_pkttime_ = pkt_time;
+
+#if 1
+ if (cl->parent_ == NULL) {
+ /* take stats of root class */
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+ }
+#endif
+
+ cl = cl->parent_;
+ }
+
+ /*
+ * Check to see if cutoff needs to set to a new level.
+ */
+ cl = ifd->class_[ifd->qo_];
+ if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
+#if 1 /* ALTQ */
+ if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
+ rmc_tl_satisfied(ifd, nowp);
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#else /* !ALTQ */
+ if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
+ reset_cutoff(ifd);
+#ifdef notdef
+ rmc_tl_satisfied(ifd, &now);
+#endif
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#endif /* !ALTQ */
+ }
+
+ /*
+ * Release class slot
+ */
+ ifd->borrowed_[ifd->qo_] = NULL;
+ ifd->class_[ifd->qo_] = NULL;
+ ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
+ ifd->queued_--;
+}
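+
+/*
+ * Illustrative sketch (not part of the ALTQ sources): the avgidle update
+ * above is an exponentially weighted moving average with gain
+ * 2^-RM_FILTER_GAIN, kept scaled by 2^RM_FILTER_GAIN so that no
+ * multiply is needed: avg += idle - (avg >> 5) converges to 32 * idle
+ * for a constant input.
+ */
+#if 0	/* standalone userland example; compile separately */
+#include <stdio.h>
+
+int
+main(void)
+{
+	int avg = 0, step;
+
+	/* feed a constant idle time of 100 us; avg approaches 3200 */
+	for (step = 0; step < 200; step++)
+		avg += 100 - (avg >> RM_FILTER_GAIN);
+	printf("scaled avgidle = %d (~= 32 * 100)\n", avg);
+	return (0);
+}
+#endif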
+
+/*
+ * void
+ * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
+ * over-limit action routines. These get invoked by rmc_under_limit()
+ *	if a class with packets to send is over its bandwidth limit & can't
+ * borrow from a parent class.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_drop_action(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ASSERT(qlen(cl->q_) > 0);
+ _rmc_dropq(cl);
+ if (qempty(cl->q_))
+ ifd->na_[cl->pri_]--;
+}
+
+void
+rmc_dropall(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ if (!qempty(cl->q_)) {
+ _flushq(cl->q_);
+
+ ifd->na_[cl->pri_]--;
+ }
+}
+
+#if (__FreeBSD_version > 300000)
+/* hzto() was removed in FreeBSD 3.0 */
+static int hzto(struct timeval *);
+
+static int
+hzto(tv)
+ struct timeval *tv;
+{
+ struct timeval t2;
+
+ getmicrotime(&t2);
+ t2.tv_sec = tv->tv_sec - t2.tv_sec;
+ t2.tv_usec = tv->tv_usec - t2.tv_usec;
+ return (tvtohz(&t2));
+}
+#endif /* __FreeBSD_version > 300000 */
+
+/*
+ * void
+ * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
+ * delay action routine. It is invoked via rmc_under_limit when the
+ *	packet is discovered to be overlimit.
+ *
+ *	If the delay action is the result of the borrow class being
+ *	overlimit, then delay for the offtime of the borrowing class
+ *	that is overlimit.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
+{
+ int delay, t, extradelay;
+
+ cl->stats_.overactions++;
+ TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
+#ifndef BORROW_OFFTIME
+ delay += cl->offtime_;
+#endif
+
+ if (!cl->sleeping_) {
+ CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
+#ifdef BORROW_OFFTIME
+ if (borrow != NULL)
+ extradelay = borrow->offtime_;
+ else
+#endif
+ extradelay = cl->offtime_;
+
+#ifdef ALTQ
+ /*
+ * XXX recalculate suspend time:
+ * current undertime is (tidle + pkt_time) calculated
+ * from the last transmission.
+ * tidle: time required to bring avgidle back to 0
+ * pkt_time: target waiting time for this class
+ * we need to replace pkt_time by offtime
+ */
+ extradelay -= cl->last_pkttime_;
+#endif
+ if (extradelay > 0) {
+ TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
+ delay += extradelay;
+ }
+
+ cl->sleeping_ = 1;
+ cl->stats_.delays++;
+
+ /*
+ * Since packets are phased randomly with respect to the
+ * clock, 1 tick (the next clock tick) can be an arbitrarily
+ * short time so we have to wait for at least two ticks.
+ * NOTE: If there's no other traffic, we need the timer as
+ * a 'backstop' to restart this class.
+ */
+ if (delay > tick * 2) {
+#ifdef __FreeBSD__
+ /* FreeBSD rounds up the tick */
+ t = hzto(&cl->undertime_);
+#else
+ /* other BSDs round down the tick */
+ t = hzto(&cl->undertime_) + 1;
+#endif
+ } else
+ t = 2;
+ CALLOUT_RESET(&cl->callout_, t,
+ (timeout_t *)rmc_restart, (caddr_t)cl);
+ }
+}
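+
+/*
+ * Illustrative sketch (not part of the ALTQ sources): converting a
+ * microsecond suspend time into callout ticks with the two-tick
+ * minimum discussed above (a single tick can be arbitrarily short).
+ * Names are hypothetical.
+ */
+#if 0	/* standalone userland example; compile separately */
+#include <stdio.h>
+
+static int
+delay_to_ticks(int delay_us, int hz)
+{
+	int tick_us = 1000000 / hz;
+
+	if (delay_us > tick_us * 2)
+		return ((delay_us + tick_us - 1) / tick_us); /* round up */
+	return (2);					/* backstop minimum */
+}
+
+int
+main(void)
+{
+	printf("%d\n", delay_to_ticks(500, 1000));	/* prints 2 */
+	printf("%d\n", delay_to_ticks(12500, 1000));	/* prints 13 */
+	return (0);
+}
+#endif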
+
+/*
+ * void
+ * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
+ *	called by the system timer code & is responsible for checking if the
+ * class is still sleeping (it might have been restarted as a side
+ * effect of the queue scan on a packet arrival) and, if so, restarting
+ * output for the class. Inspecting the class state & restarting output
+ * require locking the class structure. In general the driver is
+ * responsible for locking but this is the only routine that is not
+ *	called directly or indirectly from the interface driver, so it has to
+ *	know about system locking conventions.  Under BSD, locking is done
+ * by raising IPL to splimp so that's what's implemented here. On a
+ * different system this would probably need to be changed.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_restart(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int s;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(ifd->ifq_);
+ if (cl->sleeping_) {
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+
+ if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
+ CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
+ (ifd->restart)(ifd->ifq_);
+ }
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+}
+
+/*
+ * void
+ * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
+ * handling routine for the root class of the link sharing structure.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
+{
+ panic("rmc_root_overlimit");
+}
+
+/*
+ * Packet Queue handling routines.  Eventually, this is to localize
+ * the effects on the code of whether queues are RED queues or
+ * drop-tail queues.
+ */
+
+static int
+_rmc_addq(rm_class_t *cl, mbuf_t *m)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
+#endif /* ALTQ_RED */
+
+ if (cl->flags_ & RMCF_CLEARDSCP)
+ write_dsfield(m, cl->pktattr_, 0);
+
+ _addq(cl->q_, m);
+ return (0);
+}
+
+/* note: _rmc_dropq is not called for red */
+static void
+_rmc_dropq(rm_class_t *cl)
+{
+ mbuf_t *m;
+
+ if ((m = _getq(cl->q_)) != NULL)
+ m_freem(m);
+}
+
+static mbuf_t *
+_rmc_getq(rm_class_t *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_getq((rio_t *)cl->red_, cl->q_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_getq(cl->red_, cl->q_);
+#endif
+ return _getq(cl->q_);
+}
+
+static mbuf_t *
+_rmc_pollq(rm_class_t *cl)
+{
+ return qhead(cl->q_);
+}
+
+#ifdef CBQ_TRACE
+
+struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
+struct cbqtrace *cbqtrace_ptr = NULL;
+int cbqtrace_count;
+
+/*
+ * DDB hook to trace cbq events:
+ * the last 1024 events are held in a circular buffer.
+ * use "call cbqtrace_dump(N)" to display 20 events from the Nth event.
+ */
+void cbqtrace_dump(int);
+static char *rmc_funcname(void *);
+
+static struct rmc_funcs {
+ void *func;
+ char *name;
+} rmc_funcs[] =
+{
+ rmc_init, "rmc_init",
+ rmc_queue_packet, "rmc_queue_packet",
+ rmc_under_limit, "rmc_under_limit",
+ rmc_update_class_util, "rmc_update_class_util",
+ rmc_delay_action, "rmc_delay_action",
+ rmc_restart, "rmc_restart",
+ _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",
+ NULL, NULL
+};
+
+static char *
+rmc_funcname(void *func)
+{
+ struct rmc_funcs *fp;
+
+ for (fp = rmc_funcs; fp->func != NULL; fp++)
+ if (fp->func == func)
+ return (fp->name);
+ return ("unknown");
+}
+
+void
+cbqtrace_dump(int counter)
+{
+ int i, *p;
+ char *cp;
+
+ counter = counter % NCBQTRACE;
+ p = (int *)&cbqtrace_buffer[counter];
+
+	for (i = 0; i < 20; i++) {
+ printf("[0x%x] ", *p++);
+ printf("%s: ", rmc_funcname((void *)*p++));
+ cp = (char *)p++;
+ printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
+		printf("%d\n", *p++);
+
+ if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
+ p = (int *)cbqtrace_buffer;
+ }
+}
+#endif /* CBQ_TRACE */
+#endif /* ALTQ_CBQ */
+
+#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
+void
+_addq(class_queue_t *q, mbuf_t *m)
+{
+ mbuf_t *m0;
+
+ if ((m0 = qtail(q)) != NULL)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ m0 = m;
+ m0->m_nextpkt = m;
+ qtail(q) = m;
+ qlen(q)++;
+}
+
+mbuf_t *
+_getq(class_queue_t *q)
+{
+ mbuf_t *m, *m0;
+
+ if ((m = qtail(q)) == NULL)
+ return (NULL);
+ if ((m0 = m->m_nextpkt) != m)
+ m->m_nextpkt = m0->m_nextpkt;
+ else {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ }
+ qlen(q)--;
+ m0->m_nextpkt = NULL;
+ return (m0);
+}
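+
+/*
+ * Illustrative sketch (not part of the ALTQ sources): the class queues
+ * above are circular singly-linked lists in which only the tail pointer
+ * is stored; tail->next is the head.  A minimal rendition with a
+ * hypothetical node type:
+ */
+#if 0	/* standalone userland example; compile separately */
+#include <stdio.h>
+
+struct node { struct node *next; int v; };
+
+static void
+enq(struct node **tail, struct node *n)
+{
+	if (*tail != NULL) {
+		n->next = (*tail)->next;	/* new tail points at head */
+		(*tail)->next = n;
+	} else
+		n->next = n;			/* single element: self-loop */
+	*tail = n;
+}
+
+static struct node *
+deq(struct node **tail)
+{
+	struct node *h;
+
+	if (*tail == NULL)
+		return (NULL);
+	h = (*tail)->next;			/* head is tail->next */
+	if (h == *tail)
+		*tail = NULL;			/* h was the only element */
+	else
+		(*tail)->next = h->next;
+	h->next = NULL;
+	return (h);
+}
+
+int
+main(void)
+{
+	struct node a = { NULL, 1 }, b = { NULL, 2 }, *tail = NULL, *n;
+
+	enq(&tail, &a);
+	enq(&tail, &b);
+	while ((n = deq(&tail)) != NULL)
+		printf("%d\n", n->v);		/* prints 1 then 2: FIFO */
+	return (0);
+}
+#endif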
+
+/* drop a packet at the tail of the queue */
+mbuf_t *
+_getq_tail(class_queue_t *q)
+{
+ mbuf_t *m, *m0, *prev;
+
+ if ((m = m0 = qtail(q)) == NULL)
+ return NULL;
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m) {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ } else
+ qtail(q) = prev;
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+/* randomly select a packet in the queue */
+mbuf_t *
+_getq_random(class_queue_t *q)
+{
+ struct mbuf *m;
+ int i, n;
+
+ if ((m = qtail(q)) == NULL)
+ return NULL;
+ if (m->m_nextpkt == m) {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ } else {
+ struct mbuf *prev = NULL;
+
+ n = arc4random() % qlen(q) + 1;
+ for (i = 0; i < n; i++) {
+ prev = m;
+ m = m->m_nextpkt;
+ }
+ prev->m_nextpkt = m->m_nextpkt;
+ if (m == qtail(q))
+ qtail(q) = prev;
+ }
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+void
+_removeq(class_queue_t *q, mbuf_t *m)
+{
+ mbuf_t *m0, *prev;
+
+ m0 = qtail(q);
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else if (qtail(q) == m)
+ qtail(q) = prev;
+ qlen(q)--;
+}
+
+void
+_flushq(class_queue_t *q)
+{
+ mbuf_t *m;
+
+ while ((m = _getq(q)) != NULL)
+ m_freem(m);
+ ASSERT(qlen(q) == 0);
+}
+
+#endif /* !__GNUC__ || ALTQ_DEBUG */
+#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_rmclass.h b/freebsd/sys/contrib/altq/altq/altq_rmclass.h
new file mode 100644
index 00000000..feb30c1c
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_rmclass.h
@@ -0,0 +1,266 @@
+/* $KAME: altq_rmclass.h,v 1.10 2003/08/20 23:30:23 itojun Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_RMCLASS_HH_
+#define _ALTQ_ALTQ_RMCLASS_HH_
+
+#include <freebsd/altq/altq_classq.h>
+
+/* #pragma ident "@(#)rm_class.h 1.20 97/10/23 SMI" */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RM_MAXPRIO 8 /* Max priority */
+
+#ifdef _KERNEL
+
+typedef struct mbuf mbuf_t;
+typedef struct rm_ifdat rm_ifdat_t;
+typedef struct rm_class rm_class_t;
+
+struct red;
+
+/*
+ * Macros for dealing with time values. We assume all times are
+ * 'timevals'. `microtime' is used to get the best available clock
+ * resolution. If `microtime' *doesn't* return a value that's about
+ * ten times smaller than the average packet time on the fastest
+ * link that will use these routines, a slightly different clock
+ * scheme than this one should be used.
+ * (Bias due to truncation error in this scheme will overestimate utilization
+ * and discriminate against high bandwidth classes. To remove this bias an
+ * integrator needs to be added. The simplest integrator uses a history of
+ * 10 * avg.packet.time / min.tick.time packet completion entries. This is
+ * straightforward to add but we don't want to pay the extra memory
+ * traffic to maintain it if it's not necessary (occasionally a vendor
+ * accidentally builds a workstation with a decent clock - e.g., Sun & HP).)
+ */
+
+#define RM_GETTIME(now) microtime(&now)
+
+#define TV_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \
+ (((a)->tv_usec < (b)->tv_usec) && ((a)->tv_sec <= (b)->tv_sec)))
+
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a)->tv_usec - (b)->tv_usec; \
+ if ((xxs = (a)->tv_sec - (b)->tv_sec)) { \
+ switch (xxs) { \
+ default: \
+ /* if (xxs < 0) \
+ printf("rm_class: bogus time values\n"); */ \
+ delta = 0; \
+ /* fall through */ \
+ case 2: \
+ delta += 1000000; \
+ /* fall through */ \
+ case 1: \
+ delta += 1000000; \
+ break; \
+ } \
+ } \
+}
+
+#define TV_ADD_DELTA(a, delta, res) { \
+ register int xxus = (a)->tv_usec + (delta); \
+ \
+ (res)->tv_sec = (a)->tv_sec; \
+ while (xxus >= 1000000) { \
+ ++((res)->tv_sec); \
+ xxus -= 1000000; \
+ } \
+ (res)->tv_usec = xxus; \
+}
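+
+/*
+ * Illustrative sketch (not part of the ALTQ sources): TV_DELTA yields
+ * the difference in microseconds, saturating at 2000000 us once the
+ * timevals are several seconds apart; TV_ADD_DELTA adds a microsecond
+ * count, normalizing tv_usec back below one million.
+ */
+#if 0	/* standalone userland example; compile separately */
+#include <stdio.h>
+#include <sys/time.h>
+
+int
+main(void)
+{
+	struct timeval a = { 10, 900000 }, b = { 9, 800000 }, res;
+	int delta;
+
+	TV_DELTA(&a, &b, delta);
+	printf("delta = %d us\n", delta);		/* 1100000 */
+
+	TV_ADD_DELTA(&a, 250000, &res);
+	printf("res = %ld.%06ld\n",			/* 11.150000 */
+	    (long)res.tv_sec, (long)res.tv_usec);
+	return (0);
+}
+#endif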
+
+#define RM_TIMEOUT 2 /* 1 Clock tick. */
+
+#if 1
+#define RM_MAXQUEUED 1 /* this isn't used in ALTQ/CBQ */
+#else
+#define RM_MAXQUEUED 16 /* Max number of packets downstream of CBQ */
+#endif
+#define RM_MAXQUEUE 64 /* Max queue length */
+#define RM_FILTER_GAIN 5 /* log2 of gain, e.g., 5 => 31/32 */
+#define RM_POWER (1 << RM_FILTER_GAIN)
+#define RM_MAXDEPTH 32
+#define RM_NS_PER_SEC (1000000000)
+
+typedef struct _rm_class_stats_ {
+ u_int handle;
+ u_int depth;
+
+ struct pktcntr xmit_cnt; /* packets sent in this class */
+ struct pktcntr drop_cnt; /* dropped packets */
+ u_int over; /* # times went over limit */
+ u_int borrows; /* # times tried to borrow */
+ u_int overactions; /* # times invoked overlimit action */
+ u_int delays; /* # times invoked delay actions */
+} rm_class_stats_t;
+
+/*
+ * CBQ Class state structure
+ */
+struct rm_class {
+ class_queue_t *q_; /* Queue of packets */
+ rm_ifdat_t *ifdat_;
+ int pri_; /* Class priority. */
+ int depth_; /* Class depth */
+ u_int ns_per_byte_; /* NanoSeconds per byte. */
+ u_int maxrate_; /* Bytes per second for this class. */
+ u_int allotment_; /* Fraction of link bandwidth. */
+ u_int w_allotment_; /* Weighted allotment for WRR */
+ int bytes_alloc_; /* Allocation for round of WRR */
+
+ int avgidle_;
+ int maxidle_;
+ int minidle_;
+ int offtime_;
+ int sleeping_; /* != 0 if delaying */
+ int qthresh_; /* Queue threshold for formal link sharing */
+ int leaf_; /* Note whether leaf class or not.*/
+
+ rm_class_t *children_; /* Children of this class */
+ rm_class_t *next_; /* Next pointer, used if child */
+
+ rm_class_t *peer_; /* Peer class */
+ rm_class_t *borrow_; /* Borrow class */
+ rm_class_t *parent_; /* Parent class */
+
+ void (*overlimit)(struct rm_class *, struct rm_class *);
+ void (*drop)(struct rm_class *); /* Class drop action. */
+
+ struct red *red_; /* RED state pointer */
+ struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */
+ int flags_;
+
+ int last_pkttime_; /* saved pkt_time */
+ struct timeval undertime_; /* time can next send */
+ struct timeval last_; /* time last packet sent */
+ struct timeval overtime_;
+ struct callout callout_; /* for timeout() calls */
+
+ rm_class_stats_t stats_; /* Class Statistics */
+};
+
+/*
+ * CBQ Interface state
+ */
+struct rm_ifdat {
+ int queued_; /* # pkts queued downstream */
+	int		efficient_;	/* Link Efficiency bit */
+ int wrr_; /* Enable Weighted Round-Robin */
+ u_long ns_per_byte_; /* Link byte speed. */
+ int maxqueued_; /* Max packets to queue */
+ int maxpkt_; /* Max packet size. */
+ int qi_; /* In/out pointers for downstream */
+ int qo_; /* packets */
+
+ /*
+ * Active class state and WRR state.
+ */
+ rm_class_t *active_[RM_MAXPRIO]; /* Active cl's in each pri */
+ int na_[RM_MAXPRIO]; /* # of active cl's in a pri */
+ int num_[RM_MAXPRIO]; /* # of cl's per pri */
+ int alloc_[RM_MAXPRIO]; /* Byte Allocation */
+ u_long M_[RM_MAXPRIO]; /* WRR weights. */
+
+ /*
+ * Network Interface/Solaris Queue state pointer.
+ */
+ struct ifaltq *ifq_;
+ rm_class_t *default_; /* Default Pkt class, BE */
+ rm_class_t *root_; /* Root Link class. */
+ rm_class_t *ctl_; /* Control Traffic class. */
+ void (*restart)(struct ifaltq *); /* Restart routine. */
+
+ /*
+ * Current packet downstream packet state and dynamic state.
+ */
+ rm_class_t *borrowed_[RM_MAXQUEUED]; /* Class borrowed last */
+ rm_class_t *class_[RM_MAXQUEUED]; /* class sending */
+ int curlen_[RM_MAXQUEUED]; /* Current pktlen */
+ struct timeval now_[RM_MAXQUEUED]; /* Current packet time. */
+	int		is_overlimit_[RM_MAXQUEUED];/* Current packet overlimit. */
+
+ int cutoff_; /* Cut-off depth for borrowing */
+
+ struct timeval ifnow_; /* expected xmit completion time */
+#if 1 /* ALTQ4PPP */
+ int maxiftime_; /* max delay inside interface */
+#endif
+ rm_class_t *pollcache_; /* cached rm_class by poll operation */
+};
+
+/* flags for rmc_init and rmc_newclass */
+/* class flags */
+#define RMCF_RED 0x0001
+#define RMCF_ECN 0x0002
+#define RMCF_RIO 0x0004
+#define RMCF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
+#define RMCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+
+/* flags for rmc_init */
+#define RMCF_WRR 0x0100
+#define RMCF_EFFICIENT 0x0200
+
+#define is_a_parent_class(cl) ((cl)->children_ != NULL)
+
+extern rm_class_t *rmc_newclass(int, struct rm_ifdat *, u_int,
+ void (*)(struct rm_class *, struct rm_class *),
+ int, struct rm_class *, struct rm_class *,
+ u_int, int, u_int, int, int);
+extern void rmc_delete_class(struct rm_ifdat *, struct rm_class *);
+extern int rmc_modclass(struct rm_class *, u_int, int,
+ u_int, int, u_int, int);
+extern void rmc_init(struct ifaltq *, struct rm_ifdat *, u_int,
+ void (*)(struct ifaltq *),
+ int, int, u_int, int, u_int, int);
+extern int rmc_queue_packet(struct rm_class *, mbuf_t *);
+extern mbuf_t *rmc_dequeue_next(struct rm_ifdat *, int);
+extern void rmc_update_class_util(struct rm_ifdat *);
+extern void rmc_delay_action(struct rm_class *, struct rm_class *);
+extern void rmc_dropall(struct rm_class *);
+extern int rmc_get_weight(struct rm_ifdat *, int);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_RMCLASS_HH_ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_rmclass_debug.h b/freebsd/sys/contrib/altq/altq/altq_rmclass_debug.h
new file mode 100644
index 00000000..6723a4b7
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_rmclass_debug.h
@@ -0,0 +1,112 @@
+/* $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#ifndef _ALTQ_ALTQ_RMCLASS_DEBUG_HH_
+#define _ALTQ_ALTQ_RMCLASS_DEBUG_HH_
+
+/* #pragma ident "@(#)rm_class_debug.h 1.7 98/05/04 SMI" */
+
+/*
+ * Cbq debugging macros
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CBQ_TRACE
+#ifndef NCBQTRACE
+#define NCBQTRACE (16 * 1024)
+#endif
+
+/*
+ * To view the trace output, using adb, type:
+ * adb -k /dev/ksyms /dev/mem <cr>, then type
+ * cbqtrace_count/D to get the count, then type
+ * cbqtrace_buffer,0tcount/Dp4C" "Xn
+ * This will dump the trace buffer from 0 to count.
+ */
+/*
+ * in ALTQ, "call cbqtrace_dump(N)" from DDB to display 20 events
+ * from Nth event in the circular buffer.
+ */
+
+struct cbqtrace {
+ int count;
+ int function; /* address of function */
+ int trace_action; /* descriptive 4 characters */
+ int object; /* object operated on */
+};
+
+extern struct cbqtrace cbqtrace_buffer[];
+extern struct cbqtrace *cbqtrace_ptr;
+extern int cbqtrace_count;
+
+#define CBQTRACEINIT() { \
+ if (cbqtrace_ptr == NULL) \
+ cbqtrace_ptr = cbqtrace_buffer; \
+ else { \
+ cbqtrace_ptr = cbqtrace_buffer; \
+ bzero((void *)cbqtrace_ptr, sizeof(cbqtrace_buffer)); \
+ cbqtrace_count = 0; \
+ } \
+}
+
+#define LOCK_TRACE() splimp()
+#define UNLOCK_TRACE(x) splx(x)
+
+#define CBQTRACE(func, act, obj) { \
+ int __s = LOCK_TRACE(); \
+ int *_p = &cbqtrace_ptr->count; \
+ *_p++ = ++cbqtrace_count; \
+ *_p++ = (int)(func); \
+ *_p++ = (int)(act); \
+ *_p++ = (int)(obj); \
+ if ((struct cbqtrace *)(void *)_p >= &cbqtrace_buffer[NCBQTRACE])\
+ cbqtrace_ptr = cbqtrace_buffer; \
+ else \
+ cbqtrace_ptr = (struct cbqtrace *)(void *)_p; \
+ UNLOCK_TRACE(__s); \
+ }
+#else
+
+/* If no tracing, define no-ops */
+#define CBQTRACEINIT()
+#define CBQTRACE(a, b, c)
+
+#endif /* !CBQ_TRACE */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_RMCLASS_DEBUG_HH_ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_subr.c b/freebsd/sys/contrib/altq/altq/altq_subr.c
new file mode 100644
index 00000000..f4bb6b47
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_subr.c
@@ -0,0 +1,2042 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include <freebsd/local/opt_altq.h>
+#include <freebsd/local/opt_inet.h>
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet6.h>
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/queue.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#ifdef __FreeBSD__
+#include <freebsd/net/vnet.h>
+#endif
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/udp.h>
+
+#include <freebsd/net/pfvar.h>
+#include <freebsd/altq/altq.h>
+#ifdef ALTQ3_COMPAT
+#include <freebsd/altq/altq_conf.h>
+#endif
+
+/* machine dependent clock related includes */
+#ifdef __FreeBSD__
+#if __FreeBSD__ < 3
+#include <freebsd/local/opt_cpu.h> /* for FreeBSD-2.2.8 to get i586_ctr_freq */
+#endif
+#include <freebsd/sys/bus.h>
+#include <freebsd/sys/cpu.h>
+#include <freebsd/sys/eventhandler.h>
+#include <freebsd/machine/clock.h>
+#endif
+#if defined(__i386__)
+#include <freebsd/machine/cpufunc.h> /* for pentium tsc */
+#include <freebsd/machine/specialreg.h> /* for CPUID_TSC */
+#ifdef __FreeBSD__
+#include <freebsd/machine/md_var.h> /* for cpu_feature */
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+#include <freebsd/machine/cpu.h> /* for cpu_feature */
+#endif
+#endif /* __i386__ */
+
+/*
+ * internal function prototypes
+ */
+static void tbr_timeout(void *);
+int (*altq_input)(struct mbuf *, int) = NULL;
+static struct mbuf *tbr_dequeue(struct ifaltq *, int);
+static int tbr_timer = 0; /* token bucket regulator timer */
+#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
+static struct callout tbr_callout = CALLOUT_INITIALIZER;
+#else
+static struct callout tbr_callout;
+#endif
+
+#ifdef ALTQ3_CLFIER_COMPAT
+static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
+#ifdef INET6
+static int extract_ports6(struct mbuf *, struct ip6_hdr *,
+ struct flowinfo_in6 *);
+#endif
+static int apply_filter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+static int apply_ppfilter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+#ifdef INET6
+static int apply_filter6(u_int32_t, struct flow_filter6 *,
+ struct flowinfo_in6 *);
+#endif
+static int apply_tosfilter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+static u_long get_filt_handle(struct acc_classifier *, int);
+static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
+static u_int32_t filt2fibmask(struct flow_filter *);
+
+static void ip4f_cache(struct ip *, struct flowinfo_in *);
+static int ip4f_lookup(struct ip *, struct flowinfo_in *);
+static int ip4f_init(void);
+static struct ip4_frag *ip4f_alloc(void);
+static void ip4f_free(struct ip4_frag *);
+#endif /* ALTQ3_CLFIER_COMPAT */
+
+/*
+ * alternate queueing support routines
+ */
+
+/* look up the queue state by the interface name and the queueing type. */
+void *
+altq_lookup(name, type)
+ char *name;
+ int type;
+{
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(name)) != NULL) {
+ /* read if_snd unlocked */
+ if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
+ return (ifp->if_snd.altq_disc);
+ }
+
+ return NULL;
+}
+
+int
+altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
+ struct ifaltq *ifq;
+ int type;
+ void *discipline;
+ int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+ struct mbuf *(*dequeue)(struct ifaltq *, int);
+ int (*request)(struct ifaltq *, int, void *);
+ void *clfier;
+ void *(*classify)(void *, struct mbuf *, int);
+{
+ IFQ_LOCK(ifq);
+ if (!ALTQ_IS_READY(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return ENXIO;
+ }
+
+#ifdef ALTQ3_COMPAT
+ /*
+ * pfaltq can override the existing discipline, but altq3 cannot.
+ * check these if clfier is not NULL (which implies altq3).
+ */
+ if (clfier != NULL) {
+ if (ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return EBUSY;
+ }
+ if (ALTQ_IS_ATTACHED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return EEXIST;
+ }
+ }
+#endif
+ ifq->altq_type = type;
+ ifq->altq_disc = discipline;
+ ifq->altq_enqueue = enqueue;
+ ifq->altq_dequeue = dequeue;
+ ifq->altq_request = request;
+ ifq->altq_clfier = clfier;
+ ifq->altq_classify = classify;
+ ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_KLD
+ altq_module_incref(type);
+#endif
+#endif
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+int
+altq_detach(ifq)
+ struct ifaltq *ifq;
+{
+ IFQ_LOCK(ifq);
+
+ if (!ALTQ_IS_READY(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return ENXIO;
+ }
+ if (ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return EBUSY;
+ }
+ if (!ALTQ_IS_ATTACHED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return (0);
+ }
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_KLD
+ altq_module_declref(ifq->altq_type);
+#endif
+#endif
+
+ ifq->altq_type = ALTQT_NONE;
+ ifq->altq_disc = NULL;
+ ifq->altq_enqueue = NULL;
+ ifq->altq_dequeue = NULL;
+ ifq->altq_request = NULL;
+ ifq->altq_clfier = NULL;
+ ifq->altq_classify = NULL;
+ ifq->altq_flags &= ALTQF_CANTCHANGE;
+
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+int
+altq_enable(ifq)
+ struct ifaltq *ifq;
+{
+ int s;
+
+ IFQ_LOCK(ifq);
+
+ if (!ALTQ_IS_READY(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return ENXIO;
+ }
+ if (ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return 0;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_PURGE_NOLOCK(ifq);
+ ASSERT(ifq->ifq_len == 0);
+ ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */
+ ifq->altq_flags |= ALTQF_ENABLED;
+ if (ifq->altq_clfier != NULL)
+ ifq->altq_flags |= ALTQF_CLASSIFY;
+ splx(s);
+
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+int
+altq_disable(ifq)
+ struct ifaltq *ifq;
+{
+ int s;
+
+ IFQ_LOCK(ifq);
+ if (!ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return 0;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_PURGE_NOLOCK(ifq);
+ ASSERT(ifq->ifq_len == 0);
+ ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
+ splx(s);
+
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+#ifdef ALTQ_DEBUG
+void
+altq_assert(file, line, failedexpr)
+ const char *file, *failedexpr;
+ int line;
+{
+ (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
+ failedexpr, file, line);
+ panic("altq assertion");
+ /* NOTREACHED */
+}
+#endif
+
+/*
+ * internal representation of token bucket parameters
+ *	rate:	bytes_per_unittime << 32
+ *		= (((bits_per_sec) / 8) << 32) / machclk_freq
+ *	depth:	bytes << 32
+ */
+#define TBR_SHIFT 32
+#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
+#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
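+
+/*
+ * Illustrative sketch (not part of the ALTQ sources): with the 32-bit
+ * fixed-point scaling above, a profile of rate bits/sec and depth bytes
+ * turns into scaled tokens per machine-clock tick and a fill-up time,
+ * mirroring what tbr_set() computes below.  The clock frequency is the
+ * emulated 256MHz value used when no TSC is available.
+ */
+#if 0	/* standalone userland example; compile separately */
+#include <stdio.h>
+#include <stdint.h>
+
+int
+main(void)
+{
+	uint32_t machclk_freq = 256000000;	/* emulated 256MHz clock */
+	uint32_t rate_bps = 10000000;		/* 10Mbps profile */
+	uint32_t depth_bytes = 15000;		/* ~10 full-size packets */
+	int64_t tbr_rate, tbr_depth, filluptime;
+
+	tbr_rate = TBR_SCALE(rate_bps / 8) / machclk_freq;
+	tbr_depth = TBR_SCALE(depth_bytes);
+	filluptime = tbr_depth / tbr_rate;
+
+	printf("scaled bytes per tick = %lld\n", (long long)tbr_rate);
+	printf("ticks to refill an empty bucket = %lld (%.3f ms)\n",
+	    (long long)filluptime,
+	    (double)filluptime / machclk_freq * 1e3);
+	return (0);
+}
+#endif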
+
+static struct mbuf *
+tbr_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ struct tb_regulator *tbr;
+ struct mbuf *m;
+ int64_t interval;
+ u_int64_t now;
+
+ IFQ_LOCK_ASSERT(ifq);
+ tbr = ifq->altq_tbr;
+ if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
+ /* if this is a remove after poll, bypass tbr check */
+ } else {
+ /* update token only when it is negative */
+ if (tbr->tbr_token <= 0) {
+ now = read_machclk();
+ interval = now - tbr->tbr_last;
+ if (interval >= tbr->tbr_filluptime)
+ tbr->tbr_token = tbr->tbr_depth;
+ else {
+ tbr->tbr_token += interval * tbr->tbr_rate;
+ if (tbr->tbr_token > tbr->tbr_depth)
+ tbr->tbr_token = tbr->tbr_depth;
+ }
+ tbr->tbr_last = now;
+ }
+ /* if token is still negative, don't allow dequeue */
+ if (tbr->tbr_token <= 0)
+ return (NULL);
+ }
+
+ if (ALTQ_IS_ENABLED(ifq))
+ m = (*ifq->altq_dequeue)(ifq, op);
+ else {
+ if (op == ALTDQ_POLL)
+ _IF_POLL(ifq, m);
+ else
+ _IF_DEQUEUE(ifq, m);
+ }
+
+ if (m != NULL && op == ALTDQ_REMOVE)
+ tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
+ tbr->tbr_lastop = op;
+ return (m);
+}
+
+/*
+ * set a token bucket regulator.
+ * if the specified rate is zero, the token bucket regulator is deleted.
+ */
+int
+tbr_set(ifq, profile)
+ struct ifaltq *ifq;
+ struct tb_profile *profile;
+{
+ struct tb_regulator *tbr, *otbr;
+
+ if (tbr_dequeue_ptr == NULL)
+ tbr_dequeue_ptr = tbr_dequeue;
+
+ if (machclk_freq == 0)
+ init_machclk();
+ if (machclk_freq == 0) {
+ printf("tbr_set: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ IFQ_LOCK(ifq);
+ if (profile->rate == 0) {
+ /* delete this tbr */
+ if ((tbr = ifq->altq_tbr) == NULL) {
+ IFQ_UNLOCK(ifq);
+ return (ENOENT);
+ }
+ ifq->altq_tbr = NULL;
+ free(tbr, M_DEVBUF);
+ IFQ_UNLOCK(ifq);
+ return (0);
+ }
+
+ IFQ_UNLOCK(ifq);
+ tbr = malloc(sizeof(struct tb_regulator),
+ M_DEVBUF, M_WAITOK);
+	if (tbr == NULL) {		/* cannot happen */
+ IFQ_UNLOCK(ifq);
+ return (ENOMEM);
+ }
+ bzero(tbr, sizeof(struct tb_regulator));
+
+ tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
+ tbr->tbr_depth = TBR_SCALE(profile->depth);
+ if (tbr->tbr_rate > 0)
+ tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
+ else
+ tbr->tbr_filluptime = 0xffffffffffffffffLL;
+ tbr->tbr_token = tbr->tbr_depth;
+ tbr->tbr_last = read_machclk();
+ tbr->tbr_lastop = ALTDQ_REMOVE;
+
+ IFQ_LOCK(ifq);
+ otbr = ifq->altq_tbr;
+ ifq->altq_tbr = tbr; /* set the new tbr */
+
+ if (otbr != NULL)
+ free(otbr, M_DEVBUF);
+ else {
+ if (tbr_timer == 0) {
+ CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
+ tbr_timer = 1;
+ }
+ }
+ IFQ_UNLOCK(ifq);
+ return (0);
+}
+
+/*
+ * tbr_timeout goes through the interface list, and kicks the drivers
+ * if necessary.
+ *
+ * MPSAFE
+ */
+static void
+tbr_timeout(arg)
+ void *arg;
+{
+#if defined(__FreeBSD__)
+ VNET_ITERATOR_DECL(vnet_iter);
+#endif
+ struct ifnet *ifp;
+ int active, s;
+
+ active = 0;
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+#if defined(__FreeBSD__) && (__FreeBSD_version >= 500000)
+ IFNET_RLOCK_NOSLEEP();
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+#endif
+ for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
+ ifp = TAILQ_NEXT(ifp, if_list)) {
+ /* read from if_snd unlocked */
+ if (!TBR_IS_ENABLED(&ifp->if_snd))
+ continue;
+ active++;
+ if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
+ ifp->if_start != NULL)
+ (*ifp->if_start)(ifp);
+ }
+#if defined(__FreeBSD__) && (__FreeBSD_version >= 500000)
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+ IFNET_RUNLOCK_NOSLEEP();
+#endif
+ splx(s);
+ if (active > 0)
+ CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
+ else
+ tbr_timer = 0; /* don't need tbr_timer anymore */
+#if defined(__alpha__) && !defined(ALTQ_NOPCC)
+ {
+ /*
+ * XXX read out the machine dependent clock once a second
+ * to detect counter wrap-around.
+ */
+ static u_int cnt;
+
+ if (++cnt >= hz) {
+ (void)read_machclk();
+ cnt = 0;
+ }
+ }
+#endif /* __alpha__ && !ALTQ_NOPCC */
+}
+
+/*
+ * get token bucket regulator profile
+ */
+int
+tbr_get(ifq, profile)
+ struct ifaltq *ifq;
+ struct tb_profile *profile;
+{
+ struct tb_regulator *tbr;
+
+ IFQ_LOCK(ifq);
+ if ((tbr = ifq->altq_tbr) == NULL) {
+ profile->rate = 0;
+ profile->depth = 0;
+ } else {
+ profile->rate =
+ (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
+ profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
+ }
+ IFQ_UNLOCK(ifq);
+ return (0);
+}
+
+/*
+ * attach a discipline to the interface. if one already exists, it is
+ * overridden.
+ * Locking is done in the discipline specific attach functions. Basically
+ * they call back to altq_attach which takes care of the attach and locking.
+ */
+int
+altq_pfattach(struct pf_altq *a)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+ case ALTQT_NONE:
+ break;
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_pfattach(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_pfattach(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_pfattach(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * detach a discipline from the interface.
+ * it is possible that the discipline was already overridden by another
+ * discipline.
+ */
+int
+altq_pfdetach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error = 0;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+
+ /* if this discipline is no longer referenced, just return */
+ /* read unlocked from if_snd */
+ if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
+ return (0);
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ /* read unlocked from if_snd, _disable and _detach take care */
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ error = altq_disable(&ifp->if_snd);
+ if (error == 0)
+ error = altq_detach(&ifp->if_snd);
+ splx(s);
+
+ return (error);
+}
+
+/*
+ * add a discipline or a queue
+ * Locking is done in the discipline specific functions with regards to
+ * malloc with WAITOK, also it is not yet clear which lock to use.
+ */
+int
+altq_add(struct pf_altq *a)
+{
+ int error = 0;
+
+ if (a->qname[0] != 0)
+ return (altq_add_queue(a));
+
+ if (machclk_freq == 0)
+ init_machclk();
+ if (machclk_freq == 0)
+ panic("altq_add: no cpu clock");
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_add_altq(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_add_altq(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_add_altq(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * remove a discipline or a queue
+ * It is yet unclear what lock to use to protect this operation; the
+ * discipline-specific functions will determine and grab it.
+ */
+int
+altq_remove(struct pf_altq *a)
+{
+ int error = 0;
+
+ if (a->qname[0] != 0)
+ return (altq_remove_queue(a));
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_remove_altq(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_remove_altq(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_remove_altq(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * add a queue to the discipline
+ * It is yet unclear what lock to use to protect this operation; the
+ * discipline-specific functions will determine and grab it.
+ */
+int
+altq_add_queue(struct pf_altq *a)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_add_queue(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_add_queue(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_add_queue(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * remove a queue from the discipline
+ * It is yet unclear what lock to use to protect this operation; the
+ * discipline-specific functions will determine and grab it.
+ */
+int
+altq_remove_queue(struct pf_altq *a)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_remove_queue(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_remove_queue(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_remove_queue(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * get queue statistics
+ * Locking is done in the discipline-specific functions with regard to
+ * copyout operations; also, it is not yet clear which lock to use.
+ */
+int
+altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * read and write diffserv field in IPv4 or IPv6 header
+ */
+u_int8_t
+read_dsfield(m, pktattr)
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ struct mbuf *m0;
+ u_int8_t ds_field = 0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return ((u_int8_t)0);
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+#ifdef ALTQ_DEBUG
+ printf("read_dsfield: can't locate header!\n");
+#endif
+ return ((u_int8_t)0);
+ }
+
+ if (pktattr->pattr_af == AF_INET) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+
+ if (ip->ip_v != 4)
+ return ((u_int8_t)0); /* version mismatch! */
+ ds_field = ip->ip_tos;
+ }
+#ifdef INET6
+ else if (pktattr->pattr_af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return ((u_int8_t)0); /* version mismatch! */
+ ds_field = (flowlabel >> 20) & 0xff;
+ }
+#endif
+ return (ds_field);
+}
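+
+/*
+ * Illustrative sketch (not part of the ALTQ sources): the first 32 bits
+ * of an IPv6 header carry version (4 bits), traffic class (8 bits) and
+ * flow label (20 bits); the shifts above peel out the middle field.
+ */
+#if 0	/* standalone userland example; compile separately */
+#include <stdio.h>
+#include <stdint.h>
+
+int
+main(void)
+{
+	/* version 6, traffic class 0xb8 (EF), flow label 0x12345 */
+	uint32_t flowword = (6u << 28) | (0xb8u << 20) | 0x12345;
+
+	printf("version %u\n", flowword >> 28);
+	printf("traffic class %#x\n", (flowword >> 20) & 0xff);
+	printf("flow label %#x\n", flowword & 0x000fffff);
+	return (0);
+}
+#endif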
+
+void
+write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
+{
+ struct mbuf *m0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return;
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+#ifdef ALTQ_DEBUG
+ printf("write_dsfield: can't locate header!\n");
+#endif
+ return;
+ }
+
+ if (pktattr->pattr_af == AF_INET) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+ u_int8_t old;
+ int32_t sum;
+
+ if (ip->ip_v != 4)
+ return; /* version mismatch! */
+ old = ip->ip_tos;
+ dsfield |= old & 3; /* leave CU bits */
+ if (old == dsfield)
+ return;
+ ip->ip_tos = dsfield;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += 0xff00 + (~old & 0xff) + dsfield;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+
+ ip->ip_sum = htons(~sum & 0xffff);
+ }
+#ifdef INET6
+ else if (pktattr->pattr_af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return; /* version mismatch! */
+ flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ }
+#endif
+ return;
+}
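+
+/*
+ * Illustrative sketch (not part of the ALTQ sources): the RFC 1624
+ * incremental update used above, HC' = ~(~HC + ~m + m'), checked
+ * against a full ones-complement recomputation over a toy header.
+ */
+#if 0	/* standalone userland example; compile separately */
+#include <stdio.h>
+#include <stdint.h>
+
+/* ones-complement sum of 16-bit words, folded and inverted */
+static uint16_t
+cksum(const uint16_t *p, int nwords)
+{
+	uint32_t sum = 0;
+
+	while (nwords-- > 0)
+		sum += *p++;
+	sum = (sum >> 16) + (sum & 0xffff);
+	sum += (sum >> 16);
+	return ((uint16_t)~sum);
+}
+
+int
+main(void)
+{
+	uint16_t hdr[10] = { 0x4500, 0x0054, 0x1234, 0x4000,
+	    0x4001, 0x0000, 0xc0a8, 0x0001, 0xc0a8, 0x0002 };
+	uint16_t old_word, new_word, inc;
+	uint32_t sum;
+
+	hdr[5] = cksum(hdr, 10);	/* fill in the header checksum */
+
+	old_word = hdr[0];
+	new_word = 0x45b8;		/* rewrite the TOS byte to 0xb8 */
+	hdr[0] = new_word;
+
+	/* incremental update: HC' = ~(~HC + ~m + m') */
+	sum = (uint16_t)~hdr[5];
+	sum += (uint16_t)~old_word + new_word;
+	sum = (sum >> 16) + (sum & 0xffff);
+	sum += (sum >> 16);
+	inc = (uint16_t)~sum;
+
+	/* a full recomputation must agree with the incremental result */
+	hdr[5] = 0;
+	printf("incremental %#x, full %#x\n", inc, cksum(hdr, 10));
+	return (0);
+}
+#endif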
+
+
+/*
+ * high resolution clock support taking advantage of a machine dependent
+ * high resolution time counter (e.g., timestamp counter of intel pentium).
+ * we assume
+ * - 64-bit-long monotonically-increasing counter
+ * - frequency range is 100M-4GHz (CPU speed)
+ */
+/* if pcc is not available or disabled, emulate 256MHz using microtime() */
+#define MACHCLK_SHIFT 8
+
+int machclk_usepcc;
+u_int32_t machclk_freq;
+u_int32_t machclk_per_tick;
+
+#ifdef __alpha__
+#ifdef __FreeBSD__
+extern u_int32_t cycles_per_sec; /* alpha cpu clock frequency */
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+extern u_int64_t cycles_per_usec; /* alpha cpu clock frequency */
+#endif
+#endif /* __alpha__ */
+#if defined(__i386__) && defined(__NetBSD__)
+extern u_int64_t cpu_tsc_freq;
+#endif /* __i386__ && __NetBSD__ */
+
+#ifndef __rtems__
+#if (__FreeBSD_version >= 700035)
+/* Update TSC freq with the value indicated by the caller. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+ /* If there was an error during the transition, don't do anything. */
+ if (status != 0)
+ return;
+
+#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
+ /* If TSC is P-state invariant, don't do anything. */
+ if (tsc_is_invariant)
+ return;
+#endif
+
+ /* Total setting for this level gives the new frequency in MHz. */
+ init_machclk();
+}
+EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
+ EVENTHANDLER_PRI_LAST);
+#endif /* __FreeBSD_version >= 700035 */
+#endif /* __rtems__ */
+
+static void
+init_machclk_setup(void)
+{
+#if (__FreeBSD_version >= 600000)
+ callout_init(&tbr_callout, 0);
+#endif
+
+ machclk_usepcc = 1;
+
+#if (!defined(__i386__) && !defined(__alpha__)) || defined(ALTQ_NOPCC)
+ machclk_usepcc = 0;
+#endif
+#if defined(__FreeBSD__) && defined(SMP)
+ machclk_usepcc = 0;
+#endif
+#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
+ machclk_usepcc = 0;
+#endif
+#ifdef __i386__
+#ifndef __rtems__
+ /* check if TSC is available */
+ if (machclk_usepcc == 1 && ((cpu_feature & CPUID_TSC) == 0 ||
+ tsc_is_broken))
+#else /* __rtems__ */
+ /* check if TSC is available */
+ if (machclk_usepcc == 1 && ((cpu_feature & CPUID_TSC) == 0 ||
+ !(x86_has_tsc()) ))
+#endif /* __rtems__ */
+ machclk_usepcc = 0;
+#endif
+}
+
+void
+init_machclk(void)
+{
+ static int called;
+
+ /* Call one-time initialization function. */
+ if (!called) {
+ init_machclk_setup();
+ called = 1;
+ }
+
+ if (machclk_usepcc == 0) {
+ /* emulate 256MHz using microtime() */
+ machclk_freq = 1000000 << MACHCLK_SHIFT;
+ machclk_per_tick = machclk_freq / hz;
+#ifdef ALTQ_DEBUG
+ printf("altq: emulate %uHz cpu clock\n", machclk_freq);
+#endif
+ return;
+ }
+
+ /*
+ * if the clock frequency (of Pentium TSC or Alpha PCC) is
+ * accessible, just use it.
+ */
+#ifndef __rtems__
+#ifdef __i386__
+#ifdef __FreeBSD__
+#if (__FreeBSD_version > 300000)
+ machclk_freq = tsc_freq;
+#else
+ machclk_freq = i586_ctr_freq;
+#endif
+#elif defined(__NetBSD__)
+ machclk_freq = (u_int32_t)cpu_tsc_freq;
+#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
+ machclk_freq = pentium_mhz * 1000000;
+#endif
+#elif defined(__alpha__)
+#ifdef __FreeBSD__
+ machclk_freq = cycles_per_sec;
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+ machclk_freq = (u_int32_t)(cycles_per_usec * 1000000);
+#endif
+#endif /* __alpha__ */
+#endif /* __rtems__ */
+
+ /*
+ * if we don't know the clock frequency, measure it.
+ */
+ if (machclk_freq == 0) {
+ static int wait;
+ struct timeval tv_start, tv_end;
+ u_int64_t start, end, diff;
+ int timo;
+
+ microtime(&tv_start);
+ start = read_machclk();
+ timo = hz; /* 1 sec */
+ (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
+ microtime(&tv_end);
+ end = read_machclk();
+ diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
+ + tv_end.tv_usec - tv_start.tv_usec;
+ if (diff != 0)
+ machclk_freq = (u_int)((end - start) * 1000000 / diff);
+ }
+
+ machclk_per_tick = machclk_freq / hz;
+
+#ifdef ALTQ_DEBUG
+ printf("altq: CPU clock: %uHz\n", machclk_freq);
+#endif
+}
+
+#if defined(__OpenBSD__) && defined(__i386__)
+static __inline u_int64_t
+rdtsc(void)
+{
+ u_int64_t rv;
+ __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
+ return (rv);
+}
+#endif /* __OpenBSD__ && __i386__ */
+
+u_int64_t
+read_machclk(void)
+{
+ u_int64_t val;
+
+ if (machclk_usepcc) {
+#if defined(__i386__)
+ val = rdtsc();
+#elif defined(__alpha__)
+ static u_int32_t last_pcc, upper;
+ u_int32_t pcc;
+
+ /*
+ * for alpha, make a 64bit counter value out of the 32bit
+ * alpha processor cycle counter.
+ * read_machclk must be called within a half of its
+ * wrap-around cycle (about 5 sec for 400MHz cpu) to properly
+ * detect a counter wrap-around.
+ * tbr_timeout calls read_machclk once a second.
+ */
+ pcc = (u_int32_t)alpha_rpcc();
+ if (pcc <= last_pcc)
+ upper++;
+ last_pcc = pcc;
+ val = ((u_int64_t)upper << 32) + pcc;
+#else
+ panic("read_machclk");
+#endif
+ } else {
+ struct timeval tv;
+
+ microtime(&tv);
+ val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
+ + tv.tv_usec) << MACHCLK_SHIFT);
+ }
+ return (val);
+}
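+
+/*
+ * Illustrative sketch (not part of the ALTQ sources): extending a
+ * 32-bit free-running counter to 64 bits, as done for the alpha PCC
+ * above.  It is only correct if the reader is called at least once per
+ * half wrap-around period, which is why tbr_timeout() reads the clock
+ * once a second.
+ */
+#if 0	/* standalone userland example; compile separately */
+#include <stdio.h>
+#include <stdint.h>
+
+static uint64_t
+extend32(uint32_t pcc)
+{
+	static uint32_t last_pcc, upper;
+
+	if (pcc <= last_pcc)
+		upper++;		/* counter wrapped since last read */
+	last_pcc = pcc;
+	return (((uint64_t)upper << 32) + pcc);
+}
+
+int
+main(void)
+{
+	/* simulated reads; the 32-bit counter wraps between reads 2 and 3 */
+	uint32_t reads[4] = { 100, 0xfffffff0, 16, 200 };
+	int i;
+
+	for (i = 0; i < 4; i++)
+		printf("%#llx\n", (unsigned long long)extend32(reads[i]));
+	return (0);
+}
+#endif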
+
+#ifdef ALTQ3_CLFIER_COMPAT
+
+#ifndef IPPROTO_ESP
+#define IPPROTO_ESP 50 /* encapsulating security payload */
+#endif
+#ifndef IPPROTO_AH
+#define IPPROTO_AH 51 /* authentication header */
+#endif
+
+/*
+ * extract flow information from a given packet.
+ * filt_bmask shows the flowinfo fields required.
+ * we assume the ip header is in one mbuf, and addresses and ports are
+ * in network byte order.
+ */
+int
+altq_extractflow(m, af, flow, filt_bmask)
+ struct mbuf *m;
+ int af;
+ struct flowinfo *flow;
+ u_int32_t filt_bmask;
+{
+
+ switch (af) {
+ case PF_INET: {
+ struct flowinfo_in *fin;
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+
+ if (ip->ip_v != 4)
+ break;
+
+ fin = (struct flowinfo_in *)flow;
+ fin->fi_len = sizeof(struct flowinfo_in);
+ fin->fi_family = AF_INET;
+
+ fin->fi_proto = ip->ip_p;
+ fin->fi_tos = ip->ip_tos;
+
+ fin->fi_src.s_addr = ip->ip_src.s_addr;
+ fin->fi_dst.s_addr = ip->ip_dst.s_addr;
+
+ if (filt_bmask & FIMB4_PORTS)
+ /* if port info is required, extract port numbers */
+ extract_ports4(m, ip, fin);
+ else {
+ fin->fi_sport = 0;
+ fin->fi_dport = 0;
+ fin->fi_gpi = 0;
+ }
+ return (1);
+ }
+
+#ifdef INET6
+ case PF_INET6: {
+ struct flowinfo_in6 *fin6;
+ struct ip6_hdr *ip6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ /* should we check the ip version? */
+
+ fin6 = (struct flowinfo_in6 *)flow;
+ fin6->fi6_len = sizeof(struct flowinfo_in6);
+ fin6->fi6_family = AF_INET6;
+
+ fin6->fi6_proto = ip6->ip6_nxt;
+ fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+
+ fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
+ fin6->fi6_src = ip6->ip6_src;
+ fin6->fi6_dst = ip6->ip6_dst;
+
+ if ((filt_bmask & FIMB6_PORTS) ||
+ ((filt_bmask & FIMB6_PROTO)
+ && ip6->ip6_nxt > IPPROTO_IPV6))
+ /*
+ * if port info is required, or proto is required
+ * but there are option headers, extract port
+ * and protocol numbers.
+ */
+ extract_ports6(m, ip6, fin6);
+ else {
+ fin6->fi6_sport = 0;
+ fin6->fi6_dport = 0;
+ fin6->fi6_gpi = 0;
+ }
+ return (1);
+ }
+#endif /* INET6 */
+
+ default:
+ break;
+ }
+
+ /* failed */
+ flow->fi_len = sizeof(struct flowinfo);
+ flow->fi_family = AF_UNSPEC;
+ return (0);
+}
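+
+/*
+ * Editorial note: a minimal caller sketch (not from the original
+ * sources; altq_extractflow(), struct flowinfo_in and FIMB4_ALL are
+ * real, the local variables are illustrative):
+ *
+ *	struct flowinfo flow;
+ *	struct flowinfo_in *fin;
+ *
+ *	if (altq_extractflow(m, AF_INET, &flow, FIMB4_ALL)) {
+ *		fin = (struct flowinfo_in *)&flow;
+ *		... match on fin->fi_src, fin->fi_dst,
+ *		    fin->fi_sport and fin->fi_dport ...
+ *	}
+ */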
+
+/*
+ * helper routine to extract port numbers
+ */
+/* template structure for ipsec and ipv6 option headers */
+struct _opt6 {
+ u_int8_t opt6_nxt; /* next header */
+ u_int8_t opt6_hlen; /* header extension length */
+ u_int16_t _pad;
+ u_int32_t ah_spi; /* security parameter index
+ for authentication header */
+};
+
+/*
+ * extract port numbers from a ipv4 packet.
+ */
+static int
+extract_ports4(m, ip, fin)
+ struct mbuf *m;
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct mbuf *m0;
+ u_short ip_off;
+ u_int8_t proto;
+ int off;
+
+ fin->fi_sport = 0;
+ fin->fi_dport = 0;
+ fin->fi_gpi = 0;
+
+ ip_off = ntohs(ip->ip_off);
+ /* if it is a fragment, try cached fragment info */
+ if (ip_off & IP_OFFMASK) {
+ ip4f_lookup(ip, fin);
+ return (1);
+ }
+
+ /* locate the mbuf containing the protocol header */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)ip >= m0->m_data) &&
+ ((caddr_t)ip < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("extract_ports4: can't locate header! ip=%p\n", ip);
+#endif
+ return (0);
+ }
+ off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
+ proto = ip->ip_p;
+
+#ifdef ALTQ_IPSEC
+ again:
+#endif
+ while (off >= m0->m_len) {
+ off -= m0->m_len;
+ m0 = m0->m_next;
+ if (m0 == NULL)
+ return (0); /* bogus ip_hl! */
+ }
+ if (m0->m_len < off + 4)
+ return (0);
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP: {
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
+ fin->fi_sport = udp->uh_sport;
+ fin->fi_dport = udp->uh_dport;
+ fin->fi_proto = proto;
+ }
+ break;
+
+#ifdef ALTQ_IPSEC
+ case IPPROTO_ESP:
+ if (fin->fi_gpi == 0){
+ u_int32_t *gpi;
+
+ gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
+ fin->fi_gpi = *gpi;
+ }
+ fin->fi_proto = proto;
+ break;
+
+ case IPPROTO_AH: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ proto = opt6->opt6_nxt;
+ off += 8 + (opt6->opt6_hlen * 4);
+ if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
+ fin->fi_gpi = opt6->ah_spi;
+ }
+		/* go to the next header */
+ goto again;
+#endif /* ALTQ_IPSEC */
+
+ default:
+ fin->fi_proto = proto;
+ return (0);
+ }
+
+ /* if this is a first fragment, cache it. */
+ if (ip_off & IP_MF)
+ ip4f_cache(ip, fin);
+
+ return (1);
+}
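+
+/*
+ * Editorial note (not part of the original sources): the AH case
+ * above follows the RFC 2402 length encoding, where opt6_hlen holds
+ * the header length in 32bit words minus 2.  For opt6_hlen == 4 the
+ * parser skips 8 + 4 * 4 == 24 bytes, i.e. (4 + 2) * 4, to reach
+ * the next header.
+ */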
+
+#ifdef INET6
+static int
+extract_ports6(m, ip6, fin6)
+ struct mbuf *m;
+ struct ip6_hdr *ip6;
+ struct flowinfo_in6 *fin6;
+{
+ struct mbuf *m0;
+ int off;
+ u_int8_t proto;
+
+ fin6->fi6_gpi = 0;
+ fin6->fi6_sport = 0;
+ fin6->fi6_dport = 0;
+
+ /* locate the mbuf containing the protocol header */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)ip6 >= m0->m_data) &&
+ ((caddr_t)ip6 < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
+#endif
+ return (0);
+ }
+ off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
+
+ proto = ip6->ip6_nxt;
+ do {
+ while (off >= m0->m_len) {
+ off -= m0->m_len;
+ m0 = m0->m_next;
+ if (m0 == NULL)
+ return (0);
+ }
+ if (m0->m_len < off + 4)
+ return (0);
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP: {
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
+ fin6->fi6_sport = udp->uh_sport;
+ fin6->fi6_dport = udp->uh_dport;
+ fin6->fi6_proto = proto;
+ }
+ return (1);
+
+ case IPPROTO_ESP:
+ if (fin6->fi6_gpi == 0) {
+ u_int32_t *gpi;
+
+ gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
+ fin6->fi6_gpi = *gpi;
+ }
+ fin6->fi6_proto = proto;
+ return (1);
+
+ case IPPROTO_AH: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
+ fin6->fi6_gpi = opt6->ah_spi;
+ proto = opt6->opt6_nxt;
+ off += 8 + (opt6->opt6_hlen * 4);
+			/* go to the next header */
+ break;
+ }
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ proto = opt6->opt6_nxt;
+ off += (opt6->opt6_hlen + 1) * 8;
+			/* go to the next header */
+ break;
+ }
+
+ case IPPROTO_FRAGMENT:
+			/* ipv6 fragmentation is not supported yet */
+ default:
+ fin6->fi6_proto = proto;
+ return (0);
+ }
+ } while (1);
+ /*NOTREACHED*/
+}
+#endif /* INET6 */
+
+/*
+ * altq common classifier
+ */
+int
+acc_add_filter(classifier, filter, class, phandle)
+ struct acc_classifier *classifier;
+ struct flow_filter *filter;
+ void *class;
+ u_long *phandle;
+{
+ struct acc_filter *afp, *prev, *tmp;
+ int i, s;
+
+#ifdef INET6
+ if (filter->ff_flow.fi_family != AF_INET &&
+ filter->ff_flow.fi_family != AF_INET6)
+ return (EINVAL);
+#else
+ if (filter->ff_flow.fi_family != AF_INET)
+ return (EINVAL);
+#endif
+
+ afp = malloc(sizeof(struct acc_filter),
+ M_DEVBUF, M_WAITOK);
+ if (afp == NULL)
+ return (ENOMEM);
+ bzero(afp, sizeof(struct acc_filter));
+
+ afp->f_filter = *filter;
+ afp->f_class = class;
+
+ i = ACC_WILDCARD_INDEX;
+ if (filter->ff_flow.fi_family == AF_INET) {
+ struct flow_filter *filter4 = &afp->f_filter;
+
+ /*
+ * if address is 0, it's a wildcard. if address mask
+ * isn't set, use full mask.
+ */
+ if (filter4->ff_flow.fi_dst.s_addr == 0)
+ filter4->ff_mask.mask_dst.s_addr = 0;
+ else if (filter4->ff_mask.mask_dst.s_addr == 0)
+ filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
+ if (filter4->ff_flow.fi_src.s_addr == 0)
+ filter4->ff_mask.mask_src.s_addr = 0;
+ else if (filter4->ff_mask.mask_src.s_addr == 0)
+ filter4->ff_mask.mask_src.s_addr = 0xffffffff;
+
+ /* clear extra bits in addresses */
+ filter4->ff_flow.fi_dst.s_addr &=
+ filter4->ff_mask.mask_dst.s_addr;
+ filter4->ff_flow.fi_src.s_addr &=
+ filter4->ff_mask.mask_src.s_addr;
+
+ /*
+ * if dst address is a wildcard, use hash-entry
+ * ACC_WILDCARD_INDEX.
+ */
+ if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
+ i = ACC_WILDCARD_INDEX;
+ else
+ i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
+ }
+#ifdef INET6
+ else if (filter->ff_flow.fi_family == AF_INET6) {
+ struct flow_filter6 *filter6 =
+ (struct flow_filter6 *)&afp->f_filter;
+#ifndef IN6MASK0 /* taken from kame ipv6 */
+#define IN6MASK0 {{{ 0, 0, 0, 0 }}}
+#define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
+ const struct in6_addr in6mask0 = IN6MASK0;
+ const struct in6_addr in6mask128 = IN6MASK128;
+#endif
+
+ if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
+ filter6->ff_mask6.mask6_dst = in6mask0;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
+ filter6->ff_mask6.mask6_dst = in6mask128;
+ if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
+ filter6->ff_mask6.mask6_src = in6mask0;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
+ filter6->ff_mask6.mask6_src = in6mask128;
+
+ /* clear extra bits in addresses */
+ for (i = 0; i < 16; i++)
+ filter6->ff_flow6.fi6_dst.s6_addr[i] &=
+ filter6->ff_mask6.mask6_dst.s6_addr[i];
+ for (i = 0; i < 16; i++)
+ filter6->ff_flow6.fi6_src.s6_addr[i] &=
+ filter6->ff_mask6.mask6_src.s6_addr[i];
+
+ if (filter6->ff_flow6.fi6_flowlabel == 0)
+ i = ACC_WILDCARD_INDEX;
+ else
+ i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
+ }
+#endif /* INET6 */
+
+ afp->f_handle = get_filt_handle(classifier, i);
+
+ /* update filter bitmask */
+ afp->f_fbmask = filt2fibmask(filter);
+ classifier->acc_fbmask |= afp->f_fbmask;
+
+ /*
+ * add this filter to the filter list.
+ * filters are ordered from the highest rule number.
+ */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ prev = NULL;
+ LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
+ if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
+ prev = tmp;
+ else
+ break;
+ }
+ if (prev == NULL)
+ LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
+ else
+ LIST_INSERT_AFTER(prev, afp, f_chain);
+ splx(s);
+
+ *phandle = afp->f_handle;
+ return (0);
+}
+
+int
+acc_delete_filter(classifier, handle)
+ struct acc_classifier *classifier;
+ u_long handle;
+{
+ struct acc_filter *afp;
+ int s;
+
+ if ((afp = filth_to_filtp(classifier, handle)) == NULL)
+ return (EINVAL);
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ LIST_REMOVE(afp, f_chain);
+ splx(s);
+
+ free(afp, M_DEVBUF);
+
+ /* todo: update filt_bmask */
+
+ return (0);
+}
+
+/*
+ * delete filters referencing the specified class.
+ * if the all flag is not 0, delete all the filters.
+ */
+int
+acc_discard_filters(classifier, class, all)
+ struct acc_classifier *classifier;
+ void *class;
+ int all;
+{
+ struct acc_filter *afp;
+ int i, s;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
+ do {
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (all || afp->f_class == class) {
+ LIST_REMOVE(afp, f_chain);
+ free(afp, M_DEVBUF);
+ /* start again from the head */
+ break;
+ }
+ } while (afp != NULL);
+ }
+ splx(s);
+
+ if (all)
+ classifier->acc_fbmask = 0;
+
+ return (0);
+}
+
+void *
+acc_classify(clfier, m, af)
+ void *clfier;
+ struct mbuf *m;
+ int af;
+{
+ struct acc_classifier *classifier;
+ struct flowinfo flow;
+ struct acc_filter *afp;
+ int i;
+
+ classifier = (struct acc_classifier *)clfier;
+ altq_extractflow(m, af, &flow, classifier->acc_fbmask);
+
+ if (flow.fi_family == AF_INET) {
+ struct flowinfo_in *fp = (struct flowinfo_in *)&flow;
+
+ if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
+ /* only tos is used */
+ LIST_FOREACH(afp,
+ &classifier->acc_filters[ACC_WILDCARD_INDEX],
+ f_chain)
+ if (apply_tosfilter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+ } else if ((classifier->acc_fbmask &
+ (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
+ == 0) {
+ /* only proto and ports are used */
+ LIST_FOREACH(afp,
+ &classifier->acc_filters[ACC_WILDCARD_INDEX],
+ f_chain)
+ if (apply_ppfilter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+ } else {
+ /* get the filter hash entry from its dest address */
+ i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
+ do {
+ /*
+ * go through this loop twice. first for dst
+ * hash, second for wildcards.
+ */
+ LIST_FOREACH(afp, &classifier->acc_filters[i],
+ f_chain)
+ if (apply_filter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+
+ /*
+ * check again for filters with a dst addr
+ * wildcard.
+ * (daddr == 0 || dmask != 0xffffffff).
+ */
+ if (i != ACC_WILDCARD_INDEX)
+ i = ACC_WILDCARD_INDEX;
+ else
+ break;
+ } while (1);
+ }
+ }
+#ifdef INET6
+ else if (flow.fi_family == AF_INET6) {
+ struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;
+
+ /* get the filter hash entry from its flow ID */
+ if (fp6->fi6_flowlabel != 0)
+ i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
+ else
+			/* flowlabel can be zero */
+ i = ACC_WILDCARD_INDEX;
+
+ /* go through this loop twice. first for flow hash, second
+ for wildcards. */
+ do {
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (apply_filter6(afp->f_fbmask,
+ (struct flow_filter6 *)&afp->f_filter,
+ fp6))
+ /* filter matched */
+ return (afp->f_class);
+
+ /*
+ * check again for filters with a wildcard.
+ */
+ if (i != ACC_WILDCARD_INDEX)
+ i = ACC_WILDCARD_INDEX;
+ else
+ break;
+ } while (1);
+ }
+#endif /* INET6 */
+
+ /* no filter matched */
+ return (NULL);
+}
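+
+/*
+ * Editorial note (not part of the original sources): the two-pass
+ * lookups above mean a packet destined to, say, 10.1.2.3 is first
+ * matched against bucket ACC_GET_HASH_INDEX(fp->fi_dst.s_addr) and
+ * only then against ACC_WILDCARD_INDEX, so filters with a concrete
+ * destination are checked before wildcard filters.
+ */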
+
+static int
+apply_filter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
+ return (0);
+ if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
+ return (0);
+ if ((fbmask & FIMB4_DADDR) &&
+ filt->ff_flow.fi_dst.s_addr !=
+ (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
+ return (0);
+ if ((fbmask & FIMB4_SADDR) &&
+ filt->ff_flow.fi_src.s_addr !=
+ (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
+ return (0);
+ if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
+ return (0);
+ if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
+ (pkt->fi_tos & filt->ff_mask.mask_tos))
+ return (0);
+ if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
+ return (0);
+ /* match */
+ return (1);
+}
+
+/*
+ * filter matching function optimized for a common case that checks
+ * only protocol and port numbers
+ */
+static int
+apply_ppfilter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
+ return (0);
+ if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
+ return (0);
+ if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
+ return (0);
+ /* match */
+ return (1);
+}
+
+/*
+ * filter matching function only for tos field.
+ */
+static int
+apply_tosfilter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
+ (pkt->fi_tos & filt->ff_mask.mask_tos))
+ return (0);
+ /* match */
+ return (1);
+}
+
+#ifdef INET6
+static int
+apply_filter6(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter6 *filt;
+ struct flowinfo_in6 *pkt;
+{
+ int i;
+
+ if (filt->ff_flow6.fi6_family != AF_INET6)
+ return (0);
+ if ((fbmask & FIMB6_FLABEL) &&
+ filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
+ return (0);
+ if ((fbmask & FIMB6_PROTO) &&
+ filt->ff_flow6.fi6_proto != pkt->fi6_proto)
+ return (0);
+ if ((fbmask & FIMB6_SPORT) &&
+ filt->ff_flow6.fi6_sport != pkt->fi6_sport)
+ return (0);
+ if ((fbmask & FIMB6_DPORT) &&
+ filt->ff_flow6.fi6_dport != pkt->fi6_dport)
+ return (0);
+ if (fbmask & FIMB6_SADDR) {
+ for (i = 0; i < 4; i++)
+ if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
+ (pkt->fi6_src.s6_addr32[i] &
+ filt->ff_mask6.mask6_src.s6_addr32[i]))
+ return (0);
+ }
+ if (fbmask & FIMB6_DADDR) {
+ for (i = 0; i < 4; i++)
+ if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
+ (pkt->fi6_dst.s6_addr32[i] &
+ filt->ff_mask6.mask6_dst.s6_addr32[i]))
+ return (0);
+ }
+ if ((fbmask & FIMB6_TCLASS) &&
+ filt->ff_flow6.fi6_tclass !=
+ (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
+ return (0);
+ if ((fbmask & FIMB6_GPI) &&
+ filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
+ return (0);
+ /* match */
+ return (1);
+}
+#endif /* INET6 */
+
+/*
+ * filter handle:
+ * bit 20-28: index to the filter hash table
+ * bit 0-19: unique id in the hash bucket.
+ */
+static u_long
+get_filt_handle(classifier, i)
+ struct acc_classifier *classifier;
+ int i;
+{
+ static u_long handle_number = 1;
+ u_long handle;
+ struct acc_filter *afp;
+
+ while (1) {
+ handle = handle_number++ & 0x000fffff;
+
+ if (LIST_EMPTY(&classifier->acc_filters[i]))
+ break;
+
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if ((afp->f_handle & 0x000fffff) == handle)
+ break;
+ if (afp == NULL)
+ break;
+ /* this handle is already used, try again */
+ }
+
+ return ((i << 20) | handle);
+}
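+
+/*
+ * Editorial note (a worked example, not part of the original
+ * sources): for bucket i == 3 and per-bucket id 0x42,
+ * get_filt_handle() returns (3 << 20) | 0x42 == 0x300042, and
+ * ACC_GET_HINDEX(0x300042) recovers bucket 3 by shifting right
+ * 20 bits.
+ */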
+
+/* convert filter handle to filter pointer */
+static struct acc_filter *
+filth_to_filtp(classifier, handle)
+ struct acc_classifier *classifier;
+ u_long handle;
+{
+ struct acc_filter *afp;
+ int i;
+
+ i = ACC_GET_HINDEX(handle);
+
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (afp->f_handle == handle)
+ return (afp);
+
+ return (NULL);
+}
+
+/* create flowinfo bitmask */
+static u_int32_t
+filt2fibmask(filt)
+ struct flow_filter *filt;
+{
+ u_int32_t mask = 0;
+#ifdef INET6
+ struct flow_filter6 *filt6;
+#endif
+
+ switch (filt->ff_flow.fi_family) {
+ case AF_INET:
+ if (filt->ff_flow.fi_proto != 0)
+ mask |= FIMB4_PROTO;
+ if (filt->ff_flow.fi_tos != 0)
+ mask |= FIMB4_TOS;
+ if (filt->ff_flow.fi_dst.s_addr != 0)
+ mask |= FIMB4_DADDR;
+ if (filt->ff_flow.fi_src.s_addr != 0)
+ mask |= FIMB4_SADDR;
+ if (filt->ff_flow.fi_sport != 0)
+ mask |= FIMB4_SPORT;
+ if (filt->ff_flow.fi_dport != 0)
+ mask |= FIMB4_DPORT;
+ if (filt->ff_flow.fi_gpi != 0)
+ mask |= FIMB4_GPI;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ filt6 = (struct flow_filter6 *)filt;
+
+ if (filt6->ff_flow6.fi6_proto != 0)
+ mask |= FIMB6_PROTO;
+ if (filt6->ff_flow6.fi6_tclass != 0)
+ mask |= FIMB6_TCLASS;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
+ mask |= FIMB6_DADDR;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
+ mask |= FIMB6_SADDR;
+ if (filt6->ff_flow6.fi6_sport != 0)
+ mask |= FIMB6_SPORT;
+ if (filt6->ff_flow6.fi6_dport != 0)
+ mask |= FIMB6_DPORT;
+ if (filt6->ff_flow6.fi6_gpi != 0)
+ mask |= FIMB6_GPI;
+ if (filt6->ff_flow6.fi6_flowlabel != 0)
+ mask |= FIMB6_FLABEL;
+ break;
+#endif /* INET6 */
+ }
+ return (mask);
+}
+
+/*
+ * helper functions to handle IPv4 fragments.
+ * currently only in-sequence fragments are handled.
+ * - fragment info is cached in an LRU list.
+ * - when a first fragment is found, cache its flow info.
+ * - when a non-first fragment is found, lookup the cache.
+ */
+
+struct ip4_frag {
+ TAILQ_ENTRY(ip4_frag) ip4f_chain;
+ char ip4f_valid;
+ u_short ip4f_id;
+ struct flowinfo_in ip4f_info;
+};
+
+static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
+
+#define IP4F_TABSIZE 16 /* IPv4 fragment cache size */
+
+static void
+ip4f_cache(ip, fin)
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct ip4_frag *fp;
+
+ if (TAILQ_EMPTY(&ip4f_list)) {
+ /* first time call, allocate fragment cache entries. */
+ if (ip4f_init() < 0)
+ /* allocation failed! */
+ return;
+ }
+
+ fp = ip4f_alloc();
+ fp->ip4f_id = ip->ip_id;
+ fp->ip4f_info.fi_proto = ip->ip_p;
+ fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
+ fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
+
+ /* save port numbers */
+ fp->ip4f_info.fi_sport = fin->fi_sport;
+ fp->ip4f_info.fi_dport = fin->fi_dport;
+ fp->ip4f_info.fi_gpi = fin->fi_gpi;
+}
+
+static int
+ip4f_lookup(ip, fin)
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct ip4_frag *fp;
+
+ for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
+ fp = TAILQ_NEXT(fp, ip4f_chain))
+ if (ip->ip_id == fp->ip4f_id &&
+ ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
+ ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
+ ip->ip_p == fp->ip4f_info.fi_proto) {
+
+ /* found the matching entry */
+ fin->fi_sport = fp->ip4f_info.fi_sport;
+ fin->fi_dport = fp->ip4f_info.fi_dport;
+ fin->fi_gpi = fp->ip4f_info.fi_gpi;
+
+ if ((ntohs(ip->ip_off) & IP_MF) == 0)
+ /* this is the last fragment,
+ release the entry. */
+ ip4f_free(fp);
+
+ return (1);
+ }
+
+ /* no matching entry found */
+ return (0);
+}
+
+static int
+ip4f_init(void)
+{
+ struct ip4_frag *fp;
+ int i;
+
+ TAILQ_INIT(&ip4f_list);
+ for (i=0; i<IP4F_TABSIZE; i++) {
+ fp = malloc(sizeof(struct ip4_frag),
+ M_DEVBUF, M_NOWAIT);
+ if (fp == NULL) {
+ printf("ip4f_init: can't alloc %dth entry!\n", i);
+ if (i == 0)
+ return (-1);
+ return (0);
+ }
+ fp->ip4f_valid = 0;
+ TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
+ }
+ return (0);
+}
+
+static struct ip4_frag *
+ip4f_alloc(void)
+{
+ struct ip4_frag *fp;
+
+ /* reclaim an entry at the tail, put it at the head */
+ fp = TAILQ_LAST(&ip4f_list, ip4f_list);
+ TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
+ fp->ip4f_valid = 1;
+ TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
+ return (fp);
+}
+
+static void
+ip4f_free(fp)
+ struct ip4_frag *fp;
+{
+ TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
+ fp->ip4f_valid = 0;
+ TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
+}
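+
+/*
+ * Editorial note (not part of the original sources): the cache is a
+ * fixed-size LRU list.  A first fragment makes ip4f_cache() reclaim
+ * the tail entry and move it to the head; subsequent fragments of
+ * the same datagram hit ip4f_lookup(), and the final fragment
+ * (IP_MF clear) returns the entry to the tail via ip4f_free().
+ */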
+
+#endif /* ALTQ3_CLFIER_COMPAT */
diff --git a/freebsd/sys/contrib/altq/altq/altq_var.h b/freebsd/sys/contrib/altq/altq/altq_var.h
new file mode 100644
index 00000000..9ebfe2ed
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altq_var.h
@@ -0,0 +1,265 @@
+/* $FreeBSD$ */
+/* $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_ALTQ_VAR_HH_
+#define _ALTQ_ALTQ_VAR_HH_
+
+#ifdef _KERNEL
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/queue.h>
+
+#ifdef ALTQ3_CLFIER_COMPAT
+/*
+ * filter structure for altq common classifier
+ */
+struct acc_filter {
+ LIST_ENTRY(acc_filter) f_chain;
+ void *f_class; /* pointer to the class */
+ u_long f_handle; /* filter id */
+ u_int32_t f_fbmask; /* filter bitmask */
+ struct flow_filter f_filter; /* filter value */
+};
+
+/*
+ * XXX ACC_FILTER_TABLESIZE can't be larger than 2048 unless we fix
+ * the handle assignment.
+ */
+#define ACC_FILTER_TABLESIZE (256+1)
+#define ACC_FILTER_MASK (ACC_FILTER_TABLESIZE - 2)
+#define ACC_WILDCARD_INDEX (ACC_FILTER_TABLESIZE - 1)
+#ifdef __GNUC__
+#define ACC_GET_HASH_INDEX(addr) \
+ ({int x = (addr) + ((addr) >> 16); (x + (x >> 8)) & ACC_FILTER_MASK;})
+#else
+#define ACC_GET_HASH_INDEX(addr) \
+ (((addr) + ((addr) >> 8) + ((addr) >> 16) + ((addr) >> 24)) \
+ & ACC_FILTER_MASK)
+#endif
+#define ACC_GET_HINDEX(handle) ((handle) >> 20)
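+
+/*
+ * Editorial note (a worked example, not part of the original
+ * sources): with the GNU C variant and addr == 0x0a010203
+ * (10.1.2.3),
+ *	x = 0x0a010203 + 0x0a01 = 0x0a010c04
+ *	(x + (x >> 8)) & ACC_FILTER_MASK = 0x0a0b0d10 & 0xff = 0x10,
+ * so the filter lands in hash bucket 16.
+ */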
+
+#if (__FreeBSD_version > 500000)
+#define ACC_LOCK_INIT(ac) mtx_init(&(ac)->acc_mtx, "classifier", MTX_DEF)
+#define ACC_LOCK_DESTROY(ac) mtx_destroy(&(ac)->acc_mtx)
+#define ACC_LOCK(ac) mtx_lock(&(ac)->acc_mtx)
+#define ACC_UNLOCK(ac) mtx_unlock(&(ac)->acc_mtx)
+#else
+#define ACC_LOCK_INIT(ac)
+#define ACC_LOCK_DESTROY(ac)
+#define ACC_LOCK(ac)
+#define ACC_UNLOCK(ac)
+#endif
+
+struct acc_classifier {
+ u_int32_t acc_fbmask;
+ LIST_HEAD(filt, acc_filter) acc_filters[ACC_FILTER_TABLESIZE];
+
+#if (__FreeBSD_version > 500000)
+ struct mtx acc_mtx;
+#endif
+};
+
+/*
+ * flowinfo mask bits used by classifier
+ */
+/* for ipv4 */
+#define FIMB4_PROTO 0x0001
+#define FIMB4_TOS 0x0002
+#define FIMB4_DADDR 0x0004
+#define FIMB4_SADDR 0x0008
+#define FIMB4_DPORT 0x0010
+#define FIMB4_SPORT 0x0020
+#define FIMB4_GPI 0x0040
+#define FIMB4_ALL 0x007f
+/* for ipv6 */
+#define FIMB6_PROTO 0x0100
+#define FIMB6_TCLASS 0x0200
+#define FIMB6_DADDR 0x0400
+#define FIMB6_SADDR 0x0800
+#define FIMB6_DPORT 0x1000
+#define FIMB6_SPORT 0x2000
+#define FIMB6_GPI 0x4000
+#define FIMB6_FLABEL 0x8000
+#define FIMB6_ALL 0xff00
+
+#define FIMB_ALL (FIMB4_ALL|FIMB6_ALL)
+
+#define FIMB4_PORTS (FIMB4_DPORT|FIMB4_SPORT|FIMB4_GPI)
+#define FIMB6_PORTS (FIMB6_DPORT|FIMB6_SPORT|FIMB6_GPI)
+#endif /* ALTQ3_CLFIER_COMPAT */
+
+/*
+ * machine dependent clock
+ * a 64bit high resolution time counter.
+ */
+extern int machclk_usepcc;
+extern u_int32_t machclk_freq;
+extern u_int32_t machclk_per_tick;
+extern void init_machclk(void);
+extern u_int64_t read_machclk(void);
+
+/*
+ * debug support
+ */
+#ifdef ALTQ_DEBUG
+#ifdef __STDC__
+#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, #e))
+#else /* PCC */
+#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, "e"))
+#endif
+#else
+#define ASSERT(e) ((void)0)
+#endif
+
+/*
+ * misc stuff for compatibility
+ */
+/* ioctl cmd type */
+#if defined(__FreeBSD__) && (__FreeBSD__ < 3)
+typedef int ioctlcmd_t;
+#else
+typedef u_long ioctlcmd_t;
+#endif
+
+/*
+ * queue macros:
+ * the interface of the TAILQ_LAST macro changed after the introduction
+ * of softupdates. redefine it here to make it work with pre-2.2.7.
+ */
+#undef TAILQ_LAST
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+#ifndef TAILQ_EMPTY
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+#endif
+#ifndef TAILQ_FOREACH
+#define TAILQ_FOREACH(var, head, field) \
+ for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field))
+#endif
+
+/* macro for timeout/untimeout */
+#if (__FreeBSD_version > 300000) || defined(__NetBSD__)
+/* use callout */
+#include <freebsd/sys/callout.h>
+
+#if (__FreeBSD_version > 500000)
+#define CALLOUT_INIT(c) callout_init((c), 0)
+#else
+#define CALLOUT_INIT(c) callout_init((c))
+#endif
+#define CALLOUT_RESET(c,t,f,a) callout_reset((c),(t),(f),(a))
+#define CALLOUT_STOP(c) callout_stop((c))
+#if !defined(CALLOUT_INITIALIZER) && (__FreeBSD_version < 600000)
+#define CALLOUT_INITIALIZER { { { NULL } }, 0, NULL, NULL, 0 }
+#endif
+#elif defined(__OpenBSD__)
+#include <freebsd/sys/timeout.h>
+/* callout structure as a wrapper of struct timeout */
+struct callout {
+ struct timeout c_to;
+};
+#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0)
+#define CALLOUT_RESET(c,t,f,a) do { if (!timeout_initialized(&(c)->c_to)) \
+ timeout_set(&(c)->c_to, (f), (a)); \
+ timeout_add(&(c)->c_to, (t)); } while (/*CONSTCOND*/ 0)
+#define CALLOUT_STOP(c) timeout_del(&(c)->c_to)
+#define CALLOUT_INITIALIZER { { { NULL }, NULL, NULL, 0, 0 } }
+#else
+/* use old-style timeout/untimeout */
+/* dummy callout structure */
+struct callout {
+ void *c_arg; /* function argument */
+	void	(*c_func)(void *);	/* function to call */
+};
+#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0)
+#define CALLOUT_RESET(c,t,f,a) do { (c)->c_arg = (a); \
+ (c)->c_func = (f); \
+ timeout((f),(a),(t)); } while (/*CONSTCOND*/ 0)
+#define CALLOUT_STOP(c) untimeout((c)->c_func,(c)->c_arg)
+#define CALLOUT_INITIALIZER { NULL, NULL }
+#endif
+#if !defined(__FreeBSD__)
+typedef void (timeout_t)(void *);
+#endif
+
+#define m_pktlen(m) ((m)->m_pkthdr.len)
+
+struct ifnet; struct mbuf;
+struct pf_altq;
+#ifdef ALTQ3_CLFIER_COMPAT
+struct flowinfo;
+#endif
+
+void *altq_lookup(char *, int);
+#ifdef ALTQ3_CLFIER_COMPAT
+int altq_extractflow(struct mbuf *, int, struct flowinfo *, u_int32_t);
+int acc_add_filter(struct acc_classifier *, struct flow_filter *,
+ void *, u_long *);
+int acc_delete_filter(struct acc_classifier *, u_long);
+int acc_discard_filters(struct acc_classifier *, void *, int);
+void *acc_classify(void *, struct mbuf *, int);
+#endif
+u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *);
+void write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t);
+void altq_assert(const char *, int, const char *);
+int tbr_set(struct ifaltq *, struct tb_profile *);
+int tbr_get(struct ifaltq *, struct tb_profile *);
+
+int altq_pfattach(struct pf_altq *);
+int altq_pfdetach(struct pf_altq *);
+int altq_add(struct pf_altq *);
+int altq_remove(struct pf_altq *);
+int altq_add_queue(struct pf_altq *);
+int altq_remove_queue(struct pf_altq *);
+int altq_getqstats(struct pf_altq *, void *, int *);
+
+int cbq_pfattach(struct pf_altq *);
+int cbq_add_altq(struct pf_altq *);
+int cbq_remove_altq(struct pf_altq *);
+int cbq_add_queue(struct pf_altq *);
+int cbq_remove_queue(struct pf_altq *);
+int cbq_getqstats(struct pf_altq *, void *, int *);
+
+int priq_pfattach(struct pf_altq *);
+int priq_add_altq(struct pf_altq *);
+int priq_remove_altq(struct pf_altq *);
+int priq_add_queue(struct pf_altq *);
+int priq_remove_queue(struct pf_altq *);
+int priq_getqstats(struct pf_altq *, void *, int *);
+
+int hfsc_pfattach(struct pf_altq *);
+int hfsc_add_altq(struct pf_altq *);
+int hfsc_remove_altq(struct pf_altq *);
+int hfsc_add_queue(struct pf_altq *);
+int hfsc_remove_queue(struct pf_altq *);
+int hfsc_getqstats(struct pf_altq *, void *, int *);
+
+#endif /* _KERNEL */
+#endif /* _ALTQ_ALTQ_VAR_HH_ */
diff --git a/freebsd/sys/contrib/altq/altq/altqconf.h b/freebsd/sys/contrib/altq/altq/altqconf.h
new file mode 100644
index 00000000..b4d0f84e
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/altqconf.h
@@ -0,0 +1,29 @@
+/* $OpenBSD: altqconf.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $NetBSD: altqconf.h,v 1.2 2001/05/30 11:57:16 mrg Exp $ */
+
+#if defined(_KERNEL_OPT) || defined(__OpenBSD__)
+
+#if defined(_KERNEL_OPT)
+#include <freebsd/local/opt_altq_enabled.h>
+#endif
+
+#include <freebsd/sys/conf.h>
+
+#ifdef ALTQ
+#define NALTQ 1
+#else
+#define NALTQ 0
+#endif
+
+cdev_decl(altq);
+
+#ifdef __OpenBSD__
+#define cdev_altq_init(c,n) { \
+ dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
+ (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
+ (dev_type_stop((*))) enodev, 0, (dev_type_select((*))) enodev, \
+ (dev_type_mmap((*))) enodev }
+#else
+#define cdev_altq_init(x,y) cdev__oci_init(x,y)
+#endif
+#endif /* defined(_KERNEL_OPT) || defined(__OpenBSD__) */
diff --git a/freebsd/sys/contrib/altq/altq/if_altq.h b/freebsd/sys/contrib/altq/altq/if_altq.h
new file mode 100644
index 00000000..c2255b8c
--- /dev/null
+++ b/freebsd/sys/contrib/altq/altq/if_altq.h
@@ -0,0 +1,191 @@
+/* $FreeBSD$ */
+/* $KAME: if_altq.h,v 1.11 2003/07/10 12:07:50 kjc Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_IF_ALTQ_HH_
+#define _ALTQ_IF_ALTQ_HH_
+
+#if (defined(__FreeBSD__) && __FreeBSD_version >= 500000)
+#include <freebsd/sys/lock.h> /* XXX */
+#include <freebsd/sys/mutex.h> /* XXX */
+#include <freebsd/sys/event.h> /* XXX */
+#endif
+
+#ifdef _KERNEL_OPT
+#include <freebsd/altq/altqconf.h>
+#endif
+
+struct altq_pktattr; struct tb_regulator; struct top_cdnr;
+
+/*
+ * Structure defining a queue for a network interface.
+ */
+struct ifaltq {
+ /* fields compatible with struct ifqueue */
+ struct mbuf *ifq_head;
+ struct mbuf *ifq_tail;
+ int ifq_len;
+ int ifq_maxlen;
+ int ifq_drops;
+#if (defined(__FreeBSD__) && __FreeBSD_version >= 500000)
+ struct mtx ifq_mtx;
+#endif
+
+ /* driver owned queue (used for bulk dequeue and prepend) UNLOCKED */
+ struct mbuf *ifq_drv_head;
+ struct mbuf *ifq_drv_tail;
+ int ifq_drv_len;
+ int ifq_drv_maxlen;
+
+ /* alternate queueing related fields */
+ int altq_type; /* discipline type */
+ int altq_flags; /* flags (e.g. ready, in-use) */
+ void *altq_disc; /* for discipline-specific use */
+ struct ifnet *altq_ifp; /* back pointer to interface */
+
+ int (*altq_enqueue)(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+ struct mbuf *(*altq_dequeue)(struct ifaltq *, int);
+ int (*altq_request)(struct ifaltq *, int, void *);
+
+ /* classifier fields */
+ void *altq_clfier; /* classifier-specific use */
+ void *(*altq_classify)(void *, struct mbuf *, int);
+
+ /* token bucket regulator */
+ struct tb_regulator *altq_tbr;
+
+ /* input traffic conditioner (doesn't belong to the output queue...) */
+ struct top_cdnr *altq_cdnr;
+};
+
+#ifdef _KERNEL
+
+/*
+ * packet attributes used by queueing disciplines.
+ * pattr_class is a discipline-dependent scheduling class that is
+ * set by a classifier.
+ * pattr_hdr and pattr_af may be used by a discipline to access
+ * the header within a mbuf. (e.g. ECN needs to update the CE bit)
+ * note that pattr_hdr could be stale after m_pullup, though link
+ * layer output routines usually don't use m_pullup. link-level
+ * compression also invalidates these fields. thus, pattr_hdr needs
+ * to be verified when a discipline touches the header.
+ */
+struct altq_pktattr {
+ void *pattr_class; /* sched class set by classifier */
+ int pattr_af; /* address family */
+ caddr_t pattr_hdr; /* saved header position in mbuf */
+};
+
+/*
+ * mbuf tag to carry a queue id (and hints for ECN).
+ */
+struct altq_tag {
+ u_int32_t qid; /* queue id */
+ /* hints for ecn */
+ int af; /* address family */
+ void *hdr; /* saved header position in mbuf */
+};
+
+/*
+ * a token-bucket regulator limits the rate that a network driver can
+ * dequeue packets from the output queue.
+ * modern cards are able to buffer a large number of packets and dequeue
+ * too many packets at a time. this bursty dequeue behavior makes it
+ * impossible for queueing disciplines to schedule packets.
+ * a token-bucket is used to control the burst size in a device
+ * independent manner.
+ */
+struct tb_regulator {
+ int64_t tbr_rate; /* (scaled) token bucket rate */
+ int64_t tbr_depth; /* (scaled) token bucket depth */
+
+ int64_t tbr_token; /* (scaled) current token */
+ int64_t tbr_filluptime; /* (scaled) time to fill up bucket */
+ u_int64_t tbr_last; /* last time token was updated */
+
+ int tbr_lastop; /* last dequeue operation type
+ needed for poll-and-dequeue */
+};
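+
+/*
+ * Editorial note (a general description added for this edit, not
+ * part of the original sources): the "(scaled)" fields hold
+ * fixed-point values so the token arithmetic stays integral.  On
+ * each dequeue attempt the regulator credits tbr_token with
+ * tbr_rate times the machclk delta since tbr_last (capped at
+ * tbr_depth) and permits the dequeue only while tbr_token is
+ * positive.
+ */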
+
+/* if_altqflags */
+#define ALTQF_READY 0x01 /* driver supports alternate queueing */
+#define ALTQF_ENABLED 0x02 /* altq is in use */
+#define ALTQF_CLASSIFY 0x04 /* classify packets */
+#define ALTQF_CNDTNING 0x08 /* altq traffic conditioning is enabled */
+#define ALTQF_DRIVER1 0x40 /* driver specific */
+
+/* if_altqflags set internally only: */
+#define ALTQF_CANTCHANGE (ALTQF_READY)
+
+/* altq_dequeue 2nd arg */
+#define ALTDQ_REMOVE 1 /* dequeue mbuf from the queue */
+#define ALTDQ_POLL 2 /* don't dequeue mbuf from the queue */
+
+/* altq request types (currently only purge is defined) */
+#define ALTRQ_PURGE 1 /* purge all packets */
+
+#define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY)
+#define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED)
+#define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY)
+#define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING)
+
+#define ALTQ_SET_CNDTNING(ifq) ((ifq)->altq_flags |= ALTQF_CNDTNING)
+#define ALTQ_CLEAR_CNDTNING(ifq) ((ifq)->altq_flags &= ~ALTQF_CNDTNING)
+#define ALTQ_IS_ATTACHED(ifq) ((ifq)->altq_disc != NULL)
+
+#define ALTQ_ENQUEUE(ifq, m, pa, err) \
+ (err) = (*(ifq)->altq_enqueue)((ifq),(m),(pa))
+#define ALTQ_DEQUEUE(ifq, m) \
+ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_REMOVE)
+#define ALTQ_POLL(ifq, m) \
+ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_POLL)
+#define ALTQ_PURGE(ifq) \
+ (void)(*(ifq)->altq_request)((ifq), ALTRQ_PURGE, (void *)0)
+#define ALTQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
+#define TBR_IS_ENABLED(ifq) ((ifq)->altq_tbr != NULL)
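+
+/*
+ * Editorial note: a hypothetical driver-side sketch of the macros
+ * above (not from the original sources; driver_can_send() and the
+ * local variables are illustrative only):
+ *
+ *	if (ALTQ_IS_ENABLED(ifq)) {
+ *		ALTQ_ENQUEUE(ifq, m, &pktattr, error);
+ *		...
+ *		ALTQ_POLL(ifq, m);
+ *		if (m != NULL && driver_can_send(m))
+ *			ALTQ_DEQUEUE(ifq, m);
+ *	}
+ */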
+
+extern int altq_attach(struct ifaltq *, int, void *,
+ int (*)(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *),
+ struct mbuf *(*)(struct ifaltq *, int),
+ int (*)(struct ifaltq *, int, void *),
+ void *,
+ void *(*)(void *, struct mbuf *, int));
+extern int altq_detach(struct ifaltq *);
+extern int altq_enable(struct ifaltq *);
+extern int altq_disable(struct ifaltq *);
+extern struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int);
+extern int (*altq_input)(struct mbuf *, int);
+#if 0 /* ALTQ3_CLFIER_COMPAT */
+void altq_etherclassify(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+#endif
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_IF_ALTQ_HH_ */
diff --git a/freebsd/sys/contrib/pf/net/if_pflog.c b/freebsd/sys/contrib/pf/net/if_pflog.c
new file mode 100644
index 00000000..8ecc7cee
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/if_pflog.c
@@ -0,0 +1,438 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $OpenBSD: if_pflog.c,v 1.22 2006/12/15 09:31:20 otto Exp $ */
+/*
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr) and
+ * Niels Provos (provos@physnet.uni-hamburg.de).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998 by John Ioannidis, Angelos D. Keromytis
+ * and Niels Provos.
+ * Copyright (c) 2001, Angelos D. Keromytis, Niels Provos.
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_bpf.h>
+#include <freebsd/local/opt_pf.h>
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef DEV_BPF
+#define NBPFILTER DEV_BPF
+#else
+#define NBPFILTER 0
+#endif
+
+#ifdef DEV_PFLOG
+#define NPFLOG DEV_PFLOG
+#else
+#define NPFLOG 0
+#endif
+
+#else /* ! __FreeBSD__ */
+#include <freebsd/local/bpfilter.h>
+#include <freebsd/local/pflog.h>
+#endif /* __FreeBSD__ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/socket.h>
+#ifdef __FreeBSD__
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/sockio.h>
+#else
+#include <freebsd/sys/ioctl.h>
+#endif
+
+#include <freebsd/net/if.h>
+#ifdef __FreeBSD__
+#include <freebsd/net/if_clone.h>
+#endif
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/bpf.h>
+
+#ifdef INET
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#endif
+
+#ifdef INET6
+#ifndef INET
+#include <freebsd/netinet/in.h>
+#endif
+#include <freebsd/netinet6/nd6.h>
+#endif /* INET6 */
+
+#include <freebsd/net/pfvar.h>
+#include <freebsd/net/if_pflog.h>
+
+#ifdef INET
+#ifdef __FreeBSD__
+#include <freebsd/machine/in_cksum.h>
+#endif
+#endif
+
+#define PFLOGMTU (32768 + MHLEN + MLEN)
+
+#ifdef PFLOGDEBUG
+#define DPRINTF(x) do { if (pflogdebug) printf x ; } while (0)
+#else
+#define DPRINTF(x)
+#endif
+
+void pflogattach(int);
+int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+int pflogioctl(struct ifnet *, u_long, caddr_t);
+void pflogstart(struct ifnet *);
+#ifdef __FreeBSD__
+static int pflog_clone_create(struct if_clone *, int, caddr_t);
+static void pflog_clone_destroy(struct ifnet *);
+#else
+int pflog_clone_create(struct if_clone *, int);
+int pflog_clone_destroy(struct ifnet *);
+#endif
+
+LIST_HEAD(, pflog_softc) pflogif_list;
+#ifdef __FreeBSD__
+IFC_SIMPLE_DECLARE(pflog, 1);
+#else
+struct if_clone pflog_cloner =
+ IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy);
+#endif
+
+struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */
+
+#ifndef __FreeBSD__
+extern int ifqmaxlen;
+#endif
+
+void
+pflogattach(int npflog)
+{
+ int i;
+ LIST_INIT(&pflogif_list);
+ for (i = 0; i < PFLOGIFS_MAX; i++)
+ pflogifs[i] = NULL;
+#ifndef __FreeBSD__
+ (void) pflog_clone_create(&pflog_cloner, 0);
+#endif
+ if_clone_attach(&pflog_cloner);
+}
+
+#ifdef __FreeBSD__
+static int
+pflog_clone_create(struct if_clone *ifc, int unit, caddr_t param)
+#else
+int
+pflog_clone_create(struct if_clone *ifc, int unit)
+#endif
+{
+ struct ifnet *ifp;
+ struct pflog_softc *pflogif;
+ int s;
+
+ if (unit >= PFLOGIFS_MAX)
+ return (EINVAL);
+
+ if ((pflogif = malloc(sizeof(*pflogif), M_DEVBUF, M_NOWAIT)) == NULL)
+ return (ENOMEM);
+ bzero(pflogif, sizeof(*pflogif));
+
+ pflogif->sc_unit = unit;
+#ifdef __FreeBSD__
+ ifp = pflogif->sc_ifp = if_alloc(IFT_PFLOG);
+ if (ifp == NULL) {
+ free(pflogif, M_DEVBUF);
+ return (ENOSPC);
+ }
+ if_initname(ifp, ifc->ifc_name, unit);
+#else
+ ifp = &pflogif->sc_if;
+ snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", unit);
+#endif
+ ifp->if_softc = pflogif;
+ ifp->if_mtu = PFLOGMTU;
+ ifp->if_ioctl = pflogioctl;
+ ifp->if_output = pflogoutput;
+ ifp->if_start = pflogstart;
+#ifndef __FreeBSD__
+ ifp->if_type = IFT_PFLOG;
+#endif
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ ifp->if_hdrlen = PFLOG_HDRLEN;
+ if_attach(ifp);
+#ifndef __FreeBSD__
+ if_alloc_sadl(ifp);
+#endif
+
+#if NBPFILTER > 0
+#ifdef __FreeBSD__
+ bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN);
+#else
+ bpfattach(&pflogif->sc_if.if_bpf, ifp, DLT_PFLOG, PFLOG_HDRLEN);
+#endif
+#endif
+
+ s = splnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list);
+ pflogifs[unit] = ifp;
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+
+ return (0);
+}
+
+#ifdef __FreeBSD__
+static void
+pflog_clone_destroy(struct ifnet *ifp)
+#else
+int
+pflog_clone_destroy(struct ifnet *ifp)
+#endif
+{
+ struct pflog_softc *pflogif = ifp->if_softc;
+ int s;
+
+ s = splnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ pflogifs[pflogif->sc_unit] = NULL;
+ LIST_REMOVE(pflogif, sc_list);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+
+#if NBPFILTER > 0
+ bpfdetach(ifp);
+#endif
+ if_detach(ifp);
+#ifdef __FreeBSD__
+ if_free(ifp);
+#endif
+ free(pflogif, M_DEVBUF);
+#ifndef __FreeBSD__
+ return (0);
+#endif
+}
+
+/*
+ * Start output on the pflog interface.
+ */
+void
+pflogstart(struct ifnet *ifp)
+{
+ struct mbuf *m;
+#ifndef __FreeBSD__
+ int s;
+#endif
+
+ for (;;) {
+#ifdef __FreeBSD__
+ IF_LOCK(&ifp->if_snd);
+ _IF_DROP(&ifp->if_snd);
+ _IF_DEQUEUE(&ifp->if_snd, m);
+ IF_UNLOCK(&ifp->if_snd);
+#else
+ s = splnet();
+ IF_DROP(&ifp->if_snd);
+ IF_DEQUEUE(&ifp->if_snd, m);
+ splx(s);
+#endif
+
+ if (m == NULL)
+ return;
+ else
+ m_freem(m);
+ }
+}
+
+int
+pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct route *ro)
+{
+ m_freem(m);
+ return (0);
+}
+
+/* ARGSUSED */
+int
+pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ switch (cmd) {
+ case SIOCSIFADDR:
+ case SIOCAIFADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCSIFFLAGS:
+#ifdef __FreeBSD__
+ if (ifp->if_flags & IFF_UP)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ else
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+#else
+ if (ifp->if_flags & IFF_UP)
+ ifp->if_flags |= IFF_RUNNING;
+ else
+ ifp->if_flags &= ~IFF_RUNNING;
+#endif
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+int
+pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir,
+ u_int8_t reason, struct pf_rule *rm, struct pf_rule *am,
+ struct pf_ruleset *ruleset, struct pf_pdesc *pd)
+{
+#if NBPFILTER > 0
+ struct ifnet *ifn;
+ struct pfloghdr hdr;
+
+ if (kif == NULL || m == NULL || rm == NULL || pd == NULL)
+ return (-1);
+
+ if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf)
+ return (0);
+
+ bzero(&hdr, sizeof(hdr));
+ hdr.length = PFLOG_REAL_HDRLEN;
+ hdr.af = af;
+ hdr.action = rm->action;
+ hdr.reason = reason;
+ memcpy(hdr.ifname, kif->pfik_name, sizeof(hdr.ifname));
+
+ if (am == NULL) {
+ hdr.rulenr = htonl(rm->nr);
+ hdr.subrulenr = -1;
+ } else {
+ hdr.rulenr = htonl(am->nr);
+ hdr.subrulenr = htonl(rm->nr);
+ if (ruleset != NULL && ruleset->anchor != NULL)
+ strlcpy(hdr.ruleset, ruleset->anchor->name,
+ sizeof(hdr.ruleset));
+ }
+ if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done)
+#ifdef __FreeBSD__
+ /*
+ * XXX: This should not happen as we force an early lookup
+ * via debug.pfugidhack
+ */
+ ; /* empty */
+#else
+ pd->lookup.done = pf_socket_lookup(dir, pd);
+#endif
+ if (pd->lookup.done > 0) {
+ hdr.uid = pd->lookup.uid;
+ hdr.pid = pd->lookup.pid;
+ } else {
+ hdr.uid = UID_MAX;
+ hdr.pid = NO_PID;
+ }
+ hdr.rule_uid = rm->cuid;
+ hdr.rule_pid = rm->cpid;
+ hdr.dir = dir;
+
+#ifdef INET
+ if (af == AF_INET && dir == PF_OUT) {
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+ ip->ip_sum = 0;
+ ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
+ }
+#endif /* INET */
+
+ ifn->if_opackets++;
+ ifn->if_obytes += m->m_pkthdr.len;
+#ifdef __FreeBSD__
+ BPF_MTAP2(ifn, &hdr, PFLOG_HDRLEN, m);
+#else
+ bpf_mtap_hdr(ifn->if_bpf, (char *)&hdr, PFLOG_HDRLEN, m,
+ BPF_DIRECTION_OUT);
+#endif
+#endif
+
+ return (0);
+}
+
+#ifdef __FreeBSD__
+static int
+pflog_modevent(module_t mod, int type, void *data)
+{
+ int error = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ pflogattach(1);
+ PF_LOCK();
+ pflog_packet_ptr = pflog_packet;
+ PF_UNLOCK();
+ break;
+ case MOD_UNLOAD:
+ PF_LOCK();
+ pflog_packet_ptr = NULL;
+ PF_UNLOCK();
+ if_clone_detach(&pflog_cloner);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+static moduledata_t pflog_mod = { "pflog", pflog_modevent, 0 };
+
+#define PFLOG_MODVER 1
+
+DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+MODULE_VERSION(pflog, PFLOG_MODVER);
+MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER);
+#endif /* __FreeBSD__ */
diff --git a/freebsd/sys/contrib/pf/net/if_pflog.h b/freebsd/sys/contrib/pf/net/if_pflog.h
new file mode 100644
index 00000000..9e9efbef
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/if_pflog.h
@@ -0,0 +1,103 @@
+/* $FreeBSD$ */
+/* $OpenBSD: if_pflog.h,v 1.14 2006/10/25 11:27:01 henning Exp $ */
+/*
+ * Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_PFLOG_HH_
+#define _NET_IF_PFLOG_HH_
+
+#define PFLOGIFS_MAX 16
+
+#ifdef _KERNEL
+struct pflog_softc {
+#ifdef __FreeBSD__
+ struct ifnet *sc_ifp; /* the interface pointer */
+#else
+ struct ifnet sc_if; /* the interface */
+#endif
+ int sc_unit;
+ LIST_ENTRY(pflog_softc) sc_list;
+};
+#endif /* _KERNEL */
+
+#define PFLOG_RULESET_NAME_SIZE 16
+
+struct pfloghdr {
+ u_int8_t length;
+ sa_family_t af;
+ u_int8_t action;
+ u_int8_t reason;
+ char ifname[IFNAMSIZ];
+ char ruleset[PFLOG_RULESET_NAME_SIZE];
+ u_int32_t rulenr;
+ u_int32_t subrulenr;
+ uid_t uid;
+ pid_t pid;
+ uid_t rule_uid;
+ pid_t rule_pid;
+ u_int8_t dir;
+ u_int8_t pad[3];
+};
+
+#define PFLOG_HDRLEN sizeof(struct pfloghdr)
+/* minus pad, also used as a signature */
+#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad)
+
+/* XXX remove later when old format logs are no longer needed */
+struct old_pfloghdr {
+ u_int32_t af;
+ char ifname[IFNAMSIZ];
+ short rnr;
+ u_short reason;
+ u_short action;
+ u_short dir;
+};
+#define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr)
+
+#ifdef _KERNEL
+
+#ifdef __FreeBSD__
+struct pf_rule;
+struct pf_ruleset;
+struct pfi_kif;
+struct pf_pdesc;
+
+typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t,
+ u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *,
+ struct pf_ruleset *, struct pf_pdesc *);
+extern pflog_packet_t *pflog_packet_ptr;
+#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) do { \
+ if (pflog_packet_ptr != NULL) \
+ pflog_packet_ptr(i,a,b,c,d,e,f,g,h); \
+} while (0)
+#else /* ! __FreeBSD__ */
+#if NPFLOG > 0
+#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) pflog_packet(i,a,b,c,d,e,f,g,h)
+#else
+#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0)
+#endif /* NPFLOG > 0 */
+#endif /* __FreeBSD__ */
+#endif /* _KERNEL */
+#endif /* _NET_IF_PFLOG_HH_ */
diff --git a/freebsd/sys/contrib/pf/net/if_pfsync.c b/freebsd/sys/contrib/pf/net/if_pfsync.c
new file mode 100644
index 00000000..bc70adfe
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/if_pfsync.c
@@ -0,0 +1,2331 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_carp.h>
+#include <freebsd/local/opt_bpf.h>
+#include <freebsd/local/opt_pf.h>
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef DEV_BPF
+#define NBPFILTER DEV_BPF
+#else
+#define NBPFILTER 0
+#endif
+
+#ifdef DEV_PFSYNC
+#define NPFSYNC DEV_PFSYNC
+#else
+#define NPFSYNC 0
+#endif
+
+#ifdef DEV_CARP
+#define NCARP DEV_CARP
+#else
+#define NCARP 0
+#endif
+#endif /* __FreeBSD__ */
+
+#include <freebsd/sys/param.h>
+#ifdef __FreeBSD__
+#include <freebsd/sys/priv.h>
+#endif
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#ifdef __FreeBSD__
+#include <freebsd/sys/endian.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/taskqueue.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/sysctl.h>
+#else
+#include <freebsd/sys/ioctl.h>
+#include <freebsd/sys/timeout.h>
+#endif
+#include <freebsd/sys/kernel.h>
+
+#include <freebsd/net/if.h>
+#ifdef __FreeBSD__
+#include <freebsd/net/if_clone.h>
+#endif
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/if_ether.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_seq.h>
+
+#ifdef INET
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#endif
+
+#ifdef INET6
+#include <freebsd/netinet6/nd6.h>
+#endif /* INET6 */
+
+#ifndef __FreeBSD__
+#include <freebsd/local/carp.h>
+#endif
+#if NCARP > 0
+#include <freebsd/netinet/ip_carp.h>
+#endif
+
+#include <freebsd/net/pfvar.h>
+#include <freebsd/net/if_pfsync.h>
+
+#ifndef __FreeBSD__
+#include <freebsd/local/bpfilter.h>
+#include <freebsd/local/pfsync.h>
+#endif
+
+#define PFSYNC_MINMTU \
+ (sizeof(struct pfsync_header) + sizeof(struct pf_state))
+
+#ifdef PFSYNCDEBUG
+#define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0)
+int pfsyncdebug;
+#else
+#define DPRINTF(x)
+#endif
+
+struct pfsync_softc *pfsyncif = NULL;
+struct pfsyncstats pfsyncstats;
+#ifdef __FreeBSD__
+SYSCTL_DECL(_net_inet_pfsync);
+SYSCTL_STRUCT(_net_inet_pfsync, 0, stats, CTLFLAG_RW,
+ &pfsyncstats, pfsyncstats,
+ "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
+#endif
+
+void pfsyncattach(int);
+#ifdef __FreeBSD__
+int pfsync_clone_create(struct if_clone *, int, caddr_t);
+void pfsync_clone_destroy(struct ifnet *);
+#else
+int pfsync_clone_create(struct if_clone *, int);
+int pfsync_clone_destroy(struct ifnet *);
+#endif
+void pfsync_setmtu(struct pfsync_softc *, int);
+int pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
+ struct pf_state_peer *);
+int pfsync_insert_net_state(struct pfsync_state *, u_int8_t);
+#ifdef PFSYNC_TDB
+void pfsync_update_net_tdb(struct pfsync_tdb *);
+#endif
+int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+int pfsyncioctl(struct ifnet *, u_long, caddr_t);
+void pfsyncstart(struct ifnet *);
+
+struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
+int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
+int pfsync_sendout(struct pfsync_softc *);
+#ifdef PFSYNC_TDB
+int pfsync_tdb_sendout(struct pfsync_softc *);
+#endif
+int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
+void pfsync_timeout(void *);
+#ifdef PFSYNC_TDB
+void pfsync_tdb_timeout(void *);
+#endif
+void pfsync_send_bus(struct pfsync_softc *, u_int8_t);
+void pfsync_bulk_update(void *);
+void pfsync_bulkfail(void *);
+
+#ifdef __FreeBSD__
+void pfsync_ifdetach(void *, struct ifnet *);
+void pfsync_senddef(void *, int);
+
+/* XXX: ugly -- map the OpenBSD spellings onto their FreeBSD equivalents */
+#define betoh64 (unsigned long long)be64toh
+#define timeout_del callout_stop
+#endif
+
+int pfsync_sync_ok;
+#ifndef __FreeBSD__
+extern int ifqmaxlen;
+#endif
+
+#ifdef __FreeBSD__
+IFC_SIMPLE_DECLARE(pfsync, 1);
+#else
+struct if_clone pfsync_cloner =
+ IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
+#endif
+
+void
+pfsyncattach(int npfsync)
+{
+ if_clone_attach(&pfsync_cloner);
+}
+
+int
+#ifdef __FreeBSD__
+pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
+#else
+pfsync_clone_create(struct if_clone *ifc, int unit)
+#endif
+{
+ struct ifnet *ifp;
+
+ if (unit != 0)
+ return (EINVAL);
+
+ pfsync_sync_ok = 1;
+ if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT)) == NULL)
+ return (ENOMEM);
+ bzero(pfsyncif, sizeof(*pfsyncif));
+#ifdef __FreeBSD__
+ if ((pfsyncif->sc_imo.imo_membership = (struct in_multi **)malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_DEVBUF,
+ M_NOWAIT)) == NULL) {
+ free(pfsyncif, M_DEVBUF);
+ return (ENOSPC);
+ }
+ pfsyncif->sc_imo.imo_mfilters = NULL;
+ pfsyncif->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ pfsyncif->sc_imo.imo_multicast_vif = -1;
+
+ ifp = pfsyncif->sc_ifp = if_alloc(IFT_PFSYNC);
+ if (ifp == NULL) {
+ free(pfsyncif->sc_imo.imo_membership, M_DEVBUF);
+ free(pfsyncif, M_DEVBUF);
+ return (ENOSPC);
+ }
+ if_initname(ifp, ifc->ifc_name, unit);
+
+ pfsyncif->sc_detachtag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ pfsync_ifdetach, pfsyncif, EVENTHANDLER_PRI_ANY);
+ if (pfsyncif->sc_detachtag == NULL) {
+ if_free(ifp);
+ free(pfsyncif->sc_imo.imo_membership, M_DEVBUF);
+ free(pfsyncif, M_DEVBUF);
+ return (ENOSPC);
+ }
+
+ pfsyncif->sc_ifq.ifq_maxlen = ifqmaxlen;
+ mtx_init(&pfsyncif->sc_ifq.ifq_mtx, ifp->if_xname,
+ "pfsync send queue", MTX_DEF);
+ TASK_INIT(&pfsyncif->sc_send_task, 0, pfsync_senddef, pfsyncif);
+#endif
+ pfsyncif->sc_mbuf = NULL;
+ pfsyncif->sc_mbuf_net = NULL;
+#ifdef PFSYNC_TDB
+ pfsyncif->sc_mbuf_tdb = NULL;
+#endif
+ pfsyncif->sc_statep.s = NULL;
+ pfsyncif->sc_statep_net.s = NULL;
+#ifdef PFSYNC_TDB
+ pfsyncif->sc_statep_tdb.t = NULL;
+#endif
+ pfsyncif->sc_maxupdates = 128;
+#ifdef __FreeBSD__
+ pfsyncif->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
+ pfsyncif->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
+#else
+ pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
+ pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP;
+#endif
+ pfsyncif->sc_ureq_received = 0;
+ pfsyncif->sc_ureq_sent = 0;
+ pfsyncif->sc_bulk_send_next = NULL;
+ pfsyncif->sc_bulk_terminator = NULL;
+#ifndef __FreeBSD__
+ ifp = &pfsyncif->sc_if;
+ snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
+#endif
+ ifp->if_softc = pfsyncif;
+ ifp->if_ioctl = pfsyncioctl;
+ ifp->if_output = pfsyncoutput;
+ ifp->if_start = pfsyncstart;
+ ifp->if_type = IFT_PFSYNC;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ ifp->if_hdrlen = PFSYNC_HDRLEN;
+ pfsync_setmtu(pfsyncif, ETHERMTU);
+#ifdef __FreeBSD__
+ callout_init(&pfsyncif->sc_tmo, CALLOUT_MPSAFE);
+#ifdef PFSYNC_TDB
+ callout_init(&pfsyncif->sc_tdb_tmo, CALLOUT_MPSAFE);
+#endif
+ callout_init(&pfsyncif->sc_bulk_tmo, CALLOUT_MPSAFE);
+ callout_init(&pfsyncif->sc_bulkfail_tmo, CALLOUT_MPSAFE);
+#else
+ timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif);
+ timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif);
+ timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif);
+ timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif);
+#endif
+ if_attach(ifp);
+#ifndef __FreeBSD__
+ if_alloc_sadl(ifp);
+#endif
+
+#if NCARP > 0
+ if_addgroup(ifp, "carp");
+#endif
+
+#if NBPFILTER > 0
+#ifdef __FreeBSD__
+ bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
+#else
+ bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
+#endif
+#endif
+
+ return (0);
+}
+
+#ifdef __FreeBSD__
+void
+#else
+int
+#endif
+pfsync_clone_destroy(struct ifnet *ifp)
+{
+#ifdef __FreeBSD__
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfsyncif->sc_detachtag);
+ callout_stop(&pfsyncif->sc_tmo);
+#ifdef PFSYNC_TDB
+ callout_stop(&pfsyncif->sc_tdb_tmo);
+#endif
+ callout_stop(&pfsyncif->sc_bulk_tmo);
+ callout_stop(&pfsyncif->sc_bulkfail_tmo);
+ /* XXX: more? */
+#endif
+
+#if NBPFILTER > 0
+ bpfdetach(ifp);
+#endif
+ if_detach(ifp);
+#ifdef __FreeBSD__
+ if_free(ifp);
+ free(pfsyncif->sc_imo.imo_membership, M_DEVBUF);
+#endif
+ free(pfsyncif, M_DEVBUF);
+ pfsyncif = NULL;
+#ifndef __FreeBSD__
+ return (0);
+#endif
+}
+
+/*
+ * Start output on the pfsync interface.
+ */
+void
+pfsyncstart(struct ifnet *ifp)
+{
+ struct mbuf *m;
+#ifndef __FreeBSD__
+ int s;
+#endif
+
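+	/*
+	 * pfsync never transmits through its own send queue: packets are
+	 * built and handed to ip_output() directly, so anything queued
+	 * here is counted as a drop and freed.
+	 */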
+ for (;;) {
+#ifdef __FreeBSD__
+ IF_LOCK(&ifp->if_snd);
+ _IF_DROP(&ifp->if_snd);
+ _IF_DEQUEUE(&ifp->if_snd, m);
+ IF_UNLOCK(&ifp->if_snd);
+#else
+ s = splnet();
+ IF_DROP(&ifp->if_snd);
+ IF_DEQUEUE(&ifp->if_snd, m);
+ splx(s);
+#endif
+
+ if (m == NULL)
+ return;
+ else
+ m_freem(m);
+ }
+}
+
+int
+pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
+ struct pf_state_peer *d)
+{
+ if (s->scrub.scrub_flag && d->scrub == NULL) {
+ d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
+ if (d->scrub == NULL)
+ return (ENOMEM);
+ bzero(d->scrub, sizeof(*d->scrub));
+ }
+
+ return (0);
+}
+
+int
+pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag)
+{
+ struct pf_state *st = NULL;
+ struct pf_rule *r = NULL;
+ struct pfi_kif *kif;
+
+ if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pfsync_insert_net_state: invalid creator id:"
+ " %08x\n", ntohl(sp->creatorid));
+ return (EINVAL);
+ }
+
+ kif = pfi_kif_get(sp->ifname);
+ if (kif == NULL) {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync_insert_net_state: "
+ "unknown interface: %s\n", sp->ifname);
+ /* skip this state */
+ return (0);
+ }
+
+ /*
+ * If the ruleset checksums match, it's safe to associate the state
+ * with the rule of that number.
+ */
+ if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag)
+ r = pf_main_ruleset.rules[
+ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
+ else
+ r = &pf_default_rule;
+
+ if (!r->max_states || r->states < r->max_states)
+ st = pool_get(&pf_state_pl, PR_NOWAIT);
+ if (st == NULL) {
+ pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+ return (ENOMEM);
+ }
+ bzero(st, sizeof(*st));
+
+ /* allocate memory for scrub info */
+ if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
+ pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) {
+ pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+ if (st->src.scrub)
+ pool_put(&pf_state_scrub_pl, st->src.scrub);
+ pool_put(&pf_state_pl, st);
+ return (ENOMEM);
+ }
+
+ st->rule.ptr = r;
+ /* XXX get pointers to nat_rule and anchor */
+
+ /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
+ r->states++;
+
+ /* fill in the rest of the state entry */
+ pf_state_host_ntoh(&sp->lan, &st->lan);
+ pf_state_host_ntoh(&sp->gwy, &st->gwy);
+ pf_state_host_ntoh(&sp->ext, &st->ext);
+
+ pf_state_peer_ntoh(&sp->src, &st->src);
+ pf_state_peer_ntoh(&sp->dst, &st->dst);
+
+ bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
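+	/* creation and expire are carried as relative ages on the wire
+	 * and rebased against the local clock on arrival */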
+ st->creation = time_second - ntohl(sp->creation);
+ st->expire = ntohl(sp->expire) + time_second;
+
+ st->af = sp->af;
+ st->proto = sp->proto;
+ st->direction = sp->direction;
+ st->log = sp->log;
+ st->timeout = sp->timeout;
+ st->state_flags = sp->state_flags;
+
+ bcopy(sp->id, &st->id, sizeof(st->id));
+ st->creatorid = sp->creatorid;
+ st->sync_flags = PFSTATE_FROMSYNC;
+
+ if (pf_insert_state(kif, st)) {
+ pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+ /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
+ r->states--;
+ if (st->dst.scrub)
+ pool_put(&pf_state_scrub_pl, st->dst.scrub);
+ if (st->src.scrub)
+ pool_put(&pf_state_scrub_pl, st->src.scrub);
+ pool_put(&pf_state_pl, st);
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+void
+#ifdef __FreeBSD__
+pfsync_input(struct mbuf *m, __unused int off)
+#else
+pfsync_input(struct mbuf *m, ...)
+#endif
+{
+ struct ip *ip = mtod(m, struct ip *);
+ struct pfsync_header *ph;
+ struct pfsync_softc *sc = pfsyncif;
+ struct pf_state *st;
+ struct pf_state_cmp key;
+ struct pfsync_state *sp;
+ struct pfsync_state_upd *up;
+ struct pfsync_state_del *dp;
+ struct pfsync_state_clr *cp;
+ struct pfsync_state_upd_req *rup;
+ struct pfsync_state_bus *bus;
+#ifdef PFSYNC_TDB
+ struct pfsync_tdb *pt;
+#endif
+ struct in_addr src;
+ struct mbuf *mp;
+ int iplen, action, error, i, s, count, offp, sfail, stale = 0;
+ u_int8_t chksum_flag = 0;
+
+ pfsyncstats.pfsyncs_ipackets++;
+
+ /* verify that we have a sync interface configured */
+ if (!sc || !sc->sc_sync_ifp || !pf_status.running)
+ goto done;
+
+ /* verify that the packet came in on the right interface */
+ if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
+ pfsyncstats.pfsyncs_badif++;
+ goto done;
+ }
+
+ /* verify that the IP TTL is 255. */
+ if (ip->ip_ttl != PFSYNC_DFLTTL) {
+ pfsyncstats.pfsyncs_badttl++;
+ goto done;
+ }
+
+ iplen = ip->ip_hl << 2;
+
+ if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
+ pfsyncstats.pfsyncs_hdrops++;
+ goto done;
+ }
+
+ if (iplen + sizeof(*ph) > m->m_len) {
+ if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
+ pfsyncstats.pfsyncs_hdrops++;
+ goto done;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ ph = (struct pfsync_header *)((char *)ip + iplen);
+
+ /* verify the version */
+ if (ph->version != PFSYNC_VERSION) {
+ pfsyncstats.pfsyncs_badver++;
+ goto done;
+ }
+
+ action = ph->action;
+ count = ph->count;
+
+ /* make sure it's a valid action code */
+ if (action >= PFSYNC_ACT_MAX) {
+ pfsyncstats.pfsyncs_badact++;
+ goto done;
+ }
+
+ /* Cheaper to grab this now than having to mess with mbufs later */
+ src = ip->ip_src;
+
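+	/*
+	 * A matching ruleset checksum means both peers run an identical
+	 * ruleset, so received states may later be bound to rules by number.
+	 */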
+ if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
+ chksum_flag++;
+
+ switch (action) {
+ case PFSYNC_ACT_CLR: {
+ struct pf_state *nexts;
+ struct pfi_kif *kif;
+ u_int32_t creatorid;
+ if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+ sizeof(*cp), &offp)) == NULL) {
+ pfsyncstats.pfsyncs_badlen++;
+ return;
+ }
+ cp = (struct pfsync_state_clr *)(mp->m_data + offp);
+ creatorid = cp->creatorid;
+
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ if (cp->ifname[0] == '\0') {
+ for (st = RB_MIN(pf_state_tree_id, &tree_id);
+ st; st = nexts) {
+ nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
+ if (st->creatorid == creatorid) {
+ st->sync_flags |= PFSTATE_FROMSYNC;
+ pf_unlink_state(st);
+ }
+ }
+ } else {
+ if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ return;
+ }
+ for (st = RB_MIN(pf_state_tree_lan_ext,
+ &kif->pfik_lan_ext); st; st = nexts) {
+ nexts = RB_NEXT(pf_state_tree_lan_ext,
+ &kif->pfik_lan_ext, st);
+ if (st->creatorid == creatorid) {
+ st->sync_flags |= PFSTATE_FROMSYNC;
+ pf_unlink_state(st);
+ }
+ }
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+
+ break;
+ }
+ case PFSYNC_ACT_INS:
+ if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+ count * sizeof(*sp), &offp)) == NULL) {
+ pfsyncstats.pfsyncs_badlen++;
+ return;
+ }
+
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
+ i < count; i++, sp++) {
+ /* check for invalid values */
+ if (sp->timeout >= PFTM_MAX ||
+ sp->src.state > PF_TCPS_PROXY_DST ||
+ sp->dst.state > PF_TCPS_PROXY_DST ||
+ sp->direction > PF_OUT ||
+ (sp->af != AF_INET && sp->af != AF_INET6)) {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync_insert: PFSYNC_ACT_INS: "
+ "invalid value\n");
+ pfsyncstats.pfsyncs_badstate++;
+ continue;
+ }
+
+ if ((error = pfsync_insert_net_state(sp,
+ chksum_flag))) {
+ if (error == ENOMEM) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ goto done;
+ }
+ continue;
+ }
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ break;
+ case PFSYNC_ACT_UPD:
+ if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+ count * sizeof(*sp), &offp)) == NULL) {
+ pfsyncstats.pfsyncs_badlen++;
+ return;
+ }
+
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
+ i < count; i++, sp++) {
+ int flags = PFSYNC_FLAG_STALE;
+
+ /* check for invalid values */
+ if (sp->timeout >= PFTM_MAX ||
+ sp->src.state > PF_TCPS_PROXY_DST ||
+ sp->dst.state > PF_TCPS_PROXY_DST) {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync_insert: PFSYNC_ACT_UPD: "
+ "invalid value\n");
+ pfsyncstats.pfsyncs_badstate++;
+ continue;
+ }
+
+ bcopy(sp->id, &key.id, sizeof(key.id));
+ key.creatorid = sp->creatorid;
+
+ st = pf_find_state_byid(&key);
+ if (st == NULL) {
+ /* insert the update */
+ if (pfsync_insert_net_state(sp, chksum_flag))
+ pfsyncstats.pfsyncs_badstate++;
+ continue;
+ }
+ sfail = 0;
+ if (st->proto == IPPROTO_TCP) {
+ /*
+ * The state should never go backwards except
+ * for syn-proxy states. Neither should the
+ * sequence window slide backwards.
+ */
+ if (st->src.state > sp->src.state &&
+ (st->src.state < PF_TCPS_PROXY_SRC ||
+ sp->src.state >= PF_TCPS_PROXY_SRC))
+ sfail = 1;
+ else if (SEQ_GT(st->src.seqlo,
+ ntohl(sp->src.seqlo)))
+ sfail = 3;
+ else if (st->dst.state > sp->dst.state) {
+				/* There might still be useful
+				 * information about the src state here,
+				 * so import that part of the update,
+				 * then "fail" so we send the updated
+				 * state back to the peer who is missing
+				 * out on what we know. */
+ pf_state_peer_ntoh(&sp->src, &st->src);
+ /* XXX do anything with timeouts? */
+ sfail = 7;
+ flags = 0;
+ } else if (st->dst.state >= TCPS_SYN_SENT &&
+ SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
+ sfail = 4;
+ } else {
+ /*
+				 * Non-TCP protocol state machines always
+				 * go forward.
+ */
+ if (st->src.state > sp->src.state)
+ sfail = 5;
+ else if (st->dst.state > sp->dst.state)
+ sfail = 6;
+ }
+ if (sfail) {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: %s stale update "
+ "(%d) id: %016llx "
+ "creatorid: %08x\n",
+ (sfail < 7 ? "ignoring"
+ : "partial"), sfail,
+ betoh64(st->id),
+ ntohl(st->creatorid));
+ pfsyncstats.pfsyncs_badstate++;
+
+ if (!(sp->sync_flags & PFSTATE_STALE)) {
+ /* we have a better state, send it */
+ if (sc->sc_mbuf != NULL && !stale)
+ pfsync_sendout(sc);
+ stale++;
+ if (!st->sync_flags)
+ pfsync_pack_state(
+ PFSYNC_ACT_UPD, st, flags);
+ }
+ continue;
+ }
+ pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
+ pf_state_peer_ntoh(&sp->src, &st->src);
+ pf_state_peer_ntoh(&sp->dst, &st->dst);
+ st->expire = ntohl(sp->expire) + time_second;
+ st->timeout = sp->timeout;
+ }
+ if (stale && sc->sc_mbuf != NULL)
+ pfsync_sendout(sc);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ break;
+ /*
+ * It's not strictly necessary for us to support the "uncompressed"
+ * delete action, but it's relatively simple and maintains consistency.
+ */
+ case PFSYNC_ACT_DEL:
+ if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+ count * sizeof(*sp), &offp)) == NULL) {
+ pfsyncstats.pfsyncs_badlen++;
+ return;
+ }
+
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
+ i < count; i++, sp++) {
+ bcopy(sp->id, &key.id, sizeof(key.id));
+ key.creatorid = sp->creatorid;
+
+ st = pf_find_state_byid(&key);
+ if (st == NULL) {
+ pfsyncstats.pfsyncs_badstate++;
+ continue;
+ }
+ st->sync_flags |= PFSTATE_FROMSYNC;
+ pf_unlink_state(st);
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ break;
+ case PFSYNC_ACT_UPD_C: {
+ int update_requested = 0;
+
+ if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+ count * sizeof(*up), &offp)) == NULL) {
+ pfsyncstats.pfsyncs_badlen++;
+ return;
+ }
+
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
+ i < count; i++, up++) {
+ /* check for invalid values */
+ if (up->timeout >= PFTM_MAX ||
+ up->src.state > PF_TCPS_PROXY_DST ||
+ up->dst.state > PF_TCPS_PROXY_DST) {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync_insert: "
+ "PFSYNC_ACT_UPD_C: "
+ "invalid value\n");
+ pfsyncstats.pfsyncs_badstate++;
+ continue;
+ }
+
+ bcopy(up->id, &key.id, sizeof(key.id));
+ key.creatorid = up->creatorid;
+
+ st = pf_find_state_byid(&key);
+ if (st == NULL) {
+ /* We don't have this state. Ask for it. */
+ error = pfsync_request_update(up, &src);
+ if (error == ENOMEM) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ goto done;
+ }
+ update_requested = 1;
+ pfsyncstats.pfsyncs_badstate++;
+ continue;
+ }
+ sfail = 0;
+ if (st->proto == IPPROTO_TCP) {
+ /*
+ * The state should never go backwards except
+ * for syn-proxy states. Neither should the
+ * sequence window slide backwards.
+ */
+ if (st->src.state > up->src.state &&
+ (st->src.state < PF_TCPS_PROXY_SRC ||
+ up->src.state >= PF_TCPS_PROXY_SRC))
+ sfail = 1;
+ else if (st->dst.state > up->dst.state)
+ sfail = 2;
+ else if (SEQ_GT(st->src.seqlo,
+ ntohl(up->src.seqlo)))
+ sfail = 3;
+ else if (st->dst.state >= TCPS_SYN_SENT &&
+ SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
+ sfail = 4;
+ } else {
+ /*
+				 * Non-TCP protocol state machines always
+				 * go forward.
+ */
+ if (st->src.state > up->src.state)
+ sfail = 5;
+ else if (st->dst.state > up->dst.state)
+ sfail = 6;
+ }
+ if (sfail) {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: ignoring stale update "
+ "(%d) id: %016llx "
+ "creatorid: %08x\n", sfail,
+ betoh64(st->id),
+ ntohl(st->creatorid));
+ pfsyncstats.pfsyncs_badstate++;
+
+ /* we have a better state, send it out */
+ if ((!stale || update_requested) &&
+ sc->sc_mbuf != NULL) {
+ pfsync_sendout(sc);
+ update_requested = 0;
+ }
+ stale++;
+ if (!st->sync_flags)
+ pfsync_pack_state(PFSYNC_ACT_UPD, st,
+ PFSYNC_FLAG_STALE);
+ continue;
+ }
+ pfsync_alloc_scrub_memory(&up->dst, &st->dst);
+ pf_state_peer_ntoh(&up->src, &st->src);
+ pf_state_peer_ntoh(&up->dst, &st->dst);
+ st->expire = ntohl(up->expire) + time_second;
+ st->timeout = up->timeout;
+ }
+ if ((update_requested || stale) && sc->sc_mbuf)
+ pfsync_sendout(sc);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ break;
+ }
+ case PFSYNC_ACT_DEL_C:
+ if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+ count * sizeof(*dp), &offp)) == NULL) {
+ pfsyncstats.pfsyncs_badlen++;
+ return;
+ }
+
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
+ i < count; i++, dp++) {
+ bcopy(dp->id, &key.id, sizeof(key.id));
+ key.creatorid = dp->creatorid;
+
+ st = pf_find_state_byid(&key);
+ if (st == NULL) {
+ pfsyncstats.pfsyncs_badstate++;
+ continue;
+ }
+ st->sync_flags |= PFSTATE_FROMSYNC;
+ pf_unlink_state(st);
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ break;
+ case PFSYNC_ACT_INS_F:
+ case PFSYNC_ACT_DEL_F:
+ /* not implemented */
+ break;
+ case PFSYNC_ACT_UREQ:
+ if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+ count * sizeof(*rup), &offp)) == NULL) {
+ pfsyncstats.pfsyncs_badlen++;
+ return;
+ }
+
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ if (sc->sc_mbuf != NULL)
+ pfsync_sendout(sc);
+ for (i = 0,
+ rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
+ i < count; i++, rup++) {
+ bcopy(rup->id, &key.id, sizeof(key.id));
+ key.creatorid = rup->creatorid;
+
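+			/* an all-zero request id asks for a full bulk update */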
+ if (key.id == 0 && key.creatorid == 0) {
+ sc->sc_ureq_received = time_uptime;
+ if (sc->sc_bulk_send_next == NULL)
+ sc->sc_bulk_send_next =
+ TAILQ_FIRST(&state_list);
+ sc->sc_bulk_terminator = sc->sc_bulk_send_next;
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: received "
+ "bulk update request\n");
+ pfsync_send_bus(sc, PFSYNC_BUS_START);
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_bulk_tmo, 1 * hz,
+ pfsync_bulk_update, pfsyncif);
+#else
+ timeout_add(&sc->sc_bulk_tmo, 1 * hz);
+#endif
+ } else {
+ st = pf_find_state_byid(&key);
+ if (st == NULL) {
+ pfsyncstats.pfsyncs_badstate++;
+ continue;
+ }
+ if (!st->sync_flags)
+ pfsync_pack_state(PFSYNC_ACT_UPD,
+ st, 0);
+ }
+ }
+ if (sc->sc_mbuf != NULL)
+ pfsync_sendout(sc);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ break;
+ case PFSYNC_ACT_BUS:
+ /* If we're not waiting for a bulk update, who cares. */
+ if (sc->sc_ureq_sent == 0)
+ break;
+
+ if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+ sizeof(*bus), &offp)) == NULL) {
+ pfsyncstats.pfsyncs_badlen++;
+ return;
+ }
+ bus = (struct pfsync_state_bus *)(mp->m_data + offp);
+ switch (bus->status) {
+ case PFSYNC_BUS_START:
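+			/* give the bulk transfer time to finish: the
+			 * failure timeout scales with the state table limit */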
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_bulkfail_tmo,
+ pf_pool_limits[PF_LIMIT_STATES].limit /
+ (PFSYNC_BULKPACKETS * sc->sc_maxcount),
+ pfsync_bulkfail, pfsyncif);
+#else
+ timeout_add(&sc->sc_bulkfail_tmo,
+ pf_pool_limits[PF_LIMIT_STATES].limit /
+ (PFSYNC_BULKPACKETS * sc->sc_maxcount));
+#endif
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: received bulk "
+ "update start\n");
+ break;
+ case PFSYNC_BUS_END:
+ if (time_uptime - ntohl(bus->endtime) >=
+ sc->sc_ureq_sent) {
+ /* that's it, we're happy */
+ sc->sc_ureq_sent = 0;
+ sc->sc_bulk_tries = 0;
+ timeout_del(&sc->sc_bulkfail_tmo);
+#if NCARP > 0
+ if (!pfsync_sync_ok)
+#ifdef __FreeBSD__
+#ifdef CARP_ADVANCED
+ carp_group_demote_adj(sc->sc_ifp, -1);
+#endif
+#else
+ carp_group_demote_adj(&sc->sc_if, -1);
+#endif
+#endif
+ pfsync_sync_ok = 1;
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: received valid "
+ "bulk update end\n");
+ } else {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: received invalid "
+ "bulk update end: bad timestamp\n");
+ }
+ break;
+ }
+ break;
+#ifdef PFSYNC_TDB
+ case PFSYNC_ACT_TDB_UPD:
+ if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+ count * sizeof(*pt), &offp)) == NULL) {
+ pfsyncstats.pfsyncs_badlen++;
+ return;
+ }
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp);
+ i < count; i++, pt++)
+ pfsync_update_net_tdb(pt);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ break;
+#endif
+ }
+
+done:
+ if (m)
+ m_freem(m);
+}
+
+int
+pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct route *ro)
+{
+ m_freem(m);
+ return (0);
+}
+
+/* ARGSUSED */
+int
+pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+#ifndef __FreeBSD__
+ struct proc *p = curproc;
+#endif
+ struct pfsync_softc *sc = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct ip_moptions *imo = &sc->sc_imo;
+ struct pfsyncreq pfsyncr;
+ struct ifnet *sifp;
+ int s, error;
+
+ switch (cmd) {
+ case SIOCSIFADDR:
+ case SIOCAIFADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCSIFFLAGS:
+#ifdef __FreeBSD__
+ if (ifp->if_flags & IFF_UP)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ else
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+#else
+ if (ifp->if_flags & IFF_UP)
+ ifp->if_flags |= IFF_RUNNING;
+ else
+ ifp->if_flags &= ~IFF_RUNNING;
+#endif
+ break;
+ case SIOCSIFMTU:
+ if (ifr->ifr_mtu < PFSYNC_MINMTU)
+ return (EINVAL);
+ if (ifr->ifr_mtu > MCLBYTES)
+ ifr->ifr_mtu = MCLBYTES;
+ s = splnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ if (ifr->ifr_mtu < ifp->if_mtu)
+ pfsync_sendout(sc);
+ pfsync_setmtu(sc, ifr->ifr_mtu);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ break;
+ case SIOCGETPFSYNC:
+ bzero(&pfsyncr, sizeof(pfsyncr));
+ if (sc->sc_sync_ifp)
+ strlcpy(pfsyncr.pfsyncr_syncdev,
+ sc->sc_sync_ifp->if_xname, IFNAMSIZ);
+ pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
+ pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
+ if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
+ return (error);
+ break;
+ case SIOCSETPFSYNC:
+#ifdef __FreeBSD__
+ if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
+#else
+ if ((error = suser(p, p->p_acflag)) != 0)
+#endif
+ return (error);
+ if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
+ return (error);
+
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
+#ifdef __FreeBSD__
+ sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
+#else
+ sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
+#endif
+ else
+ sc->sc_sync_peer.s_addr =
+ pfsyncr.pfsyncr_syncpeer.s_addr;
+
+ if (pfsyncr.pfsyncr_maxupdates > 255)
+#ifdef __FreeBSD__
+ {
+ PF_UNLOCK();
+#endif
+ return (EINVAL);
+#ifdef __FreeBSD__
+ }
+#endif
+ sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
+
+ if (pfsyncr.pfsyncr_syncdev[0] == 0) {
+ sc->sc_sync_ifp = NULL;
+ if (sc->sc_mbuf_net != NULL) {
+ /* Don't keep stale pfsync packets around. */
+ s = splnet();
+ m_freem(sc->sc_mbuf_net);
+ sc->sc_mbuf_net = NULL;
+ sc->sc_statep_net.s = NULL;
+ splx(s);
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ if (imo->imo_num_memberships > 0) {
+ in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+ imo->imo_multicast_ifp = NULL;
+ }
+ break;
+ }
+
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
+ return (EINVAL);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+
+ s = splnet();
+#ifdef __FreeBSD__
+ if (sifp->if_mtu < sc->sc_ifp->if_mtu ||
+#else
+ if (sifp->if_mtu < sc->sc_if.if_mtu ||
+#endif
+ (sc->sc_sync_ifp != NULL &&
+ sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
+ sifp->if_mtu < MCLBYTES - sizeof(struct ip))
+ pfsync_sendout(sc);
+ sc->sc_sync_ifp = sifp;
+
+#ifdef __FreeBSD__
+ pfsync_setmtu(sc, sc->sc_ifp->if_mtu);
+#else
+ pfsync_setmtu(sc, sc->sc_if.if_mtu);
+#endif
+
+ if (imo->imo_num_memberships > 0) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ imo->imo_multicast_ifp = NULL;
+ }
+
+ if (sc->sc_sync_ifp &&
+#ifdef __FreeBSD__
+ sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
+#else
+ sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
+#endif
+ struct in_addr addr;
+
+ if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
+ sc->sc_sync_ifp = NULL;
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ return (EADDRNOTAVAIL);
+ }
+
+#ifdef __FreeBSD__
+ addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
+#else
+ addr.s_addr = INADDR_PFSYNC_GROUP;
+#endif
+
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ if ((imo->imo_membership[0] =
+ in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
+ sc->sc_sync_ifp = NULL;
+ splx(s);
+ return (ENOBUFS);
+ }
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ imo->imo_num_memberships++;
+ imo->imo_multicast_ifp = sc->sc_sync_ifp;
+ imo->imo_multicast_ttl = PFSYNC_DFLTTL;
+ imo->imo_multicast_loop = 0;
+ }
+
+ if (sc->sc_sync_ifp ||
+#ifdef __FreeBSD__
+ sc->sc_sendaddr.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
+#else
+ sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
+#endif
+ /* Request a full state table update. */
+ sc->sc_ureq_sent = time_uptime;
+#if NCARP > 0
+ if (pfsync_sync_ok)
+#ifdef __FreeBSD__
+#ifdef CARP_ADVANCED
+ carp_group_demote_adj(sc->sc_ifp, 1);
+#endif
+#else
+ carp_group_demote_adj(&sc->sc_if, 1);
+#endif
+#endif
+ pfsync_sync_ok = 0;
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: requesting bulk update\n");
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
+ pfsync_bulkfail, pfsyncif);
+#else
+ timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
+#endif
+ error = pfsync_request_update(NULL, NULL);
+ if (error == ENOMEM) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+ return (ENOMEM);
+ }
+ pfsync_sendout(sc);
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+
+ break;
+
+ default:
+ return (ENOTTY);
+ }
+
+ return (0);
+}
+
+void
+pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
+{
+ int mtu;
+
+ if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
+ mtu = sc->sc_sync_ifp->if_mtu;
+ else
+ mtu = mtu_req;
+
+ sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
+ sizeof(struct pfsync_state);
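+	/* cap at 254: the header count field is 8 bits wide and 255 is
+	 * used in pfsync_pack_state() as a "no duplicate" marker */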
+ if (sc->sc_maxcount > 254)
+ sc->sc_maxcount = 254;
+#ifdef __FreeBSD__
+ sc->sc_ifp->if_mtu = sizeof(struct pfsync_header) +
+#else
+ sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
+#endif
+ sc->sc_maxcount * sizeof(struct pfsync_state);
+}
+
+struct mbuf *
+pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
+{
+ struct pfsync_header *h;
+ struct mbuf *m;
+ int len;
+
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+#ifdef __FreeBSD__
+ sc->sc_ifp->if_oerrors++;
+#else
+ sc->sc_if.if_oerrors++;
+#endif
+ return (NULL);
+ }
+
+ switch (action) {
+ case PFSYNC_ACT_CLR:
+ len = sizeof(struct pfsync_header) +
+ sizeof(struct pfsync_state_clr);
+ break;
+ case PFSYNC_ACT_UPD_C:
+ len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
+ sizeof(struct pfsync_header);
+ break;
+ case PFSYNC_ACT_DEL_C:
+ len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
+ sizeof(struct pfsync_header);
+ break;
+ case PFSYNC_ACT_UREQ:
+ len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
+ sizeof(struct pfsync_header);
+ break;
+ case PFSYNC_ACT_BUS:
+ len = sizeof(struct pfsync_header) +
+ sizeof(struct pfsync_state_bus);
+ break;
+#ifdef PFSYNC_TDB
+ case PFSYNC_ACT_TDB_UPD:
+ len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) +
+ sizeof(struct pfsync_header);
+ break;
+#endif
+ default:
+ len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
+ sizeof(struct pfsync_header);
+ break;
+ }
+
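+	/* for anything larger than a plain header mbuf, take a cluster and
+	 * place the data at its tail, long-aligned; the leading space lets
+	 * M_PREPEND add the IP header later without another allocation */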
+ if (len > MHLEN) {
+ MCLGET(m, M_DONTWAIT);
+ if ((m->m_flags & M_EXT) == 0) {
+ m_free(m);
+#ifdef __FreeBSD__
+ sc->sc_ifp->if_oerrors++;
+#else
+ sc->sc_if.if_oerrors++;
+#endif
+ return (NULL);
+ }
+ m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);
+ } else
+ MH_ALIGN(m, len);
+
+ m->m_pkthdr.rcvif = NULL;
+ m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
+ h = mtod(m, struct pfsync_header *);
+ h->version = PFSYNC_VERSION;
+ h->af = 0;
+ h->count = 0;
+ h->action = action;
+#ifndef PFSYNC_TDB
+ if (action != PFSYNC_ACT_TDB_UPD)
+#endif
+ bcopy(&pf_status.pf_chksum, &h->pf_chksum,
+ PF_MD5_DIGEST_LENGTH);
+
+ *sp = (void *)((char *)h + PFSYNC_HDRLEN);
+#ifdef PFSYNC_TDB
+ if (action == PFSYNC_ACT_TDB_UPD)
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_tdb_tmo, hz, pfsync_tdb_timeout,
+ pfsyncif);
+#else
+ timeout_add(&sc->sc_tdb_tmo, hz);
+#endif
+ else
+#endif
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_tmo, hz, pfsync_timeout, pfsyncif);
+#else
+ timeout_add(&sc->sc_tmo, hz);
+#endif
+ return (m);
+}
+
+int
+pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
+{
+ struct ifnet *ifp = NULL;
+ struct pfsync_softc *sc = pfsyncif;
+ struct pfsync_header *h, *h_net;
+ struct pfsync_state *sp = NULL;
+ struct pfsync_state_upd *up = NULL;
+ struct pfsync_state_del *dp = NULL;
+ struct pf_rule *r;
+ u_long secs;
+ int s, ret = 0;
+ u_int8_t i = 255, newaction = 0;
+
+ if (sc == NULL)
+ return (0);
+#ifdef __FreeBSD__
+ ifp = sc->sc_ifp;
+#else
+ ifp = &sc->sc_if;
+#endif
+
+ /*
+ * If a packet falls in the forest and there's nobody around to
+ * hear, does it make a sound?
+ */
+ if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
+#ifdef __FreeBSD__
+ sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
+#else
+ sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
+#endif
+ /* Don't leave any stale pfsync packets hanging around. */
+ if (sc->sc_mbuf != NULL) {
+ m_freem(sc->sc_mbuf);
+ sc->sc_mbuf = NULL;
+ sc->sc_statep.s = NULL;
+ }
+ return (0);
+ }
+
+ if (action >= PFSYNC_ACT_MAX)
+ return (EINVAL);
+
+ s = splnet();
+#ifdef __FreeBSD__
+ PF_ASSERT(MA_OWNED);
+#endif
+ if (sc->sc_mbuf == NULL) {
+ if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
+ (void *)&sc->sc_statep.s)) == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ h = mtod(sc->sc_mbuf, struct pfsync_header *);
+ } else {
+ h = mtod(sc->sc_mbuf, struct pfsync_header *);
+ if (h->action != action) {
+ pfsync_sendout(sc);
+ if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
+ (void *)&sc->sc_statep.s)) == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ h = mtod(sc->sc_mbuf, struct pfsync_header *);
+ } else {
+ /*
+ * If it's an update, look in the packet to see if
+ * we already have an update for the state.
+ */
+ if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
+ struct pfsync_state *usp =
+ (void *)((char *)h + PFSYNC_HDRLEN);
+
+ for (i = 0; i < h->count; i++) {
+ if (!memcmp(usp->id, &st->id,
+ PFSYNC_ID_LEN) &&
+ usp->creatorid == st->creatorid) {
+ sp = usp;
+ sp->updates++;
+ break;
+ }
+ usp++;
+ }
+ }
+ }
+ }
+
+ secs = time_second;
+
+ st->pfsync_time = time_uptime;
+
+ if (sp == NULL) {
+ /* not a "duplicate" update */
+ i = 255;
+ sp = sc->sc_statep.s++;
+ sc->sc_mbuf->m_pkthdr.len =
+ sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
+ h->count++;
+ bzero(sp, sizeof(*sp));
+
+ bcopy(&st->id, sp->id, sizeof(sp->id));
+ sp->creatorid = st->creatorid;
+
+ strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname));
+ pf_state_host_hton(&st->lan, &sp->lan);
+ pf_state_host_hton(&st->gwy, &sp->gwy);
+ pf_state_host_hton(&st->ext, &sp->ext);
+
+ bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
+
+ sp->creation = htonl(secs - st->creation);
+ pf_state_counter_hton(st->packets[0], sp->packets[0]);
+ pf_state_counter_hton(st->packets[1], sp->packets[1]);
+ pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
+ pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
+ if ((r = st->rule.ptr) == NULL)
+ sp->rule = htonl(-1);
+ else
+ sp->rule = htonl(r->nr);
+ if ((r = st->anchor.ptr) == NULL)
+ sp->anchor = htonl(-1);
+ else
+ sp->anchor = htonl(r->nr);
+ sp->af = st->af;
+ sp->proto = st->proto;
+ sp->direction = st->direction;
+ sp->log = st->log;
+ sp->state_flags = st->state_flags;
+ sp->timeout = st->timeout;
+
+ if (flags & PFSYNC_FLAG_STALE)
+ sp->sync_flags |= PFSTATE_STALE;
+ }
+
+ pf_state_peer_hton(&st->src, &sp->src);
+ pf_state_peer_hton(&st->dst, &sp->dst);
+
+ if (st->expire <= secs)
+ sp->expire = htonl(0);
+ else
+ sp->expire = htonl(st->expire - secs);
+
+ /* do we need to build "compressed" actions for network transfer? */
+ if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
+ switch (action) {
+ case PFSYNC_ACT_UPD:
+ newaction = PFSYNC_ACT_UPD_C;
+ break;
+ case PFSYNC_ACT_DEL:
+ newaction = PFSYNC_ACT_DEL_C;
+ break;
+ default:
+ /* by default we just send the uncompressed states */
+ break;
+ }
+ }
+
+ if (newaction) {
+ if (sc->sc_mbuf_net == NULL) {
+ if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
+ (void *)&sc->sc_statep_net.s)) == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ }
+ h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);
+
+ switch (newaction) {
+ case PFSYNC_ACT_UPD_C:
+ if (i != 255) {
+ up = (void *)((char *)h_net +
+ PFSYNC_HDRLEN + (i * sizeof(*up)));
+ up->updates++;
+ } else {
+ h_net->count++;
+ sc->sc_mbuf_net->m_pkthdr.len =
+ sc->sc_mbuf_net->m_len += sizeof(*up);
+ up = sc->sc_statep_net.u++;
+
+ bzero(up, sizeof(*up));
+ bcopy(&st->id, up->id, sizeof(up->id));
+ up->creatorid = st->creatorid;
+ }
+ up->timeout = st->timeout;
+ up->expire = sp->expire;
+ up->src = sp->src;
+ up->dst = sp->dst;
+ break;
+ case PFSYNC_ACT_DEL_C:
+ sc->sc_mbuf_net->m_pkthdr.len =
+ sc->sc_mbuf_net->m_len += sizeof(*dp);
+ dp = sc->sc_statep_net.d++;
+ h_net->count++;
+
+ bzero(dp, sizeof(*dp));
+ bcopy(&st->id, dp->id, sizeof(dp->id));
+ dp->creatorid = st->creatorid;
+ break;
+ }
+ }
+
+ if (h->count == sc->sc_maxcount ||
+ (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
+ ret = pfsync_sendout(sc);
+
+ splx(s);
+ return (ret);
+}
+
+/* This must be called in splnet() */
+int
+pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
+{
+ struct ifnet *ifp = NULL;
+ struct pfsync_header *h;
+ struct pfsync_softc *sc = pfsyncif;
+ struct pfsync_state_upd_req *rup;
+ int ret = 0;
+
+ if (sc == NULL)
+ return (0);
+
+#ifdef __FreeBSD__
+ ifp = sc->sc_ifp;
+#else
+ ifp = &sc->sc_if;
+#endif
+ if (sc->sc_mbuf == NULL) {
+ if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
+ (void *)&sc->sc_statep.s)) == NULL)
+ return (ENOMEM);
+ h = mtod(sc->sc_mbuf, struct pfsync_header *);
+ } else {
+ h = mtod(sc->sc_mbuf, struct pfsync_header *);
+ if (h->action != PFSYNC_ACT_UREQ) {
+ pfsync_sendout(sc);
+ if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
+ (void *)&sc->sc_statep.s)) == NULL)
+ return (ENOMEM);
+ h = mtod(sc->sc_mbuf, struct pfsync_header *);
+ }
+ }
+
+ if (src != NULL)
+ sc->sc_sendaddr = *src;
+ sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
+ h->count++;
+ rup = sc->sc_statep.r++;
+ bzero(rup, sizeof(*rup));
+ if (up != NULL) {
+ bcopy(up->id, rup->id, sizeof(rup->id));
+ rup->creatorid = up->creatorid;
+ }
+
+ if (h->count == sc->sc_maxcount)
+ ret = pfsync_sendout(sc);
+
+ return (ret);
+}
+
+int
+pfsync_clear_states(u_int32_t creatorid, char *ifname)
+{
+ struct ifnet *ifp = NULL;
+ struct pfsync_softc *sc = pfsyncif;
+ struct pfsync_state_clr *cp;
+ int s, ret;
+
+ if (sc == NULL)
+ return (0);
+
+#ifdef __FreeBSD__
+ ifp = sc->sc_ifp;
+#else
+ ifp = &sc->sc_if;
+#endif
+#ifdef __FreeBSD__
+ PF_ASSERT(MA_OWNED);
+#endif
+ s = splnet();
+ if (sc->sc_mbuf != NULL)
+ pfsync_sendout(sc);
+ if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
+ (void *)&sc->sc_statep.c)) == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
+ cp = sc->sc_statep.c;
+ cp->creatorid = creatorid;
+ if (ifname != NULL)
+ strlcpy(cp->ifname, ifname, IFNAMSIZ);
+
+ ret = (pfsync_sendout(sc));
+ splx(s);
+ return (ret);
+}
+
+void
+pfsync_timeout(void *v)
+{
+ struct pfsync_softc *sc = v;
+ int s;
+
+ s = splnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ pfsync_sendout(sc);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+}
+
+#ifdef PFSYNC_TDB
+void
+pfsync_tdb_timeout(void *v)
+{
+ struct pfsync_softc *sc = v;
+ int s;
+
+ s = splnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ pfsync_tdb_sendout(sc);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+}
+#endif
+
+/* This must be called in splnet() */
+void
+pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
+{
+ struct pfsync_state_bus *bus;
+
+#ifdef __FreeBSD__
+ PF_ASSERT(MA_OWNED);
+#endif
+ if (sc->sc_mbuf != NULL)
+ pfsync_sendout(sc);
+
+ if (pfsync_sync_ok &&
+ (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
+ (void *)&sc->sc_statep.b)) != NULL) {
+ sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
+ bus = sc->sc_statep.b;
+ bus->creatorid = pf_status.hostid;
+ bus->status = status;
+ bus->endtime = htonl(time_uptime - sc->sc_ureq_received);
+ pfsync_sendout(sc);
+ }
+}
+
+void
+pfsync_bulk_update(void *v)
+{
+ struct pfsync_softc *sc = v;
+ int s, i = 0;
+ struct pf_state *state;
+
+ s = splnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ if (sc->sc_mbuf != NULL)
+ pfsync_sendout(sc);
+
+ /*
+ * Grab at most PFSYNC_BULKPACKETS worth of states which have not
+ * been sent since the latest request was made.
+ */
+ state = sc->sc_bulk_send_next;
+ if (state)
+ do {
+ /* send state update if syncable and not already sent */
+ if (!state->sync_flags
+ && state->timeout < PFTM_MAX
+ && state->pfsync_time <= sc->sc_ureq_received) {
+ pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
+ i++;
+ }
+
+ /* figure next state to send */
+ state = TAILQ_NEXT(state, u.s.entry_list);
+
+ /* wrap to start of list if we hit the end */
+ if (!state)
+ state = TAILQ_FIRST(&state_list);
+ } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
+ state != sc->sc_bulk_terminator);
+
+ if (!state || state == sc->sc_bulk_terminator) {
+ /* we're done */
+ pfsync_send_bus(sc, PFSYNC_BUS_END);
+ sc->sc_ureq_received = 0;
+ sc->sc_bulk_send_next = NULL;
+ sc->sc_bulk_terminator = NULL;
+ timeout_del(&sc->sc_bulk_tmo);
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: bulk update complete\n");
+ } else {
+ /* look again for more in a bit */
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update,
+ pfsyncif);
+#else
+ timeout_add(&sc->sc_bulk_tmo, 1);
+#endif
+ sc->sc_bulk_send_next = state;
+ }
+ if (sc->sc_mbuf != NULL)
+ pfsync_sendout(sc);
+ splx(s);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+}
+
+void
+pfsync_bulkfail(void *v)
+{
+ struct pfsync_softc *sc = v;
+ int s, error;
+
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
+ /* Try again in a bit */
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
+ pfsyncif);
+#else
+ timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
+#endif
+ s = splnet();
+ error = pfsync_request_update(NULL, NULL);
+ if (error == ENOMEM) {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: cannot allocate mbufs for "
+ "bulk update\n");
+ } else
+ pfsync_sendout(sc);
+ splx(s);
+ } else {
+ /* Pretend like the transfer was ok */
+ sc->sc_ureq_sent = 0;
+ sc->sc_bulk_tries = 0;
+#if NCARP > 0
+ if (!pfsync_sync_ok)
+#ifdef __FreeBSD__
+#ifdef CARP_ADVANCED
+ carp_group_demote_adj(sc->sc_ifp, -1);
+#endif
+#else
+ carp_group_demote_adj(&sc->sc_if, -1);
+#endif
+#endif
+ pfsync_sync_ok = 1;
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: failed to receive "
+ "bulk update status\n");
+ timeout_del(&sc->sc_bulkfail_tmo);
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+}
+
+/* This must be called in splnet() */
+int
+pfsync_sendout(struct pfsync_softc *sc)
+{
+#if NBPFILTER > 0
+#ifdef __FreeBSD__
+ struct ifnet *ifp = sc->sc_ifp;
+#else
+ struct ifnet *ifp = &sc->sc_if;
+#endif
+#endif
+ struct mbuf *m;
+
+#ifdef __FreeBSD__
+ PF_ASSERT(MA_OWNED);
+#endif
+ timeout_del(&sc->sc_tmo);
+
+ if (sc->sc_mbuf == NULL)
+ return (0);
+ m = sc->sc_mbuf;
+ sc->sc_mbuf = NULL;
+ sc->sc_statep.s = NULL;
+
+#if NBPFILTER > 0
+ if (ifp->if_bpf)
+#ifdef __FreeBSD__
+ BPF_MTAP(ifp, m);
+#else
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
+#endif
+
+ if (sc->sc_mbuf_net) {
+ m_freem(m);
+ m = sc->sc_mbuf_net;
+ sc->sc_mbuf_net = NULL;
+ sc->sc_statep_net.s = NULL;
+ }
+
+ return pfsync_sendout_mbuf(sc, m);
+}
+
+#ifdef PFSYNC_TDB
+int
+pfsync_tdb_sendout(struct pfsync_softc *sc)
+{
+#if NBPFILTER > 0
+#ifdef __FreeBSD__
+ struct ifnet *ifp = sc->sc_ifp;
+#else
+ struct ifnet *ifp = &sc->sc_if;
+#endif
+#endif
+ struct mbuf *m;
+
+#ifdef __FreeBSD__
+ PF_ASSERT(MA_OWNED);
+#endif
+ timeout_del(&sc->sc_tdb_tmo);
+
+ if (sc->sc_mbuf_tdb == NULL)
+ return (0);
+ m = sc->sc_mbuf_tdb;
+ sc->sc_mbuf_tdb = NULL;
+ sc->sc_statep_tdb.t = NULL;
+
+#if NBPFILTER > 0
+ if (ifp->if_bpf)
+#ifdef __FreeBSD__
+ BPF_MTAP(ifp, m);
+#else
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
+#endif
+
+ return pfsync_sendout_mbuf(sc, m);
+}
+#endif
+
+int
+pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
+{
+ struct sockaddr sa;
+ struct ip *ip;
+
+#ifdef __FreeBSD__
+ PF_ASSERT(MA_OWNED);
+#endif
+ if (sc->sc_sync_ifp ||
+#ifdef __FreeBSD__
+ sc->sc_sync_peer.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
+#else
+ sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
+#endif
+ M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+ if (m == NULL) {
+ pfsyncstats.pfsyncs_onomem++;
+ return (0);
+ }
+ ip = mtod(m, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(*ip) >> 2;
+ ip->ip_tos = IPTOS_LOWDELAY;
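+		/* ip_output() on FreeBSD of this vintage expects ip_len and
+		 * ip_off in host byte order; OpenBSD wants network order */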
+#ifdef __FreeBSD__
+ ip->ip_len = m->m_pkthdr.len;
+#else
+ ip->ip_len = htons(m->m_pkthdr.len);
+#endif
+ ip->ip_id = htons(ip_randomid());
+#ifdef __FreeBSD__
+ ip->ip_off = IP_DF;
+#else
+ ip->ip_off = htons(IP_DF);
+#endif
+ ip->ip_ttl = PFSYNC_DFLTTL;
+ ip->ip_p = IPPROTO_PFSYNC;
+ ip->ip_sum = 0;
+
+ bzero(&sa, sizeof(sa));
+ ip->ip_src.s_addr = INADDR_ANY;
+
+#ifdef __FreeBSD__
+ if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP))
+#else
+ if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
+#endif
+ m->m_flags |= M_MCAST;
+ ip->ip_dst = sc->sc_sendaddr;
+ sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
+
+ pfsyncstats.pfsyncs_opackets++;
+
+#ifdef __FreeBSD__
+ if (!IF_HANDOFF(&sc->sc_ifq, m, NULL))
+ pfsyncstats.pfsyncs_oerrors++;
+ taskqueue_enqueue(taskqueue_thread, &pfsyncif->sc_send_task);
+#else
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
+ pfsyncstats.pfsyncs_oerrors++;
+#endif
+ } else
+ m_freem(m);
+
+ return (0);
+}
+
+#ifdef PFSYNC_TDB
+/* Update an in-kernel tdb. Silently fail if no tdb is found. */
+void
+pfsync_update_net_tdb(struct pfsync_tdb *pt)
+{
+ struct tdb *tdb;
+ int s;
+
+ /* check for invalid values */
+ if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
+ (pt->dst.sa.sa_family != AF_INET &&
+ pt->dst.sa.sa_family != AF_INET6))
+ goto bad;
+
+ s = spltdb();
+ tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
+ if (tdb) {
+ pt->rpl = ntohl(pt->rpl);
+ pt->cur_bytes = betoh64(pt->cur_bytes);
+
+ /* Neither replay nor byte counter should ever decrease. */
+ if (pt->rpl < tdb->tdb_rpl ||
+ pt->cur_bytes < tdb->tdb_cur_bytes) {
+ splx(s);
+ goto bad;
+ }
+
+ tdb->tdb_rpl = pt->rpl;
+ tdb->tdb_cur_bytes = pt->cur_bytes;
+ }
+ splx(s);
+ return;
+
+ bad:
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
+ "invalid value\n");
+ pfsyncstats.pfsyncs_badstate++;
+ return;
+}
+
+/* One of our local tdbs has been updated; sync its rpl with the peers */
+int
+pfsync_update_tdb(struct tdb *tdb, int output)
+{
+ struct ifnet *ifp = NULL;
+ struct pfsync_softc *sc = pfsyncif;
+ struct pfsync_header *h;
+ struct pfsync_tdb *pt = NULL;
+	int s, i, ret = 0;
+
+ if (sc == NULL)
+ return (0);
+
+#ifdef __FreeBSD__
+ ifp = sc->sc_ifp;
+#else
+ ifp = &sc->sc_if;
+#endif
+ if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
+#ifdef __FreeBSD__
+ sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
+#else
+ sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
+#endif
+ /* Don't leave any stale pfsync packets hanging around. */
+ if (sc->sc_mbuf_tdb != NULL) {
+ m_freem(sc->sc_mbuf_tdb);
+ sc->sc_mbuf_tdb = NULL;
+ sc->sc_statep_tdb.t = NULL;
+ }
+ return (0);
+ }
+
+#ifdef __FreeBSD__
+ PF_ASSERT(MA_OWNED);
+#endif
+ s = splnet();
+ if (sc->sc_mbuf_tdb == NULL) {
+ if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD,
+ (void *)&sc->sc_statep_tdb.t)) == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
+ } else {
+ h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
+ if (h->action != PFSYNC_ACT_TDB_UPD) {
+ /*
+ * XXX will never happen as long as there's
+ * only one "TDB action".
+ */
+ pfsync_tdb_sendout(sc);
+ sc->sc_mbuf_tdb = pfsync_get_mbuf(sc,
+ PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t);
+ if (sc->sc_mbuf_tdb == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
+ } else if (sc->sc_maxupdates) {
+ /*
+ * If it's an update, look in the packet to see if
+ * we already have an update for the state.
+ */
+ struct pfsync_tdb *u =
+ (void *)((char *)h + PFSYNC_HDRLEN);
+
+ for (i = 0; !pt && i < h->count; i++) {
+ if (tdb->tdb_spi == u->spi &&
+ tdb->tdb_sproto == u->sproto &&
+ !bcmp(&tdb->tdb_dst, &u->dst,
+ SA_LEN(&u->dst.sa))) {
+ pt = u;
+ pt->updates++;
+ }
+ u++;
+ }
+ }
+ }
+
+ if (pt == NULL) {
+ /* not a "duplicate" update */
+ pt = sc->sc_statep_tdb.t++;
+ sc->sc_mbuf_tdb->m_pkthdr.len =
+ sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb);
+ h->count++;
+ bzero(pt, sizeof(*pt));
+
+ pt->spi = tdb->tdb_spi;
+ memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst);
+ pt->sproto = tdb->tdb_sproto;
+ }
+
+ /*
+ * When a failover happens, the master's rpl is probably above
+ * what we see here (we may be up to a second late), so
+ * increase it a bit for outbound tdbs to manage most such
+ * situations.
+ *
+ * For now, just add an offset that is likely to be larger
+ * than the number of packets we can see in one second. The RFC
+ * just says the next packet must have a higher seq value.
+ *
+ * XXX What is a good algorithm for this? We could use
+ * a rate-determined increase, but to know it, we would have
+ * to extend struct tdb.
+ * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
+ * will soon be replaced anyway. For now, just don't handle
+ * this edge case.
+ */
+#define RPL_INCR 16384
+ pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0));
+ pt->cur_bytes = htobe64(tdb->tdb_cur_bytes);
+
+ if (h->count == sc->sc_maxcount ||
+ (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates)))
+ ret = pfsync_tdb_sendout(sc);
+
+ splx(s);
+ return (ret);
+}
+#endif /* PFSYNC_TDB */
+
+#ifdef __FreeBSD__
+void
+pfsync_ifdetach(void *arg, struct ifnet *ifp)
+{
+ struct pfsync_softc *sc = (struct pfsync_softc *)arg;
+ struct ip_moptions *imo;
+
+ if (sc == NULL || sc->sc_sync_ifp != ifp)
+ return; /* not for us; unlocked read */
+
+ PF_LOCK();
+
+ /* Deal with a member interface going away from under us. */
+ sc->sc_sync_ifp = NULL;
+ if (sc->sc_mbuf_net != NULL) {
+ m_freem(sc->sc_mbuf_net);
+ sc->sc_mbuf_net = NULL;
+ sc->sc_statep_net.s = NULL;
+ }
+ imo = &sc->sc_imo;
+ if (imo->imo_num_memberships > 0) {
+ KASSERT(imo->imo_num_memberships == 1,
+ ("%s: imo_num_memberships != 1", __func__));
+ /*
+ * Our event handler is always called after protocol
+ * domains have been detached from the underlying ifnet.
+ * Do not call in_delmulti(); we held a single reference
+ * which the protocol domain has purged in in_purgemaddrs().
+ */
+ PF_UNLOCK();
+ imo->imo_membership[--imo->imo_num_memberships] = NULL;
+ PF_LOCK();
+ imo->imo_multicast_ifp = NULL;
+ }
+
+ PF_UNLOCK();
+}
+
+void
+pfsync_senddef(void *arg, __unused int pending)
+{
+ struct pfsync_softc *sc = (struct pfsync_softc *)arg;
+ struct mbuf *m;
+
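+	/* drain the staging queue from taskqueue context, presumably so
+	 * ip_output() is not entered with the pf lock held */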
+	for (;;) {
+ IF_DEQUEUE(&sc->sc_ifq, m);
+ if (m == NULL)
+ break;
+ /* Deal with a member interface going away from under us. */
+ if (sc->sc_sync_ifp == NULL) {
+ pfsyncstats.pfsyncs_oerrors++;
+ m_freem(m);
+ continue;
+ }
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
+ pfsyncstats.pfsyncs_oerrors++;
+ }
+}
+
+static int
+pfsync_modevent(module_t mod, int type, void *data)
+{
+ int error = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ pfsyncattach(0);
+ break;
+ case MOD_UNLOAD:
+ if_clone_detach(&pfsync_cloner);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+static moduledata_t pfsync_mod = {
+ "pfsync",
+ pfsync_modevent,
+ 0
+};
+
+#define PFSYNC_MODVER 1
+
+DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+MODULE_VERSION(pfsync, PFSYNC_MODVER);
+MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
+#endif /* __FreeBSD__ */
diff --git a/freebsd/sys/contrib/pf/net/if_pfsync.h b/freebsd/sys/contrib/pf/net/if_pfsync.h
new file mode 100644
index 00000000..e3e6caf9
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/if_pfsync.h
@@ -0,0 +1,375 @@
+/* $FreeBSD$ */
+/* $OpenBSD: if_pfsync.h,v 1.30 2006/10/31 14:49:01 henning Exp $ */
+
+/*
+ * Copyright (c) 2001 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_PFSYNC_HH_
+#define _NET_IF_PFSYNC_HH_
+
+
+#define PFSYNC_ID_LEN sizeof(u_int64_t)
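+/*
+ * State ids are 64 bits on the wire (PFSYNC_ID_LEN) but the structs
+ * below carry them as two 32-bit words, copied with bcopy(), so
+ * nothing here relies on 64-bit alignment.
+ */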
+
+struct pfsync_state_scrub {
+ u_int16_t pfss_flags;
+ u_int8_t pfss_ttl; /* stashed TTL */
+#define PFSYNC_SCRUB_FLAG_VALID 0x01
+ u_int8_t scrub_flag;
+ u_int32_t pfss_ts_mod; /* timestamp modulation */
+} __packed;
+
+struct pfsync_state_host {
+ struct pf_addr addr;
+ u_int16_t port;
+ u_int16_t pad[3];
+} __packed;
+
+struct pfsync_state_peer {
+ struct pfsync_state_scrub scrub; /* state is scrubbed */
+ u_int32_t seqlo; /* Max sequence number sent */
+ u_int32_t seqhi; /* Max the other end ACKd + win */
+ u_int32_t seqdiff; /* Sequence number modulator */
+ u_int16_t max_win; /* largest window (pre scaling) */
+ u_int16_t mss; /* Maximum segment size option */
+ u_int8_t state; /* active state level */
+ u_int8_t wscale; /* window scaling factor */
+ u_int8_t pad[6];
+} __packed;
+
+struct pfsync_state {
+ u_int32_t id[2];
+ char ifname[IFNAMSIZ];
+ struct pfsync_state_host lan;
+ struct pfsync_state_host gwy;
+ struct pfsync_state_host ext;
+ struct pfsync_state_peer src;
+ struct pfsync_state_peer dst;
+ struct pf_addr rt_addr;
+ u_int32_t rule;
+ u_int32_t anchor;
+ u_int32_t nat_rule;
+ u_int32_t creation;
+ u_int32_t expire;
+ u_int32_t packets[2][2];
+ u_int32_t bytes[2][2];
+ u_int32_t creatorid;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t direction;
+ u_int8_t log;
+ u_int8_t state_flags;
+ u_int8_t timeout;
+ u_int8_t sync_flags;
+ u_int8_t updates;
+} __packed;
+
+#define PFSYNC_FLAG_COMPRESS 0x01
+#define PFSYNC_FLAG_STALE 0x02
+
+#ifdef PFSYNC_TDB
+struct pfsync_tdb {
+ u_int32_t spi;
+ union sockaddr_union dst;
+ u_int32_t rpl;
+ u_int64_t cur_bytes;
+ u_int8_t sproto;
+ u_int8_t updates;
+ u_int8_t pad[2];
+} __packed;
+#endif
+
+struct pfsync_state_upd {
+ u_int32_t id[2];
+ struct pfsync_state_peer src;
+ struct pfsync_state_peer dst;
+ u_int32_t creatorid;
+ u_int32_t expire;
+ u_int8_t timeout;
+ u_int8_t updates;
+ u_int8_t pad[6];
+} __packed;
+
+struct pfsync_state_del {
+ u_int32_t id[2];
+ u_int32_t creatorid;
+ struct {
+ u_int8_t state;
+ } src;
+ struct {
+ u_int8_t state;
+ } dst;
+ u_int8_t pad[2];
+} __packed;
+
+struct pfsync_state_upd_req {
+ u_int32_t id[2];
+ u_int32_t creatorid;
+ u_int32_t pad;
+} __packed;
+
+struct pfsync_state_clr {
+ char ifname[IFNAMSIZ];
+ u_int32_t creatorid;
+ u_int32_t pad;
+} __packed;
+
+struct pfsync_state_bus {
+ u_int32_t creatorid;
+ u_int32_t endtime;
+ u_int8_t status;
+#define PFSYNC_BUS_START 1
+#define PFSYNC_BUS_END 2
+ u_int8_t pad[7];
+} __packed;
+
+#ifdef _KERNEL
+
+union sc_statep {
+ struct pfsync_state *s;
+ struct pfsync_state_upd *u;
+ struct pfsync_state_del *d;
+ struct pfsync_state_clr *c;
+ struct pfsync_state_bus *b;
+ struct pfsync_state_upd_req *r;
+};
+
+#ifdef PFSYNC_TDB
+union sc_tdb_statep {
+ struct pfsync_tdb *t;
+};
+#endif
+
+extern int pfsync_sync_ok;
+
+struct pfsync_softc {
+#ifdef __FreeBSD__
+ struct ifnet *sc_ifp;
+#else
+ struct ifnet sc_if;
+#endif
+ struct ifnet *sc_sync_ifp;
+
+ struct ip_moptions sc_imo;
+#ifdef __FreeBSD__
+ struct callout sc_tmo;
+#ifdef PFSYNC_TDB
+ struct callout sc_tdb_tmo;
+#endif
+ struct callout sc_bulk_tmo;
+ struct callout sc_bulkfail_tmo;
+#else
+ struct timeout sc_tmo;
+ struct timeout sc_tdb_tmo;
+ struct timeout sc_bulk_tmo;
+ struct timeout sc_bulkfail_tmo;
+#endif
+ struct in_addr sc_sync_peer;
+ struct in_addr sc_sendaddr;
+ struct mbuf *sc_mbuf; /* current cumulative mbuf */
+ struct mbuf *sc_mbuf_net; /* current cumulative mbuf */
+#ifdef PFSYNC_TDB
+	struct mbuf *sc_mbuf_tdb; /* ditto for TDB updates */
+#endif
+#ifdef __FreeBSD__
+ struct ifqueue sc_ifq;
+ struct task sc_send_task;
+#endif
+ union sc_statep sc_statep;
+ union sc_statep sc_statep_net;
+#ifdef PFSYNC_TDB
+ union sc_tdb_statep sc_statep_tdb;
+#endif
+ u_int32_t sc_ureq_received;
+ u_int32_t sc_ureq_sent;
+ struct pf_state *sc_bulk_send_next;
+ struct pf_state *sc_bulk_terminator;
+ int sc_bulk_tries;
+ int sc_maxcount; /* number of states in mtu */
+ int sc_maxupdates; /* number of updates/state */
+#ifdef __FreeBSD__
+ eventhandler_tag sc_detachtag;
+#endif
+};
+
+extern struct pfsync_softc *pfsyncif;
+#endif
+
+
+struct pfsync_header {
+ u_int8_t version;
+#define PFSYNC_VERSION 3
+ u_int8_t af;
+ u_int8_t action;
+#define PFSYNC_ACT_CLR 0 /* clear all states */
+#define PFSYNC_ACT_INS 1 /* insert state */
+#define PFSYNC_ACT_UPD 2 /* update state */
+#define PFSYNC_ACT_DEL 3 /* delete state */
+#define PFSYNC_ACT_UPD_C 4 /* "compressed" state update */
+#define PFSYNC_ACT_DEL_C 5 /* "compressed" state delete */
+#define PFSYNC_ACT_INS_F 6 /* insert fragment */
+#define PFSYNC_ACT_DEL_F 7 /* delete fragments */
+#define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */
+#define PFSYNC_ACT_BUS 9 /* Bulk Update Status */
+#define PFSYNC_ACT_TDB_UPD 10 /* TDB replay counter update */
+#define PFSYNC_ACT_MAX 11
+ u_int8_t count;
+ u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH];
+} __packed;
+
+#define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */
+#define PFSYNC_MAX_BULKTRIES 12
+#define PFSYNC_HDRLEN sizeof(struct pfsync_header)
+#define PFSYNC_ACTIONS \
+ "CLR ST", "INS ST", "UPD ST", "DEL ST", \
+ "UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \
+ "UPD REQ", "BLK UPD STAT", "TDB UPD"
+
+#define PFSYNC_DFLTTL 255
+
+struct pfsyncstats {
+ u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */
+ u_int64_t pfsyncs_ipackets6; /* total input packets, IPv6 */
+ u_int64_t pfsyncs_badif; /* not the right interface */
+ u_int64_t pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */
+ u_int64_t pfsyncs_hdrops; /* packets shorter than hdr */
+ u_int64_t pfsyncs_badver; /* bad (incl unsupp) version */
+ u_int64_t pfsyncs_badact; /* bad action */
+ u_int64_t pfsyncs_badlen; /* data length does not match */
+ u_int64_t pfsyncs_badauth; /* bad authentication */
+ u_int64_t pfsyncs_stale; /* stale state */
+ u_int64_t pfsyncs_badval; /* bad values */
+ u_int64_t pfsyncs_badstate; /* insert/lookup failed */
+
+ u_int64_t pfsyncs_opackets; /* total output packets, IPv4 */
+ u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */
+ u_int64_t pfsyncs_onomem; /* no memory for an mbuf */
+ u_int64_t pfsyncs_oerrors; /* ip output error */
+};
+
+/*
+ * Configuration structure for SIOCSETPFSYNC and SIOCGETPFSYNC
+ */
+struct pfsyncreq {
+ char pfsyncr_syncdev[IFNAMSIZ];
+ struct in_addr pfsyncr_syncpeer;
+ int pfsyncr_maxupdates;
+ int pfsyncr_authlevel;
+};
+
+#ifdef __FreeBSD__
+#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq)
+#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq)
+#endif
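+
+/*
+ * Userland configuration sketch (illustrative only, error handling
+ * omitted): a pfsyncreq is hung off an ifreq and passed to the
+ * ioctl on an ordinary socket s; "em0" and "pfsync0" are example
+ * interface names.
+ *
+ *	struct pfsyncreq pfr;
+ *	struct ifreq ifr;
+ *
+ *	memset(&pfr, 0, sizeof(pfr));
+ *	strlcpy(pfr.pfsyncr_syncdev, "em0", sizeof(pfr.pfsyncr_syncdev));
+ *	pfr.pfsyncr_maxupdates = 128;
+ *	strlcpy(ifr.ifr_name, "pfsync0", sizeof(ifr.ifr_name));
+ *	ifr.ifr_data = (caddr_t)&pfr;
+ *	ioctl(s, SIOCSETPFSYNC, &ifr);
+ */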
+
+#define pf_state_peer_hton(s,d) do { \
+ (d)->seqlo = htonl((s)->seqlo); \
+ (d)->seqhi = htonl((s)->seqhi); \
+ (d)->seqdiff = htonl((s)->seqdiff); \
+ (d)->max_win = htons((s)->max_win); \
+ (d)->mss = htons((s)->mss); \
+ (d)->state = (s)->state; \
+ (d)->wscale = (s)->wscale; \
+ if ((s)->scrub) { \
+ (d)->scrub.pfss_flags = \
+ htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \
+ (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \
+ (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\
+ (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \
+ } \
+} while (0)
+
+#define pf_state_peer_ntoh(s,d) do { \
+ (d)->seqlo = ntohl((s)->seqlo); \
+ (d)->seqhi = ntohl((s)->seqhi); \
+ (d)->seqdiff = ntohl((s)->seqdiff); \
+ (d)->max_win = ntohs((s)->max_win); \
+ (d)->mss = ntohs((s)->mss); \
+ (d)->state = (s)->state; \
+ (d)->wscale = (s)->wscale; \
+ if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \
+ (d)->scrub != NULL) { \
+ (d)->scrub->pfss_flags = \
+ ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \
+ (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \
+ (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\
+ } \
+} while (0)
+
+#define pf_state_host_hton(s,d) do { \
+ bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \
+ (d)->port = (s)->port; \
+} while (0)
+
+#define pf_state_host_ntoh(s,d) do { \
+ bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \
+ (d)->port = (s)->port; \
+} while (0)
+
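+/*
+ * 64-bit state counters travel on the wire as two 32-bit words in
+ * network byte order.  Usage sketch (names illustrative): given
+ * u_int64_t pkts and u_int32_t wire[2],
+ *
+ *	pf_state_counter_hton(pkts, wire);	host to wire
+ *	pf_state_counter_ntoh(wire, pkts);	wire to host
+ */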
+#define pf_state_counter_hton(s,d) do { \
+ d[0] = htonl((s>>32)&0xffffffff); \
+ d[1] = htonl(s&0xffffffff); \
+} while (0)
+
+#define pf_state_counter_ntoh(s,d) do { \
+ d = ntohl(s[0]); \
+ d = d<<32; \
+ d += ntohl(s[1]); \
+} while (0)
+
+#ifdef _KERNEL
+#ifdef __FreeBSD__
+void pfsync_input(struct mbuf *, __unused int);
+#else
+void pfsync_input(struct mbuf *, ...);
+#endif
+int pfsync_clear_states(u_int32_t, char *);
+int pfsync_pack_state(u_int8_t, struct pf_state *, int);
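+/*
+ * The wrapper macros below gate syncing before packing a message:
+ * states created by NOSYNC rules or carrying pfsync's own traffic
+ * are marked PFSTATE_NOSYNC, and a state just received from a peer
+ * (PFSTATE_FROMSYNC) is not echoed back as a fresh insertion.
+ */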
+#define pfsync_insert_state(st) do { \
+ if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || \
+ (st->proto == IPPROTO_PFSYNC)) \
+ st->sync_flags |= PFSTATE_NOSYNC; \
+ else if (!st->sync_flags) \
+ pfsync_pack_state(PFSYNC_ACT_INS, (st), \
+ PFSYNC_FLAG_COMPRESS); \
+ st->sync_flags &= ~PFSTATE_FROMSYNC; \
+} while (0)
+#define pfsync_update_state(st) do { \
+ if (!st->sync_flags) \
+ pfsync_pack_state(PFSYNC_ACT_UPD, (st), \
+ PFSYNC_FLAG_COMPRESS); \
+ st->sync_flags &= ~PFSTATE_FROMSYNC; \
+} while (0)
+#define pfsync_delete_state(st) do { \
+ if (!st->sync_flags) \
+ pfsync_pack_state(PFSYNC_ACT_DEL, (st), \
+ PFSYNC_FLAG_COMPRESS); \
+} while (0)
+#ifdef PFSYNC_TDB
+int pfsync_update_tdb(struct tdb *, int);
+#endif
+#endif
+
+#endif /* _NET_IF_PFSYNC_HH_ */
diff --git a/freebsd/sys/contrib/pf/net/pf.c b/freebsd/sys/contrib/pf/net/pf.c
new file mode 100644
index 00000000..4ac395f1
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pf.c
@@ -0,0 +1,7771 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */
+/* add: $OpenBSD: pf.c,v 1.559 2007/09/18 18:45:59 markus Exp $ */
+
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2002,2003 Henning Brauer
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Effort sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F30602-01-2-0537.
+ *
+ */
+
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#endif
+
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_bpf.h>
+#include <freebsd/local/opt_pf.h>
+
+#ifdef DEV_BPF
+#define NBPFILTER DEV_BPF
+#else
+#define NBPFILTER 0
+#endif
+
+#ifdef DEV_PFLOG
+#define NPFLOG DEV_PFLOG
+#else
+#define NPFLOG 0
+#endif
+
+#ifdef DEV_PFSYNC
+#define NPFSYNC DEV_PFSYNC
+#else
+#define NPFSYNC 0
+#endif
+
+#else
+#include <freebsd/local/bpfilter.h>
+#include <freebsd/local/pflog.h>
+#include <freebsd/local/pfsync.h>
+#endif
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/time.h>
+#ifdef __FreeBSD__
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/endian.h>
+#else
+#include <freebsd/sys/pool.h>
+#endif
+#include <freebsd/sys/proc.h>
+#ifdef __FreeBSD__
+#include <freebsd/sys/kthread.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/sx.h>
+#else
+#include <freebsd/sys/rwlock.h>
+#endif
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/route.h>
+#ifndef __FreeBSD__
+#include <freebsd/net/radix_mpath.h>
+#endif
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/udp.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/udp_var.h>
+#include <freebsd/netinet/icmp_var.h>
+#include <freebsd/netinet/if_ether.h>
+
+#ifndef __FreeBSD__
+#include <freebsd/dev/rndvar.h>
+#endif
+#include <freebsd/net/pfvar.h>
+#include <freebsd/net/if_pflog.h>
+
+#if NPFSYNC > 0
+#include <freebsd/net/if_pfsync.h>
+#endif /* NPFSYNC > 0 */
+
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/icmp6.h>
+#include <freebsd/netinet6/nd6.h>
+#ifdef __FreeBSD__
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/in6_pcb.h>
+#endif
+#endif /* INET6 */
+
+#ifdef __FreeBSD__
+#include <freebsd/machine/in_cksum.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/ucred.h>
+#include <freebsd/security/mac/mac_framework.h>
+
+extern int ip_optcopy(struct ip *, struct ip *);
+extern int debug_pfugidhack;
+#endif
+
+#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
+
+/*
+ * Global variables
+ */
+
+struct pf_altqqueue pf_altqs[2];
+struct pf_palist pf_pabuf;
+struct pf_altqqueue *pf_altqs_active;
+struct pf_altqqueue *pf_altqs_inactive;
+struct pf_status pf_status;
+
+u_int32_t ticket_altqs_active;
+u_int32_t ticket_altqs_inactive;
+int altqs_inactive_open;
+u_int32_t ticket_pabuf;
+
+struct pf_anchor_stackframe {
+ struct pf_ruleset *rs;
+ struct pf_rule *r;
+ struct pf_anchor_node *parent;
+ struct pf_anchor *child;
+} pf_anchor_stack[64];
+
+#ifdef __FreeBSD__
+uma_zone_t pf_src_tree_pl, pf_rule_pl;
+uma_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
+#else
+struct pool pf_src_tree_pl, pf_rule_pl;
+struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
+#endif
+
+void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
+
+void pf_init_threshold(struct pf_threshold *, u_int32_t,
+ u_int32_t);
+void pf_add_threshold(struct pf_threshold *);
+int pf_check_threshold(struct pf_threshold *);
+
+void pf_change_ap(struct pf_addr *, u_int16_t *,
+ u_int16_t *, u_int16_t *, struct pf_addr *,
+ u_int16_t, u_int8_t, sa_family_t);
+int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
+ struct tcphdr *, struct pf_state_peer *);
+#ifdef INET6
+void pf_change_a6(struct pf_addr *, u_int16_t *,
+ struct pf_addr *, u_int8_t);
+#endif /* INET6 */
+void pf_change_icmp(struct pf_addr *, u_int16_t *,
+ struct pf_addr *, struct pf_addr *, u_int16_t,
+ u_int16_t *, u_int16_t *, u_int16_t *,
+ u_int16_t *, u_int8_t, sa_family_t);
+#ifdef __FreeBSD__
+void pf_send_tcp(struct mbuf *,
+ const struct pf_rule *, sa_family_t,
+#else
+void pf_send_tcp(const struct pf_rule *, sa_family_t,
+#endif
+ const struct pf_addr *, const struct pf_addr *,
+ u_int16_t, u_int16_t, u_int32_t, u_int32_t,
+ u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
+ u_int16_t, struct ether_header *, struct ifnet *);
+void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
+ sa_family_t, struct pf_rule *);
+struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
+ int, int, struct pfi_kif *,
+ struct pf_addr *, u_int16_t, struct pf_addr *,
+ u_int16_t, int);
+struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *,
+ int, int, struct pfi_kif *, struct pf_src_node **,
+ struct pf_addr *, u_int16_t,
+ struct pf_addr *, u_int16_t,
+ struct pf_addr *, u_int16_t *);
+int pf_test_tcp(struct pf_rule **, struct pf_state **,
+ int, struct pfi_kif *, struct mbuf *, int,
+ void *, struct pf_pdesc *, struct pf_rule **,
+#ifdef __FreeBSD__
+ struct pf_ruleset **, struct ifqueue *,
+ struct inpcb *);
+#else
+ struct pf_ruleset **, struct ifqueue *);
+#endif
+int pf_test_udp(struct pf_rule **, struct pf_state **,
+ int, struct pfi_kif *, struct mbuf *, int,
+ void *, struct pf_pdesc *, struct pf_rule **,
+#ifdef __FreeBSD__
+ struct pf_ruleset **, struct ifqueue *,
+ struct inpcb *);
+#else
+ struct pf_ruleset **, struct ifqueue *);
+#endif
+int pf_test_icmp(struct pf_rule **, struct pf_state **,
+ int, struct pfi_kif *, struct mbuf *, int,
+ void *, struct pf_pdesc *, struct pf_rule **,
+ struct pf_ruleset **, struct ifqueue *);
+int pf_test_other(struct pf_rule **, struct pf_state **,
+ int, struct pfi_kif *, struct mbuf *, int, void *,
+ struct pf_pdesc *, struct pf_rule **,
+ struct pf_ruleset **, struct ifqueue *);
+int pf_test_fragment(struct pf_rule **, int,
+ struct pfi_kif *, struct mbuf *, void *,
+ struct pf_pdesc *, struct pf_rule **,
+ struct pf_ruleset **);
+int pf_tcp_track_full(struct pf_state_peer *,
+ struct pf_state_peer *, struct pf_state **,
+ struct pfi_kif *, struct mbuf *, int,
+ struct pf_pdesc *, u_short *, int *);
+int pf_tcp_track_sloppy(struct pf_state_peer *,
+ struct pf_state_peer *, struct pf_state **,
+ struct pf_pdesc *, u_short *);
+int pf_test_state_tcp(struct pf_state **, int,
+ struct pfi_kif *, struct mbuf *, int,
+ void *, struct pf_pdesc *, u_short *);
+int pf_test_state_udp(struct pf_state **, int,
+ struct pfi_kif *, struct mbuf *, int,
+ void *, struct pf_pdesc *);
+int pf_test_state_icmp(struct pf_state **, int,
+ struct pfi_kif *, struct mbuf *, int,
+ void *, struct pf_pdesc *, u_short *);
+int pf_test_state_other(struct pf_state **, int,
+ struct pfi_kif *, struct pf_pdesc *);
+int pf_match_tag(struct mbuf *, struct pf_rule *,
+ struct pf_mtag *, int *);
+int pf_step_out_of_anchor(int *, struct pf_ruleset **,
+ int, struct pf_rule **, struct pf_rule **,
+ int *);
+void pf_hash(struct pf_addr *, struct pf_addr *,
+ struct pf_poolhashkey *, sa_family_t);
+int pf_map_addr(u_int8_t, struct pf_rule *,
+ struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, struct pf_src_node **);
+int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
+ struct pf_addr *, struct pf_addr *, u_int16_t,
+ struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
+ struct pf_src_node **);
+void pf_route(struct mbuf **, struct pf_rule *, int,
+ struct ifnet *, struct pf_state *,
+ struct pf_pdesc *);
+void pf_route6(struct mbuf **, struct pf_rule *, int,
+ struct ifnet *, struct pf_state *,
+ struct pf_pdesc *);
+#ifdef __FreeBSD__
+/* XXX: import */
+#else
+int pf_socket_lookup(int, struct pf_pdesc *);
+#endif
+u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
+ sa_family_t);
+u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t,
+ sa_family_t);
+u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
+ u_int16_t);
+void pf_set_rt_ifp(struct pf_state *,
+ struct pf_addr *);
+int pf_check_proto_cksum(struct mbuf *, int, int,
+ u_int8_t, sa_family_t);
+int pf_addr_wrap_neq(struct pf_addr_wrap *,
+ struct pf_addr_wrap *);
+struct pf_state *pf_find_state_recurse(struct pfi_kif *,
+ struct pf_state_cmp *, u_int8_t);
+int pf_src_connlimit(struct pf_state **);
+int pf_check_congestion(struct ifqueue *);
+
+#ifdef __FreeBSD__
+int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
+
+extern int pf_end_threads;
+
+struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
+#else
+extern struct pool pfr_ktable_pl;
+extern struct pool pfr_kentry_pl;
+
+struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
+ { &pf_state_pl, PFSTATE_HIWAT },
+ { &pf_src_tree_pl, PFSNODE_HIWAT },
+ { &pf_frent_pl, PFFRAG_FRENT_HIWAT },
+ { &pfr_ktable_pl, PFR_KTABLE_HIWAT },
+ { &pfr_kentry_pl, PFR_KENTRY_HIWAT }
+};
+#endif
+
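+/*
+ * STATE_LOOKUP keys the search on packet direction: inbound packets
+ * are matched in the ext->gwy tree, outbound packets in the
+ * lan->ext tree.  States queued for purging never match, and an
+ * outbound packet owned by a route-to/reply-to state bound to a
+ * different interface is passed through untouched.
+ */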
+#define STATE_LOOKUP() \
+ do { \
+ if (direction == PF_IN) \
+ *state = pf_find_state_recurse( \
+ kif, &key, PF_EXT_GWY); \
+ else \
+ *state = pf_find_state_recurse( \
+ kif, &key, PF_LAN_EXT); \
+ if (*state == NULL || (*state)->timeout == PFTM_PURGE) \
+ return (PF_DROP); \
+ if (direction == PF_OUT && \
+ (((*state)->rule.ptr->rt == PF_ROUTETO && \
+ (*state)->rule.ptr->direction == PF_OUT) || \
+ ((*state)->rule.ptr->rt == PF_REPLYTO && \
+ (*state)->rule.ptr->direction == PF_IN)) && \
+ (*state)->rt_kif != NULL && \
+ (*state)->rt_kif != kif) \
+ return (PF_PASS); \
+ } while (0)
+
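+/*
+ * STATE_TRANSLATE is true when the state's lan and gwy sides differ
+ * in address or port, i.e. when NAT/redirection applies and the
+ * packet must be rewritten.
+ */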
+#define STATE_TRANSLATE(s) \
+ (s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
+ ((s)->af == AF_INET6 && \
+ ((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
+ (s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
+ (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
+ (s)->lan.port != (s)->gwy.port
+
+#define BOUND_IFACE(r, k) \
+ ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
+
+#define STATE_INC_COUNTERS(s) \
+ do { \
+ s->rule.ptr->states++; \
+ if (s->anchor.ptr != NULL) \
+ s->anchor.ptr->states++; \
+ if (s->nat_rule.ptr != NULL) \
+ s->nat_rule.ptr->states++; \
+ } while (0)
+
+#define STATE_DEC_COUNTERS(s) \
+ do { \
+ if (s->nat_rule.ptr != NULL) \
+ s->nat_rule.ptr->states--; \
+ if (s->anchor.ptr != NULL) \
+ s->anchor.ptr->states--; \
+ s->rule.ptr->states--; \
+ } while (0)
+
+struct pf_src_tree tree_src_tracking;
+
+struct pf_state_tree_id tree_id;
+struct pf_state_queue state_list;
+
+#ifdef __FreeBSD__
+static int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
+static int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *);
+static int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *);
+static int pf_state_compare_id(struct pf_state *, struct pf_state *);
+#endif
+
+RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
+RB_GENERATE(pf_state_tree_lan_ext, pf_state,
+ u.s.entry_lan_ext, pf_state_compare_lan_ext);
+RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
+ u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
+RB_GENERATE(pf_state_tree_id, pf_state,
+ u.s.entry_id, pf_state_compare_id);
+
+#ifdef __FreeBSD__
+static int
+#else
+static __inline int
+#endif
+pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
+{
+ int diff;
+
+ if (a->rule.ptr > b->rule.ptr)
+ return (1);
+ if (a->rule.ptr < b->rule.ptr)
+ return (-1);
+ if ((diff = a->af - b->af) != 0)
+ return (diff);
+ switch (a->af) {
+#ifdef INET
+ case AF_INET:
+ if (a->addr.addr32[0] > b->addr.addr32[0])
+ return (1);
+ if (a->addr.addr32[0] < b->addr.addr32[0])
+ return (-1);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (a->addr.addr32[3] > b->addr.addr32[3])
+ return (1);
+ if (a->addr.addr32[3] < b->addr.addr32[3])
+ return (-1);
+ if (a->addr.addr32[2] > b->addr.addr32[2])
+ return (1);
+ if (a->addr.addr32[2] < b->addr.addr32[2])
+ return (-1);
+ if (a->addr.addr32[1] > b->addr.addr32[1])
+ return (1);
+ if (a->addr.addr32[1] < b->addr.addr32[1])
+ return (-1);
+ if (a->addr.addr32[0] > b->addr.addr32[0])
+ return (1);
+ if (a->addr.addr32[0] < b->addr.addr32[0])
+ return (-1);
+ break;
+#endif /* INET6 */
+ }
+ return (0);
+}
+
+#ifdef __FreeBSD__
+static int
+#else
+static __inline int
+#endif
+pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
+{
+ int diff;
+
+ if ((diff = a->proto - b->proto) != 0)
+ return (diff);
+ if ((diff = a->af - b->af) != 0)
+ return (diff);
+ switch (a->af) {
+#ifdef INET
+ case AF_INET:
+ if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
+ return (1);
+ if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
+ return (-1);
+ if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
+ return (1);
+ if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
+ return (-1);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
+ return (1);
+ if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
+ return (-1);
+ if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
+ return (1);
+ if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
+ return (-1);
+ if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
+ return (1);
+ if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
+ return (-1);
+ if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
+ return (1);
+ if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
+ return (-1);
+ if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
+ return (1);
+ if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
+ return (-1);
+ if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
+ return (1);
+ if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
+ return (-1);
+ if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
+ return (1);
+ if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
+ return (-1);
+ if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
+ return (1);
+ if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
+ return (-1);
+ break;
+#endif /* INET6 */
+ }
+
+ if ((diff = a->lan.port - b->lan.port) != 0)
+ return (diff);
+ if ((diff = a->ext.port - b->ext.port) != 0)
+ return (diff);
+
+ return (0);
+}
+
+#ifdef __FreeBSD__
+static int
+#else
+static __inline int
+#endif
+pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
+{
+ int diff;
+
+ if ((diff = a->proto - b->proto) != 0)
+ return (diff);
+ if ((diff = a->af - b->af) != 0)
+ return (diff);
+ switch (a->af) {
+#ifdef INET
+ case AF_INET:
+ if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
+ return (1);
+ if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
+ return (-1);
+ if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
+ return (1);
+ if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
+ return (-1);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
+ return (1);
+ if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
+ return (-1);
+ if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
+ return (1);
+ if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
+ return (-1);
+ if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
+ return (1);
+ if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
+ return (-1);
+ if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
+ return (1);
+ if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
+ return (-1);
+ if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
+ return (1);
+ if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
+ return (-1);
+ if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
+ return (1);
+ if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
+ return (-1);
+ if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
+ return (1);
+ if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
+ return (-1);
+ if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
+ return (1);
+ if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
+ return (-1);
+ break;
+#endif /* INET6 */
+ }
+
+ if ((diff = a->ext.port - b->ext.port) != 0)
+ return (diff);
+ if ((diff = a->gwy.port - b->gwy.port) != 0)
+ return (diff);
+
+ return (0);
+}
+
+#ifdef __FreeBSD__
+static int
+#else
+static __inline int
+#endif
+pf_state_compare_id(struct pf_state *a, struct pf_state *b)
+{
+ if (a->id > b->id)
+ return (1);
+ if (a->id < b->id)
+ return (-1);
+ if (a->creatorid > b->creatorid)
+ return (1);
+ if (a->creatorid < b->creatorid)
+ return (-1);
+
+ return (0);
+}
+
+#ifdef INET6
+void
+pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
+{
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ dst->addr32[0] = src->addr32[0];
+ break;
+#endif /* INET */
+ case AF_INET6:
+ dst->addr32[0] = src->addr32[0];
+ dst->addr32[1] = src->addr32[1];
+ dst->addr32[2] = src->addr32[2];
+ dst->addr32[3] = src->addr32[3];
+ break;
+ }
+}
+#endif /* INET6 */
+
+struct pf_state *
+pf_find_state_byid(struct pf_state_cmp *key)
+{
+ pf_status.fcounters[FCNT_STATE_SEARCH]++;
+ return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
+}
+
+struct pf_state *
+pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree)
+{
+ struct pf_state *s;
+
+ pf_status.fcounters[FCNT_STATE_SEARCH]++;
+
+ switch (tree) {
+ case PF_LAN_EXT:
+ if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext,
+ (struct pf_state *)key)) != NULL)
+ return (s);
+ if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext,
+ (struct pf_state *)key)) != NULL)
+ return (s);
+ return (NULL);
+ case PF_EXT_GWY:
+ if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy,
+ (struct pf_state *)key)) != NULL)
+ return (s);
+ if ((s = RB_FIND(pf_state_tree_ext_gwy, &pfi_all->pfik_ext_gwy,
+ (struct pf_state *)key)) != NULL)
+ return (s);
+ return (NULL);
+ default:
+ panic("pf_find_state_recurse");
+ }
+}
+
+struct pf_state *
+pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more)
+{
+ struct pf_state *s, *ss = NULL;
+ struct pfi_kif *kif;
+
+ pf_status.fcounters[FCNT_STATE_SEARCH]++;
+
+ switch (tree) {
+ case PF_LAN_EXT:
+ TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
+ s = RB_FIND(pf_state_tree_lan_ext,
+ &kif->pfik_lan_ext, (struct pf_state *)key);
+ if (s == NULL)
+ continue;
+ if (more == NULL)
+ return (s);
+ ss = s;
+ (*more)++;
+ }
+ return (ss);
+ case PF_EXT_GWY:
+ TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
+ s = RB_FIND(pf_state_tree_ext_gwy,
+ &kif->pfik_ext_gwy, (struct pf_state *)key);
+ if (s == NULL)
+ continue;
+ if (more == NULL)
+ return (s);
+ ss = s;
+ (*more)++;
+ }
+ return (ss);
+ default:
+ panic("pf_find_state_all");
+ }
+}
+
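+/*
+ * Connection-rate thresholds use a fixed-point, linearly decaying
+ * counter: each hit adds PF_THRESHOLD_MULT and pf_add_threshold
+ * first ages the count by elapsed/seconds, so roughly "limit" hits
+ * within the window trip pf_check_threshold.
+ */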
+void
+pf_init_threshold(struct pf_threshold *threshold,
+ u_int32_t limit, u_int32_t seconds)
+{
+ threshold->limit = limit * PF_THRESHOLD_MULT;
+ threshold->seconds = seconds;
+ threshold->count = 0;
+ threshold->last = time_second;
+}
+
+void
+pf_add_threshold(struct pf_threshold *threshold)
+{
+ u_int32_t t = time_second, diff = t - threshold->last;
+
+ if (diff >= threshold->seconds)
+ threshold->count = 0;
+ else
+ threshold->count -= threshold->count * diff /
+ threshold->seconds;
+ threshold->count += PF_THRESHOLD_MULT;
+ threshold->last = t;
+}
+
+int
+pf_check_threshold(struct pf_threshold *threshold)
+{
+ return (threshold->count > threshold->limit);
+}
+
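+/*
+ * Enforce max-src-conn and max-src-conn-rate for a freshly
+ * established TCP state.  On overflow the offending address may be
+ * added to the rule's overload table and, if the flush option is
+ * set, existing states from that source are torn down; the current
+ * state itself is always killed, signalled by returning 1.
+ */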
+int
+pf_src_connlimit(struct pf_state **state)
+{
+ struct pf_state *s;
+ int bad = 0;
+
+ (*state)->src_node->conn++;
+ (*state)->src.tcp_est = 1;
+ pf_add_threshold(&(*state)->src_node->conn_rate);
+
+ if ((*state)->rule.ptr->max_src_conn &&
+ (*state)->rule.ptr->max_src_conn <
+ (*state)->src_node->conn) {
+ pf_status.lcounters[LCNT_SRCCONN]++;
+ bad++;
+ }
+
+ if ((*state)->rule.ptr->max_src_conn_rate.limit &&
+ pf_check_threshold(&(*state)->src_node->conn_rate)) {
+ pf_status.lcounters[LCNT_SRCCONNRATE]++;
+ bad++;
+ }
+
+ if (!bad)
+ return (0);
+
+ if ((*state)->rule.ptr->overload_tbl) {
+ struct pfr_addr p;
+ u_int32_t killed = 0;
+
+ pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf_src_connlimit: blocking address ");
+ pf_print_host(&(*state)->src_node->addr, 0,
+ (*state)->af);
+ }
+
+ bzero(&p, sizeof(p));
+ p.pfra_af = (*state)->af;
+ switch ((*state)->af) {
+#ifdef INET
+ case AF_INET:
+ p.pfra_net = 32;
+ p.pfra_ip4addr = (*state)->src_node->addr.v4;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ p.pfra_net = 128;
+ p.pfra_ip6addr = (*state)->src_node->addr.v6;
+ break;
+#endif /* INET6 */
+ }
+
+ pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
+ &p, time_second);
+
+ /* kill existing states if that's required. */
+ if ((*state)->rule.ptr->flush) {
+ pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
+
+ RB_FOREACH(s, pf_state_tree_id, &tree_id) {
+ /*
+ * Kill states from this source. (Only those
+ * from the same rule if PF_FLUSH_GLOBAL is not
+ * set)
+ */
+ if (s->af == (*state)->af &&
+ (((*state)->direction == PF_OUT &&
+ PF_AEQ(&(*state)->src_node->addr,
+ &s->lan.addr, s->af)) ||
+ ((*state)->direction == PF_IN &&
+ PF_AEQ(&(*state)->src_node->addr,
+ &s->ext.addr, s->af))) &&
+ ((*state)->rule.ptr->flush &
+ PF_FLUSH_GLOBAL ||
+ (*state)->rule.ptr == s->rule.ptr)) {
+ s->timeout = PFTM_PURGE;
+ s->src.state = s->dst.state =
+ TCPS_CLOSED;
+ killed++;
+ }
+ }
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf(", %u states killed", killed);
+ }
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("\n");
+ }
+
+ /* kill this state */
+ (*state)->timeout = PFTM_PURGE;
+ (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
+ return (1);
+}
+
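+/*
+ * Look up or create the source-tracking node for src.  Nodes are
+ * keyed on the rule pointer only for rule-scoped tracking
+ * (PFRULE_RULESRCTRACK or sticky-address pools) and are otherwise
+ * global.  Returns -1 when max-src-nodes or max-src-states is hit
+ * or the allocation fails.
+ */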
+int
+pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
+ struct pf_addr *src, sa_family_t af)
+{
+ struct pf_src_node k;
+
+ if (*sn == NULL) {
+ k.af = af;
+ PF_ACPY(&k.addr, src, af);
+ if (rule->rule_flag & PFRULE_RULESRCTRACK ||
+ rule->rpool.opts & PF_POOL_STICKYADDR)
+ k.rule.ptr = rule;
+ else
+ k.rule.ptr = NULL;
+ pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
+ *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
+ }
+ if (*sn == NULL) {
+ if (!rule->max_src_nodes ||
+ rule->src_nodes < rule->max_src_nodes)
+ (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
+ else
+ pf_status.lcounters[LCNT_SRCNODES]++;
+ if ((*sn) == NULL)
+ return (-1);
+ bzero(*sn, sizeof(struct pf_src_node));
+
+ pf_init_threshold(&(*sn)->conn_rate,
+ rule->max_src_conn_rate.limit,
+ rule->max_src_conn_rate.seconds);
+
+ (*sn)->af = af;
+ if (rule->rule_flag & PFRULE_RULESRCTRACK ||
+ rule->rpool.opts & PF_POOL_STICKYADDR)
+ (*sn)->rule.ptr = rule;
+ else
+ (*sn)->rule.ptr = NULL;
+ PF_ACPY(&(*sn)->addr, src, af);
+ if (RB_INSERT(pf_src_tree,
+ &tree_src_tracking, *sn) != NULL) {
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: src_tree insert failed: ");
+ pf_print_host(&(*sn)->addr, 0, af);
+ printf("\n");
+ }
+ pool_put(&pf_src_tree_pl, *sn);
+ return (-1);
+ }
+ (*sn)->creation = time_second;
+ (*sn)->ruletype = rule->action;
+ if ((*sn)->rule.ptr != NULL)
+ (*sn)->rule.ptr->src_nodes++;
+ pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
+ pf_status.src_nodes++;
+ } else {
+ if (rule->max_src_states &&
+ (*sn)->states >= rule->max_src_states) {
+ pf_status.lcounters[LCNT_SRCSTATES]++;
+ return (-1);
+ }
+ }
+ return (0);
+}
+
+int
+pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
+{
+ /* Thou MUST NOT insert multiple duplicate keys */
+ state->u.s.kif = kif;
+ if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: state insert failed: tree_lan_ext");
+ printf(" lan: ");
+ pf_print_host(&state->lan.addr, state->lan.port,
+ state->af);
+ printf(" gwy: ");
+ pf_print_host(&state->gwy.addr, state->gwy.port,
+ state->af);
+ printf(" ext: ");
+ pf_print_host(&state->ext.addr, state->ext.port,
+ state->af);
+ if (state->sync_flags & PFSTATE_FROMSYNC)
+ printf(" (from sync)");
+ printf("\n");
+ }
+ return (-1);
+ }
+
+ if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: state insert failed: tree_ext_gwy");
+ printf(" lan: ");
+ pf_print_host(&state->lan.addr, state->lan.port,
+ state->af);
+ printf(" gwy: ");
+ pf_print_host(&state->gwy.addr, state->gwy.port,
+ state->af);
+ printf(" ext: ");
+ pf_print_host(&state->ext.addr, state->ext.port,
+ state->af);
+ if (state->sync_flags & PFSTATE_FROMSYNC)
+ printf(" (from sync)");
+ printf("\n");
+ }
+ RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
+ return (-1);
+ }
+
+ if (state->id == 0 && state->creatorid == 0) {
+ state->id = htobe64(pf_status.stateid++);
+ state->creatorid = pf_status.hostid;
+ }
+ if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+#ifdef __FreeBSD__
+ printf("pf: state insert failed: "
+ "id: %016llx creatorid: %08x",
+ (long long)be64toh(state->id),
+ ntohl(state->creatorid));
+#else
+ printf("pf: state insert failed: "
+ "id: %016llx creatorid: %08x",
+ betoh64(state->id), ntohl(state->creatorid));
+#endif
+ if (state->sync_flags & PFSTATE_FROMSYNC)
+ printf(" (from sync)");
+ printf("\n");
+ }
+ RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
+ RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
+ return (-1);
+ }
+ TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list);
+ pf_status.fcounters[FCNT_STATE_INSERT]++;
+ pf_status.states++;
+ pfi_kif_ref(kif, PFI_KIF_REF_STATE);
+#if NPFSYNC
+ pfsync_insert_state(state);
+#endif
+ return (0);
+}
+
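+/*
+ * The purge thread wakes once a second and expires only a fraction
+ * of the state table per pass, sized so that a full sweep completes
+ * in about PFTM_INTERVAL seconds; fragments and source nodes are
+ * purged once per full interval.
+ */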
+void
+pf_purge_thread(void *v)
+{
+ int nloops = 0, s;
+#ifdef __FreeBSD__
+ int locked;
+#endif
+
+ for (;;) {
+ tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
+
+#ifdef __FreeBSD__
+ sx_slock(&pf_consistency_lock);
+ PF_LOCK();
+ locked = 0;
+
+ if (pf_end_threads) {
+ PF_UNLOCK();
+ sx_sunlock(&pf_consistency_lock);
+ sx_xlock(&pf_consistency_lock);
+ PF_LOCK();
+ pf_purge_expired_states(pf_status.states, 1);
+ pf_purge_expired_fragments();
+ pf_purge_expired_src_nodes(1);
+ pf_end_threads++;
+
+ sx_xunlock(&pf_consistency_lock);
+ PF_UNLOCK();
+ wakeup(pf_purge_thread);
+ kproc_exit(0);
+ }
+#endif
+ s = splsoftnet();
+
+ /* process a fraction of the state table every second */
+#ifdef __FreeBSD__
+ if (!pf_purge_expired_states(1 + (pf_status.states
+ / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
+ PF_UNLOCK();
+ sx_sunlock(&pf_consistency_lock);
+ sx_xlock(&pf_consistency_lock);
+ PF_LOCK();
+ locked = 1;
+
+ pf_purge_expired_states(1 + (pf_status.states
+ / pf_default_rule.timeout[PFTM_INTERVAL]), 1);
+ }
+#else
+ pf_purge_expired_states(1 + (pf_status.states
+ / pf_default_rule.timeout[PFTM_INTERVAL]));
+#endif
+
+ /* purge other expired types every PFTM_INTERVAL seconds */
+ if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+ pf_purge_expired_fragments();
+ if (!pf_purge_expired_src_nodes(locked)) {
+ PF_UNLOCK();
+ sx_sunlock(&pf_consistency_lock);
+ sx_xlock(&pf_consistency_lock);
+ PF_LOCK();
+ locked = 1;
+ pf_purge_expired_src_nodes(1);
+ }
+ nloops = 0;
+ }
+
+ splx(s);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+ if (locked)
+ sx_xunlock(&pf_consistency_lock);
+ else
+ sx_sunlock(&pf_consistency_lock);
+#endif
+ }
+}
+
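+/*
+ * Adaptive timeouts: once the relevant state count passes the
+ * ADAPTIVE_START threshold, the configured timeout is scaled by
+ * (end - states) / (end - start), reaching zero at ADAPTIVE_END.
+ * For example, with start 6000, end 12000 and 9000 states, timeouts
+ * run at half their configured length.
+ */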
+u_int32_t
+pf_state_expires(const struct pf_state *state)
+{
+ u_int32_t timeout;
+ u_int32_t start;
+ u_int32_t end;
+ u_int32_t states;
+
+ /* handle all PFTM_* > PFTM_MAX here */
+ if (state->timeout == PFTM_PURGE)
+ return (time_second);
+ if (state->timeout == PFTM_UNTIL_PACKET)
+ return (0);
+#ifdef __FreeBSD__
+ KASSERT(state->timeout != PFTM_UNLINKED,
+ ("pf_state_expires: timeout == PFTM_UNLINKED"));
+ KASSERT((state->timeout < PFTM_MAX),
+ ("pf_state_expires: timeout > PFTM_MAX"));
+#else
+ KASSERT(state->timeout != PFTM_UNLINKED);
+ KASSERT(state->timeout < PFTM_MAX);
+#endif
+ timeout = state->rule.ptr->timeout[state->timeout];
+ if (!timeout)
+ timeout = pf_default_rule.timeout[state->timeout];
+ start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
+ if (start) {
+ end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
+ states = state->rule.ptr->states;
+ } else {
+ start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
+ end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
+ states = pf_status.states;
+ }
+ if (end && states > start && start < end) {
+ if (states < end)
+ return (state->expire + timeout * (end - states) /
+ (end - start));
+ else
+ return (time_second);
+ }
+ return (state->expire + timeout);
+}
+
+#ifdef __FreeBSD__
+int
+pf_purge_expired_src_nodes(int waslocked)
+#else
+void
+pf_purge_expired_src_nodes(int waslocked)
+#endif
+{
+ struct pf_src_node *cur, *next;
+ int locked = waslocked;
+
+ for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
+ next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
+
+ if (cur->states <= 0 && cur->expire <= time_second) {
+ if (! locked) {
+#ifdef __FreeBSD__
+ if (!sx_try_upgrade(&pf_consistency_lock))
+ return (0);
+#else
+ rw_enter_write(&pf_consistency_lock);
+#endif
+ next = RB_NEXT(pf_src_tree,
+ &tree_src_tracking, cur);
+ locked = 1;
+ }
+ if (cur->rule.ptr != NULL) {
+ cur->rule.ptr->src_nodes--;
+ if (cur->rule.ptr->states <= 0 &&
+ cur->rule.ptr->max_src_nodes <= 0)
+ pf_rm_rule(NULL, cur->rule.ptr);
+ }
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, cur);
+ }
+ }
+
+ if (locked && !waslocked)
+#ifdef __FreeBSD__
+ sx_downgrade(&pf_consistency_lock);
+#else
+ rw_exit_write(&pf_consistency_lock);
+#endif
+
+#ifdef __FreeBSD__
+ return (1);
+#endif
+}
+
+void
+pf_src_tree_remove_state(struct pf_state *s)
+{
+ u_int32_t timeout;
+
+ if (s->src_node != NULL) {
+ if (s->proto == IPPROTO_TCP) {
+ if (s->src.tcp_est)
+ --s->src_node->conn;
+ }
+ if (--s->src_node->states <= 0) {
+ timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
+ if (!timeout)
+ timeout =
+ pf_default_rule.timeout[PFTM_SRC_NODE];
+ s->src_node->expire = time_second + timeout;
+ }
+ }
+ if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
+ if (--s->nat_src_node->states <= 0) {
+ timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
+ if (!timeout)
+ timeout =
+ pf_default_rule.timeout[PFTM_SRC_NODE];
+ s->nat_src_node->expire = time_second + timeout;
+ }
+ }
+ s->src_node = s->nat_src_node = NULL;
+}
+
+/* callers should be at splsoftnet */
+void
+pf_unlink_state(struct pf_state *cur)
+{
+#ifdef __FreeBSD__
+ if (cur->local_flags & PFSTATE_EXPIRING)
+ return;
+ cur->local_flags |= PFSTATE_EXPIRING;
+#endif
+ if (cur->src.state == PF_TCPS_PROXY_DST) {
+#ifdef __FreeBSD__
+ pf_send_tcp(NULL, cur->rule.ptr, cur->af,
+#else
+ pf_send_tcp(cur->rule.ptr, cur->af,
+#endif
+ &cur->ext.addr, &cur->lan.addr,
+ cur->ext.port, cur->lan.port,
+ cur->src.seqhi, cur->src.seqlo + 1,
+ TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
+ }
+ RB_REMOVE(pf_state_tree_ext_gwy,
+ &cur->u.s.kif->pfik_ext_gwy, cur);
+ RB_REMOVE(pf_state_tree_lan_ext,
+ &cur->u.s.kif->pfik_lan_ext, cur);
+ RB_REMOVE(pf_state_tree_id, &tree_id, cur);
+#if NPFSYNC
+ if (cur->creatorid == pf_status.hostid)
+ pfsync_delete_state(cur);
+#endif
+ cur->timeout = PFTM_UNLINKED;
+ pf_src_tree_remove_state(cur);
+}
+
+/* callers should be at splsoftnet and hold the
+ * write_lock on pf_consistency_lock */
+void
+pf_free_state(struct pf_state *cur)
+{
+#if NPFSYNC
+ if (pfsyncif != NULL &&
+ (pfsyncif->sc_bulk_send_next == cur ||
+ pfsyncif->sc_bulk_terminator == cur))
+ return;
+#endif
+#ifdef __FreeBSD__
+ KASSERT(cur->timeout == PFTM_UNLINKED,
+ ("pf_free_state: cur->timeout != PFTM_UNLINKED"));
+#else
+ KASSERT(cur->timeout == PFTM_UNLINKED);
+#endif
+ if (--cur->rule.ptr->states <= 0 &&
+ cur->rule.ptr->src_nodes <= 0)
+ pf_rm_rule(NULL, cur->rule.ptr);
+ if (cur->nat_rule.ptr != NULL)
+ if (--cur->nat_rule.ptr->states <= 0 &&
+ cur->nat_rule.ptr->src_nodes <= 0)
+ pf_rm_rule(NULL, cur->nat_rule.ptr);
+ if (cur->anchor.ptr != NULL)
+ if (--cur->anchor.ptr->states <= 0)
+ pf_rm_rule(NULL, cur->anchor.ptr);
+ pf_normalize_tcp_cleanup(cur);
+ pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE);
+ TAILQ_REMOVE(&state_list, cur, u.s.entry_list);
+ if (cur->tag)
+ pf_tag_unref(cur->tag);
+ pool_put(&pf_state_pl, cur);
+ pf_status.fcounters[FCNT_STATE_REMOVALS]++;
+ pf_status.states--;
+}
+
+#ifdef __FreeBSD__
+int
+pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
+#else
+void
+pf_purge_expired_states(u_int32_t maxcheck)
+#endif
+{
+ static struct pf_state *cur = NULL;
+ struct pf_state *next;
+#ifdef __FreeBSD__
+ int locked = waslocked;
+#else
+ int locked = 0;
+#endif
+
+ while (maxcheck--) {
+ /* wrap to start of list when we hit the end */
+ if (cur == NULL) {
+ cur = TAILQ_FIRST(&state_list);
+ if (cur == NULL)
+ break; /* list empty */
+ }
+
+ /* get next state, as cur may get deleted */
+ next = TAILQ_NEXT(cur, u.s.entry_list);
+
+ if (cur->timeout == PFTM_UNLINKED) {
+ /* free unlinked state */
+ if (! locked) {
+#ifdef __FreeBSD__
+ if (!sx_try_upgrade(&pf_consistency_lock))
+ return (0);
+#else
+ rw_enter_write(&pf_consistency_lock);
+#endif
+ locked = 1;
+ }
+ pf_free_state(cur);
+ } else if (pf_state_expires(cur) <= time_second) {
+ /* unlink and free expired state */
+ pf_unlink_state(cur);
+ if (! locked) {
+#ifdef __FreeBSD__
+ if (!sx_try_upgrade(&pf_consistency_lock))
+ return (0);
+#else
+ rw_enter_write(&pf_consistency_lock);
+#endif
+ locked = 1;
+ }
+ pf_free_state(cur);
+ }
+ cur = next;
+ }
+
+#ifdef __FreeBSD__
+ if (!waslocked && locked)
+ sx_downgrade(&pf_consistency_lock);
+
+ return (1);
+#else
+ if (locked)
+ rw_exit_write(&pf_consistency_lock);
+#endif
+}
+
+int
+pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
+{
+ if (aw->type != PF_ADDR_TABLE)
+ return (0);
+ if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
+ return (1);
+ return (0);
+}
+
+void
+pf_tbladdr_remove(struct pf_addr_wrap *aw)
+{
+ if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
+ return;
+ pfr_detach_table(aw->p.tbl);
+ aw->p.tbl = NULL;
+}
+
+void
+pf_tbladdr_copyout(struct pf_addr_wrap *aw)
+{
+ struct pfr_ktable *kt = aw->p.tbl;
+
+ if (aw->type != PF_ADDR_TABLE || kt == NULL)
+ return;
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
+ kt = kt->pfrkt_root;
+ aw->p.tbl = NULL;
+ aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
+ kt->pfrkt_cnt : -1;
+}
+
+void
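+/*
+ * Print an address (and optional port) in human-readable form; for
+ * IPv6 the longest run of zero 16-bit groups is collapsed, in the
+ * style of "::" notation.
+ */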
+pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
+{
+ switch (af) {
+#ifdef INET
+ case AF_INET: {
+ u_int32_t a = ntohl(addr->addr32[0]);
+ printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
+ (a>>8)&255, a&255);
+ if (p) {
+ p = ntohs(p);
+ printf(":%u", p);
+ }
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6: {
+ u_int16_t b;
+ u_int8_t i, curstart = 255, curend = 0,
+ maxstart = 0, maxend = 0;
+ for (i = 0; i < 8; i++) {
+ if (!addr->addr16[i]) {
+ if (curstart == 255)
+ curstart = i;
+ else
+ curend = i;
+ } else {
+ if (curstart) {
+ if ((curend - curstart) >
+ (maxend - maxstart)) {
+ maxstart = curstart;
+ maxend = curend;
+ curstart = 255;
+ }
+ }
+ }
+ }
+ for (i = 0; i < 8; i++) {
+ if (i >= maxstart && i <= maxend) {
+ if (maxend != 7) {
+ if (i == maxstart)
+ printf(":");
+ } else {
+ if (i == maxend)
+ printf(":");
+ }
+ } else {
+ b = ntohs(addr->addr16[i]);
+ printf("%x", b);
+ if (i < 7)
+ printf(":");
+ }
+ }
+ if (p) {
+ p = ntohs(p);
+ printf("[%u]", p);
+ }
+ break;
+ }
+#endif /* INET6 */
+ }
+}
+
+void
+pf_print_state(struct pf_state *s)
+{
+ switch (s->proto) {
+ case IPPROTO_TCP:
+ printf("TCP ");
+ break;
+ case IPPROTO_UDP:
+ printf("UDP ");
+ break;
+ case IPPROTO_ICMP:
+ printf("ICMP ");
+ break;
+ case IPPROTO_ICMPV6:
+ printf("ICMPV6 ");
+ break;
+ default:
+ printf("%u ", s->proto);
+ break;
+ }
+ pf_print_host(&s->lan.addr, s->lan.port, s->af);
+ printf(" ");
+ pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
+ printf(" ");
+ pf_print_host(&s->ext.addr, s->ext.port, s->af);
+ printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
+ s->src.seqhi, s->src.max_win, s->src.seqdiff);
+ if (s->src.wscale && s->dst.wscale)
+ printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
+ printf("]");
+ printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
+ s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
+ if (s->src.wscale && s->dst.wscale)
+ printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
+ printf("]");
+ printf(" %u:%u", s->src.state, s->dst.state);
+}
+
+void
+pf_print_flags(u_int8_t f)
+{
+ if (f)
+ printf(" ");
+ if (f & TH_FIN)
+ printf("F");
+ if (f & TH_SYN)
+ printf("S");
+ if (f & TH_RST)
+ printf("R");
+ if (f & TH_PUSH)
+ printf("P");
+ if (f & TH_ACK)
+ printf("A");
+ if (f & TH_URG)
+ printf("U");
+ if (f & TH_ECE)
+ printf("E");
+ if (f & TH_CWR)
+ printf("W");
+}
+
+#define PF_SET_SKIP_STEPS(i) \
+ do { \
+ while (head[i] != cur) { \
+ head[i]->skip[i].ptr = cur; \
+ head[i] = TAILQ_NEXT(head[i], entries); \
+ } \
+ } while (0)
+
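+/*
+ * Skip steps are pf's ruleset evaluation shortcut: for each rule and
+ * each field (interface, direction, af, proto, addresses, ports),
+ * skip[i] points at the next rule whose field differs, letting the
+ * evaluator jump over whole runs of rules that cannot match once a
+ * single comparison has failed.
+ */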
+void
+pf_calc_skip_steps(struct pf_rulequeue *rules)
+{
+ struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
+ int i;
+
+ cur = TAILQ_FIRST(rules);
+ prev = cur;
+ for (i = 0; i < PF_SKIP_COUNT; ++i)
+ head[i] = cur;
+ while (cur != NULL) {
+
+ if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
+ PF_SET_SKIP_STEPS(PF_SKIP_IFP);
+ if (cur->direction != prev->direction)
+ PF_SET_SKIP_STEPS(PF_SKIP_DIR);
+ if (cur->af != prev->af)
+ PF_SET_SKIP_STEPS(PF_SKIP_AF);
+ if (cur->proto != prev->proto)
+ PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
+ if (cur->src.neg != prev->src.neg ||
+ pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
+ PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
+ if (cur->src.port[0] != prev->src.port[0] ||
+ cur->src.port[1] != prev->src.port[1] ||
+ cur->src.port_op != prev->src.port_op)
+ PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
+ if (cur->dst.neg != prev->dst.neg ||
+ pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
+ PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
+ if (cur->dst.port[0] != prev->dst.port[0] ||
+ cur->dst.port[1] != prev->dst.port[1] ||
+ cur->dst.port_op != prev->dst.port_op)
+ PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
+
+ prev = cur;
+ cur = TAILQ_NEXT(cur, entries);
+ }
+ for (i = 0; i < PF_SKIP_COUNT; ++i)
+ PF_SET_SKIP_STEPS(i);
+}
+
+int
+pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
+{
+ if (aw1->type != aw2->type)
+ return (1);
+ switch (aw1->type) {
+ case PF_ADDR_ADDRMASK:
+ if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
+ return (1);
+ if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
+ return (1);
+ return (0);
+ case PF_ADDR_DYNIFTL:
+ return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
+ case PF_ADDR_NOROUTE:
+ case PF_ADDR_URPFFAILED:
+ return (0);
+ case PF_ADDR_TABLE:
+ return (aw1->p.tbl != aw2->p.tbl);
+ case PF_ADDR_RTLABEL:
+ return (aw1->v.rtlabel != aw2->v.rtlabel);
+ default:
+ printf("invalid address type: %d\n", aw1->type);
+ return (1);
+ }
+}
+
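+/*
+ * Incremental Internet checksum update (in the spirit of RFC 1624):
+ * fold the difference between an old and a new 16-bit value into an
+ * existing checksum instead of recomputing it.  The udp flag covers
+ * the special encoding where 0 means "no checksum": an absent UDP
+ * checksum stays absent, and a computed 0 is transmitted as 0xFFFF.
+ */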
+u_int16_t
+pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
+{
+ u_int32_t l;
+
+ if (udp && !cksum)
+ return (0x0000);
+ l = cksum + old - new;
+ l = (l >> 16) + (l & 65535);
+ l = l & 65535;
+ if (udp && !l)
+ return (0xFFFF);
+ return (l);
+}
+
+void
+pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
+ struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
+{
+ struct pf_addr ao;
+ u_int16_t po = *p;
+
+ PF_ACPY(&ao, a, af);
+ PF_ACPY(a, an, af);
+
+ *p = pn;
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
+ ao.addr16[0], an->addr16[0], 0),
+ ao.addr16[1], an->addr16[1], 0);
+ *p = pn;
+ *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+ ao.addr16[0], an->addr16[0], u),
+ ao.addr16[1], an->addr16[1], u),
+ po, pn, u);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+ pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+ pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+ ao.addr16[0], an->addr16[0], u),
+ ao.addr16[1], an->addr16[1], u),
+ ao.addr16[2], an->addr16[2], u),
+ ao.addr16[3], an->addr16[3], u),
+ ao.addr16[4], an->addr16[4], u),
+ ao.addr16[5], an->addr16[5], u),
+ ao.addr16[6], an->addr16[6], u),
+ ao.addr16[7], an->addr16[7], u),
+ po, pn, u);
+ break;
+#endif /* INET6 */
+ }
+}
+
+
+/* Changes a u_int32_t. Uses a void * so there are no alignment restrictions */
+void
+pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
+{
+ u_int32_t ao;
+
+ memcpy(&ao, a, sizeof(ao));
+ memcpy(a, &an, sizeof(u_int32_t));
+ *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
+ ao % 65536, an % 65536, u);
+}
+
+#ifdef INET6
+void
+pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
+{
+ struct pf_addr ao;
+
+ PF_ACPY(&ao, a, AF_INET6);
+ PF_ACPY(a, an, AF_INET6);
+
+ *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+ pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+ pf_cksum_fixup(pf_cksum_fixup(*c,
+ ao.addr16[0], an->addr16[0], u),
+ ao.addr16[1], an->addr16[1], u),
+ ao.addr16[2], an->addr16[2], u),
+ ao.addr16[3], an->addr16[3], u),
+ ao.addr16[4], an->addr16[4], u),
+ ao.addr16[5], an->addr16[5], u),
+ ao.addr16[6], an->addr16[6], u),
+ ao.addr16[7], an->addr16[7], u);
+}
+#endif /* INET6 */
+
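+/*
+ * Rewrite the addresses embedded in an ICMP error: first the inner
+ * (quoted) packet's port and address with their checksums, then the
+ * outer header, keeping the ICMP checksum consistent at each step.
+ */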
+void
+pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
+ struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
+ u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
+{
+ struct pf_addr oia, ooa;
+
+ PF_ACPY(&oia, ia, af);
+ PF_ACPY(&ooa, oa, af);
+
+ /* Change inner protocol port, fix inner protocol checksum. */
+ if (ip != NULL) {
+ u_int16_t oip = *ip;
+ u_int32_t opc = 0; /* make the compiler happy */
+
+ if (pc != NULL)
+ opc = *pc;
+ *ip = np;
+ if (pc != NULL)
+ *pc = pf_cksum_fixup(*pc, oip, *ip, u);
+ *ic = pf_cksum_fixup(*ic, oip, *ip, 0);
+ if (pc != NULL)
+ *ic = pf_cksum_fixup(*ic, opc, *pc, 0);
+ }
+ /* Change inner ip address, fix inner ip and icmp checksums. */
+ PF_ACPY(ia, na, af);
+ switch (af) {
+#ifdef INET
+ case AF_INET: {
+ u_int32_t oh2c = *h2c;
+
+ *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
+ oia.addr16[0], ia->addr16[0], 0),
+ oia.addr16[1], ia->addr16[1], 0);
+ *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
+ oia.addr16[0], ia->addr16[0], 0),
+ oia.addr16[1], ia->addr16[1], 0);
+ *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+ pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+ pf_cksum_fixup(pf_cksum_fixup(*ic,
+ oia.addr16[0], ia->addr16[0], u),
+ oia.addr16[1], ia->addr16[1], u),
+ oia.addr16[2], ia->addr16[2], u),
+ oia.addr16[3], ia->addr16[3], u),
+ oia.addr16[4], ia->addr16[4], u),
+ oia.addr16[5], ia->addr16[5], u),
+ oia.addr16[6], ia->addr16[6], u),
+ oia.addr16[7], ia->addr16[7], u);
+ break;
+#endif /* INET6 */
+ }
+ /* Change outer ip address, fix outer ip or icmpv6 checksum. */
+ PF_ACPY(oa, na, af);
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
+ ooa.addr16[0], oa->addr16[0], 0),
+ ooa.addr16[1], oa->addr16[1], 0);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+ pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+ pf_cksum_fixup(pf_cksum_fixup(*ic,
+ ooa.addr16[0], oa->addr16[0], u),
+ ooa.addr16[1], oa->addr16[1], u),
+ ooa.addr16[2], oa->addr16[2], u),
+ ooa.addr16[3], oa->addr16[3], u),
+ ooa.addr16[4], oa->addr16[4], u),
+ ooa.addr16[5], oa->addr16[5], u),
+ ooa.addr16[6], oa->addr16[6], u),
+ ooa.addr16[7], oa->addr16[7], u);
+ break;
+#endif /* INET6 */
+ }
+}
+
+
+/*
+ * Need to modulate the sequence numbers in the TCP SACK option
+ * (credits to Krzysztof Pfaff for report and patch).  SACK blocks
+ * carry absolute sequence numbers, so a state that modulates
+ * sequence numbers (seqdiff) must shift each block edge by the same
+ * amount or the peer would disregard the SACK information.
+ */
+int
+pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
+ struct tcphdr *th, struct pf_state_peer *dst)
+{
+ int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
+#ifdef __FreeBSD__
+ u_int8_t opts[TCP_MAXOLEN], *opt = opts;
+#else
+ u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
+#endif
+ int copyback = 0, i, olen;
+ struct sackblk sack;
+
+#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
+ if (hlen < TCPOLEN_SACKLEN ||
+ !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
+ return (0);
+
+ while (hlen >= TCPOLEN_SACKLEN) {
+ olen = opt[1];
+ switch (*opt) {
+ case TCPOPT_EOL: /* FALLTHROUGH */
+ case TCPOPT_NOP:
+ opt++;
+ hlen--;
+ break;
+ case TCPOPT_SACK:
+ if (olen > hlen)
+ olen = hlen;
+ if (olen >= TCPOLEN_SACKLEN) {
+ for (i = 2; i + TCPOLEN_SACK <= olen;
+ i += TCPOLEN_SACK) {
+ memcpy(&sack, &opt[i], sizeof(sack));
+ pf_change_a(&sack.start, &th->th_sum,
+ htonl(ntohl(sack.start) -
+ dst->seqdiff), 0);
+ pf_change_a(&sack.end, &th->th_sum,
+ htonl(ntohl(sack.end) -
+ dst->seqdiff), 0);
+ memcpy(&opt[i], &sack, sizeof(sack));
+ }
+ copyback = 1;
+ }
+ /* FALLTHROUGH */
+ default:
+ if (olen < 2)
+ olen = 2;
+ hlen -= olen;
+ opt += olen;
+ }
+ }
+
+ if (copyback)
+#ifdef __FreeBSD__
+ m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
+#else
+ m_copyback(m, off + sizeof(*th), thoptlen, opts);
+#endif
+ return (copyback);
+}
+
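+/*
+ * Build and transmit a minimal TCP segment (optionally carrying an
+ * MSS option) from scratch; used for RST/ACK teardown of proxied
+ * connections and for synproxy handshakes.  On FreeBSD the optional
+ * replyto mbuf is only used to seed the MAC label of the generated
+ * packet.
+ */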
+void
+#ifdef __FreeBSD__
+pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
+#else
+pf_send_tcp(const struct pf_rule *r, sa_family_t af,
+#endif
+ const struct pf_addr *saddr, const struct pf_addr *daddr,
+ u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
+ u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
+ u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
+{
+ struct mbuf *m;
+ int len, tlen;
+#ifdef INET
+ struct ip *h;
+#endif /* INET */
+#ifdef INET6
+ struct ip6_hdr *h6;
+#endif /* INET6 */
+ struct tcphdr *th;
+ char *opt;
+ struct pf_mtag *pf_mtag;
+
+#ifdef __FreeBSD__
+ KASSERT(
+#ifdef INET
+ af == AF_INET
+#else
+ 0
+#endif
+ ||
+#ifdef INET6
+ af == AF_INET6
+#else
+ 0
+#endif
+ , ("Unsupported AF %d", af));
+ len = 0;
+ th = NULL;
+#ifdef INET
+ h = NULL;
+#endif
+#ifdef INET6
+ h6 = NULL;
+#endif
+#endif
+
+ /* maximum segment size tcp option */
+ tlen = sizeof(struct tcphdr);
+ if (mss)
+ tlen += 4;
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ len = sizeof(struct ip) + tlen;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ len = sizeof(struct ip6_hdr) + tlen;
+ break;
+#endif /* INET6 */
+ }
+
+ /* create outgoing mbuf */
+ m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ return;
+#ifdef __FreeBSD__
+#ifdef MAC
+ if (replyto)
+ mac_netinet_firewall_reply(replyto, m);
+ else
+ mac_netinet_firewall_send(m);
+#else
+ (void)replyto;
+#endif
+#endif
+ if ((pf_mtag = pf_get_mtag(m)) == NULL) {
+ m_freem(m);
+ return;
+ }
+ if (tag)
+#ifdef __FreeBSD__
+ m->m_flags |= M_SKIP_FIREWALL;
+#else
+ pf_mtag->flags |= PF_TAG_GENERATED;
+#endif
+
+ pf_mtag->tag = rtag;
+
+ if (r != NULL && r->rtableid >= 0)
+#ifdef __FreeBSD__
+ {
+ M_SETFIB(m, r->rtableid);
+#endif
+ pf_mtag->rtableid = r->rtableid;
+#ifdef __FreeBSD__
+ }
+#endif
+#ifdef ALTQ
+ if (r != NULL && r->qid) {
+ pf_mtag->qid = r->qid;
+ /* add hints for ecn */
+ pf_mtag->af = af;
+ pf_mtag->hdr = mtod(m, struct ip *);
+ }
+#endif /* ALTQ */
+ m->m_data += max_linkhdr;
+ m->m_pkthdr.len = m->m_len = len;
+ m->m_pkthdr.rcvif = NULL;
+ bzero(m->m_data, len);
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ h = mtod(m, struct ip *);
+
+ /* IP header fields included in the TCP checksum */
+ h->ip_p = IPPROTO_TCP;
+ h->ip_len = htons(tlen);
+ h->ip_src.s_addr = saddr->v4.s_addr;
+ h->ip_dst.s_addr = daddr->v4.s_addr;
+
+ th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ h6 = mtod(m, struct ip6_hdr *);
+
+ /* IP header fields included in the TCP checksum */
+ h6->ip6_nxt = IPPROTO_TCP;
+ h6->ip6_plen = htons(tlen);
+ memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
+ memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
+
+ th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
+ break;
+#endif /* INET6 */
+ }
+
+ /* TCP header */
+ th->th_sport = sport;
+ th->th_dport = dport;
+ th->th_seq = htonl(seq);
+ th->th_ack = htonl(ack);
+ th->th_off = tlen >> 2;
+ th->th_flags = flags;
+ th->th_win = htons(win);
+
+ if (mss) {
+ opt = (char *)(th + 1);
+ opt[0] = TCPOPT_MAXSEG;
+ opt[1] = 4;
+ HTONS(mss);
+ bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
+ }
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ /* TCP checksum */
+ th->th_sum = in_cksum(m, len);
+
+ /* Finish the IP header */
+ h->ip_v = 4;
+ h->ip_hl = sizeof(*h) >> 2;
+ h->ip_tos = IPTOS_LOWDELAY;
+#ifdef __FreeBSD__
+ h->ip_off = V_path_mtu_discovery ? IP_DF : 0;
+ h->ip_len = len;
+#else
+ h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
+ h->ip_len = htons(len);
+#endif
+ h->ip_ttl = ttl ? ttl : V_ip_defttl;
+ h->ip_sum = 0;
+ if (eh == NULL) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+ ip_output(m, (void *)NULL, (void *)NULL, 0,
+ (void *)NULL, (void *)NULL);
+ PF_LOCK();
+#else /* ! __FreeBSD__ */
+ ip_output(m, (void *)NULL, (void *)NULL, 0,
+ (void *)NULL, (void *)NULL);
+#endif
+ } else {
+ struct route ro;
+ struct rtentry rt;
+ struct ether_header *e = (void *)ro.ro_dst.sa_data;
+
+ if (ifp == NULL) {
+ m_freem(m);
+ return;
+ }
+ rt.rt_ifp = ifp;
+ ro.ro_rt = &rt;
+ ro.ro_dst.sa_len = sizeof(ro.ro_dst);
+ ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
+ bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
+ bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
+ e->ether_type = eh->ether_type;
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+ /* XXX_IMPORT: later */
+ ip_output(m, (void *)NULL, &ro, 0,
+ (void *)NULL, (void *)NULL);
+ PF_LOCK();
+#else /* ! __FreeBSD__ */
+ ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER,
+ (void *)NULL, (void *)NULL);
+#endif
+ }
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ /* TCP checksum */
+ th->th_sum = in6_cksum(m, IPPROTO_TCP,
+ sizeof(struct ip6_hdr), tlen);
+
+ h6->ip6_vfc |= IPV6_VERSION;
+ h6->ip6_hlim = IPV6_DEFHLIM;
+
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+ ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
+ PF_LOCK();
+#else
+ ip6_output(m, NULL, NULL, 0, NULL, NULL);
+#endif
+ break;
+#endif /* INET6 */
+ }
+}
+
+void
+pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
+ struct pf_rule *r)
+{
+ struct pf_mtag *pf_mtag;
+ struct mbuf *m0;
+#ifdef __FreeBSD__
+ struct ip *ip;
+#endif
+
+#ifdef __FreeBSD__
+ m0 = m_copypacket(m, M_DONTWAIT);
+ if (m0 == NULL)
+ return;
+#else
+ m0 = m_copy(m, 0, M_COPYALL);
+#endif
+	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
+		m_freem(m0);	/* don't leak the copy */
+		return;
+	}
+#ifdef __FreeBSD__
+ /* XXX: revisit */
+ m0->m_flags |= M_SKIP_FIREWALL;
+#else
+ pf_mtag->flags |= PF_TAG_GENERATED;
+#endif
+
+ if (r->rtableid >= 0)
+#ifdef __FreeBSD__
+ {
+ M_SETFIB(m0, r->rtableid);
+#endif
+ pf_mtag->rtableid = r->rtableid;
+#ifdef __FreeBSD__
+ }
+#endif
+
+#ifdef ALTQ
+ if (r->qid) {
+ pf_mtag->qid = r->qid;
+ /* add hints for ecn */
+ pf_mtag->af = af;
+ pf_mtag->hdr = mtod(m0, struct ip *);
+ }
+#endif /* ALTQ */
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+#ifdef __FreeBSD__
+ /* icmp_error() expects host byte ordering */
+ ip = mtod(m0, struct ip *);
+ NTOHS(ip->ip_len);
+ NTOHS(ip->ip_off);
+ PF_UNLOCK();
+ icmp_error(m0, type, code, 0, 0);
+ PF_LOCK();
+#else
+ icmp_error(m0, type, code, 0, 0);
+#endif
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ icmp6_error(m0, type, code, 0);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ break;
+#endif /* INET6 */
+ }
+}
+
+/*
+ * Return 1 if addresses a and b match under mask m, otherwise return 0.
+ * If n is 0 a match means the masked addresses are equal; if n is
+ * non-zero the sense is negated and a match means they differ.
+ */
+int
+pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
+ struct pf_addr *b, sa_family_t af)
+{
+ int match = 0;
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ if ((a->addr32[0] & m->addr32[0]) ==
+ (b->addr32[0] & m->addr32[0]))
+ match++;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (((a->addr32[0] & m->addr32[0]) ==
+ (b->addr32[0] & m->addr32[0])) &&
+ ((a->addr32[1] & m->addr32[1]) ==
+ (b->addr32[1] & m->addr32[1])) &&
+ ((a->addr32[2] & m->addr32[2]) ==
+ (b->addr32[2] & m->addr32[2])) &&
+ ((a->addr32[3] & m->addr32[3]) ==
+ (b->addr32[3] & m->addr32[3])))
+ match++;
+ break;
+#endif /* INET6 */
+ }
+ if (match) {
+ if (n)
+ return (0);
+ else
+ return (1);
+ } else {
+ if (n)
+ return (1);
+ else
+ return (0);
+ }
+}
+
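+/*
+ * Evaluate a pf comparison operator (range, equality, ordering)
+ * with operands a1/a2 against the value p.
+ */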
+int
+pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
+{
+ switch (op) {
+ case PF_OP_IRG:
+ return ((p > a1) && (p < a2));
+ case PF_OP_XRG:
+ return ((p < a1) || (p > a2));
+ case PF_OP_RRG:
+ return ((p >= a1) && (p <= a2));
+ case PF_OP_EQ:
+ return (p == a1);
+ case PF_OP_NE:
+ return (p != a1);
+ case PF_OP_LT:
+ return (p < a1);
+ case PF_OP_LE:
+ return (p <= a1);
+ case PF_OP_GT:
+ return (p > a1);
+ case PF_OP_GE:
+ return (p >= a1);
+ }
+ return (0); /* never reached */
+}
+
+int
+pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
+{
+ NTOHS(a1);
+ NTOHS(a2);
+ NTOHS(p);
+ return (pf_match(op, a1, a2, p));
+}
+
+int
+pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
+{
+ if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
+ return (0);
+ return (pf_match(op, a1, a2, u));
+}
+
+int
+pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
+{
+ if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
+ return (0);
+ return (pf_match(op, a1, a2, g));
+}
+
+#ifndef __FreeBSD__
+struct pf_mtag *
+pf_find_mtag(struct mbuf *m)
+{
+ struct m_tag *mtag;
+
+ if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL)
+ return (NULL);
+
+ return ((struct pf_mtag *)(mtag + 1));
+}
+
+struct pf_mtag *
+pf_get_mtag(struct mbuf *m)
+{
+ struct m_tag *mtag;
+
+ if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) {
+ mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag),
+ M_NOWAIT);
+ if (mtag == NULL)
+ return (NULL);
+ bzero(mtag + 1, sizeof(struct pf_mtag));
+ m_tag_prepend(m, mtag);
+ }
+
+ return ((struct pf_mtag *)(mtag + 1));
+}
+#endif
+
+int
+pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag,
+ int *tag)
+{
+ if (*tag == -1)
+ *tag = pf_mtag->tag;
+
+ return ((!r->match_tag_not && r->match_tag == *tag) ||
+ (r->match_tag_not && r->match_tag != *tag));
+}
+
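+/*
+ * Attach the tag and routing table id to the packet's pf mtag,
+ * allocating the mtag if needed.  Returns 0 on success, 1 if the
+ * mtag could not be allocated.
+ */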
+int
+pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid)
+{
+ if (tag <= 0 && rtableid < 0)
+ return (0);
+
+ if (pf_mtag == NULL)
+ if ((pf_mtag = pf_get_mtag(m)) == NULL)
+ return (1);
+ if (tag > 0)
+ pf_mtag->tag = tag;
+ if (rtableid >= 0)
+#ifdef __FreeBSD__
+ {
+ M_SETFIB(m, rtableid);
+#endif
+ pf_mtag->rtableid = rtableid;
+#ifdef __FreeBSD__
+ }
+#endif
+
+ return (0);
+}
+
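+/*
+ * Descend into an anchor: push the current ruleset and rule onto the
+ * anchor stack and continue evaluation in the anchor's ruleset (or,
+ * for wildcard anchors, in the first child's ruleset).
+ */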
+static void
+pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
+ struct pf_rule **r, struct pf_rule **a, int *match)
+{
+ struct pf_anchor_stackframe *f;
+
+ (*r)->anchor->match = 0;
+ if (match)
+ *match = 0;
+ if (*depth >= sizeof(pf_anchor_stack) /
+ sizeof(pf_anchor_stack[0])) {
+ printf("pf_step_into_anchor: stack overflow\n");
+ *r = TAILQ_NEXT(*r, entries);
+ return;
+ } else if (*depth == 0 && a != NULL)
+ *a = *r;
+ f = pf_anchor_stack + (*depth)++;
+ f->rs = *rs;
+ f->r = *r;
+ if ((*r)->anchor_wildcard) {
+ f->parent = &(*r)->anchor->children;
+ if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
+ NULL) {
+ *r = NULL;
+ return;
+ }
+ *rs = &f->child->ruleset;
+ } else {
+ f->parent = NULL;
+ f->child = NULL;
+ *rs = &(*r)->anchor->ruleset;
+ }
+ *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
+}
+
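+/*
+ * Pop anchor stack frames: step to the next wildcard child, or resume
+ * after the anchor call, propagating the match flag.  Returns the
+ * quick flag of the anchor rule if the anchor matched.
+ */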
+int
+pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
+ struct pf_rule **r, struct pf_rule **a, int *match)
+{
+ struct pf_anchor_stackframe *f;
+ int quick = 0;
+
+ do {
+ if (*depth <= 0)
+ break;
+ f = pf_anchor_stack + *depth - 1;
+ if (f->parent != NULL && f->child != NULL) {
+		if (f->parent != NULL && f->child != NULL) {
+			if (f->child->match ||
+			    (match != NULL && *match)) {
+				f->r->anchor->match = 1;
+				/* match may be NULL here; don't dereference it */
+				if (match != NULL)
+					*match = 0;
+ }
+ f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
+ if (f->child != NULL) {
+ *rs = &f->child->ruleset;
+ *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
+ if (*r == NULL)
+ continue;
+ else
+ break;
+ }
+ }
+ (*depth)--;
+ if (*depth == 0 && a != NULL)
+ *a = NULL;
+ *rs = f->rs;
+ if (f->r->anchor->match || (match != NULL && *match))
+ quick = f->r->quick;
+ *r = TAILQ_NEXT(f->r, entries);
+ } while (*r == NULL);
+
+ return (quick);
+}
+
+#ifdef INET6
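+/*
+ * Combine the network part of the redirection address with the host
+ * part of the source address: naddr = (raddr & rmask) | (~rmask & saddr),
+ * one 32-bit word at a time.
+ */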
+void
+pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
+ struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
+{
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
+ ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
+ break;
+#endif /* INET */
+ case AF_INET6:
+ naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
+ ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
+ naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
+ ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
+ naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
+ ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
+ naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
+ ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
+ break;
+ }
+}
+
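+/*
+ * Increment an address by one, treating an IPv6 address as a single
+ * 128-bit big-endian integer with carry propagating from addr32[3]
+ * up to addr32[0].
+ */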
+void
+pf_addr_inc(struct pf_addr *addr, sa_family_t af)
+{
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
+ break;
+#endif /* INET */
+ case AF_INET6:
+ if (addr->addr32[3] == 0xffffffff) {
+ addr->addr32[3] = 0;
+ if (addr->addr32[2] == 0xffffffff) {
+ addr->addr32[2] = 0;
+ if (addr->addr32[1] == 0xffffffff) {
+ addr->addr32[1] = 0;
+ addr->addr32[0] =
+ htonl(ntohl(addr->addr32[0]) + 1);
+ } else
+ addr->addr32[1] =
+ htonl(ntohl(addr->addr32[1]) + 1);
+ } else
+ addr->addr32[2] =
+ htonl(ntohl(addr->addr32[2]) + 1);
+ } else
+ addr->addr32[3] =
+ htonl(ntohl(addr->addr32[3]) + 1);
+ break;
+ }
+}
+#endif /* INET6 */
+
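+/*
+ * 96-bit mixing step of Bob Jenkins' lookup hash, as used by
+ * bridge_hash() in if_bridge.c.
+ */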
+#define mix(a,b,c) \
+ do { \
+ a -= b; a -= c; a ^= (c >> 13); \
+ b -= c; b -= a; b ^= (a << 8); \
+ c -= a; c -= b; c ^= (b >> 13); \
+ a -= b; a -= c; a ^= (c >> 12); \
+ b -= c; b -= a; b ^= (a << 16); \
+ c -= a; c -= b; c ^= (b >> 5); \
+ a -= b; a -= c; a ^= (c >> 3); \
+ b -= c; b -= a; b ^= (a << 10); \
+ c -= a; c -= b; c ^= (b >> 15); \
+ } while (0)
+
+/*
+ * Hash function based on bridge_hash() in if_bridge.c.
+ */
+void
+pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
+ struct pf_poolhashkey *key, sa_family_t af)
+{
+ u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ a += inaddr->addr32[0];
+ b += key->key32[1];
+ mix(a, b, c);
+ hash->addr32[0] = c + key->key32[2];
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ a += inaddr->addr32[0];
+ b += inaddr->addr32[2];
+ mix(a, b, c);
+ hash->addr32[0] = c;
+ a += inaddr->addr32[1];
+ b += inaddr->addr32[3];
+ c += key->key32[1];
+ mix(a, b, c);
+ hash->addr32[1] = c;
+ a += inaddr->addr32[2];
+ b += inaddr->addr32[1];
+ c += key->key32[2];
+ mix(a, b, c);
+ hash->addr32[2] = c;
+ a += inaddr->addr32[3];
+ b += inaddr->addr32[0];
+ c += key->key32[3];
+ mix(a, b, c);
+ hash->addr32[3] = c;
+ break;
+#endif /* INET6 */
+ }
+}
+
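+/*
+ * Pick a translation address from the rule's address pool according to
+ * the pool type (none, bitmask, random, source-hash or round-robin),
+ * honoring sticky-address source tracking.  Returns 0 and stores the
+ * address in naddr on success, 1 if no address could be mapped.
+ */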
+int
+pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
+ struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
+{
+ unsigned char hash[16];
+ struct pf_pool *rpool = &r->rpool;
+ struct pf_addr *raddr = &rpool->cur->addr.v.a.addr;
+ struct pf_addr *rmask = &rpool->cur->addr.v.a.mask;
+ struct pf_pooladdr *acur = rpool->cur;
+ struct pf_src_node k;
+
+ if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
+ (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
+ k.af = af;
+ PF_ACPY(&k.addr, saddr, af);
+ if (r->rule_flag & PFRULE_RULESRCTRACK ||
+ r->rpool.opts & PF_POOL_STICKYADDR)
+ k.rule.ptr = r;
+ else
+ k.rule.ptr = NULL;
+ pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
+ *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
+ if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
+ PF_ACPY(naddr, &(*sn)->raddr, af);
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf_map_addr: src tracking maps ");
+ pf_print_host(&k.addr, 0, af);
+ printf(" to ");
+ pf_print_host(naddr, 0, af);
+ printf("\n");
+ }
+ return (0);
+ }
+ }
+
+ if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
+ return (1);
+ if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
+ (rpool->opts & PF_POOL_TYPEMASK) !=
+ PF_POOL_ROUNDROBIN)
+ return (1);
+ raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
+ rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
+ (rpool->opts & PF_POOL_TYPEMASK) !=
+ PF_POOL_ROUNDROBIN)
+ return (1);
+ raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
+ rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
+ break;
+#endif /* INET6 */
+ }
+ } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+ if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
+ return (1); /* unsupported */
+ } else {
+ raddr = &rpool->cur->addr.v.a.addr;
+ rmask = &rpool->cur->addr.v.a.mask;
+ }
+
+ switch (rpool->opts & PF_POOL_TYPEMASK) {
+ case PF_POOL_NONE:
+ PF_ACPY(naddr, raddr, af);
+ break;
+ case PF_POOL_BITMASK:
+ PF_POOLMASK(naddr, raddr, rmask, saddr, af);
+ break;
+ case PF_POOL_RANDOM:
+ if (init_addr != NULL && PF_AZERO(init_addr, af)) {
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ rpool->counter.addr32[0] = htonl(arc4random());
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (rmask->addr32[3] != 0xffffffff)
+ rpool->counter.addr32[3] =
+ htonl(arc4random());
+ else
+ break;
+ if (rmask->addr32[2] != 0xffffffff)
+ rpool->counter.addr32[2] =
+ htonl(arc4random());
+ else
+ break;
+ if (rmask->addr32[1] != 0xffffffff)
+ rpool->counter.addr32[1] =
+ htonl(arc4random());
+ else
+ break;
+ if (rmask->addr32[0] != 0xffffffff)
+ rpool->counter.addr32[0] =
+ htonl(arc4random());
+ break;
+#endif /* INET6 */
+ }
+ PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
+ PF_ACPY(init_addr, naddr, af);
+
+ } else {
+ PF_AINC(&rpool->counter, af);
+ PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
+ }
+ break;
+ case PF_POOL_SRCHASH:
+ pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
+ PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
+ break;
+ case PF_POOL_ROUNDROBIN:
+ if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+ if (!pfr_pool_get(rpool->cur->addr.p.tbl,
+ &rpool->tblidx, &rpool->counter,
+ &raddr, &rmask, af))
+ goto get_addr;
+ } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+ if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+ &rpool->tblidx, &rpool->counter,
+ &raddr, &rmask, af))
+ goto get_addr;
+ } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
+ goto get_addr;
+
+ try_next:
+ if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
+ rpool->cur = TAILQ_FIRST(&rpool->list);
+ if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+ rpool->tblidx = -1;
+ if (pfr_pool_get(rpool->cur->addr.p.tbl,
+ &rpool->tblidx, &rpool->counter,
+ &raddr, &rmask, af)) {
+ /* table contains no address of type 'af' */
+ if (rpool->cur != acur)
+ goto try_next;
+ return (1);
+ }
+ } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+ rpool->tblidx = -1;
+ if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+ &rpool->tblidx, &rpool->counter,
+ &raddr, &rmask, af)) {
+ /* table contains no address of type 'af' */
+ if (rpool->cur != acur)
+ goto try_next;
+ return (1);
+ }
+ } else {
+ raddr = &rpool->cur->addr.v.a.addr;
+ rmask = &rpool->cur->addr.v.a.mask;
+ PF_ACPY(&rpool->counter, raddr, af);
+ }
+
+ get_addr:
+ PF_ACPY(naddr, &rpool->counter, af);
+ if (init_addr != NULL && PF_AZERO(init_addr, af))
+ PF_ACPY(init_addr, naddr, af);
+ PF_AINC(&rpool->counter, af);
+ break;
+ }
+ if (*sn != NULL)
+ PF_ACPY(&(*sn)->raddr, naddr, af);
+
+ if (pf_status.debug >= PF_DEBUG_MISC &&
+ (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
+ printf("pf_map_addr: selected address ");
+ pf_print_host(naddr, 0, af);
+ printf("\n");
+ }
+
+ return (0);
+}
+
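+/*
+ * Allocate a translated source port for a NAT mapping: map the address
+ * via pf_map_addr(), then search [low, high] for a port that does not
+ * collide with an existing state, probing upward from a random start
+ * and then downward.  Returns 0 with the port in *nport, 1 if all
+ * pool addresses and ports are exhausted.
+ */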
+int
+pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
+ struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
+ struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
+ struct pf_src_node **sn)
+{
+ struct pf_state_cmp key;
+ struct pf_addr init_addr;
+ u_int16_t cut;
+
+ bzero(&init_addr, sizeof(init_addr));
+ if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
+ return (1);
+
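+	/* For ICMP the id field takes the role of the port. */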
+ if (proto == IPPROTO_ICMP) {
+ low = 1;
+ high = 65535;
+ }
+
+ do {
+ key.af = af;
+ key.proto = proto;
+ PF_ACPY(&key.ext.addr, daddr, key.af);
+ PF_ACPY(&key.gwy.addr, naddr, key.af);
+ key.ext.port = dport;
+
+ /*
+		 * Port search: start at a random port and step through
+		 * the range, similar to the port loop in in_pcbbind().
+ */
+ if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
+ proto == IPPROTO_ICMP)) {
+ key.gwy.port = dport;
+ if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
+ return (0);
+ } else if (low == 0 && high == 0) {
+ key.gwy.port = *nport;
+ if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
+ return (0);
+ } else if (low == high) {
+ key.gwy.port = htons(low);
+ if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
+ *nport = htons(low);
+ return (0);
+ }
+ } else {
+ u_int16_t tmp;
+
+ if (low > high) {
+ tmp = low;
+ low = high;
+ high = tmp;
+ }
+ /* low < high */
+			cut = arc4random() % (1 + high - low) + low;
+ /* low <= cut <= high */
+ for (tmp = cut; tmp <= high; ++(tmp)) {
+ key.gwy.port = htons(tmp);
+ if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
+ NULL) {
+ *nport = htons(tmp);
+ return (0);
+ }
+ }
+ for (tmp = cut - 1; tmp >= low; --(tmp)) {
+ key.gwy.port = htons(tmp);
+ if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
+ NULL) {
+ *nport = htons(tmp);
+ return (0);
+ }
+ }
+ }
+
+ switch (r->rpool.opts & PF_POOL_TYPEMASK) {
+ case PF_POOL_RANDOM:
+ case PF_POOL_ROUNDROBIN:
+ if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
+ return (1);
+ break;
+ case PF_POOL_NONE:
+ case PF_POOL_SRCHASH:
+ case PF_POOL_BITMASK:
+ default:
+ return (1);
+ }
+	} while (!PF_AEQ(&init_addr, naddr, af));
+
+ return (1); /* none available */
+}
+
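+/*
+ * Walk the given translation ruleset (NAT, BINAT or RDR) and return
+ * the first matching rule, or NULL if nothing matches or the match is
+ * a no-translation (no nat/no binat/no rdr) rule.
+ */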
+struct pf_rule *
+pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
+ int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
+ struct pf_addr *daddr, u_int16_t dport, int rs_num)
+{
+ struct pf_rule *r, *rm = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ int tag = -1;
+ int rtableid = -1;
+ int asd = 0;
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
+ while (r && rm == NULL) {
+ struct pf_rule_addr *src = NULL, *dst = NULL;
+ struct pf_addr_wrap *xdst = NULL;
+
+ if (r->action == PF_BINAT && direction == PF_IN) {
+ src = &r->dst;
+ if (r->rpool.cur != NULL)
+ xdst = &r->rpool.cur->addr;
+ } else {
+ src = &r->src;
+ dst = &r->dst;
+ }
+
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != direction)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != pd->af)
+ r = r->skip[PF_SKIP_AF].ptr;
+ else if (r->proto && r->proto != pd->proto)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+ else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
+ src->neg, kif))
+ r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
+ PF_SKIP_DST_ADDR].ptr;
+ else if (src->port_op && !pf_match_port(src->port_op,
+ src->port[0], src->port[1], sport))
+ r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
+ PF_SKIP_DST_PORT].ptr;
+ else if (dst != NULL &&
+ PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
+ 0, NULL))
+ r = TAILQ_NEXT(r, entries);
+ else if (dst != NULL && dst->port_op &&
+ !pf_match_port(dst->port_op, dst->port[0],
+ dst->port[1], dport))
+ r = r->skip[PF_SKIP_DST_PORT].ptr;
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
+ IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
+ off, pd->hdr.tcp), r->os_fingerprint)))
+ r = TAILQ_NEXT(r, entries);
+ else {
+ if (r->tag)
+ tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
+ if (r->anchor == NULL) {
+ rm = r;
+ } else
+ pf_step_into_anchor(&asd, &ruleset, rs_num,
+ &r, NULL, NULL);
+ }
+ if (r == NULL)
+ pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
+ NULL, NULL);
+ }
+ if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid))
+ return (NULL);
+ if (rm != NULL && (rm->action == PF_NONAT ||
+ rm->action == PF_NORDR || rm->action == PF_NOBINAT))
+ return (NULL);
+ return (rm);
+}
+
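+/*
+ * Find the translation rule for a packet (BINAT/NAT on the way out,
+ * RDR/BINAT on the way in) and compute the translated address and
+ * port.  Returns NULL if no translation applies.
+ */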
+struct pf_rule *
+pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
+ struct pfi_kif *kif, struct pf_src_node **sn,
+ struct pf_addr *saddr, u_int16_t sport,
+ struct pf_addr *daddr, u_int16_t dport,
+ struct pf_addr *naddr, u_int16_t *nport)
+{
+ struct pf_rule *r = NULL;
+
+ if (direction == PF_OUT) {
+ r = pf_match_translation(pd, m, off, direction, kif, saddr,
+ sport, daddr, dport, PF_RULESET_BINAT);
+ if (r == NULL)
+ r = pf_match_translation(pd, m, off, direction, kif,
+ saddr, sport, daddr, dport, PF_RULESET_NAT);
+ } else {
+ r = pf_match_translation(pd, m, off, direction, kif, saddr,
+ sport, daddr, dport, PF_RULESET_RDR);
+ if (r == NULL)
+ r = pf_match_translation(pd, m, off, direction, kif,
+ saddr, sport, daddr, dport, PF_RULESET_BINAT);
+ }
+
+ if (r != NULL) {
+ switch (r->action) {
+ case PF_NONAT:
+ case PF_NOBINAT:
+ case PF_NORDR:
+ return (NULL);
+ case PF_NAT:
+ if (pf_get_sport(pd->af, pd->proto, r, saddr,
+ daddr, dport, naddr, nport, r->rpool.proxy_port[0],
+ r->rpool.proxy_port[1], sn)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: NAT proxy port allocation "
+ "(%u-%u) failed\n",
+ r->rpool.proxy_port[0],
+ r->rpool.proxy_port[1]));
+ return (NULL);
+ }
+ break;
+ case PF_BINAT:
+ switch (direction) {
+ case PF_OUT:
+ if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ if (r->rpool.cur->addr.p.dyn->
+ pfid_acnt4 < 1)
+ return (NULL);
+ PF_POOLMASK(naddr,
+ &r->rpool.cur->addr.p.dyn->
+ pfid_addr4,
+ &r->rpool.cur->addr.p.dyn->
+ pfid_mask4,
+ saddr, AF_INET);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (r->rpool.cur->addr.p.dyn->
+ pfid_acnt6 < 1)
+ return (NULL);
+ PF_POOLMASK(naddr,
+ &r->rpool.cur->addr.p.dyn->
+ pfid_addr6,
+ &r->rpool.cur->addr.p.dyn->
+ pfid_mask6,
+ saddr, AF_INET6);
+ break;
+#endif /* INET6 */
+ }
+ } else
+ PF_POOLMASK(naddr,
+ &r->rpool.cur->addr.v.a.addr,
+ &r->rpool.cur->addr.v.a.mask,
+ saddr, pd->af);
+ break;
+ case PF_IN:
+ if (r->src.addr.type == PF_ADDR_DYNIFTL) {
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ if (r->src.addr.p.dyn->
+ pfid_acnt4 < 1)
+ return (NULL);
+ PF_POOLMASK(naddr,
+ &r->src.addr.p.dyn->
+ pfid_addr4,
+ &r->src.addr.p.dyn->
+ pfid_mask4,
+ daddr, AF_INET);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (r->src.addr.p.dyn->
+ pfid_acnt6 < 1)
+ return (NULL);
+ PF_POOLMASK(naddr,
+ &r->src.addr.p.dyn->
+ pfid_addr6,
+ &r->src.addr.p.dyn->
+ pfid_mask6,
+ daddr, AF_INET6);
+ break;
+#endif /* INET6 */
+ }
+ } else
+ PF_POOLMASK(naddr,
+ &r->src.addr.v.a.addr,
+ &r->src.addr.v.a.mask, daddr,
+ pd->af);
+ break;
+ }
+ break;
+ case PF_RDR: {
+ if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
+ return (NULL);
+ if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
+ PF_POOL_BITMASK)
+ PF_POOLMASK(naddr, naddr,
+ &r->rpool.cur->addr.v.a.mask, daddr,
+ pd->af);
+
+ if (r->rpool.proxy_port[1]) {
+ u_int32_t tmp_nport;
+
+ tmp_nport = ((ntohs(dport) -
+ ntohs(r->dst.port[0])) %
+ (r->rpool.proxy_port[1] -
+ r->rpool.proxy_port[0] + 1)) +
+ r->rpool.proxy_port[0];
+
+ /* wrap around if necessary */
+ if (tmp_nport > 65535)
+ tmp_nport -= 65535;
+ *nport = htons((u_int16_t)tmp_nport);
+ } else if (r->rpool.proxy_port[0])
+ *nport = htons(r->rpool.proxy_port[0]);
+ break;
+ }
+ default:
+ return (NULL);
+ }
+ }
+
+ return (r);
+}
+
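+/*
+ * Look up the local socket owning the connection and record the
+ * uid/gid of its owner in pd->lookup for uid/gid rule matching.
+ * Returns 1 on success, -1 if no socket was found.
+ */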
+int
+#ifdef __FreeBSD__
+pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg)
+#else
+pf_socket_lookup(int direction, struct pf_pdesc *pd)
+#endif
+{
+ struct pf_addr *saddr, *daddr;
+ u_int16_t sport, dport;
+#ifdef __FreeBSD__
+ struct inpcbinfo *pi;
+#else
+ struct inpcbtable *tb;
+#endif
+ struct inpcb *inp;
+
+ if (pd == NULL)
+ return (-1);
+ pd->lookup.uid = UID_MAX;
+ pd->lookup.gid = GID_MAX;
+ pd->lookup.pid = NO_PID; /* XXX: revisit */
+#ifdef __FreeBSD__
+ if (inp_arg != NULL) {
+ INP_LOCK_ASSERT(inp_arg);
+ pd->lookup.uid = inp_arg->inp_cred->cr_uid;
+ pd->lookup.gid = inp_arg->inp_cred->cr_groups[0];
+ return (1);
+ }
+#endif
+ switch (pd->proto) {
+ case IPPROTO_TCP:
+ if (pd->hdr.tcp == NULL)
+ return (-1);
+ sport = pd->hdr.tcp->th_sport;
+ dport = pd->hdr.tcp->th_dport;
+#ifdef __FreeBSD__
+ pi = &V_tcbinfo;
+#else
+ tb = &tcbtable;
+#endif
+ break;
+ case IPPROTO_UDP:
+ if (pd->hdr.udp == NULL)
+ return (-1);
+ sport = pd->hdr.udp->uh_sport;
+ dport = pd->hdr.udp->uh_dport;
+#ifdef __FreeBSD__
+ pi = &V_udbinfo;
+#else
+ tb = &udbtable;
+#endif
+ break;
+ default:
+ return (-1);
+ }
+ if (direction == PF_IN) {
+ saddr = pd->src;
+ daddr = pd->dst;
+ } else {
+ u_int16_t p;
+
+ p = sport;
+ sport = dport;
+ dport = p;
+ saddr = pd->dst;
+ daddr = pd->src;
+ }
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+#ifdef __FreeBSD__
+ INP_INFO_RLOCK(pi); /* XXX LOR */
+ inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
+ dport, 0, NULL);
+ if (inp == NULL) {
+ inp = in_pcblookup_hash(pi, saddr->v4, sport,
+ daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
+			if (inp == NULL) {
+ INP_INFO_RUNLOCK(pi);
+ return (-1);
+ }
+ }
+#else
+ inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
+ if (inp == NULL) {
+ inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
+ if (inp == NULL)
+ return (-1);
+ }
+#endif
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+#ifdef __FreeBSD__
+ INP_INFO_RLOCK(pi);
+ inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
+ &daddr->v6, dport, 0, NULL);
+ if (inp == NULL) {
+ inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
+ &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
+ if (inp == NULL) {
+ INP_INFO_RUNLOCK(pi);
+ return (-1);
+ }
+ }
+#else
+ inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
+ dport);
+ if (inp == NULL) {
+ inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
+ if (inp == NULL)
+ return (-1);
+ }
+#endif
+ break;
+#endif /* INET6 */
+
+ default:
+ return (-1);
+ }
+#ifdef __FreeBSD__
+ pd->lookup.uid = inp->inp_cred->cr_uid;
+ pd->lookup.gid = inp->inp_cred->cr_groups[0];
+ INP_INFO_RUNLOCK(pi);
+#else
+ pd->lookup.uid = inp->inp_socket->so_euid;
+ pd->lookup.gid = inp->inp_socket->so_egid;
+ pd->lookup.pid = inp->inp_socket->so_cpid;
+#endif
+ return (1);
+}
+
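+/*
+ * Extract the window scaling factor advertised in the TCP options;
+ * the result has PF_WSCALE_FLAG set when the option was present.
+ */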
+u_int8_t
+pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
+{
+ int hlen;
+ u_int8_t hdr[60];
+ u_int8_t *opt, optlen;
+ u_int8_t wscale = 0;
+
+ hlen = th_off << 2; /* hlen <= sizeof(hdr) */
+ if (hlen <= sizeof(struct tcphdr))
+ return (0);
+ if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
+ return (0);
+ opt = hdr + sizeof(struct tcphdr);
+ hlen -= sizeof(struct tcphdr);
+ while (hlen >= 3) {
+ switch (*opt) {
+ case TCPOPT_EOL:
+ case TCPOPT_NOP:
+ ++opt;
+ --hlen;
+ break;
+ case TCPOPT_WINDOW:
+ wscale = opt[2];
+ if (wscale > TCP_MAX_WINSHIFT)
+ wscale = TCP_MAX_WINSHIFT;
+ wscale |= PF_WSCALE_FLAG;
+ /* FALLTHROUGH */
+ default:
+ optlen = opt[1];
+ if (optlen < 2)
+ optlen = 2;
+ hlen -= optlen;
+ opt += optlen;
+ break;
+ }
+ }
+ return (wscale);
+}
+
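+/*
+ * Extract the maximum segment size advertised in the TCP options
+ * of the segment.
+ */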
+u_int16_t
+pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
+{
+ int hlen;
+ u_int8_t hdr[60];
+ u_int8_t *opt, optlen;
+ u_int16_t mss = V_tcp_mssdflt;
+
+ hlen = th_off << 2; /* hlen <= sizeof(hdr) */
+ if (hlen <= sizeof(struct tcphdr))
+ return (0);
+ if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
+ return (0);
+ opt = hdr + sizeof(struct tcphdr);
+ hlen -= sizeof(struct tcphdr);
+ while (hlen >= TCPOLEN_MAXSEG) {
+ switch (*opt) {
+ case TCPOPT_EOL:
+ case TCPOPT_NOP:
+ ++opt;
+ --hlen;
+ break;
+ case TCPOPT_MAXSEG:
+ bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
+ NTOHS(mss);
+ /* FALLTHROUGH */
+ default:
+ optlen = opt[1];
+ if (optlen < 2)
+ optlen = 2;
+ hlen -= optlen;
+ opt += optlen;
+ break;
+ }
+ }
+ return (mss);
+}
+
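+/*
+ * Compute an MSS from the MTU of the route towards the given address,
+ * clamped by the offered value and a 64-byte minimum.
+ */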
+u_int16_t
+pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
+{
+#ifdef INET
+ struct sockaddr_in *dst;
+ struct route ro;
+#endif /* INET */
+#ifdef INET6
+ struct sockaddr_in6 *dst6;
+ struct route_in6 ro6;
+#endif /* INET6 */
+ struct rtentry *rt = NULL;
+ int hlen = 0; /* make the compiler happy */
+ u_int16_t mss = V_tcp_mssdflt;
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ hlen = sizeof(struct ip);
+ bzero(&ro, sizeof(ro));
+ dst = (struct sockaddr_in *)&ro.ro_dst;
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = addr->v4;
+#ifdef __FreeBSD__
+#ifdef RTF_PRCLONING
+ rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
+#else /* !RTF_PRCLONING */
+ in_rtalloc_ign(&ro, 0, 0);
+#endif
+#else /* ! __FreeBSD__ */
+ rtalloc_noclone(&ro, NO_CLONING);
+#endif
+ rt = ro.ro_rt;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ hlen = sizeof(struct ip6_hdr);
+ bzero(&ro6, sizeof(ro6));
+ dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
+ dst6->sin6_family = AF_INET6;
+ dst6->sin6_len = sizeof(*dst6);
+ dst6->sin6_addr = addr->v6;
+#ifdef __FreeBSD__
+#ifdef RTF_PRCLONING
+ rtalloc_ign((struct route *)&ro6,
+ (RTF_CLONING | RTF_PRCLONING));
+#else /* !RTF_PRCLONING */
+ rtalloc_ign((struct route *)&ro6, 0);
+#endif
+#else /* ! __FreeBSD__ */
+ rtalloc_noclone((struct route *)&ro6, NO_CLONING);
+#endif
+ rt = ro6.ro_rt;
+ break;
+#endif /* INET6 */
+ }
+
+ if (rt && rt->rt_ifp) {
+ mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
+ mss = max(V_tcp_mssdflt, mss);
+ RTFREE(rt);
+ }
+ mss = min(mss, offer);
+ mss = max(mss, 64); /* sanity - at least max opt space */
+ return (mss);
+}
+
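+/*
+ * For route-to/reply-to/dup-to rules, resolve the outgoing interface
+ * and gateway address for the state from the rule's address pool.
+ */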
+void
+pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
+{
+ struct pf_rule *r = s->rule.ptr;
+
+ s->rt_kif = NULL;
+ if (!r->rt || r->rt == PF_FASTROUTE)
+ return;
+ switch (s->af) {
+#ifdef INET
+ case AF_INET:
+ pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
+ &s->nat_src_node);
+ s->rt_kif = r->rpool.cur->kif;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
+ &s->nat_src_node);
+ s->rt_kif = r->rpool.cur->kif;
+ break;
+#endif /* INET6 */
+ }
+}
+
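+/*
+ * Filter a TCP packet that matches no existing state: apply
+ * BINAT/NAT/RDR translation, walk the filter ruleset and, on a pass
+ * with keep state, create the state entry (with optional sequence
+ * number modulation or synproxy handshake).
+ */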
+int
+pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
+ struct pfi_kif *kif, struct mbuf *m, int off, void *h,
+#ifdef __FreeBSD__
+ struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
+ struct ifqueue *ifq, struct inpcb *inp)
+#else
+ struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
+ struct ifqueue *ifq)
+#endif
+{
+ struct pf_rule *nr = NULL;
+ struct pf_addr *saddr = pd->src, *daddr = pd->dst;
+ struct tcphdr *th = pd->hdr.tcp;
+ u_int16_t bport, nport = 0;
+ sa_family_t af = pd->af;
+ struct pf_rule *r, *a = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ struct pf_src_node *nsn = NULL;
+ u_short reason;
+ int rewrite = 0;
+ int tag = -1, rtableid = -1;
+ u_int16_t mss = V_tcp_mssdflt;
+ int asd = 0;
+ int match = 0;
+
+ if (pf_check_congestion(ifq)) {
+ REASON_SET(&reason, PFRES_CONGEST);
+ return (PF_DROP);
+ }
+
+#ifdef __FreeBSD__
+ if (inp != NULL)
+ pd->lookup.done = pf_socket_lookup(direction, pd, inp);
+ else if (debug_pfugidhack) {
+ PF_UNLOCK();
+ DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
+ pd->lookup.done = pf_socket_lookup(direction, pd, inp);
+ PF_LOCK();
+ }
+#endif
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
+
+ if (direction == PF_OUT) {
+ bport = nport = th->th_sport;
+ /* check outgoing packet for BINAT/NAT */
+ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
+ saddr, th->th_sport, daddr, th->th_dport,
+ &pd->naddr, &nport)) != NULL) {
+ PF_ACPY(&pd->baddr, saddr, af);
+ pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
+ &th->th_sum, &pd->naddr, nport, 0, af);
+ rewrite++;
+ if (nr->natpass)
+ r = NULL;
+ pd->nat_rule = nr;
+ }
+ } else {
+ bport = nport = th->th_dport;
+ /* check incoming packet for BINAT/RDR */
+ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
+ saddr, th->th_sport, daddr, th->th_dport,
+ &pd->naddr, &nport)) != NULL) {
+ PF_ACPY(&pd->baddr, daddr, af);
+ pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
+ &th->th_sum, &pd->naddr, nport, 0, af);
+ rewrite++;
+ if (nr->natpass)
+ r = NULL;
+ pd->nat_rule = nr;
+ }
+ }
+
+ while (r != NULL) {
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != direction)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != af)
+ r = r->skip[PF_SKIP_AF].ptr;
+ else if (r->proto && r->proto != IPPROTO_TCP)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+ else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
+ r->src.neg, kif))
+ r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+ else if (r->src.port_op && !pf_match_port(r->src.port_op,
+ r->src.port[0], r->src.port[1], th->th_sport))
+ r = r->skip[PF_SKIP_SRC_PORT].ptr;
+ else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
+ r->dst.neg, NULL))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
+ r->dst.port[0], r->dst.port[1], th->th_dport))
+ r = r->skip[PF_SKIP_DST_PORT].ptr;
+ else if (r->tos && !(r->tos == pd->tos))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->rule_flag & PFRULE_FRAGMENT)
+ r = TAILQ_NEXT(r, entries);
+ else if ((r->flagset & th->th_flags) != r->flags)
+ r = TAILQ_NEXT(r, entries);
+ else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
+#ifdef __FreeBSD__
+ pf_socket_lookup(direction, pd, inp), 1)) &&
+#else
+ pf_socket_lookup(direction, pd), 1)) &&
+#endif
+ !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
+ pd->lookup.uid))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
+#ifdef __FreeBSD__
+ pf_socket_lookup(direction, pd, inp), 1)) &&
+#else
+ pf_socket_lookup(direction, pd), 1)) &&
+#endif
+ !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
+ pd->lookup.gid))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->prob && r->prob <= arc4random())
+ r = TAILQ_NEXT(r, entries);
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
+ pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
+ r = TAILQ_NEXT(r, entries);
+ else {
+ if (r->tag)
+ tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
+ if (r->anchor == NULL) {
+ match = 1;
+ *rm = r;
+ *am = a;
+ *rsm = ruleset;
+ if ((*rm)->quick)
+ break;
+ r = TAILQ_NEXT(r, entries);
+ } else
+ pf_step_into_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match);
+ }
+ if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match))
+ break;
+ }
+ r = *rm;
+ a = *am;
+ ruleset = *rsm;
+
+ REASON_SET(&reason, PFRES_MATCH);
+
+ if (r->log || (nr != NULL && nr->natpass && nr->log)) {
+ if (rewrite)
+#ifdef __FreeBSD__
+ m_copyback(m, off, sizeof(*th), (caddr_t)th);
+#else
+ m_copyback(m, off, sizeof(*th), th);
+#endif
+ PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
+ a, ruleset, pd);
+ }
+
+ if ((r->action == PF_DROP) &&
+ ((r->rule_flag & PFRULE_RETURNRST) ||
+ (r->rule_flag & PFRULE_RETURNICMP) ||
+ (r->rule_flag & PFRULE_RETURN))) {
+ /* undo NAT changes, if they have taken place */
+ if (nr != NULL) {
+ if (direction == PF_OUT) {
+ pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
+ &th->th_sum, &pd->baddr, bport, 0, af);
+ rewrite++;
+ } else {
+ pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
+ &th->th_sum, &pd->baddr, bport, 0, af);
+ rewrite++;
+ }
+ }
+ if (((r->rule_flag & PFRULE_RETURNRST) ||
+ (r->rule_flag & PFRULE_RETURN)) &&
+ !(th->th_flags & TH_RST)) {
+ u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
+
+ if (th->th_flags & TH_SYN)
+ ack++;
+ if (th->th_flags & TH_FIN)
+ ack++;
+#ifdef __FreeBSD__
+ pf_send_tcp(m, r, af, pd->dst,
+#else
+ pf_send_tcp(r, af, pd->dst,
+#endif
+ pd->src, th->th_dport, th->th_sport,
+ ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
+ r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
+ } else if ((af == AF_INET) && r->return_icmp)
+ pf_send_icmp(m, r->return_icmp >> 8,
+ r->return_icmp & 255, af, r);
+ else if ((af == AF_INET6) && r->return_icmp6)
+ pf_send_icmp(m, r->return_icmp6 >> 8,
+ r->return_icmp6 & 255, af, r);
+ }
+
+ if (r->action == PF_DROP)
+ return (PF_DROP);
+
+ if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
+
+ if (r->keep_state || nr != NULL ||
+ (pd->flags & PFDESC_TCP_NORM)) {
+ /* create new state */
+ u_int16_t len;
+ struct pf_state *s = NULL;
+ struct pf_src_node *sn = NULL;
+
+ len = pd->tot_len - off - (th->th_off << 2);
+
+ /* check maximums */
+ if (r->max_states && (r->states >= r->max_states)) {
+ pf_status.lcounters[LCNT_STATES]++;
+ REASON_SET(&reason, PFRES_MAXSTATES);
+ goto cleanup;
+ }
+ /* src node for filter rule */
+ if ((r->rule_flag & PFRULE_SRCTRACK ||
+ r->rpool.opts & PF_POOL_STICKYADDR) &&
+ pf_insert_src_node(&sn, r, saddr, af) != 0) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto cleanup;
+ }
+ /* src node for translation rule */
+ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
+ ((direction == PF_OUT &&
+ pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
+ (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto cleanup;
+ }
+ s = pool_get(&pf_state_pl, PR_NOWAIT);
+ if (s == NULL) {
+ REASON_SET(&reason, PFRES_MEMORY);
+cleanup:
+ if (sn != NULL && sn->states == 0 && sn->expire == 0) {
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, sn);
+ }
+ if (nsn != sn && nsn != NULL && nsn->states == 0 &&
+ nsn->expire == 0) {
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, nsn);
+ }
+ return (PF_DROP);
+ }
+ bzero(s, sizeof(*s));
+ s->rule.ptr = r;
+ s->nat_rule.ptr = nr;
+ s->anchor.ptr = a;
+ STATE_INC_COUNTERS(s);
+ if (r->allow_opts)
+ s->state_flags |= PFSTATE_ALLOWOPTS;
+ if (r->rule_flag & PFRULE_STATESLOPPY)
+ s->state_flags |= PFSTATE_SLOPPY;
+ s->log = r->log & PF_LOG_ALL;
+ if (nr != NULL)
+ s->log |= nr->log & PF_LOG_ALL;
+ s->proto = IPPROTO_TCP;
+ s->direction = direction;
+ s->af = af;
+ if (direction == PF_OUT) {
+ PF_ACPY(&s->gwy.addr, saddr, af);
+ s->gwy.port = th->th_sport; /* sport */
+ PF_ACPY(&s->ext.addr, daddr, af);
+ s->ext.port = th->th_dport;
+ if (nr != NULL) {
+ PF_ACPY(&s->lan.addr, &pd->baddr, af);
+ s->lan.port = bport;
+ } else {
+ PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
+ s->lan.port = s->gwy.port;
+ }
+ } else {
+ PF_ACPY(&s->lan.addr, daddr, af);
+ s->lan.port = th->th_dport;
+ PF_ACPY(&s->ext.addr, saddr, af);
+ s->ext.port = th->th_sport;
+ if (nr != NULL) {
+ PF_ACPY(&s->gwy.addr, &pd->baddr, af);
+ s->gwy.port = bport;
+ } else {
+ PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
+ s->gwy.port = s->lan.port;
+ }
+ }
+
+ s->src.seqlo = ntohl(th->th_seq);
+ s->src.seqhi = s->src.seqlo + len + 1;
+ if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
+ r->keep_state == PF_STATE_MODULATE) {
+ /* Generate sequence number modulator */
+#ifdef __FreeBSD__
+ while ((s->src.seqdiff =
+ pf_new_isn(s) - s->src.seqlo) == 0)
+ ;
+#else
+ while ((s->src.seqdiff =
+ tcp_rndiss_next() - s->src.seqlo) == 0)
+ ;
+#endif
+ pf_change_a(&th->th_seq, &th->th_sum,
+ htonl(s->src.seqlo + s->src.seqdiff), 0);
+ rewrite = 1;
+ } else
+ s->src.seqdiff = 0;
+ if (th->th_flags & TH_SYN) {
+ s->src.seqhi++;
+ s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
+ }
+ s->src.max_win = MAX(ntohs(th->th_win), 1);
+ if (s->src.wscale & PF_WSCALE_MASK) {
+ /* Remove scale factor from initial window */
+ int win = s->src.max_win;
+ win += 1 << (s->src.wscale & PF_WSCALE_MASK);
+ s->src.max_win = (win - 1) >>
+ (s->src.wscale & PF_WSCALE_MASK);
+ }
+ if (th->th_flags & TH_FIN)
+ s->src.seqhi++;
+ s->dst.seqhi = 1;
+ s->dst.max_win = 1;
+ s->src.state = TCPS_SYN_SENT;
+ s->dst.state = TCPS_CLOSED;
+ s->creation = time_second;
+ s->expire = time_second;
+ s->timeout = PFTM_TCP_FIRST_PACKET;
+ pf_set_rt_ifp(s, saddr);
+ if (sn != NULL) {
+ s->src_node = sn;
+ s->src_node->states++;
+ }
+ if (nsn != NULL) {
+ PF_ACPY(&nsn->raddr, &pd->naddr, af);
+ s->nat_src_node = nsn;
+ s->nat_src_node->states++;
+ }
+ if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
+ off, pd, th, &s->src, &s->dst)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
+ pool_put(&pf_state_pl, s);
+ return (PF_DROP);
+ }
+ if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
+ pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
+ &s->src, &s->dst, &rewrite)) {
+ /* This really shouldn't happen!!! */
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_normalize_tcp_stateful failed on first pkt"));
+ pf_normalize_tcp_cleanup(s);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
+ pool_put(&pf_state_pl, s);
+ return (PF_DROP);
+ }
+ if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
+ pf_normalize_tcp_cleanup(s);
+ REASON_SET(&reason, PFRES_STATEINS);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
+ pool_put(&pf_state_pl, s);
+ return (PF_DROP);
+ } else
+ *sm = s;
+ if (tag > 0) {
+ pf_tag_ref(tag);
+ s->tag = tag;
+ }
+ if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
+ r->keep_state == PF_STATE_SYNPROXY) {
+ s->src.state = PF_TCPS_PROXY_SRC;
+ if (nr != NULL) {
+ if (direction == PF_OUT) {
+ pf_change_ap(saddr, &th->th_sport,
+ pd->ip_sum, &th->th_sum, &pd->baddr,
+ bport, 0, af);
+ } else {
+ pf_change_ap(daddr, &th->th_dport,
+ pd->ip_sum, &th->th_sum, &pd->baddr,
+ bport, 0, af);
+ }
+ }
+ s->src.seqhi = htonl(arc4random());
+ /* Find mss option */
+ mss = pf_get_mss(m, off, th->th_off, af);
+ mss = pf_calc_mss(saddr, af, mss);
+ mss = pf_calc_mss(daddr, af, mss);
+ s->src.mss = mss;
+#ifdef __FreeBSD__
+ pf_send_tcp(NULL, r, af, daddr, saddr, th->th_dport,
+#else
+ pf_send_tcp(r, af, daddr, saddr, th->th_dport,
+#endif
+ th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
+ TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
+ REASON_SET(&reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+ }
+ }
+
+ /* copy back packet headers if we performed NAT operations */
+ if (rewrite)
+ m_copyback(m, off, sizeof(*th), (caddr_t)th);
+
+ return (PF_PASS);
+}
+
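+/*
+ * Filter a UDP packet that matches no existing state: apply
+ * BINAT/NAT/RDR translation, walk the filter ruleset and, on a pass
+ * with keep state, create the state entry.
+ */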
+int
+pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
+ struct pfi_kif *kif, struct mbuf *m, int off, void *h,
+#ifdef __FreeBSD__
+ struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
+ struct ifqueue *ifq, struct inpcb *inp)
+#else
+ struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
+ struct ifqueue *ifq)
+#endif
+{
+ struct pf_rule *nr = NULL;
+ struct pf_addr *saddr = pd->src, *daddr = pd->dst;
+ struct udphdr *uh = pd->hdr.udp;
+ u_int16_t bport, nport = 0;
+ sa_family_t af = pd->af;
+ struct pf_rule *r, *a = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ struct pf_src_node *nsn = NULL;
+ u_short reason;
+ int rewrite = 0;
+ int tag = -1, rtableid = -1;
+ int asd = 0;
+ int match = 0;
+
+ if (pf_check_congestion(ifq)) {
+ REASON_SET(&reason, PFRES_CONGEST);
+ return (PF_DROP);
+ }
+
+#ifdef __FreeBSD__
+ if (inp != NULL)
+ pd->lookup.done = pf_socket_lookup(direction, pd, inp);
+ else if (debug_pfugidhack) {
+ PF_UNLOCK();
+ DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
+ pd->lookup.done = pf_socket_lookup(direction, pd, inp);
+ PF_LOCK();
+ }
+#endif
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
+
+ if (direction == PF_OUT) {
+ bport = nport = uh->uh_sport;
+ /* check outgoing packet for BINAT/NAT */
+ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
+ saddr, uh->uh_sport, daddr, uh->uh_dport,
+ &pd->naddr, &nport)) != NULL) {
+ PF_ACPY(&pd->baddr, saddr, af);
+ pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
+ &uh->uh_sum, &pd->naddr, nport, 1, af);
+ rewrite++;
+ if (nr->natpass)
+ r = NULL;
+ pd->nat_rule = nr;
+ }
+ } else {
+ bport = nport = uh->uh_dport;
+ /* check incoming packet for BINAT/RDR */
+ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
+ saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
+ &nport)) != NULL) {
+ PF_ACPY(&pd->baddr, daddr, af);
+ pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
+ &uh->uh_sum, &pd->naddr, nport, 1, af);
+ rewrite++;
+ if (nr->natpass)
+ r = NULL;
+ pd->nat_rule = nr;
+ }
+ }
+
+ while (r != NULL) {
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != direction)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != af)
+ r = r->skip[PF_SKIP_AF].ptr;
+ else if (r->proto && r->proto != IPPROTO_UDP)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+ else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
+ r->src.neg, kif))
+ r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+ else if (r->src.port_op && !pf_match_port(r->src.port_op,
+ r->src.port[0], r->src.port[1], uh->uh_sport))
+ r = r->skip[PF_SKIP_SRC_PORT].ptr;
+ else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
+ r->dst.neg, NULL))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
+ r->dst.port[0], r->dst.port[1], uh->uh_dport))
+ r = r->skip[PF_SKIP_DST_PORT].ptr;
+ else if (r->tos && !(r->tos == pd->tos))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->rule_flag & PFRULE_FRAGMENT)
+ r = TAILQ_NEXT(r, entries);
+ else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
+#ifdef __FreeBSD__
+ pf_socket_lookup(direction, pd, inp), 1)) &&
+#else
+ pf_socket_lookup(direction, pd), 1)) &&
+#endif
+ !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
+ pd->lookup.uid))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
+#ifdef __FreeBSD__
+ pf_socket_lookup(direction, pd, inp), 1)) &&
+#else
+ pf_socket_lookup(direction, pd), 1)) &&
+#endif
+ !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
+ pd->lookup.gid))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->prob && r->prob <= arc4random())
+ r = TAILQ_NEXT(r, entries);
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->os_fingerprint != PF_OSFP_ANY)
+ r = TAILQ_NEXT(r, entries);
+ else {
+ if (r->tag)
+ tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
+ if (r->anchor == NULL) {
+ match = 1;
+ *rm = r;
+ *am = a;
+ *rsm = ruleset;
+ if ((*rm)->quick)
+ break;
+ r = TAILQ_NEXT(r, entries);
+ } else
+ pf_step_into_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match);
+ }
+ if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match))
+ break;
+ }
+ r = *rm;
+ a = *am;
+ ruleset = *rsm;
+
+ REASON_SET(&reason, PFRES_MATCH);
+
+ if (r->log || (nr != NULL && nr->natpass && nr->log)) {
+ if (rewrite)
+#ifdef __FreeBSD__
+ m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
+#else
+ m_copyback(m, off, sizeof(*uh), uh);
+#endif
+ PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
+ a, ruleset, pd);
+ }
+
+ if ((r->action == PF_DROP) &&
+ ((r->rule_flag & PFRULE_RETURNICMP) ||
+ (r->rule_flag & PFRULE_RETURN))) {
+ /* undo NAT changes, if they have taken place */
+ if (nr != NULL) {
+ if (direction == PF_OUT) {
+ pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
+ &uh->uh_sum, &pd->baddr, bport, 1, af);
+ rewrite++;
+ } else {
+ pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
+ &uh->uh_sum, &pd->baddr, bport, 1, af);
+ rewrite++;
+ }
+ }
+ if ((af == AF_INET) && r->return_icmp)
+ pf_send_icmp(m, r->return_icmp >> 8,
+ r->return_icmp & 255, af, r);
+ else if ((af == AF_INET6) && r->return_icmp6)
+ pf_send_icmp(m, r->return_icmp6 >> 8,
+ r->return_icmp6 & 255, af, r);
+ }
+
+ if (r->action == PF_DROP)
+ return (PF_DROP);
+
+ if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
+
+ if (r->keep_state || nr != NULL) {
+ /* create new state */
+ struct pf_state *s = NULL;
+ struct pf_src_node *sn = NULL;
+
+ /* check maximums */
+ if (r->max_states && (r->states >= r->max_states)) {
+ pf_status.lcounters[LCNT_STATES]++;
+ REASON_SET(&reason, PFRES_MAXSTATES);
+ goto cleanup;
+ }
+ /* src node for filter rule */
+ if ((r->rule_flag & PFRULE_SRCTRACK ||
+ r->rpool.opts & PF_POOL_STICKYADDR) &&
+ pf_insert_src_node(&sn, r, saddr, af) != 0) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto cleanup;
+ }
+ /* src node for translation rule */
+ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
+ ((direction == PF_OUT &&
+ pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
+ (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto cleanup;
+ }
+ s = pool_get(&pf_state_pl, PR_NOWAIT);
+ if (s == NULL) {
+ REASON_SET(&reason, PFRES_MEMORY);
+cleanup:
+ if (sn != NULL && sn->states == 0 && sn->expire == 0) {
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, sn);
+ }
+ if (nsn != sn && nsn != NULL && nsn->states == 0 &&
+ nsn->expire == 0) {
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, nsn);
+ }
+ return (PF_DROP);
+ }
+ bzero(s, sizeof(*s));
+ s->rule.ptr = r;
+ s->nat_rule.ptr = nr;
+ s->anchor.ptr = a;
+ STATE_INC_COUNTERS(s);
+ if (r->allow_opts)
+ s->state_flags |= PFSTATE_ALLOWOPTS;
+ if (r->rule_flag & PFRULE_STATESLOPPY)
+ s->state_flags |= PFSTATE_SLOPPY;
+ s->log = r->log & PF_LOG_ALL;
+ if (nr != NULL)
+ s->log |= nr->log & PF_LOG_ALL;
+ s->proto = IPPROTO_UDP;
+ s->direction = direction;
+ s->af = af;
+ if (direction == PF_OUT) {
+ PF_ACPY(&s->gwy.addr, saddr, af);
+ s->gwy.port = uh->uh_sport;
+ PF_ACPY(&s->ext.addr, daddr, af);
+ s->ext.port = uh->uh_dport;
+ if (nr != NULL) {
+ PF_ACPY(&s->lan.addr, &pd->baddr, af);
+ s->lan.port = bport;
+ } else {
+ PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
+ s->lan.port = s->gwy.port;
+ }
+ } else {
+ PF_ACPY(&s->lan.addr, daddr, af);
+ s->lan.port = uh->uh_dport;
+ PF_ACPY(&s->ext.addr, saddr, af);
+ s->ext.port = uh->uh_sport;
+ if (nr != NULL) {
+ PF_ACPY(&s->gwy.addr, &pd->baddr, af);
+ s->gwy.port = bport;
+ } else {
+ PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
+ s->gwy.port = s->lan.port;
+ }
+ }
+ s->src.state = PFUDPS_SINGLE;
+ s->dst.state = PFUDPS_NO_TRAFFIC;
+ s->creation = time_second;
+ s->expire = time_second;
+ s->timeout = PFTM_UDP_FIRST_PACKET;
+ pf_set_rt_ifp(s, saddr);
+ if (sn != NULL) {
+ s->src_node = sn;
+ s->src_node->states++;
+ }
+ if (nsn != NULL) {
+ PF_ACPY(&nsn->raddr, &pd->naddr, af);
+ s->nat_src_node = nsn;
+ s->nat_src_node->states++;
+ }
+ if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
+ REASON_SET(&reason, PFRES_STATEINS);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
+ pool_put(&pf_state_pl, s);
+ return (PF_DROP);
+ } else
+ *sm = s;
+ if (tag > 0) {
+ pf_tag_ref(tag);
+ s->tag = tag;
+ }
+ }
+
+ /* copy back packet headers if we performed NAT operations */
+ if (rewrite)
+ m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
+
+ return (PF_PASS);
+}
+
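+/*
+ * Filter an ICMP/ICMPv6 packet that matches no existing state.  Only
+ * query packets create state; ICMP error messages (unreachables,
+ * time exceeded, etc.) are handled statelessly here.
+ */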
+int
+pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
+ struct pfi_kif *kif, struct mbuf *m, int off, void *h,
+ struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
+ struct ifqueue *ifq)
+{
+ struct pf_rule *nr = NULL;
+ struct pf_addr *saddr = pd->src, *daddr = pd->dst;
+ struct pf_rule *r, *a = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ struct pf_src_node *nsn = NULL;
+ u_short reason;
+ u_int16_t icmpid = 0, bport, nport = 0;
+ sa_family_t af = pd->af;
+ u_int8_t icmptype = 0; /* make the compiler happy */
+ u_int8_t icmpcode = 0; /* make the compiler happy */
+ int state_icmp = 0;
+ int tag = -1, rtableid = -1;
+#ifdef INET6
+ int rewrite = 0;
+#endif /* INET6 */
+ int asd = 0;
+ int match = 0;
+
+ if (pf_check_congestion(ifq)) {
+ REASON_SET(&reason, PFRES_CONGEST);
+ return (PF_DROP);
+ }
+
+ switch (pd->proto) {
+#ifdef INET
+ case IPPROTO_ICMP:
+ icmptype = pd->hdr.icmp->icmp_type;
+ icmpcode = pd->hdr.icmp->icmp_code;
+ icmpid = pd->hdr.icmp->icmp_id;
+
+ if (icmptype == ICMP_UNREACH ||
+ icmptype == ICMP_SOURCEQUENCH ||
+ icmptype == ICMP_REDIRECT ||
+ icmptype == ICMP_TIMXCEED ||
+ icmptype == ICMP_PARAMPROB)
+ state_icmp++;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+ icmptype = pd->hdr.icmp6->icmp6_type;
+ icmpcode = pd->hdr.icmp6->icmp6_code;
+ icmpid = pd->hdr.icmp6->icmp6_id;
+
+ if (icmptype == ICMP6_DST_UNREACH ||
+ icmptype == ICMP6_PACKET_TOO_BIG ||
+ icmptype == ICMP6_TIME_EXCEEDED ||
+ icmptype == ICMP6_PARAM_PROB)
+ state_icmp++;
+ break;
+#endif /* INET6 */
+ }
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
+
+ if (direction == PF_OUT) {
+ bport = nport = icmpid;
+ /* check outgoing packet for BINAT/NAT */
+ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
+ saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
+ NULL) {
+ PF_ACPY(&pd->baddr, saddr, af);
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
+ pd->naddr.v4.s_addr, 0);
+ pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
+ pd->hdr.icmp->icmp_cksum, icmpid, nport, 0);
+ pd->hdr.icmp->icmp_id = nport;
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t)pd->hdr.icmp);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
+ &pd->naddr, 0);
+ rewrite++;
+ break;
+#endif /* INET6 */
+ }
+ if (nr->natpass)
+ r = NULL;
+ pd->nat_rule = nr;
+ }
+ } else {
+ bport = nport = icmpid;
+ /* check incoming packet for BINAT/RDR */
+ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
+ saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
+ NULL) {
+ PF_ACPY(&pd->baddr, daddr, af);
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ pf_change_a(&daddr->v4.s_addr,
+ pd->ip_sum, pd->naddr.v4.s_addr, 0);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
+ &pd->naddr, 0);
+ rewrite++;
+ break;
+#endif /* INET6 */
+ }
+ if (nr->natpass)
+ r = NULL;
+ pd->nat_rule = nr;
+ }
+ }
+
+ while (r != NULL) {
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != direction)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != af)
+ r = r->skip[PF_SKIP_AF].ptr;
+ else if (r->proto && r->proto != pd->proto)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+ else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
+ r->src.neg, kif))
+ r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+ else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
+ r->dst.neg, NULL))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else if (r->type && r->type != icmptype + 1)
+ r = TAILQ_NEXT(r, entries);
+ else if (r->code && r->code != icmpcode + 1)
+ r = TAILQ_NEXT(r, entries);
+ else if (r->tos && !(r->tos == pd->tos))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->rule_flag & PFRULE_FRAGMENT)
+ r = TAILQ_NEXT(r, entries);
+ else if (r->prob && r->prob <= arc4random())
+ r = TAILQ_NEXT(r, entries);
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->os_fingerprint != PF_OSFP_ANY)
+ r = TAILQ_NEXT(r, entries);
+ else {
+ if (r->tag)
+ tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
+ if (r->anchor == NULL) {
+ match = 1;
+ *rm = r;
+ *am = a;
+ *rsm = ruleset;
+ if ((*rm)->quick)
+ break;
+ r = TAILQ_NEXT(r, entries);
+ } else
+ pf_step_into_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match);
+ }
+ if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match))
+ break;
+ }
+ r = *rm;
+ a = *am;
+ ruleset = *rsm;
+
+ REASON_SET(&reason, PFRES_MATCH);
+
+ if (r->log || (nr != NULL && nr->natpass && nr->log)) {
+#ifdef INET6
+ if (rewrite)
+ m_copyback(m, off, sizeof(struct icmp6_hdr),
+ (caddr_t)pd->hdr.icmp6);
+#endif /* INET6 */
+ PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
+ a, ruleset, pd);
+ }
+
+ if (r->action != PF_PASS)
+ return (PF_DROP);
+
+ if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
+
+ if (!state_icmp && (r->keep_state || nr != NULL)) {
+ /* create new state */
+ struct pf_state *s = NULL;
+ struct pf_src_node *sn = NULL;
+
+ /* check maximums */
+ if (r->max_states && (r->states >= r->max_states)) {
+ pf_status.lcounters[LCNT_STATES]++;
+ REASON_SET(&reason, PFRES_MAXSTATES);
+ goto cleanup;
+ }
+ /* src node for filter rule */
+ if ((r->rule_flag & PFRULE_SRCTRACK ||
+ r->rpool.opts & PF_POOL_STICKYADDR) &&
+ pf_insert_src_node(&sn, r, saddr, af) != 0) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto cleanup;
+ }
+ /* src node for translation rule */
+ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
+ ((direction == PF_OUT &&
+ pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
+ (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto cleanup;
+ }
+ s = pool_get(&pf_state_pl, PR_NOWAIT);
+ if (s == NULL) {
+ REASON_SET(&reason, PFRES_MEMORY);
+cleanup:
+ if (sn != NULL && sn->states == 0 && sn->expire == 0) {
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, sn);
+ }
+ if (nsn != sn && nsn != NULL && nsn->states == 0 &&
+ nsn->expire == 0) {
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, nsn);
+ }
+ return (PF_DROP);
+ }
+ bzero(s, sizeof(*s));
+ s->rule.ptr = r;
+ s->nat_rule.ptr = nr;
+ s->anchor.ptr = a;
+ STATE_INC_COUNTERS(s);
+ if (r->allow_opts)
+ s->state_flags |= PFSTATE_ALLOWOPTS;
+ if (r->rule_flag & PFRULE_STATESLOPPY)
+ s->state_flags |= PFSTATE_SLOPPY;
+ s->log = r->log & PF_LOG_ALL;
+ if (nr != NULL)
+ s->log |= nr->log & PF_LOG_ALL;
+ s->proto = pd->proto;
+ s->direction = direction;
+ s->af = af;
+ if (direction == PF_OUT) {
+ PF_ACPY(&s->gwy.addr, saddr, af);
+ s->gwy.port = nport;
+ PF_ACPY(&s->ext.addr, daddr, af);
+ s->ext.port = 0;
+ if (nr != NULL) {
+ PF_ACPY(&s->lan.addr, &pd->baddr, af);
+ s->lan.port = bport;
+ } else {
+ PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
+ s->lan.port = s->gwy.port;
+ }
+ } else {
+ PF_ACPY(&s->lan.addr, daddr, af);
+ s->lan.port = nport;
+ PF_ACPY(&s->ext.addr, saddr, af);
+ s->ext.port = 0;
+ if (nr != NULL) {
+ PF_ACPY(&s->gwy.addr, &pd->baddr, af);
+ s->gwy.port = bport;
+ } else {
+ PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
+ s->gwy.port = s->lan.port;
+ }
+ }
+ s->creation = time_second;
+ s->expire = time_second;
+ s->timeout = PFTM_ICMP_FIRST_PACKET;
+ pf_set_rt_ifp(s, saddr);
+ if (sn != NULL) {
+ s->src_node = sn;
+ s->src_node->states++;
+ }
+ if (nsn != NULL) {
+ PF_ACPY(&nsn->raddr, &pd->naddr, af);
+ s->nat_src_node = nsn;
+ s->nat_src_node->states++;
+ }
+ if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
+ REASON_SET(&reason, PFRES_STATEINS);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
+ pool_put(&pf_state_pl, s);
+ return (PF_DROP);
+ } else
+ *sm = s;
+ if (tag > 0) {
+ pf_tag_ref(tag);
+ s->tag = tag;
+ }
+ }
+
+#ifdef INET6
+ /* copy back packet headers if we performed IPv6 NAT operations */
+ if (rewrite)
+ m_copyback(m, off, sizeof(struct icmp6_hdr),
+ (caddr_t)pd->hdr.icmp6);
+#endif /* INET6 */
+
+ return (PF_PASS);
+}
+
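+/*
+ * Rule evaluation for protocols other than TCP, UDP and ICMP.  With no
+ * ports or flags to compare, rules match on addresses alone, and any
+ * state created is keyed by the address pair only (cf. pf_test_state_other()).
+ */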
+int
+pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
+ struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
+ struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq)
+{
+ struct pf_rule *nr = NULL;
+ struct pf_rule *r, *a = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ struct pf_src_node *nsn = NULL;
+ struct pf_addr *saddr = pd->src, *daddr = pd->dst;
+ sa_family_t af = pd->af;
+ u_short reason;
+ int tag = -1, rtableid = -1;
+ int asd = 0;
+ int match = 0;
+
+ if (pf_check_congestion(ifq)) {
+ REASON_SET(&reason, PFRES_CONGEST);
+ return (PF_DROP);
+ }
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
+
+ if (direction == PF_OUT) {
+ /* check outgoing packet for BINAT/NAT */
+ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
+ saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
+ PF_ACPY(&pd->baddr, saddr, af);
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
+ pd->naddr.v4.s_addr, 0);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ PF_ACPY(saddr, &pd->naddr, af);
+ break;
+#endif /* INET6 */
+ }
+ if (nr->natpass)
+ r = NULL;
+ pd->nat_rule = nr;
+ }
+ } else {
+ /* check incoming packet for BINAT/RDR */
+ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
+ saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
+ PF_ACPY(&pd->baddr, daddr, af);
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ pf_change_a(&daddr->v4.s_addr,
+ pd->ip_sum, pd->naddr.v4.s_addr, 0);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ PF_ACPY(daddr, &pd->naddr, af);
+ break;
+#endif /* INET6 */
+ }
+ if (nr->natpass)
+ r = NULL;
+ pd->nat_rule = nr;
+ }
+ }
+
+ while (r != NULL) {
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != direction)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != af)
+ r = r->skip[PF_SKIP_AF].ptr;
+ else if (r->proto && r->proto != pd->proto)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+ else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
+ r->src.neg, kif))
+ r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+ else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
+ r->dst.neg, NULL))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else if (r->tos && !(r->tos == pd->tos))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->rule_flag & PFRULE_FRAGMENT)
+ r = TAILQ_NEXT(r, entries);
+ else if (r->prob && r->prob <= arc4random())
+ r = TAILQ_NEXT(r, entries);
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->os_fingerprint != PF_OSFP_ANY)
+ r = TAILQ_NEXT(r, entries);
+ else {
+ if (r->tag)
+ tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
+ if (r->anchor == NULL) {
+ match = 1;
+ *rm = r;
+ *am = a;
+ *rsm = ruleset;
+ if ((*rm)->quick)
+ break;
+ r = TAILQ_NEXT(r, entries);
+ } else
+ pf_step_into_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match);
+ }
+ if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match))
+ break;
+ }
+ r = *rm;
+ a = *am;
+ ruleset = *rsm;
+
+ REASON_SET(&reason, PFRES_MATCH);
+
+ if (r->log || (nr != NULL && nr->natpass && nr->log))
+ PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
+ a, ruleset, pd);
+
+ if ((r->action == PF_DROP) &&
+ ((r->rule_flag & PFRULE_RETURNICMP) ||
+ (r->rule_flag & PFRULE_RETURN))) {
+ struct pf_addr *a = NULL;
+
+ if (nr != NULL) {
+ if (direction == PF_OUT)
+ a = saddr;
+ else
+ a = daddr;
+ }
+ if (a != NULL) {
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ pf_change_a(&a->v4.s_addr, pd->ip_sum,
+ pd->baddr.v4.s_addr, 0);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ PF_ACPY(a, &pd->baddr, af);
+ break;
+#endif /* INET6 */
+ }
+ }
+ if ((af == AF_INET) && r->return_icmp)
+ pf_send_icmp(m, r->return_icmp >> 8,
+ r->return_icmp & 255, af, r);
+ else if ((af == AF_INET6) && r->return_icmp6)
+ pf_send_icmp(m, r->return_icmp6 >> 8,
+ r->return_icmp6 & 255, af, r);
+ }
+
+ if (r->action != PF_PASS)
+ return (PF_DROP);
+
+ if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
+
+ if (r->keep_state || nr != NULL) {
+ /* create new state */
+ struct pf_state *s = NULL;
+ struct pf_src_node *sn = NULL;
+
+ /* check maximums */
+ if (r->max_states && (r->states >= r->max_states)) {
+ pf_status.lcounters[LCNT_STATES]++;
+ REASON_SET(&reason, PFRES_MAXSTATES);
+ goto cleanup;
+ }
+ /* src node for filter rule */
+ if ((r->rule_flag & PFRULE_SRCTRACK ||
+ r->rpool.opts & PF_POOL_STICKYADDR) &&
+ pf_insert_src_node(&sn, r, saddr, af) != 0) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto cleanup;
+ }
+ /* src node for translation rule */
+ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
+ ((direction == PF_OUT &&
+ pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
+ (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto cleanup;
+ }
+ s = pool_get(&pf_state_pl, PR_NOWAIT);
+ if (s == NULL) {
+ REASON_SET(&reason, PFRES_MEMORY);
+cleanup:
+ if (sn != NULL && sn->states == 0 && sn->expire == 0) {
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, sn);
+ }
+ if (nsn != sn && nsn != NULL && nsn->states == 0 &&
+ nsn->expire == 0) {
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, nsn);
+ }
+ return (PF_DROP);
+ }
+ bzero(s, sizeof(*s));
+ s->rule.ptr = r;
+ s->nat_rule.ptr = nr;
+ s->anchor.ptr = a;
+ STATE_INC_COUNTERS(s);
+ if (r->allow_opts)
+ s->state_flags |= PFSTATE_ALLOWOPTS;
+ if (r->rule_flag & PFRULE_STATESLOPPY)
+ s->state_flags |= PFSTATE_SLOPPY;
+ s->log = r->log & PF_LOG_ALL;
+ if (nr != NULL)
+ s->log |= nr->log & PF_LOG_ALL;
+ s->proto = pd->proto;
+ s->direction = direction;
+ s->af = af;
+ if (direction == PF_OUT) {
+ PF_ACPY(&s->gwy.addr, saddr, af);
+ PF_ACPY(&s->ext.addr, daddr, af);
+ if (nr != NULL)
+ PF_ACPY(&s->lan.addr, &pd->baddr, af);
+ else
+ PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
+ } else {
+ PF_ACPY(&s->lan.addr, daddr, af);
+ PF_ACPY(&s->ext.addr, saddr, af);
+ if (nr != NULL)
+ PF_ACPY(&s->gwy.addr, &pd->baddr, af);
+ else
+ PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
+ }
+ s->src.state = PFOTHERS_SINGLE;
+ s->dst.state = PFOTHERS_NO_TRAFFIC;
+ s->creation = time_second;
+ s->expire = time_second;
+ s->timeout = PFTM_OTHER_FIRST_PACKET;
+ pf_set_rt_ifp(s, saddr);
+ if (sn != NULL) {
+ s->src_node = sn;
+ s->src_node->states++;
+ }
+ if (nsn != NULL) {
+ PF_ACPY(&nsn->raddr, &pd->naddr, af);
+ s->nat_src_node = nsn;
+ s->nat_src_node->states++;
+ }
+ if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
+ REASON_SET(&reason, PFRES_STATEINS);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
+ pool_put(&pf_state_pl, s);
+ return (PF_DROP);
+ } else
+ *sm = s;
+ if (tag > 0) {
+ pf_tag_ref(tag);
+ s->tag = tag;
+ }
+ }
+
+ return (PF_PASS);
+}
+
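+/*
+ * Rule evaluation for fragments that were not reassembled.  A fragment
+ * carries no ports or TCP flags, so rules that need them are skipped,
+ * and no state is created; the verdict applies to this fragment only.
+ */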
+int
+pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
+ struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
+ struct pf_ruleset **rsm)
+{
+ struct pf_rule *r, *a = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ sa_family_t af = pd->af;
+ u_short reason;
+ int tag = -1;
+ int asd = 0;
+ int match = 0;
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
+ while (r != NULL) {
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != direction)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != af)
+ r = r->skip[PF_SKIP_AF].ptr;
+ else if (r->proto && r->proto != pd->proto)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+ else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
+ r->src.neg, kif))
+ r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+ else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
+ r->dst.neg, NULL))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else if (r->tos && !(r->tos == pd->tos))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->os_fingerprint != PF_OSFP_ANY)
+ r = TAILQ_NEXT(r, entries);
+ else if (pd->proto == IPPROTO_UDP &&
+ (r->src.port_op || r->dst.port_op))
+ r = TAILQ_NEXT(r, entries);
+ else if (pd->proto == IPPROTO_TCP &&
+ (r->src.port_op || r->dst.port_op || r->flagset))
+ r = TAILQ_NEXT(r, entries);
+ else if ((pd->proto == IPPROTO_ICMP ||
+ pd->proto == IPPROTO_ICMPV6) &&
+ (r->type || r->code))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->prob && r->prob <= arc4random())
+ r = TAILQ_NEXT(r, entries);
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
+ r = TAILQ_NEXT(r, entries);
+ else {
+ if (r->anchor == NULL) {
+ match = 1;
+ *rm = r;
+ *am = a;
+ *rsm = ruleset;
+ if ((*rm)->quick)
+ break;
+ r = TAILQ_NEXT(r, entries);
+ } else
+ pf_step_into_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match);
+ }
+ if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match))
+ break;
+ }
+ r = *rm;
+ a = *am;
+ ruleset = *rsm;
+
+ REASON_SET(&reason, PFRES_MATCH);
+
+ if (r->log)
+ PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
+ pd);
+
+ if (r->action != PF_PASS)
+ return (PF_DROP);
+
+ if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
+
+ return (PF_PASS);
+}
+
+int
+pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
+ struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
+ struct pf_pdesc *pd, u_short *reason, int *copyback)
+{
+ struct tcphdr *th = pd->hdr.tcp;
+ u_int16_t win = ntohs(th->th_win);
+ u_int32_t ack, end, seq, orig_seq;
+ u_int8_t sws, dws;
+ int ackskew;
+
+ if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
+ sws = src->wscale & PF_WSCALE_MASK;
+ dws = dst->wscale & PF_WSCALE_MASK;
+ } else
+ sws = dws = 0;
+
+ /*
+ * Sequence tracking algorithm from Guido van Rooij's paper:
+ * http://www.madison-gurkha.com/publications/tcp_filtering/
+ * tcp_filtering.ps
+ */
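+	/*
+	 * Roughly: for each peer we track the edge of the data sent so far
+	 * (seqlo), the highest sequence number the other side will accept
+	 * (seqhi) and the largest window advertised (max_win).  A packet is
+	 * only passed if its segment and its ACK fall inside those bounds,
+	 * with some slack for retransmissions; see the MAXACKWINDOW checks
+	 * below.
+	 */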
+
+ orig_seq = seq = ntohl(th->th_seq);
+ if (src->seqlo == 0) {
+ /* First packet from this end. Set its state */
+
+ if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
+ src->scrub == NULL) {
+ if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
+ REASON_SET(reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
+ }
+
+ /* Deferred generation of sequence number modulator */
+ if (dst->seqdiff && !src->seqdiff) {
+#ifdef __FreeBSD__
+ while ((src->seqdiff = pf_new_isn(*state) - seq) == 0)
+ ;
+#else
+ while ((src->seqdiff = tcp_rndiss_next() - seq) == 0)
+ ;
+#endif
+ ack = ntohl(th->th_ack) - dst->seqdiff;
+ pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
+ src->seqdiff), 0);
+ pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
+ *copyback = 1;
+ } else {
+ ack = ntohl(th->th_ack);
+ }
+
+ end = seq + pd->p_len;
+ if (th->th_flags & TH_SYN) {
+ end++;
+ if (dst->wscale & PF_WSCALE_FLAG) {
+ src->wscale = pf_get_wscale(m, off, th->th_off,
+ pd->af);
+ if (src->wscale & PF_WSCALE_FLAG) {
+ /* Remove scale factor from initial
+ * window */
+ sws = src->wscale & PF_WSCALE_MASK;
+ win = ((u_int32_t)win + (1 << sws) - 1)
+ >> sws;
+ dws = dst->wscale & PF_WSCALE_MASK;
+ } else {
+ /* fixup other window */
+ dst->max_win <<= dst->wscale &
+ PF_WSCALE_MASK;
+ /* in case of a retrans SYN|ACK */
+ dst->wscale = 0;
+ }
+ }
+ }
+ if (th->th_flags & TH_FIN)
+ end++;
+
+ src->seqlo = seq;
+ if (src->state < TCPS_SYN_SENT)
+ src->state = TCPS_SYN_SENT;
+
+ /*
+ * May need to slide the window (seqhi may have been set by
+ * the crappy stack check or if we picked up the connection
+ * after establishment)
+ */
+ if (src->seqhi == 1 ||
+ SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
+ src->seqhi = end + MAX(1, dst->max_win << dws);
+ if (win > src->max_win)
+ src->max_win = win;
+
+ } else {
+ ack = ntohl(th->th_ack) - dst->seqdiff;
+ if (src->seqdiff) {
+ /* Modulate sequence numbers */
+ pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
+ src->seqdiff), 0);
+ pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
+ *copyback = 1;
+ }
+ end = seq + pd->p_len;
+ if (th->th_flags & TH_SYN)
+ end++;
+ if (th->th_flags & TH_FIN)
+ end++;
+ }
+
+ if ((th->th_flags & TH_ACK) == 0) {
+ /* Let it pass through the ack skew check */
+ ack = dst->seqlo;
+ } else if ((ack == 0 &&
+ (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
+ /* broken tcp stacks do not set ack */
+ (dst->state < TCPS_SYN_SENT)) {
+ /*
+ * Many stacks (ours included) will set the ACK number in an
+	 * Many stacks (ours included) will set the ACK number in a
+ */
+ ack = dst->seqlo;
+ }
+
+ if (seq == end) {
+		/* Ease sequencing restrictions on packets with no data */
+ seq = src->seqlo;
+ end = seq;
+ }
+
+ ackskew = dst->seqlo - ack;
+
+ /*
+ * Need to demodulate the sequence numbers in any TCP SACK options
+ * (Selective ACK). We could optionally validate the SACK values
+ * against the current ACK window, either forwards or backwards, but
+ * I'm not confident that SACK has been implemented properly
+	 * everywhere. It wouldn't surprise me if several stacks accidentally
+ * SACK too far backwards of previously ACKed data. There really aren't
+ * any security implications of bad SACKing unless the target stack
+ * doesn't validate the option length correctly. Someone trying to
+ * spoof into a TCP connection won't bother blindly sending SACK
+ * options anyway.
+ */
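+	/*
+	 * pf_modulate_sack() mirrors the th_ack adjustment above: each SACK
+	 * block edge is shifted by dst->seqdiff so that the receiver sees
+	 * SACK values in its own, unmodulated sequence space.
+	 */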
+ if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
+ if (pf_modulate_sack(m, off, pd, th, dst))
+ *copyback = 1;
+ }
+
+#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
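+	/*
+	 * 0xffff is the largest unscaled window, so MAXACKWINDOW
+	 * (65535 + 1500 = 67035 octets) is roughly one window plus a
+	 * full-sized segment of slack on either side of the tracked
+	 * sequence numbers.
+	 */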
+ if (SEQ_GEQ(src->seqhi, end) &&
+ /* Last octet inside other's window space */
+ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
+ /* Retrans: not more than one window back */
+ (ackskew >= -MAXACKWINDOW) &&
+ /* Acking not more than one reassembled fragment backwards */
+ (ackskew <= (MAXACKWINDOW << sws)) &&
+ /* Acking not more than one window forward */
+ ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
+ (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) {
+ /* Require an exact/+1 sequence match on resets when possible */
+
+ if (dst->scrub || src->scrub) {
+ if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
+ *state, src, dst, copyback))
+ return (PF_DROP);
+ }
+
+ /* update max window */
+ if (src->max_win < win)
+ src->max_win = win;
+ /* synchronize sequencing */
+ if (SEQ_GT(end, src->seqlo))
+ src->seqlo = end;
+ /* slide the window of what the other end can send */
+ if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
+ dst->seqhi = ack + MAX((win << sws), 1);
+
+ /* update states */
+ if (th->th_flags & TH_SYN)
+ if (src->state < TCPS_SYN_SENT)
+ src->state = TCPS_SYN_SENT;
+ if (th->th_flags & TH_FIN)
+ if (src->state < TCPS_CLOSING)
+ src->state = TCPS_CLOSING;
+ if (th->th_flags & TH_ACK) {
+ if (dst->state == TCPS_SYN_SENT) {
+ dst->state = TCPS_ESTABLISHED;
+ if (src->state == TCPS_ESTABLISHED &&
+ (*state)->src_node != NULL &&
+ pf_src_connlimit(state)) {
+ REASON_SET(reason, PFRES_SRCLIMIT);
+ return (PF_DROP);
+ }
+ } else if (dst->state == TCPS_CLOSING)
+ dst->state = TCPS_FIN_WAIT_2;
+ }
+ if (th->th_flags & TH_RST)
+ src->state = dst->state = TCPS_TIME_WAIT;
+
+ /* update expire time */
+ (*state)->expire = time_second;
+ if (src->state >= TCPS_FIN_WAIT_2 &&
+ dst->state >= TCPS_FIN_WAIT_2)
+ (*state)->timeout = PFTM_TCP_CLOSED;
+ else if (src->state >= TCPS_CLOSING &&
+ dst->state >= TCPS_CLOSING)
+ (*state)->timeout = PFTM_TCP_FIN_WAIT;
+ else if (src->state < TCPS_ESTABLISHED ||
+ dst->state < TCPS_ESTABLISHED)
+ (*state)->timeout = PFTM_TCP_OPENING;
+ else if (src->state >= TCPS_CLOSING ||
+ dst->state >= TCPS_CLOSING)
+ (*state)->timeout = PFTM_TCP_CLOSING;
+ else
+ (*state)->timeout = PFTM_TCP_ESTABLISHED;
+
+ /* Fall through to PASS packet */
+
+ } else if ((dst->state < TCPS_SYN_SENT ||
+ dst->state >= TCPS_FIN_WAIT_2 ||
+ src->state >= TCPS_FIN_WAIT_2) &&
+ SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
+ /* Within a window forward of the originating packet */
+ SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
+ /* Within a window backward of the originating packet */
+
+ /*
+ * This currently handles three situations:
+ * 1) Stupid stacks will shotgun SYNs before their peer
+ * replies.
+ * 2) When PF catches an already established stream (the
+ * firewall rebooted, the state table was flushed, routes
+ * changed...)
+ * 3) Packets get funky immediately after the connection
+ * closes (this should catch Solaris spurious ACK|FINs
+ * that web servers like to spew after a close)
+ *
+ * This must be a little more careful than the above code
+ * since packet floods will also be caught here. We don't
+ * update the TTL here to mitigate the damage of a packet
+ * flood and so the same code can handle awkward establishment
+ * and a loosened connection close.
+ * In the establishment case, a correct peer response will
+ * validate the connection, go through the normal state code
+ * and keep updating the state TTL.
+ */
+
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: loose state match: ");
+ pf_print_state(*state);
+ pf_print_flags(th->th_flags);
+ printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
+ "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len,
+#ifdef __FreeBSD__
+ ackskew, (unsigned long long)(*state)->packets[0],
+ (unsigned long long)(*state)->packets[1]);
+#else
+ ackskew, (*state)->packets[0],
+ (*state)->packets[1]);
+#endif
+ }
+
+ if (dst->scrub || src->scrub) {
+ if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
+ *state, src, dst, copyback))
+ return (PF_DROP);
+ }
+
+ /* update max window */
+ if (src->max_win < win)
+ src->max_win = win;
+ /* synchronize sequencing */
+ if (SEQ_GT(end, src->seqlo))
+ src->seqlo = end;
+ /* slide the window of what the other end can send */
+ if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
+ dst->seqhi = ack + MAX((win << sws), 1);
+
+ /*
+ * Cannot set dst->seqhi here since this could be a shotgunned
+ * SYN and not an already established connection.
+ */
+
+ if (th->th_flags & TH_FIN)
+ if (src->state < TCPS_CLOSING)
+ src->state = TCPS_CLOSING;
+ if (th->th_flags & TH_RST)
+ src->state = dst->state = TCPS_TIME_WAIT;
+
+ /* Fall through to PASS packet */
+
+ } else {
+ if ((*state)->dst.state == TCPS_SYN_SENT &&
+ (*state)->src.state == TCPS_SYN_SENT) {
+ /* Send RST for state mismatches during handshake */
+ if (!(th->th_flags & TH_RST))
+#ifdef __FreeBSD__
+ pf_send_tcp(m, (*state)->rule.ptr, pd->af,
+#else
+ pf_send_tcp((*state)->rule.ptr, pd->af,
+#endif
+ pd->dst, pd->src, th->th_dport,
+ th->th_sport, ntohl(th->th_ack), 0,
+ TH_RST, 0, 0,
+ (*state)->rule.ptr->return_ttl, 1, 0,
+ pd->eh, kif->pfik_ifp);
+ src->seqlo = 0;
+ src->seqhi = 1;
+ src->max_win = 1;
+ } else if (pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: BAD state: ");
+ pf_print_state(*state);
+ pf_print_flags(th->th_flags);
+ printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
+#ifdef notyet
+ "pkts=%llu:%llu dir=%s,%s\n",
+#else
+ "pkts=%llu:%llu%s\n",
+#endif
+ seq, orig_seq, ack, pd->p_len, ackskew,
+#ifdef __FreeBSD__
+ (unsigned long long)(*state)->packets[0],
+ (unsigned long long)(*state)->packets[1],
+#else
+ (*state)->packets[0], (*state)->packets[1],
+#endif
+#ifdef notyet
+ direction == PF_IN ? "in" : "out",
+ direction == (*state)->direction ? "fwd" : "rev");
+#else
+ "");
+#endif
+ printf("pf: State failure on: %c %c %c %c | %c %c\n",
+ SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
+ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
+ ' ': '2',
+ (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
+ (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
+ SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
+ SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
+ }
+ REASON_SET(reason, PFRES_BADSTATE);
+ return (PF_DROP);
+ }
+
+ /* Any packets which have gotten here are to be passed */
+ return (PF_PASS);
+}
+
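+/*
+ * Sloppy tracking for PFRULE_STATESLOPPY states: only the TCP flags drive
+ * the state transitions, with no sequence or window checks at all.  This
+ * tolerates paths where only one half of the connection is visible, at
+ * the cost of much weaker spoofing protection.
+ */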
+int
+pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
+ struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
+{
+ struct tcphdr *th = pd->hdr.tcp;
+
+ if (th->th_flags & TH_SYN)
+ if (src->state < TCPS_SYN_SENT)
+ src->state = TCPS_SYN_SENT;
+ if (th->th_flags & TH_FIN)
+ if (src->state < TCPS_CLOSING)
+ src->state = TCPS_CLOSING;
+ if (th->th_flags & TH_ACK) {
+ if (dst->state == TCPS_SYN_SENT) {
+ dst->state = TCPS_ESTABLISHED;
+ if (src->state == TCPS_ESTABLISHED &&
+ (*state)->src_node != NULL &&
+ pf_src_connlimit(state)) {
+ REASON_SET(reason, PFRES_SRCLIMIT);
+ return (PF_DROP);
+ }
+ } else if (dst->state == TCPS_CLOSING) {
+ dst->state = TCPS_FIN_WAIT_2;
+ } else if (src->state == TCPS_SYN_SENT &&
+ dst->state < TCPS_SYN_SENT) {
+ /*
+ * Handle a special sloppy case where we only see one
+				 * half of the connection. If there is an ACK after
+ * the initial SYN without ever seeing a packet from
+ * the destination, set the connection to established.
+ */
+ dst->state = src->state = TCPS_ESTABLISHED;
+ if ((*state)->src_node != NULL &&
+ pf_src_connlimit(state)) {
+ REASON_SET(reason, PFRES_SRCLIMIT);
+ return (PF_DROP);
+ }
+ } else if (src->state == TCPS_CLOSING &&
+ dst->state == TCPS_ESTABLISHED &&
+ dst->seqlo == 0) {
+ /*
+ * Handle the closing of half connections where we
+ * don't see the full bidirectional FIN/ACK+ACK
+ * handshake.
+ */
+ dst->state = TCPS_CLOSING;
+ }
+ }
+ if (th->th_flags & TH_RST)
+ src->state = dst->state = TCPS_TIME_WAIT;
+
+ /* update expire time */
+ (*state)->expire = time_second;
+ if (src->state >= TCPS_FIN_WAIT_2 &&
+ dst->state >= TCPS_FIN_WAIT_2)
+ (*state)->timeout = PFTM_TCP_CLOSED;
+ else if (src->state >= TCPS_CLOSING &&
+ dst->state >= TCPS_CLOSING)
+ (*state)->timeout = PFTM_TCP_FIN_WAIT;
+ else if (src->state < TCPS_ESTABLISHED ||
+ dst->state < TCPS_ESTABLISHED)
+ (*state)->timeout = PFTM_TCP_OPENING;
+ else if (src->state >= TCPS_CLOSING ||
+ dst->state >= TCPS_CLOSING)
+ (*state)->timeout = PFTM_TCP_CLOSING;
+ else
+ (*state)->timeout = PFTM_TCP_ESTABLISHED;
+
+ return (PF_PASS);
+}
+
+int
+pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
+ struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
+ u_short *reason)
+{
+ struct pf_state_cmp key;
+ struct tcphdr *th = pd->hdr.tcp;
+ int copyback = 0;
+ struct pf_state_peer *src, *dst;
+
+ key.af = pd->af;
+ key.proto = IPPROTO_TCP;
+ if (direction == PF_IN) {
+ PF_ACPY(&key.ext.addr, pd->src, key.af);
+ PF_ACPY(&key.gwy.addr, pd->dst, key.af);
+ key.ext.port = th->th_sport;
+ key.gwy.port = th->th_dport;
+ } else {
+ PF_ACPY(&key.lan.addr, pd->src, key.af);
+ PF_ACPY(&key.ext.addr, pd->dst, key.af);
+ key.lan.port = th->th_sport;
+ key.ext.port = th->th_dport;
+ }
+
+ STATE_LOOKUP();
+
+ if (direction == (*state)->direction) {
+ src = &(*state)->src;
+ dst = &(*state)->dst;
+ } else {
+ src = &(*state)->dst;
+ dst = &(*state)->src;
+ }
+
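+	/*
+	 * Synproxy: in PF_TCPS_PROXY_SRC pf completes the three-way
+	 * handshake with the client by itself; once the client's final ACK
+	 * arrives the state moves to PF_TCPS_PROXY_DST and pf replays the
+	 * handshake towards the destination, splicing the two halves
+	 * together with the seqdiff modulators computed below.
+	 */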
+ if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
+ if (direction != (*state)->direction) {
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+ }
+ if (th->th_flags & TH_SYN) {
+ if (ntohl(th->th_seq) != (*state)->src.seqlo) {
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_DROP);
+ }
+#ifdef __FreeBSD__
+ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
+#else
+ pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
+#endif
+ pd->src, th->th_dport, th->th_sport,
+ (*state)->src.seqhi, ntohl(th->th_seq) + 1,
+ TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
+ 0, NULL, NULL);
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+ } else if (!(th->th_flags & TH_ACK) ||
+ (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
+ (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_DROP);
+ } else if ((*state)->src_node != NULL &&
+ pf_src_connlimit(state)) {
+ REASON_SET(reason, PFRES_SRCLIMIT);
+ return (PF_DROP);
+ } else
+ (*state)->src.state = PF_TCPS_PROXY_DST;
+ }
+ if ((*state)->src.state == PF_TCPS_PROXY_DST) {
+ struct pf_state_host *src, *dst;
+
+ if (direction == PF_OUT) {
+ src = &(*state)->gwy;
+ dst = &(*state)->ext;
+ } else {
+ src = &(*state)->ext;
+ dst = &(*state)->lan;
+ }
+ if (direction == (*state)->direction) {
+ if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
+ (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
+ (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_DROP);
+ }
+ (*state)->src.max_win = MAX(ntohs(th->th_win), 1);
+ if ((*state)->dst.seqhi == 1)
+ (*state)->dst.seqhi = htonl(arc4random());
+#ifdef __FreeBSD__
+ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
+ &src->addr,
+#else
+ pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
+#endif
+ &dst->addr, src->port, dst->port,
+ (*state)->dst.seqhi, 0, TH_SYN, 0,
+ (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+ } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
+ (TH_SYN|TH_ACK)) ||
+ (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_DROP);
+ } else {
+ (*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
+ (*state)->dst.seqlo = ntohl(th->th_seq);
+#ifdef __FreeBSD__
+ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
+#else
+ pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
+#endif
+ pd->src, th->th_dport, th->th_sport,
+ ntohl(th->th_ack), ntohl(th->th_seq) + 1,
+ TH_ACK, (*state)->src.max_win, 0, 0, 0,
+ (*state)->tag, NULL, NULL);
+#ifdef __FreeBSD__
+ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
+ &src->addr,
+#else
+ pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
+#endif
+ &dst->addr, src->port, dst->port,
+ (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
+ TH_ACK, (*state)->dst.max_win, 0, 0, 1,
+ 0, NULL, NULL);
+ (*state)->src.seqdiff = (*state)->dst.seqhi -
+ (*state)->src.seqlo;
+ (*state)->dst.seqdiff = (*state)->src.seqhi -
+ (*state)->dst.seqlo;
+ (*state)->src.seqhi = (*state)->src.seqlo +
+ (*state)->dst.max_win;
+ (*state)->dst.seqhi = (*state)->dst.seqlo +
+ (*state)->src.max_win;
+ (*state)->src.wscale = (*state)->dst.wscale = 0;
+ (*state)->src.state = (*state)->dst.state =
+ TCPS_ESTABLISHED;
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+ }
+ }
+
+ if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
+ dst->state >= TCPS_FIN_WAIT_2 &&
+ src->state >= TCPS_FIN_WAIT_2) {
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: state reuse ");
+ pf_print_state(*state);
+ pf_print_flags(th->th_flags);
+ printf("\n");
+ }
+ /* XXX make sure it's the same direction ?? */
+ (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
+ pf_unlink_state(*state);
+ *state = NULL;
+ return (PF_DROP);
+ }
+
+ if ((*state)->state_flags & PFSTATE_SLOPPY) {
+ if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
+ return (PF_DROP);
+ } else {
+ if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
+ &copyback) == PF_DROP)
+ return (PF_DROP);
+ }
+
+ /* translate source/destination address, if necessary */
+ if (STATE_TRANSLATE(*state)) {
+ if (direction == PF_OUT)
+ pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
+ &th->th_sum, &(*state)->gwy.addr,
+ (*state)->gwy.port, 0, pd->af);
+ else
+ pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
+ &th->th_sum, &(*state)->lan.addr,
+ (*state)->lan.port, 0, pd->af);
+ m_copyback(m, off, sizeof(*th), (caddr_t)th);
+ } else if (copyback) {
+ /* Copyback sequence modulation or stateful scrub changes */
+ m_copyback(m, off, sizeof(*th), (caddr_t)th);
+ }
+
+ return (PF_PASS);
+}
+
+int
+pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
+ struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
+{
+ struct pf_state_peer *src, *dst;
+ struct pf_state_cmp key;
+ struct udphdr *uh = pd->hdr.udp;
+
+ key.af = pd->af;
+ key.proto = IPPROTO_UDP;
+ if (direction == PF_IN) {
+ PF_ACPY(&key.ext.addr, pd->src, key.af);
+ PF_ACPY(&key.gwy.addr, pd->dst, key.af);
+ key.ext.port = uh->uh_sport;
+ key.gwy.port = uh->uh_dport;
+ } else {
+ PF_ACPY(&key.lan.addr, pd->src, key.af);
+ PF_ACPY(&key.ext.addr, pd->dst, key.af);
+ key.lan.port = uh->uh_sport;
+ key.ext.port = uh->uh_dport;
+ }
+
+ STATE_LOOKUP();
+
+ if (direction == (*state)->direction) {
+ src = &(*state)->src;
+ dst = &(*state)->dst;
+ } else {
+ src = &(*state)->dst;
+ dst = &(*state)->src;
+ }
+
+ /* update states */
+ if (src->state < PFUDPS_SINGLE)
+ src->state = PFUDPS_SINGLE;
+ if (dst->state == PFUDPS_SINGLE)
+ dst->state = PFUDPS_MULTIPLE;
+
+ /* update expire time */
+ (*state)->expire = time_second;
+ if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
+ (*state)->timeout = PFTM_UDP_MULTIPLE;
+ else
+ (*state)->timeout = PFTM_UDP_SINGLE;
+
+ /* translate source/destination address, if necessary */
+ if (STATE_TRANSLATE(*state)) {
+ if (direction == PF_OUT)
+ pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
+ &uh->uh_sum, &(*state)->gwy.addr,
+ (*state)->gwy.port, 1, pd->af);
+ else
+ pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
+ &uh->uh_sum, &(*state)->lan.addr,
+ (*state)->lan.port, 1, pd->af);
+ m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
+ }
+
+ return (PF_PASS);
+}
+
+int
+pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
+ struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
+{
+ struct pf_addr *saddr = pd->src, *daddr = pd->dst;
+ u_int16_t icmpid = 0; /* make the compiler happy */
+ u_int16_t *icmpsum = NULL; /* make the compiler happy */
+ u_int8_t icmptype = 0; /* make the compiler happy */
+ int state_icmp = 0;
+ struct pf_state_cmp key;
+
+ switch (pd->proto) {
+#ifdef INET
+ case IPPROTO_ICMP:
+ icmptype = pd->hdr.icmp->icmp_type;
+ icmpid = pd->hdr.icmp->icmp_id;
+ icmpsum = &pd->hdr.icmp->icmp_cksum;
+
+ if (icmptype == ICMP_UNREACH ||
+ icmptype == ICMP_SOURCEQUENCH ||
+ icmptype == ICMP_REDIRECT ||
+ icmptype == ICMP_TIMXCEED ||
+ icmptype == ICMP_PARAMPROB)
+ state_icmp++;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+ icmptype = pd->hdr.icmp6->icmp6_type;
+ icmpid = pd->hdr.icmp6->icmp6_id;
+ icmpsum = &pd->hdr.icmp6->icmp6_cksum;
+
+ if (icmptype == ICMP6_DST_UNREACH ||
+ icmptype == ICMP6_PACKET_TOO_BIG ||
+ icmptype == ICMP6_TIME_EXCEEDED ||
+ icmptype == ICMP6_PARAM_PROB)
+ state_icmp++;
+ break;
+#endif /* INET6 */
+ }
+
+ if (!state_icmp) {
+
+ /*
+ * ICMP query/reply message not related to a TCP/UDP packet.
+ * Search for an ICMP state.
+ */
+ key.af = pd->af;
+ key.proto = pd->proto;
+ if (direction == PF_IN) {
+ PF_ACPY(&key.ext.addr, pd->src, key.af);
+ PF_ACPY(&key.gwy.addr, pd->dst, key.af);
+ key.ext.port = 0;
+ key.gwy.port = icmpid;
+ } else {
+ PF_ACPY(&key.lan.addr, pd->src, key.af);
+ PF_ACPY(&key.ext.addr, pd->dst, key.af);
+ key.lan.port = icmpid;
+ key.ext.port = 0;
+ }
+
+ STATE_LOOKUP();
+
+ (*state)->expire = time_second;
+ (*state)->timeout = PFTM_ICMP_ERROR_REPLY;
+
+ /* translate source/destination address, if necessary */
+ if (STATE_TRANSLATE(*state)) {
+ if (direction == PF_OUT) {
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ pf_change_a(&saddr->v4.s_addr,
+ pd->ip_sum,
+ (*state)->gwy.addr.v4.s_addr, 0);
+ pd->hdr.icmp->icmp_cksum =
+ pf_cksum_fixup(
+ pd->hdr.icmp->icmp_cksum, icmpid,
+ (*state)->gwy.port, 0);
+ pd->hdr.icmp->icmp_id =
+ (*state)->gwy.port;
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t)pd->hdr.icmp);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ pf_change_a6(saddr,
+ &pd->hdr.icmp6->icmp6_cksum,
+ &(*state)->gwy.addr, 0);
+ m_copyback(m, off,
+ sizeof(struct icmp6_hdr),
+ (caddr_t)pd->hdr.icmp6);
+ break;
+#endif /* INET6 */
+ }
+ } else {
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ pf_change_a(&daddr->v4.s_addr,
+ pd->ip_sum,
+ (*state)->lan.addr.v4.s_addr, 0);
+ pd->hdr.icmp->icmp_cksum =
+ pf_cksum_fixup(
+ pd->hdr.icmp->icmp_cksum, icmpid,
+ (*state)->lan.port, 0);
+ pd->hdr.icmp->icmp_id =
+ (*state)->lan.port;
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t)pd->hdr.icmp);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ pf_change_a6(daddr,
+ &pd->hdr.icmp6->icmp6_cksum,
+ &(*state)->lan.addr, 0);
+ m_copyback(m, off,
+ sizeof(struct icmp6_hdr),
+ (caddr_t)pd->hdr.icmp6);
+ break;
+#endif /* INET6 */
+ }
+ }
+ }
+
+ return (PF_PASS);
+
+ } else {
+ /*
+ * ICMP error message in response to a TCP/UDP packet.
+ * Extract the inner TCP/UDP header and search for that state.
+ */
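+		/*
+		 * Note that the state keys built below are reversed with
+		 * respect to the outer packet: the ICMP payload quotes the
+		 * original packet travelling in the opposite direction, so
+		 * its source and destination swap roles in the lookup.
+		 */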
+
+ struct pf_pdesc pd2;
+#ifdef INET
+ struct ip h2;
+#endif /* INET */
+#ifdef INET6
+ struct ip6_hdr h2_6;
+ int terminal = 0;
+#endif /* INET6 */
+ int ipoff2 = 0; /* make the compiler happy */
+ int off2 = 0; /* make the compiler happy */
+
+ pd2.af = pd->af;
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ /* offset of h2 in mbuf chain */
+ ipoff2 = off + ICMP_MINLEN;
+
+ if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
+ NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(ip)\n"));
+ return (PF_DROP);
+ }
+ /*
+ * ICMP error messages don't refer to non-first
+ * fragments
+ */
+ if (h2.ip_off & htons(IP_OFFMASK)) {
+ REASON_SET(reason, PFRES_FRAG);
+ return (PF_DROP);
+ }
+
+ /* offset of protocol header that follows h2 */
+ off2 = ipoff2 + (h2.ip_hl << 2);
+
+ pd2.proto = h2.ip_p;
+ pd2.src = (struct pf_addr *)&h2.ip_src;
+ pd2.dst = (struct pf_addr *)&h2.ip_dst;
+ pd2.ip_sum = &h2.ip_sum;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ ipoff2 = off + sizeof(struct icmp6_hdr);
+
+ if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
+ NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(ip6)\n"));
+ return (PF_DROP);
+ }
+ pd2.proto = h2_6.ip6_nxt;
+ pd2.src = (struct pf_addr *)&h2_6.ip6_src;
+ pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
+ pd2.ip_sum = NULL;
+ off2 = ipoff2 + sizeof(h2_6);
+ do {
+ switch (pd2.proto) {
+ case IPPROTO_FRAGMENT:
+ /*
+ * ICMPv6 error messages for
+ * non-first fragments
+ */
+ REASON_SET(reason, PFRES_FRAG);
+ return (PF_DROP);
+ case IPPROTO_AH:
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS: {
+ /* get next header and header length */
+ struct ip6_ext opt6;
+
+ if (!pf_pull_hdr(m, off2, &opt6,
+ sizeof(opt6), NULL, reason,
+ pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMPv6 short opt\n"));
+ return (PF_DROP);
+ }
+ if (pd2.proto == IPPROTO_AH)
+ off2 += (opt6.ip6e_len + 2) * 4;
+ else
+ off2 += (opt6.ip6e_len + 1) * 8;
+ pd2.proto = opt6.ip6e_nxt;
+				/* go to the next header */
+ break;
+ }
+ default:
+ terminal++;
+ break;
+ }
+ } while (!terminal);
+ break;
+#endif /* INET6 */
+#ifdef __FreeBSD__
+ default:
+ panic("AF not supported: %d", pd->af);
+#endif
+ }
+
+ switch (pd2.proto) {
+ case IPPROTO_TCP: {
+ struct tcphdr th;
+ u_int32_t seq;
+ struct pf_state_peer *src, *dst;
+ u_int8_t dws;
+ int copyback = 0;
+
+ /*
+			 * Only the first 8 bytes of the TCP header are
+			 * guaranteed to be present.  Don't access any TCP
+			 * header fields after th_seq; an ackskew test is not
+			 * possible.
+ */
+ if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
+ pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(tcp)\n"));
+ return (PF_DROP);
+ }
+
+ key.af = pd2.af;
+ key.proto = IPPROTO_TCP;
+ if (direction == PF_IN) {
+ PF_ACPY(&key.ext.addr, pd2.dst, key.af);
+ PF_ACPY(&key.gwy.addr, pd2.src, key.af);
+ key.ext.port = th.th_dport;
+ key.gwy.port = th.th_sport;
+ } else {
+ PF_ACPY(&key.lan.addr, pd2.dst, key.af);
+ PF_ACPY(&key.ext.addr, pd2.src, key.af);
+ key.lan.port = th.th_dport;
+ key.ext.port = th.th_sport;
+ }
+
+ STATE_LOOKUP();
+
+ if (direction == (*state)->direction) {
+ src = &(*state)->dst;
+ dst = &(*state)->src;
+ } else {
+ src = &(*state)->src;
+ dst = &(*state)->dst;
+ }
+
+ if (src->wscale && dst->wscale)
+ dws = dst->wscale & PF_WSCALE_MASK;
+ else
+ dws = 0;
+
+ /* Demodulate sequence number */
+ seq = ntohl(th.th_seq) - src->seqdiff;
+ if (src->seqdiff) {
+ pf_change_a(&th.th_seq, icmpsum,
+ htonl(seq), 0);
+ copyback = 1;
+ }
+
+ if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
+ (!SEQ_GEQ(src->seqhi, seq) ||
+ !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: BAD ICMP %d:%d ",
+ icmptype, pd->hdr.icmp->icmp_code);
+ pf_print_host(pd->src, 0, pd->af);
+ printf(" -> ");
+ pf_print_host(pd->dst, 0, pd->af);
+ printf(" state: ");
+ pf_print_state(*state);
+ printf(" seq=%u\n", seq);
+ }
+ REASON_SET(reason, PFRES_BADSTATE);
+ return (PF_DROP);
+ }
+
+ if (STATE_TRANSLATE(*state)) {
+ if (direction == PF_IN) {
+ pf_change_icmp(pd2.src, &th.th_sport,
+ daddr, &(*state)->lan.addr,
+ (*state)->lan.port, NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, pd2.af);
+ } else {
+ pf_change_icmp(pd2.dst, &th.th_dport,
+ saddr, &(*state)->gwy.addr,
+ (*state)->gwy.port, NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, pd2.af);
+ }
+ copyback = 1;
+ }
+
+ if (copyback) {
+ switch (pd2.af) {
+#ifdef INET
+ case AF_INET:
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t)pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2),
+ (caddr_t)&h2);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ m_copyback(m, off,
+ sizeof(struct icmp6_hdr),
+ (caddr_t)pd->hdr.icmp6);
+ m_copyback(m, ipoff2, sizeof(h2_6),
+ (caddr_t)&h2_6);
+ break;
+#endif /* INET6 */
+ }
+ m_copyback(m, off2, 8, (caddr_t)&th);
+ }
+
+ return (PF_PASS);
+ break;
+ }
+ case IPPROTO_UDP: {
+ struct udphdr uh;
+
+ if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
+ NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(udp)\n"));
+ return (PF_DROP);
+ }
+
+ key.af = pd2.af;
+ key.proto = IPPROTO_UDP;
+ if (direction == PF_IN) {
+ PF_ACPY(&key.ext.addr, pd2.dst, key.af);
+ PF_ACPY(&key.gwy.addr, pd2.src, key.af);
+ key.ext.port = uh.uh_dport;
+ key.gwy.port = uh.uh_sport;
+ } else {
+ PF_ACPY(&key.lan.addr, pd2.dst, key.af);
+ PF_ACPY(&key.ext.addr, pd2.src, key.af);
+ key.lan.port = uh.uh_dport;
+ key.ext.port = uh.uh_sport;
+ }
+
+ STATE_LOOKUP();
+
+ if (STATE_TRANSLATE(*state)) {
+ if (direction == PF_IN) {
+ pf_change_icmp(pd2.src, &uh.uh_sport,
+ daddr, &(*state)->lan.addr,
+ (*state)->lan.port, &uh.uh_sum,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 1, pd2.af);
+ } else {
+ pf_change_icmp(pd2.dst, &uh.uh_dport,
+ saddr, &(*state)->gwy.addr,
+ (*state)->gwy.port, &uh.uh_sum,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 1, pd2.af);
+ }
+ switch (pd2.af) {
+#ifdef INET
+ case AF_INET:
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t)pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2),
+ (caddr_t)&h2);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ m_copyback(m, off,
+ sizeof(struct icmp6_hdr),
+ (caddr_t)pd->hdr.icmp6);
+ m_copyback(m, ipoff2, sizeof(h2_6),
+ (caddr_t)&h2_6);
+ break;
+#endif /* INET6 */
+ }
+ m_copyback(m, off2, sizeof(uh),
+ (caddr_t)&uh);
+ }
+
+ return (PF_PASS);
+ break;
+ }
+#ifdef INET
+ case IPPROTO_ICMP: {
+ struct icmp iih;
+
+ if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
+ NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: ICMP error message too short "
+ "(icmp)\n"));
+ return (PF_DROP);
+ }
+
+ key.af = pd2.af;
+ key.proto = IPPROTO_ICMP;
+ if (direction == PF_IN) {
+ PF_ACPY(&key.ext.addr, pd2.dst, key.af);
+ PF_ACPY(&key.gwy.addr, pd2.src, key.af);
+ key.ext.port = 0;
+ key.gwy.port = iih.icmp_id;
+ } else {
+ PF_ACPY(&key.lan.addr, pd2.dst, key.af);
+ PF_ACPY(&key.ext.addr, pd2.src, key.af);
+ key.lan.port = iih.icmp_id;
+ key.ext.port = 0;
+ }
+
+ STATE_LOOKUP();
+
+ if (STATE_TRANSLATE(*state)) {
+ if (direction == PF_IN) {
+ pf_change_icmp(pd2.src, &iih.icmp_id,
+ daddr, &(*state)->lan.addr,
+ (*state)->lan.port, NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET);
+ } else {
+ pf_change_icmp(pd2.dst, &iih.icmp_id,
+ saddr, &(*state)->gwy.addr,
+ (*state)->gwy.port, NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET);
+ }
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t)pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2),
+ (caddr_t)&h2);
+ m_copyback(m, off2, ICMP_MINLEN,
+ (caddr_t)&iih);
+ }
+
+ return (PF_PASS);
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case IPPROTO_ICMPV6: {
+ struct icmp6_hdr iih;
+
+ if (!pf_pull_hdr(m, off2, &iih,
+ sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(icmp6)\n"));
+ return (PF_DROP);
+ }
+
+ key.af = pd2.af;
+ key.proto = IPPROTO_ICMPV6;
+ if (direction == PF_IN) {
+ PF_ACPY(&key.ext.addr, pd2.dst, key.af);
+ PF_ACPY(&key.gwy.addr, pd2.src, key.af);
+ key.ext.port = 0;
+ key.gwy.port = iih.icmp6_id;
+ } else {
+ PF_ACPY(&key.lan.addr, pd2.dst, key.af);
+ PF_ACPY(&key.ext.addr, pd2.src, key.af);
+ key.lan.port = iih.icmp6_id;
+ key.ext.port = 0;
+ }
+
+ STATE_LOOKUP();
+
+ if (STATE_TRANSLATE(*state)) {
+ if (direction == PF_IN) {
+ pf_change_icmp(pd2.src, &iih.icmp6_id,
+ daddr, &(*state)->lan.addr,
+ (*state)->lan.port, NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET6);
+ } else {
+ pf_change_icmp(pd2.dst, &iih.icmp6_id,
+ saddr, &(*state)->gwy.addr,
+ (*state)->gwy.port, NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET6);
+ }
+ m_copyback(m, off, sizeof(struct icmp6_hdr),
+ (caddr_t)pd->hdr.icmp6);
+ m_copyback(m, ipoff2, sizeof(h2_6),
+ (caddr_t)&h2_6);
+ m_copyback(m, off2, sizeof(struct icmp6_hdr),
+ (caddr_t)&iih);
+ }
+
+ return (PF_PASS);
+ break;
+ }
+#endif /* INET6 */
+ default: {
+ key.af = pd2.af;
+ key.proto = pd2.proto;
+ if (direction == PF_IN) {
+ PF_ACPY(&key.ext.addr, pd2.dst, key.af);
+ PF_ACPY(&key.gwy.addr, pd2.src, key.af);
+ key.ext.port = 0;
+ key.gwy.port = 0;
+ } else {
+ PF_ACPY(&key.lan.addr, pd2.dst, key.af);
+ PF_ACPY(&key.ext.addr, pd2.src, key.af);
+ key.lan.port = 0;
+ key.ext.port = 0;
+ }
+
+ STATE_LOOKUP();
+
+ if (STATE_TRANSLATE(*state)) {
+ if (direction == PF_IN) {
+ pf_change_icmp(pd2.src, NULL,
+ daddr, &(*state)->lan.addr,
+ 0, NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, pd2.af);
+ } else {
+ pf_change_icmp(pd2.dst, NULL,
+ saddr, &(*state)->gwy.addr,
+ 0, NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, pd2.af);
+ }
+ switch (pd2.af) {
+#ifdef INET
+ case AF_INET:
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t)pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2),
+ (caddr_t)&h2);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ m_copyback(m, off,
+ sizeof(struct icmp6_hdr),
+ (caddr_t)pd->hdr.icmp6);
+ m_copyback(m, ipoff2, sizeof(h2_6),
+ (caddr_t)&h2_6);
+ break;
+#endif /* INET6 */
+ }
+ }
+
+ return (PF_PASS);
+ break;
+ }
+ }
+ }
+}
+
+int
+pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
+ struct pf_pdesc *pd)
+{
+ struct pf_state_peer *src, *dst;
+ struct pf_state_cmp key;
+
+ key.af = pd->af;
+ key.proto = pd->proto;
+ if (direction == PF_IN) {
+ PF_ACPY(&key.ext.addr, pd->src, key.af);
+ PF_ACPY(&key.gwy.addr, pd->dst, key.af);
+ key.ext.port = 0;
+ key.gwy.port = 0;
+ } else {
+ PF_ACPY(&key.lan.addr, pd->src, key.af);
+ PF_ACPY(&key.ext.addr, pd->dst, key.af);
+ key.lan.port = 0;
+ key.ext.port = 0;
+ }
+
+ STATE_LOOKUP();
+
+ if (direction == (*state)->direction) {
+ src = &(*state)->src;
+ dst = &(*state)->dst;
+ } else {
+ src = &(*state)->dst;
+ dst = &(*state)->src;
+ }
+
+ /* update states */
+ if (src->state < PFOTHERS_SINGLE)
+ src->state = PFOTHERS_SINGLE;
+ if (dst->state == PFOTHERS_SINGLE)
+ dst->state = PFOTHERS_MULTIPLE;
+
+ /* update expire time */
+ (*state)->expire = time_second;
+ if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
+ (*state)->timeout = PFTM_OTHER_MULTIPLE;
+ else
+ (*state)->timeout = PFTM_OTHER_SINGLE;
+
+ /* translate source/destination address, if necessary */
+ if (STATE_TRANSLATE(*state)) {
+ if (direction == PF_OUT)
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ pf_change_a(&pd->src->v4.s_addr,
+ pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
+ 0);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
+ break;
+#endif /* INET6 */
+ }
+ else
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ pf_change_a(&pd->dst->v4.s_addr,
+ pd->ip_sum, (*state)->lan.addr.v4.s_addr,
+ 0);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
+ break;
+#endif /* INET6 */
+ }
+ }
+
+ return (PF_PASS);
+}
+
+/*
+ * ipoff and off are measured from the start of the mbuf chain.
+ * h must be at "ipoff" on the mbuf chain.  On success the len bytes at
+ * off are copied into p and p is returned; on failure NULL is returned
+ * with *actionp and *reasonp set as appropriate for fragmented or
+ * truncated packets.
+ */
+void *
+pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
+ u_short *actionp, u_short *reasonp, sa_family_t af)
+{
+ switch (af) {
+#ifdef INET
+ case AF_INET: {
+ struct ip *h = mtod(m, struct ip *);
+ u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
+
+ if (fragoff) {
+ if (fragoff >= len)
+ ACTION_SET(actionp, PF_PASS);
+ else {
+ ACTION_SET(actionp, PF_DROP);
+ REASON_SET(reasonp, PFRES_FRAG);
+ }
+ return (NULL);
+ }
+ if (m->m_pkthdr.len < off + len ||
+ ntohs(h->ip_len) < off + len) {
+ ACTION_SET(actionp, PF_DROP);
+ REASON_SET(reasonp, PFRES_SHORT);
+ return (NULL);
+ }
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6: {
+ struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+
+ if (m->m_pkthdr.len < off + len ||
+ (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
+ (unsigned)(off + len)) {
+ ACTION_SET(actionp, PF_DROP);
+ REASON_SET(reasonp, PFRES_SHORT);
+ return (NULL);
+ }
+ break;
+ }
+#endif /* INET6 */
+ }
+ m_copydata(m, off, len, p);
+ return (p);
+}
+
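+/*
+ * Return 1 if addr is routable.  When kif is given this doubles as a
+ * loose uRPF check: at least one route to addr must point back out the
+ * interface the packet came in on.
+ */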
+int
+pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
+{
+ struct sockaddr_in *dst;
+ int ret = 1;
+ int check_mpath;
+#ifndef __FreeBSD__
+ extern int ipmultipath;
+#endif
+#ifdef INET6
+#ifndef __FreeBSD__
+ extern int ip6_multipath;
+#endif
+ struct sockaddr_in6 *dst6;
+ struct route_in6 ro;
+#else
+ struct route ro;
+#endif
+ struct radix_node *rn;
+ struct rtentry *rt;
+ struct ifnet *ifp;
+
+ check_mpath = 0;
+ bzero(&ro, sizeof(ro));
+ switch (af) {
+ case AF_INET:
+ dst = satosin(&ro.ro_dst);
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = addr->v4;
+#ifndef __FreeBSD__ /* MULTIPATH_ROUTING */
+ if (ipmultipath)
+ check_mpath = 1;
+#endif
+ break;
+#ifdef INET6
+ case AF_INET6:
+ dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
+ dst6->sin6_family = AF_INET6;
+ dst6->sin6_len = sizeof(*dst6);
+ dst6->sin6_addr = addr->v6;
+#ifndef __FreeBSD__ /* MULTIPATH_ROUTING */
+ if (ip6_multipath)
+ check_mpath = 1;
+#endif
+ break;
+#endif /* INET6 */
+ default:
+ return (0);
+ }
+
+ /* Skip checks for ipsec interfaces */
+ if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
+ goto out;
+
+#ifdef __FreeBSD__
+	/* XXX MRT not always INET; stick with table 0 though */
+ if (af == AF_INET)
+ in_rtalloc_ign((struct route *)&ro, 0, 0);
+ else
+ rtalloc_ign((struct route *)&ro, 0);
+#else /* ! __FreeBSD__ */
+ rtalloc_noclone((struct route *)&ro, NO_CLONING);
+#endif
+
+ if (ro.ro_rt != NULL) {
+ /* No interface given, this is a no-route check */
+ if (kif == NULL)
+ goto out;
+
+ if (kif->pfik_ifp == NULL) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Perform uRPF check if passed input interface */
+ ret = 0;
+ rn = (struct radix_node *)ro.ro_rt;
+ do {
+ rt = (struct rtentry *)rn;
+#ifndef __FreeBSD__ /* CARPDEV */
+ if (rt->rt_ifp->if_type == IFT_CARP)
+ ifp = rt->rt_ifp->if_carpdev;
+ else
+#endif
+ ifp = rt->rt_ifp;
+
+ if (kif->pfik_ifp == ifp)
+ ret = 1;
+#ifdef __FreeBSD__ /* MULTIPATH_ROUTING */
+ rn = NULL;
+#else
+ rn = rn_mpath_next(rn);
+#endif
+ } while (check_mpath == 1 && rn != NULL && ret == 0);
+ } else
+ ret = 0;
+out:
+ if (ro.ro_rt != NULL)
+ RTFREE(ro.ro_rt);
+ return (ret);
+}
+
+int
+pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
+{
+ struct sockaddr_in *dst;
+#ifdef INET6
+ struct sockaddr_in6 *dst6;
+ struct route_in6 ro;
+#else
+ struct route ro;
+#endif
+ int ret = 0;
+
+ bzero(&ro, sizeof(ro));
+ switch (af) {
+ case AF_INET:
+ dst = satosin(&ro.ro_dst);
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = addr->v4;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
+ dst6->sin6_family = AF_INET6;
+ dst6->sin6_len = sizeof(*dst6);
+ dst6->sin6_addr = addr->v6;
+ break;
+#endif /* INET6 */
+ default:
+ return (0);
+ }
+
+#ifdef __FreeBSD__
+# ifdef RTF_PRCLONING
+ rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING));
+# else /* !RTF_PRCLONING */
+ if (af == AF_INET)
+ in_rtalloc_ign((struct route *)&ro, 0, 0);
+ else
+ rtalloc_ign((struct route *)&ro, 0);
+# endif
+#else /* ! __FreeBSD__ */
+ rtalloc_noclone((struct route *)&ro, NO_CLONING);
+#endif
+
+ if (ro.ro_rt != NULL) {
+#ifdef __FreeBSD__
+ /* XXX_IMPORT: later */
+#else
+ if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
+ ret = 1;
+#endif
+ RTFREE(ro.ro_rt);
+ }
+
+ return (ret);
+}
+
+#ifdef INET
+
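+/*
+ * Policy routing for route-to/reply-to/dup-to rules.  PF_DUPTO operates
+ * on a copy of the packet and leaves the original alone, PF_REPLYTO only
+ * applies when the packet travels against the rule's direction, and
+ * PF_FASTROUTE uses the regular routing table instead of the rule's
+ * address pool.
+ */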
+void
+pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
+ struct pf_state *s, struct pf_pdesc *pd)
+{
+ struct mbuf *m0, *m1;
+ struct route iproute;
+ struct route *ro = NULL;
+ struct sockaddr_in *dst;
+ struct ip *ip;
+ struct ifnet *ifp = NULL;
+ struct pf_addr naddr;
+ struct pf_src_node *sn = NULL;
+ int error = 0;
+#ifdef __FreeBSD__
+ int sw_csum;
+#endif
+#ifdef IPSEC
+ struct m_tag *mtag;
+#endif /* IPSEC */
+
+ if (m == NULL || *m == NULL || r == NULL ||
+ (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
+ panic("pf_route: invalid parameters");
+
+ if (pd->pf_mtag->routed++ > 3) {
+ m0 = *m;
+ *m = NULL;
+ goto bad;
+ }
+
+ if (r->rt == PF_DUPTO) {
+#ifdef __FreeBSD__
+ if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
+#else
+ if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
+#endif
+ return;
+ } else {
+ if ((r->rt == PF_REPLYTO) == (r->direction == dir))
+ return;
+ m0 = *m;
+ }
+
+ if (m0->m_len < sizeof(struct ip)) {
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_route: m0->m_len < sizeof(struct ip)\n"));
+ goto bad;
+ }
+
+ ip = mtod(m0, struct ip *);
+
+ ro = &iproute;
+ bzero((caddr_t)ro, sizeof(*ro));
+ dst = satosin(&ro->ro_dst);
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = ip->ip_dst;
+
+ if (r->rt == PF_FASTROUTE) {
+ in_rtalloc(ro, 0);
+ if (ro->ro_rt == 0) {
+ KMOD_IPSTAT_INC(ips_noroute);
+ goto bad;
+ }
+
+ ifp = ro->ro_rt->rt_ifp;
+ ro->ro_rt->rt_use++;
+
+ if (ro->ro_rt->rt_flags & RTF_GATEWAY)
+ dst = satosin(ro->ro_rt->rt_gateway);
+ } else {
+ if (TAILQ_EMPTY(&r->rpool.list)) {
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
+ goto bad;
+ }
+ if (s == NULL) {
+ pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
+ &naddr, NULL, &sn);
+ if (!PF_AZERO(&naddr, AF_INET))
+ dst->sin_addr.s_addr = naddr.v4.s_addr;
+ ifp = r->rpool.cur->kif ?
+ r->rpool.cur->kif->pfik_ifp : NULL;
+ } else {
+ if (!PF_AZERO(&s->rt_addr, AF_INET))
+ dst->sin_addr.s_addr =
+ s->rt_addr.v4.s_addr;
+ ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
+ }
+ }
+ if (ifp == NULL)
+ goto bad;
+
+ if (oifp != ifp) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+ if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
+ PF_LOCK();
+ goto bad;
+ } else if (m0 == NULL) {
+ PF_LOCK();
+ goto done;
+ }
+ PF_LOCK();
+#else
+ if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
+ goto bad;
+ else if (m0 == NULL)
+ goto done;
+#endif
+ if (m0->m_len < sizeof(struct ip)) {
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_route: m0->m_len < sizeof(struct ip)\n"));
+ goto bad;
+ }
+ ip = mtod(m0, struct ip *);
+ }
+
+#ifdef __FreeBSD__
+ /* Copied from FreeBSD 5.1-CURRENT ip_output. */
+ m0->m_pkthdr.csum_flags |= CSUM_IP;
+ sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
+ if (sw_csum & CSUM_DELAY_DATA) {
+ /*
+ * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
+ */
+ NTOHS(ip->ip_len);
+ NTOHS(ip->ip_off); /* XXX: needed? */
+ in_delayed_cksum(m0);
+ HTONS(ip->ip_len);
+ HTONS(ip->ip_off);
+ sw_csum &= ~CSUM_DELAY_DATA;
+ }
+ m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
+
+ if (ntohs(ip->ip_len) <= ifp->if_mtu ||
+ (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
+ (ifp->if_hwassist & CSUM_FRAGMENT &&
+ ((ip->ip_off & htons(IP_DF)) == 0))) {
+ /*
+ * ip->ip_len = htons(ip->ip_len);
+ * ip->ip_off = htons(ip->ip_off);
+ */
+ ip->ip_sum = 0;
+ if (sw_csum & CSUM_DELAY_IP) {
+ /* From KAME */
+ if (ip->ip_v == IPVERSION &&
+ (ip->ip_hl << 2) == sizeof(*ip)) {
+ ip->ip_sum = in_cksum_hdr(ip);
+ } else {
+ ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
+ }
+ }
+ PF_UNLOCK();
+ error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro);
+ PF_LOCK();
+ goto done;
+ }
+
+#else
+ /* Copied from ip_output. */
+#ifdef IPSEC
+ /*
+ * If deferred crypto processing is needed, check that the
+ * interface supports it.
+ */
+ if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
+ != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
+ /* Notify IPsec to do its own crypto. */
+ ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
+ goto bad;
+ }
+#endif /* IPSEC */
+
+ /* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
+ if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) {
+ if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
+ ifp->if_bridge != NULL) {
+ in_delayed_cksum(m0);
+ m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */
+ }
+ } else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) {
+ if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
+ ifp->if_bridge != NULL) {
+ in_delayed_cksum(m0);
+ m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */
+ }
+ }
+
+ if (ntohs(ip->ip_len) <= ifp->if_mtu) {
+ if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
+ ifp->if_bridge == NULL) {
+ m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
+ KMOD_IPSTAT_INC(ips_outhwcsum);
+ } else {
+ ip->ip_sum = 0;
+ ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
+ }
+ /* Update relevant hardware checksum stats for TCP/UDP */
+ if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT)
+ KMOD_TCPSTAT_INC(tcps_outhwcsum);
+ else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT)
+ KMOD_UDPSTAT_INC(udps_outhwcsum);
+ error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
+ goto done;
+ }
+#endif
+ /*
+ * Too large for interface; fragment if possible.
+ * Must be able to put at least 8 bytes per fragment.
+ */
+ if (ip->ip_off & htons(IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
+ KMOD_IPSTAT_INC(ips_cantfrag);
+ if (r->rt != PF_DUPTO) {
+#ifdef __FreeBSD__
+ /* icmp_error() expects host byte ordering */
+ NTOHS(ip->ip_len);
+ NTOHS(ip->ip_off);
+ PF_UNLOCK();
+ icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
+ ifp->if_mtu);
+ PF_LOCK();
+#else
+ icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
+ ifp->if_mtu);
+#endif
+ goto done;
+ } else
+ goto bad;
+ }
+
+ m1 = m0;
+#ifdef __FreeBSD__
+	/*
+	 * XXX: reusing ip_fragment() is cheaper and less error prone
+	 * than rolling our own fragmentation routine.
+	 */
+ NTOHS(ip->ip_len);
+ NTOHS(ip->ip_off);
+ error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
+#else
+ error = ip_fragment(m0, ifp, ifp->if_mtu);
+#endif
+ if (error) {
+#ifndef __FreeBSD__ /* ip_fragment does not do m_freem() on FreeBSD */
+ m0 = NULL;
+#endif
+ goto bad;
+ }
+
+ for (m0 = m1; m0; m0 = m1) {
+ m1 = m0->m_nextpkt;
+ m0->m_nextpkt = 0;
+#ifdef __FreeBSD__
+ if (error == 0) {
+ PF_UNLOCK();
+ error = (*ifp->if_output)(ifp, m0, sintosa(dst),
+ NULL);
+ PF_LOCK();
+ } else
+#else
+ if (error == 0)
+ error = (*ifp->if_output)(ifp, m0, sintosa(dst),
+ NULL);
+ else
+#endif
+ m_freem(m0);
+ }
+
+ if (error == 0)
+ KMOD_IPSTAT_INC(ips_fragmented);
+
+done:
+ if (r->rt != PF_DUPTO)
+ *m = NULL;
+ if (ro == &iproute && ro->ro_rt)
+ RTFREE(ro->ro_rt);
+ return;
+
+bad:
+ m_freem(m0);
+ goto done;
+}
+#endif /* INET */
+
+#ifdef INET6
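+/*
+ * pf_route6 implements route-to, reply-to and dup-to for IPv6: the
+ * outgoing interface is taken from the rule's address pool (or from the
+ * state entry), the filter is re-run if the interface changed, and the
+ * packet is either handed to nd6_output() or answered with an ICMPv6
+ * "packet too big" error if it exceeds the interface MTU.
+ */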
+void
+pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
+ struct pf_state *s, struct pf_pdesc *pd)
+{
+ struct mbuf *m0;
+ struct route_in6 ip6route;
+ struct route_in6 *ro;
+ struct sockaddr_in6 *dst;
+ struct ip6_hdr *ip6;
+ struct ifnet *ifp = NULL;
+ struct pf_addr naddr;
+ struct pf_src_node *sn = NULL;
+ int error = 0;
+
+ if (m == NULL || *m == NULL || r == NULL ||
+ (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
+ panic("pf_route6: invalid parameters");
+
+ if (pd->pf_mtag->routed++ > 3) {
+ m0 = *m;
+ *m = NULL;
+ goto bad;
+ }
+
+ if (r->rt == PF_DUPTO) {
+#ifdef __FreeBSD__
+ if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
+#else
+ if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
+#endif
+ return;
+ } else {
+ if ((r->rt == PF_REPLYTO) == (r->direction == dir))
+ return;
+ m0 = *m;
+ }
+
+ if (m0->m_len < sizeof(struct ip6_hdr)) {
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
+ goto bad;
+ }
+ ip6 = mtod(m0, struct ip6_hdr *);
+
+ ro = &ip6route;
+ bzero((caddr_t)ro, sizeof(*ro));
+ dst = (struct sockaddr_in6 *)&ro->ro_dst;
+ dst->sin6_family = AF_INET6;
+ dst->sin6_len = sizeof(*dst);
+ dst->sin6_addr = ip6->ip6_dst;
+
+ /* Cheat. XXX why only in the v6 case??? */
+ if (r->rt == PF_FASTROUTE) {
+#ifdef __FreeBSD__
+ m0->m_flags |= M_SKIP_FIREWALL;
+ PF_UNLOCK();
+ ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
+ PF_LOCK();
+#else
+ mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
+ if (mtag == NULL)
+ goto bad;
+ m_tag_prepend(m0, mtag);
+ pd->pf_mtag->flags |= PF_TAG_GENERATED;
+ ip6_output(m0, NULL, NULL, 0, NULL, NULL);
+#endif
+ return;
+ }
+
+ if (TAILQ_EMPTY(&r->rpool.list)) {
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
+ goto bad;
+ }
+ if (s == NULL) {
+ pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
+ &naddr, NULL, &sn);
+ if (!PF_AZERO(&naddr, AF_INET6))
+ PF_ACPY((struct pf_addr *)&dst->sin6_addr,
+ &naddr, AF_INET6);
+ ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
+ } else {
+ if (!PF_AZERO(&s->rt_addr, AF_INET6))
+ PF_ACPY((struct pf_addr *)&dst->sin6_addr,
+ &s->rt_addr, AF_INET6);
+ ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
+ }
+ if (ifp == NULL)
+ goto bad;
+
+ if (oifp != ifp) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+ if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
+ PF_LOCK();
+ goto bad;
+ } else if (m0 == NULL) {
+ PF_LOCK();
+ goto done;
+ }
+ PF_LOCK();
+#else
+ if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
+ goto bad;
+ else if (m0 == NULL)
+ goto done;
+#endif
+ if (m0->m_len < sizeof(struct ip6_hdr)) {
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
+ goto bad;
+ }
+ ip6 = mtod(m0, struct ip6_hdr *);
+ }
+
+ /*
+ * If the packet is too large for the outgoing interface,
+ * send back an icmp6 error.
+ */
+ if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
+ dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
+ if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ error = nd6_output(ifp, ifp, m0, dst, NULL);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ } else {
+ in6_ifstat_inc(ifp, ifs6_in_toobig);
+#ifdef __FreeBSD__
+ if (r->rt != PF_DUPTO) {
+ PF_UNLOCK();
+ icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
+ PF_LOCK();
+ } else
+#else
+ if (r->rt != PF_DUPTO)
+ icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
+ else
+#endif
+ goto bad;
+ }
+
+done:
+ if (r->rt != PF_DUPTO)
+ *m = NULL;
+ return;
+
+bad:
+ m_freem(m0);
+ goto done;
+}
+#endif /* INET6 */
+
+
+#ifdef __FreeBSD__
+/*
+ * FreeBSD supports cksum offloads for the following drivers.
+ * em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
+ * ti(4), txp(4), xl(4)
+ *
+ * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
+ *  the network driver performed the cksum including the pseudo header;
+ *  csum_data needs to be verified.
+ * CSUM_DATA_VALID :
+ *  the network driver performed the cksum, but an additional pseudo header
+ *  cksum computation with the partial csum_data is needed (i.e. the
+ *  hardware lacks support for the pseudo header, for instance hme(4),
+ *  sk(4) and possibly gem(4)).
+ *
+ * After validating the cksum of the packet, set both the CSUM_DATA_VALID
+ * and CSUM_PSEUDO_HDR flags in order to avoid recomputation of the cksum
+ * in the upper TCP/UDP layer.
+ * Also, set csum_data to 0xffff to force cksum validation.
+ */
+int
+pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
+{
+ u_int16_t sum = 0;
+ int hw_assist = 0;
+ struct ip *ip;
+
+ if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
+ return (1);
+ if (m->m_pkthdr.len < off + len)
+ return (1);
+
+ switch (p) {
+ case IPPROTO_TCP:
+ if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+ if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
+ sum = m->m_pkthdr.csum_data;
+ } else {
+ ip = mtod(m, struct ip *);
+ sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htonl((u_short)len +
+ m->m_pkthdr.csum_data + IPPROTO_TCP));
+ }
+ sum ^= 0xffff;
+ ++hw_assist;
+ }
+ break;
+ case IPPROTO_UDP:
+ if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+ if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
+ sum = m->m_pkthdr.csum_data;
+ } else {
+ ip = mtod(m, struct ip *);
+ sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htonl((u_short)len +
+ m->m_pkthdr.csum_data + IPPROTO_UDP));
+ }
+ sum ^= 0xffff;
+ ++hw_assist;
+ }
+ break;
+ case IPPROTO_ICMP:
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+#endif /* INET6 */
+ break;
+ default:
+ return (1);
+ }
+
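+	/* No usable hardware assist; verify the checksum in software. */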
+ if (!hw_assist) {
+ switch (af) {
+ case AF_INET:
+ if (p == IPPROTO_ICMP) {
+ if (m->m_len < off)
+ return (1);
+ m->m_data += off;
+ m->m_len -= off;
+ sum = in_cksum(m, len);
+ m->m_data -= off;
+ m->m_len += off;
+ } else {
+ if (m->m_len < sizeof(struct ip))
+ return (1);
+ sum = in4_cksum(m, p, off, len);
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (m->m_len < sizeof(struct ip6_hdr))
+ return (1);
+ sum = in6_cksum(m, p, off, len);
+ break;
+#endif /* INET6 */
+ default:
+ return (1);
+ }
+ }
+ if (sum) {
+ switch (p) {
+ case IPPROTO_TCP:
+ {
+ KMOD_TCPSTAT_INC(tcps_rcvbadsum);
+ break;
+ }
+ case IPPROTO_UDP:
+ {
+ KMOD_UDPSTAT_INC(udps_badsum);
+ break;
+ }
+ case IPPROTO_ICMP:
+ {
+ KMOD_ICMPSTAT_INC(icps_checksum);
+ break;
+ }
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+ {
+ KMOD_ICMP6STAT_INC(icp6s_checksum);
+ break;
+ }
+#endif /* INET6 */
+ }
+ return (1);
+ } else {
+ if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
+ m->m_pkthdr.csum_flags |=
+ (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+ }
+ return (0);
+}
+#else /* !__FreeBSD__ */
+/*
+ * Check the protocol (TCP/UDP/ICMP/ICMPv6) checksum and set the mbuf flag.
+ * off is the offset where the protocol header starts and len is the total
+ * length of the protocol header plus payload.
+ * Returns 0 when the checksum is valid, otherwise returns 1.
+ */
+int
+pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
+ sa_family_t af)
+{
+ u_int16_t flag_ok, flag_bad;
+ u_int16_t sum;
+
+ switch (p) {
+ case IPPROTO_TCP:
+ flag_ok = M_TCP_CSUM_IN_OK;
+ flag_bad = M_TCP_CSUM_IN_BAD;
+ break;
+ case IPPROTO_UDP:
+ flag_ok = M_UDP_CSUM_IN_OK;
+ flag_bad = M_UDP_CSUM_IN_BAD;
+ break;
+ case IPPROTO_ICMP:
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+#endif /* INET6 */
+ flag_ok = flag_bad = 0;
+ break;
+ default:
+ return (1);
+ }
+ if (m->m_pkthdr.csum_flags & flag_ok)
+ return (0);
+ if (m->m_pkthdr.csum_flags & flag_bad)
+ return (1);
+ if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
+ return (1);
+ if (m->m_pkthdr.len < off + len)
+ return (1);
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ if (p == IPPROTO_ICMP) {
+ if (m->m_len < off)
+ return (1);
+ m->m_data += off;
+ m->m_len -= off;
+ sum = in_cksum(m, len);
+ m->m_data -= off;
+ m->m_len += off;
+ } else {
+ if (m->m_len < sizeof(struct ip))
+ return (1);
+ sum = in4_cksum(m, p, off, len);
+ }
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (m->m_len < sizeof(struct ip6_hdr))
+ return (1);
+ sum = in6_cksum(m, p, off, len);
+ break;
+#endif /* INET6 */
+ default:
+ return (1);
+ }
+ if (sum) {
+ m->m_pkthdr.csum_flags |= flag_bad;
+ switch (p) {
+ case IPPROTO_TCP:
+ KMOD_TCPSTAT_INC(tcps_rcvbadsum);
+ break;
+ case IPPROTO_UDP:
+ KMOD_UDPSTAT_INC(udps_badsum);
+ break;
+ case IPPROTO_ICMP:
+ KMOD_ICMPSTAT_INC(icps_checksum);
+ break;
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+ KMOD_ICMP6STAT_INC(icp6s_checksum);
+ break;
+#endif /* INET6 */
+ }
+ return (1);
+ }
+ m->m_pkthdr.csum_flags |= flag_ok;
+ return (0);
+}
+#endif /* __FreeBSD__ */
+
+#ifdef INET
+int
+#ifdef __FreeBSD__
+pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
+ struct ether_header *eh, struct inpcb *inp)
+#else
+pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
+ struct ether_header *eh)
+#endif
+{
+ struct pfi_kif *kif;
+ u_short action, reason = 0, log = 0;
+ struct mbuf *m = *m0;
+ struct ip *h = NULL; /* make the compiler happy */
+ struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr;
+ struct pf_state *s = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ struct pf_pdesc pd;
+ int off, dirndx, pqid = 0;
+
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ if (!pf_status.running)
+#ifdef __FreeBSD__
+ {
+ PF_UNLOCK();
+#endif
+ return (PF_PASS);
+#ifdef __FreeBSD__
+ }
+#endif
+
+ memset(&pd, 0, sizeof(pd));
+ if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_test: pf_get_mtag returned NULL\n"));
+ return (PF_DROP);
+ }
+#ifdef __FreeBSD__
+ if (m->m_flags & M_SKIP_FIREWALL) {
+ PF_UNLOCK();
+ return (PF_PASS);
+ }
+#else
+ if (pd.pf_mtag->flags & PF_TAG_GENERATED)
+ return (PF_PASS);
+#endif
+
+#ifdef __FreeBSD__
+ /* XXX_IMPORT: later */
+#else
+ if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
+ ifp = ifp->if_carpdev;
+#endif
+
+ kif = (struct pfi_kif *)ifp->if_pf_kif;
+ if (kif == NULL) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
+ return (PF_DROP);
+ }
+ if (kif->pfik_flags & PFI_IFLAG_SKIP) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ return (PF_PASS);
+ }
+
+#ifdef __FreeBSD__
+ M_ASSERTPKTHDR(m);
+#else
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("non-M_PKTHDR is passed to pf_test");
+#endif /* DIAGNOSTIC */
+#endif /* __FreeBSD__ */
+
+ if (m->m_pkthdr.len < (int)sizeof(*h)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_SHORT);
+ log = 1;
+ goto done;
+ }
+
+ /* We do IP header normalization and packet reassembly here */
+ if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
+ action = PF_DROP;
+ goto done;
+ }
+ m = *m0;
+ h = mtod(m, struct ip *);
+
+ off = h->ip_hl << 2;
+ if (off < (int)sizeof(*h)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_SHORT);
+ log = 1;
+ goto done;
+ }
+
+ pd.src = (struct pf_addr *)&h->ip_src;
+ pd.dst = (struct pf_addr *)&h->ip_dst;
+ PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
+ pd.ip_sum = &h->ip_sum;
+ pd.proto = h->ip_p;
+ pd.af = AF_INET;
+ pd.tos = h->ip_tos;
+ pd.tot_len = ntohs(h->ip_len);
+ pd.eh = eh;
+
+ /* handle fragments that didn't get reassembled by normalization */
+ if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
+ action = pf_test_fragment(&r, dir, kif, m, h,
+ &pd, &a, &ruleset);
+ goto done;
+ }
+
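+	/*
+	 * Dispatch on the transport protocol: pull up the header, verify
+	 * the checksum on inbound packets, and try to match an existing
+	 * state before evaluating the rules.
+	 */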
+ switch (h->ip_p) {
+
+ case IPPROTO_TCP: {
+ struct tcphdr th;
+
+ pd.hdr.tcp = &th;
+ if (!pf_pull_hdr(m, off, &th, sizeof(th),
+ &action, &reason, AF_INET)) {
+ log = action != PF_PASS;
+ goto done;
+ }
+ if (dir == PF_IN && pf_check_proto_cksum(m, off,
+ ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
+ REASON_SET(&reason, PFRES_PROTCKSUM);
+ action = PF_DROP;
+ goto done;
+ }
+ pd.p_len = pd.tot_len - off - (th.th_off << 2);
+ if ((th.th_flags & TH_ACK) && pd.p_len == 0)
+ pqid = 1;
+ action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
+ if (action == PF_DROP)
+ goto done;
+ action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
+ &reason);
+ if (action == PF_PASS) {
+#if NPFSYNC
+ pfsync_update_state(s);
+#endif /* NPFSYNC */
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+#ifdef __FreeBSD__
+ action = pf_test_tcp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, NULL, inp);
+#else
+ action = pf_test_tcp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, &ipintrq);
+#endif
+ break;
+ }
+
+ case IPPROTO_UDP: {
+ struct udphdr uh;
+
+ pd.hdr.udp = &uh;
+ if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
+ &action, &reason, AF_INET)) {
+ log = action != PF_PASS;
+ goto done;
+ }
+ if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
+ off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_PROTCKSUM);
+ goto done;
+ }
+ if (uh.uh_dport == 0 ||
+ ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
+ ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_SHORT);
+ goto done;
+ }
+ action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
+ if (action == PF_PASS) {
+#if NPFSYNC
+ pfsync_update_state(s);
+#endif /* NPFSYNC */
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+#ifdef __FreeBSD__
+ action = pf_test_udp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, NULL, inp);
+#else
+ action = pf_test_udp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, &ipintrq);
+#endif
+ break;
+ }
+
+ case IPPROTO_ICMP: {
+ struct icmp ih;
+
+ pd.hdr.icmp = &ih;
+ if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
+ &action, &reason, AF_INET)) {
+ log = action != PF_PASS;
+ goto done;
+ }
+ if (dir == PF_IN && pf_check_proto_cksum(m, off,
+ ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_PROTCKSUM);
+ goto done;
+ }
+ action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
+ &reason);
+ if (action == PF_PASS) {
+#if NPFSYNC
+ pfsync_update_state(s);
+#endif /* NPFSYNC */
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+#ifdef __FreeBSD__
+ action = pf_test_icmp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, NULL);
+#else
+ action = pf_test_icmp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, &ipintrq);
+#endif
+ break;
+ }
+
+ default:
+ action = pf_test_state_other(&s, dir, kif, &pd);
+ if (action == PF_PASS) {
+#if NPFSYNC
+ pfsync_update_state(s);
+#endif /* NPFSYNC */
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+#ifdef __FreeBSD__
+ action = pf_test_other(&r, &s, dir, kif, m, off, h,
+ &pd, &a, &ruleset, NULL);
+#else
+ action = pf_test_other(&r, &s, dir, kif, m, off, h,
+ &pd, &a, &ruleset, &ipintrq);
+#endif
+ break;
+ }
+
+done:
+ if (action == PF_PASS && h->ip_hl > 5 &&
+ !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_IPOPTIONS);
+ log = 1;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: dropping packet with ip options\n"));
+ }
+
+ if ((s && s->tag) || r->rtableid)
+ pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid);
+
+#ifdef ALTQ
+ if (action == PF_PASS && r->qid) {
+ if (pqid || (pd.tos & IPTOS_LOWDELAY))
+ pd.pf_mtag->qid = r->pqid;
+ else
+ pd.pf_mtag->qid = r->qid;
+ /* add hints for ecn */
+ pd.pf_mtag->af = AF_INET;
+ pd.pf_mtag->hdr = h;
+ }
+#endif /* ALTQ */
+
+ /*
+ * connections redirected to loopback should not match sockets
+ * bound specifically to loopback due to security implications,
+ * see tcp_input() and in_pcblookup_listen().
+ */
+ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
+ pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
+ (s->nat_rule.ptr->action == PF_RDR ||
+ s->nat_rule.ptr->action == PF_BINAT) &&
+ (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
+ pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
+
+ if (log) {
+ struct pf_rule *lr;
+
+ if (s != NULL && s->nat_rule.ptr != NULL &&
+ s->nat_rule.ptr->log & PF_LOG_ALL)
+ lr = s->nat_rule.ptr;
+ else
+ lr = r;
+ PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
+ &pd);
+ }
+
+ kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
+ kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
+
+ if (action == PF_PASS || r->action == PF_DROP) {
+ dirndx = (dir == PF_OUT);
+ r->packets[dirndx]++;
+ r->bytes[dirndx] += pd.tot_len;
+ if (a != NULL) {
+ a->packets[dirndx]++;
+ a->bytes[dirndx] += pd.tot_len;
+ }
+ if (s != NULL) {
+ if (s->nat_rule.ptr != NULL) {
+ s->nat_rule.ptr->packets[dirndx]++;
+ s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
+ }
+ if (s->src_node != NULL) {
+ s->src_node->packets[dirndx]++;
+ s->src_node->bytes[dirndx] += pd.tot_len;
+ }
+ if (s->nat_src_node != NULL) {
+ s->nat_src_node->packets[dirndx]++;
+ s->nat_src_node->bytes[dirndx] += pd.tot_len;
+ }
+ dirndx = (dir == s->direction) ? 0 : 1;
+ s->packets[dirndx]++;
+ s->bytes[dirndx] += pd.tot_len;
+ }
+ tr = r;
+ nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
+ if (nr != NULL) {
+ struct pf_addr *x;
+ /*
+ * XXX: we need to make sure that the addresses
+			 * passed to pfr_update_stats() are the same as
+ * the addresses used during matching (pfr_match)
+ */
+ if (r == &pf_default_rule) {
+ tr = nr;
+ x = (s == NULL || s->direction == dir) ?
+ &pd.baddr : &pd.naddr;
+ } else
+ x = (s == NULL || s->direction == dir) ?
+ &pd.naddr : &pd.baddr;
+ if (x == &pd.baddr || s == NULL) {
+ /* we need to change the address */
+ if (dir == PF_OUT)
+ pd.src = x;
+ else
+ pd.dst = x;
+ }
+ }
+ if (tr->src.addr.type == PF_ADDR_TABLE)
+ pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
+ s->direction == dir) ? pd.src : pd.dst, pd.af,
+ pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
+ tr->src.neg);
+ if (tr->dst.addr.type == PF_ADDR_TABLE)
+ pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
+ s->direction == dir) ? pd.dst : pd.src, pd.af,
+ pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
+ tr->dst.neg);
+ }
+
+
+ if (action == PF_SYNPROXY_DROP) {
+ m_freem(*m0);
+ *m0 = NULL;
+ action = PF_PASS;
+ } else if (r->rt)
+ /* pf_route can free the mbuf causing *m0 to become NULL */
+ pf_route(m0, r, dir, ifp, s, &pd);
+
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+
+ return (action);
+}
+#endif /* INET */
+
+#ifdef INET6
+int
+#ifdef __FreeBSD__
+pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
+ struct ether_header *eh, struct inpcb *inp)
+#else
+pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
+ struct ether_header *eh)
+#endif
+{
+ struct pfi_kif *kif;
+ u_short action, reason = 0, log = 0;
+ struct mbuf *m = *m0, *n = NULL;
+ struct ip6_hdr *h;
+ struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr;
+ struct pf_state *s = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ struct pf_pdesc pd;
+ int off, terminal = 0, dirndx, rh_cnt = 0;
+
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+
+ if (!pf_status.running)
+#ifdef __FreeBSD__
+ {
+ PF_UNLOCK();
+#endif
+ return (PF_PASS);
+#ifdef __FreeBSD__
+ }
+#endif
+
+ memset(&pd, 0, sizeof(pd));
+ if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_test6: pf_get_mtag returned NULL\n"));
+ return (PF_DROP);
+ }
+ if (pd.pf_mtag->flags & PF_TAG_GENERATED)
+ return (PF_PASS);
+
+#ifdef __FreeBSD__
+ /* XXX_IMPORT: later */
+#else
+ if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
+ ifp = ifp->if_carpdev;
+#endif
+
+ kif = (struct pfi_kif *)ifp->if_pf_kif;
+ if (kif == NULL) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
+ return (PF_DROP);
+ }
+ if (kif->pfik_flags & PFI_IFLAG_SKIP) {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ return (PF_PASS);
+ }
+
+#ifdef __FreeBSD__
+ M_ASSERTPKTHDR(m);
+#else
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("non-M_PKTHDR is passed to pf_test6");
+#endif /* DIAGNOSTIC */
+#endif
+
+#ifdef __FreeBSD__
+ h = NULL; /* make the compiler happy */
+#endif
+
+ if (m->m_pkthdr.len < (int)sizeof(*h)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_SHORT);
+ log = 1;
+ goto done;
+ }
+
+ /* We do IP header normalization and packet reassembly here */
+ if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
+ action = PF_DROP;
+ goto done;
+ }
+ m = *m0;
+ h = mtod(m, struct ip6_hdr *);
+
+#if 1
+ /*
+	 * We do not support jumbograms yet. If we keep going, a zero ip6_plen
+	 * will do something bad, so drop the packet for now.
+ */
+ if (htons(h->ip6_plen) == 0) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_NORM); /*XXX*/
+ goto done;
+ }
+#endif
+
+ pd.src = (struct pf_addr *)&h->ip6_src;
+ pd.dst = (struct pf_addr *)&h->ip6_dst;
+ PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
+ pd.ip_sum = NULL;
+ pd.af = AF_INET6;
+ pd.tos = 0;
+ pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
+ pd.eh = eh;
+
+ off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
+ pd.proto = h->ip6_nxt;
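+	/*
+	 * Walk the chain of IPv6 extension headers until a terminal
+	 * (transport) header is reached, advancing off and pd.proto.
+	 */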
+ do {
+ switch (pd.proto) {
+ case IPPROTO_FRAGMENT:
+ action = pf_test_fragment(&r, dir, kif, m, h,
+ &pd, &a, &ruleset);
+ if (action == PF_DROP)
+ REASON_SET(&reason, PFRES_FRAG);
+ goto done;
+ case IPPROTO_ROUTING: {
+ struct ip6_rthdr rthdr;
+
+ if (rh_cnt++) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: IPv6 more than one rthdr\n"));
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_IPOPTIONS);
+ log = 1;
+ goto done;
+ }
+ if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
+ &reason, pd.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: IPv6 short rthdr\n"));
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_SHORT);
+ log = 1;
+ goto done;
+ }
+ if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: IPv6 rthdr0\n"));
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_IPOPTIONS);
+ log = 1;
+ goto done;
+ }
+ /* fallthrough */
+ }
+ case IPPROTO_AH:
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_DSTOPTS: {
+ /* get next header and header length */
+ struct ip6_ext opt6;
+
+ if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
+ NULL, &reason, pd.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: IPv6 short opt\n"));
+ action = PF_DROP;
+ log = 1;
+ goto done;
+ }
+ if (pd.proto == IPPROTO_AH)
+ off += (opt6.ip6e_len + 2) * 4;
+ else
+ off += (opt6.ip6e_len + 1) * 8;
+ pd.proto = opt6.ip6e_nxt;
+			/* go to the next header */
+ break;
+ }
+ default:
+ terminal++;
+ break;
+ }
+ } while (!terminal);
+
+ /* if there's no routing header, use unmodified mbuf for checksumming */
+ if (!n)
+ n = m;
+
+ switch (pd.proto) {
+
+ case IPPROTO_TCP: {
+ struct tcphdr th;
+
+ pd.hdr.tcp = &th;
+ if (!pf_pull_hdr(m, off, &th, sizeof(th),
+ &action, &reason, AF_INET6)) {
+ log = action != PF_PASS;
+ goto done;
+ }
+ if (dir == PF_IN && pf_check_proto_cksum(n, off,
+ ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
+ IPPROTO_TCP, AF_INET6)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_PROTCKSUM);
+ goto done;
+ }
+ pd.p_len = pd.tot_len - off - (th.th_off << 2);
+ action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
+ if (action == PF_DROP)
+ goto done;
+ action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
+ &reason);
+ if (action == PF_PASS) {
+#if NPFSYNC
+ pfsync_update_state(s);
+#endif /* NPFSYNC */
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+#ifdef __FreeBSD__
+ action = pf_test_tcp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, NULL, inp);
+#else
+ action = pf_test_tcp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, &ip6intrq);
+#endif
+ break;
+ }
+
+ case IPPROTO_UDP: {
+ struct udphdr uh;
+
+ pd.hdr.udp = &uh;
+ if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
+ &action, &reason, AF_INET6)) {
+ log = action != PF_PASS;
+ goto done;
+ }
+ if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n,
+ off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
+ IPPROTO_UDP, AF_INET6)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_PROTCKSUM);
+ goto done;
+ }
+ if (uh.uh_dport == 0 ||
+ ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
+ ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_SHORT);
+ goto done;
+ }
+ action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
+ if (action == PF_PASS) {
+#if NPFSYNC
+ pfsync_update_state(s);
+#endif /* NPFSYNC */
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+#ifdef __FreeBSD__
+ action = pf_test_udp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, NULL, inp);
+#else
+ action = pf_test_udp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, &ip6intrq);
+#endif
+ break;
+ }
+
+ case IPPROTO_ICMPV6: {
+ struct icmp6_hdr ih;
+
+ pd.hdr.icmp6 = &ih;
+ if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
+ &action, &reason, AF_INET6)) {
+ log = action != PF_PASS;
+ goto done;
+ }
+ if (dir == PF_IN && pf_check_proto_cksum(n, off,
+ ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
+ IPPROTO_ICMPV6, AF_INET6)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_PROTCKSUM);
+ goto done;
+ }
+ action = pf_test_state_icmp(&s, dir, kif,
+ m, off, h, &pd, &reason);
+ if (action == PF_PASS) {
+#if NPFSYNC
+ pfsync_update_state(s);
+#endif /* NPFSYNC */
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+#ifdef __FreeBSD__
+ action = pf_test_icmp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, NULL);
+#else
+ action = pf_test_icmp(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, &ip6intrq);
+#endif
+ break;
+ }
+
+ default:
+ action = pf_test_state_other(&s, dir, kif, &pd);
+ if (action == PF_PASS) {
+#if NPFSYNC
+ pfsync_update_state(s);
+#endif /* NPFSYNC */
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+#ifdef __FreeBSD__
+ action = pf_test_other(&r, &s, dir, kif, m, off, h,
+ &pd, &a, &ruleset, NULL);
+#else
+ action = pf_test_other(&r, &s, dir, kif, m, off, h,
+ &pd, &a, &ruleset, &ip6intrq);
+#endif
+ break;
+ }
+
+done:
+ /* handle dangerous IPv6 extension headers. */
+ if (action == PF_PASS && rh_cnt &&
+ !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_IPOPTIONS);
+ log = 1;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: dropping packet with dangerous v6 headers\n"));
+ }
+
+ if ((s && s->tag) || r->rtableid)
+ pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid);
+
+#ifdef ALTQ
+ if (action == PF_PASS && r->qid) {
+ if (pd.tos & IPTOS_LOWDELAY)
+ pd.pf_mtag->qid = r->pqid;
+ else
+ pd.pf_mtag->qid = r->qid;
+ /* add hints for ecn */
+ pd.pf_mtag->af = AF_INET6;
+ pd.pf_mtag->hdr = h;
+ }
+#endif /* ALTQ */
+
+ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
+ pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
+ (s->nat_rule.ptr->action == PF_RDR ||
+ s->nat_rule.ptr->action == PF_BINAT) &&
+ IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
+ pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
+
+ if (log) {
+ struct pf_rule *lr;
+
+ if (s != NULL && s->nat_rule.ptr != NULL &&
+ s->nat_rule.ptr->log & PF_LOG_ALL)
+ lr = s->nat_rule.ptr;
+ else
+ lr = r;
+ PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
+ &pd);
+ }
+
+ kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
+ kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
+
+ if (action == PF_PASS || r->action == PF_DROP) {
+ dirndx = (dir == PF_OUT);
+ r->packets[dirndx]++;
+ r->bytes[dirndx] += pd.tot_len;
+ if (a != NULL) {
+ a->packets[dirndx]++;
+ a->bytes[dirndx] += pd.tot_len;
+ }
+ if (s != NULL) {
+ if (s->nat_rule.ptr != NULL) {
+ s->nat_rule.ptr->packets[dirndx]++;
+ s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
+ }
+ if (s->src_node != NULL) {
+ s->src_node->packets[dirndx]++;
+ s->src_node->bytes[dirndx] += pd.tot_len;
+ }
+ if (s->nat_src_node != NULL) {
+ s->nat_src_node->packets[dirndx]++;
+ s->nat_src_node->bytes[dirndx] += pd.tot_len;
+ }
+ dirndx = (dir == s->direction) ? 0 : 1;
+ s->packets[dirndx]++;
+ s->bytes[dirndx] += pd.tot_len;
+ }
+ tr = r;
+ nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
+ if (nr != NULL) {
+ struct pf_addr *x;
+ /*
+ * XXX: we need to make sure that the addresses
+			 * passed to pfr_update_stats() are the same as
+ * the addresses used during matching (pfr_match)
+ */
+ if (r == &pf_default_rule) {
+ tr = nr;
+ x = (s == NULL || s->direction == dir) ?
+ &pd.baddr : &pd.naddr;
+ } else {
+ x = (s == NULL || s->direction == dir) ?
+ &pd.naddr : &pd.baddr;
+ }
+ if (x == &pd.baddr || s == NULL) {
+ if (dir == PF_OUT)
+ pd.src = x;
+ else
+ pd.dst = x;
+ }
+ }
+ if (tr->src.addr.type == PF_ADDR_TABLE)
+ pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
+ s->direction == dir) ? pd.src : pd.dst, pd.af,
+ pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
+ tr->src.neg);
+ if (tr->dst.addr.type == PF_ADDR_TABLE)
+ pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
+ s->direction == dir) ? pd.dst : pd.src, pd.af,
+ pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
+ tr->dst.neg);
+ }
+
+
+ if (action == PF_SYNPROXY_DROP) {
+ m_freem(*m0);
+ *m0 = NULL;
+ action = PF_PASS;
+ } else if (r->rt)
+ /* pf_route6 can free the mbuf causing *m0 to become NULL */
+ pf_route6(m0, r, dir, ifp, s, &pd);
+
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ return (action);
+}
+#endif /* INET6 */
+
+int
+pf_check_congestion(struct ifqueue *ifq)
+{
+#ifdef __FreeBSD__
+ /* XXX_IMPORT: later */
+ return (0);
+#else
+ if (ifq->ifq_congestion)
+ return (1);
+ else
+ return (0);
+#endif
+}
diff --git a/freebsd/sys/contrib/pf/net/pf_if.c b/freebsd/sys/contrib/pf/net/pf_if.c
new file mode 100644
index 00000000..e873a2ef
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pf_if.c
@@ -0,0 +1,950 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $OpenBSD: pf_if.c,v 1.46 2006/12/13 09:01:59 itojun Exp $ */
+
+/*
+ * Copyright 2005 Henning Brauer <henning@openbsd.org>
+ * Copyright 2005 Ryan McBride <mcbride@openbsd.org>
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2003 Cedric Berger
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__)
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#endif
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#ifdef __FreeBSD__
+#include <freebsd/sys/malloc.h>
+#endif
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/kernel.h>
+#ifndef __FreeBSD__
+#include <freebsd/sys/device.h>
+#endif
+#include <freebsd/sys/time.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#ifdef __FreeBSD__
+#include <freebsd/net/vnet.h>
+#endif
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+
+#include <freebsd/net/pfvar.h>
+
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif /* INET6 */
+
+struct pfi_kif *pfi_all = NULL;
+struct pfi_statehead pfi_statehead;
+#ifdef __FreeBSD__
+uma_zone_t pfi_addr_pl;
+#else
+struct pool pfi_addr_pl;
+#endif
+struct pfi_ifhead pfi_ifs;
+long pfi_update = 1;
+struct pfr_addr *pfi_buffer;
+int pfi_buffer_cnt;
+int pfi_buffer_max;
+#ifdef __FreeBSD__
+eventhandler_tag pfi_attach_cookie = NULL;
+eventhandler_tag pfi_detach_cookie = NULL;
+eventhandler_tag pfi_attach_group_cookie = NULL;
+eventhandler_tag pfi_change_group_cookie = NULL;
+eventhandler_tag pfi_detach_group_cookie = NULL;
+eventhandler_tag pfi_ifaddr_event_cookie = NULL;
+#endif
+
+void pfi_kif_update(struct pfi_kif *);
+void pfi_dynaddr_update(struct pfi_dynaddr *dyn);
+void pfi_table_update(struct pfr_ktable *, struct pfi_kif *,
+ int, int);
+void pfi_kifaddr_update(void *);
+void pfi_instance_add(struct ifnet *, int, int);
+void pfi_address_add(struct sockaddr *, int, int);
+int pfi_if_compare(struct pfi_kif *, struct pfi_kif *);
+int pfi_skip_if(const char *, struct pfi_kif *);
+int pfi_unmask(void *);
+#ifdef __FreeBSD__
+void pfi_attach_ifnet_event(void * __unused, struct ifnet *);
+void pfi_detach_ifnet_event(void * __unused, struct ifnet *);
+void pfi_attach_group_event(void * __unused, struct ifg_group *);
+void pfi_change_group_event(void * __unused, char *);
+void pfi_detach_group_event(void * __unused, struct ifg_group *);
+void pfi_ifaddr_event(void * __unused, struct ifnet *);
+
+#endif
+
+RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
+RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
+
+#define PFI_BUFFER_MAX 0x10000
+#define PFI_MTYPE M_IFADDR
+
+void
+pfi_initialize(void)
+{
+
+ if (pfi_all != NULL) /* already initialized */
+ return;
+
+ TAILQ_INIT(&pfi_statehead);
+#ifndef __FreeBSD__
+ pool_init(&pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, 0, 0,
+ "pfiaddrpl", &pool_allocator_nointr);
+#endif
+ pfi_buffer_max = 64;
+ pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer),
+ PFI_MTYPE, M_WAITOK);
+
+ if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL)
+ panic("pfi_kif_get for pfi_all failed");
+
+#ifdef __FreeBSD__
+ struct ifg_group *ifg;
+ struct ifnet *ifp;
+
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
+ pfi_attach_ifgroup(ifg);
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link)
+ pfi_attach_ifnet(ifp);
+ IFNET_RUNLOCK();
+
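+	/* Track interface and group changes through eventhandler(9) hooks. */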
+ pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_arrival_event,
+ pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
+ pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
+ pfi_attach_group_cookie = EVENTHANDLER_REGISTER(group_attach_event,
+ pfi_attach_group_event, NULL, EVENTHANDLER_PRI_ANY);
+ pfi_change_group_cookie = EVENTHANDLER_REGISTER(group_change_event,
+ pfi_change_group_event, NULL, EVENTHANDLER_PRI_ANY);
+ pfi_detach_group_cookie = EVENTHANDLER_REGISTER(group_detach_event,
+ pfi_detach_group_event, NULL, EVENTHANDLER_PRI_ANY);
+ pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event,
+ pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);
+#endif
+}
+
+#ifdef __FreeBSD__
+void
+pfi_cleanup(void)
+{
+ struct pfi_kif *p;
+
+ PF_UNLOCK();
+ EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie);
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie);
+ EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie);
+ EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie);
+ EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie);
+ EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie);
+ PF_LOCK();
+
+ pfi_all = NULL;
+ while ((p = RB_MIN(pfi_ifhead, &pfi_ifs))) {
+ if (p->pfik_rules || p->pfik_states) {
+ printf("pfi_cleanup: dangling refs for %s\n",
+ p->pfik_name);
+ }
+
+ RB_REMOVE(pfi_ifhead, &pfi_ifs, p);
+ free(p, PFI_MTYPE);
+ }
+
+ free(pfi_buffer, PFI_MTYPE);
+}
+#endif
+
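+/* Look up a kernel interface descriptor by name; create it if missing. */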
+struct pfi_kif *
+pfi_kif_get(const char *kif_name)
+{
+ struct pfi_kif *kif;
+ struct pfi_kif_cmp s;
+
+ bzero(&s, sizeof(s));
+ strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name));
+ if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL)
+ return (kif);
+
+ /* create new one */
+#ifdef __FreeBSD__
+ if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL)
+#else
+ if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT)) == NULL)
+#endif
+ return (NULL);
+
+ bzero(kif, sizeof(*kif));
+ strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name));
+#ifdef __FreeBSD__
+ /*
+	 * It seems that time_second is still uninitialized when pf sets the
+	 * interface statistics clear time during the boot phase, if pf was
+	 * statically linked into the kernel. Instead of storing a bogus
+	 * time value, have pfi_get_ifaces() handle this case: it uses
+	 * boottime.tv_sec when it sees that the stored time is 0.
+ */
+ kif->pfik_tzero = time_second > 1 ? time_second : 0;
+#else
+ kif->pfik_tzero = time_second;
+#endif
+ TAILQ_INIT(&kif->pfik_dynaddrs);
+
+ RB_INSERT(pfi_ifhead, &pfi_ifs, kif);
+ return (kif);
+}
+
+void
+pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what)
+{
+ switch (what) {
+ case PFI_KIF_REF_RULE:
+ kif->pfik_rules++;
+ break;
+ case PFI_KIF_REF_STATE:
+ if (!kif->pfik_states++)
+ TAILQ_INSERT_TAIL(&pfi_statehead, kif, pfik_w_states);
+ break;
+ default:
+ panic("pfi_kif_ref with unknown type");
+ }
+}
+
+void
+pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what)
+{
+ if (kif == NULL)
+ return;
+
+ switch (what) {
+ case PFI_KIF_REF_NONE:
+ break;
+ case PFI_KIF_REF_RULE:
+ if (kif->pfik_rules <= 0) {
+ printf("pfi_kif_unref: rules refcount <= 0\n");
+ return;
+ }
+ kif->pfik_rules--;
+ break;
+ case PFI_KIF_REF_STATE:
+ if (kif->pfik_states <= 0) {
+ printf("pfi_kif_unref: state refcount <= 0\n");
+ return;
+ }
+ if (!--kif->pfik_states)
+ TAILQ_REMOVE(&pfi_statehead, kif, pfik_w_states);
+ break;
+ default:
+ panic("pfi_kif_unref with unknown type");
+ }
+
+ if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all)
+ return;
+
+ if (kif->pfik_rules || kif->pfik_states)
+ return;
+
+ RB_REMOVE(pfi_ifhead, &pfi_ifs, kif);
+ free(kif, PFI_MTYPE);
+}
+
+int
+pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif)
+{
+ struct ifg_list *p;
+
+ if (rule_kif == NULL || rule_kif == packet_kif)
+ return (1);
+
+ if (rule_kif->pfik_group != NULL)
+ TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next)
+ if (p->ifgl_group == rule_kif->pfik_group)
+ return (1);
+
+ return (0);
+}
+
+void
+pfi_attach_ifnet(struct ifnet *ifp)
+{
+ struct pfi_kif *kif;
+ int s;
+
+ pfi_initialize();
+ s = splsoftnet();
+ pfi_update++;
+ if ((kif = pfi_kif_get(ifp->if_xname)) == NULL)
+ panic("pfi_kif_get failed");
+
+ kif->pfik_ifp = ifp;
+ ifp->if_pf_kif = (caddr_t)kif;
+
+#ifndef __FreeBSD__
+ if ((kif->pfik_ah_cookie = hook_establish(ifp->if_addrhooks, 1,
+ pfi_kifaddr_update, kif)) == NULL)
+ panic("pfi_attach_ifnet: cannot allocate '%s' address hook",
+ ifp->if_xname);
+#endif
+
+ pfi_kif_update(kif);
+
+ splx(s);
+}
+
+void
+pfi_detach_ifnet(struct ifnet *ifp)
+{
+ int s;
+ struct pfi_kif *kif;
+
+ if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL)
+ return;
+
+ s = splsoftnet();
+ pfi_update++;
+#ifndef __FreeBSD__
+ hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie);
+#endif
+ pfi_kif_update(kif);
+
+ kif->pfik_ifp = NULL;
+ ifp->if_pf_kif = NULL;
+ pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+ splx(s);
+}
+
+void
+pfi_attach_ifgroup(struct ifg_group *ifg)
+{
+ struct pfi_kif *kif;
+ int s;
+
+ pfi_initialize();
+ s = splsoftnet();
+ pfi_update++;
+ if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL)
+ panic("pfi_kif_get failed");
+
+ kif->pfik_group = ifg;
+ ifg->ifg_pf_kif = (caddr_t)kif;
+
+ splx(s);
+}
+
+void
+pfi_detach_ifgroup(struct ifg_group *ifg)
+{
+ int s;
+ struct pfi_kif *kif;
+
+ if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL)
+ return;
+
+ s = splsoftnet();
+ pfi_update++;
+
+ kif->pfik_group = NULL;
+ ifg->ifg_pf_kif = NULL;
+ pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+ splx(s);
+}
+
+void
+pfi_group_change(const char *group)
+{
+ struct pfi_kif *kif;
+ int s;
+
+ s = splsoftnet();
+ pfi_update++;
+ if ((kif = pfi_kif_get(group)) == NULL)
+ panic("pfi_kif_get failed");
+
+ pfi_kif_update(kif);
+
+ splx(s);
+}
+
+int
+pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af)
+{
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ switch (dyn->pfid_acnt4) {
+ case 0:
+ return (0);
+ case 1:
+ return (PF_MATCHA(0, &dyn->pfid_addr4,
+ &dyn->pfid_mask4, a, AF_INET));
+ default:
+ return (pfr_match_addr(dyn->pfid_kt, a, AF_INET));
+ }
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ switch (dyn->pfid_acnt6) {
+ case 0:
+ return (0);
+ case 1:
+ return (PF_MATCHA(0, &dyn->pfid_addr6,
+ &dyn->pfid_mask6, a, AF_INET6));
+ default:
+ return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6));
+ }
+ break;
+#endif /* INET6 */
+ default:
+ return (0);
+ }
+}
+
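+/*
+ * Set up a dynamic interface address (e.g. ifname:network): resolve the
+ * interface, derive the name of the backing table from the interface name
+ * and the modifier flags, and attach that table under the reserved anchor.
+ */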
+int
+pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af)
+{
+ struct pfi_dynaddr *dyn;
+ char tblname[PF_TABLE_NAME_SIZE];
+ struct pf_ruleset *ruleset = NULL;
+ int s, rv = 0;
+
+ if (aw->type != PF_ADDR_DYNIFTL)
+ return (0);
+ if ((dyn = pool_get(&pfi_addr_pl, PR_NOWAIT)) == NULL)
+ return (1);
+ bzero(dyn, sizeof(*dyn));
+
+ s = splsoftnet();
+ if (!strcmp(aw->v.ifname, "self"))
+ dyn->pfid_kif = pfi_kif_get(IFG_ALL);
+ else
+ dyn->pfid_kif = pfi_kif_get(aw->v.ifname);
+ if (dyn->pfid_kif == NULL) {
+ rv = 1;
+ goto _bad;
+ }
+ pfi_kif_ref(dyn->pfid_kif, PFI_KIF_REF_RULE);
+
+ dyn->pfid_net = pfi_unmask(&aw->v.a.mask);
+ if (af == AF_INET && dyn->pfid_net == 32)
+ dyn->pfid_net = 128;
+ strlcpy(tblname, aw->v.ifname, sizeof(tblname));
+ if (aw->iflags & PFI_AFLAG_NETWORK)
+ strlcat(tblname, ":network", sizeof(tblname));
+ if (aw->iflags & PFI_AFLAG_BROADCAST)
+ strlcat(tblname, ":broadcast", sizeof(tblname));
+ if (aw->iflags & PFI_AFLAG_PEER)
+ strlcat(tblname, ":peer", sizeof(tblname));
+ if (aw->iflags & PFI_AFLAG_NOALIAS)
+ strlcat(tblname, ":0", sizeof(tblname));
+ if (dyn->pfid_net != 128)
+ snprintf(tblname + strlen(tblname),
+ sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net);
+ if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) {
+ rv = 1;
+ goto _bad;
+ }
+
+ if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) {
+ rv = 1;
+ goto _bad;
+ }
+
+ dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE;
+ dyn->pfid_iflags = aw->iflags;
+ dyn->pfid_af = af;
+
+ TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry);
+ aw->p.dyn = dyn;
+ pfi_kif_update(dyn->pfid_kif);
+ splx(s);
+ return (0);
+
+_bad:
+ if (dyn->pfid_kt != NULL)
+ pfr_detach_table(dyn->pfid_kt);
+ if (ruleset != NULL)
+ pf_remove_if_empty_ruleset(ruleset);
+ if (dyn->pfid_kif != NULL)
+ pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE);
+ pool_put(&pfi_addr_pl, dyn);
+ splx(s);
+ return (rv);
+}
+
+void
+pfi_kif_update(struct pfi_kif *kif)
+{
+ struct ifg_list *ifgl;
+ struct pfi_dynaddr *p;
+
+ /* update all dynaddr */
+ TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry)
+ pfi_dynaddr_update(p);
+
+	/* again for all groups kif is a member of */
+ if (kif->pfik_ifp != NULL)
+ TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next)
+ pfi_kif_update((struct pfi_kif *)
+ ifgl->ifgl_group->ifg_pf_kif);
+}
+
+void
+pfi_dynaddr_update(struct pfi_dynaddr *dyn)
+{
+ struct pfi_kif *kif;
+ struct pfr_ktable *kt;
+
+ if (dyn == NULL || dyn->pfid_kif == NULL || dyn->pfid_kt == NULL)
+ panic("pfi_dynaddr_update");
+
+ kif = dyn->pfid_kif;
+ kt = dyn->pfid_kt;
+
+ if (kt->pfrkt_larg != pfi_update) {
+ /* this table needs to be brought up-to-date */
+ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags);
+ kt->pfrkt_larg = pfi_update;
+ }
+ pfr_dynaddr_update(kt, dyn);
+}
+
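+/*
+ * Rebuild a dynamic table from the current addresses of the interface,
+ * or of all members of the interface group.
+ */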
+void
+pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags)
+{
+ int e, size2 = 0;
+ struct ifg_member *ifgm;
+
+ pfi_buffer_cnt = 0;
+
+ if (kif->pfik_ifp != NULL)
+ pfi_instance_add(kif->pfik_ifp, net, flags);
+ else if (kif->pfik_group != NULL)
+ TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next)
+ pfi_instance_add(ifgm->ifgm_ifp, net, flags);
+
+ if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2,
+ NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK)))
+ printf("pfi_table_update: cannot set %d new addresses "
+ "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e);
+}
+
+void
+pfi_instance_add(struct ifnet *ifp, int net, int flags)
+{
+ struct ifaddr *ia;
+ int got4 = 0, got6 = 0;
+ int net2, af;
+
+ if (ifp == NULL)
+ return;
+ TAILQ_FOREACH(ia, &ifp->if_addrlist, ifa_list) {
+ if (ia->ifa_addr == NULL)
+ continue;
+ af = ia->ifa_addr->sa_family;
+ if (af != AF_INET && af != AF_INET6)
+ continue;
+#ifdef __FreeBSD__
+ /*
+ * XXX: For point-to-point interfaces, (ifname:0) and IPv4,
+ * jump over addresses without a proper route to work
+ * around a problem with ppp not fully removing the
+ * address used during IPCP.
+ */
+ if ((ifp->if_flags & IFF_POINTOPOINT) &&
+ !(ia->ifa_flags & IFA_ROUTE) &&
+ (flags & PFI_AFLAG_NOALIAS) && (af == AF_INET))
+ continue;
+#endif
+ if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6)
+ continue;
+ if ((flags & PFI_AFLAG_BROADCAST) &&
+ !(ifp->if_flags & IFF_BROADCAST))
+ continue;
+ if ((flags & PFI_AFLAG_PEER) &&
+ !(ifp->if_flags & IFF_POINTOPOINT))
+ continue;
+ if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 &&
+ IN6_IS_ADDR_LINKLOCAL(
+ &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr))
+ continue;
+ if (flags & PFI_AFLAG_NOALIAS) {
+ if (af == AF_INET && got4)
+ continue;
+ if (af == AF_INET6 && got6)
+ continue;
+ }
+ if (af == AF_INET)
+ got4 = 1;
+ else if (af == AF_INET6)
+ got6 = 1;
+ net2 = net;
+ if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) {
+ if (af == AF_INET)
+ net2 = pfi_unmask(&((struct sockaddr_in *)
+ ia->ifa_netmask)->sin_addr);
+ else if (af == AF_INET6)
+ net2 = pfi_unmask(&((struct sockaddr_in6 *)
+ ia->ifa_netmask)->sin6_addr);
+ }
+ if (af == AF_INET && net2 > 32)
+ net2 = 32;
+ if (flags & PFI_AFLAG_BROADCAST)
+ pfi_address_add(ia->ifa_broadaddr, af, net2);
+ else if (flags & PFI_AFLAG_PEER)
+ pfi_address_add(ia->ifa_dstaddr, af, net2);
+ else
+ pfi_address_add(ia->ifa_addr, af, net2);
+ }
+}
+
+void
+pfi_address_add(struct sockaddr *sa, int af, int net)
+{
+ struct pfr_addr *p;
+ int i;
+
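+	/* Grow the shared address buffer geometrically, up to PFI_BUFFER_MAX. */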
+ if (pfi_buffer_cnt >= pfi_buffer_max) {
+ int new_max = pfi_buffer_max * 2;
+
+ if (new_max > PFI_BUFFER_MAX) {
+ printf("pfi_address_add: address buffer full (%d/%d)\n",
+ pfi_buffer_cnt, PFI_BUFFER_MAX);
+ return;
+ }
+ p = malloc(new_max * sizeof(*pfi_buffer), PFI_MTYPE,
+#ifdef __FreeBSD__
+ M_NOWAIT);
+#else
+ M_DONTWAIT);
+#endif
+ if (p == NULL) {
+ printf("pfi_address_add: no memory to grow buffer "
+ "(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX);
+ return;
+ }
+ memcpy(p, pfi_buffer, pfi_buffer_max * sizeof(*pfi_buffer));
+ /* no need to zero buffer */
+ free(pfi_buffer, PFI_MTYPE);
+ pfi_buffer = p;
+ pfi_buffer_max = new_max;
+ }
+ if (af == AF_INET && net > 32)
+ net = 128;
+ p = pfi_buffer + pfi_buffer_cnt++;
+ bzero(p, sizeof(*p));
+ p->pfra_af = af;
+ p->pfra_net = net;
+ if (af == AF_INET)
+ p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr;
+ else if (af == AF_INET6) {
+ p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr;
+ if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr))
+ p->pfra_ip6addr.s6_addr16[1] = 0;
+ }
+ /* mask network address bits */
+ if (net < 128)
+ ((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8));
+ for (i = (p->pfra_net+7)/8; i < sizeof(p->pfra_u); i++)
+ ((caddr_t)p)[i] = 0;
+}
+
+void
+pfi_dynaddr_remove(struct pf_addr_wrap *aw)
+{
+ int s;
+
+ if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL ||
+ aw->p.dyn->pfid_kif == NULL || aw->p.dyn->pfid_kt == NULL)
+ return;
+
+ s = splsoftnet();
+ TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry);
+ pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE);
+ aw->p.dyn->pfid_kif = NULL;
+ pfr_detach_table(aw->p.dyn->pfid_kt);
+ aw->p.dyn->pfid_kt = NULL;
+ pool_put(&pfi_addr_pl, aw->p.dyn);
+ aw->p.dyn = NULL;
+ splx(s);
+}
+
+void
+pfi_dynaddr_copyout(struct pf_addr_wrap *aw)
+{
+ if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL ||
+ aw->p.dyn->pfid_kif == NULL)
+ return;
+ aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6;
+}
+
+void
+pfi_kifaddr_update(void *v)
+{
+ int s;
+ struct pfi_kif *kif = (struct pfi_kif *)v;
+
+ s = splsoftnet();
+ pfi_update++;
+ pfi_kif_update(kif);
+ splx(s);
+}
+
+int
+pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q)
+{
+ return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ));
+}
+
+void
+pfi_fill_oldstatus(struct pf_status *pfs)
+{
+ struct pfi_kif *p;
+ struct pfi_kif_cmp key;
+ int i, j, k, s;
+
+ strlcpy(key.pfik_name, pfs->ifname, sizeof(key.pfik_name));
+ s = splsoftnet();
+ p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key);
+ if (p == NULL) {
+ splx(s);
+ return;
+ }
+ bzero(pfs->pcounters, sizeof(pfs->pcounters));
+ bzero(pfs->bcounters, sizeof(pfs->bcounters));
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++) {
+ pfs->pcounters[i][j][k] =
+ p->pfik_packets[i][j][k];
+ pfs->bcounters[i][j] +=
+ p->pfik_bytes[i][j][k];
+ }
+ splx(s);
+}
+
+int
+pfi_clr_istats(const char *name)
+{
+ struct pfi_kif *p;
+ int s;
+
+ s = splsoftnet();
+ RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
+ if (pfi_skip_if(name, p))
+ continue;
+ bzero(p->pfik_packets, sizeof(p->pfik_packets));
+ bzero(p->pfik_bytes, sizeof(p->pfik_bytes));
+ p->pfik_tzero = time_second;
+ }
+ splx(s);
+
+ return (0);
+}
+
+int
+pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size)
+{
+ struct pfi_kif *p, *nextp;
+ int s, n = 0;
+#ifdef __FreeBSD__
+ int error;
+#endif
+
+ s = splsoftnet();
+ for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) {
+ nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p);
+ if (pfi_skip_if(name, p))
+ continue;
+ if (*size > n++) {
+ if (!p->pfik_tzero)
+ p->pfik_tzero = time_second;
+ pfi_kif_ref(p, PFI_KIF_REF_RULE);
+#ifdef __FreeBSD__
+ PF_COPYOUT(p, buf++, sizeof(*buf), error);
+ if (error) {
+#else
+ if (copyout(p, buf++, sizeof(*buf))) {
+#endif
+ pfi_kif_unref(p, PFI_KIF_REF_RULE);
+ splx(s);
+ return (EFAULT);
+ }
+ nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p);
+ pfi_kif_unref(p, PFI_KIF_REF_RULE);
+ }
+ }
+ splx(s);
+ *size = n;
+ return (0);
+}
+
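+/*
+ * Return nonzero if interface p does not match the name filter: an empty
+ * filter or an exact name match passes; otherwise the filter is treated
+ * as a driver/group prefix that must be followed by a unit number.
+ */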
+int
+pfi_skip_if(const char *filter, struct pfi_kif *p)
+{
+ int n;
+
+ if (filter == NULL || !*filter)
+ return (0);
+ if (!strcmp(p->pfik_name, filter))
+ return (0); /* exact match */
+ n = strlen(filter);
+ if (n < 1 || n >= IFNAMSIZ)
+ return (1); /* sanity check */
+ if (filter[n-1] >= '0' && filter[n-1] <= '9')
+ return (1); /* only do exact match in that case */
+ if (strncmp(p->pfik_name, filter, n))
+ return (1); /* prefix doesn't match */
+ return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9');
+}
+
+int
+pfi_set_flags(const char *name, int flags)
+{
+ struct pfi_kif *p;
+ int s;
+
+ s = splsoftnet();
+ RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
+ if (pfi_skip_if(name, p))
+ continue;
+ p->pfik_flags |= flags;
+ }
+ splx(s);
+ return (0);
+}
+
+int
+pfi_clear_flags(const char *name, int flags)
+{
+ struct pfi_kif *p;
+ int s;
+
+ s = splsoftnet();
+ RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
+ if (pfi_skip_if(name, p))
+ continue;
+ p->pfik_flags &= ~flags;
+ }
+ splx(s);
+ return (0);
+}
+
+/* from pf_print_state.c */
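+/* Convert a contiguous netmask into its prefix length. */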
+int
+pfi_unmask(void *addr)
+{
+ struct pf_addr *m = addr;
+ int i = 31, j = 0, b = 0;
+ u_int32_t tmp;
+
+ while (j < 4 && m->addr32[j] == 0xffffffff) {
+ b += 32;
+ j++;
+ }
+ if (j < 4) {
+ tmp = ntohl(m->addr32[j]);
+ for (i = 31; tmp & (1 << i); --i)
+ b++;
+ }
+ return (b);
+}
+
+#ifdef __FreeBSD__
+void
+pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp)
+{
+ PF_LOCK();
+ pfi_attach_ifnet(ifp);
+#ifdef ALTQ
+ pf_altq_ifnet_event(ifp, 0);
+#endif
+ PF_UNLOCK();
+}
+
+void
+pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp)
+{
+ PF_LOCK();
+ pfi_detach_ifnet(ifp);
+#ifdef ALTQ
+ pf_altq_ifnet_event(ifp, 1);
+#endif
+ PF_UNLOCK();
+}
+
+void
+pfi_attach_group_event(void *arg __unused, struct ifg_group *ifg)
+{
+ PF_LOCK();
+ pfi_attach_ifgroup(ifg);
+ PF_UNLOCK();
+}
+
+void
+pfi_change_group_event(void *arg __unused, char *gname)
+{
+ PF_LOCK();
+ pfi_group_change(gname);
+ PF_UNLOCK();
+}
+
+void
+pfi_detach_group_event(void *arg __unused, struct ifg_group *ifg)
+{
+ PF_LOCK();
+ pfi_detach_ifgroup(ifg);
+ PF_UNLOCK();
+}
+
+void
+pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp)
+{
+ PF_LOCK();
+ if (ifp && ifp->if_pf_kif)
+ pfi_kifaddr_update(ifp->if_pf_kif);
+ PF_UNLOCK();
+}
+#endif /* __FreeBSD__ */
diff --git a/freebsd/sys/contrib/pf/net/pf_ioctl.c b/freebsd/sys/contrib/pf/net/pf_ioctl.c
new file mode 100644
index 00000000..de612b4f
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pf_ioctl.c
@@ -0,0 +1,3896 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $OpenBSD: pf_ioctl.c,v 1.175 2007/02/26 22:47:43 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2002,2003 Henning Brauer
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Effort sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F30602-01-2-0537.
+ *
+ */
+
+#ifdef __FreeBSD__
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_bpf.h>
+#include <freebsd/local/opt_pf.h>
+
+#ifdef DEV_BPF
+#define NBPFILTER DEV_BPF
+#else
+#define NBPFILTER 0
+#endif
+
+#ifdef DEV_PFLOG
+#define NPFLOG DEV_PFLOG
+#else
+#define NPFLOG 0
+#endif
+
+#ifdef DEV_PFSYNC
+#define NPFSYNC DEV_PFSYNC
+#else
+#define NPFSYNC 0
+#endif
+
+#else
+#include <freebsd/local/bpfilter.h>
+#include <freebsd/local/pflog.h>
+#include <freebsd/local/pfsync.h>
+#endif
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/malloc.h>
+#ifdef __FreeBSD__
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/conf.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/sysctl.h>
+#else
+#include <freebsd/sys/timeout.h>
+#include <freebsd/sys/pool.h>
+#endif
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/kthread.h>
+#ifndef __FreeBSD__
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/uvm/uvm_extern.h>
+#endif
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#ifdef __FreeBSD__
+#include <freebsd/net/vnet.h>
+#endif
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_icmp.h>
+
+#ifdef __FreeBSD__
+#include <freebsd/sys/md5.h>
+#else
+#include <freebsd/dev/rndvar.h>
+#include <freebsd/crypto/md5.h>
+#endif
+#include <freebsd/net/pfvar.h>
+
+#if NPFSYNC > 0
+#include <freebsd/net/if_pfsync.h>
+#endif /* NPFSYNC > 0 */
+
+#include <freebsd/net/if_pflog.h>
+
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet/in_pcb.h>
+#endif /* INET6 */
+
+#ifdef ALTQ
+#include <freebsd/altq/altq.h>
+#endif
+
+#ifdef __FreeBSD__
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/net/pfil.h>
+#endif /* __FreeBSD__ */
+
+#ifdef __FreeBSD__
+void init_zone_var(void);
+void cleanup_pf_zone(void);
+int pfattach(void);
+#else
+void pfattach(int);
+void pf_thread_create(void *);
+int pfopen(dev_t, int, int, struct proc *);
+int pfclose(dev_t, int, int, struct proc *);
+#endif
+struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t,
+ u_int8_t, u_int8_t, u_int8_t);
+
+void pf_mv_pool(struct pf_palist *, struct pf_palist *);
+void pf_empty_pool(struct pf_palist *);
+#ifdef __FreeBSD__
+int pfioctl(struct cdev *, u_long, caddr_t, int, struct thread *);
+#else
+int pfioctl(struct cdev *, u_long, caddr_t, int, struct proc *);
+#endif
+#ifdef ALTQ
+int pf_begin_altq(u_int32_t *);
+int pf_rollback_altq(u_int32_t);
+int pf_commit_altq(u_int32_t);
+int pf_enable_altq(struct pf_altq *);
+int pf_disable_altq(struct pf_altq *);
+#endif /* ALTQ */
+int pf_begin_rules(u_int32_t *, int, const char *);
+int pf_rollback_rules(u_int32_t, int, char *);
+int pf_setup_pfsync_matching(struct pf_ruleset *);
+void pf_hash_rule(MD5_CTX *, struct pf_rule *);
+void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
+int pf_commit_rules(u_int32_t, int, char *);
+
+struct pf_rule pf_default_rule;
+#ifdef __FreeBSD__
+struct sx pf_consistency_lock;
+SX_SYSINIT(pf_consistency_lock, &pf_consistency_lock, "pf_statetbl_lock");
+#else
+struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER;
+#endif
+#ifdef ALTQ
+static int pf_altq_running;
+#endif
+
+#define TAGID_MAX 50000
+TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
+ pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
+
+#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
+#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
+#endif
+u_int16_t tagname2tag(struct pf_tags *, char *);
+void tag2tagname(struct pf_tags *, u_int16_t, char *);
+void tag_unref(struct pf_tags *, u_int16_t);
+int pf_rtlabel_add(struct pf_addr_wrap *);
+void pf_rtlabel_remove(struct pf_addr_wrap *);
+void pf_rtlabel_copyout(struct pf_addr_wrap *);
+
+#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
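+
+/*
+ * Note: callers must supply their own parentheses around the printf
+ * arguments, e.g. DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")), since the
+ * macro pastes "x" directly after printf.
+ */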
+
+
+#ifdef __FreeBSD__
+static struct cdev *pf_dev;
+
+/*
+ * XXX - These are new and need to be checked when moving to a new version
+ */
+static void pf_clear_states(void);
+static int pf_clear_tables(void);
+static void pf_clear_srcnodes(void);
+/*
+ * XXX - end of the new functions that need checking when moving to a
+ * new version
+ */
+
+/*
+ * Wrapper functions for pfil(9) hooks
+ */
+static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp,
+ int dir, struct inpcb *inp);
+static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp,
+ int dir, struct inpcb *inp);
+#ifdef INET6
+static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp,
+ int dir, struct inpcb *inp);
+static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp,
+ int dir, struct inpcb *inp);
+#endif
+
+static int hook_pf(void);
+static int dehook_pf(void);
+static int shutdown_pf(void);
+static int pf_load(void);
+static int pf_unload(void);
+
+static struct cdevsw pf_cdevsw = {
+ .d_ioctl = pfioctl,
+ .d_name = PF_NAME,
+ .d_version = D_VERSION,
+};
+
+static volatile int pf_pfil_hooked = 0;
+int pf_end_threads = 0;
+struct mtx pf_task_mtx;
+pflog_packet_t *pflog_packet_ptr = NULL;
+
+int debug_pfugidhack = 0;
+SYSCTL_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW, &debug_pfugidhack, 0,
+ "Enable/disable pf user/group rules mpsafe hack");
+
+void
+init_pf_mutex(void)
+{
+ mtx_init(&pf_task_mtx, "pf task mtx", NULL, MTX_DEF);
+}
+
+void
+destroy_pf_mutex(void)
+{
+ mtx_destroy(&pf_task_mtx);
+}
+
+void
+init_zone_var(void)
+{
+ pf_src_tree_pl = pf_rule_pl = NULL;
+ pf_state_pl = pf_altq_pl = pf_pooladdr_pl = NULL;
+ pf_frent_pl = pf_frag_pl = pf_cache_pl = pf_cent_pl = NULL;
+ pf_state_scrub_pl = NULL;
+ pfr_ktable_pl = pfr_kentry_pl = NULL;
+}
+
+void
+cleanup_pf_zone(void)
+{
+ UMA_DESTROY(pf_src_tree_pl);
+ UMA_DESTROY(pf_rule_pl);
+ UMA_DESTROY(pf_state_pl);
+ UMA_DESTROY(pf_altq_pl);
+ UMA_DESTROY(pf_pooladdr_pl);
+ UMA_DESTROY(pf_frent_pl);
+ UMA_DESTROY(pf_frag_pl);
+ UMA_DESTROY(pf_cache_pl);
+ UMA_DESTROY(pf_cent_pl);
+ UMA_DESTROY(pfr_ktable_pl);
+ UMA_DESTROY(pfr_kentry_pl2);
+ UMA_DESTROY(pfr_kentry_pl);
+ UMA_DESTROY(pf_state_scrub_pl);
+ UMA_DESTROY(pfi_addr_pl);
+}
+
+int
+pfattach(void)
+{
+ u_int32_t *my_timeout = pf_default_rule.timeout;
+ int error = 1;
+
+ do {
+		UMA_CREATE(pf_src_tree_pl, struct pf_src_node, "pfsrctrpl");
+ UMA_CREATE(pf_rule_pl, struct pf_rule, "pfrulepl");
+ UMA_CREATE(pf_state_pl, struct pf_state, "pfstatepl");
+ UMA_CREATE(pf_altq_pl, struct pf_altq, "pfaltqpl");
+ UMA_CREATE(pf_pooladdr_pl, struct pf_pooladdr, "pfpooladdrpl");
+ UMA_CREATE(pfr_ktable_pl, struct pfr_ktable, "pfrktable");
+ UMA_CREATE(pfr_kentry_pl, struct pfr_kentry, "pfrkentry");
+ UMA_CREATE(pfr_kentry_pl2, struct pfr_kentry, "pfrkentry2");
+ UMA_CREATE(pf_frent_pl, struct pf_frent, "pffrent");
+ UMA_CREATE(pf_frag_pl, struct pf_fragment, "pffrag");
+ UMA_CREATE(pf_cache_pl, struct pf_fragment, "pffrcache");
+ UMA_CREATE(pf_cent_pl, struct pf_frcache, "pffrcent");
+ UMA_CREATE(pf_state_scrub_pl, struct pf_state_scrub,
+ "pfstatescrub");
+ UMA_CREATE(pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl");
+ error = 0;
+	} while (0);
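+	/*
+	 * The do/while (0) wrapper is for the benefit of UMA_CREATE, which
+	 * (assumed to be defined with the other FreeBSD compatibility shims
+	 * in pfvar.h) ends in a break-on-NULL after uma_zcreate(), turning
+	 * an allocation failure into a jump to the error check below.
+	 */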
+ if (error) {
+ cleanup_pf_zone();
+ return (error);
+ }
+ pfr_initialize();
+ pfi_initialize();
+	if ((error = pf_osfp_initialize()) != 0) {
+ cleanup_pf_zone();
+ pf_osfp_cleanup();
+ return (error);
+ }
+
+ pf_pool_limits[PF_LIMIT_STATES].pp = pf_state_pl;
+ pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
+ pf_pool_limits[PF_LIMIT_SRC_NODES].pp = pf_src_tree_pl;
+ pf_pool_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
+ pf_pool_limits[PF_LIMIT_FRAGS].pp = pf_frent_pl;
+ pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
+ pf_pool_limits[PF_LIMIT_TABLES].pp = pfr_ktable_pl;
+ pf_pool_limits[PF_LIMIT_TABLES].limit = PFR_KTABLE_HIWAT;
+ pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].pp = pfr_kentry_pl;
+ pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT;
+ uma_zone_set_max(pf_pool_limits[PF_LIMIT_STATES].pp,
+ pf_pool_limits[PF_LIMIT_STATES].limit);
+
+ RB_INIT(&tree_src_tracking);
+ RB_INIT(&pf_anchors);
+ pf_init_ruleset(&pf_main_ruleset);
+ TAILQ_INIT(&pf_altqs[0]);
+ TAILQ_INIT(&pf_altqs[1]);
+ TAILQ_INIT(&pf_pabuf);
+ pf_altqs_active = &pf_altqs[0];
+ pf_altqs_inactive = &pf_altqs[1];
+ TAILQ_INIT(&state_list);
+
+ /* default rule should never be garbage collected */
+ pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next;
+ pf_default_rule.action = PF_PASS;
+ pf_default_rule.nr = -1;
+ pf_default_rule.rtableid = -1;
+
+ /* initialize default timeouts */
+ my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
+ my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
+ my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
+ my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
+ my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
+ my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
+ my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
+ my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
+ my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
+ my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
+ my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
+ my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
+ my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
+ my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
+ my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
+ my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
+ my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
+ my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
+ my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
+ my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;
+
+ pf_normalize_init();
+ bzero(&pf_status, sizeof(pf_status));
+ pf_status.debug = PF_DEBUG_URGENT;
+
+ pf_pfil_hooked = 0;
+
+ /* XXX do our best to avoid a conflict */
+ pf_status.hostid = arc4random();
+
+ if (kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pfpurge"))
+ return (ENXIO);
+
+ return (error);
+}
+#else /* !__FreeBSD__ */
+void
+pfattach(int num)
+{
+ u_int32_t *timeout = pf_default_rule.timeout;
+
+ pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0, 0, 0, "pfrulepl",
+ &pool_allocator_nointr);
+ pool_init(&pf_src_tree_pl, sizeof(struct pf_src_node), 0, 0, 0,
+ "pfsrctrpl", NULL);
+ pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl",
+ NULL);
+ pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl",
+ &pool_allocator_nointr);
+ pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0,
+ "pfpooladdrpl", &pool_allocator_nointr);
+ pfr_initialize();
+ pfi_initialize();
+ pf_osfp_initialize();
+
+ pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp,
+ pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0);
+
+ if (ctob(physmem) <= 100*1024*1024)
+ pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit =
+ PFR_KENTRY_HIWAT_SMALL;
+
+ RB_INIT(&tree_src_tracking);
+ RB_INIT(&pf_anchors);
+ pf_init_ruleset(&pf_main_ruleset);
+ TAILQ_INIT(&pf_altqs[0]);
+ TAILQ_INIT(&pf_altqs[1]);
+ TAILQ_INIT(&pf_pabuf);
+ pf_altqs_active = &pf_altqs[0];
+ pf_altqs_inactive = &pf_altqs[1];
+ TAILQ_INIT(&state_list);
+
+ /* default rule should never be garbage collected */
+ pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next;
+ pf_default_rule.action = PF_PASS;
+ pf_default_rule.nr = -1;
+ pf_default_rule.rtableid = -1;
+
+ /* initialize default timeouts */
+ timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
+ timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
+ timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
+ timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
+ timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
+ timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
+ timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
+ timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
+ timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
+ timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
+ timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
+ timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
+ timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
+ timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
+ timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
+ timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
+ timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
+ timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
+ timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
+ timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;
+
+ pf_normalize_init();
+ bzero(&pf_status, sizeof(pf_status));
+ pf_status.debug = PF_DEBUG_URGENT;
+
+ /* XXX do our best to avoid a conflict */
+ pf_status.hostid = arc4random();
+
+ /* require process context to purge states, so perform in a thread */
+ kproc_create_deferred(pf_thread_create, NULL);
+}
+
+void
+pf_thread_create(void *v)
+{
+ if (kproc_create(pf_purge_thread, NULL, NULL, "pfpurge"))
+ panic("pfpurge thread");
+}
+
+int
+pfopen(struct cdev *dev, int flags, int fmt, struct proc *p)
+{
+ if (dev2unit(dev) >= 1)
+ return (ENXIO);
+ return (0);
+}
+
+int
+pfclose(struct cdev *dev, int flags, int fmt, struct proc *p)
+{
+ if (dev2unit(dev) >= 1)
+ return (ENXIO);
+ return (0);
+}
+#endif /* __FreeBSD__ */
+
+struct pf_pool *
+pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action,
+ u_int32_t rule_number, u_int8_t r_last, u_int8_t active,
+ u_int8_t check_ticket)
+{
+ struct pf_ruleset *ruleset;
+ struct pf_rule *rule;
+ int rs_num;
+
+ ruleset = pf_find_ruleset(anchor);
+ if (ruleset == NULL)
+ return (NULL);
+ rs_num = pf_get_ruleset_number(rule_action);
+ if (rs_num >= PF_RULESET_MAX)
+ return (NULL);
+ if (active) {
+ if (check_ticket && ticket !=
+ ruleset->rules[rs_num].active.ticket)
+ return (NULL);
+ if (r_last)
+ rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
+ pf_rulequeue);
+ else
+ rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
+ } else {
+ if (check_ticket && ticket !=
+ ruleset->rules[rs_num].inactive.ticket)
+ return (NULL);
+ if (r_last)
+ rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
+ pf_rulequeue);
+ else
+ rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr);
+ }
+ if (!r_last) {
+ while ((rule != NULL) && (rule->nr != rule_number))
+ rule = TAILQ_NEXT(rule, entries);
+ }
+ if (rule == NULL)
+ return (NULL);
+
+ return (&rule->rpool);
+}
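+
+/*
+ * Note: with r_last set the rule_number argument is ignored and the pool
+ * of the last rule in the selected queue is returned; otherwise the queue
+ * is walked for an exact rule->nr match.
+ */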
+
+void
+pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb)
+{
+ struct pf_pooladdr *mv_pool_pa;
+
+ while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) {
+ TAILQ_REMOVE(poola, mv_pool_pa, entries);
+ TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries);
+ }
+}
+
+void
+pf_empty_pool(struct pf_palist *poola)
+{
+ struct pf_pooladdr *empty_pool_pa;
+
+ while ((empty_pool_pa = TAILQ_FIRST(poola)) != NULL) {
+ pfi_dynaddr_remove(&empty_pool_pa->addr);
+ pf_tbladdr_remove(&empty_pool_pa->addr);
+ pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE);
+ TAILQ_REMOVE(poola, empty_pool_pa, entries);
+ pool_put(&pf_pooladdr_pl, empty_pool_pa);
+ }
+}
+
+void
+pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule)
+{
+ if (rulequeue != NULL) {
+ if (rule->states <= 0) {
+ /*
+ * XXX - we need to remove the table *before* detaching
+ * the rule to make sure the table code does not delete
+ * the anchor under our feet.
+ */
+ pf_tbladdr_remove(&rule->src.addr);
+ pf_tbladdr_remove(&rule->dst.addr);
+ if (rule->overload_tbl)
+ pfr_detach_table(rule->overload_tbl);
+ }
+ TAILQ_REMOVE(rulequeue, rule, entries);
+ rule->entries.tqe_prev = NULL;
+ rule->nr = -1;
+ }
+
+ if (rule->states > 0 || rule->src_nodes > 0 ||
+ rule->entries.tqe_prev != NULL)
+ return;
+ pf_tag_unref(rule->tag);
+ pf_tag_unref(rule->match_tag);
+#ifdef ALTQ
+ if (rule->pqid != rule->qid)
+ pf_qid_unref(rule->pqid);
+ pf_qid_unref(rule->qid);
+#endif
+ pf_rtlabel_remove(&rule->src.addr);
+ pf_rtlabel_remove(&rule->dst.addr);
+ pfi_dynaddr_remove(&rule->src.addr);
+ pfi_dynaddr_remove(&rule->dst.addr);
+ if (rulequeue == NULL) {
+ pf_tbladdr_remove(&rule->src.addr);
+ pf_tbladdr_remove(&rule->dst.addr);
+ if (rule->overload_tbl)
+ pfr_detach_table(rule->overload_tbl);
+ }
+ pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE);
+ pf_anchor_remove(rule);
+ pf_empty_pool(&rule->rpool.list);
+ pool_put(&pf_rule_pl, rule);
+}
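+
+/*
+ * pf_rm_rule() is two-stage: given a rulequeue the rule is unlinked (with
+ * its tables detached first, see the XXX above), but it is freed only
+ * once no states or source nodes reference it and it is no longer queued;
+ * until then the early return above leaves the teardown for later.
+ */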
+
+u_int16_t
+tagname2tag(struct pf_tags *head, char *tagname)
+{
+ struct pf_tagname *tag, *p = NULL;
+ u_int16_t new_tagid = 1;
+
+ TAILQ_FOREACH(tag, head, entries)
+ if (strcmp(tagname, tag->name) == 0) {
+ tag->ref++;
+ return (tag->tag);
+ }
+
+ /*
+ * to avoid fragmentation, we do a linear search from the beginning
+ * and take the first free slot we find. if there is none or the list
+ * is empty, append a new entry at the end.
+ */
+
+	/* no existing entry matched; find a free slot for a new tag */
+ if (!TAILQ_EMPTY(head))
+ for (p = TAILQ_FIRST(head); p != NULL &&
+ p->tag == new_tagid; p = TAILQ_NEXT(p, entries))
+ new_tagid = p->tag + 1;
+
+ if (new_tagid > TAGID_MAX)
+ return (0);
+
+ /* allocate and fill new struct pf_tagname */
+ tag = (struct pf_tagname *)malloc(sizeof(struct pf_tagname),
+ M_TEMP, M_NOWAIT);
+ if (tag == NULL)
+ return (0);
+ bzero(tag, sizeof(struct pf_tagname));
+ strlcpy(tag->name, tagname, sizeof(tag->name));
+ tag->tag = new_tagid;
+ tag->ref++;
+
+ if (p != NULL) /* insert new entry before p */
+ TAILQ_INSERT_BEFORE(p, tag, entries);
+ else /* either list empty or no free slot in between */
+ TAILQ_INSERT_TAIL(head, tag, entries);
+
+ return (tag->tag);
+}
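+
+/*
+ * Illustration (not part of the original code): with tags 1, 2 and 4 in
+ * use, the scan above stops at the tag-4 entry holding new_tagid == 3,
+ * which is inserted before it; with 1, 2 and 3 in use the scan runs off
+ * the end and tag 4 is appended at the tail.
+ */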
+
+void
+tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p)
+{
+ struct pf_tagname *tag;
+
+ TAILQ_FOREACH(tag, head, entries)
+ if (tag->tag == tagid) {
+ strlcpy(p, tag->name, PF_TAG_NAME_SIZE);
+ return;
+ }
+}
+
+void
+tag_unref(struct pf_tags *head, u_int16_t tag)
+{
+ struct pf_tagname *p, *next;
+
+ if (tag == 0)
+ return;
+
+ for (p = TAILQ_FIRST(head); p != NULL; p = next) {
+ next = TAILQ_NEXT(p, entries);
+ if (tag == p->tag) {
+ if (--p->ref == 0) {
+ TAILQ_REMOVE(head, p, entries);
+ free(p, M_TEMP);
+ }
+ break;
+ }
+ }
+}
+
+u_int16_t
+pf_tagname2tag(char *tagname)
+{
+ return (tagname2tag(&pf_tags, tagname));
+}
+
+void
+pf_tag2tagname(u_int16_t tagid, char *p)
+{
+ tag2tagname(&pf_tags, tagid, p);
+}
+
+void
+pf_tag_ref(u_int16_t tag)
+{
+ struct pf_tagname *t;
+
+ TAILQ_FOREACH(t, &pf_tags, entries)
+ if (t->tag == tag)
+ break;
+ if (t != NULL)
+ t->ref++;
+}
+
+void
+pf_tag_unref(u_int16_t tag)
+{
+ tag_unref(&pf_tags, tag);
+}
+
+int
+pf_rtlabel_add(struct pf_addr_wrap *a)
+{
+#ifdef __FreeBSD__
+ /* XXX_IMPORT: later */
+ return (0);
+#else
+ if (a->type == PF_ADDR_RTLABEL &&
+ (a->v.rtlabel = rtlabel_name2id(a->v.rtlabelname)) == 0)
+ return (-1);
+ return (0);
+#endif
+}
+
+void
+pf_rtlabel_remove(struct pf_addr_wrap *a)
+{
+#ifdef __FreeBSD__
+ /* XXX_IMPORT: later */
+#else
+ if (a->type == PF_ADDR_RTLABEL)
+ rtlabel_unref(a->v.rtlabel);
+#endif
+}
+
+void
+pf_rtlabel_copyout(struct pf_addr_wrap *a)
+{
+#ifdef __FreeBSD__
+ /* XXX_IMPORT: later */
+ if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel)
+ strlcpy(a->v.rtlabelname, "?", sizeof(a->v.rtlabelname));
+#else
+ const char *name;
+
+ if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel) {
+ if ((name = rtlabel_id2name(a->v.rtlabel)) == NULL)
+ strlcpy(a->v.rtlabelname, "?",
+ sizeof(a->v.rtlabelname));
+ else
+ strlcpy(a->v.rtlabelname, name,
+ sizeof(a->v.rtlabelname));
+ }
+#endif
+}
+
+#ifdef ALTQ
+u_int32_t
+pf_qname2qid(char *qname)
+{
+ return ((u_int32_t)tagname2tag(&pf_qids, qname));
+}
+
+void
+pf_qid2qname(u_int32_t qid, char *p)
+{
+ tag2tagname(&pf_qids, (u_int16_t)qid, p);
+}
+
+void
+pf_qid_unref(u_int32_t qid)
+{
+ tag_unref(&pf_qids, (u_int16_t)qid);
+}
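+
+/*
+ * Queue IDs reuse the tag allocator above: pf_qids is simply a second
+ * pf_tags list, and qids are 16-bit tags widened to u_int32_t at this
+ * interface.
+ */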
+
+int
+pf_begin_altq(u_int32_t *ticket)
+{
+ struct pf_altq *altq;
+ int error = 0;
+
+ /* Purge the old altq list */
+ while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
+#ifdef __FreeBSD__
+ if (altq->qname[0] == 0 &&
+ (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+#else
+ if (altq->qname[0] == 0) {
+#endif
+ /* detach and destroy the discipline */
+ error = altq_remove(altq);
+ } else
+ pf_qid_unref(altq->qid);
+ pool_put(&pf_altq_pl, altq);
+ }
+ if (error)
+ return (error);
+ *ticket = ++ticket_altqs_inactive;
+ altqs_inactive_open = 1;
+ return (0);
+}
+
+int
+pf_rollback_altq(u_int32_t ticket)
+{
+ struct pf_altq *altq;
+ int error = 0;
+
+ if (!altqs_inactive_open || ticket != ticket_altqs_inactive)
+ return (0);
+ /* Purge the old altq list */
+ while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
+#ifdef __FreeBSD__
+ if (altq->qname[0] == 0 &&
+ (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+#else
+ if (altq->qname[0] == 0) {
+#endif
+ /* detach and destroy the discipline */
+ error = altq_remove(altq);
+ } else
+ pf_qid_unref(altq->qid);
+ pool_put(&pf_altq_pl, altq);
+ }
+ altqs_inactive_open = 0;
+ return (error);
+}
+
+int
+pf_commit_altq(u_int32_t ticket)
+{
+ struct pf_altqqueue *old_altqs;
+ struct pf_altq *altq;
+ int s, err, error = 0;
+
+ if (!altqs_inactive_open || ticket != ticket_altqs_inactive)
+ return (EBUSY);
+
+ /* swap altqs, keep the old. */
+ s = splsoftnet();
+ old_altqs = pf_altqs_active;
+ pf_altqs_active = pf_altqs_inactive;
+ pf_altqs_inactive = old_altqs;
+ ticket_altqs_active = ticket_altqs_inactive;
+
+ /* Attach new disciplines */
+ TAILQ_FOREACH(altq, pf_altqs_active, entries) {
+#ifdef __FreeBSD__
+ if (altq->qname[0] == 0 &&
+ (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+#else
+ if (altq->qname[0] == 0) {
+#endif
+ /* attach the discipline */
+ error = altq_pfattach(altq);
+ if (error == 0 && pf_altq_running)
+ error = pf_enable_altq(altq);
+ if (error != 0) {
+ splx(s);
+ return (error);
+ }
+ }
+ }
+
+ /* Purge the old altq list */
+ while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
+#ifdef __FreeBSD__
+ if (altq->qname[0] == 0 &&
+ (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+#else
+ if (altq->qname[0] == 0) {
+#endif
+ /* detach and destroy the discipline */
+ if (pf_altq_running)
+ error = pf_disable_altq(altq);
+ err = altq_pfdetach(altq);
+ if (err != 0 && error == 0)
+ error = err;
+ err = altq_remove(altq);
+ if (err != 0 && error == 0)
+ error = err;
+ } else
+ pf_qid_unref(altq->qid);
+ pool_put(&pf_altq_pl, altq);
+ }
+ splx(s);
+
+ altqs_inactive_open = 0;
+ return (error);
+}
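+
+/*
+ * The ticket protocol above amounts to a two-phase commit: pf_begin_altq()
+ * empties the inactive list and hands out a fresh ticket, userland loads
+ * new queues against it, and pf_commit_altq() swaps the active and
+ * inactive lists under splsoftnet() only if the ticket still matches; a
+ * stale ticket means another transaction intervened, hence EBUSY.
+ */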
+
+int
+pf_enable_altq(struct pf_altq *altq)
+{
+ struct ifnet *ifp;
+ struct tb_profile tb;
+ int s, error = 0;
+
+ if ((ifp = ifunit(altq->ifname)) == NULL)
+ return (EINVAL);
+
+ if (ifp->if_snd.altq_type != ALTQT_NONE)
+ error = altq_enable(&ifp->if_snd);
+
+ /* set tokenbucket regulator */
+ if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ tb.rate = altq->ifbandwidth;
+ tb.depth = altq->tbrsize;
+ s = splnet();
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ error = tbr_set(&ifp->if_snd, &tb);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ splx(s);
+ }
+
+ return (error);
+}
+
+int
+pf_disable_altq(struct pf_altq *altq)
+{
+ struct ifnet *ifp;
+ struct tb_profile tb;
+ int s, error;
+
+ if ((ifp = ifunit(altq->ifname)) == NULL)
+ return (EINVAL);
+
+	/*
+	 * if the discipline is no longer referenced, it has been
+	 * overridden by a new one; in that case there is nothing to
+	 * disable, so just return.
+	 */
+ if (altq->altq_disc != ifp->if_snd.altq_disc)
+ return (0);
+
+ error = altq_disable(&ifp->if_snd);
+
+ if (error == 0) {
+ /* clear tokenbucket regulator */
+ tb.rate = 0;
+ s = splnet();
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ error = tbr_set(&ifp->if_snd, &tb);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ splx(s);
+ }
+
+ return (error);
+}
+
+#ifdef __FreeBSD__
+void
+pf_altq_ifnet_event(struct ifnet *ifp, int remove)
+{
+ struct ifnet *ifp1;
+ struct pf_altq *a1, *a2, *a3;
+ u_int32_t ticket;
+ int error = 0;
+
+ /* Interrupt userland queue modifications */
+ if (altqs_inactive_open)
+ pf_rollback_altq(ticket_altqs_inactive);
+
+ /* Start new altq ruleset */
+ if (pf_begin_altq(&ticket))
+ return;
+
+ /* Copy the current active set */
+ TAILQ_FOREACH(a1, pf_altqs_active, entries) {
+ a2 = pool_get(&pf_altq_pl, PR_NOWAIT);
+ if (a2 == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bcopy(a1, a2, sizeof(struct pf_altq));
+
+ if (a2->qname[0] != 0) {
+ if ((a2->qid = pf_qname2qid(a2->qname)) == 0) {
+ error = EBUSY;
+ pool_put(&pf_altq_pl, a2);
+ break;
+ }
+ a2->altq_disc = NULL;
+ TAILQ_FOREACH(a3, pf_altqs_inactive, entries) {
+ if (strncmp(a3->ifname, a2->ifname,
+ IFNAMSIZ) == 0 && a3->qname[0] == 0) {
+ a2->altq_disc = a3->altq_disc;
+ break;
+ }
+ }
+ }
+ /* Deactivate the interface in question */
+ a2->local_flags &= ~PFALTQ_FLAG_IF_REMOVED;
+ if ((ifp1 = ifunit(a2->ifname)) == NULL ||
+ (remove && ifp1 == ifp)) {
+ a2->local_flags |= PFALTQ_FLAG_IF_REMOVED;
+ } else {
+ PF_UNLOCK();
+ error = altq_add(a2);
+ PF_LOCK();
+
+ if (ticket != ticket_altqs_inactive)
+ error = EBUSY;
+
+ if (error) {
+ pool_put(&pf_altq_pl, a2);
+ break;
+ }
+ }
+
+ TAILQ_INSERT_TAIL(pf_altqs_inactive, a2, entries);
+ }
+
+ if (error != 0)
+ pf_rollback_altq(ticket);
+ else
+ pf_commit_altq(ticket);
+}
+#endif
+#endif /* ALTQ */
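+
+/*
+ * pf_altq_ifnet_event() rebuilds the active altq set whenever an
+ * interface appears or departs: the current queues are copied into a
+ * fresh inactive set, queues whose interface is gone are marked
+ * PFALTQ_FLAG_IF_REMOVED rather than dropped, and the result is committed
+ * through the same begin/commit ticket machinery used by userland.
+ */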
+
+int
+pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
+{
+ struct pf_ruleset *rs;
+ struct pf_rule *rule;
+
+ if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
+ return (EINVAL);
+ rs = pf_find_or_create_ruleset(anchor);
+ if (rs == NULL)
+ return (EINVAL);
+ while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
+ pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule);
+ rs->rules[rs_num].inactive.rcount--;
+ }
+ *ticket = ++rs->rules[rs_num].inactive.ticket;
+ rs->rules[rs_num].inactive.open = 1;
+ return (0);
+}
+
+int
+pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
+{
+ struct pf_ruleset *rs;
+ struct pf_rule *rule;
+
+ if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
+ return (EINVAL);
+ rs = pf_find_ruleset(anchor);
+ if (rs == NULL || !rs->rules[rs_num].inactive.open ||
+ rs->rules[rs_num].inactive.ticket != ticket)
+ return (0);
+ while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
+ pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule);
+ rs->rules[rs_num].inactive.rcount--;
+ }
+ rs->rules[rs_num].inactive.open = 0;
+ return (0);
+}
+
+#define PF_MD5_UPD(st, elm) \
+ MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm))
+
+#define PF_MD5_UPD_STR(st, elm) \
+ MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm))
+
+#define PF_MD5_UPD_HTONL(st, elm, stor) do { \
+ (stor) = htonl((st)->elm); \
+ MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\
+} while (0)
+
+#define PF_MD5_UPD_HTONS(st, elm, stor) do { \
+ (stor) = htons((st)->elm); \
+ MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\
+} while (0)
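+
+/*
+ * Multi-byte fields are folded into the digest in network byte order so
+ * that hosts of differing endianness compute identical ruleset checksums;
+ * pfsync peers compare the checksum computed in pf_setup_pfsync_matching()
+ * below.
+ */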
+
+void
+pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
+{
+ PF_MD5_UPD(pfr, addr.type);
+ switch (pfr->addr.type) {
+ case PF_ADDR_DYNIFTL:
+ PF_MD5_UPD(pfr, addr.v.ifname);
+ PF_MD5_UPD(pfr, addr.iflags);
+ break;
+ case PF_ADDR_TABLE:
+ PF_MD5_UPD(pfr, addr.v.tblname);
+ break;
+ case PF_ADDR_ADDRMASK:
+ /* XXX ignore af? */
+ PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
+ PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
+ break;
+ case PF_ADDR_RTLABEL:
+ PF_MD5_UPD(pfr, addr.v.rtlabelname);
+ break;
+ }
+
+ PF_MD5_UPD(pfr, port[0]);
+ PF_MD5_UPD(pfr, port[1]);
+ PF_MD5_UPD(pfr, neg);
+ PF_MD5_UPD(pfr, port_op);
+}
+
+void
+pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule)
+{
+ u_int16_t x;
+ u_int32_t y;
+
+ pf_hash_rule_addr(ctx, &rule->src);
+ pf_hash_rule_addr(ctx, &rule->dst);
+ PF_MD5_UPD_STR(rule, label);
+ PF_MD5_UPD_STR(rule, ifname);
+ PF_MD5_UPD_STR(rule, match_tagname);
+ PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */
+ PF_MD5_UPD_HTONL(rule, os_fingerprint, y);
+ PF_MD5_UPD_HTONL(rule, prob, y);
+ PF_MD5_UPD_HTONL(rule, uid.uid[0], y);
+ PF_MD5_UPD_HTONL(rule, uid.uid[1], y);
+ PF_MD5_UPD(rule, uid.op);
+ PF_MD5_UPD_HTONL(rule, gid.gid[0], y);
+ PF_MD5_UPD_HTONL(rule, gid.gid[1], y);
+ PF_MD5_UPD(rule, gid.op);
+ PF_MD5_UPD_HTONL(rule, rule_flag, y);
+ PF_MD5_UPD(rule, action);
+ PF_MD5_UPD(rule, direction);
+ PF_MD5_UPD(rule, af);
+ PF_MD5_UPD(rule, quick);
+ PF_MD5_UPD(rule, ifnot);
+ PF_MD5_UPD(rule, match_tag_not);
+ PF_MD5_UPD(rule, natpass);
+ PF_MD5_UPD(rule, keep_state);
+ PF_MD5_UPD(rule, proto);
+ PF_MD5_UPD(rule, type);
+ PF_MD5_UPD(rule, code);
+ PF_MD5_UPD(rule, flags);
+ PF_MD5_UPD(rule, flagset);
+ PF_MD5_UPD(rule, allow_opts);
+ PF_MD5_UPD(rule, rt);
+ PF_MD5_UPD(rule, tos);
+}
+
+int
+pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
+{
+ struct pf_ruleset *rs;
+ struct pf_rule *rule, **old_array;
+ struct pf_rulequeue *old_rules;
+ int s, error;
+ u_int32_t old_rcount;
+
+ if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
+ return (EINVAL);
+ rs = pf_find_ruleset(anchor);
+ if (rs == NULL || !rs->rules[rs_num].inactive.open ||
+ ticket != rs->rules[rs_num].inactive.ticket)
+ return (EBUSY);
+
+ /* Calculate checksum for the main ruleset */
+ if (rs == &pf_main_ruleset) {
+ error = pf_setup_pfsync_matching(rs);
+ if (error != 0)
+ return (error);
+ }
+
+ /* Swap rules, keep the old. */
+ s = splsoftnet();
+ old_rules = rs->rules[rs_num].active.ptr;
+ old_rcount = rs->rules[rs_num].active.rcount;
+ old_array = rs->rules[rs_num].active.ptr_array;
+
+ rs->rules[rs_num].active.ptr =
+ rs->rules[rs_num].inactive.ptr;
+ rs->rules[rs_num].active.ptr_array =
+ rs->rules[rs_num].inactive.ptr_array;
+ rs->rules[rs_num].active.rcount =
+ rs->rules[rs_num].inactive.rcount;
+ rs->rules[rs_num].inactive.ptr = old_rules;
+ rs->rules[rs_num].inactive.ptr_array = old_array;
+ rs->rules[rs_num].inactive.rcount = old_rcount;
+
+ rs->rules[rs_num].active.ticket =
+ rs->rules[rs_num].inactive.ticket;
+ pf_calc_skip_steps(rs->rules[rs_num].active.ptr);
+
+
+ /* Purge the old rule list. */
+ while ((rule = TAILQ_FIRST(old_rules)) != NULL)
+ pf_rm_rule(old_rules, rule);
+ if (rs->rules[rs_num].inactive.ptr_array)
+ free(rs->rules[rs_num].inactive.ptr_array, M_TEMP);
+ rs->rules[rs_num].inactive.ptr_array = NULL;
+ rs->rules[rs_num].inactive.rcount = 0;
+ rs->rules[rs_num].inactive.open = 0;
+ pf_remove_if_empty_ruleset(rs);
+ splx(s);
+ return (0);
+}
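+
+/*
+ * As in pf_commit_altq(), the commit swaps the inactive ruleset into
+ * place under splsoftnet() and only then tears down the old rules, so
+ * rule evaluation never observes a half-built list.
+ */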
+
+int
+pf_setup_pfsync_matching(struct pf_ruleset *rs)
+{
+ MD5_CTX ctx;
+ struct pf_rule *rule;
+ int rs_cnt;
+ u_int8_t digest[PF_MD5_DIGEST_LENGTH];
+
+ MD5Init(&ctx);
+ for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) {
+ /* XXX PF_RULESET_SCRUB as well? */
+ if (rs_cnt == PF_RULESET_SCRUB)
+ continue;
+
+ if (rs->rules[rs_cnt].inactive.ptr_array)
+ free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP);
+ rs->rules[rs_cnt].inactive.ptr_array = NULL;
+
+ if (rs->rules[rs_cnt].inactive.rcount) {
+ rs->rules[rs_cnt].inactive.ptr_array =
+ malloc(sizeof(caddr_t) *
+ rs->rules[rs_cnt].inactive.rcount,
+ M_TEMP, M_NOWAIT);
+
+ if (!rs->rules[rs_cnt].inactive.ptr_array)
+ return (ENOMEM);
+ }
+
+ TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr,
+ entries) {
+ pf_hash_rule(&ctx, rule);
+ (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule;
+ }
+ }
+
+ MD5Final(digest, &ctx);
+ memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum));
+ return (0);
+}
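+
+/*
+ * Besides the checksum, this builds inactive.ptr_array, an array indexed
+ * by rule number; it lets committed rules be looked up by nr in constant
+ * time, which (an assumption from context) is what pfsync needs when
+ * resolving rule numbers carried in state messages.
+ */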
+
+int
+#ifdef __FreeBSD__
+pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
+#else
+pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
+#endif
+{
+ struct pf_pooladdr *pa = NULL;
+ struct pf_pool *pool = NULL;
+#ifndef __FreeBSD__
+ int s;
+#endif
+ int error = 0;
+
+ /* XXX keep in sync with switch() below */
+#ifdef __FreeBSD__
+ if (securelevel_gt(td->td_ucred, 2))
+#else
+ if (securelevel > 1)
+#endif
+ switch (cmd) {
+ case DIOCGETRULES:
+ case DIOCGETRULE:
+ case DIOCGETADDRS:
+ case DIOCGETADDR:
+ case DIOCGETSTATE:
+ case DIOCSETSTATUSIF:
+ case DIOCGETSTATUS:
+ case DIOCCLRSTATUS:
+ case DIOCNATLOOK:
+ case DIOCSETDEBUG:
+ case DIOCGETSTATES:
+ case DIOCGETTIMEOUT:
+ case DIOCCLRRULECTRS:
+ case DIOCGETLIMIT:
+ case DIOCGETALTQS:
+ case DIOCGETALTQ:
+ case DIOCGETQSTATS:
+ case DIOCGETRULESETS:
+ case DIOCGETRULESET:
+ case DIOCRGETTABLES:
+ case DIOCRGETTSTATS:
+ case DIOCRCLRTSTATS:
+ case DIOCRCLRADDRS:
+ case DIOCRADDADDRS:
+ case DIOCRDELADDRS:
+ case DIOCRSETADDRS:
+ case DIOCRGETADDRS:
+ case DIOCRGETASTATS:
+ case DIOCRCLRASTATS:
+ case DIOCRTSTADDRS:
+ case DIOCOSFPGET:
+ case DIOCGETSRCNODES:
+ case DIOCCLRSRCNODES:
+ case DIOCIGETIFACES:
+#ifdef __FreeBSD__
+ case DIOCGIFSPEED:
+#endif
+ case DIOCSETIFFLAG:
+ case DIOCCLRIFFLAG:
+ break;
+ case DIOCRCLRTABLES:
+ case DIOCRADDTABLES:
+ case DIOCRDELTABLES:
+ case DIOCRSETTFLAGS:
+ if (((struct pfioc_table *)addr)->pfrio_flags &
+ PFR_FLAG_DUMMY)
+ break; /* dummy operation ok */
+ return (EPERM);
+ default:
+ return (EPERM);
+ }
+
+ if (!(flags & FWRITE))
+ switch (cmd) {
+ case DIOCGETRULES:
+ case DIOCGETADDRS:
+ case DIOCGETADDR:
+ case DIOCGETSTATE:
+ case DIOCGETSTATUS:
+ case DIOCGETSTATES:
+ case DIOCGETTIMEOUT:
+ case DIOCGETLIMIT:
+ case DIOCGETALTQS:
+ case DIOCGETALTQ:
+ case DIOCGETQSTATS:
+ case DIOCGETRULESETS:
+ case DIOCGETRULESET:
+ case DIOCNATLOOK:
+ case DIOCRGETTABLES:
+ case DIOCRGETTSTATS:
+ case DIOCRGETADDRS:
+ case DIOCRGETASTATS:
+ case DIOCRTSTADDRS:
+ case DIOCOSFPGET:
+ case DIOCGETSRCNODES:
+ case DIOCIGETIFACES:
+#ifdef __FreeBSD__
+ case DIOCGIFSPEED:
+#endif
+ break;
+ case DIOCRCLRTABLES:
+ case DIOCRADDTABLES:
+ case DIOCRDELTABLES:
+ case DIOCRCLRTSTATS:
+ case DIOCRCLRADDRS:
+ case DIOCRADDADDRS:
+ case DIOCRDELADDRS:
+ case DIOCRSETADDRS:
+ case DIOCRSETTFLAGS:
+ if (((struct pfioc_table *)addr)->pfrio_flags &
+ PFR_FLAG_DUMMY) {
+ flags |= FWRITE; /* need write lock for dummy */
+ break; /* dummy operation ok */
+ }
+ return (EACCES);
+ case DIOCGETRULE:
+ if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR)
+ return (EACCES);
+ break;
+ default:
+ return (EACCES);
+ }
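+
+	/*
+	 * Two gates above: read-only ioctls are exempt from the securelevel
+	 * restriction, and a descriptor opened without FWRITE is limited to
+	 * the get/test subset; dummy table operations are allowed through
+	 * but upgraded to take the write lock.
+	 */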
+
+ if (flags & FWRITE)
+#ifdef __FreeBSD__
+ sx_xlock(&pf_consistency_lock);
+ else
+ sx_slock(&pf_consistency_lock);
+#else
+ rw_enter_write(&pf_consistency_lock);
+ else
+ rw_enter_read(&pf_consistency_lock);
+#endif
+
+#ifdef __FreeBSD__
+ PF_LOCK();
+#else
+ s = splsoftnet();
+#endif
+ switch (cmd) {
+
+ case DIOCSTART:
+ if (pf_status.running)
+ error = EEXIST;
+ else {
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+ error = hook_pf();
+ PF_LOCK();
+ if (error) {
+ DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: pfil registration failed\n"));
+ break;
+ }
+#endif
+ pf_status.running = 1;
+ pf_status.since = time_second;
+ if (pf_status.stateid == 0) {
+ pf_status.stateid = time_second;
+ pf_status.stateid = pf_status.stateid << 32;
+ }
+ DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
+ }
+ break;
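+
+	/*
+	 * The stateid seeding above puts the start time in the upper 32
+	 * bits of the 64-bit state ID space, presumably so that IDs handed
+	 * out after a restart do not collide with those from an earlier
+	 * run.
+	 */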
+
+ case DIOCSTOP:
+ if (!pf_status.running)
+ error = ENOENT;
+ else {
+ pf_status.running = 0;
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+ error = dehook_pf();
+ PF_LOCK();
+ if (error) {
+ pf_status.running = 1;
+ DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: pfil unregistration failed\n"));
+ }
+#endif
+ pf_status.since = time_second;
+ DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
+ }
+ break;
+
+ case DIOCADDRULE: {
+ struct pfioc_rule *pr = (struct pfioc_rule *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_rule *rule, *tail;
+ struct pf_pooladdr *pa;
+ int rs_num;
+
+ pr->anchor[sizeof(pr->anchor) - 1] = 0;
+ ruleset = pf_find_ruleset(pr->anchor);
+ if (ruleset == NULL) {
+ error = EINVAL;
+ break;
+ }
+ rs_num = pf_get_ruleset_number(pr->rule.action);
+ if (rs_num >= PF_RULESET_MAX) {
+ error = EINVAL;
+ break;
+ }
+ if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
+ error = EINVAL;
+ break;
+ }
+ if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) {
+#ifdef __FreeBSD__
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("ticket: %d != [%d]%d\n", pr->ticket, rs_num,
+ ruleset->rules[rs_num].inactive.ticket));
+#endif
+ error = EBUSY;
+ break;
+ }
+ if (pr->pool_ticket != ticket_pabuf) {
+#ifdef __FreeBSD__
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pool_ticket: %d != %d\n", pr->pool_ticket,
+ ticket_pabuf));
+#endif
+ error = EBUSY;
+ break;
+ }
+ rule = pool_get(&pf_rule_pl, PR_NOWAIT);
+ if (rule == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bcopy(&pr->rule, rule, sizeof(struct pf_rule));
+#ifdef __FreeBSD__
+ rule->cuid = td->td_ucred->cr_ruid;
+ rule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
+#else
+ rule->cuid = p->p_cred->p_ruid;
+ rule->cpid = p->p_pid;
+#endif
+ rule->anchor = NULL;
+ rule->kif = NULL;
+ TAILQ_INIT(&rule->rpool.list);
+ /* initialize refcounting */
+ rule->states = 0;
+ rule->src_nodes = 0;
+ rule->entries.tqe_prev = NULL;
+#ifndef INET
+ if (rule->af == AF_INET) {
+ pool_put(&pf_rule_pl, rule);
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET */
+#ifndef INET6
+ if (rule->af == AF_INET6) {
+ pool_put(&pf_rule_pl, rule);
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET6 */
+ tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
+ pf_rulequeue);
+ if (tail)
+ rule->nr = tail->nr + 1;
+ else
+ rule->nr = 0;
+ if (rule->ifname[0]) {
+ rule->kif = pfi_kif_get(rule->ifname);
+ if (rule->kif == NULL) {
+ pool_put(&pf_rule_pl, rule);
+ error = EINVAL;
+ break;
+ }
+ pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE);
+ }
+
+#ifdef __FreeBSD__ /* ROUTING */
+ if (rule->rtableid > 0 && rule->rtableid > rt_numfibs)
+#else
+ if (rule->rtableid > 0 && !rtable_exists(rule->rtableid))
+#endif
+ error = EBUSY;
+
+#ifdef ALTQ
+ /* set queue IDs */
+ if (rule->qname[0] != 0) {
+ if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
+ error = EBUSY;
+ else if (rule->pqname[0] != 0) {
+ if ((rule->pqid =
+ pf_qname2qid(rule->pqname)) == 0)
+ error = EBUSY;
+ } else
+ rule->pqid = rule->qid;
+ }
+#endif
+ if (rule->tagname[0])
+ if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
+ error = EBUSY;
+ if (rule->match_tagname[0])
+ if ((rule->match_tag =
+ pf_tagname2tag(rule->match_tagname)) == 0)
+ error = EBUSY;
+ if (rule->rt && !rule->direction)
+ error = EINVAL;
+#if NPFLOG > 0
+#ifdef __FreeBSD__
+ if (!rule->log)
+ rule->logif = 0;
+#endif
+ if (rule->logif >= PFLOGIFS_MAX)
+ error = EINVAL;
+#endif
+ if (pf_rtlabel_add(&rule->src.addr) ||
+ pf_rtlabel_add(&rule->dst.addr))
+ error = EBUSY;
+ if (pfi_dynaddr_setup(&rule->src.addr, rule->af))
+ error = EINVAL;
+ if (pfi_dynaddr_setup(&rule->dst.addr, rule->af))
+ error = EINVAL;
+ if (pf_tbladdr_setup(ruleset, &rule->src.addr))
+ error = EINVAL;
+ if (pf_tbladdr_setup(ruleset, &rule->dst.addr))
+ error = EINVAL;
+ if (pf_anchor_setup(rule, ruleset, pr->anchor_call))
+ error = EINVAL;
+ TAILQ_FOREACH(pa, &pf_pabuf, entries)
+ if (pf_tbladdr_setup(ruleset, &pa->addr))
+ error = EINVAL;
+
+ if (rule->overload_tblname[0]) {
+ if ((rule->overload_tbl = pfr_attach_table(ruleset,
+ rule->overload_tblname)) == NULL)
+ error = EINVAL;
+ else
+ rule->overload_tbl->pfrkt_flags |=
+ PFR_TFLAG_ACTIVE;
+ }
+
+ pf_mv_pool(&pf_pabuf, &rule->rpool.list);
+ if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
+ (rule->action == PF_BINAT)) && rule->anchor == NULL) ||
+ (rule->rt > PF_FASTROUTE)) &&
+ (TAILQ_FIRST(&rule->rpool.list) == NULL))
+ error = EINVAL;
+
+ if (error) {
+ pf_rm_rule(NULL, rule);
+ break;
+ }
+
+#ifdef __FreeBSD__
+ if (!debug_pfugidhack && (rule->uid.op || rule->gid.op ||
+ rule->log & PF_LOG_SOCKET_LOOKUP)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: debug.pfugidhack enabled\n"));
+ debug_pfugidhack = 1;
+ }
+#endif
+
+ rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
+ rule->evaluations = rule->packets[0] = rule->packets[1] =
+ rule->bytes[0] = rule->bytes[1] = 0;
+ TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
+ rule, entries);
+ ruleset->rules[rs_num].inactive.rcount++;
+ break;
+ }
+
+ case DIOCGETRULES: {
+ struct pfioc_rule *pr = (struct pfioc_rule *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_rule *tail;
+ int rs_num;
+
+ pr->anchor[sizeof(pr->anchor) - 1] = 0;
+ ruleset = pf_find_ruleset(pr->anchor);
+ if (ruleset == NULL) {
+ error = EINVAL;
+ break;
+ }
+ rs_num = pf_get_ruleset_number(pr->rule.action);
+ if (rs_num >= PF_RULESET_MAX) {
+ error = EINVAL;
+ break;
+ }
+ tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
+ pf_rulequeue);
+ if (tail)
+ pr->nr = tail->nr + 1;
+ else
+ pr->nr = 0;
+ pr->ticket = ruleset->rules[rs_num].active.ticket;
+ break;
+ }
+
+ case DIOCGETRULE: {
+ struct pfioc_rule *pr = (struct pfioc_rule *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_rule *rule;
+ int rs_num, i;
+
+ pr->anchor[sizeof(pr->anchor) - 1] = 0;
+ ruleset = pf_find_ruleset(pr->anchor);
+ if (ruleset == NULL) {
+ error = EINVAL;
+ break;
+ }
+ rs_num = pf_get_ruleset_number(pr->rule.action);
+ if (rs_num >= PF_RULESET_MAX) {
+ error = EINVAL;
+ break;
+ }
+ if (pr->ticket != ruleset->rules[rs_num].active.ticket) {
+ error = EBUSY;
+ break;
+ }
+ rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
+ while ((rule != NULL) && (rule->nr != pr->nr))
+ rule = TAILQ_NEXT(rule, entries);
+ if (rule == NULL) {
+ error = EBUSY;
+ break;
+ }
+ bcopy(rule, &pr->rule, sizeof(struct pf_rule));
+ if (pf_anchor_copyout(ruleset, rule, pr)) {
+ error = EBUSY;
+ break;
+ }
+ pfi_dynaddr_copyout(&pr->rule.src.addr);
+ pfi_dynaddr_copyout(&pr->rule.dst.addr);
+ pf_tbladdr_copyout(&pr->rule.src.addr);
+ pf_tbladdr_copyout(&pr->rule.dst.addr);
+ pf_rtlabel_copyout(&pr->rule.src.addr);
+ pf_rtlabel_copyout(&pr->rule.dst.addr);
+ for (i = 0; i < PF_SKIP_COUNT; ++i)
+ if (rule->skip[i].ptr == NULL)
+ pr->rule.skip[i].nr = -1;
+ else
+ pr->rule.skip[i].nr =
+ rule->skip[i].ptr->nr;
+
+ if (pr->action == PF_GET_CLR_CNTR) {
+ rule->evaluations = 0;
+ rule->packets[0] = rule->packets[1] = 0;
+ rule->bytes[0] = rule->bytes[1] = 0;
+ }
+ break;
+ }
+
+ case DIOCCHANGERULE: {
+ struct pfioc_rule *pcr = (struct pfioc_rule *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_rule *oldrule = NULL, *newrule = NULL;
+ u_int32_t nr = 0;
+ int rs_num;
+
+ if (!(pcr->action == PF_CHANGE_REMOVE ||
+ pcr->action == PF_CHANGE_GET_TICKET) &&
+ pcr->pool_ticket != ticket_pabuf) {
+ error = EBUSY;
+ break;
+ }
+
+ if (pcr->action < PF_CHANGE_ADD_HEAD ||
+ pcr->action > PF_CHANGE_GET_TICKET) {
+ error = EINVAL;
+ break;
+ }
+ ruleset = pf_find_ruleset(pcr->anchor);
+ if (ruleset == NULL) {
+ error = EINVAL;
+ break;
+ }
+ rs_num = pf_get_ruleset_number(pcr->rule.action);
+ if (rs_num >= PF_RULESET_MAX) {
+ error = EINVAL;
+ break;
+ }
+
+ if (pcr->action == PF_CHANGE_GET_TICKET) {
+ pcr->ticket = ++ruleset->rules[rs_num].active.ticket;
+ break;
+ } else {
+ if (pcr->ticket !=
+ ruleset->rules[rs_num].active.ticket) {
+ error = EINVAL;
+ break;
+ }
+ if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
+ error = EINVAL;
+ break;
+ }
+ }
+
+ if (pcr->action != PF_CHANGE_REMOVE) {
+ newrule = pool_get(&pf_rule_pl, PR_NOWAIT);
+ if (newrule == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bcopy(&pcr->rule, newrule, sizeof(struct pf_rule));
+#ifdef __FreeBSD__
+ newrule->cuid = td->td_ucred->cr_ruid;
+ newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
+#else
+ newrule->cuid = p->p_cred->p_ruid;
+ newrule->cpid = p->p_pid;
+#endif
+ TAILQ_INIT(&newrule->rpool.list);
+ /* initialize refcounting */
+ newrule->states = 0;
+ newrule->entries.tqe_prev = NULL;
+#ifndef INET
+ if (newrule->af == AF_INET) {
+ pool_put(&pf_rule_pl, newrule);
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET */
+#ifndef INET6
+ if (newrule->af == AF_INET6) {
+ pool_put(&pf_rule_pl, newrule);
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET6 */
+ if (newrule->ifname[0]) {
+ newrule->kif = pfi_kif_get(newrule->ifname);
+ if (newrule->kif == NULL) {
+ pool_put(&pf_rule_pl, newrule);
+ error = EINVAL;
+ break;
+ }
+ pfi_kif_ref(newrule->kif, PFI_KIF_REF_RULE);
+ } else
+ newrule->kif = NULL;
+
+ if (newrule->rtableid > 0 &&
+#ifdef __FreeBSD__ /* ROUTING */
+ newrule->rtableid > rt_numfibs)
+#else
+ !rtable_exists(newrule->rtableid))
+#endif
+ error = EBUSY;
+
+#ifdef ALTQ
+ /* set queue IDs */
+ if (newrule->qname[0] != 0) {
+ if ((newrule->qid =
+ pf_qname2qid(newrule->qname)) == 0)
+ error = EBUSY;
+ else if (newrule->pqname[0] != 0) {
+ if ((newrule->pqid =
+ pf_qname2qid(newrule->pqname)) == 0)
+ error = EBUSY;
+ } else
+ newrule->pqid = newrule->qid;
+ }
+#endif /* ALTQ */
+ if (newrule->tagname[0])
+ if ((newrule->tag =
+ pf_tagname2tag(newrule->tagname)) == 0)
+ error = EBUSY;
+ if (newrule->match_tagname[0])
+ if ((newrule->match_tag = pf_tagname2tag(
+ newrule->match_tagname)) == 0)
+ error = EBUSY;
+ if (newrule->rt && !newrule->direction)
+ error = EINVAL;
+#ifdef __FreeBSD__
+#if NPFLOG > 0
+ if (!newrule->log)
+ newrule->logif = 0;
+ if (newrule->logif >= PFLOGIFS_MAX)
+ error = EINVAL;
+#endif
+#endif
+ if (pf_rtlabel_add(&newrule->src.addr) ||
+ pf_rtlabel_add(&newrule->dst.addr))
+ error = EBUSY;
+ if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af))
+ error = EINVAL;
+ if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af))
+ error = EINVAL;
+ if (pf_tbladdr_setup(ruleset, &newrule->src.addr))
+ error = EINVAL;
+ if (pf_tbladdr_setup(ruleset, &newrule->dst.addr))
+ error = EINVAL;
+ if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call))
+ error = EINVAL;
+ TAILQ_FOREACH(pa, &pf_pabuf, entries)
+ if (pf_tbladdr_setup(ruleset, &pa->addr))
+ error = EINVAL;
+
+ if (newrule->overload_tblname[0]) {
+ if ((newrule->overload_tbl = pfr_attach_table(
+ ruleset, newrule->overload_tblname)) ==
+ NULL)
+ error = EINVAL;
+ else
+ newrule->overload_tbl->pfrkt_flags |=
+ PFR_TFLAG_ACTIVE;
+ }
+
+ pf_mv_pool(&pf_pabuf, &newrule->rpool.list);
+ if (((((newrule->action == PF_NAT) ||
+ (newrule->action == PF_RDR) ||
+ (newrule->action == PF_BINAT) ||
+ (newrule->rt > PF_FASTROUTE)) &&
+ !newrule->anchor)) &&
+ (TAILQ_FIRST(&newrule->rpool.list) == NULL))
+ error = EINVAL;
+
+ if (error) {
+ pf_rm_rule(NULL, newrule);
+ break;
+ }
+
+#ifdef __FreeBSD__
+ if (!debug_pfugidhack && (newrule->uid.op ||
+ newrule->gid.op ||
+ newrule->log & PF_LOG_SOCKET_LOOKUP)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: debug.pfugidhack enabled\n"));
+ debug_pfugidhack = 1;
+ }
+#endif
+
+ newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list);
+ newrule->evaluations = 0;
+ newrule->packets[0] = newrule->packets[1] = 0;
+ newrule->bytes[0] = newrule->bytes[1] = 0;
+ }
+ pf_empty_pool(&pf_pabuf);
+
+ if (pcr->action == PF_CHANGE_ADD_HEAD)
+ oldrule = TAILQ_FIRST(
+ ruleset->rules[rs_num].active.ptr);
+ else if (pcr->action == PF_CHANGE_ADD_TAIL)
+ oldrule = TAILQ_LAST(
+ ruleset->rules[rs_num].active.ptr, pf_rulequeue);
+ else {
+ oldrule = TAILQ_FIRST(
+ ruleset->rules[rs_num].active.ptr);
+ while ((oldrule != NULL) && (oldrule->nr != pcr->nr))
+ oldrule = TAILQ_NEXT(oldrule, entries);
+ if (oldrule == NULL) {
+ if (newrule != NULL)
+ pf_rm_rule(NULL, newrule);
+ error = EINVAL;
+ break;
+ }
+ }
+
+ if (pcr->action == PF_CHANGE_REMOVE) {
+ pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule);
+ ruleset->rules[rs_num].active.rcount--;
+ } else {
+ if (oldrule == NULL)
+ TAILQ_INSERT_TAIL(
+ ruleset->rules[rs_num].active.ptr,
+ newrule, entries);
+ else if (pcr->action == PF_CHANGE_ADD_HEAD ||
+ pcr->action == PF_CHANGE_ADD_BEFORE)
+ TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
+ else
+ TAILQ_INSERT_AFTER(
+ ruleset->rules[rs_num].active.ptr,
+ oldrule, newrule, entries);
+ ruleset->rules[rs_num].active.rcount++;
+ }
+
+ nr = 0;
+ TAILQ_FOREACH(oldrule,
+ ruleset->rules[rs_num].active.ptr, entries)
+ oldrule->nr = nr++;
+
+ ruleset->rules[rs_num].active.ticket++;
+
+ pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr);
+ pf_remove_if_empty_ruleset(ruleset);
+
+ break;
+ }
+
+ case DIOCCLRSTATES: {
+ struct pf_state *state, *nexts;
+ struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
+ int killed = 0;
+
+ for (state = RB_MIN(pf_state_tree_id, &tree_id); state;
+ state = nexts) {
+ nexts = RB_NEXT(pf_state_tree_id, &tree_id, state);
+
+ if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname,
+ state->u.s.kif->pfik_name)) {
+#if NPFSYNC
+ /* don't send out individual delete messages */
+ state->sync_flags = PFSTATE_NOSYNC;
+#endif
+ pf_unlink_state(state);
+ killed++;
+ }
+ }
+ psk->psk_af = killed;
+#if NPFSYNC
+ pfsync_clear_states(pf_status.hostid, psk->psk_ifname);
+#endif
+ break;
+ }
+
+ case DIOCKILLSTATES: {
+ struct pf_state *state, *nexts;
+ struct pf_state_host *src, *dst;
+ struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
+ int killed = 0;
+
+ for (state = RB_MIN(pf_state_tree_id, &tree_id); state;
+ state = nexts) {
+ nexts = RB_NEXT(pf_state_tree_id, &tree_id, state);
+
+ if (state->direction == PF_OUT) {
+ src = &state->lan;
+ dst = &state->ext;
+ } else {
+ src = &state->ext;
+ dst = &state->lan;
+ }
+ if ((!psk->psk_af || state->af == psk->psk_af)
+ && (!psk->psk_proto || psk->psk_proto ==
+ state->proto) &&
+ PF_MATCHA(psk->psk_src.neg,
+ &psk->psk_src.addr.v.a.addr,
+ &psk->psk_src.addr.v.a.mask,
+ &src->addr, state->af) &&
+ PF_MATCHA(psk->psk_dst.neg,
+ &psk->psk_dst.addr.v.a.addr,
+ &psk->psk_dst.addr.v.a.mask,
+ &dst->addr, state->af) &&
+ (psk->psk_src.port_op == 0 ||
+ pf_match_port(psk->psk_src.port_op,
+ psk->psk_src.port[0], psk->psk_src.port[1],
+ src->port)) &&
+ (psk->psk_dst.port_op == 0 ||
+ pf_match_port(psk->psk_dst.port_op,
+ psk->psk_dst.port[0], psk->psk_dst.port[1],
+ dst->port)) &&
+ (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname,
+ state->u.s.kif->pfik_name))) {
+#if NPFSYNC > 0
+ /* send immediate delete of state */
+ pfsync_delete_state(state);
+ state->sync_flags |= PFSTATE_NOSYNC;
+#endif
+ pf_unlink_state(state);
+ killed++;
+ }
+ }
+ psk->psk_af = killed;
+ break;
+ }
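+
+	/*
+	 * Note the orientation in DIOCKILLSTATES above: for outbound states
+	 * the lan side is treated as the source and ext as the destination,
+	 * and vice versa for inbound ones, so the user-supplied src/dst
+	 * filters match the wire direction of each state.
+	 */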
+
+ case DIOCADDSTATE: {
+ struct pfioc_state *ps = (struct pfioc_state *)addr;
+ struct pf_state *state;
+ struct pfi_kif *kif;
+
+ if (ps->state.timeout >= PFTM_MAX &&
+ ps->state.timeout != PFTM_UNTIL_PACKET) {
+ error = EINVAL;
+ break;
+ }
+ state = pool_get(&pf_state_pl, PR_NOWAIT);
+ if (state == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ kif = pfi_kif_get(ps->state.u.ifname);
+ if (kif == NULL) {
+ pool_put(&pf_state_pl, state);
+ error = ENOENT;
+ break;
+ }
+ bcopy(&ps->state, state, sizeof(struct pf_state));
+ bzero(&state->u, sizeof(state->u));
+ state->rule.ptr = &pf_default_rule;
+ state->nat_rule.ptr = NULL;
+ state->anchor.ptr = NULL;
+ state->rt_kif = NULL;
+ state->creation = time_second;
+ state->pfsync_time = 0;
+ state->packets[0] = state->packets[1] = 0;
+ state->bytes[0] = state->bytes[1] = 0;
+
+ if (pf_insert_state(kif, state)) {
+ pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+ pool_put(&pf_state_pl, state);
+ error = ENOMEM;
+ }
+ break;
+ }
+
+ case DIOCGETSTATE: {
+ struct pfioc_state *ps = (struct pfioc_state *)addr;
+ struct pf_state *state;
+ u_int32_t nr;
+ int secs;
+
+ nr = 0;
+ RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+ if (nr >= ps->nr)
+ break;
+ nr++;
+ }
+ if (state == NULL) {
+ error = EBUSY;
+ break;
+ }
+ secs = time_second;
+ bcopy(state, &ps->state, sizeof(ps->state));
+ strlcpy(ps->state.u.ifname, state->u.s.kif->pfik_name,
+ sizeof(ps->state.u.ifname));
+ ps->state.rule.nr = state->rule.ptr->nr;
+ ps->state.nat_rule.nr = (state->nat_rule.ptr == NULL) ?
+ -1 : state->nat_rule.ptr->nr;
+ ps->state.anchor.nr = (state->anchor.ptr == NULL) ?
+ -1 : state->anchor.ptr->nr;
+ ps->state.creation = secs - ps->state.creation;
+ ps->state.expire = pf_state_expires(state);
+ if (ps->state.expire > secs)
+ ps->state.expire -= secs;
+ else
+ ps->state.expire = 0;
+ break;
+ }
+
+ case DIOCGETSTATES: {
+ struct pfioc_states *ps = (struct pfioc_states *)addr;
+ struct pf_state *state;
+ struct pf_state *p, *pstore;
+ u_int32_t nr = 0;
+ int space = ps->ps_len;
+
+ if (space == 0) {
+ nr = pf_status.states;
+ ps->ps_len = sizeof(struct pf_state) * nr;
+ break;
+ }
+
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+
+ p = ps->ps_states;
+
+ state = TAILQ_FIRST(&state_list);
+ while (state) {
+ if (state->timeout != PFTM_UNLINKED) {
+ int secs = time_second;
+
+ if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len)
+ break;
+
+ bcopy(state, pstore, sizeof(*pstore));
+ strlcpy(pstore->u.ifname,
+ state->u.s.kif->pfik_name,
+ sizeof(pstore->u.ifname));
+ pstore->rule.nr = state->rule.ptr->nr;
+ pstore->nat_rule.nr = (state->nat_rule.ptr ==
+ NULL) ? -1 : state->nat_rule.ptr->nr;
+ pstore->anchor.nr = (state->anchor.ptr ==
+ NULL) ? -1 : state->anchor.ptr->nr;
+ pstore->creation = secs - pstore->creation;
+ pstore->expire = pf_state_expires(state);
+ if (pstore->expire > secs)
+ pstore->expire -= secs;
+ else
+ pstore->expire = 0;
+#ifdef __FreeBSD__
+ PF_COPYOUT(pstore, p, sizeof(*p), error);
+#else
+ error = copyout(pstore, p, sizeof(*p));
+#endif
+ if (error) {
+ free(pstore, M_TEMP);
+ goto fail;
+ }
+ p++;
+ nr++;
+ }
+ state = TAILQ_NEXT(state, u.s.entry_list);
+ }
+
+ ps->ps_len = sizeof(struct pf_state) * nr;
+
+ free(pstore, M_TEMP);
+ break;
+ }
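+
+	/*
+	 * DIOCGETSTATES stages each state through the malloc'd pstore
+	 * bounce buffer before copying it out; PF_COPYOUT is presumably a
+	 * wrapper that drops PF_LOCK around copyout() (an assumption,
+	 * mirroring the explicit PF_UNLOCK()/PF_LOCK() pairs used elsewhere
+	 * in this file), since copyout() may fault and sleep.
+	 */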
+
+ case DIOCGETSTATUS: {
+ struct pf_status *s = (struct pf_status *)addr;
+ bcopy(&pf_status, s, sizeof(struct pf_status));
+ pfi_fill_oldstatus(s);
+ break;
+ }
+
+ case DIOCSETSTATUSIF: {
+ struct pfioc_if *pi = (struct pfioc_if *)addr;
+
+ if (pi->ifname[0] == 0) {
+ bzero(pf_status.ifname, IFNAMSIZ);
+ break;
+ }
+ if (ifunit(pi->ifname) == NULL) {
+ error = EINVAL;
+ break;
+ }
+ strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ);
+ break;
+ }
+
+ case DIOCCLRSTATUS: {
+ bzero(pf_status.counters, sizeof(pf_status.counters));
+ bzero(pf_status.fcounters, sizeof(pf_status.fcounters));
+ bzero(pf_status.scounters, sizeof(pf_status.scounters));
+ pf_status.since = time_second;
+ if (*pf_status.ifname)
+ pfi_clr_istats(pf_status.ifname);
+ break;
+ }
+
+ case DIOCNATLOOK: {
+ struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr;
+ struct pf_state *state;
+ struct pf_state_cmp key;
+ int m = 0, direction = pnl->direction;
+
+ key.af = pnl->af;
+ key.proto = pnl->proto;
+
+ if (!pnl->proto ||
+ PF_AZERO(&pnl->saddr, pnl->af) ||
+ PF_AZERO(&pnl->daddr, pnl->af) ||
+ ((pnl->proto == IPPROTO_TCP ||
+ pnl->proto == IPPROTO_UDP) &&
+ (!pnl->dport || !pnl->sport)))
+ error = EINVAL;
+ else {
+			/*
+			 * userland gives us the source and destination of
+			 * the connection; reverse the lookup so we ask about
+			 * the return traffic instead, which is how the
+			 * connection is keyed in the state tree.
+			 */
+ if (direction == PF_IN) {
+ PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af);
+ key.ext.port = pnl->dport;
+ PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af);
+ key.gwy.port = pnl->sport;
+ state = pf_find_state_all(&key, PF_EXT_GWY, &m);
+ } else {
+ PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af);
+ key.lan.port = pnl->dport;
+ PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af);
+ key.ext.port = pnl->sport;
+ state = pf_find_state_all(&key, PF_LAN_EXT, &m);
+ }
+ if (m > 1)
+ error = E2BIG; /* more than one state */
+ else if (state != NULL) {
+ if (direction == PF_IN) {
+ PF_ACPY(&pnl->rsaddr, &state->lan.addr,
+ state->af);
+ pnl->rsport = state->lan.port;
+ PF_ACPY(&pnl->rdaddr, &pnl->daddr,
+ pnl->af);
+ pnl->rdport = pnl->dport;
+ } else {
+ PF_ACPY(&pnl->rdaddr, &state->gwy.addr,
+ state->af);
+ pnl->rdport = state->gwy.port;
+ PF_ACPY(&pnl->rsaddr, &pnl->saddr,
+ pnl->af);
+ pnl->rsport = pnl->sport;
+ }
+ } else
+ error = ENOENT;
+ }
+ break;
+ }
+
+ case DIOCSETTIMEOUT: {
+ struct pfioc_tm *pt = (struct pfioc_tm *)addr;
+ int old;
+
+ if (pt->timeout < 0 || pt->timeout >= PFTM_MAX ||
+ pt->seconds < 0) {
+ error = EINVAL;
+ goto fail;
+ }
+ old = pf_default_rule.timeout[pt->timeout];
+ if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0)
+ pt->seconds = 1;
+ pf_default_rule.timeout[pt->timeout] = pt->seconds;
+ if (pt->timeout == PFTM_INTERVAL && pt->seconds < old)
+ wakeup(pf_purge_thread);
+ pt->seconds = old;
+ break;
+ }
+
+ case DIOCGETTIMEOUT: {
+ struct pfioc_tm *pt = (struct pfioc_tm *)addr;
+
+ if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
+ error = EINVAL;
+ goto fail;
+ }
+ pt->seconds = pf_default_rule.timeout[pt->timeout];
+ break;
+ }
+
+ case DIOCGETLIMIT: {
+ struct pfioc_limit *pl = (struct pfioc_limit *)addr;
+
+ if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
+ error = EINVAL;
+ goto fail;
+ }
+ pl->limit = pf_pool_limits[pl->index].limit;
+ break;
+ }
+
+ case DIOCSETLIMIT: {
+ struct pfioc_limit *pl = (struct pfioc_limit *)addr;
+ int old_limit;
+
+ if (pl->index < 0 || pl->index >= PF_LIMIT_MAX ||
+ pf_pool_limits[pl->index].pp == NULL) {
+ error = EINVAL;
+ goto fail;
+ }
+#ifdef __FreeBSD__
+ uma_zone_set_max(pf_pool_limits[pl->index].pp, pl->limit);
+#else
+ if (pool_sethardlimit(pf_pool_limits[pl->index].pp,
+ pl->limit, NULL, 0) != 0) {
+ error = EBUSY;
+ goto fail;
+ }
+#endif
+ old_limit = pf_pool_limits[pl->index].limit;
+ pf_pool_limits[pl->index].limit = pl->limit;
+ pl->limit = old_limit;
+ break;
+ }
+
+ case DIOCSETDEBUG: {
+ u_int32_t *level = (u_int32_t *)addr;
+
+ pf_status.debug = *level;
+ break;
+ }
+
+ case DIOCCLRRULECTRS: {
+ /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
+ struct pf_ruleset *ruleset = &pf_main_ruleset;
+ struct pf_rule *rule;
+
+ TAILQ_FOREACH(rule,
+ ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) {
+ rule->evaluations = 0;
+ rule->packets[0] = rule->packets[1] = 0;
+ rule->bytes[0] = rule->bytes[1] = 0;
+ }
+ break;
+ }
+
+#ifdef __FreeBSD__
+ case DIOCGIFSPEED: {
+ struct pf_ifspeed *psp = (struct pf_ifspeed *)addr;
+ struct pf_ifspeed ps;
+ struct ifnet *ifp;
+
+ if (psp->ifname[0] != 0) {
+ /* Can we completely trust user-land? */
+ strlcpy(ps.ifname, psp->ifname, IFNAMSIZ);
+ ifp = ifunit(ps.ifname);
+ if (ifp != NULL)
+ psp->baudrate = ifp->if_baudrate;
+ else
+ error = EINVAL;
+ } else
+ error = EINVAL;
+ break;
+ }
+#endif /* __FreeBSD__ */
+
+#ifdef ALTQ
+ case DIOCSTARTALTQ: {
+ struct pf_altq *altq;
+
+ /* enable all altq interfaces on active list */
+ TAILQ_FOREACH(altq, pf_altqs_active, entries) {
+#ifdef __FreeBSD__
+ if (altq->qname[0] == 0 && (altq->local_flags &
+ PFALTQ_FLAG_IF_REMOVED) == 0) {
+#else
+ if (altq->qname[0] == 0) {
+#endif
+ error = pf_enable_altq(altq);
+ if (error != 0)
+ break;
+ }
+ }
+ if (error == 0)
+ pf_altq_running = 1;
+ DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n"));
+ break;
+ }
+
+ case DIOCSTOPALTQ: {
+ struct pf_altq *altq;
+
+ /* disable all altq interfaces on active list */
+ TAILQ_FOREACH(altq, pf_altqs_active, entries) {
+#ifdef __FreeBSD__
+ if (altq->qname[0] == 0 && (altq->local_flags &
+ PFALTQ_FLAG_IF_REMOVED) == 0) {
+#else
+ if (altq->qname[0] == 0) {
+#endif
+ error = pf_disable_altq(altq);
+ if (error != 0)
+ break;
+ }
+ }
+ if (error == 0)
+ pf_altq_running = 0;
+ DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n"));
+ break;
+ }
+
+ case DIOCADDALTQ: {
+ struct pfioc_altq *pa = (struct pfioc_altq *)addr;
+ struct pf_altq *altq, *a;
+
+ if (pa->ticket != ticket_altqs_inactive) {
+ error = EBUSY;
+ break;
+ }
+ altq = pool_get(&pf_altq_pl, PR_NOWAIT);
+ if (altq == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bcopy(&pa->altq, altq, sizeof(struct pf_altq));
+#ifdef __FreeBSD__
+ altq->local_flags = 0;
+#endif
+
+ /*
+ * if this is for a queue, find the discipline and
+ * copy the necessary fields
+ */
+ if (altq->qname[0] != 0) {
+ if ((altq->qid = pf_qname2qid(altq->qname)) == 0) {
+ error = EBUSY;
+ pool_put(&pf_altq_pl, altq);
+ break;
+ }
+ altq->altq_disc = NULL;
+ TAILQ_FOREACH(a, pf_altqs_inactive, entries) {
+ if (strncmp(a->ifname, altq->ifname,
+ IFNAMSIZ) == 0 && a->qname[0] == 0) {
+ altq->altq_disc = a->altq_disc;
+ break;
+ }
+ }
+ }
+
+#ifdef __FreeBSD__
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(altq->ifname)) == NULL) {
+ altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
+ } else {
+ PF_UNLOCK();
+#endif
+ error = altq_add(altq);
+#ifdef __FreeBSD__
+ PF_LOCK();
+ }
+#endif
+ if (error) {
+ pool_put(&pf_altq_pl, altq);
+ break;
+ }
+
+ TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries);
+ bcopy(altq, &pa->altq, sizeof(struct pf_altq));
+ break;
+ }
+
+ case DIOCGETALTQS: {
+ struct pfioc_altq *pa = (struct pfioc_altq *)addr;
+ struct pf_altq *altq;
+
+ pa->nr = 0;
+ TAILQ_FOREACH(altq, pf_altqs_active, entries)
+ pa->nr++;
+ pa->ticket = ticket_altqs_active;
+ break;
+ }
+
+ case DIOCGETALTQ: {
+ struct pfioc_altq *pa = (struct pfioc_altq *)addr;
+ struct pf_altq *altq;
+ u_int32_t nr;
+
+ if (pa->ticket != ticket_altqs_active) {
+ error = EBUSY;
+ break;
+ }
+ nr = 0;
+ altq = TAILQ_FIRST(pf_altqs_active);
+ while ((altq != NULL) && (nr < pa->nr)) {
+ altq = TAILQ_NEXT(altq, entries);
+ nr++;
+ }
+ if (altq == NULL) {
+ error = EBUSY;
+ break;
+ }
+ bcopy(altq, &pa->altq, sizeof(struct pf_altq));
+ break;
+ }
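+
+ /*
+ * Illustrative sketch (not part of the original sources): the two
+ * cases above implement pf's usual enumerate-then-fetch protocol.
+ * A hypothetical user-land caller first issues DIOCGETALTQS to learn
+ * the count and ticket, then fetches each entry by index:
+ *
+ *	struct pfioc_altq pa;
+ *	u_int32_t i, n, t;
+ *
+ *	ioctl(dev, DIOCGETALTQS, &pa);
+ *	n = pa.nr; t = pa.ticket;
+ *	for (i = 0; i < n; i++) {
+ *		pa.nr = i; pa.ticket = t;
+ *		ioctl(dev, DIOCGETALTQ, &pa);	(EBUSY if rules changed)
+ *	}
+ */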
+
+ case DIOCCHANGEALTQ:
+ /* CHANGEALTQ not supported yet! */
+ error = ENODEV;
+ break;
+
+ case DIOCGETQSTATS: {
+ struct pfioc_qstats *pq = (struct pfioc_qstats *)addr;
+ struct pf_altq *altq;
+ u_int32_t nr;
+ int nbytes;
+
+ if (pq->ticket != ticket_altqs_active) {
+ error = EBUSY;
+ break;
+ }
+ nbytes = pq->nbytes;
+ nr = 0;
+ altq = TAILQ_FIRST(pf_altqs_active);
+ while ((altq != NULL) && (nr < pq->nr)) {
+ altq = TAILQ_NEXT(altq, entries);
+ nr++;
+ }
+ if (altq == NULL) {
+ error = EBUSY;
+ break;
+ }
+#ifdef __FreeBSD__
+ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) {
+ error = ENXIO;
+ break;
+ }
+ PF_UNLOCK();
+#endif
+ error = altq_getqstats(altq, pq->buf, &nbytes);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ if (error == 0) {
+ pq->scheduler = altq->scheduler;
+ pq->nbytes = nbytes;
+ }
+ break;
+ }
+#endif /* ALTQ */
+
+ case DIOCBEGINADDRS: {
+ struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr;
+
+ pf_empty_pool(&pf_pabuf);
+ pp->ticket = ++ticket_pabuf;
+ break;
+ }
+
+ case DIOCADDADDR: {
+ struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr;
+
+ if (pp->ticket != ticket_pabuf) {
+ error = EBUSY;
+ break;
+ }
+#ifndef INET
+ if (pp->af == AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET */
+#ifndef INET6
+ if (pp->af == AF_INET6) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET6 */
+ if (pp->addr.addr.type != PF_ADDR_ADDRMASK &&
+ pp->addr.addr.type != PF_ADDR_DYNIFTL &&
+ pp->addr.addr.type != PF_ADDR_TABLE) {
+ error = EINVAL;
+ break;
+ }
+ pa = pool_get(&pf_pooladdr_pl, PR_NOWAIT);
+ if (pa == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr));
+ if (pa->ifname[0]) {
+ pa->kif = pfi_kif_get(pa->ifname);
+ if (pa->kif == NULL) {
+ pool_put(&pf_pooladdr_pl, pa);
+ error = EINVAL;
+ break;
+ }
+ pfi_kif_ref(pa->kif, PFI_KIF_REF_RULE);
+ }
+ if (pfi_dynaddr_setup(&pa->addr, pp->af)) {
+ pfi_dynaddr_remove(&pa->addr);
+ pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE);
+ pool_put(&pf_pooladdr_pl, pa);
+ error = EINVAL;
+ break;
+ }
+ TAILQ_INSERT_TAIL(&pf_pabuf, pa, entries);
+ break;
+ }
+
+ case DIOCGETADDRS: {
+ struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr;
+
+ pp->nr = 0;
+ pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action,
+ pp->r_num, 0, 1, 0);
+ if (pool == NULL) {
+ error = EBUSY;
+ break;
+ }
+ TAILQ_FOREACH(pa, &pool->list, entries)
+ pp->nr++;
+ break;
+ }
+
+ case DIOCGETADDR: {
+ struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr;
+ u_int32_t nr = 0;
+
+ pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action,
+ pp->r_num, 0, 1, 1);
+ if (pool == NULL) {
+ error = EBUSY;
+ break;
+ }
+ pa = TAILQ_FIRST(&pool->list);
+ while ((pa != NULL) && (nr < pp->nr)) {
+ pa = TAILQ_NEXT(pa, entries);
+ nr++;
+ }
+ if (pa == NULL) {
+ error = EBUSY;
+ break;
+ }
+ bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr));
+ pfi_dynaddr_copyout(&pp->addr.addr);
+ pf_tbladdr_copyout(&pp->addr.addr);
+ pf_rtlabel_copyout(&pp->addr.addr);
+ break;
+ }
+
+ case DIOCCHANGEADDR: {
+ struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr;
+ struct pf_pooladdr *oldpa = NULL, *newpa = NULL;
+ struct pf_ruleset *ruleset;
+
+ if (pca->action < PF_CHANGE_ADD_HEAD ||
+ pca->action > PF_CHANGE_REMOVE) {
+ error = EINVAL;
+ break;
+ }
+ if (pca->addr.addr.type != PF_ADDR_ADDRMASK &&
+ pca->addr.addr.type != PF_ADDR_DYNIFTL &&
+ pca->addr.addr.type != PF_ADDR_TABLE) {
+ error = EINVAL;
+ break;
+ }
+
+ ruleset = pf_find_ruleset(pca->anchor);
+ if (ruleset == NULL) {
+ error = EBUSY;
+ break;
+ }
+ pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action,
+ pca->r_num, pca->r_last, 1, 1);
+ if (pool == NULL) {
+ error = EBUSY;
+ break;
+ }
+ if (pca->action != PF_CHANGE_REMOVE) {
+ newpa = pool_get(&pf_pooladdr_pl, PR_NOWAIT);
+ if (newpa == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr));
+#ifndef INET
+ if (pca->af == AF_INET) {
+ pool_put(&pf_pooladdr_pl, newpa);
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET */
+#ifndef INET6
+ if (pca->af == AF_INET6) {
+ pool_put(&pf_pooladdr_pl, newpa);
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET6 */
+ if (newpa->ifname[0]) {
+ newpa->kif = pfi_kif_get(newpa->ifname);
+ if (newpa->kif == NULL) {
+ pool_put(&pf_pooladdr_pl, newpa);
+ error = EINVAL;
+ break;
+ }
+ pfi_kif_ref(newpa->kif, PFI_KIF_REF_RULE);
+ } else
+ newpa->kif = NULL;
+ if (pfi_dynaddr_setup(&newpa->addr, pca->af) ||
+ pf_tbladdr_setup(ruleset, &newpa->addr)) {
+ pfi_dynaddr_remove(&newpa->addr);
+ pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE);
+ pool_put(&pf_pooladdr_pl, newpa);
+ error = EINVAL;
+ break;
+ }
+ }
+
+ if (pca->action == PF_CHANGE_ADD_HEAD)
+ oldpa = TAILQ_FIRST(&pool->list);
+ else if (pca->action == PF_CHANGE_ADD_TAIL)
+ oldpa = TAILQ_LAST(&pool->list, pf_palist);
+ else {
+ int i = 0;
+
+ oldpa = TAILQ_FIRST(&pool->list);
+ while ((oldpa != NULL) && (i < pca->nr)) {
+ oldpa = TAILQ_NEXT(oldpa, entries);
+ i++;
+ }
+ if (oldpa == NULL) {
+ error = EINVAL;
+ break;
+ }
+ }
+
+ if (pca->action == PF_CHANGE_REMOVE) {
+ TAILQ_REMOVE(&pool->list, oldpa, entries);
+ pfi_dynaddr_remove(&oldpa->addr);
+ pf_tbladdr_remove(&oldpa->addr);
+ pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE);
+ pool_put(&pf_pooladdr_pl, oldpa);
+ } else {
+ if (oldpa == NULL)
+ TAILQ_INSERT_TAIL(&pool->list, newpa, entries);
+ else if (pca->action == PF_CHANGE_ADD_HEAD ||
+ pca->action == PF_CHANGE_ADD_BEFORE)
+ TAILQ_INSERT_BEFORE(oldpa, newpa, entries);
+ else
+ TAILQ_INSERT_AFTER(&pool->list, oldpa,
+ newpa, entries);
+ }
+
+ pool->cur = TAILQ_FIRST(&pool->list);
+ PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr,
+ pca->af);
+ break;
+ }
+
+ case DIOCGETRULESETS: {
+ struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_anchor *anchor;
+
+ pr->path[sizeof(pr->path) - 1] = 0;
+ if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
+ error = EINVAL;
+ break;
+ }
+ pr->nr = 0;
+ if (ruleset->anchor == NULL) {
+ /* XXX kludge for pf_main_ruleset */
+ RB_FOREACH(anchor, pf_anchor_global, &pf_anchors)
+ if (anchor->parent == NULL)
+ pr->nr++;
+ } else {
+ RB_FOREACH(anchor, pf_anchor_node,
+ &ruleset->anchor->children)
+ pr->nr++;
+ }
+ break;
+ }
+
+ case DIOCGETRULESET: {
+ struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_anchor *anchor;
+ u_int32_t nr = 0;
+
+ pr->path[sizeof(pr->path) - 1] = 0;
+ if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
+ error = EINVAL;
+ break;
+ }
+ pr->name[0] = 0;
+ if (ruleset->anchor == NULL) {
+ /* XXX kludge for pf_main_ruleset */
+ RB_FOREACH(anchor, pf_anchor_global, &pf_anchors)
+ if (anchor->parent == NULL && nr++ == pr->nr) {
+ strlcpy(pr->name, anchor->name,
+ sizeof(pr->name));
+ break;
+ }
+ } else {
+ RB_FOREACH(anchor, pf_anchor_node,
+ &ruleset->anchor->children)
+ if (nr++ == pr->nr) {
+ strlcpy(pr->name, anchor->name,
+ sizeof(pr->name));
+ break;
+ }
+ }
+ if (!pr->name[0])
+ error = EBUSY;
+ break;
+ }
+
+ case DIOCRCLRTABLES: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != 0) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
+ io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRADDTABLES: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_table)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size,
+ &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
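+
+ /*
+ * Illustrative sketch (not part of the original sources): the table
+ * ioctls validate pfrio_esize against the element type they expect,
+ * so a hypothetical caller adding a single table would set up:
+ *
+ *	struct pfioc_table io = { 0 };
+ *	struct pfr_table tbl = { 0 };
+ *
+ *	strlcpy(tbl.pfrt_name, "mytable", sizeof(tbl.pfrt_name));
+ *	io.pfrio_buffer = &tbl;
+ *	io.pfrio_size = 1;
+ *	io.pfrio_esize = sizeof(tbl);
+ *	ioctl(dev, DIOCRADDTABLES, &io);
+ */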
+
+ case DIOCRDELTABLES: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_table)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_del_tables(io->pfrio_buffer, io->pfrio_size,
+ &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRGETTABLES: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_table)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_get_tables(&io->pfrio_table, io->pfrio_buffer,
+ &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRGETTSTATS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_tstats)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_get_tstats(&io->pfrio_table, io->pfrio_buffer,
+ &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRCLRTSTATS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_table)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_clr_tstats(io->pfrio_buffer, io->pfrio_size,
+ &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRSETTFLAGS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_table)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_set_tflags(io->pfrio_buffer, io->pfrio_size,
+ io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
+ &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRCLRADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != 0) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
+ io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRADDADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_add_addrs(&io->pfrio_table, io->pfrio_buffer,
+ io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
+ PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRDELADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_del_addrs(&io->pfrio_table, io->pfrio_buffer,
+ io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
+ PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRSETADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer,
+ io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
+ &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
+ PFR_FLAG_USERIOCTL, 0);
+ break;
+ }
+
+ case DIOCRGETADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_get_addrs(&io->pfrio_table, io->pfrio_buffer,
+ &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRGETASTATS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_astats)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_get_astats(&io->pfrio_table, io->pfrio_buffer,
+ &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRCLRASTATS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_clr_astats(&io->pfrio_table, io->pfrio_buffer,
+ io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
+ PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRTSTADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_tst_addrs(&io->pfrio_table, io->pfrio_buffer,
+ io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
+ PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCRINADEFINE: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfr_ina_define(&io->pfrio_table, io->pfrio_buffer,
+ io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
+ io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ break;
+ }
+
+ case DIOCOSFPADD: {
+ struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
+ error = pf_osfp_add(io);
+ break;
+ }
+
+ case DIOCOSFPGET: {
+ struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
+ error = pf_osfp_get(io);
+ break;
+ }
+
+ case DIOCXBEGIN: {
+ struct pfioc_trans *io = (struct pfioc_trans *)addr;
+ struct pfioc_trans_e *ioe;
+ struct pfr_table *table;
+ int i;
+
+ if (io->esize != sizeof(*ioe)) {
+ error = ENODEV;
+ goto fail;
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe),
+ M_TEMP, M_WAITOK);
+ table = (struct pfr_table *)malloc(sizeof(*table),
+ M_TEMP, M_WAITOK);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ for (i = 0; i < io->size; i++) {
+#ifdef __FreeBSD__
+ PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error);
+ if (error) {
+#else
+ if (copyin(io->array+i, ioe, sizeof(*ioe))) {
+#endif
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EFAULT;
+ goto fail;
+ }
+ switch (ioe->rs_num) {
+#ifdef ALTQ
+ case PF_RULESET_ALTQ:
+ if (ioe->anchor[0]) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EINVAL;
+ goto fail;
+ }
+ if ((error = pf_begin_altq(&ioe->ticket))) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ goto fail;
+ }
+ break;
+#endif /* ALTQ */
+ case PF_RULESET_TABLE:
+ bzero(table, sizeof(*table));
+ strlcpy(table->pfrt_anchor, ioe->anchor,
+ sizeof(table->pfrt_anchor));
+ if ((error = pfr_ina_begin(table,
+ &ioe->ticket, NULL, 0))) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ goto fail;
+ }
+ break;
+ default:
+ if ((error = pf_begin_rules(&ioe->ticket,
+ ioe->rs_num, ioe->anchor))) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ goto fail;
+ }
+ break;
+ }
+#ifdef __FreeBSD__
+ PF_COPYOUT(ioe, io->array+i, sizeof(io->array[i]),
+ error);
+ if (error) {
+#else
+ if (copyout(ioe, io->array+i, sizeof(io->array[i]))) {
+#endif
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EFAULT;
+ goto fail;
+ }
+ }
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ break;
+ }
+
+ case DIOCXROLLBACK: {
+ struct pfioc_trans *io = (struct pfioc_trans *)addr;
+ struct pfioc_trans_e *ioe;
+ struct pfr_table *table;
+ int i;
+
+ if (io->esize != sizeof(*ioe)) {
+ error = ENODEV;
+ goto fail;
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe),
+ M_TEMP, M_WAITOK);
+ table = (struct pfr_table *)malloc(sizeof(*table),
+ M_TEMP, M_WAITOK);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ for (i = 0; i < io->size; i++) {
+#ifdef __FreeBSD__
+ PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error);
+ if (error) {
+#else
+ if (copyin(io->array+i, ioe, sizeof(*ioe))) {
+#endif
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EFAULT;
+ goto fail;
+ }
+ switch (ioe->rs_num) {
+#ifdef ALTQ
+ case PF_RULESET_ALTQ:
+ if (ioe->anchor[0]) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EINVAL;
+ goto fail;
+ }
+ if ((error = pf_rollback_altq(ioe->ticket))) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+#endif /* ALTQ */
+ case PF_RULESET_TABLE:
+ bzero(table, sizeof(*table));
+ strlcpy(table->pfrt_anchor, ioe->anchor,
+ sizeof(table->pfrt_anchor));
+ if ((error = pfr_ina_rollback(table,
+ ioe->ticket, NULL, 0))) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+ default:
+ if ((error = pf_rollback_rules(ioe->ticket,
+ ioe->rs_num, ioe->anchor))) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+ }
+ }
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ break;
+ }
+
+ case DIOCXCOMMIT: {
+ struct pfioc_trans *io = (struct pfioc_trans *)addr;
+ struct pfioc_trans_e *ioe;
+ struct pfr_table *table;
+ struct pf_ruleset *rs;
+ int i;
+
+ if (io->esize != sizeof(*ioe)) {
+ error = ENODEV;
+ goto fail;
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe),
+ M_TEMP, M_WAITOK);
+ table = (struct pfr_table *)malloc(sizeof(*table),
+ M_TEMP, M_WAITOK);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ /* first make sure everything will succeed */
+ for (i = 0; i < io->size; i++) {
+#ifdef __FreeBSD__
+ PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error);
+ if (error) {
+#else
+ if (copyin(io->array+i, ioe, sizeof(*ioe))) {
+#endif
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EFAULT;
+ goto fail;
+ }
+ switch (ioe->rs_num) {
+#ifdef ALTQ
+ case PF_RULESET_ALTQ:
+ if (ioe->anchor[0]) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EINVAL;
+ goto fail;
+ }
+ if (!altqs_inactive_open || ioe->ticket !=
+ ticket_altqs_inactive) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EBUSY;
+ goto fail;
+ }
+ break;
+#endif /* ALTQ */
+ case PF_RULESET_TABLE:
+ rs = pf_find_ruleset(ioe->anchor);
+ if (rs == NULL || !rs->topen || ioe->ticket !=
+ rs->tticket) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EBUSY;
+ goto fail;
+ }
+ break;
+ default:
+ if (ioe->rs_num < 0 || ioe->rs_num >=
+ PF_RULESET_MAX) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EINVAL;
+ goto fail;
+ }
+ rs = pf_find_ruleset(ioe->anchor);
+ if (rs == NULL ||
+ !rs->rules[ioe->rs_num].inactive.open ||
+ rs->rules[ioe->rs_num].inactive.ticket !=
+ ioe->ticket) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EBUSY;
+ goto fail;
+ }
+ break;
+ }
+ }
+ /* now do the commit - no errors should happen here */
+ for (i = 0; i < io->size; i++) {
+#ifdef __FreeBSD__
+ PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error);
+ if (error) {
+#else
+ if (copyin(io->array+i, ioe, sizeof(*ioe))) {
+#endif
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ error = EFAULT;
+ goto fail;
+ }
+ switch (ioe->rs_num) {
+#ifdef ALTQ
+ case PF_RULESET_ALTQ:
+ if ((error = pf_commit_altq(ioe->ticket))) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+#endif /* ALTQ */
+ case PF_RULESET_TABLE:
+ bzero(table, sizeof(*table));
+ strlcpy(table->pfrt_anchor, ioe->anchor,
+ sizeof(table->pfrt_anchor));
+ if ((error = pfr_ina_commit(table, ioe->ticket,
+ NULL, NULL, 0))) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+ default:
+ if ((error = pf_commit_rules(ioe->ticket,
+ ioe->rs_num, ioe->anchor))) {
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+ }
+ }
+ free(table, M_TEMP);
+ free(ioe, M_TEMP);
+ break;
+ }
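+
+ /*
+ * Illustrative sketch (not part of the original sources): DIOCXBEGIN,
+ * DIOCXROLLBACK and DIOCXCOMMIT above form a two-phase transaction
+ * that a hypothetical user-land loader drives roughly as follows:
+ *
+ *	struct pfioc_trans_e e = { .rs_num = PF_RULESET_FILTER };
+ *	struct pfioc_trans io = { .size = 1, .esize = sizeof(e),
+ *	    .array = &e };
+ *
+ *	ioctl(dev, DIOCXBEGIN, &io);	(fills e.ticket)
+ *	... load rules against e.ticket ...
+ *	if (ok)
+ *		ioctl(dev, DIOCXCOMMIT, &io);
+ *	else
+ *		ioctl(dev, DIOCXROLLBACK, &io);
+ */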
+
+ case DIOCGETSRCNODES: {
+ struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr;
+ struct pf_src_node *n, *p, *pstore;
+ u_int32_t nr = 0;
+ int space = psn->psn_len;
+
+ if (space == 0) {
+ RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
+ nr++;
+ psn->psn_len = sizeof(struct pf_src_node) * nr;
+ break;
+ }
+
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+
+ p = psn->psn_src_nodes;
+ RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+ int secs = time_second, diff;
+
+ if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
+ break;
+
+ bcopy(n, pstore, sizeof(*pstore));
+ if (n->rule.ptr != NULL)
+ pstore->rule.nr = n->rule.ptr->nr;
+ pstore->creation = secs - pstore->creation;
+ if (pstore->expire > secs)
+ pstore->expire -= secs;
+ else
+ pstore->expire = 0;
+
+ /* adjust the connection rate estimate */
+ diff = secs - n->conn_rate.last;
+ if (diff >= n->conn_rate.seconds)
+ pstore->conn_rate.count = 0;
+ else
+ pstore->conn_rate.count -=
+ n->conn_rate.count * diff /
+ n->conn_rate.seconds;
+
+#ifdef __FreeBSD__
+ PF_COPYOUT(pstore, p, sizeof(*p), error);
+#else
+ error = copyout(pstore, p, sizeof(*p));
+#endif
+ if (error) {
+ free(pstore, M_TEMP);
+ goto fail;
+ }
+ p++;
+ nr++;
+ }
+ psn->psn_len = sizeof(struct pf_src_node) * nr;
+
+ free(pstore, M_TEMP);
+ break;
+ }
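+
+ /*
+ * Illustrative sketch (not part of the original sources): as the code
+ * above shows, psn_len == 0 turns DIOCGETSRCNODES into a size probe,
+ * so a hypothetical caller allocates its buffer in two steps:
+ *
+ *	struct pfioc_src_nodes psn = { .psn_len = 0 };
+ *
+ *	ioctl(dev, DIOCGETSRCNODES, &psn);	(returns required length)
+ *	psn.psn_buf = malloc(psn.psn_len);
+ *	ioctl(dev, DIOCGETSRCNODES, &psn);	(copies out the nodes)
+ */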
+
+ case DIOCCLRSRCNODES: {
+ struct pf_src_node *n;
+ struct pf_state *state;
+
+ RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+ state->src_node = NULL;
+ state->nat_src_node = NULL;
+ }
+ RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+ n->expire = 1;
+ n->states = 0;
+ }
+ pf_purge_expired_src_nodes(1);
+ pf_status.src_nodes = 0;
+ break;
+ }
+
+ case DIOCKILLSRCNODES: {
+ struct pf_src_node *sn;
+ struct pf_state *s;
+ struct pfioc_src_node_kill *psnk = \
+ (struct pfioc_src_node_kill *) addr;
+ int killed = 0;
+
+ RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) {
+ if (PF_MATCHA(psnk->psnk_src.neg, \
+ &psnk->psnk_src.addr.v.a.addr, \
+ &psnk->psnk_src.addr.v.a.mask, \
+ &sn->addr, sn->af) &&
+ PF_MATCHA(psnk->psnk_dst.neg, \
+ &psnk->psnk_dst.addr.v.a.addr, \
+ &psnk->psnk_dst.addr.v.a.mask, \
+ &sn->raddr, sn->af)) {
+ /* Handle state to src_node linkage */
+ if (sn->states != 0) {
+ RB_FOREACH(s, pf_state_tree_id,
+ &tree_id) {
+ if (s->src_node == sn)
+ s->src_node = NULL;
+ if (s->nat_src_node == sn)
+ s->nat_src_node = NULL;
+ }
+ sn->states = 0;
+ }
+ sn->expire = 1;
+ killed++;
+ }
+ }
+
+ if (killed > 0)
+ pf_purge_expired_src_nodes(1);
+
+ psnk->psnk_af = killed;
+ break;
+ }
+
+ case DIOCSETHOSTID: {
+ u_int32_t *hostid = (u_int32_t *)addr;
+
+ if (*hostid == 0)
+ pf_status.hostid = arc4random();
+ else
+ pf_status.hostid = *hostid;
+ break;
+ }
+
+ case DIOCOSFPFLUSH:
+ pf_osfp_flush();
+ break;
+
+ case DIOCIGETIFACES: {
+ struct pfioc_iface *io = (struct pfioc_iface *)addr;
+
+ if (io->pfiio_esize != sizeof(struct pfi_kif)) {
+ error = ENODEV;
+ break;
+ }
+ error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer,
+ &io->pfiio_size);
+ break;
+ }
+
+ case DIOCSETIFFLAG: {
+ struct pfioc_iface *io = (struct pfioc_iface *)addr;
+
+ error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
+ break;
+ }
+
+ case DIOCCLRIFFLAG: {
+ struct pfioc_iface *io = (struct pfioc_iface *)addr;
+
+ error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
+ break;
+ }
+
+ default:
+ error = ENODEV;
+ break;
+ }
+fail:
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+
+ if (flags & FWRITE)
+ sx_xunlock(&pf_consistency_lock);
+ else
+ sx_sunlock(&pf_consistency_lock);
+#else
+ splx(s);
+ /* XXX: Lock order? */
+ if (flags & FWRITE)
+ rw_exit_write(&pf_consistency_lock);
+ else
+ rw_exit_read(&pf_consistency_lock);
+#endif
+ return (error);
+}
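+
+/*
+ * Illustrative sketch (not part of the original sources): user land
+ * reaches the handler above through ioctl(2) on /dev/pf, e.g. to read
+ * a timeout via the DIOCGETTIMEOUT case:
+ *
+ *	struct pfioc_tm pt;
+ *	int dev = open("/dev/pf", O_RDONLY);
+ *
+ *	pt.timeout = PFTM_INTERVAL;
+ *	if (ioctl(dev, DIOCGETTIMEOUT, &pt) == 0)
+ *		printf("interval: %d seconds\n", pt.seconds);
+ */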
+
+#ifdef __FreeBSD__
+/*
+ * XXX - Check for version mismatch!!!
+ */
+static void
+pf_clear_states(void)
+{
+ struct pf_state *state;
+
+ RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+ state->timeout = PFTM_PURGE;
+#if NPFSYNC
+ /* don't send out individual delete messages */
+ state->sync_flags = PFSTATE_NOSYNC;
+#endif
+ pf_unlink_state(state);
+ }
+
+#if 0 /* NPFSYNC */
+/*
+ * XXX This is called on module unload; we do not want to sync
+ * that over?
+ */
+ pfsync_clear_states(pf_status.hostid, psk->psk_ifname);
+#endif
+}
+
+static int
+pf_clear_tables(void)
+{
+ struct pfioc_table io;
+ int error;
+
+ bzero(&io, sizeof(io));
+
+ error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel,
+ io.pfrio_flags);
+
+ return (error);
+}
+
+static void
+pf_clear_srcnodes(void)
+{
+ struct pf_src_node *n;
+ struct pf_state *state;
+
+ RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+ state->src_node = NULL;
+ state->nat_src_node = NULL;
+ }
+ RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+ n->expire = 1;
+ n->states = 0;
+ }
+}
+/*
+ * XXX - Check for version mismatch!!!
+ */
+
+/*
+ * Duplicate pfctl -Fa operation to get rid of as much as we can.
+ */
+static int
+shutdown_pf(void)
+{
+ int error = 0;
+ u_int32_t t[5];
+ char nn = '\0';
+
+ pf_status.running = 0;
+ do {
+ if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
+ != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n"));
+ break;
+ }
+ if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
+ != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
+ break; /* XXX: rollback? */
+ }
+ if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
+ != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
+ break; /* XXX: rollback? */
+ }
+ if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
+ != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
+ break; /* XXX: rollback? */
+ }
+ if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
+ != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
+ break; /* XXX: rollback? */
+ }
+
+ /* XXX: these should always succeed here */
+ pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn);
+ pf_commit_rules(t[1], PF_RULESET_FILTER, &nn);
+ pf_commit_rules(t[2], PF_RULESET_NAT, &nn);
+ pf_commit_rules(t[3], PF_RULESET_BINAT, &nn);
+ pf_commit_rules(t[4], PF_RULESET_RDR, &nn);
+
+ if ((error = pf_clear_tables()) != 0)
+ break;
+
+#ifdef ALTQ
+ if ((error = pf_begin_altq(&t[0])) != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
+ break;
+ }
+ pf_commit_altq(t[0]);
+#endif
+
+ pf_clear_states();
+
+ pf_clear_srcnodes();
+
+ /* status does not use malloc'ed mem, so there is no need to clean up */
+ /* fingerprints and interfaces have their own cleanup code */
+ } while(0);
+
+ return (error);
+}
+
+static int
+pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+ struct inpcb *inp)
+{
+ /*
+ * XXX Wed Jul 9 22:03:16 2003 UTC
+ * OpenBSD has changed its byte ordering convention for ip_len/ip_off
+ * in the network stack. OpenBSD's network stack used to convert
+ * ip_len/ip_off to host byte order first, as FreeBSD still does.
+ * This is no longer the case, so we must convert back to network
+ * byte order before handing the packet to pf.
+ */
+ struct ip *h = NULL;
+ int chk;
+
+ if ((*m)->m_pkthdr.len >= (int)sizeof(struct ip)) {
+ /* if m_pkthdr.len is less than the IP header, pf will handle it. */
+ h = mtod(*m, struct ip *);
+ HTONS(h->ip_len);
+ HTONS(h->ip_off);
+ }
+ chk = pf_test(PF_IN, ifp, m, NULL, inp);
+ if (chk && *m) {
+ m_freem(*m);
+ *m = NULL;
+ }
+ if (*m != NULL) {
+ /* pf_test can change ip header location */
+ h = mtod(*m, struct ip *);
+ NTOHS(h->ip_len);
+ NTOHS(h->ip_off);
+ }
+ return chk;
+}
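+
+/*
+ * Worked example (illustrative, not part of the original sources): on
+ * a little-endian machine a 1500 byte packet has ip_len 0x05dc, stored
+ * as the bytes dc 05 in host order; HTONS() above rewrites it to the
+ * wire order 05 dc expected by pf_test(), and NTOHS() restores the
+ * host order the rest of the FreeBSD stack expects.
+ */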
+
+static int
+pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+ struct inpcb *inp)
+{
+ /*
+ * XXX Wed Jul 9 22:03:16 2003 UTC
+ * OpenBSD has changed its byte ordering convention for ip_len/ip_off
+ * in the network stack. OpenBSD's network stack used to convert
+ * ip_len/ip_off to host byte order first, as FreeBSD still does.
+ * This is no longer the case, so we must convert back to network
+ * byte order before handing the packet to pf.
+ */
+ struct ip *h = NULL;
+ int chk;
+
+ /* We need a proper CSUM before we start (see OpenBSD ip_output) */
+ if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ in_delayed_cksum(*m);
+ (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+ if ((*m)->m_pkthdr.len >= (int)sizeof(*h)) {
+ /* if m_pkthdr.len is less than the IP header, pf will handle it. */
+ h = mtod(*m, struct ip *);
+ HTONS(h->ip_len);
+ HTONS(h->ip_off);
+ }
+ chk = pf_test(PF_OUT, ifp, m, NULL, inp);
+ if (chk && *m) {
+ m_freem(*m);
+ *m = NULL;
+ }
+ if (*m != NULL) {
+ /* pf_test can change ip header location */
+ h = mtod(*m, struct ip *);
+ NTOHS(h->ip_len);
+ NTOHS(h->ip_off);
+ }
+ return chk;
+}
+
+#ifdef INET6
+static int
+pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+ struct inpcb *inp)
+{
+
+ /*
+ * IPv6 is not affected by ip_len/ip_off byte order changes.
+ */
+ int chk;
+
+ /*
+ * In the case of loopback traffic IPv6 uses the real interface in
+ * order to support scoped addresses. To support stateful filtering
+ * we have to change this to lo0, as is the case with IPv4.
+ */
+ chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ? V_loif : ifp, m,
+ NULL, inp);
+ if (chk && *m) {
+ m_freem(*m);
+ *m = NULL;
+ }
+ return chk;
+}
+
+static int
+pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+ struct inpcb *inp)
+{
+ /*
+ * IPv6 is not affected by ip_len/ip_off byte order changes.
+ */
+ int chk;
+
+ /* We need a proper CSUM before we start (see OpenBSD ip_output) */
+ if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ in_delayed_cksum(*m);
+ (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+ chk = pf_test6(PF_OUT, ifp, m, NULL, inp);
+ if (chk && *m) {
+ m_freem(*m);
+ *m = NULL;
+ }
+ return chk;
+}
+#endif /* INET6 */
+
+static int
+hook_pf(void)
+{
+ struct pfil_head *pfh_inet;
+#ifdef INET6
+ struct pfil_head *pfh_inet6;
+#endif
+
+ PF_ASSERT(MA_NOTOWNED);
+
+ if (pf_pfil_hooked)
+ return (0);
+
+ pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
+ if (pfh_inet == NULL)
+ return (ESRCH); /* XXX */
+ pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
+ pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
+#ifdef INET6
+ pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
+ if (pfh_inet6 == NULL) {
+ pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
+ pfh_inet);
+ pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
+ pfh_inet);
+ return (ESRCH); /* XXX */
+ }
+ pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
+ pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
+#endif
+
+ pf_pfil_hooked = 1;
+ return (0);
+}
+
+static int
+dehook_pf(void)
+{
+ struct pfil_head *pfh_inet;
+#ifdef INET6
+ struct pfil_head *pfh_inet6;
+#endif
+
+ PF_ASSERT(MA_NOTOWNED);
+
+ if (pf_pfil_hooked == 0)
+ return (0);
+
+ pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
+ if (pfh_inet == NULL)
+ return (ESRCH); /* XXX */
+ pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
+ pfh_inet);
+ pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
+ pfh_inet);
+#ifdef INET6
+ pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
+ if (pfh_inet6 == NULL)
+ return (ESRCH); /* XXX */
+ pfil_remove_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK,
+ pfh_inet6);
+ pfil_remove_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK,
+ pfh_inet6);
+#endif
+
+ pf_pfil_hooked = 0;
+ return (0);
+}
+
+static int
+pf_load(void)
+{
+ init_zone_var();
+ init_pf_mutex();
+ pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME);
+ if (pfattach() < 0) {
+ destroy_dev(pf_dev);
+ destroy_pf_mutex();
+ return (ENOMEM);
+ }
+ return (0);
+}
+
+static int
+pf_unload(void)
+{
+ int error = 0;
+
+ PF_LOCK();
+ pf_status.running = 0;
+ PF_UNLOCK();
+ error = dehook_pf();
+ if (error) {
+ /*
+ * Should not happen!
+ * XXX Due to error code ESRCH, kldunload will show
+ * a message like 'No such process'.
+ */
+ printf("%s: pfil unregistration failed\n", __FUNCTION__);
+ return error;
+ }
+ PF_LOCK();
+ shutdown_pf();
+ pf_end_threads = 1;
+ while (pf_end_threads < 2) {
+ wakeup_one(pf_purge_thread);
+ msleep(pf_purge_thread, &pf_task_mtx, 0, "pftmo", hz);
+ }
+ pfi_cleanup();
+ pf_osfp_flush();
+ pf_osfp_cleanup();
+ cleanup_pf_zone();
+ PF_UNLOCK();
+ destroy_dev(pf_dev);
+ destroy_pf_mutex();
+ return error;
+}
+
+static int
+pf_modevent(module_t mod, int type, void *data)
+{
+ int error = 0;
+
+ switch(type) {
+ case MOD_LOAD:
+ error = pf_load();
+ break;
+
+ case MOD_UNLOAD:
+ error = pf_unload();
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static moduledata_t pf_mod = {
+ "pf",
+ pf_modevent,
+ 0
+};
+
+DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST);
+MODULE_VERSION(pf, PF_MODVER);
+#endif /* __FreeBSD__ */
diff --git a/freebsd/sys/contrib/pf/net/pf_mtag.h b/freebsd/sys/contrib/pf/net/pf_mtag.h
new file mode 100644
index 00000000..09aeb25c
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pf_mtag.h
@@ -0,0 +1,82 @@
+/* $FreeBSD$ */
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _NET_PF_MTAG_HH_
+#define _NET_PF_MTAG_HH_
+
+#ifdef _KERNEL
+
+#define PF_TAG_GENERATED 0x01
+#define PF_TAG_FRAGCACHE 0x02
+#define PF_TAG_TRANSLATE_LOCALHOST 0x04
+
+struct pf_mtag {
+ void *hdr; /* saved hdr pos in mbuf, for ECN */
+ u_int rtableid; /* alternate routing table id */
+ u_int32_t qid; /* queue id */
+ u_int16_t tag; /* tag id */
+ u_int8_t flags;
+ u_int8_t routed;
+ sa_family_t af; /* for ECN */
+};
+
+static __inline struct pf_mtag *pf_find_mtag(struct mbuf *);
+static __inline struct pf_mtag *pf_get_mtag(struct mbuf *);
+
+static __inline struct pf_mtag *
+pf_find_mtag(struct mbuf *m)
+{
+ struct m_tag *mtag;
+
+ if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL)
+ return (NULL);
+
+ return ((struct pf_mtag *)(mtag + 1));
+}
+
+static __inline struct pf_mtag *
+pf_get_mtag(struct mbuf *m)
+{
+ struct m_tag *mtag;
+
+ if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) {
+ mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag),
+ M_NOWAIT);
+ if (mtag == NULL)
+ return (NULL);
+ bzero(mtag + 1, sizeof(struct pf_mtag));
+ m_tag_prepend(m, mtag);
+ }
+
+ return ((struct pf_mtag *)(mtag + 1));
+}
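+
+/*
+ * Usage sketch (illustrative, not part of the original sources): a
+ * caller that wants to tag an mbuf, e.g. with a queue id, would do
+ *
+ *	struct pf_mtag *t;
+ *
+ *	if ((t = pf_get_mtag(m)) == NULL)
+ *		return (ENOMEM);	(tag allocation failed)
+ *	t->qid = qid;
+ *
+ * pf_find_mtag() only looks an existing tag up and never allocates.
+ */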
+#endif /* _KERNEL */
+#endif /* _NET_PF_MTAG_HH_ */
diff --git a/freebsd/sys/contrib/pf/net/pf_norm.c b/freebsd/sys/contrib/pf/net/pf_norm.c
new file mode 100644
index 00000000..ea68ab50
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pf_norm.c
@@ -0,0 +1,2062 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
+
+/*
+ * Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_pf.h>
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef DEV_PFLOG
+#define NPFLOG DEV_PFLOG
+#else
+#define NPFLOG 0
+#endif
+#else
+#include <freebsd/local/pflog.h>
+#endif
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/time.h>
+#ifndef __FreeBSD__
+#include <freebsd/sys/pool.h>
+
+#include <freebsd/dev/rndvar.h>
+#endif
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/if_pflog.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/udp.h>
+#include <freebsd/netinet/ip_icmp.h>
+
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif /* INET6 */
+
+#include <freebsd/net/pfvar.h>
+
+#ifndef __FreeBSD__
+#include <freebsd/inttypes.h>
+
+struct pf_frent {
+ LIST_ENTRY(pf_frent) fr_next;
+ struct ip *fr_ip;
+ struct mbuf *fr_m;
+};
+
+struct pf_frcache {
+ LIST_ENTRY(pf_frcache) fr_next;
+ uint16_t fr_off;
+ uint16_t fr_end;
+};
+#endif
+
+#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this packet */
+#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */
+#define PFFRAG_DROP 0x0004 /* Drop all fragments */
+#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER))
+
+#ifndef __FreeBSD__
+struct pf_fragment {
+ RB_ENTRY(pf_fragment) fr_entry;
+ TAILQ_ENTRY(pf_fragment) frag_next;
+ struct in_addr fr_src;
+ struct in_addr fr_dst;
+ u_int8_t fr_p; /* protocol of this fragment */
+ u_int8_t fr_flags; /* status flags */
+ u_int16_t fr_id; /* fragment id for reassemble */
+ u_int16_t fr_max; /* fragment data max */
+ u_int32_t fr_timeout;
+#define fr_queue fr_u.fru_queue
+#define fr_cache fr_u.fru_cache
+ union {
+ LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */
+ LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */
+ } fr_u;
+};
+#endif
+
+TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue;
+TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue;
+
+#ifndef __FreeBSD__
+static __inline int pf_frag_compare(struct pf_fragment *,
+ struct pf_fragment *);
+#else
+static int pf_frag_compare(struct pf_fragment *,
+ struct pf_fragment *);
+#endif
+RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
+RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
+RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
+
+/* Private prototypes */
+void pf_ip2key(struct pf_fragment *, struct ip *);
+void pf_remove_fragment(struct pf_fragment *);
+void pf_flush_fragments(void);
+void pf_free_fragment(struct pf_fragment *);
+struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *);
+struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
+ struct pf_frent *, int);
+struct mbuf *pf_fragcache(struct mbuf **, struct ip*,
+ struct pf_fragment **, int, int, int *);
+int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
+ struct tcphdr *, int);
+
+#define DPFPRINTF(x) do { \
+ if (pf_status.debug >= PF_DEBUG_MISC) { \
+ printf("%s: ", __func__); \
+ printf x ; \
+ } \
+} while(0)
+
+/* Globals */
+#ifdef __FreeBSD__
+uma_zone_t pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
+uma_zone_t pf_state_scrub_pl;
+#else
+struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
+struct pool pf_state_scrub_pl;
+#endif
+int pf_nfrents, pf_ncache;
+
+void
+pf_normalize_init(void)
+{
+#ifdef __FreeBSD__
+ /*
+ * XXX
+ * No high water mark support (it is a hint, not a hard limit).
+ * uma_zone_set_max(pf_frag_pl, PFFRAG_FRAG_HIWAT);
+ */
+ uma_zone_set_max(pf_frent_pl, PFFRAG_FRENT_HIWAT);
+ uma_zone_set_max(pf_cache_pl, PFFRAG_FRCACHE_HIWAT);
+ uma_zone_set_max(pf_cent_pl, PFFRAG_FRCENT_HIWAT);
+#else
+ pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
+ NULL);
+ pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
+ NULL);
+ pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
+ "pffrcache", NULL);
+ pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
+ NULL);
+ pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
+ "pfstscr", NULL);
+
+ pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
+ pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
+ pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
+ pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
+#endif
+
+ TAILQ_INIT(&pf_fragqueue);
+ TAILQ_INIT(&pf_cachequeue);
+}
+
+#ifdef __FreeBSD__
+static int
+#else
+static __inline int
+#endif
+pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
+{
+ int diff;
+
+ if ((diff = a->fr_id - b->fr_id))
+ return (diff);
+ else if ((diff = a->fr_p - b->fr_p))
+ return (diff);
+ else if (a->fr_src.s_addr < b->fr_src.s_addr)
+ return (-1);
+ else if (a->fr_src.s_addr > b->fr_src.s_addr)
+ return (1);
+ else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
+ return (-1);
+ else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
+ return (1);
+ return (0);
+}
+
+void
+pf_purge_expired_fragments(void)
+{
+ struct pf_fragment *frag;
+ u_int32_t expire = time_second -
+ pf_default_rule.timeout[PFTM_FRAG];
+
+ while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
+#ifdef __FreeBSD__
+ KASSERT((BUFFER_FRAGMENTS(frag)),
+ ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
+#else
+ KASSERT(BUFFER_FRAGMENTS(frag));
+#endif
+ if (frag->fr_timeout > expire)
+ break;
+
+ DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
+ pf_free_fragment(frag);
+ }
+
+ while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
+#ifdef __FreeBSD__
+ KASSERT((!BUFFER_FRAGMENTS(frag)),
+ ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
+#else
+ KASSERT(!BUFFER_FRAGMENTS(frag));
+#endif
+ if (frag->fr_timeout > expire)
+ break;
+
+ DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
+ pf_free_fragment(frag);
+#ifdef __FreeBSD__
+ KASSERT((TAILQ_EMPTY(&pf_cachequeue) ||
+ TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag),
+ ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s",
+ __FUNCTION__));
+#else
+ KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
+ TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
+#endif
+ }
+}
+
+/*
+ * Try to flush old fragments to make space for new ones
+ */
+
+void
+pf_flush_fragments(void)
+{
+ struct pf_fragment *frag;
+ int goal;
+
+ goal = pf_nfrents * 9 / 10;
+ DPFPRINTF(("trying to free > %d frents\n",
+ pf_nfrents - goal));
+ while (goal < pf_nfrents) {
+ frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
+ if (frag == NULL)
+ break;
+ pf_free_fragment(frag);
+ }
+
+
+ goal = pf_ncache * 9 / 10;
+ DPFPRINTF(("trying to free > %d cache entries\n",
+ pf_ncache - goal));
+ while (goal < pf_ncache) {
+ frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
+ if (frag == NULL)
+ break;
+ pf_free_fragment(frag);
+ }
+}
+
+/* Frees the fragments and all associated entries */
+
+void
+pf_free_fragment(struct pf_fragment *frag)
+{
+ struct pf_frent *frent;
+ struct pf_frcache *frcache;
+
+ /* Free all fragments */
+ if (BUFFER_FRAGMENTS(frag)) {
+ for (frent = LIST_FIRST(&frag->fr_queue); frent;
+ frent = LIST_FIRST(&frag->fr_queue)) {
+ LIST_REMOVE(frent, fr_next);
+
+ m_freem(frent->fr_m);
+ pool_put(&pf_frent_pl, frent);
+ pf_nfrents--;
+ }
+ } else {
+ for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
+ frcache = LIST_FIRST(&frag->fr_cache)) {
+ LIST_REMOVE(frcache, fr_next);
+
+#ifdef __FreeBSD__
+ KASSERT((LIST_EMPTY(&frag->fr_cache) ||
+ LIST_FIRST(&frag->fr_cache)->fr_off >
+ frcache->fr_end),
+ ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
+ " frcache->fr_end): %s", __FUNCTION__));
+#else
+ KASSERT(LIST_EMPTY(&frag->fr_cache) ||
+ LIST_FIRST(&frag->fr_cache)->fr_off >
+ frcache->fr_end);
+#endif
+
+ pool_put(&pf_cent_pl, frcache);
+ pf_ncache--;
+ }
+ }
+
+ pf_remove_fragment(frag);
+}
+
+void
+pf_ip2key(struct pf_fragment *key, struct ip *ip)
+{
+ key->fr_p = ip->ip_p;
+ key->fr_id = ip->ip_id;
+ key->fr_src.s_addr = ip->ip_src.s_addr;
+ key->fr_dst.s_addr = ip->ip_dst.s_addr;
+}
+
+struct pf_fragment *
+pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
+{
+ struct pf_fragment key;
+ struct pf_fragment *frag;
+
+ pf_ip2key(&key, ip);
+
+ frag = RB_FIND(pf_frag_tree, tree, &key);
+ if (frag != NULL) {
+ /* XXX Are we sure we want to update the timeout? */
+ frag->fr_timeout = time_second;
+ if (BUFFER_FRAGMENTS(frag)) {
+ TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
+ TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
+ } else {
+ TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
+ TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
+ }
+ }
+
+ return (frag);
+}
+
+/* Removes a fragment from the fragment queue and frees the fragment */
+
+void
+pf_remove_fragment(struct pf_fragment *frag)
+{
+ if (BUFFER_FRAGMENTS(frag)) {
+ RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
+ TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
+ pool_put(&pf_frag_pl, frag);
+ } else {
+ RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
+ TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
+ pool_put(&pf_cache_pl, frag);
+ }
+}
+
+#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
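+/*
+ * Worked example (illustrative, not part of the original sources):
+ * ip_off keeps the fragment offset in 8 byte units, so FR_IP_OFF()
+ * masks off the flag bits and shifts left by 3 to get bytes; an
+ * offset field of 185 thus means byte offset 185 * 8 = 1480 in the
+ * reassembled packet.
+ */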
+struct mbuf *
+pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
+ struct pf_frent *frent, int mff)
+{
+ struct mbuf *m = *m0, *m2;
+ struct pf_frent *frea, *next;
+ struct pf_frent *frep = NULL;
+ struct ip *ip = frent->fr_ip;
+ int hlen = ip->ip_hl << 2;
+ u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
+ u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
+ u_int16_t max = ip_len + off;
+
+#ifdef __FreeBSD__
+ KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
+ ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
+#else
+ KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));
+#endif
+
+ /* Strip off ip header */
+ m->m_data += hlen;
+ m->m_len -= hlen;
+
+ /* Create a new reassembly queue for this packet */
+ if (*frag == NULL) {
+ *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
+ if (*frag == NULL) {
+ pf_flush_fragments();
+ *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
+ if (*frag == NULL)
+ goto drop_fragment;
+ }
+
+ (*frag)->fr_flags = 0;
+ (*frag)->fr_max = 0;
+ (*frag)->fr_src = frent->fr_ip->ip_src;
+ (*frag)->fr_dst = frent->fr_ip->ip_dst;
+ (*frag)->fr_p = frent->fr_ip->ip_p;
+ (*frag)->fr_id = frent->fr_ip->ip_id;
+ (*frag)->fr_timeout = time_second;
+ LIST_INIT(&(*frag)->fr_queue);
+
+ RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
+ TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
+
+ /* We do not have a previous fragment */
+ frep = NULL;
+ goto insert;
+ }
+
+ /*
+ * Find a fragment after the current one:
+ * - off contains the real shifted offset.
+ */
+ LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
+ if (FR_IP_OFF(frea) > off)
+ break;
+ frep = frea;
+ }
+
+#ifdef __FreeBSD__
+ KASSERT((frep != NULL || frea != NULL),
+ ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));;
+#else
+ KASSERT(frep != NULL || frea != NULL);
+#endif
+
+ if (frep != NULL &&
+ FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
+ 4 > off)
+ {
+ u_int16_t precut;
+
+ precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
+ frep->fr_ip->ip_hl * 4 - off;
+ if (precut >= ip_len)
+ goto drop_fragment;
+ m_adj(frent->fr_m, precut);
+ DPFPRINTF(("overlap -%d\n", precut));
+ /* Enforce 8 byte boundaries */
+ ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
+ off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
+ ip_len -= precut;
+ ip->ip_len = htons(ip_len);
+ }
+
+ for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
+ frea = next)
+ {
+ u_int16_t aftercut;
+
+ aftercut = ip_len + off - FR_IP_OFF(frea);
+ DPFPRINTF(("adjust overlap %d\n", aftercut));
+ if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
+ * 4)
+ {
+ frea->fr_ip->ip_len =
+ htons(ntohs(frea->fr_ip->ip_len) - aftercut);
+ frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
+ (aftercut >> 3));
+ m_adj(frea->fr_m, aftercut);
+ break;
+ }
+
+ /* This fragment is completely overlapped, lose it */
+ next = LIST_NEXT(frea, fr_next);
+ m_freem(frea->fr_m);
+ LIST_REMOVE(frea, fr_next);
+ pool_put(&pf_frent_pl, frea);
+ pf_nfrents--;
+ }
+
+ insert:
+ /* Update maximum data size */
+ if ((*frag)->fr_max < max)
+ (*frag)->fr_max = max;
+ /* This is the last segment */
+ if (!mff)
+ (*frag)->fr_flags |= PFFRAG_SEENLAST;
+
+ if (frep == NULL)
+ LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
+ else
+ LIST_INSERT_AFTER(frep, frent, fr_next);
+
+ /* Check if we are completely reassembled */
+ if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
+ return (NULL);
+
+ /* Check if we have all the data */
+ off = 0;
+ for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
+ next = LIST_NEXT(frep, fr_next);
+
+ off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
+ if (off < (*frag)->fr_max &&
+ (next == NULL || FR_IP_OFF(next) != off))
+ {
+ DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
+ off, next == NULL ? -1 : FR_IP_OFF(next),
+ (*frag)->fr_max));
+ return (NULL);
+ }
+ }
+ DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
+ if (off < (*frag)->fr_max)
+ return (NULL);
+
+ /* We have all the data */
+ frent = LIST_FIRST(&(*frag)->fr_queue);
+#ifdef __FreeBSD__
+ KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__));
+#else
+ KASSERT(frent != NULL);
+#endif
+ if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
+ DPFPRINTF(("drop: too big: %d\n", off));
+ pf_free_fragment(*frag);
+ *frag = NULL;
+ return (NULL);
+ }
+ next = LIST_NEXT(frent, fr_next);
+
+ /* Magic from ip_input */
+ ip = frent->fr_ip;
+ m = frent->fr_m;
+ m2 = m->m_next;
+ m->m_next = NULL;
+ m_cat(m, m2);
+ pool_put(&pf_frent_pl, frent);
+ pf_nfrents--;
+ for (frent = next; frent != NULL; frent = next) {
+ next = LIST_NEXT(frent, fr_next);
+
+ m2 = frent->fr_m;
+ pool_put(&pf_frent_pl, frent);
+ pf_nfrents--;
+#ifdef __FreeBSD__
+ m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags;
+ m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data;
+#endif
+ m_cat(m, m2);
+ }
+#ifdef __FreeBSD__
+ while (m->m_pkthdr.csum_data & 0xffff0000)
+ m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
+ (m->m_pkthdr.csum_data >> 16);
+#endif
+
+ ip->ip_src = (*frag)->fr_src;
+ ip->ip_dst = (*frag)->fr_dst;
+
+ /* Remove from fragment queue */
+ pf_remove_fragment(*frag);
+ *frag = NULL;
+
+ hlen = ip->ip_hl << 2;
+ ip->ip_len = htons(off + hlen);
+ m->m_len += hlen;
+ m->m_data -= hlen;
+
+ /* some debugging cruft by sklower, below, will go away soon */
+ /* XXX this should be done elsewhere */
+ if (m->m_flags & M_PKTHDR) {
+ int plen = 0;
+ for (m2 = m; m2; m2 = m2->m_next)
+ plen += m2->m_len;
+ m->m_pkthdr.len = plen;
+ }
+
+ DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
+ return (m);
+
+ drop_fragment:
+ /* Oops - fail safe - drop packet */
+ pool_put(&pf_frent_pl, frent);
+ pf_nfrents--;
+ m_freem(m);
+ return (NULL);
+}
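+
+/*
+ * Worked example (illustrative, not part of the original sources): if
+ * a queued fragment frep covers bytes 0-1479 and the new fragment has
+ * off 1472 and ip_len 1000, precut above is 8; m_adj() trims those 8
+ * duplicate payload bytes, off becomes 1480 and ip_len 992, keeping
+ * every entry aligned on an 8 byte boundary.
+ */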
+
+struct mbuf *
+pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
+ int drop, int *nomem)
+{
+ struct mbuf *m = *m0;
+ struct pf_frcache *frp, *fra, *cur = NULL;
+ int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
+ u_int16_t off = ntohs(h->ip_off) << 3;
+ u_int16_t max = ip_len + off;
+ int hosed = 0;
+
+#ifdef __FreeBSD__
+ KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
+ ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
+#else
+ KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
+#endif
+
+ /* Create a new range queue for this packet */
+ if (*frag == NULL) {
+ *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
+ if (*frag == NULL) {
+ pf_flush_fragments();
+ *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
+ if (*frag == NULL)
+ goto no_mem;
+ }
+
+ /* Get an entry for the queue */
+ cur = pool_get(&pf_cent_pl, PR_NOWAIT);
+ if (cur == NULL) {
+ pool_put(&pf_cache_pl, *frag);
+ *frag = NULL;
+ goto no_mem;
+ }
+ pf_ncache++;
+
+ (*frag)->fr_flags = PFFRAG_NOBUFFER;
+ (*frag)->fr_max = 0;
+ (*frag)->fr_src = h->ip_src;
+ (*frag)->fr_dst = h->ip_dst;
+ (*frag)->fr_p = h->ip_p;
+ (*frag)->fr_id = h->ip_id;
+ (*frag)->fr_timeout = time_second;
+
+ cur->fr_off = off;
+ cur->fr_end = max;
+ LIST_INIT(&(*frag)->fr_cache);
+ LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
+
+ RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
+ TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
+
+ DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
+
+ goto pass;
+ }
+
+ /*
+ * Find a fragment after the current one:
+ * - off contains the real shifted offset.
+ */
+ frp = NULL;
+ LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
+ if (fra->fr_off > off)
+ break;
+ frp = fra;
+ }
+
+#ifdef __FreeBSD__
+ KASSERT((frp != NULL || fra != NULL),
+ ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));
+#else
+ KASSERT(frp != NULL || fra != NULL);
+#endif
+
+ if (frp != NULL) {
+ int precut;
+
+ precut = frp->fr_end - off;
+ if (precut >= ip_len) {
+ /* Fragment is entirely a duplicate */
+ DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
+ h->ip_id, frp->fr_off, frp->fr_end, off, max));
+ goto drop_fragment;
+ }
+ if (precut == 0) {
+ /* They are adjacent. Fix up the cache entry */
+ DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
+ h->ip_id, frp->fr_off, frp->fr_end, off, max));
+ frp->fr_end = max;
+ } else if (precut > 0) {
+ /* The first part of this payload overlaps with a
+ * fragment that has already been passed.
+ * Need to trim off the first part of the payload.
+ * But to do so easily, we need to create another
+ * mbuf to throw the original header into.
+ */
+
+ DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
+ h->ip_id, precut, frp->fr_off, frp->fr_end, off,
+ max));
+
+ off += precut;
+ max -= precut;
+ /* Update the previous frag to encompass this one */
+ frp->fr_end = max;
+
+ if (!drop) {
+ /* XXX Optimization opportunity
+ * This is a very heavy way to trim the payload.
+ * We could do it much faster by diddling mbuf
+ * internals but that would be even less legible
+ * than this mbuf magic. For my next trick,
+ * I'll pull a rabbit out of my laptop.
+ */
+#ifdef __FreeBSD__
+ *m0 = m_dup(m, M_DONTWAIT);
+#else
+ *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT);
+#endif
+ if (*m0 == NULL)
+ goto no_mem;
+#ifdef __FreeBSD__
+ /* From KAME Project: We have missed this! */
+ m_adj(*m0, (h->ip_hl << 2) -
+ (*m0)->m_pkthdr.len);
+
+ KASSERT(((*m0)->m_next == NULL),
+ ("(*m0)->m_next != NULL: %s",
+ __FUNCTION__));
+#else
+ KASSERT((*m0)->m_next == NULL);
+#endif
+ m_adj(m, precut + (h->ip_hl << 2));
+ m_cat(*m0, m);
+ m = *m0;
+ if (m->m_flags & M_PKTHDR) {
+ int plen = 0;
+ struct mbuf *t;
+ for (t = m; t; t = t->m_next)
+ plen += t->m_len;
+ m->m_pkthdr.len = plen;
+ }
+
+
+ h = mtod(m, struct ip *);
+
+#ifdef __FreeBSD__
+ KASSERT(((int)m->m_len ==
+ ntohs(h->ip_len) - precut),
+ ("m->m_len != ntohs(h->ip_len) - precut: %s",
+ __FUNCTION__));
+#else
+ KASSERT((int)m->m_len ==
+ ntohs(h->ip_len) - precut);
+#endif
+ h->ip_off = htons(ntohs(h->ip_off) +
+ (precut >> 3));
+ h->ip_len = htons(ntohs(h->ip_len) - precut);
+ } else {
+ hosed++;
+ }
+ } else {
+ /* There is a gap between fragments */
+
+ DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
+ h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
+ max));
+
+ cur = pool_get(&pf_cent_pl, PR_NOWAIT);
+ if (cur == NULL)
+ goto no_mem;
+ pf_ncache++;
+
+ cur->fr_off = off;
+ cur->fr_end = max;
+ LIST_INSERT_AFTER(frp, cur, fr_next);
+ }
+ }
+
+ if (fra != NULL) {
+ int aftercut;
+ int merge = 0;
+
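+		/*
+		 * aftercut = number of bytes at the tail of this fragment
+		 * that overlap the following cache entry.
+		 */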
+ aftercut = max - fra->fr_off;
+ if (aftercut == 0) {
+ /* Adjacent fragments */
+ DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
+ h->ip_id, off, max, fra->fr_off, fra->fr_end));
+ fra->fr_off = off;
+ merge = 1;
+ } else if (aftercut > 0) {
+ /* Need to chop off the tail of this fragment */
+ DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
+ h->ip_id, aftercut, off, max, fra->fr_off,
+ fra->fr_end));
+ fra->fr_off = off;
+ max -= aftercut;
+
+ merge = 1;
+
+ if (!drop) {
+ m_adj(m, -aftercut);
+ if (m->m_flags & M_PKTHDR) {
+ int plen = 0;
+ struct mbuf *t;
+ for (t = m; t; t = t->m_next)
+ plen += t->m_len;
+ m->m_pkthdr.len = plen;
+ }
+ h = mtod(m, struct ip *);
+#ifdef __FreeBSD__
+ KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut),
+ ("m->m_len != ntohs(h->ip_len) - aftercut: %s",
+ __FUNCTION__));
+#else
+ KASSERT((int)m->m_len ==
+ ntohs(h->ip_len) - aftercut);
+#endif
+ h->ip_len = htons(ntohs(h->ip_len) - aftercut);
+ } else {
+ hosed++;
+ }
+ } else if (frp == NULL) {
+ /* There is a gap between fragments */
+ DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
+ h->ip_id, -aftercut, off, max, fra->fr_off,
+ fra->fr_end));
+
+ cur = pool_get(&pf_cent_pl, PR_NOWAIT);
+ if (cur == NULL)
+ goto no_mem;
+ pf_ncache++;
+
+ cur->fr_off = off;
+ cur->fr_end = max;
+ LIST_INSERT_BEFORE(fra, cur, fr_next);
+ }
+
+
+ /* Need to glue together two separate fragment descriptors */
+ if (merge) {
+ if (cur && fra->fr_off <= cur->fr_end) {
+ /* Need to merge in a previous 'cur' */
+ DPFPRINTF(("fragcache[%d]: adjacent(merge "
+ "%d-%d) %d-%d (%d-%d)\n",
+ h->ip_id, cur->fr_off, cur->fr_end, off,
+ max, fra->fr_off, fra->fr_end));
+ fra->fr_off = cur->fr_off;
+ LIST_REMOVE(cur, fr_next);
+ pool_put(&pf_cent_pl, cur);
+ pf_ncache--;
+ cur = NULL;
+
+ } else if (frp && fra->fr_off <= frp->fr_end) {
+ /* Need to merge in a modified 'frp' */
+#ifdef __FreeBSD__
+ KASSERT((cur == NULL), ("cur != NULL: %s",
+ __FUNCTION__));
+#else
+ KASSERT(cur == NULL);
+#endif
+ DPFPRINTF(("fragcache[%d]: adjacent(merge "
+ "%d-%d) %d-%d (%d-%d)\n",
+ h->ip_id, frp->fr_off, frp->fr_end, off,
+ max, fra->fr_off, fra->fr_end));
+ fra->fr_off = frp->fr_off;
+ LIST_REMOVE(frp, fr_next);
+ pool_put(&pf_cent_pl, frp);
+ pf_ncache--;
+ frp = NULL;
+
+ }
+ }
+ }
+
+ if (hosed) {
+ /*
+ * We must keep tracking the overall fragment even when
+ * we're going to drop it anyway so that we know when to
+ * free the overall descriptor. Thus we drop the frag late.
+ */
+ goto drop_fragment;
+ }
+
+
+ pass:
+ /* Update maximum data size */
+ if ((*frag)->fr_max < max)
+ (*frag)->fr_max = max;
+
+ /* This is the last segment */
+ if (!mff)
+ (*frag)->fr_flags |= PFFRAG_SEENLAST;
+
+ /* Check if we are completely reassembled */
+ if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
+ LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
+ LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
+ /* Remove from fragment queue */
+ DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
+ (*frag)->fr_max));
+ pf_free_fragment(*frag);
+ *frag = NULL;
+ }
+
+ return (m);
+
+ no_mem:
+ *nomem = 1;
+
+ /* Still need to pay attention to !IP_MF */
+ if (!mff && *frag != NULL)
+ (*frag)->fr_flags |= PFFRAG_SEENLAST;
+
+ m_freem(m);
+ return (NULL);
+
+ drop_fragment:
+
+ /* Still need to pay attention to !IP_MF */
+ if (!mff && *frag != NULL)
+ (*frag)->fr_flags |= PFFRAG_SEENLAST;
+
+ if (drop) {
+ /* This fragment has been deemed bad. Don't reass */
+ if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
+ DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
+ h->ip_id));
+ (*frag)->fr_flags |= PFFRAG_DROP;
+ }
+
+ m_freem(m);
+ return (NULL);
+}
+
+int
+pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
+ struct pf_pdesc *pd)
+{
+ struct mbuf *m = *m0;
+ struct pf_rule *r;
+ struct pf_frent *frent;
+ struct pf_fragment *frag = NULL;
+ struct ip *h = mtod(m, struct ip *);
+ int mff = (ntohs(h->ip_off) & IP_MF);
+ int hlen = h->ip_hl << 2;
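+	/* IP fragment offsets are carried in 8-byte units; << 3 converts to bytes */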
+ u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
+ u_int16_t max;
+ int ip_len;
+ int ip_off;
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
+ while (r != NULL) {
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != dir)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != AF_INET)
+ r = r->skip[PF_SKIP_AF].ptr;
+ else if (r->proto && r->proto != h->ip_p)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+ else if (PF_MISMATCHAW(&r->src.addr,
+ (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
+ r->src.neg, kif))
+ r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+ else if (PF_MISMATCHAW(&r->dst.addr,
+ (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
+ r->dst.neg, NULL))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else
+ break;
+ }
+
+ if (r == NULL || r->action == PF_NOSCRUB)
+ return (PF_PASS);
+ else {
+ r->packets[dir == PF_OUT]++;
+ r->bytes[dir == PF_OUT] += pd->tot_len;
+ }
+
+ /* Check for illegal packets */
+ if (hlen < (int)sizeof(struct ip))
+ goto drop;
+
+ if (hlen > ntohs(h->ip_len))
+ goto drop;
+
+ /* Clear IP_DF if the rule uses the no-df option */
+ if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
+ u_int16_t ip_off = h->ip_off;
+
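+		/*
+		 * pf_cksum_fixup() patches the header checksum incrementally
+		 * from the old and new 16-bit words (in the style of RFC 1624)
+		 * instead of recomputing it over the whole header.
+		 */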
+ h->ip_off &= htons(~IP_DF);
+ h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
+ }
+
+ /* We will need other tests here */
+ if (!fragoff && !mff)
+ goto no_fragment;
+
+ /* We're dealing with a fragment now. Don't allow fragments
+ * with IP_DF to enter the cache. If the flag was cleared by
+ * no-df above, fine. Otherwise drop it.
+ */
+ if (h->ip_off & htons(IP_DF)) {
+ DPFPRINTF(("IP_DF\n"));
+ goto bad;
+ }
+
+ ip_len = ntohs(h->ip_len) - hlen;
+ ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
+
+ /* All fragments are 8 byte aligned */
+ if (mff && (ip_len & 0x7)) {
+ DPFPRINTF(("mff and %d\n", ip_len));
+ goto bad;
+ }
+
+ /* Respect maximum length */
+ if (fragoff + ip_len > IP_MAXPACKET) {
+ DPFPRINTF(("max packet %d\n", fragoff + ip_len));
+ goto bad;
+ }
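+	/* max = one past the last payload byte of this fragment in the
+	 * reassembled packet */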
+ max = fragoff + ip_len;
+
+ if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
+ /* Fully buffer all of the fragments */
+
+ frag = pf_find_fragment(h, &pf_frag_tree);
+
+ /* Check if we saw the last fragment already */
+ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
+ max > frag->fr_max)
+ goto bad;
+
+ /* Get an entry for the fragment queue */
+ frent = pool_get(&pf_frent_pl, PR_NOWAIT);
+ if (frent == NULL) {
+ REASON_SET(reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
+ pf_nfrents++;
+ frent->fr_ip = h;
+ frent->fr_m = m;
+
+ /* Might return a completely reassembled mbuf, or NULL */
+ DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
+ *m0 = m = pf_reassemble(m0, &frag, frent, mff);
+
+ if (m == NULL)
+ return (PF_DROP);
+
+ /* use mtag from concatenated mbuf chain */
+ pd->pf_mtag = pf_find_mtag(m);
+#ifdef DIAGNOSTIC
+ if (pd->pf_mtag == NULL) {
+ printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
+ if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
+ m_freem(m);
+ *m0 = NULL;
+ goto no_mem;
+ }
+ }
+#endif
+ if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
+ goto drop;
+
+ h = mtod(m, struct ip *);
+ } else {
+ /* non-buffering fragment cache (drops or masks overlaps) */
+ int nomem = 0;
+
+ if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
+ /*
+ * Already passed the fragment cache in the
+ * input direction. If we continued, it would
+ * appear to be a dup and would be dropped.
+ */
+ goto fragment_pass;
+ }
+
+ frag = pf_find_fragment(h, &pf_cache_tree);
+
+ /* Check if we saw the last fragment already */
+ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
+ max > frag->fr_max) {
+ if (r->rule_flag & PFRULE_FRAGDROP)
+ frag->fr_flags |= PFFRAG_DROP;
+ goto bad;
+ }
+
+ *m0 = m = pf_fragcache(m0, h, &frag, mff,
+ (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
+ if (m == NULL) {
+ if (nomem)
+ goto no_mem;
+ goto drop;
+ }
+
+ /* use mtag from copied and trimmed mbuf chain */
+ pd->pf_mtag = pf_find_mtag(m);
+#ifdef DIAGNOSTIC
+ if (pd->pf_mtag == NULL) {
+ printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
+ if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
+ m_freem(m);
+ *m0 = NULL;
+ goto no_mem;
+ }
+ }
+#endif
+ if (dir == PF_IN)
+ pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
+
+ if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
+ goto drop;
+ goto fragment_pass;
+ }
+
+ no_fragment:
+ /* At this point, only IP_DF is allowed in ip_off */
+ if (h->ip_off & ~htons(IP_DF)) {
+ u_int16_t ip_off = h->ip_off;
+
+ h->ip_off &= htons(IP_DF);
+ h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
+ }
+
+ /* Enforce a minimum ttl, may cause endless packet loops */
+ if (r->min_ttl && h->ip_ttl < r->min_ttl) {
+ u_int16_t ip_ttl = h->ip_ttl;
+
+ h->ip_ttl = r->min_ttl;
+ h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
+ }
+
+ if (r->rule_flag & PFRULE_RANDOMID) {
+ u_int16_t ip_id = h->ip_id;
+
+ h->ip_id = ip_randomid();
+ h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
+ }
+ if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
+ pd->flags |= PFDESC_IP_REAS;
+
+ return (PF_PASS);
+
+ fragment_pass:
+ /* Enforce a minimum ttl, may cause endless packet loops */
+ if (r->min_ttl && h->ip_ttl < r->min_ttl) {
+ u_int16_t ip_ttl = h->ip_ttl;
+
+ h->ip_ttl = r->min_ttl;
+ h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
+ }
+ if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
+ pd->flags |= PFDESC_IP_REAS;
+ return (PF_PASS);
+
+ no_mem:
+ REASON_SET(reason, PFRES_MEMORY);
+ if (r != NULL && r->log)
+ PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
+ return (PF_DROP);
+
+ drop:
+ REASON_SET(reason, PFRES_NORM);
+ if (r != NULL && r->log)
+ PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
+ return (PF_DROP);
+
+ bad:
+ DPFPRINTF(("dropping bad fragment\n"));
+
+ /* Free associated fragments */
+ if (frag != NULL)
+ pf_free_fragment(frag);
+
+ REASON_SET(reason, PFRES_FRAG);
+ if (r != NULL && r->log)
+ PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
+
+ return (PF_DROP);
+}
+
+#ifdef INET6
+int
+pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
+ u_short *reason, struct pf_pdesc *pd)
+{
+ struct mbuf *m = *m0;
+ struct pf_rule *r;
+ struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+ int off;
+ struct ip6_ext ext;
+ struct ip6_opt opt;
+ struct ip6_opt_jumbo jumbo;
+ struct ip6_frag frag;
+ u_int32_t jumbolen = 0, plen;
+ u_int16_t fragoff = 0;
+ int optend;
+ int ooff;
+ u_int8_t proto;
+ int terminal;
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
+ while (r != NULL) {
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != dir)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != AF_INET6)
+ r = r->skip[PF_SKIP_AF].ptr;
+#if 0 /* header chain! */
+ else if (r->proto && r->proto != h->ip6_nxt)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+#endif
+ else if (PF_MISMATCHAW(&r->src.addr,
+ (struct pf_addr *)&h->ip6_src, AF_INET6,
+ r->src.neg, kif))
+ r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+ else if (PF_MISMATCHAW(&r->dst.addr,
+ (struct pf_addr *)&h->ip6_dst, AF_INET6,
+ r->dst.neg, NULL))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else
+ break;
+ }
+
+ if (r == NULL || r->action == PF_NOSCRUB)
+ return (PF_PASS);
+ else {
+ r->packets[dir == PF_OUT]++;
+ r->bytes[dir == PF_OUT] += pd->tot_len;
+ }
+
+ /* Check for illegal packets */
+ if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
+ goto drop;
+
+ off = sizeof(struct ip6_hdr);
+ proto = h->ip6_nxt;
+ terminal = 0;
+ do {
+ switch (proto) {
+ case IPPROTO_FRAGMENT:
+ goto fragment;
+ break;
+ case IPPROTO_AH:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
+ NULL, AF_INET6))
+ goto shortpkt;
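+			/*
+			 * AH counts its length in 32-bit words minus two
+			 * (RFC 4302); the other extension headers count
+			 * 8-octet units not including the first 8 octets
+			 * (RFC 2460).
+			 */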
+ if (proto == IPPROTO_AH)
+ off += (ext.ip6e_len + 2) * 4;
+ else
+ off += (ext.ip6e_len + 1) * 8;
+ proto = ext.ip6e_nxt;
+ break;
+ case IPPROTO_HOPOPTS:
+ if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
+ NULL, AF_INET6))
+ goto shortpkt;
+ optend = off + (ext.ip6e_len + 1) * 8;
+ ooff = off + sizeof(ext);
+ do {
+ if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
+ sizeof(opt.ip6o_type), NULL, NULL,
+ AF_INET6))
+ goto shortpkt;
+ if (opt.ip6o_type == IP6OPT_PAD1) {
+ ooff++;
+ continue;
+ }
+ if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
+ NULL, NULL, AF_INET6))
+ goto shortpkt;
+ if (ooff + sizeof(opt) + opt.ip6o_len > optend)
+ goto drop;
+ switch (opt.ip6o_type) {
+ case IP6OPT_JUMBO:
+ if (h->ip6_plen != 0)
+ goto drop;
+ if (!pf_pull_hdr(m, ooff, &jumbo,
+ sizeof(jumbo), NULL, NULL,
+ AF_INET6))
+ goto shortpkt;
+ memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
+ sizeof(jumbolen));
+ jumbolen = ntohl(jumbolen);
+ if (jumbolen <= IPV6_MAXPACKET)
+ goto drop;
+ if (sizeof(struct ip6_hdr) + jumbolen !=
+ m->m_pkthdr.len)
+ goto drop;
+ break;
+ default:
+ break;
+ }
+ ooff += sizeof(opt) + opt.ip6o_len;
+ } while (ooff < optend);
+
+ off = optend;
+ proto = ext.ip6e_nxt;
+ break;
+ default:
+ terminal = 1;
+ break;
+ }
+ } while (!terminal);
+
+ /* jumbo payload option must be present, or plen > 0 */
+ if (ntohs(h->ip6_plen) == 0)
+ plen = jumbolen;
+ else
+ plen = ntohs(h->ip6_plen);
+ if (plen == 0)
+ goto drop;
+ if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
+ goto shortpkt;
+
+ /* Enforce a minimum ttl, may cause endless packet loops */
+ if (r->min_ttl && h->ip6_hlim < r->min_ttl)
+ h->ip6_hlim = r->min_ttl;
+
+ return (PF_PASS);
+
+ fragment:
+ if (ntohs(h->ip6_plen) == 0 || jumbolen)
+ goto drop;
+ plen = ntohs(h->ip6_plen);
+
+ if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
+ goto shortpkt;
+ fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
+ if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
+ goto badfrag;
+
+ /* do something about it */
+ /* remember to set pd->flags |= PFDESC_IP_REAS */
+ return (PF_PASS);
+
+ shortpkt:
+ REASON_SET(reason, PFRES_SHORT);
+ if (r != NULL && r->log)
+ PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
+ return (PF_DROP);
+
+ drop:
+ REASON_SET(reason, PFRES_NORM);
+ if (r != NULL && r->log)
+ PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
+ return (PF_DROP);
+
+ badfrag:
+ REASON_SET(reason, PFRES_FRAG);
+ if (r != NULL && r->log)
+ PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
+ return (PF_DROP);
+}
+#endif /* INET6 */
+
+int
+pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
+ int off, void *h, struct pf_pdesc *pd)
+{
+ struct pf_rule *r, *rm = NULL;
+ struct tcphdr *th = pd->hdr.tcp;
+ int rewrite = 0;
+ u_short reason;
+ u_int8_t flags;
+ sa_family_t af = pd->af;
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
+ while (r != NULL) {
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != dir)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != af)
+ r = r->skip[PF_SKIP_AF].ptr;
+ else if (r->proto && r->proto != pd->proto)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+ else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
+ r->src.neg, kif))
+ r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+ else if (r->src.port_op && !pf_match_port(r->src.port_op,
+ r->src.port[0], r->src.port[1], th->th_sport))
+ r = r->skip[PF_SKIP_SRC_PORT].ptr;
+ else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
+ r->dst.neg, NULL))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
+ r->dst.port[0], r->dst.port[1], th->th_dport))
+ r = r->skip[PF_SKIP_DST_PORT].ptr;
+ else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
+ pf_osfp_fingerprint(pd, m, off, th),
+ r->os_fingerprint))
+ r = TAILQ_NEXT(r, entries);
+ else {
+ rm = r;
+ break;
+ }
+ }
+
+ if (rm == NULL || rm->action == PF_NOSCRUB)
+ return (PF_PASS);
+ else {
+ r->packets[dir == PF_OUT]++;
+ r->bytes[dir == PF_OUT] += pd->tot_len;
+ }
+
+ if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
+ pd->flags |= PFDESC_TCP_NORM;
+
+ flags = th->th_flags;
+ if (flags & TH_SYN) {
+ /* Illegal packet */
+ if (flags & TH_RST)
+ goto tcp_drop;
+
+ if (flags & TH_FIN)
+ flags &= ~TH_FIN;
+ } else {
+ /* Illegal packet */
+ if (!(flags & (TH_ACK|TH_RST)))
+ goto tcp_drop;
+ }
+
+ if (!(flags & TH_ACK)) {
+ /* These flags are only valid if ACK is set */
+ if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
+ goto tcp_drop;
+ }
+
+ /* Check for illegal header length */
+ if (th->th_off < (sizeof(struct tcphdr) >> 2))
+ goto tcp_drop;
+
+	/* If flags changed, or reserved bits were set, adjust and update checksum */
+ if (flags != th->th_flags || th->th_x2 != 0) {
+ u_int16_t ov, nv;
+
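+		/*
+		 * &th->th_ack + 1 addresses the 16-bit word that holds the
+		 * data offset, reserved bits and flags; capturing it before
+		 * and after the change feeds pf_cksum_fixup().
+		 */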
+ ov = *(u_int16_t *)(&th->th_ack + 1);
+ th->th_flags = flags;
+ th->th_x2 = 0;
+ nv = *(u_int16_t *)(&th->th_ack + 1);
+
+ th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
+ rewrite = 1;
+ }
+
+ /* Remove urgent pointer, if TH_URG is not set */
+ if (!(flags & TH_URG) && th->th_urp) {
+ th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
+ th->th_urp = 0;
+ rewrite = 1;
+ }
+
+ /* Process options */
+ if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
+ rewrite = 1;
+
+ /* copy back packet headers if we sanitized */
+ if (rewrite)
+ m_copyback(m, off, sizeof(*th), (caddr_t)th);
+
+ return (PF_PASS);
+
+ tcp_drop:
+ REASON_SET(&reason, PFRES_NORM);
+ if (rm != NULL && r->log)
+ PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
+ return (PF_DROP);
+}
+
+int
+pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
+ struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
+{
+ u_int32_t tsval, tsecr;
+ u_int8_t hdr[60];
+ u_int8_t *opt;
+
+#ifdef __FreeBSD__
+ KASSERT((src->scrub == NULL),
+ ("pf_normalize_tcp_init: src->scrub != NULL"));
+#else
+ KASSERT(src->scrub == NULL);
+#endif
+
+ src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
+ if (src->scrub == NULL)
+ return (1);
+ bzero(src->scrub, sizeof(*src->scrub));
+
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET: {
+ struct ip *h = mtod(m, struct ip *);
+ src->scrub->pfss_ttl = h->ip_ttl;
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6: {
+ struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+ src->scrub->pfss_ttl = h->ip6_hlim;
+ break;
+ }
+#endif /* INET6 */
+ }
+
+
+ /*
+ * All normalizations below are only begun if we see the start of
+	 * the connection. They must all set an enabled bit in pfss_flags.
+ */
+ if ((th->th_flags & TH_SYN) == 0)
+ return (0);
+
+
+ if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
+ pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
+ /* Diddle with TCP options */
+ int hlen;
+ opt = hdr + sizeof(struct tcphdr);
+ hlen = (th->th_off << 2) - sizeof(struct tcphdr);
+ while (hlen >= TCPOLEN_TIMESTAMP) {
+ switch (*opt) {
+ case TCPOPT_EOL: /* FALLTHROUGH */
+ case TCPOPT_NOP:
+ opt++;
+ hlen--;
+ break;
+ case TCPOPT_TIMESTAMP:
+ if (opt[1] >= TCPOLEN_TIMESTAMP) {
+ src->scrub->pfss_flags |=
+ PFSS_TIMESTAMP;
+ src->scrub->pfss_ts_mod =
+ htonl(arc4random());
+
+ /* note PFSS_PAWS not set yet */
+ memcpy(&tsval, &opt[2],
+ sizeof(u_int32_t));
+ memcpy(&tsecr, &opt[6],
+ sizeof(u_int32_t));
+ src->scrub->pfss_tsval0 = ntohl(tsval);
+ src->scrub->pfss_tsval = ntohl(tsval);
+ src->scrub->pfss_tsecr = ntohl(tsecr);
+ getmicrouptime(&src->scrub->pfss_last);
+ }
+ /* FALLTHROUGH */
+ default:
+ hlen -= MAX(opt[1], 2);
+ opt += MAX(opt[1], 2);
+ break;
+ }
+ }
+ }
+
+ return (0);
+}
+
+void
+pf_normalize_tcp_cleanup(struct pf_state *state)
+{
+ if (state->src.scrub)
+ pool_put(&pf_state_scrub_pl, state->src.scrub);
+ if (state->dst.scrub)
+ pool_put(&pf_state_scrub_pl, state->dst.scrub);
+
+ /* Someday... flush the TCP segment reassembly descriptors. */
+}
+
+int
+pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
+ u_short *reason, struct tcphdr *th, struct pf_state *state,
+ struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
+{
+ struct timeval uptime;
+ u_int32_t tsval, tsecr;
+ u_int tsval_from_last;
+ u_int8_t hdr[60];
+ u_int8_t *opt;
+ int copyback = 0;
+ int got_ts = 0;
+
+#ifdef __FreeBSD__
+ KASSERT((src->scrub || dst->scrub),
+	    ("pf_normalize_tcp_stateful: !(src->scrub || dst->scrub)"));
+#else
+ KASSERT(src->scrub || dst->scrub);
+#endif
+
+ /*
+ * Enforce the minimum TTL seen for this connection. Negate a common
+ * technique to evade an intrusion detection system and confuse
+ * firewall state code.
+ */
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET: {
+ if (src->scrub) {
+ struct ip *h = mtod(m, struct ip *);
+ if (h->ip_ttl > src->scrub->pfss_ttl)
+ src->scrub->pfss_ttl = h->ip_ttl;
+ h->ip_ttl = src->scrub->pfss_ttl;
+ }
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6: {
+ if (src->scrub) {
+ struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+ if (h->ip6_hlim > src->scrub->pfss_ttl)
+ src->scrub->pfss_ttl = h->ip6_hlim;
+ h->ip6_hlim = src->scrub->pfss_ttl;
+ }
+ break;
+ }
+#endif /* INET6 */
+ }
+
+ if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
+ ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
+ (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
+ pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
+ /* Diddle with TCP options */
+ int hlen;
+ opt = hdr + sizeof(struct tcphdr);
+ hlen = (th->th_off << 2) - sizeof(struct tcphdr);
+ while (hlen >= TCPOLEN_TIMESTAMP) {
+ switch (*opt) {
+ case TCPOPT_EOL: /* FALLTHROUGH */
+ case TCPOPT_NOP:
+ opt++;
+ hlen--;
+ break;
+ case TCPOPT_TIMESTAMP:
+ /* Modulate the timestamps. Can be used for
+ * NAT detection, OS uptime determination or
+ * reboot detection.
+ */
+
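+				/*
+				 * A per-state random offset (pfss_ts_mod) is
+				 * added to the outgoing tsval and subtracted
+				 * from the peer's echoed tsecr, so the raw
+				 * timestamp clock never shows on the wire.
+				 */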
+ if (got_ts) {
+ /* Huh? Multiple timestamps!? */
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ DPFPRINTF(("multiple TS??"));
+ pf_print_state(state);
+ printf("\n");
+ }
+ REASON_SET(reason, PFRES_TS);
+ return (PF_DROP);
+ }
+ if (opt[1] >= TCPOLEN_TIMESTAMP) {
+ memcpy(&tsval, &opt[2],
+ sizeof(u_int32_t));
+ if (tsval && src->scrub &&
+ (src->scrub->pfss_flags &
+ PFSS_TIMESTAMP)) {
+ tsval = ntohl(tsval);
+ pf_change_a(&opt[2],
+ &th->th_sum,
+ htonl(tsval +
+ src->scrub->pfss_ts_mod),
+ 0);
+ copyback = 1;
+ }
+
+ /* Modulate TS reply iff valid (!0) */
+ memcpy(&tsecr, &opt[6],
+ sizeof(u_int32_t));
+ if (tsecr && dst->scrub &&
+ (dst->scrub->pfss_flags &
+ PFSS_TIMESTAMP)) {
+ tsecr = ntohl(tsecr)
+ - dst->scrub->pfss_ts_mod;
+ pf_change_a(&opt[6],
+ &th->th_sum, htonl(tsecr),
+ 0);
+ copyback = 1;
+ }
+ got_ts = 1;
+ }
+ /* FALLTHROUGH */
+ default:
+ hlen -= MAX(opt[1], 2);
+ opt += MAX(opt[1], 2);
+ break;
+ }
+ }
+ if (copyback) {
+			/* Copy back the options; the caller copies back the header */
+ *writeback = 1;
+ m_copyback(m, off + sizeof(struct tcphdr),
+ (th->th_off << 2) - sizeof(struct tcphdr), hdr +
+ sizeof(struct tcphdr));
+ }
+ }
+
+
+ /*
+ * Must invalidate PAWS checks on connections idle for too long.
+ * The fastest allowed timestamp clock is 1ms. That turns out to
+ * be about 24 days before it wraps. XXX Right now our lowerbound
+ * TS echo check only works for the first 12 days of a connection
+ * when the TS has exhausted half its 32bit space
+ */
+#define TS_MAX_IDLE (24*24*60*60)
+#define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */
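+/*
+ * At the fastest allowed TS clock of 1 kHz, half the 32-bit timestamp
+ * space (the horizon the PAWS sequence comparisons can span) is 2^31 ms,
+ * roughly 24.8 days; hence the cutoffs above.
+ */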
+
+ getmicrouptime(&uptime);
+ if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
+ (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
+ time_second - state->creation > TS_MAX_CONN)) {
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ DPFPRINTF(("src idled out of PAWS\n"));
+ pf_print_state(state);
+ printf("\n");
+ }
+ src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
+ | PFSS_PAWS_IDLED;
+ }
+ if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
+ uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ DPFPRINTF(("dst idled out of PAWS\n"));
+ pf_print_state(state);
+ printf("\n");
+ }
+ dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
+ | PFSS_PAWS_IDLED;
+ }
+
+ if (got_ts && src->scrub && dst->scrub &&
+ (src->scrub->pfss_flags & PFSS_PAWS) &&
+ (dst->scrub->pfss_flags & PFSS_PAWS)) {
+ /* Validate that the timestamps are "in-window".
+ * RFC1323 describes TCP Timestamp options that allow
+ * measurement of RTT (round trip time) and PAWS
+ * (protection against wrapped sequence numbers). PAWS
+ * gives us a set of rules for rejecting packets on
+ * long fat pipes (packets that were somehow delayed
+ * in transit longer than the time it took to send the
+ * full TCP sequence space of 4Gb). We can use these
+ * rules and infer a few others that will let us treat
+ * the 32bit timestamp and the 32bit echoed timestamp
+ * as sequence numbers to prevent a blind attacker from
+ * inserting packets into a connection.
+ *
+ * RFC1323 tells us:
+ * - The timestamp on this packet must be greater than
+ * or equal to the last value echoed by the other
+ * endpoint. The RFC says those will be discarded
+ * since it is a dup that has already been acked.
+ * This gives us a lowerbound on the timestamp.
+ * timestamp >= other last echoed timestamp
+ * - The timestamp will be less than or equal to
+ * the last timestamp plus the time between the
+ * last packet and now. The RFC defines the max
+ * clock rate as 1ms. We will allow clocks to be
+ * up to 10% fast and will allow a total difference
+	 * of 30 seconds due to a route change. And this
+ * gives us an upperbound on the timestamp.
+ * timestamp <= last timestamp + max ticks
+ * We have to be careful here. Windows will send an
+ * initial timestamp of zero and then initialize it
+ * to a random value after the 3whs; presumably to
+ * avoid a DoS by having to call an expensive RNG
+			 * during a SYN flood. Proof that MS has at least one
+ * good security geek.
+ *
+ * - The TCP timestamp option must also echo the other
+ * endpoints timestamp. The timestamp echoed is the
+ * one carried on the earliest unacknowledged segment
+ * on the left edge of the sequence window. The RFC
+ * states that the host will reject any echoed
+ * timestamps that were larger than any ever sent.
+ * This gives us an upperbound on the TS echo.
+ * tescr <= largest_tsval
+ * - The lowerbound on the TS echo is a little more
+ * tricky to determine. The other endpoint's echoed
+ * values will not decrease. But there may be
+ * network conditions that re-order packets and
+ * cause our view of them to decrease. For now the
+ * only lowerbound we can safely determine is that
+	 * the TS echo will never be less than the original
+ * TS. XXX There is probably a better lowerbound.
+ * Remove TS_MAX_CONN with better lowerbound check.
+ * tescr >= other original TS
+ *
+ * It is also important to note that the fastest
+ * timestamp clock of 1ms will wrap its 32bit space in
+ * 24 days. So we just disable TS checking after 24
+ * days of idle time. We actually must use a 12d
+ * connection limit until we can come up with a better
+ * lowerbound to the TS echo check.
+ */
+ struct timeval delta_ts;
+ int ts_fudge;
+
+
+ /*
+ * PFTM_TS_DIFF is how many seconds of leeway to allow
+ * a host's timestamp. This can happen if the previous
+ * packet got delayed in transit for much longer than
+ * this packet.
+ */
+ if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
+ ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
+
+
+ /* Calculate max ticks since the last timestamp */
+#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */
+#define TS_MICROSECS 1000000 /* microseconds per second */
+#ifdef __FreeBSD__
+#ifndef timersub
+#define timersub(tvp, uvp, vvp) \
+ do { \
+ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
+ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \
+ if ((vvp)->tv_usec < 0) { \
+ (vvp)->tv_sec--; \
+ (vvp)->tv_usec += 1000000; \
+ } \
+ } while (0)
+#endif
+#endif
+ timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
+ tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
+ tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
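+		/*
+		 * e.g. 60 seconds of idle time plus a 30 second fudge lets
+		 * the peer's tsval advance by roughly (60 + 30) * 1100 =
+		 * 99000 ticks.
+		 */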
+
+
+ if ((src->state >= TCPS_ESTABLISHED &&
+ dst->state >= TCPS_ESTABLISHED) &&
+ (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
+ SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
+ (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
+ SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
+ /* Bad RFC1323 implementation or an insertion attack.
+ *
+ * - Solaris 2.6 and 2.7 are known to send another ACK
+ * after the FIN,FIN|ACK,ACK closing that carries
+ * an old timestamp.
+ */
+
+ DPFPRINTF(("Timestamp failed %c%c%c%c\n",
+ SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
+ SEQ_GT(tsval, src->scrub->pfss_tsval +
+ tsval_from_last) ? '1' : ' ',
+ SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
+ SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
+#ifdef __FreeBSD__
+ DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
+ "idle: %jus %lums\n",
+ tsval, tsecr, tsval_from_last,
+ (uintmax_t)delta_ts.tv_sec,
+ delta_ts.tv_usec / 1000));
+ DPFPRINTF((" src->tsval: %u tsecr: %u\n",
+ src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
+ DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u"
+ "\n", dst->scrub->pfss_tsval,
+ dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
+#else
+ DPFPRINTF((" tsval: %lu tsecr: %lu +ticks: %lu "
+ "idle: %lus %lums\n",
+ tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
+ delta_ts.tv_usec / 1000));
+ DPFPRINTF((" src->tsval: %lu tsecr: %lu\n",
+ src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
+ DPFPRINTF((" dst->tsval: %lu tsecr: %lu tsval0: %lu"
+ "\n", dst->scrub->pfss_tsval,
+ dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
+#endif
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ pf_print_state(state);
+ pf_print_flags(th->th_flags);
+ printf("\n");
+ }
+ REASON_SET(reason, PFRES_TS);
+ return (PF_DROP);
+ }
+
+ /* XXX I'd really like to require tsecr but it's optional */
+
+ } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
+ ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
+ || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
+ src->scrub && dst->scrub &&
+ (src->scrub->pfss_flags & PFSS_PAWS) &&
+ (dst->scrub->pfss_flags & PFSS_PAWS)) {
+ /* Didn't send a timestamp. Timestamps aren't really useful
+ * when:
+ * - connection opening or closing (often not even sent).
+	 * but we must not let an attacker put a FIN on a
+ * data packet to sneak it through our ESTABLISHED check.
+ * - on a TCP reset. RFC suggests not even looking at TS.
+ * - on an empty ACK. The TS will not be echoed so it will
+ * probably not help keep the RTT calculation in sync and
+ * there isn't as much danger when the sequence numbers
+ * got wrapped. So some stacks don't include TS on empty
+ * ACKs :-(
+ *
+ * To minimize the disruption to mostly RFC1323 conformant
+ * stacks, we will only require timestamps on data packets.
+ *
+ * And what do ya know, we cannot require timestamps on data
+ * packets. There appear to be devices that do legitimate
+ * TCP connection hijacking. There are HTTP devices that allow
+ * a 3whs (with timestamps) and then buffer the HTTP request.
+ * If the intermediate device has the HTTP response cache, it
+ * will spoof the response but not bother timestamping its
+ * packets. So we can look for the presence of a timestamp in
+ * the first data packet and if there, require it in all future
+ * packets.
+ */
+
+ if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
+ /*
+ * Hey! Someone tried to sneak a packet in. Or the
+ * stack changed its RFC1323 behavior?!?!
+ */
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ DPFPRINTF(("Did not receive expected RFC1323 "
+ "timestamp\n"));
+ pf_print_state(state);
+ pf_print_flags(th->th_flags);
+ printf("\n");
+ }
+ REASON_SET(reason, PFRES_TS);
+ return (PF_DROP);
+ }
+ }
+
+
+ /*
+	 * We note whether a host sends its data packets with or without
+	 * timestamps, and require all data packets to contain a timestamp
+	 * if the first one does. PAWS implicitly requires that all data packets be
+ * timestamped. But I think there are middle-man devices that hijack
+ * TCP streams immediately after the 3whs and don't timestamp their
+ * packets (seen in a WWW accelerator or cache).
+ */
+ if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
+ (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
+ if (got_ts)
+ src->scrub->pfss_flags |= PFSS_DATA_TS;
+ else {
+ src->scrub->pfss_flags |= PFSS_DATA_NOTS;
+ if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
+ (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
+ /* Don't warn if other host rejected RFC1323 */
+ DPFPRINTF(("Broken RFC1323 stack did not "
+ "timestamp data packet. Disabled PAWS "
+ "security.\n"));
+ pf_print_state(state);
+ pf_print_flags(th->th_flags);
+ printf("\n");
+ }
+ }
+ }
+
+
+ /*
+ * Update PAWS values
+ */
+ if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
+ (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
+ getmicrouptime(&src->scrub->pfss_last);
+ if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
+ (src->scrub->pfss_flags & PFSS_PAWS) == 0)
+ src->scrub->pfss_tsval = tsval;
+
+ if (tsecr) {
+ if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
+ (src->scrub->pfss_flags & PFSS_PAWS) == 0)
+ src->scrub->pfss_tsecr = tsecr;
+
+ if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
+ (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
+ src->scrub->pfss_tsval0 == 0)) {
+ /* tsval0 MUST be the lowest timestamp */
+ src->scrub->pfss_tsval0 = tsval;
+ }
+
+ /* Only fully initialized after a TS gets echoed */
+ if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
+ src->scrub->pfss_flags |= PFSS_PAWS;
+ }
+ }
+
+ /* I have a dream.... TCP segment reassembly.... */
+ return (0);
+}
+
+int
+pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
+ int off)
+{
+ u_int16_t *mss;
+ int thoff;
+ int opt, cnt, optlen = 0;
+ int rewrite = 0;
+ u_char *optp;
+
+ thoff = th->th_off << 2;
+ cnt = thoff - sizeof(struct tcphdr);
+ optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr);
+
+ for (; cnt > 0; cnt -= optlen, optp += optlen) {
+ opt = optp[0];
+ if (opt == TCPOPT_EOL)
+ break;
+ if (opt == TCPOPT_NOP)
+ optlen = 1;
+ else {
+ if (cnt < 2)
+ break;
+ optlen = optp[1];
+ if (optlen < 2 || optlen > cnt)
+ break;
+ }
+ switch (opt) {
+ case TCPOPT_MAXSEG:
+ mss = (u_int16_t *)(optp + 2);
+ if ((ntohs(*mss)) > r->max_mss) {
+ th->th_sum = pf_cksum_fixup(th->th_sum,
+ *mss, htons(r->max_mss), 0);
+ *mss = htons(r->max_mss);
+ rewrite = 1;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ return (rewrite);
+}
diff --git a/freebsd/sys/contrib/pf/net/pf_osfp.c b/freebsd/sys/contrib/pf/net/pf_osfp.c
new file mode 100644
index 00000000..dafeabf1
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pf_osfp.c
@@ -0,0 +1,640 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $OpenBSD: pf_osfp.c,v 1.12 2006/12/13 18:14:10 itojun Exp $ */
+
+/*
+ * Copyright (c) 2003 Mike Frantzen <frantzen@w4g.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#ifdef __FreeBSD__
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#endif
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/socket.h>
+#ifdef _KERNEL
+# include <freebsd/sys/systm.h>
+#endif /* _KERNEL */
+#include <freebsd/sys/mbuf.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/pfvar.h>
+
+#include <freebsd/netinet/ip6.h>
+#ifdef _KERNEL
+#include <freebsd/netinet6/in6_var.h>
+#endif
+
+#ifdef _KERNEL
+# define DPFPRINTF(format, x...) \
+ if (pf_status.debug >= PF_DEBUG_NOISY) \
+ printf(format , ##x)
+#ifdef __FreeBSD__
+typedef uma_zone_t pool_t;
+#else
+typedef struct pool pool_t;
+#endif
+
+#else
+/* Userland equivalents so we can lend code to tcpdump et al. */
+
+# include <freebsd/arpa/inet.h>
+# include <freebsd/errno.h>
+# include <freebsd/stdio.h>
+# include <freebsd/stdlib.h>
+# include <freebsd/string.h>
+# include <freebsd/netdb.h>
+# define pool_t int
+# define pool_get(pool, flags) malloc(*(pool))
+# define pool_put(pool, item) free(item)
+# define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size)
+
+# ifdef __FreeBSD__
+# define NTOHS(x) (x) = ntohs((u_int16_t)(x))
+# endif
+
+# ifdef PFDEBUG
+# include <freebsd/sys/stdarg.h>
+# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x)
+# else
+# define DPFPRINTF(format, x...) ((void)0)
+# endif /* PFDEBUG */
+#endif /* _KERNEL */
+
+
+SLIST_HEAD(pf_osfp_list, pf_os_fingerprint) pf_osfp_list;
+pool_t pf_osfp_entry_pl;
+pool_t pf_osfp_pl;
+
+struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *,
+ struct pf_os_fingerprint *, u_int8_t);
+struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *,
+ struct pf_os_fingerprint *);
+void pf_osfp_insert(struct pf_osfp_list *,
+ struct pf_os_fingerprint *);
+
+
+#ifdef _KERNEL
+/*
+ * Passively fingerprint the OS of the host (IPv4/IPv6 TCP SYN packets only)
+ * Returns the list of possible OSes.
+ */
+struct pf_osfp_enlist *
+pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off,
+ const struct tcphdr *tcp)
+{
+ struct ip *ip;
+ struct ip6_hdr *ip6;
+ char hdr[60];
+
+ if ((pd->af != PF_INET && pd->af != PF_INET6) ||
+ pd->proto != IPPROTO_TCP || (tcp->th_off << 2) < sizeof(*tcp))
+ return (NULL);
+
+ if (pd->af == PF_INET) {
+ ip = mtod(m, struct ip *);
+ ip6 = (struct ip6_hdr *)NULL;
+ } else {
+ ip = (struct ip *)NULL;
+ ip6 = mtod(m, struct ip6_hdr *);
+ }
+ if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL,
+	    pd->af))
+		return (NULL);
+
+ return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr));
+}
+#endif /* _KERNEL */
+
+struct pf_osfp_enlist *
+pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const struct tcphdr *tcp)
+{
+ struct pf_os_fingerprint fp, *fpresult;
+ int cnt, optlen = 0;
+ const u_int8_t *optp;
+#ifdef _KERNEL
+ char srcname[128];
+#else
+ char srcname[NI_MAXHOST];
+#endif
+#ifdef __rtems__
+#ifdef INET6
+	char ip6buf[INET6_ADDRSTRLEN];
+#endif /* INET6 */
+#endif /* __rtems__ */
+
+ if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN)
+ return (NULL);
+ if (ip) {
+ if ((ip->ip_off & htons(IP_OFFMASK)) != 0)
+ return (NULL);
+ }
+
+ memset(&fp, 0, sizeof(fp));
+
+ if (ip) {
+#ifndef _KERNEL
+ struct sockaddr_in sin;
+#endif
+
+ fp.fp_psize = ntohs(ip->ip_len);
+ fp.fp_ttl = ip->ip_ttl;
+ if (ip->ip_off & htons(IP_DF))
+ fp.fp_flags |= PF_OSFP_DF;
+#ifdef _KERNEL
+ strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname));
+#else
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_addr = ip->ip_src;
+ (void)getnameinfo((struct sockaddr *)&sin,
+ sizeof(struct sockaddr_in), srcname, sizeof(srcname),
+ NULL, 0, NI_NUMERICHOST);
+#endif
+ }
+#ifdef INET6
+ else if (ip6) {
+#ifndef _KERNEL
+ struct sockaddr_in6 sin6;
+#endif
+
+ /* jumbo payload? */
+ fp.fp_psize = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
+ fp.fp_ttl = ip6->ip6_hlim;
+ fp.fp_flags |= PF_OSFP_DF;
+ fp.fp_flags |= PF_OSFP_INET6;
+#ifdef _KERNEL
+#ifndef __rtems__
+ strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src),
+ sizeof(srcname));
+#else
+		strlcpy(srcname, ip6_sprintf(ip6buf, (struct in6_addr *)&ip6->ip6_src),
+ sizeof(srcname));
+#endif
+#else
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_addr = ip6->ip6_src;
+ (void)getnameinfo((struct sockaddr *)&sin6,
+ sizeof(struct sockaddr_in6), srcname, sizeof(srcname),
+ NULL, 0, NI_NUMERICHOST);
+#endif
+ }
+#endif
+ else
+ return (NULL);
+ fp.fp_wsize = ntohs(tcp->th_win);
+
+
+ cnt = (tcp->th_off << 2) - sizeof(*tcp);
+ optp = (const u_int8_t *)((const char *)tcp + sizeof(*tcp));
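+	/*
+	 * Walk the TCP options; each option kind is shifted into
+	 * fp_tcpopts PF_OSFP_TCPOPT_BITS at a time, so the order and
+	 * count of options become part of the fingerprint.
+	 */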
+ for (; cnt > 0; cnt -= optlen, optp += optlen) {
+ if (*optp == TCPOPT_EOL)
+ break;
+
+ fp.fp_optcnt++;
+ if (*optp == TCPOPT_NOP) {
+ fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) |
+ PF_OSFP_TCPOPT_NOP;
+ optlen = 1;
+ } else {
+ if (cnt < 2)
+ return (NULL);
+ optlen = optp[1];
+ if (optlen > cnt || optlen < 2)
+ return (NULL);
+ switch (*optp) {
+ case TCPOPT_MAXSEG:
+ if (optlen >= TCPOLEN_MAXSEG)
+ memcpy(&fp.fp_mss, &optp[2],
+ sizeof(fp.fp_mss));
+ fp.fp_tcpopts = (fp.fp_tcpopts <<
+ PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_MSS;
+ NTOHS(fp.fp_mss);
+ break;
+ case TCPOPT_WINDOW:
+ if (optlen >= TCPOLEN_WINDOW)
+ memcpy(&fp.fp_wscale, &optp[2],
+ sizeof(fp.fp_wscale));
+ NTOHS(fp.fp_wscale);
+ fp.fp_tcpopts = (fp.fp_tcpopts <<
+ PF_OSFP_TCPOPT_BITS) |
+ PF_OSFP_TCPOPT_WSCALE;
+ break;
+ case TCPOPT_SACK_PERMITTED:
+ fp.fp_tcpopts = (fp.fp_tcpopts <<
+ PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_SACK;
+ break;
+ case TCPOPT_TIMESTAMP:
+ if (optlen >= TCPOLEN_TIMESTAMP) {
+ u_int32_t ts;
+ memcpy(&ts, &optp[2], sizeof(ts));
+ if (ts == 0)
+ fp.fp_flags |= PF_OSFP_TS0;
+
+ }
+ fp.fp_tcpopts = (fp.fp_tcpopts <<
+ PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_TS;
+ break;
+ default:
+ return (NULL);
+ }
+ }
+ optlen = MAX(optlen, 1); /* paranoia */
+ }
+
+ DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) "
+ "(TS=%s,M=%s%d,W=%s%d)\n",
+ srcname, ntohs(tcp->th_sport),
+ fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0,
+ fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt,
+ (fp.fp_flags & PF_OSFP_TS0) ? "0" : "",
+ (fp.fp_flags & PF_OSFP_MSS_MOD) ? "%" :
+ (fp.fp_flags & PF_OSFP_MSS_DC) ? "*" : "",
+ fp.fp_mss,
+ (fp.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" :
+ (fp.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "",
+ fp.fp_wscale);
+
+ if ((fpresult = pf_osfp_find(&pf_osfp_list, &fp,
+ PF_OSFP_MAXTTL_OFFSET)))
+ return (&fpresult->fp_oses);
+ return (NULL);
+}
+
+/* Match a fingerprint ID against a list of OSes */
+int
+pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os)
+{
+ struct pf_osfp_entry *entry;
+ int os_class, os_version, os_subtype;
+ int en_class, en_version, en_subtype;
+
+ if (os == PF_OSFP_ANY)
+ return (1);
+ if (list == NULL) {
+ DPFPRINTF("osfp no match against %x\n", os);
+ return (os == PF_OSFP_UNKNOWN);
+ }
+ PF_OSFP_UNPACK(os, os_class, os_version, os_subtype);
+ SLIST_FOREACH(entry, list, fp_entry) {
+ PF_OSFP_UNPACK(entry->fp_os, en_class, en_version, en_subtype);
+ if ((os_class == PF_OSFP_ANY || en_class == os_class) &&
+ (os_version == PF_OSFP_ANY || en_version == os_version) &&
+ (os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)) {
+ DPFPRINTF("osfp matched %s %s %s %x==%x\n",
+ entry->fp_class_nm, entry->fp_version_nm,
+ entry->fp_subtype_nm, os, entry->fp_os);
+ return (1);
+ }
+ }
+ DPFPRINTF("fingerprint 0x%x didn't match\n", os);
+ return (0);
+}
+
+/* Initialize the OS fingerprint system */
+#ifdef __FreeBSD__
+int
+#else
+void
+#endif
+pf_osfp_initialize(void)
+{
+#if defined(__FreeBSD__) && defined(_KERNEL)
+ int error = ENOMEM;
+
+ do {
+ pf_osfp_entry_pl = pf_osfp_pl = NULL;
+ UMA_CREATE(pf_osfp_entry_pl, struct pf_osfp_entry, "pfospfen");
+ UMA_CREATE(pf_osfp_pl, struct pf_os_fingerprint, "pfosfp");
+ error = 0;
+	} while (0);
+#else
+ pool_init(&pf_osfp_entry_pl, sizeof(struct pf_osfp_entry), 0, 0, 0,
+ "pfosfpen", &pool_allocator_nointr);
+ pool_init(&pf_osfp_pl, sizeof(struct pf_os_fingerprint), 0, 0, 0,
+ "pfosfp", &pool_allocator_nointr);
+#endif
+ SLIST_INIT(&pf_osfp_list);
+#ifdef __FreeBSD__
+#ifdef _KERNEL
+ return (error);
+#else
+ return (0);
+#endif
+#endif
+}
+
+#if defined(__FreeBSD__) && defined(_KERNEL)
+void
+pf_osfp_cleanup(void)
+{
+ UMA_DESTROY(pf_osfp_entry_pl);
+ UMA_DESTROY(pf_osfp_pl);
+}
+#endif
+
+/* Flush the fingerprint list */
+void
+pf_osfp_flush(void)
+{
+ struct pf_os_fingerprint *fp;
+ struct pf_osfp_entry *entry;
+
+ while ((fp = SLIST_FIRST(&pf_osfp_list))) {
+ SLIST_REMOVE_HEAD(&pf_osfp_list, fp_next);
+ while ((entry = SLIST_FIRST(&fp->fp_oses))) {
+ SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry);
+ pool_put(&pf_osfp_entry_pl, entry);
+ }
+ pool_put(&pf_osfp_pl, fp);
+ }
+}
+
+
+/* Add a fingerprint */
+int
+pf_osfp_add(struct pf_osfp_ioctl *fpioc)
+{
+ struct pf_os_fingerprint *fp, fpadd;
+ struct pf_osfp_entry *entry;
+
+ memset(&fpadd, 0, sizeof(fpadd));
+ fpadd.fp_tcpopts = fpioc->fp_tcpopts;
+ fpadd.fp_wsize = fpioc->fp_wsize;
+ fpadd.fp_psize = fpioc->fp_psize;
+ fpadd.fp_mss = fpioc->fp_mss;
+ fpadd.fp_flags = fpioc->fp_flags;
+ fpadd.fp_optcnt = fpioc->fp_optcnt;
+ fpadd.fp_wscale = fpioc->fp_wscale;
+ fpadd.fp_ttl = fpioc->fp_ttl;
+
+ DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d "
+ "(TS=%s,M=%s%d,W=%s%d) %x\n",
+ fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm,
+ fpioc->fp_os.fp_subtype_nm,
+ (fpadd.fp_flags & PF_OSFP_WSIZE_MOD) ? "%" :
+ (fpadd.fp_flags & PF_OSFP_WSIZE_MSS) ? "S" :
+ (fpadd.fp_flags & PF_OSFP_WSIZE_MTU) ? "T" :
+ (fpadd.fp_flags & PF_OSFP_WSIZE_DC) ? "*" : "",
+ fpadd.fp_wsize,
+ fpadd.fp_ttl,
+ (fpadd.fp_flags & PF_OSFP_DF) ? 1 : 0,
+ (fpadd.fp_flags & PF_OSFP_PSIZE_MOD) ? "%" :
+ (fpadd.fp_flags & PF_OSFP_PSIZE_DC) ? "*" : "",
+ fpadd.fp_psize,
+ (long long int)fpadd.fp_tcpopts, fpadd.fp_optcnt,
+ (fpadd.fp_flags & PF_OSFP_TS0) ? "0" : "",
+ (fpadd.fp_flags & PF_OSFP_MSS_MOD) ? "%" :
+ (fpadd.fp_flags & PF_OSFP_MSS_DC) ? "*" : "",
+ fpadd.fp_mss,
+ (fpadd.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" :
+ (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "",
+ fpadd.fp_wscale,
+ fpioc->fp_os.fp_os);
+
+
+ if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) {
+ SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) {
+ if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os))
+ return (EEXIST);
+ }
+ if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL)
+ return (ENOMEM);
+ } else {
+ if ((fp = pool_get(&pf_osfp_pl, PR_NOWAIT)) == NULL)
+ return (ENOMEM);
+ memset(fp, 0, sizeof(*fp));
+ fp->fp_tcpopts = fpioc->fp_tcpopts;
+ fp->fp_wsize = fpioc->fp_wsize;
+ fp->fp_psize = fpioc->fp_psize;
+ fp->fp_mss = fpioc->fp_mss;
+ fp->fp_flags = fpioc->fp_flags;
+ fp->fp_optcnt = fpioc->fp_optcnt;
+ fp->fp_wscale = fpioc->fp_wscale;
+ fp->fp_ttl = fpioc->fp_ttl;
+ SLIST_INIT(&fp->fp_oses);
+ if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) {
+ pool_put(&pf_osfp_pl, fp);
+ return (ENOMEM);
+ }
+ pf_osfp_insert(&pf_osfp_list, fp);
+ }
+ memcpy(entry, &fpioc->fp_os, sizeof(*entry));
+
+ /* Make sure the strings are NUL terminated */
+ entry->fp_class_nm[sizeof(entry->fp_class_nm)-1] = '\0';
+ entry->fp_version_nm[sizeof(entry->fp_version_nm)-1] = '\0';
+ entry->fp_subtype_nm[sizeof(entry->fp_subtype_nm)-1] = '\0';
+
+ SLIST_INSERT_HEAD(&fp->fp_oses, entry, fp_entry);
+
+#ifdef PFDEBUG
+ if ((fp = pf_osfp_validate()))
+ printf("Invalid fingerprint list\n");
+#endif /* PFDEBUG */
+ return (0);
+}
+
+
+/* Find a fingerprint in the list */
+struct pf_os_fingerprint *
+pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find,
+ u_int8_t ttldiff)
+{
+ struct pf_os_fingerprint *f;
+
+#define MATCH_INT(_MOD, _DC, _field) \
+ if ((f->fp_flags & _DC) == 0) { \
+ if ((f->fp_flags & _MOD) == 0) { \
+ if (f->_field != find->_field) \
+ continue; \
+ } else { \
+ if (f->_field == 0 || find->_field % f->_field) \
+ continue; \
+ } \
+ }
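+/*
+ * A field matches when its don't-care (_DC) flag is set, when the
+ * modulus (_MOD) flag is set and the probed value is a multiple of the
+ * (nonzero) fingerprint value, or when the two values are exactly equal.
+ */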
+
+ SLIST_FOREACH(f, list, fp_next) {
+ if (f->fp_tcpopts != find->fp_tcpopts ||
+ f->fp_optcnt != find->fp_optcnt ||
+ f->fp_ttl < find->fp_ttl ||
+ f->fp_ttl - find->fp_ttl > ttldiff ||
+ (f->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0)) !=
+ (find->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0)))
+ continue;
+
+ MATCH_INT(PF_OSFP_PSIZE_MOD, PF_OSFP_PSIZE_DC, fp_psize)
+ MATCH_INT(PF_OSFP_MSS_MOD, PF_OSFP_MSS_DC, fp_mss)
+ MATCH_INT(PF_OSFP_WSCALE_MOD, PF_OSFP_WSCALE_DC, fp_wscale)
+ if ((f->fp_flags & PF_OSFP_WSIZE_DC) == 0) {
+ if (f->fp_flags & PF_OSFP_WSIZE_MSS) {
+ if (find->fp_mss == 0)
+ continue;
+
+/* Some "smart" NAT devices and DSL routers will tweak the MSS size and
+ * will set it to whatever is suitable for the link type.
+ */
+#define SMART_MSS 1460
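+/* 1460 is the typical Ethernet MSS: a 1500 byte MTU minus 40 bytes of
+ * IP and TCP headers. */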
+ if ((find->fp_wsize % find->fp_mss ||
+ find->fp_wsize / find->fp_mss !=
+ f->fp_wsize) &&
+ (find->fp_wsize % SMART_MSS ||
+ find->fp_wsize / SMART_MSS !=
+ f->fp_wsize))
+ continue;
+ } else if (f->fp_flags & PF_OSFP_WSIZE_MTU) {
+ if (find->fp_mss == 0)
+ continue;
+
+#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr))
+#define SMART_MTU (SMART_MSS + MTUOFF)
+ if ((find->fp_wsize % (find->fp_mss + MTUOFF) ||
+ find->fp_wsize / (find->fp_mss + MTUOFF) !=
+ f->fp_wsize) &&
+ (find->fp_wsize % SMART_MTU ||
+ find->fp_wsize / SMART_MTU !=
+ f->fp_wsize))
+ continue;
+ } else if (f->fp_flags & PF_OSFP_WSIZE_MOD) {
+ if (f->fp_wsize == 0 || find->fp_wsize %
+ f->fp_wsize)
+ continue;
+ } else {
+ if (f->fp_wsize != find->fp_wsize)
+ continue;
+ }
+ }
+ return (f);
+ }
+
+ return (NULL);
+}
+
+/* Find an exact fingerprint in the list */
+struct pf_os_fingerprint *
+pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find)
+{
+ struct pf_os_fingerprint *f;
+
+ SLIST_FOREACH(f, list, fp_next) {
+ if (f->fp_tcpopts == find->fp_tcpopts &&
+ f->fp_wsize == find->fp_wsize &&
+ f->fp_psize == find->fp_psize &&
+ f->fp_mss == find->fp_mss &&
+ f->fp_flags == find->fp_flags &&
+ f->fp_optcnt == find->fp_optcnt &&
+ f->fp_wscale == find->fp_wscale &&
+ f->fp_ttl == find->fp_ttl)
+ return (f);
+ }
+
+ return (NULL);
+}
+
+/* Insert a fingerprint into the list */
+void
+pf_osfp_insert(struct pf_osfp_list *list, struct pf_os_fingerprint *ins)
+{
+ struct pf_os_fingerprint *f, *prev = NULL;
+
+ /* XXX need to go semi tree based. can key on tcp options */
+
+ SLIST_FOREACH(f, list, fp_next)
+ prev = f;
+ if (prev)
+ SLIST_INSERT_AFTER(prev, ins, fp_next);
+ else
+ SLIST_INSERT_HEAD(list, ins, fp_next);
+}
+
+/* Fill a fingerprint by its number (from an ioctl) */
+int
+pf_osfp_get(struct pf_osfp_ioctl *fpioc)
+{
+ struct pf_os_fingerprint *fp;
+ struct pf_osfp_entry *entry;
+ int num = fpioc->fp_getnum;
+ int i = 0;
+
+
+ memset(fpioc, 0, sizeof(*fpioc));
+ SLIST_FOREACH(fp, &pf_osfp_list, fp_next) {
+ SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) {
+ if (i++ == num) {
+ fpioc->fp_mss = fp->fp_mss;
+ fpioc->fp_wsize = fp->fp_wsize;
+ fpioc->fp_flags = fp->fp_flags;
+ fpioc->fp_psize = fp->fp_psize;
+ fpioc->fp_ttl = fp->fp_ttl;
+ fpioc->fp_wscale = fp->fp_wscale;
+ fpioc->fp_getnum = num;
+ memcpy(&fpioc->fp_os, entry,
+ sizeof(fpioc->fp_os));
+ return (0);
+ }
+ }
+ }
+
+ return (EBUSY);
+}
+
+
+/* Validate that each signature is reachable */
+struct pf_os_fingerprint *
+pf_osfp_validate(void)
+{
+ struct pf_os_fingerprint *f, *f2, find;
+
+ SLIST_FOREACH(f, &pf_osfp_list, fp_next) {
+ memcpy(&find, f, sizeof(find));
+
+ /* We do a few MSS/th_win percolations to make things unique */
+ if (find.fp_mss == 0)
+ find.fp_mss = 128;
+ if (f->fp_flags & PF_OSFP_WSIZE_MSS)
+			find.fp_wsize *= find.fp_mss;
+ else if (f->fp_flags & PF_OSFP_WSIZE_MTU)
+ find.fp_wsize *= (find.fp_mss + 40);
+ else if (f->fp_flags & PF_OSFP_WSIZE_MOD)
+ find.fp_wsize *= 2;
+ if (f != (f2 = pf_osfp_find(&pf_osfp_list, &find, 0))) {
+ if (f2)
+ printf("Found \"%s %s %s\" instead of "
+ "\"%s %s %s\"\n",
+ SLIST_FIRST(&f2->fp_oses)->fp_class_nm,
+ SLIST_FIRST(&f2->fp_oses)->fp_version_nm,
+ SLIST_FIRST(&f2->fp_oses)->fp_subtype_nm,
+ SLIST_FIRST(&f->fp_oses)->fp_class_nm,
+ SLIST_FIRST(&f->fp_oses)->fp_version_nm,
+ SLIST_FIRST(&f->fp_oses)->fp_subtype_nm);
+ else
+ printf("Couldn't find \"%s %s %s\"\n",
+ SLIST_FIRST(&f->fp_oses)->fp_class_nm,
+ SLIST_FIRST(&f->fp_oses)->fp_version_nm,
+ SLIST_FIRST(&f->fp_oses)->fp_subtype_nm);
+ return (f);
+ }
+ }
+ return (NULL);
+}
diff --git a/freebsd/sys/contrib/pf/net/pf_ruleset.c b/freebsd/sys/contrib/pf/net/pf_ruleset.c
new file mode 100644
index 00000000..8705d4ca
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pf_ruleset.c
@@ -0,0 +1,433 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $OpenBSD: pf_ruleset.c,v 1.1 2006/10/27 13:56:51 mcbride Exp $ */
+
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2002,2003 Henning Brauer
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Effort sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F30602-01-2-0537.
+ *
+ */
+
+#ifdef __FreeBSD__
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#endif
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/socket.h>
+#ifdef _KERNEL
+# include <freebsd/sys/systm.h>
+#endif /* _KERNEL */
+#include <freebsd/sys/mbuf.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/pfvar.h>
+
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif /* INET6 */
+
+
+#ifdef _KERNEL
+# define DPFPRINTF(format, x...) \
+ if (pf_status.debug >= PF_DEBUG_NOISY) \
+ printf(format , ##x)
+#ifdef __FreeBSD__
+#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT)
+#else
+#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK)
+#endif
+#define rs_free(x) free(x, M_TEMP)
+
+#else
+/* Userland equivalents so we can lend code to pfctl et al. */
+
+# include <freebsd/arpa/inet.h>
+# include <freebsd/errno.h>
+# include <freebsd/stdio.h>
+# include <freebsd/stdlib.h>
+# include <freebsd/string.h>
+# define rs_malloc(x) malloc(x)
+# define rs_free(x) free(x)
+
+# ifdef PFDEBUG
+# include <freebsd/sys/stdarg.h>
+# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x)
+# else
+# define DPFPRINTF(format, x...) ((void)0)
+# endif /* PFDEBUG */
+#endif /* _KERNEL */
+
+
+struct pf_anchor_global pf_anchors;
+struct pf_anchor pf_main_anchor;
+
+#ifndef __FreeBSD__
+/* XXX: hum? */
+int pf_get_ruleset_number(u_int8_t);
+void pf_init_ruleset(struct pf_ruleset *);
+int pf_anchor_setup(struct pf_rule *,
+ const struct pf_ruleset *, const char *);
+int pf_anchor_copyout(const struct pf_ruleset *,
+ const struct pf_rule *, struct pfioc_rule *);
+void pf_anchor_remove(struct pf_rule *);
+#endif
+
+static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *);
+
+RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare);
+RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare);
+
+static __inline int
+pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b)
+{
+ int c = strcmp(a->path, b->path);
+
+ return (c ? (c < 0 ? -1 : 1) : 0);
+}
+
+int
+pf_get_ruleset_number(u_int8_t action)
+{
+ switch (action) {
+ case PF_SCRUB:
+ case PF_NOSCRUB:
+ return (PF_RULESET_SCRUB);
+ break;
+ case PF_PASS:
+ case PF_DROP:
+ return (PF_RULESET_FILTER);
+ break;
+ case PF_NAT:
+ case PF_NONAT:
+ return (PF_RULESET_NAT);
+ break;
+ case PF_BINAT:
+ case PF_NOBINAT:
+ return (PF_RULESET_BINAT);
+ break;
+ case PF_RDR:
+ case PF_NORDR:
+ return (PF_RULESET_RDR);
+ break;
+ default:
+ return (PF_RULESET_MAX);
+ break;
+ }
+}
+
+void
+pf_init_ruleset(struct pf_ruleset *ruleset)
+{
+ int i;
+
+ memset(ruleset, 0, sizeof(struct pf_ruleset));
+ for (i = 0; i < PF_RULESET_MAX; i++) {
+ TAILQ_INIT(&ruleset->rules[i].queues[0]);
+ TAILQ_INIT(&ruleset->rules[i].queues[1]);
+ ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0];
+ ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1];
+ }
+}
+
+struct pf_anchor *
+pf_find_anchor(const char *path)
+{
+ struct pf_anchor *key, *found;
+
+	key = (struct pf_anchor *)rs_malloc(sizeof(*key));
+	if (key == NULL)	/* rs_malloc can fail with M_NOWAIT */
+		return (NULL);
+	memset(key, 0, sizeof(*key));
+ strlcpy(key->path, path, sizeof(key->path));
+ found = RB_FIND(pf_anchor_global, &pf_anchors, key);
+ rs_free(key);
+ return (found);
+}
+
+struct pf_ruleset *
+pf_find_ruleset(const char *path)
+{
+ struct pf_anchor *anchor;
+
+ while (*path == '/')
+ path++;
+ if (!*path)
+ return (&pf_main_ruleset);
+ anchor = pf_find_anchor(path);
+ if (anchor == NULL)
+ return (NULL);
+ else
+ return (&anchor->ruleset);
+}
+
+struct pf_ruleset *
+pf_find_or_create_ruleset(const char *path)
+{
+ char *p, *q, *r;
+ struct pf_ruleset *ruleset;
+#ifdef __FreeBSD__
+ struct pf_anchor *anchor = NULL, *dup, *parent = NULL;
+#else
+ struct pf_anchor *anchor, *dup, *parent = NULL;
+#endif
+
+ if (path[0] == 0)
+ return (&pf_main_ruleset);
+ while (*path == '/')
+ path++;
+ ruleset = pf_find_ruleset(path);
+ if (ruleset != NULL)
+ return (ruleset);
+	p = (char *)rs_malloc(MAXPATHLEN);
+	if (p == NULL)		/* rs_malloc can fail with M_NOWAIT */
+		return (NULL);
+	bzero(p, MAXPATHLEN);
+ strlcpy(p, path, MAXPATHLEN);
+ while (parent == NULL && (q = strrchr(p, '/')) != NULL) {
+ *q = 0;
+ if ((ruleset = pf_find_ruleset(p)) != NULL) {
+ parent = ruleset->anchor;
+ break;
+ }
+ }
+ if (q == NULL)
+ q = p;
+ else
+ q++;
+ strlcpy(p, path, MAXPATHLEN);
+ if (!*q) {
+ rs_free(p);
+ return (NULL);
+ }
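+	/*
+	 * Walk the remaining components left to right, creating one
+	 * anchor per component: creating "foo/bar/baz" from scratch
+	 * yields anchors with paths "foo", "foo/bar" and "foo/bar/baz".
+	 */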
+ while ((r = strchr(q, '/')) != NULL || *q) {
+ if (r != NULL)
+ *r = 0;
+ if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE ||
+ (parent != NULL && strlen(parent->path) >=
+ MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) {
+ rs_free(p);
+ return (NULL);
+ }
+ anchor = (struct pf_anchor *)rs_malloc(sizeof(*anchor));
+ if (anchor == NULL) {
+ rs_free(p);
+ return (NULL);
+ }
+ memset(anchor, 0, sizeof(*anchor));
+ RB_INIT(&anchor->children);
+ strlcpy(anchor->name, q, sizeof(anchor->name));
+ if (parent != NULL) {
+ strlcpy(anchor->path, parent->path,
+ sizeof(anchor->path));
+ strlcat(anchor->path, "/", sizeof(anchor->path));
+ }
+ strlcat(anchor->path, anchor->name, sizeof(anchor->path));
+ if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) !=
+ NULL) {
+ printf("pf_find_or_create_ruleset: RB_INSERT1 "
+ "'%s' '%s' collides with '%s' '%s'\n",
+ anchor->path, anchor->name, dup->path, dup->name);
+ rs_free(anchor);
+ rs_free(p);
+ return (NULL);
+ }
+ if (parent != NULL) {
+ anchor->parent = parent;
+ if ((dup = RB_INSERT(pf_anchor_node, &parent->children,
+ anchor)) != NULL) {
+ printf("pf_find_or_create_ruleset: "
+ "RB_INSERT2 '%s' '%s' collides with "
+ "'%s' '%s'\n", anchor->path, anchor->name,
+ dup->path, dup->name);
+ RB_REMOVE(pf_anchor_global, &pf_anchors,
+ anchor);
+ rs_free(anchor);
+ rs_free(p);
+ return (NULL);
+ }
+ }
+ pf_init_ruleset(&anchor->ruleset);
+ anchor->ruleset.anchor = anchor;
+ parent = anchor;
+ if (r != NULL)
+ q = r + 1;
+ else
+ *q = 0;
+ }
+ rs_free(p);
+ return (&anchor->ruleset);
+}
+
+void
+pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset)
+{
+ struct pf_anchor *parent;
+ int i;
+
+ while (ruleset != NULL) {
+ if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL ||
+ !RB_EMPTY(&ruleset->anchor->children) ||
+ ruleset->anchor->refcnt > 0 || ruleset->tables > 0 ||
+ ruleset->topen)
+ return;
+ for (i = 0; i < PF_RULESET_MAX; ++i)
+ if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) ||
+ !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) ||
+ ruleset->rules[i].inactive.open)
+ return;
+ RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor);
+ if ((parent = ruleset->anchor->parent) != NULL)
+ RB_REMOVE(pf_anchor_node, &parent->children,
+ ruleset->anchor);
+ rs_free(ruleset->anchor);
+ if (parent == NULL)
+ return;
+ ruleset = &parent->ruleset;
+ }
+}
+
+int
+pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s,
+ const char *name)
+{
+ char *p, *path;
+ struct pf_ruleset *ruleset;
+
+ r->anchor = NULL;
+ r->anchor_relative = 0;
+ r->anchor_wildcard = 0;
+ if (!name[0])
+ return (0);
+	path = (char *)rs_malloc(MAXPATHLEN);
+	if (path == NULL)	/* rs_malloc can fail with M_NOWAIT */
+		return (1);
+	bzero(path, MAXPATHLEN);
+ if (name[0] == '/')
+ strlcpy(path, name + 1, MAXPATHLEN);
+ else {
+ /* relative path */
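+		/* e.g. "../../foo" under anchor "a/b/c" resolves to "a/foo" */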
+ r->anchor_relative = 1;
+ if (s->anchor == NULL || !s->anchor->path[0])
+ path[0] = 0;
+ else
+ strlcpy(path, s->anchor->path, MAXPATHLEN);
+ while (name[0] == '.' && name[1] == '.' && name[2] == '/') {
+ if (!path[0]) {
+ printf("pf_anchor_setup: .. beyond root\n");
+ rs_free(path);
+ return (1);
+ }
+ if ((p = strrchr(path, '/')) != NULL)
+ *p = 0;
+ else
+ path[0] = 0;
+ r->anchor_relative++;
+ name += 3;
+ }
+ if (path[0])
+ strlcat(path, "/", MAXPATHLEN);
+ strlcat(path, name, MAXPATHLEN);
+ }
+ if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) {
+ r->anchor_wildcard = 1;
+ *p = 0;
+ }
+ ruleset = pf_find_or_create_ruleset(path);
+ rs_free(path);
+ if (ruleset == NULL || ruleset->anchor == NULL) {
+ printf("pf_anchor_setup: ruleset\n");
+ return (1);
+ }
+ r->anchor = ruleset->anchor;
+ r->anchor->refcnt++;
+ return (0);
+}
+
+int
+pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r,
+ struct pfioc_rule *pr)
+{
+ pr->anchor_call[0] = 0;
+ if (r->anchor == NULL)
+ return (0);
+ if (!r->anchor_relative) {
+ strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call));
+ strlcat(pr->anchor_call, r->anchor->path,
+ sizeof(pr->anchor_call));
+ } else {
+ char *a, *p;
+ int i;
+
+		a = (char *)rs_malloc(MAXPATHLEN);
+		if (a == NULL)	/* rs_malloc can fail with M_NOWAIT */
+			return (1);
+		bzero(a, MAXPATHLEN);
+ if (rs->anchor == NULL)
+ a[0] = 0;
+ else
+ strlcpy(a, rs->anchor->path, MAXPATHLEN);
+ for (i = 1; i < r->anchor_relative; ++i) {
+ if ((p = strrchr(a, '/')) == NULL)
+ p = a;
+ *p = 0;
+ strlcat(pr->anchor_call, "../",
+ sizeof(pr->anchor_call));
+ }
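+		/*
+		 * 'a' now holds the common ancestor path; the target
+		 * anchor must lie beneath it, or the rule is broken.
+		 */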
+ if (strncmp(a, r->anchor->path, strlen(a))) {
+ printf("pf_anchor_copyout: '%s' '%s'\n", a,
+ r->anchor->path);
+ rs_free(a);
+ return (1);
+ }
+ if (strlen(r->anchor->path) > strlen(a))
+ strlcat(pr->anchor_call, r->anchor->path + (a[0] ?
+ strlen(a) + 1 : 0), sizeof(pr->anchor_call));
+ rs_free(a);
+ }
+ if (r->anchor_wildcard)
+ strlcat(pr->anchor_call, pr->anchor_call[0] ? "/*" : "*",
+ sizeof(pr->anchor_call));
+ return (0);
+}
+
+void
+pf_anchor_remove(struct pf_rule *r)
+{
+ if (r->anchor == NULL)
+ return;
+ if (r->anchor->refcnt <= 0) {
+ printf("pf_anchor_remove: broken refcount\n");
+ r->anchor = NULL;
+ return;
+ }
+ if (!--r->anchor->refcnt)
+ pf_remove_if_empty_ruleset(&r->anchor->ruleset);
+ r->anchor = NULL;
+}
diff --git a/freebsd/sys/contrib/pf/net/pf_subr.c b/freebsd/sys/contrib/pf/net/pf_subr.c
new file mode 100644
index 00000000..14cf232e
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pf_subr.c
@@ -0,0 +1,170 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/libkern.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/md5.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/random.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/time.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/route.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/udp.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/if_ether.h>
+#include <freebsd/net/pfvar.h>
+
+/*
+ * Following is where TCP initial sequence number generation occurs.
+ *
+ * There are two places where we must use initial sequence numbers:
+ * 1. In SYN-ACK packets.
+ * 2. In SYN packets.
+ *
+ * All ISNs for SYN-ACK packets are generated by the syncache. See
+ * tcp_syncache.c for details.
+ *
+ * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling
+ * depends on this property. In addition, these ISNs should be
+ * unguessable so as to prevent connection hijacking. To satisfy
+ * the requirements of this situation, the algorithm outlined in
+ * RFC 1948 is used, with only small modifications.
+ *
+ * Implementation details:
+ *
+ * Time is based on the system timer, and is corrected so that it
+ * increases by one megabyte per second. This allows for proper
+ * recycling on high speed LANs while still leaving over an hour
+ * before rollover.
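+ * (The 32-bit sequence space divided by 2^20 bytes per second gives
+ * 2^32 / 2^20 = 4096 seconds, or roughly 68 minutes, per wrap.)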
+ *
+ * As reading the *exact* system time is too expensive to be done
+ * whenever setting up a TCP connection, we increment the time
+ * offset in two ways. First, a small random positive increment
+ * is added to isn_offset for each connection that is set up.
+ * Second, the function tcp_isn_tick fires once per clock tick
+ * and increments isn_offset as necessary so that sequence numbers
+ * are incremented at approximately ISN_BYTES_PER_SECOND. The
+ * random positive increments serve only to ensure that the same
+ * exact sequence number is never sent out twice (as could otherwise
+ * happen when a port is recycled in less than the system tick
+ * interval).
+ *
+ * net.inet.tcp.isn_reseed_interval controls the number of seconds
+ * between seeding of isn_secret. This is normally set to zero,
+ * as reseeding should not be necessary.
+ *
+ * Locking of the global variables isn_secret, isn_last_reseed, isn_offset,
+ * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In
+ * general, this means holding an exclusive (write) lock.
+ */
+
+#define ISN_BYTES_PER_SECOND 1048576
+#define ISN_STATIC_INCREMENT 4096
+#define ISN_RANDOM_INCREMENT (4096 - 1)
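+
+/* Each new connection advances the offset by 4096 plus 0-4095 at random. */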
+
+static u_char pf_isn_secret[32];
+static int pf_isn_last_reseed;
+static u_int32_t pf_isn_offset;
+
+u_int32_t
+pf_new_isn(struct pf_state *s)
+{
+ MD5_CTX isn_ctx;
+ u_int32_t md5_buffer[4];
+ u_int32_t new_isn;
+ struct pf_state_host *src, *dst;
+
+ /* Seed if this is the first use, reseed if requested. */
+ if (pf_isn_last_reseed == 0) {
+ read_random(&pf_isn_secret, sizeof(pf_isn_secret));
+ pf_isn_last_reseed = ticks;
+ }
+
+ if (s->direction == PF_IN) {
+ src = &s->ext;
+ dst = &s->gwy;
+ } else {
+ src = &s->lan;
+ dst = &s->ext;
+ }
+
+ /* Compute the md5 hash and return the ISN. */
+ MD5Init(&isn_ctx);
+ MD5Update(&isn_ctx, (u_char *) &dst->port, sizeof(u_short));
+ MD5Update(&isn_ctx, (u_char *) &src->port, sizeof(u_short));
+#ifdef INET6
+ if (s->af == AF_INET6) {
+ MD5Update(&isn_ctx, (u_char *) &dst->addr,
+ sizeof(struct in6_addr));
+ MD5Update(&isn_ctx, (u_char *) &src->addr,
+ sizeof(struct in6_addr));
+ } else
+#endif
+ {
+ MD5Update(&isn_ctx, (u_char *) &dst->addr,
+ sizeof(struct in_addr));
+ MD5Update(&isn_ctx, (u_char *) &src->addr,
+ sizeof(struct in_addr));
+ }
+ MD5Update(&isn_ctx, (u_char *) &pf_isn_secret, sizeof(pf_isn_secret));
+ MD5Final((u_char *) &md5_buffer, &isn_ctx);
+ new_isn = (tcp_seq) md5_buffer[0];
+ pf_isn_offset += ISN_STATIC_INCREMENT +
+ (arc4random() & ISN_RANDOM_INCREMENT);
+ new_isn += pf_isn_offset;
+ return (new_isn);
+}
diff --git a/freebsd/sys/contrib/pf/net/pf_table.c b/freebsd/sys/contrib/pf/net/pf_table.c
new file mode 100644
index 00000000..b07fb7c0
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pf_table.c
@@ -0,0 +1,2363 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $OpenBSD: pf_table.c,v 1.68 2006/05/02 10:08:45 dhartmei Exp $ */
+
+/*
+ * Copyright (c) 2002 Cedric Berger
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef __FreeBSD__
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#endif
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/rwlock.h>
+#ifdef __FreeBSD__
+#include <freebsd/sys/malloc.h>
+#endif
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/netinet/in.h>
+#ifndef __FreeBSD__
+#include <freebsd/netinet/ip_ipsp.h>
+#endif
+
+#include <freebsd/net/pfvar.h>
+
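+/* Fail the ioctl with EINVAL if any flag outside 'oklist' is set. */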
+#define ACCEPT_FLAGS(oklist) \
+ do { \
+ if ((flags & ~(oklist)) & \
+ PFR_FLAG_ALLMASK) \
+ return (EINVAL); \
+ } while (0)
+
+#ifdef __FreeBSD__
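+/*
+ * copyin()/copyout() may fault and sleep, which is not allowed while
+ * holding the pf mutex, so the lock is dropped around the user copy.
+ */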
+static inline int
+_copyin(const void *uaddr, void *kaddr, size_t len)
+{
+ int r;
+
+ PF_UNLOCK();
+ r = copyin(uaddr, kaddr, len);
+ PF_LOCK();
+
+ return (r);
+}
+
+static inline int
+_copyout(const void *uaddr, void *kaddr, size_t len)
+{
+ int r;
+
+ PF_UNLOCK();
+ r = copyout(uaddr, kaddr, len);
+ PF_LOCK();
+
+ return (r);
+}
+
+#define COPYIN(from, to, size) \
+ ((flags & PFR_FLAG_USERIOCTL) ? \
+ _copyin((from), (to), (size)) : \
+ (bcopy((from), (to), (size)), 0))
+
+#define COPYOUT(from, to, size) \
+ ((flags & PFR_FLAG_USERIOCTL) ? \
+ _copyout((from), (to), (size)) : \
+ (bcopy((from), (to), (size)), 0))
+
+#else
+
+#define COPYIN(from, to, size) \
+ ((flags & PFR_FLAG_USERIOCTL) ? \
+ copyin((from), (to), (size)) : \
+ (bcopy((from), (to), (size)), 0))
+
+#define COPYOUT(from, to, size) \
+ ((flags & PFR_FLAG_USERIOCTL) ? \
+ copyout((from), (to), (size)) : \
+ (bcopy((from), (to), (size)), 0))
+
+#endif
+
+#define FILLIN_SIN(sin, addr) \
+ do { \
+ (sin).sin_len = sizeof(sin); \
+ (sin).sin_family = AF_INET; \
+ (sin).sin_addr = (addr); \
+ } while (0)
+
+#define FILLIN_SIN6(sin6, addr) \
+ do { \
+ (sin6).sin6_len = sizeof(sin6); \
+ (sin6).sin6_family = AF_INET6; \
+ (sin6).sin6_addr = (addr); \
+ } while (0)
+
+#define SWAP(type, a1, a2) \
+ do { \
+ type tmp = a1; \
+ a1 = a2; \
+ a2 = tmp; \
+ } while (0)
+
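+/* View the v4 or v6 address inside a sockaddr_union as a struct pf_addr *. */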
+#define SUNION2PF(su, af) (((af)==AF_INET) ? \
+ (struct pf_addr *)&(su)->sin.sin_addr : \
+ (struct pf_addr *)&(su)->sin6.sin6_addr)
+
+#define AF_BITS(af) (((af)==AF_INET)?32:128)
+#define ADDR_NETWORK(ad) ((ad)->pfra_net < AF_BITS((ad)->pfra_af))
+#define KENTRY_NETWORK(ke) ((ke)->pfrke_net < AF_BITS((ke)->pfrke_af))
+#define KENTRY_RNF_ROOT(ke) \
+ ((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0)
+
+#define NO_ADDRESSES (-1)
+#define ENQUEUE_UNMARKED_ONLY (1)
+#define INVERT_NEG_FLAG (1)
+
+struct pfr_walktree {
+ enum pfrw_op {
+ PFRW_MARK,
+ PFRW_SWEEP,
+ PFRW_ENQUEUE,
+ PFRW_GET_ADDRS,
+ PFRW_GET_ASTATS,
+ PFRW_POOL_GET,
+ PFRW_DYNADDR_UPDATE
+ } pfrw_op;
+ union {
+ struct pfr_addr *pfrw1_addr;
+ struct pfr_astats *pfrw1_astats;
+ struct pfr_kentryworkq *pfrw1_workq;
+ struct pfr_kentry *pfrw1_kentry;
+ struct pfi_dynaddr *pfrw1_dyn;
+ } pfrw_1;
+ int pfrw_free;
+ int pfrw_flags;
+};
+#define pfrw_addr pfrw_1.pfrw1_addr
+#define pfrw_astats pfrw_1.pfrw1_astats
+#define pfrw_workq pfrw_1.pfrw1_workq
+#define pfrw_kentry pfrw_1.pfrw1_kentry
+#define pfrw_dyn pfrw_1.pfrw1_dyn
+#define pfrw_cnt pfrw_free
+
+#define senderr(e) do { rv = (e); goto _bad; } while (0)
+
+#ifdef __FreeBSD__
+uma_zone_t pfr_ktable_pl;
+uma_zone_t pfr_kentry_pl;
+uma_zone_t pfr_kentry_pl2;
+#else
+struct pool pfr_ktable_pl;
+struct pool pfr_kentry_pl;
+struct pool pfr_kentry_pl2;
+#endif
+struct sockaddr_in pfr_sin;
+struct sockaddr_in6 pfr_sin6;
+union sockaddr_union pfr_mask;
+struct pf_addr pfr_ffaddr;
+
+void pfr_copyout_addr(struct pfr_addr *,
+ struct pfr_kentry *ke);
+int pfr_validate_addr(struct pfr_addr *);
+void pfr_enqueue_addrs(struct pfr_ktable *,
+ struct pfr_kentryworkq *, int *, int);
+void pfr_mark_addrs(struct pfr_ktable *);
+struct pfr_kentry *pfr_lookup_addr(struct pfr_ktable *,
+ struct pfr_addr *, int);
+struct pfr_kentry *pfr_create_kentry(struct pfr_addr *, int);
+void pfr_destroy_kentries(struct pfr_kentryworkq *);
+void pfr_destroy_kentry(struct pfr_kentry *);
+void pfr_insert_kentries(struct pfr_ktable *,
+ struct pfr_kentryworkq *, long);
+void pfr_remove_kentries(struct pfr_ktable *,
+ struct pfr_kentryworkq *);
+void pfr_clstats_kentries(struct pfr_kentryworkq *, long,
+ int);
+void pfr_reset_feedback(struct pfr_addr *, int, int);
+void pfr_prepare_network(union sockaddr_union *, int, int);
+int pfr_route_kentry(struct pfr_ktable *,
+ struct pfr_kentry *);
+int pfr_unroute_kentry(struct pfr_ktable *,
+ struct pfr_kentry *);
+int pfr_walktree(struct radix_node *, void *);
+int pfr_validate_table(struct pfr_table *, int, int);
+int pfr_fix_anchor(char *);
+void pfr_commit_ktable(struct pfr_ktable *, long);
+void pfr_insert_ktables(struct pfr_ktableworkq *);
+void pfr_insert_ktable(struct pfr_ktable *);
+void pfr_setflags_ktables(struct pfr_ktableworkq *);
+void pfr_setflags_ktable(struct pfr_ktable *, int);
+void pfr_clstats_ktables(struct pfr_ktableworkq *, long,
+ int);
+void pfr_clstats_ktable(struct pfr_ktable *, long, int);
+struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int);
+void pfr_destroy_ktables(struct pfr_ktableworkq *, int);
+void pfr_destroy_ktable(struct pfr_ktable *, int);
+int pfr_ktable_compare(struct pfr_ktable *,
+ struct pfr_ktable *);
+struct pfr_ktable *pfr_lookup_table(struct pfr_table *);
+void pfr_clean_node_mask(struct pfr_ktable *,
+ struct pfr_kentryworkq *);
+int pfr_table_count(struct pfr_table *, int);
+int pfr_skip_table(struct pfr_table *,
+ struct pfr_ktable *, int);
+struct pfr_kentry *pfr_kentry_byidx(struct pfr_ktable *, int, int);
+
+RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);
+RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);
+
+struct pfr_ktablehead pfr_ktables;
+struct pfr_table pfr_nulltable;
+int pfr_ktable_cnt;
+
+void
+pfr_initialize(void)
+{
+#ifndef __FreeBSD__
+ pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable), 0, 0, 0,
+ "pfrktable", &pool_allocator_oldnointr);
+ pool_init(&pfr_kentry_pl, sizeof(struct pfr_kentry), 0, 0, 0,
+ "pfrkentry", &pool_allocator_oldnointr);
+ pool_init(&pfr_kentry_pl2, sizeof(struct pfr_kentry), 0, 0, 0,
+ "pfrkentry2", NULL);
+#endif
+
+ pfr_sin.sin_len = sizeof(pfr_sin);
+ pfr_sin.sin_family = AF_INET;
+ pfr_sin6.sin6_len = sizeof(pfr_sin6);
+ pfr_sin6.sin6_family = AF_INET6;
+
+ memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr));
+}
+
+int
+pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_kentryworkq workq;
+ int s;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY);
+ if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_flags & PFR_TFLAG_CONST)
+ return (EPERM);
+ pfr_enqueue_addrs(kt, &workq, ndel, 0);
+
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ s = 0;
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ pfr_remove_kentries(kt, &workq);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ if (kt->pfrkt_cnt) {
+ printf("pfr_clr_addrs: corruption detected (%d).\n",
+ kt->pfrkt_cnt);
+ kt->pfrkt_cnt = 0;
+ }
+ }
+ return (0);
+}
+
+int
+pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *nadd, int flags)
+{
+ struct pfr_ktable *kt, *tmpkt;
+ struct pfr_kentryworkq workq;
+ struct pfr_kentry *p, *q;
+ struct pfr_addr ad;
+ int i, rv, s = 0, xadd = 0;
+ long tzero = time_second;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK);
+ if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_flags & PFR_TFLAG_CONST)
+ return (EPERM);
+ tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0);
+ if (tmpkt == NULL)
+ return (ENOMEM);
+ SLIST_INIT(&workq);
+ for (i = 0; i < size; i++) {
+ if (COPYIN(addr+i, &ad, sizeof(ad)))
+ senderr(EFAULT);
+ if (pfr_validate_addr(&ad))
+ senderr(EINVAL);
+ p = pfr_lookup_addr(kt, &ad, 1);
+ q = pfr_lookup_addr(tmpkt, &ad, 1);
+ if (flags & PFR_FLAG_FEEDBACK) {
+ if (q != NULL)
+ ad.pfra_fback = PFR_FB_DUPLICATE;
+ else if (p == NULL)
+ ad.pfra_fback = PFR_FB_ADDED;
+ else if (p->pfrke_not != ad.pfra_not)
+ ad.pfra_fback = PFR_FB_CONFLICT;
+ else
+ ad.pfra_fback = PFR_FB_NONE;
+ }
+ if (p == NULL && q == NULL) {
+ p = pfr_create_kentry(&ad, 0);
+ if (p == NULL)
+ senderr(ENOMEM);
+ if (pfr_route_kentry(tmpkt, p)) {
+ pfr_destroy_kentry(p);
+ ad.pfra_fback = PFR_FB_NONE;
+ } else {
+ SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
+ xadd++;
+ }
+ }
+ if (flags & PFR_FLAG_FEEDBACK) {
+ if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ senderr(EFAULT);
+ }
+ }
+ pfr_clean_node_mask(tmpkt, &workq);
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ pfr_insert_kentries(kt, &workq, tzero);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ } else
+ pfr_destroy_kentries(&workq);
+ if (nadd != NULL)
+ *nadd = xadd;
+ pfr_destroy_ktable(tmpkt, 0);
+ return (0);
+_bad:
+ pfr_clean_node_mask(tmpkt, &workq);
+ pfr_destroy_kentries(&workq);
+ if (flags & PFR_FLAG_FEEDBACK)
+ pfr_reset_feedback(addr, size, flags);
+ pfr_destroy_ktable(tmpkt, 0);
+ return (rv);
+}
+
+int
+pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *ndel, int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_kentryworkq workq;
+ struct pfr_kentry *p;
+ struct pfr_addr ad;
+ int i, rv, s = 0, xdel = 0, log = 1;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK);
+ if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_flags & PFR_TFLAG_CONST)
+ return (EPERM);
+ /*
+ * there are two algorithms to choose from here.
+ * with:
+ * n: number of addresses to delete
+ * N: number of addresses in the table
+ *
+ * one is O(N) and is better for large 'n'
+ * one is O(n*LOG(N)) and is better for small 'n'
+ *
+	 * the following code tries to decide which one is best.
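+	 *
+	 * (for example, with N = 1024 the loop below ends with log = 12,
+	 * so deleting more than 1024/12 = 85 addresses uses the full scan.)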
+ */
+ for (i = kt->pfrkt_cnt; i > 0; i >>= 1)
+ log++;
+ if (size > kt->pfrkt_cnt/log) {
+ /* full table scan */
+ pfr_mark_addrs(kt);
+ } else {
+ /* iterate over addresses to delete */
+ for (i = 0; i < size; i++) {
+ if (COPYIN(addr+i, &ad, sizeof(ad)))
+ return (EFAULT);
+ if (pfr_validate_addr(&ad))
+ return (EINVAL);
+ p = pfr_lookup_addr(kt, &ad, 1);
+ if (p != NULL)
+ p->pfrke_mark = 0;
+ }
+ }
+ SLIST_INIT(&workq);
+ for (i = 0; i < size; i++) {
+ if (COPYIN(addr+i, &ad, sizeof(ad)))
+ senderr(EFAULT);
+ if (pfr_validate_addr(&ad))
+ senderr(EINVAL);
+ p = pfr_lookup_addr(kt, &ad, 1);
+ if (flags & PFR_FLAG_FEEDBACK) {
+ if (p == NULL)
+ ad.pfra_fback = PFR_FB_NONE;
+ else if (p->pfrke_not != ad.pfra_not)
+ ad.pfra_fback = PFR_FB_CONFLICT;
+ else if (p->pfrke_mark)
+ ad.pfra_fback = PFR_FB_DUPLICATE;
+ else
+ ad.pfra_fback = PFR_FB_DELETED;
+ }
+ if (p != NULL && p->pfrke_not == ad.pfra_not &&
+ !p->pfrke_mark) {
+ p->pfrke_mark = 1;
+ SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
+ xdel++;
+ }
+ if (flags & PFR_FLAG_FEEDBACK)
+ if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ senderr(EFAULT);
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ pfr_remove_kentries(kt, &workq);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ }
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+_bad:
+ if (flags & PFR_FLAG_FEEDBACK)
+ pfr_reset_feedback(addr, size, flags);
+ return (rv);
+}
+
+int
+pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *size2, int *nadd, int *ndel, int *nchange, int flags,
+ u_int32_t ignore_pfrt_flags)
+{
+ struct pfr_ktable *kt, *tmpkt;
+ struct pfr_kentryworkq addq, delq, changeq;
+ struct pfr_kentry *p, *q;
+ struct pfr_addr ad;
+ int i, rv, s = 0, xadd = 0, xdel = 0, xchange = 0;
+ long tzero = time_second;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK);
+ if (pfr_validate_table(tbl, ignore_pfrt_flags, flags &
+ PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_flags & PFR_TFLAG_CONST)
+ return (EPERM);
+ tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0);
+ if (tmpkt == NULL)
+ return (ENOMEM);
+ pfr_mark_addrs(kt);
+ SLIST_INIT(&addq);
+ SLIST_INIT(&delq);
+ SLIST_INIT(&changeq);
+ for (i = 0; i < size; i++) {
+ if (COPYIN(addr+i, &ad, sizeof(ad)))
+ senderr(EFAULT);
+ if (pfr_validate_addr(&ad))
+ senderr(EINVAL);
+ ad.pfra_fback = PFR_FB_NONE;
+ p = pfr_lookup_addr(kt, &ad, 1);
+ if (p != NULL) {
+ if (p->pfrke_mark) {
+ ad.pfra_fback = PFR_FB_DUPLICATE;
+ goto _skip;
+ }
+ p->pfrke_mark = 1;
+ if (p->pfrke_not != ad.pfra_not) {
+ SLIST_INSERT_HEAD(&changeq, p, pfrke_workq);
+ ad.pfra_fback = PFR_FB_CHANGED;
+ xchange++;
+ }
+ } else {
+ q = pfr_lookup_addr(tmpkt, &ad, 1);
+ if (q != NULL) {
+ ad.pfra_fback = PFR_FB_DUPLICATE;
+ goto _skip;
+ }
+ p = pfr_create_kentry(&ad, 0);
+ if (p == NULL)
+ senderr(ENOMEM);
+ if (pfr_route_kentry(tmpkt, p)) {
+ pfr_destroy_kentry(p);
+ ad.pfra_fback = PFR_FB_NONE;
+ } else {
+ SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
+ ad.pfra_fback = PFR_FB_ADDED;
+ xadd++;
+ }
+ }
+_skip:
+ if (flags & PFR_FLAG_FEEDBACK)
+ if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ senderr(EFAULT);
+ }
+ pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY);
+ if ((flags & PFR_FLAG_FEEDBACK) && *size2) {
+ if (*size2 < size+xdel) {
+ *size2 = size+xdel;
+ senderr(0);
+ }
+ i = 0;
+ SLIST_FOREACH(p, &delq, pfrke_workq) {
+ pfr_copyout_addr(&ad, p);
+ ad.pfra_fback = PFR_FB_DELETED;
+ if (COPYOUT(&ad, addr+size+i, sizeof(ad)))
+ senderr(EFAULT);
+ i++;
+ }
+ }
+ pfr_clean_node_mask(tmpkt, &addq);
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ pfr_insert_kentries(kt, &addq, tzero);
+ pfr_remove_kentries(kt, &delq);
+ pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ } else
+ pfr_destroy_kentries(&addq);
+ if (nadd != NULL)
+ *nadd = xadd;
+ if (ndel != NULL)
+ *ndel = xdel;
+ if (nchange != NULL)
+ *nchange = xchange;
+ if ((flags & PFR_FLAG_FEEDBACK) && size2)
+ *size2 = size+xdel;
+ pfr_destroy_ktable(tmpkt, 0);
+ return (0);
+_bad:
+ pfr_clean_node_mask(tmpkt, &addq);
+ pfr_destroy_kentries(&addq);
+ if (flags & PFR_FLAG_FEEDBACK)
+ pfr_reset_feedback(addr, size, flags);
+ pfr_destroy_ktable(tmpkt, 0);
+ return (rv);
+}
+
+int
+pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *nmatch, int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_kentry *p;
+ struct pfr_addr ad;
+ int i, xmatch = 0;
+
+ ACCEPT_FLAGS(PFR_FLAG_REPLACE);
+ if (pfr_validate_table(tbl, 0, 0))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+
+ for (i = 0; i < size; i++) {
+ if (COPYIN(addr+i, &ad, sizeof(ad)))
+ return (EFAULT);
+ if (pfr_validate_addr(&ad))
+ return (EINVAL);
+ if (ADDR_NETWORK(&ad))
+ return (EINVAL);
+ p = pfr_lookup_addr(kt, &ad, 0);
+ if (flags & PFR_FLAG_REPLACE)
+ pfr_copyout_addr(&ad, p);
+ ad.pfra_fback = (p == NULL) ? PFR_FB_NONE :
+ (p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH);
+ if (p != NULL && !p->pfrke_not)
+ xmatch++;
+ if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ return (EFAULT);
+ }
+ if (nmatch != NULL)
+ *nmatch = xmatch;
+ return (0);
+}
+
+int
+pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size,
+ int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_walktree w;
+ int rv;
+
+ ACCEPT_FLAGS(0);
+ if (pfr_validate_table(tbl, 0, 0))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_cnt > *size) {
+ *size = kt->pfrkt_cnt;
+ return (0);
+ }
+
+ bzero(&w, sizeof(w));
+ w.pfrw_op = PFRW_GET_ADDRS;
+ w.pfrw_addr = addr;
+ w.pfrw_free = kt->pfrkt_cnt;
+ w.pfrw_flags = flags;
+#ifdef __FreeBSD__
+ rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
+#else
+ rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
+#endif
+ if (!rv)
+#ifdef __FreeBSD__
+ rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree,
+ &w);
+#else
+ rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
+#endif
+ if (rv)
+ return (rv);
+
+ if (w.pfrw_free) {
+ printf("pfr_get_addrs: corruption detected (%d).\n",
+ w.pfrw_free);
+ return (ENOTTY);
+ }
+ *size = kt->pfrkt_cnt;
+ return (0);
+}
+
+int
+pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size,
+ int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_walktree w;
+ struct pfr_kentryworkq workq;
+ int rv, s = 0;
+ long tzero = time_second;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC); /* XXX PFR_FLAG_CLSTATS disabled */
+ if (pfr_validate_table(tbl, 0, 0))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_cnt > *size) {
+ *size = kt->pfrkt_cnt;
+ return (0);
+ }
+
+ bzero(&w, sizeof(w));
+ w.pfrw_op = PFRW_GET_ASTATS;
+ w.pfrw_astats = addr;
+ w.pfrw_free = kt->pfrkt_cnt;
+ w.pfrw_flags = flags;
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
+#else
+ rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
+#endif
+ if (!rv)
+#ifdef __FreeBSD__
+ rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree,
+ &w);
+#else
+ rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
+#endif
+ if (!rv && (flags & PFR_FLAG_CLSTATS)) {
+ pfr_enqueue_addrs(kt, &workq, NULL, 0);
+ pfr_clstats_kentries(&workq, tzero, 0);
+ }
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ if (rv)
+ return (rv);
+
+ if (w.pfrw_free) {
+ printf("pfr_get_astats: corruption detected (%d).\n",
+ w.pfrw_free);
+ return (ENOTTY);
+ }
+ *size = kt->pfrkt_cnt;
+ return (0);
+}
+
+int
+pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *nzero, int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_kentryworkq workq;
+ struct pfr_kentry *p;
+ struct pfr_addr ad;
+ int i, rv, s = 0, xzero = 0;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK);
+ if (pfr_validate_table(tbl, 0, 0))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ SLIST_INIT(&workq);
+ for (i = 0; i < size; i++) {
+ if (COPYIN(addr+i, &ad, sizeof(ad)))
+ senderr(EFAULT);
+ if (pfr_validate_addr(&ad))
+ senderr(EINVAL);
+ p = pfr_lookup_addr(kt, &ad, 1);
+ if (flags & PFR_FLAG_FEEDBACK) {
+ ad.pfra_fback = (p != NULL) ?
+ PFR_FB_CLEARED : PFR_FB_NONE;
+ if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ senderr(EFAULT);
+ }
+ if (p != NULL) {
+ SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
+ xzero++;
+ }
+ }
+
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ pfr_clstats_kentries(&workq, 0, 0);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ }
+ if (nzero != NULL)
+ *nzero = xzero;
+ return (0);
+_bad:
+ if (flags & PFR_FLAG_FEEDBACK)
+ pfr_reset_feedback(addr, size, flags);
+ return (rv);
+}
+
+int
+pfr_validate_addr(struct pfr_addr *ad)
+{
+ int i;
+
+ switch (ad->pfra_af) {
+#ifdef INET
+ case AF_INET:
+ if (ad->pfra_net > 32)
+ return (-1);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (ad->pfra_net > 128)
+ return (-1);
+ break;
+#endif /* INET6 */
+ default:
+ return (-1);
+ }
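+	/* all address bits to the right of the prefix length must be zero */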
+ if (ad->pfra_net < 128 &&
+ (((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8))))
+ return (-1);
+ for (i = (ad->pfra_net+7)/8; i < sizeof(ad->pfra_u); i++)
+ if (((caddr_t)ad)[i])
+ return (-1);
+ if (ad->pfra_not && ad->pfra_not != 1)
+ return (-1);
+ if (ad->pfra_fback)
+ return (-1);
+ return (0);
+}
+
+void
+pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq,
+ int *naddr, int sweep)
+{
+ struct pfr_walktree w;
+
+ SLIST_INIT(workq);
+ bzero(&w, sizeof(w));
+ w.pfrw_op = sweep ? PFRW_SWEEP : PFRW_ENQUEUE;
+ w.pfrw_workq = workq;
+ if (kt->pfrkt_ip4 != NULL)
+#ifdef __FreeBSD__
+ if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree,
+ &w))
+#else
+ if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w))
+#endif
+ printf("pfr_enqueue_addrs: IPv4 walktree failed.\n");
+ if (kt->pfrkt_ip6 != NULL)
+#ifdef __FreeBSD__
+ if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree,
+ &w))
+#else
+ if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w))
+#endif
+ printf("pfr_enqueue_addrs: IPv6 walktree failed.\n");
+ if (naddr != NULL)
+ *naddr = w.pfrw_cnt;
+}
+
+void
+pfr_mark_addrs(struct pfr_ktable *kt)
+{
+ struct pfr_walktree w;
+
+ bzero(&w, sizeof(w));
+ w.pfrw_op = PFRW_MARK;
+#ifdef __FreeBSD__
+ if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w))
+#else
+ if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w))
+#endif
+ printf("pfr_mark_addrs: IPv4 walktree failed.\n");
+#ifdef __FreeBSD__
+ if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w))
+#else
+ if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w))
+#endif
+ printf("pfr_mark_addrs: IPv6 walktree failed.\n");
+}
+
+struct pfr_kentry *
+pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
+{
+ union sockaddr_union sa, mask;
+ struct radix_node_head *head = NULL; /* make the compiler happy */
+ struct pfr_kentry *ke;
+ int s;
+
+ bzero(&sa, sizeof(sa));
+ if (ad->pfra_af == AF_INET) {
+ FILLIN_SIN(sa.sin, ad->pfra_ip4addr);
+ head = kt->pfrkt_ip4;
+	} else if (ad->pfra_af == AF_INET6) {
+ FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr);
+ head = kt->pfrkt_ip6;
+ }
+ if (ADDR_NETWORK(ad)) {
+ pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net);
+ s = splsoftnet(); /* rn_lookup makes use of globals */
+#ifdef __FreeBSD__
+ PF_ASSERT(MA_OWNED);
+#endif
+ ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head);
+ splx(s);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ } else {
+ ke = (struct pfr_kentry *)rn_match(&sa, head);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ if (exact && ke && KENTRY_NETWORK(ke))
+ ke = NULL;
+ }
+ return (ke);
+}
+
+struct pfr_kentry *
+pfr_create_kentry(struct pfr_addr *ad, int intr)
+{
+ struct pfr_kentry *ke;
+
+ if (intr)
+ ke = pool_get(&pfr_kentry_pl2, PR_NOWAIT);
+ else
+ ke = pool_get(&pfr_kentry_pl, PR_NOWAIT);
+ if (ke == NULL)
+ return (NULL);
+ bzero(ke, sizeof(*ke));
+
+ if (ad->pfra_af == AF_INET)
+ FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr);
+ else if (ad->pfra_af == AF_INET6)
+ FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr);
+ ke->pfrke_af = ad->pfra_af;
+ ke->pfrke_net = ad->pfra_net;
+ ke->pfrke_not = ad->pfra_not;
+ ke->pfrke_intrpool = intr;
+ return (ke);
+}
+
+void
+pfr_destroy_kentries(struct pfr_kentryworkq *workq)
+{
+ struct pfr_kentry *p, *q;
+
+ for (p = SLIST_FIRST(workq); p != NULL; p = q) {
+ q = SLIST_NEXT(p, pfrke_workq);
+ pfr_destroy_kentry(p);
+ }
+}
+
+void
+pfr_destroy_kentry(struct pfr_kentry *ke)
+{
+ if (ke->pfrke_intrpool)
+ pool_put(&pfr_kentry_pl2, ke);
+ else
+ pool_put(&pfr_kentry_pl, ke);
+}
+
+void
+pfr_insert_kentries(struct pfr_ktable *kt,
+ struct pfr_kentryworkq *workq, long tzero)
+{
+ struct pfr_kentry *p;
+ int rv, n = 0;
+
+ SLIST_FOREACH(p, workq, pfrke_workq) {
+ rv = pfr_route_kentry(kt, p);
+ if (rv) {
+ printf("pfr_insert_kentries: cannot route entry "
+ "(code=%d).\n", rv);
+ break;
+ }
+ p->pfrke_tzero = tzero;
+ n++;
+ }
+ kt->pfrkt_cnt += n;
+}
+
+int
+pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero)
+{
+ struct pfr_kentry *p;
+ int rv;
+
+ p = pfr_lookup_addr(kt, ad, 1);
+ if (p != NULL)
+ return (0);
+ p = pfr_create_kentry(ad, 1);
+ if (p == NULL)
+ return (EINVAL);
+
+ rv = pfr_route_kentry(kt, p);
+ if (rv)
+ return (rv);
+
+ p->pfrke_tzero = tzero;
+ kt->pfrkt_cnt++;
+
+ return (0);
+}
+
+void
+pfr_remove_kentries(struct pfr_ktable *kt,
+ struct pfr_kentryworkq *workq)
+{
+ struct pfr_kentry *p;
+ int n = 0;
+
+ SLIST_FOREACH(p, workq, pfrke_workq) {
+ pfr_unroute_kentry(kt, p);
+ n++;
+ }
+ kt->pfrkt_cnt -= n;
+ pfr_destroy_kentries(workq);
+}
+
+void
+pfr_clean_node_mask(struct pfr_ktable *kt,
+ struct pfr_kentryworkq *workq)
+{
+ struct pfr_kentry *p;
+
+ SLIST_FOREACH(p, workq, pfrke_workq)
+ pfr_unroute_kentry(kt, p);
+}
+
+void
+pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange)
+{
+ struct pfr_kentry *p;
+ int s;
+
+ SLIST_FOREACH(p, workq, pfrke_workq) {
+ s = splsoftnet();
+ if (negchange)
+ p->pfrke_not = !p->pfrke_not;
+ bzero(p->pfrke_packets, sizeof(p->pfrke_packets));
+ bzero(p->pfrke_bytes, sizeof(p->pfrke_bytes));
+ splx(s);
+ p->pfrke_tzero = tzero;
+ }
+}
+
+void
+pfr_reset_feedback(struct pfr_addr *addr, int size, int flags)
+{
+ struct pfr_addr ad;
+ int i;
+
+ for (i = 0; i < size; i++) {
+ if (COPYIN(addr+i, &ad, sizeof(ad)))
+ break;
+ ad.pfra_fback = PFR_FB_NONE;
+ if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ break;
+ }
+}
+
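+/*
+ * Build a sockaddr holding the netmask for a /net prefix of the given
+ * family: af=AF_INET, net=24 yields 255.255.255.0; AF_INET6 masks are
+ * filled 32 bits at a time.
+ */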
+void
+pfr_prepare_network(union sockaddr_union *sa, int af, int net)
+{
+ int i;
+
+ bzero(sa, sizeof(*sa));
+ if (af == AF_INET) {
+ sa->sin.sin_len = sizeof(sa->sin);
+ sa->sin.sin_family = AF_INET;
+ sa->sin.sin_addr.s_addr = net ? htonl(-1 << (32-net)) : 0;
+ } else if (af == AF_INET6) {
+ sa->sin6.sin6_len = sizeof(sa->sin6);
+ sa->sin6.sin6_family = AF_INET6;
+ for (i = 0; i < 4; i++) {
+ if (net <= 32) {
+ sa->sin6.sin6_addr.s6_addr32[i] =
+ net ? htonl(-1 << (32-net)) : 0;
+ break;
+ }
+ sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF;
+ net -= 32;
+ }
+ }
+}
+
+int
+pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
+{
+ union sockaddr_union mask;
+ struct radix_node *rn;
+ struct radix_node_head *head = NULL; /* make the compiler happy */
+ int s;
+
+ bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
+ if (ke->pfrke_af == AF_INET)
+ head = kt->pfrkt_ip4;
+ else if (ke->pfrke_af == AF_INET6)
+ head = kt->pfrkt_ip6;
+
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_ASSERT(MA_OWNED);
+#endif
+ if (KENTRY_NETWORK(ke)) {
+ pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
+ rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node);
+ } else
+ rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node);
+ splx(s);
+
+ return (rn == NULL ? -1 : 0);
+}
+
+int
+pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
+{
+ union sockaddr_union mask;
+ struct radix_node *rn;
+ struct radix_node_head *head = NULL; /* make the compiler happy */
+ int s;
+
+ if (ke->pfrke_af == AF_INET)
+ head = kt->pfrkt_ip4;
+ else if (ke->pfrke_af == AF_INET6)
+ head = kt->pfrkt_ip6;
+
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_ASSERT(MA_OWNED);
+#endif
+ if (KENTRY_NETWORK(ke)) {
+ pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
+#ifdef __FreeBSD__
+ rn = rn_delete(&ke->pfrke_sa, &mask, head);
+#else
+ rn = rn_delete(&ke->pfrke_sa, &mask, head, NULL);
+#endif
+ } else
+#ifdef __FreeBSD__
+ rn = rn_delete(&ke->pfrke_sa, NULL, head);
+#else
+ rn = rn_delete(&ke->pfrke_sa, NULL, head, NULL);
+#endif
+ splx(s);
+
+ if (rn == NULL) {
+ printf("pfr_unroute_kentry: delete failed.\n");
+ return (-1);
+ }
+ return (0);
+}
+
+void
+pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke)
+{
+ bzero(ad, sizeof(*ad));
+ if (ke == NULL)
+ return;
+ ad->pfra_af = ke->pfrke_af;
+ ad->pfra_net = ke->pfrke_net;
+ ad->pfra_not = ke->pfrke_not;
+ if (ad->pfra_af == AF_INET)
+ ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr;
+ else if (ad->pfra_af == AF_INET6)
+ ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr;
+}
+
+int
+pfr_walktree(struct radix_node *rn, void *arg)
+{
+ struct pfr_kentry *ke = (struct pfr_kentry *)rn;
+ struct pfr_walktree *w = arg;
+ int s, flags = w->pfrw_flags;
+
+ switch (w->pfrw_op) {
+ case PFRW_MARK:
+ ke->pfrke_mark = 0;
+ break;
+ case PFRW_SWEEP:
+ if (ke->pfrke_mark)
+ break;
+ /* FALLTHROUGH */
+ case PFRW_ENQUEUE:
+ SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq);
+ w->pfrw_cnt++;
+ break;
+ case PFRW_GET_ADDRS:
+ if (w->pfrw_free-- > 0) {
+ struct pfr_addr ad;
+
+ pfr_copyout_addr(&ad, ke);
+ if (COPYOUT(&ad, w->pfrw_addr, sizeof(ad)))
+ return (EFAULT);
+ w->pfrw_addr++;
+ }
+ break;
+ case PFRW_GET_ASTATS:
+ if (w->pfrw_free-- > 0) {
+ struct pfr_astats as;
+
+ pfr_copyout_addr(&as.pfras_a, ke);
+
+ s = splsoftnet();
+ bcopy(ke->pfrke_packets, as.pfras_packets,
+ sizeof(as.pfras_packets));
+ bcopy(ke->pfrke_bytes, as.pfras_bytes,
+ sizeof(as.pfras_bytes));
+ splx(s);
+ as.pfras_tzero = ke->pfrke_tzero;
+
+ if (COPYOUT(&as, w->pfrw_astats, sizeof(as)))
+ return (EFAULT);
+ w->pfrw_astats++;
+ }
+ break;
+ case PFRW_POOL_GET:
+ if (ke->pfrke_not)
+ break; /* negative entries are ignored */
+ if (!w->pfrw_cnt--) {
+ w->pfrw_kentry = ke;
+ return (1); /* finish search */
+ }
+ break;
+ case PFRW_DYNADDR_UPDATE:
+ if (ke->pfrke_af == AF_INET) {
+ if (w->pfrw_dyn->pfid_acnt4++ > 0)
+ break;
+ pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net);
+ w->pfrw_dyn->pfid_addr4 = *SUNION2PF(
+ &ke->pfrke_sa, AF_INET);
+ w->pfrw_dyn->pfid_mask4 = *SUNION2PF(
+ &pfr_mask, AF_INET);
+		} else if (ke->pfrke_af == AF_INET6) {
+ if (w->pfrw_dyn->pfid_acnt6++ > 0)
+ break;
+ pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net);
+ w->pfrw_dyn->pfid_addr6 = *SUNION2PF(
+ &ke->pfrke_sa, AF_INET6);
+ w->pfrw_dyn->pfid_mask6 = *SUNION2PF(
+ &pfr_mask, AF_INET6);
+ }
+ break;
+ }
+ return (0);
+}
+
+int
+pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p;
+ int s = 0, xdel = 0;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ALLRSETS);
+ if (pfr_fix_anchor(filter->pfrt_anchor))
+ return (EINVAL);
+ if (pfr_table_count(filter, flags) < 0)
+ return (ENOENT);
+
+ SLIST_INIT(&workq);
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (pfr_skip_table(filter, p, flags))
+ continue;
+ if (!strcmp(p->pfrkt_anchor, PF_RESERVED_ANCHOR))
+ continue;
+ if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ continue;
+ p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ xdel++;
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ pfr_setflags_ktables(&workq);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ }
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+}
+
+int
+pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
+{
+ struct pfr_ktableworkq addq, changeq;
+ struct pfr_ktable *p, *q, *r, key;
+ int i, rv, s = 0, xadd = 0;
+ long tzero = time_second;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY);
+ SLIST_INIT(&addq);
+ SLIST_INIT(&changeq);
+ for (i = 0; i < size; i++) {
+ if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)))
+ senderr(EFAULT);
+ if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK,
+ flags & PFR_FLAG_USERIOCTL))
+ senderr(EINVAL);
+ key.pfrkt_flags |= PFR_TFLAG_ACTIVE;
+ p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (p == NULL) {
+ p = pfr_create_ktable(&key.pfrkt_t, tzero, 1);
+ if (p == NULL)
+ senderr(ENOMEM);
+ SLIST_FOREACH(q, &addq, pfrkt_workq) {
+ if (!pfr_ktable_compare(p, q))
+ goto _skip;
+ }
+ SLIST_INSERT_HEAD(&addq, p, pfrkt_workq);
+ xadd++;
+ if (!key.pfrkt_anchor[0])
+ goto _skip;
+
+ /* find or create root table */
+ bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor));
+ r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (r != NULL) {
+ p->pfrkt_root = r;
+ goto _skip;
+ }
+ SLIST_FOREACH(q, &addq, pfrkt_workq) {
+ if (!pfr_ktable_compare(&key, q)) {
+ p->pfrkt_root = q;
+ goto _skip;
+ }
+ }
+ key.pfrkt_flags = 0;
+ r = pfr_create_ktable(&key.pfrkt_t, 0, 1);
+ if (r == NULL)
+ senderr(ENOMEM);
+ SLIST_INSERT_HEAD(&addq, r, pfrkt_workq);
+ p->pfrkt_root = r;
+ } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
+ SLIST_FOREACH(q, &changeq, pfrkt_workq)
+ if (!pfr_ktable_compare(&key, q))
+ goto _skip;
+ p->pfrkt_nflags = (p->pfrkt_flags &
+ ~PFR_TFLAG_USRMASK) | key.pfrkt_flags;
+ SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq);
+ xadd++;
+ }
+_skip:
+ ;
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ pfr_insert_ktables(&addq);
+ pfr_setflags_ktables(&changeq);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ } else
+ pfr_destroy_ktables(&addq, 0);
+ if (nadd != NULL)
+ *nadd = xadd;
+ return (0);
+_bad:
+ pfr_destroy_ktables(&addq, 0);
+ return (rv);
+}
+
+int
+pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p, *q, key;
+ int i, s = 0, xdel = 0;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY);
+ SLIST_INIT(&workq);
+ for (i = 0; i < size; i++) {
+ if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)))
+ return (EFAULT);
+ if (pfr_validate_table(&key.pfrkt_t, 0,
+ flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
+ SLIST_FOREACH(q, &workq, pfrkt_workq)
+ if (!pfr_ktable_compare(p, q))
+ goto _skip;
+ p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ xdel++;
+ }
+_skip:
+ ;
+ }
+
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ pfr_setflags_ktables(&workq);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ }
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+}
+
+int
+pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size,
+ int flags)
+{
+ struct pfr_ktable *p;
+ int n, nn;
+
+ ACCEPT_FLAGS(PFR_FLAG_ALLRSETS);
+ if (pfr_fix_anchor(filter->pfrt_anchor))
+ return (EINVAL);
+ n = nn = pfr_table_count(filter, flags);
+ if (n < 0)
+ return (ENOENT);
+ if (n > *size) {
+ *size = n;
+ return (0);
+ }
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (pfr_skip_table(filter, p, flags))
+ continue;
+ if (n-- <= 0)
+ continue;
+ if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl)))
+ return (EFAULT);
+ }
+ if (n) {
+ printf("pfr_get_tables: corruption detected (%d).\n", n);
+ return (ENOTTY);
+ }
+ *size = nn;
+ return (0);
+}
+
+int
+pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
+ int flags)
+{
+ struct pfr_ktable *p;
+ struct pfr_ktableworkq workq;
+ int s = 0, n, nn;
+ long tzero = time_second;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC|PFR_FLAG_ALLRSETS);
+ /* XXX PFR_FLAG_CLSTATS disabled */
+ if (pfr_fix_anchor(filter->pfrt_anchor))
+ return (EINVAL);
+ n = nn = pfr_table_count(filter, flags);
+ if (n < 0)
+ return (ENOENT);
+ if (n > *size) {
+ *size = n;
+ return (0);
+ }
+ SLIST_INIT(&workq);
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (pfr_skip_table(filter, p, flags))
+ continue;
+ if (n-- <= 0)
+ continue;
+ if (!(flags & PFR_FLAG_ATOMIC))
+ s = splsoftnet();
+ if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl))) {
+ if (!(flags & PFR_FLAG_ATOMIC))
+ splx(s);
+ return (EFAULT);
+ }
+ if (!(flags & PFR_FLAG_ATOMIC))
+ splx(s);
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ }
+ if (flags & PFR_FLAG_CLSTATS)
+ pfr_clstats_ktables(&workq, tzero,
+ flags & PFR_FLAG_ADDRSTOO);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ if (n) {
+ printf("pfr_get_tstats: corruption detected (%d).\n", n);
+ return (ENOTTY);
+ }
+ *size = nn;
+ return (0);
+}
+
+int
+pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p, key;
+ int i, s = 0, xzero = 0;
+ long tzero = time_second;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ADDRSTOO);
+ SLIST_INIT(&workq);
+ for (i = 0; i < size; i++) {
+ if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)))
+ return (EFAULT);
+ if (pfr_validate_table(&key.pfrkt_t, 0, 0))
+ return (EINVAL);
+ p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (p != NULL) {
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ xzero++;
+ }
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ }
+ if (nzero != NULL)
+ *nzero = xzero;
+ return (0);
+}
+
+int
+pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag,
+ int *nchange, int *ndel, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p, *q, key;
+ int i, s = 0, xchange = 0, xdel = 0;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY);
+ if ((setflag & ~PFR_TFLAG_USRMASK) ||
+ (clrflag & ~PFR_TFLAG_USRMASK) ||
+ (setflag & clrflag))
+ return (EINVAL);
+ SLIST_INIT(&workq);
+ for (i = 0; i < size; i++) {
+ if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)))
+ return (EFAULT);
+ if (pfr_validate_table(&key.pfrkt_t, 0,
+ flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
+ p->pfrkt_nflags = (p->pfrkt_flags | setflag) &
+ ~clrflag;
+ if (p->pfrkt_nflags == p->pfrkt_flags)
+ goto _skip;
+ SLIST_FOREACH(q, &workq, pfrkt_workq)
+ if (!pfr_ktable_compare(p, q))
+ goto _skip;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) &&
+ (clrflag & PFR_TFLAG_PERSIST) &&
+ !(p->pfrkt_flags & PFR_TFLAG_REFERENCED))
+ xdel++;
+ else
+ xchange++;
+ }
+_skip:
+ ;
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ pfr_setflags_ktables(&workq);
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ }
+ if (nchange != NULL)
+ *nchange = xchange;
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+}
+
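+/*
+ * Begin an inactive-table transaction for the given anchor: wipe any
+ * stale inactive tables and hand back a ticket that pfr_ina_define()
+ * and pfr_ina_commit() must present.
+ */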
+int
+pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p;
+ struct pf_ruleset *rs;
+ int xdel = 0;
+
+ ACCEPT_FLAGS(PFR_FLAG_DUMMY);
+ rs = pf_find_or_create_ruleset(trs->pfrt_anchor);
+ if (rs == NULL)
+ return (ENOMEM);
+ SLIST_INIT(&workq);
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
+ pfr_skip_table(trs, p, 0))
+ continue;
+ p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ xdel++;
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ pfr_setflags_ktables(&workq);
+ if (ticket != NULL)
+ *ticket = ++rs->tticket;
+ rs->topen = 1;
+ } else
+ pf_remove_if_empty_ruleset(rs);
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+}
+
+int
+pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *nadd, int *naddr, u_int32_t ticket, int flags)
+{
+ struct pfr_ktableworkq tableq;
+ struct pfr_kentryworkq addrq;
+ struct pfr_ktable *kt, *rt, *shadow, key;
+ struct pfr_kentry *p;
+ struct pfr_addr ad;
+ struct pf_ruleset *rs;
+ int i, rv, xadd = 0, xaddr = 0;
+
+ ACCEPT_FLAGS(PFR_FLAG_DUMMY|PFR_FLAG_ADDRSTOO);
+ if (size && !(flags & PFR_FLAG_ADDRSTOO))
+ return (EINVAL);
+ if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK,
+ flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ rs = pf_find_ruleset(tbl->pfrt_anchor);
+ if (rs == NULL || !rs->topen || ticket != rs->tticket)
+ return (EBUSY);
+ tbl->pfrt_flags |= PFR_TFLAG_INACTIVE;
+ SLIST_INIT(&tableq);
+ kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl);
+ if (kt == NULL) {
+ kt = pfr_create_ktable(tbl, 0, 1);
+ if (kt == NULL)
+ return (ENOMEM);
+ SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq);
+ xadd++;
+ if (!tbl->pfrt_anchor[0])
+ goto _skip;
+
+ /* find or create root table */
+ bzero(&key, sizeof(key));
+ strlcpy(key.pfrkt_name, tbl->pfrt_name, sizeof(key.pfrkt_name));
+ rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (rt != NULL) {
+ kt->pfrkt_root = rt;
+ goto _skip;
+ }
+ rt = pfr_create_ktable(&key.pfrkt_t, 0, 1);
+ if (rt == NULL) {
+ pfr_destroy_ktables(&tableq, 0);
+ return (ENOMEM);
+ }
+ SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq);
+ kt->pfrkt_root = rt;
+ } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE))
+ xadd++;
+_skip:
+ shadow = pfr_create_ktable(tbl, 0, 0);
+ if (shadow == NULL) {
+ pfr_destroy_ktables(&tableq, 0);
+ return (ENOMEM);
+ }
+ SLIST_INIT(&addrq);
+ for (i = 0; i < size; i++) {
+ if (COPYIN(addr+i, &ad, sizeof(ad)))
+ senderr(EFAULT);
+ if (pfr_validate_addr(&ad))
+ senderr(EINVAL);
+ if (pfr_lookup_addr(shadow, &ad, 1) != NULL)
+ continue;
+ p = pfr_create_kentry(&ad, 0);
+ if (p == NULL)
+ senderr(ENOMEM);
+ if (pfr_route_kentry(shadow, p)) {
+ pfr_destroy_kentry(p);
+ continue;
+ }
+ SLIST_INSERT_HEAD(&addrq, p, pfrke_workq);
+ xaddr++;
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (kt->pfrkt_shadow != NULL)
+ pfr_destroy_ktable(kt->pfrkt_shadow, 1);
+ kt->pfrkt_flags |= PFR_TFLAG_INACTIVE;
+ pfr_insert_ktables(&tableq);
+ shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ?
+ xaddr : NO_ADDRESSES;
+ kt->pfrkt_shadow = shadow;
+ } else {
+ pfr_clean_node_mask(shadow, &addrq);
+ pfr_destroy_ktable(shadow, 0);
+ pfr_destroy_ktables(&tableq, 0);
+ pfr_destroy_kentries(&addrq);
+ }
+ if (nadd != NULL)
+ *nadd = xadd;
+ if (naddr != NULL)
+ *naddr = xaddr;
+ return (0);
+_bad:
+ pfr_destroy_ktable(shadow, 0);
+ pfr_destroy_ktables(&tableq, 0);
+ pfr_destroy_kentries(&addrq);
+ return (rv);
+}
+
+int
+pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p;
+ struct pf_ruleset *rs;
+ int xdel = 0;
+
+ ACCEPT_FLAGS(PFR_FLAG_DUMMY);
+ rs = pf_find_ruleset(trs->pfrt_anchor);
+ if (rs == NULL || !rs->topen || ticket != rs->tticket)
+ return (0);
+ SLIST_INIT(&workq);
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
+ pfr_skip_table(trs, p, 0))
+ continue;
+ p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ xdel++;
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ pfr_setflags_ktables(&workq);
+ rs->topen = 0;
+ pf_remove_if_empty_ruleset(rs);
+ }
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+}
+
+int
+pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd,
+ int *nchange, int flags)
+{
+ struct pfr_ktable *p, *q;
+ struct pfr_ktableworkq workq;
+ struct pf_ruleset *rs;
+ int s = 0, xadd = 0, xchange = 0;
+ long tzero = time_second;
+
+ ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY);
+ rs = pf_find_ruleset(trs->pfrt_anchor);
+ if (rs == NULL || !rs->topen || ticket != rs->tticket)
+ return (EBUSY);
+
+ SLIST_INIT(&workq);
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
+ pfr_skip_table(trs, p, 0))
+ continue;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ if (p->pfrkt_flags & PFR_TFLAG_ACTIVE)
+ xchange++;
+ else
+ xadd++;
+ }
+
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (flags & PFR_FLAG_ATOMIC)
+ s = splsoftnet();
+ for (p = SLIST_FIRST(&workq); p != NULL; p = q) {
+ q = SLIST_NEXT(p, pfrkt_workq);
+ pfr_commit_ktable(p, tzero);
+ }
+ if (flags & PFR_FLAG_ATOMIC)
+ splx(s);
+ rs->topen = 0;
+ pf_remove_if_empty_ruleset(rs);
+ }
+ if (nadd != NULL)
+ *nadd = xadd;
+ if (nchange != NULL)
+ *nchange = xchange;
+
+ return (0);
+}
+
+void
+pfr_commit_ktable(struct pfr_ktable *kt, long tzero)
+{
+ struct pfr_ktable *shadow = kt->pfrkt_shadow;
+ int nflags;
+
+ if (shadow->pfrkt_cnt == NO_ADDRESSES) {
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ pfr_clstats_ktable(kt, tzero, 1);
+ } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) {
+ /* kt might contain addresses */
+ struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq;
+ struct pfr_kentry *p, *q, *next;
+ struct pfr_addr ad;
+
+ pfr_enqueue_addrs(shadow, &addrq, NULL, 0);
+ pfr_mark_addrs(kt);
+ SLIST_INIT(&addq);
+ SLIST_INIT(&changeq);
+ SLIST_INIT(&delq);
+ SLIST_INIT(&garbageq);
+ pfr_clean_node_mask(shadow, &addrq);
+ for (p = SLIST_FIRST(&addrq); p != NULL; p = next) {
+ next = SLIST_NEXT(p, pfrke_workq); /* XXX */
+ pfr_copyout_addr(&ad, p);
+ q = pfr_lookup_addr(kt, &ad, 1);
+ if (q != NULL) {
+ if (q->pfrke_not != p->pfrke_not)
+ SLIST_INSERT_HEAD(&changeq, q,
+ pfrke_workq);
+ q->pfrke_mark = 1;
+ SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq);
+ } else {
+ p->pfrke_tzero = tzero;
+ SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
+ }
+ }
+ pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY);
+ pfr_insert_kentries(kt, &addq, tzero);
+ pfr_remove_kentries(kt, &delq);
+ pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG);
+ pfr_destroy_kentries(&garbageq);
+ } else {
+ /* kt cannot contain addresses */
+ SWAP(struct radix_node_head *, kt->pfrkt_ip4,
+ shadow->pfrkt_ip4);
+ SWAP(struct radix_node_head *, kt->pfrkt_ip6,
+ shadow->pfrkt_ip6);
+ SWAP(int, kt->pfrkt_cnt, shadow->pfrkt_cnt);
+ pfr_clstats_ktable(kt, tzero, 1);
+ }
+ nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) |
+ (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE)
+ & ~PFR_TFLAG_INACTIVE;
+ pfr_destroy_ktable(shadow, 0);
+ kt->pfrkt_shadow = NULL;
+ pfr_setflags_ktable(kt, nflags);
+}
+
+int
+pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved)
+{
+ int i;
+
+ if (!tbl->pfrt_name[0])
+ return (-1);
+ if (no_reserved && !strcmp(tbl->pfrt_anchor, PF_RESERVED_ANCHOR))
+ return (-1);
+ if (tbl->pfrt_name[PF_TABLE_NAME_SIZE-1])
+ return (-1);
+ for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++)
+ if (tbl->pfrt_name[i])
+ return (-1);
+ if (pfr_fix_anchor(tbl->pfrt_anchor))
+ return (-1);
+ if (tbl->pfrt_flags & ~allowedflags)
+ return (-1);
+ return (0);
+}
+
+/*
+ * Rewrite anchors referenced by tables to remove slashes
+ * and check for validity.
+ */
+int
+pfr_fix_anchor(char *anchor)
+{
+ size_t siz = MAXPATHLEN;
+ int i;
+
+ if (anchor[0] == '/') {
+ char *path;
+ int off;
+
+ path = anchor;
+ off = 1;
+ while (*++path == '/')
+ off++;
+ bcopy(path, anchor, siz - off);
+ memset(anchor + siz - off, 0, off);
+ }
+ if (anchor[siz - 1])
+ return (-1);
+ for (i = strlen(anchor); i < siz; i++)
+ if (anchor[i])
+ return (-1);
+ return (0);
+}
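+
+/*
+ * Editorial sketch, not part of the upstream file: pfr_fix_anchor()
+ * normalizes in place, so a caller-supplied buffer (hypothetical here)
+ * loses its leading slashes and keeps the tail NUL-padded.
+ */
+#if 0
+	char anchor[MAXPATHLEN] = "///foo/bar";
+
+	if (pfr_fix_anchor(anchor) == 0) {
+		/* anchor now holds "foo/bar", NUL-padded to MAXPATHLEN */
+	}
+#endif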
+
+int
+pfr_table_count(struct pfr_table *filter, int flags)
+{
+ struct pf_ruleset *rs;
+
+ if (flags & PFR_FLAG_ALLRSETS)
+ return (pfr_ktable_cnt);
+ if (filter->pfrt_anchor[0]) {
+ rs = pf_find_ruleset(filter->pfrt_anchor);
+ return ((rs != NULL) ? rs->tables : -1);
+ }
+ return (pf_main_ruleset.tables);
+}
+
+int
+pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags)
+{
+ if (flags & PFR_FLAG_ALLRSETS)
+ return (0);
+ if (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor))
+ return (1);
+ return (0);
+}
+
+void
+pfr_insert_ktables(struct pfr_ktableworkq *workq)
+{
+ struct pfr_ktable *p;
+
+ SLIST_FOREACH(p, workq, pfrkt_workq)
+ pfr_insert_ktable(p);
+}
+
+void
+pfr_insert_ktable(struct pfr_ktable *kt)
+{
+ RB_INSERT(pfr_ktablehead, &pfr_ktables, kt);
+ pfr_ktable_cnt++;
+ if (kt->pfrkt_root != NULL)
+ if (!kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++)
+ pfr_setflags_ktable(kt->pfrkt_root,
+ kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR);
+}
+
+void
+pfr_setflags_ktables(struct pfr_ktableworkq *workq)
+{
+ struct pfr_ktable *p, *q;
+
+ for (p = SLIST_FIRST(workq); p; p = q) {
+ q = SLIST_NEXT(p, pfrkt_workq);
+ pfr_setflags_ktable(p, p->pfrkt_nflags);
+ }
+}
+
+void
+pfr_setflags_ktable(struct pfr_ktable *kt, int newf)
+{
+ struct pfr_kentryworkq addrq;
+
+ if (!(newf & PFR_TFLAG_REFERENCED) &&
+ !(newf & PFR_TFLAG_PERSIST))
+ newf &= ~PFR_TFLAG_ACTIVE;
+ if (!(newf & PFR_TFLAG_ACTIVE))
+ newf &= ~PFR_TFLAG_USRMASK;
+ if (!(newf & PFR_TFLAG_SETMASK)) {
+ RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt);
+ if (kt->pfrkt_root != NULL)
+ if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR])
+ pfr_setflags_ktable(kt->pfrkt_root,
+ kt->pfrkt_root->pfrkt_flags &
+ ~PFR_TFLAG_REFDANCHOR);
+ pfr_destroy_ktable(kt, 1);
+ pfr_ktable_cnt--;
+ return;
+ }
+ if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) {
+ pfr_enqueue_addrs(kt, &addrq, NULL, 0);
+ pfr_remove_kentries(kt, &addrq);
+ }
+ if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) {
+ pfr_destroy_ktable(kt->pfrkt_shadow, 1);
+ kt->pfrkt_shadow = NULL;
+ }
+ kt->pfrkt_flags = newf;
+}
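+
+/*
+ * Editorial note: clearing every PFR_TFLAG_SETMASK bit is the "delete"
+ * transition: the table is unlinked from the red-black tree and
+ * destroyed; the other branches merely flush addresses or drop a stale
+ * shadow when ACTIVE or INACTIVE is being cleared.
+ */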
+
+void
+pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse)
+{
+ struct pfr_ktable *p;
+
+ SLIST_FOREACH(p, workq, pfrkt_workq)
+ pfr_clstats_ktable(p, tzero, recurse);
+}
+
+void
+pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse)
+{
+ struct pfr_kentryworkq addrq;
+ int s;
+
+ if (recurse) {
+ pfr_enqueue_addrs(kt, &addrq, NULL, 0);
+ pfr_clstats_kentries(&addrq, tzero, 0);
+ }
+ s = splsoftnet();
+ bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets));
+ bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes));
+ kt->pfrkt_match = kt->pfrkt_nomatch = 0;
+ splx(s);
+ kt->pfrkt_tzero = tzero;
+}
+
+struct pfr_ktable *
+pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset)
+{
+ struct pfr_ktable *kt;
+ struct pf_ruleset *rs;
+
+ kt = pool_get(&pfr_ktable_pl, PR_NOWAIT);
+ if (kt == NULL)
+ return (NULL);
+ bzero(kt, sizeof(*kt));
+ kt->pfrkt_t = *tbl;
+
+ if (attachruleset) {
+ rs = pf_find_or_create_ruleset(tbl->pfrt_anchor);
+ if (!rs) {
+ pfr_destroy_ktable(kt, 0);
+ return (NULL);
+ }
+ kt->pfrkt_rs = rs;
+ rs->tables++;
+ }
+
+ if (!rn_inithead((void **)&kt->pfrkt_ip4,
+ offsetof(struct sockaddr_in, sin_addr) * 8) ||
+ !rn_inithead((void **)&kt->pfrkt_ip6,
+ offsetof(struct sockaddr_in6, sin6_addr) * 8)) {
+ pfr_destroy_ktable(kt, 0);
+ return (NULL);
+ }
+ kt->pfrkt_tzero = tzero;
+
+ return (kt);
+}
+
+void
+pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr)
+{
+ struct pfr_ktable *p, *q;
+
+ for (p = SLIST_FIRST(workq); p; p = q) {
+ q = SLIST_NEXT(p, pfrkt_workq);
+ pfr_destroy_ktable(p, flushaddr);
+ }
+}
+
+void
+pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
+{
+ struct pfr_kentryworkq addrq;
+
+ if (flushaddr) {
+ pfr_enqueue_addrs(kt, &addrq, NULL, 0);
+ pfr_clean_node_mask(kt, &addrq);
+ pfr_destroy_kentries(&addrq);
+ }
+#if defined(__FreeBSD__) && (__FreeBSD_version >= 500100)
+ if (kt->pfrkt_ip4 != NULL) {
+ RADIX_NODE_HEAD_DESTROY(kt->pfrkt_ip4);
+ free((caddr_t)kt->pfrkt_ip4, M_RTABLE);
+ }
+ if (kt->pfrkt_ip6 != NULL) {
+ RADIX_NODE_HEAD_DESTROY(kt->pfrkt_ip6);
+ free((caddr_t)kt->pfrkt_ip6, M_RTABLE);
+ }
+#else
+ if (kt->pfrkt_ip4 != NULL)
+ free((caddr_t)kt->pfrkt_ip4, M_RTABLE);
+ if (kt->pfrkt_ip6 != NULL)
+ free((caddr_t)kt->pfrkt_ip6, M_RTABLE);
+#endif
+ if (kt->pfrkt_shadow != NULL)
+ pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr);
+ if (kt->pfrkt_rs != NULL) {
+ kt->pfrkt_rs->tables--;
+ pf_remove_if_empty_ruleset(kt->pfrkt_rs);
+ }
+ pool_put(&pfr_ktable_pl, kt);
+}
+
+int
+pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q)
+{
+ int d;
+
+ if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE)))
+ return (d);
+ return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor));
+}
+
+struct pfr_ktable *
+pfr_lookup_table(struct pfr_table *tbl)
+{
+	/* a struct pfr_ktable starts with a struct pfr_table */
+ return (RB_FIND(pfr_ktablehead, &pfr_ktables,
+ (struct pfr_ktable *)tbl));
+}
+
+int
+pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
+{
+ struct pfr_kentry *ke = NULL;
+ int match;
+
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
+ kt = kt->pfrkt_root;
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (0);
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ pfr_sin.sin_addr.s_addr = a->addr32[0];
+ ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr));
+ ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ break;
+#endif /* INET6 */
+ }
+ match = (ke && !ke->pfrke_not);
+ if (match)
+ kt->pfrkt_match++;
+ else
+ kt->pfrkt_nomatch++;
+ return (match);
+}
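+
+/*
+ * Editorial note: the usual caller is the PF_MISMATCHAW() macro in
+ * pfvar.h, which evaluates pfr_match_addr((aw)->p.tbl, (x), (af)) for
+ * rule addresses of type PF_ADDR_TABLE.
+ */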
+
+void
+pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
+ u_int64_t len, int dir_out, int op_pass, int notrule)
+{
+ struct pfr_kentry *ke = NULL;
+
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
+ kt = kt->pfrkt_root;
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return;
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ pfr_sin.sin_addr.s_addr = a->addr32[0];
+ ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr));
+ ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ break;
+#endif /* INET6 */
+ default:
+ ;
+ }
+ if ((ke == NULL || ke->pfrke_not) != notrule) {
+ if (op_pass != PFR_OP_PASS)
+ printf("pfr_update_stats: assertion failed.\n");
+ op_pass = PFR_OP_XPASS;
+ }
+ kt->pfrkt_packets[dir_out][op_pass]++;
+ kt->pfrkt_bytes[dir_out][op_pass] += len;
+ if (ke != NULL && op_pass != PFR_OP_XPASS) {
+ ke->pfrke_packets[dir_out][op_pass]++;
+ ke->pfrke_bytes[dir_out][op_pass] += len;
+ }
+}
+
+struct pfr_ktable *
+pfr_attach_table(struct pf_ruleset *rs, char *name)
+{
+ struct pfr_ktable *kt, *rt;
+ struct pfr_table tbl;
+ struct pf_anchor *ac = rs->anchor;
+
+ bzero(&tbl, sizeof(tbl));
+ strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name));
+ if (ac != NULL)
+ strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor));
+ kt = pfr_lookup_table(&tbl);
+ if (kt == NULL) {
+ kt = pfr_create_ktable(&tbl, time_second, 1);
+ if (kt == NULL)
+ return (NULL);
+ if (ac != NULL) {
+ bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor));
+ rt = pfr_lookup_table(&tbl);
+ if (rt == NULL) {
+ rt = pfr_create_ktable(&tbl, 0, 1);
+ if (rt == NULL) {
+ pfr_destroy_ktable(kt, 0);
+ return (NULL);
+ }
+ pfr_insert_ktable(rt);
+ }
+ kt->pfrkt_root = rt;
+ }
+ pfr_insert_ktable(kt);
+ }
+ if (!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++)
+ pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED);
+ return (kt);
+}
+
+void
+pfr_detach_table(struct pfr_ktable *kt)
+{
+ if (kt->pfrkt_refcnt[PFR_REFCNT_RULE] <= 0)
+ printf("pfr_detach_table: refcount = %d.\n",
+ kt->pfrkt_refcnt[PFR_REFCNT_RULE]);
+ else if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE])
+ pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED);
+}
+
+
+int
+pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
+ struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af)
+{
+ struct pfr_kentry *ke, *ke2 = NULL;
+ struct pf_addr *addr = NULL;
+ union sockaddr_union mask;
+ int idx = -1, use_counter = 0;
+
+ if (af == AF_INET)
+ addr = (struct pf_addr *)&pfr_sin.sin_addr;
+ else if (af == AF_INET6)
+ addr = (struct pf_addr *)&pfr_sin6.sin6_addr;
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
+ kt = kt->pfrkt_root;
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (-1);
+
+ if (pidx != NULL)
+ idx = *pidx;
+ if (counter != NULL && idx >= 0)
+ use_counter = 1;
+ if (idx < 0)
+ idx = 0;
+
+_next_block:
+ ke = pfr_kentry_byidx(kt, idx, af);
+ if (ke == NULL)
+ return (1);
+ pfr_prepare_network(&pfr_mask, af, ke->pfrke_net);
+ *raddr = SUNION2PF(&ke->pfrke_sa, af);
+ *rmask = SUNION2PF(&pfr_mask, af);
+
+ if (use_counter) {
+		/* is the supplied address within this block? */
+ if (!PF_MATCHA(0, *raddr, *rmask, counter, af)) {
+ /* no, go to next block in table */
+ idx++;
+ use_counter = 0;
+ goto _next_block;
+ }
+ PF_ACPY(addr, counter, af);
+ } else {
+ /* use first address of block */
+ PF_ACPY(addr, *raddr, af);
+ }
+
+ if (!KENTRY_NETWORK(ke)) {
+ /* this is a single IP address - no possible nested block */
+ PF_ACPY(counter, addr, af);
+ *pidx = idx;
+ return (0);
+ }
+ for (;;) {
+ /* we don't want to use a nested block */
+ if (af == AF_INET)
+ ke2 = (struct pfr_kentry *)rn_match(&pfr_sin,
+ kt->pfrkt_ip4);
+ else if (af == AF_INET6)
+ ke2 = (struct pfr_kentry *)rn_match(&pfr_sin6,
+ kt->pfrkt_ip6);
+ /* no need to check KENTRY_RNF_ROOT() here */
+ if (ke2 == ke) {
+			/* lookup returned the same block - perfect */
+ PF_ACPY(counter, addr, af);
+ *pidx = idx;
+ return (0);
+ }
+
+ /* we need to increase the counter past the nested block */
+		pfr_prepare_network(&mask, af, ke2->pfrke_net);
+ PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af);
+ PF_AINC(addr, af);
+ if (!PF_MATCHA(0, *raddr, *rmask, addr, af)) {
+ /* ok, we reached the end of our main block */
+ /* go to next block in table */
+ idx++;
+ use_counter = 0;
+ goto _next_block;
+ }
+ }
+}
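+
+/*
+ * Editorial sketch, not upstream code: pf_map_addr() in pf.c drives
+ * pfr_pool_get() as a round-robin cursor over the table; roughly the
+ * shape below, with rpool a hypothetical struct pf_pool pointer and
+ * error handling elided.
+ */
+#if 0
+	struct pf_addr	*raddr, *rmask;
+
+	if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
+	    &raddr, &rmask, af)) {
+		/* table exhausted: rewind the cursor and retry once */
+		rpool->tblidx = -1;
+		(void)pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
+		    &raddr, &rmask, af);
+	}
+#endif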
+
+struct pfr_kentry *
+pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af)
+{
+ struct pfr_walktree w;
+
+ bzero(&w, sizeof(w));
+ w.pfrw_op = PFRW_POOL_GET;
+ w.pfrw_cnt = idx;
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+#ifdef __FreeBSD__
+ kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
+#else
+ rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
+#endif
+ return (w.pfrw_kentry);
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+#ifdef __FreeBSD__
+ kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
+#else
+ rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
+#endif
+ return (w.pfrw_kentry);
+#endif /* INET6 */
+ default:
+ return (NULL);
+ }
+}
+
+void
+pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn)
+{
+ struct pfr_walktree w;
+ int s;
+
+ bzero(&w, sizeof(w));
+ w.pfrw_op = PFRW_DYNADDR_UPDATE;
+ w.pfrw_dyn = dyn;
+
+ s = splsoftnet();
+ dyn->pfid_acnt4 = 0;
+ dyn->pfid_acnt6 = 0;
+ if (!dyn->pfid_af || dyn->pfid_af == AF_INET)
+#ifdef __FreeBSD__
+ kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
+#else
+ rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
+#endif
+ if (!dyn->pfid_af || dyn->pfid_af == AF_INET6)
+#ifdef __FreeBSD__
+ kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
+#else
+ rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
+#endif
+ splx(s);
+}
diff --git a/freebsd/sys/contrib/pf/net/pfvar.h b/freebsd/sys/contrib/pf/net/pfvar.h
new file mode 100644
index 00000000..c6429781
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pfvar.h
@@ -0,0 +1,1866 @@
+/* $FreeBSD$ */
+/* $OpenBSD: pfvar.h,v 1.244 2007/02/23 21:31:51 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _NET_PFVAR_HH_
+#define _NET_PFVAR_HH_
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/tree.h>
+#ifdef __FreeBSD__
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/sx.h>
+#else
+#include <freebsd/sys/rwlock.h>
+#endif
+
+#include <freebsd/net/radix.h>
+#include <freebsd/net/route.h>
+#ifdef __FreeBSD__
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/pf_mtag.h>
+#include <freebsd/vm/uma.h>
+#else
+#include <freebsd/netinet/ip_ipsp.h>
+#endif
+
+#ifdef __FreeBSD__
+#include <freebsd/netinet/in.h>
+#endif
+
+#include <freebsd/netinet/tcp_fsm.h>
+
+struct ip;
+struct ip6_hdr;
+#ifdef __FreeBSD__
+struct inpcb;
+#endif
+
+#define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0)
+#define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1)
+
+#define PF_MD5_DIGEST_LENGTH 16
+#ifdef MD5_DIGEST_LENGTH
+#if PF_MD5_DIGEST_LENGTH != MD5_DIGEST_LENGTH
+#error
+#endif
+#endif
+
+enum { PF_INOUT, PF_IN, PF_OUT };
+enum { PF_LAN_EXT, PF_EXT_GWY, PF_ID };
+enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT,
+ PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP };
+enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT,
+ PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX };
+enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT,
+ PF_OP_LE, PF_OP_GT, PF_OP_GE, PF_OP_XRG, PF_OP_RRG };
+enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY };
+enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL,
+ PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER,
+ PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET };
+enum { PF_GET_NONE, PF_GET_CLR_CNTR };
+
+/*
+ * Note about PFTM_*: real indices into pf_rule.timeout[] come before
+ * PFTM_MAX, special cases afterwards. See pf_state_expires().
+ */
+enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED,
+ PFTM_TCP_CLOSING, PFTM_TCP_FIN_WAIT, PFTM_TCP_CLOSED,
+ PFTM_UDP_FIRST_PACKET, PFTM_UDP_SINGLE, PFTM_UDP_MULTIPLE,
+ PFTM_ICMP_FIRST_PACKET, PFTM_ICMP_ERROR_REPLY,
+ PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE,
+ PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL,
+ PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE,
+ PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED,
+ PFTM_UNTIL_PACKET };
+
+/* PFTM default values */
+#define PFTM_TCP_FIRST_PACKET_VAL 120 /* First TCP packet */
+#define PFTM_TCP_OPENING_VAL 30 /* No response yet */
+#define PFTM_TCP_ESTABLISHED_VAL 24*60*60/* Established */
+#define PFTM_TCP_CLOSING_VAL 15 * 60 /* Half closed */
+#define PFTM_TCP_FIN_WAIT_VAL 45 /* Got both FINs */
+#define PFTM_TCP_CLOSED_VAL 90 /* Got a RST */
+#define PFTM_UDP_FIRST_PACKET_VAL 60 /* First UDP packet */
+#define PFTM_UDP_SINGLE_VAL 30 /* Unidirectional */
+#define PFTM_UDP_MULTIPLE_VAL 60 /* Bidirectional */
+#define PFTM_ICMP_FIRST_PACKET_VAL 20 /* First ICMP packet */
+#define PFTM_ICMP_ERROR_REPLY_VAL 10 /* Got error response */
+#define PFTM_OTHER_FIRST_PACKET_VAL 60 /* First packet */
+#define PFTM_OTHER_SINGLE_VAL 30 /* Unidirectional */
+#define PFTM_OTHER_MULTIPLE_VAL 60 /* Bidirectional */
+#define PFTM_FRAG_VAL 30 /* Fragment expire */
+#define PFTM_INTERVAL_VAL 10 /* Expire interval */
+#define PFTM_SRC_NODE_VAL 0 /* Source tracking */
+#define PFTM_TS_DIFF_VAL 30 /* Allowed TS diff */
+
+enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO };
+enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
+ PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX };
+#define PF_POOL_IDMASK 0x0f
+enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM,
+ PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN };
+enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
+ PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED };
+#define PF_POOL_TYPEMASK 0x0f
+#define PF_POOL_STICKYADDR 0x20
+#define PF_WSCALE_FLAG 0x80
+#define PF_WSCALE_MASK 0x0f
+
+#define PF_LOG 0x01
+#define PF_LOG_ALL 0x02
+#define PF_LOG_SOCKET_LOOKUP 0x04
+
+struct pf_addr {
+ union {
+ struct in_addr v4;
+ struct in6_addr v6;
+ u_int8_t addr8[16];
+ u_int16_t addr16[8];
+ u_int32_t addr32[4];
+ } pfa; /* 128-bit address */
+#define v4 pfa.v4
+#define v6 pfa.v6
+#define addr8 pfa.addr8
+#define addr16 pfa.addr16
+#define addr32 pfa.addr32
+};
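+
+/*
+ * Editorial sketch: the accessor #defines above let the same 128-bit
+ * value be read per family or word-wise; the variable is hypothetical.
+ */
+#if 0
+	struct pf_addr a;
+
+	a.v4.s_addr = htonl(INADDR_LOOPBACK);	/* family view */
+	if (a.addr32[0] == htonl(INADDR_LOOPBACK))
+		;	/* word-wise view of the same storage */
+#endif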
+
+#define PF_TABLE_NAME_SIZE 32
+
+#define PFI_AFLAG_NETWORK 0x01
+#define PFI_AFLAG_BROADCAST 0x02
+#define PFI_AFLAG_PEER 0x04
+#define PFI_AFLAG_MODEMASK 0x07
+#define PFI_AFLAG_NOALIAS 0x08
+
+struct pf_addr_wrap {
+ union {
+ struct {
+ struct pf_addr addr;
+ struct pf_addr mask;
+ } a;
+ char ifname[IFNAMSIZ];
+ char tblname[PF_TABLE_NAME_SIZE];
+#ifdef __FreeBSD__
+#define RTLABEL_LEN 32
+#endif
+ char rtlabelname[RTLABEL_LEN];
+ u_int32_t rtlabel;
+ } v;
+ union {
+ struct pfi_dynaddr *dyn;
+ struct pfr_ktable *tbl;
+ int dyncnt;
+ int tblcnt;
+ } p;
+ u_int8_t type; /* PF_ADDR_* */
+ u_int8_t iflags; /* PFI_AFLAG_* */
+};
+
+#ifdef _KERNEL
+
+struct pfi_dynaddr {
+ TAILQ_ENTRY(pfi_dynaddr) entry;
+ struct pf_addr pfid_addr4;
+ struct pf_addr pfid_mask4;
+ struct pf_addr pfid_addr6;
+ struct pf_addr pfid_mask6;
+ struct pfr_ktable *pfid_kt;
+ struct pfi_kif *pfid_kif;
+ void *pfid_hook_cookie;
+ int pfid_net; /* mask or 128 */
+ int pfid_acnt4; /* address count IPv4 */
+ int pfid_acnt6; /* address count IPv6 */
+ sa_family_t pfid_af; /* rule af */
+ u_int8_t pfid_iflags; /* PFI_AFLAG_* */
+};
+
+/*
+ * Address manipulation macros
+ */
+
+#ifdef __FreeBSD__
+#define splsoftnet() splnet()
+
+#define HTONL(x) (x) = htonl((__uint32_t)(x))
+#define HTONS(x) (x) = htons((__uint16_t)(x))
+#define NTOHL(x) (x) = ntohl((__uint32_t)(x))
+#define NTOHS(x) (x) = ntohs((__uint16_t)(x))
+
+#define PF_NAME "pf"
+
+#define PR_NOWAIT M_NOWAIT
+#define pool_get(p, f) uma_zalloc(*(p), (f))
+#define pool_put(p, o) uma_zfree(*(p), (o))
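+
+/*
+ * Editorial note: the two wrappers above emulate OpenBSD's pool(9)
+ * allocator API on top of FreeBSD's uma(9) zones, so the shared pf
+ * code can keep calling pool_get()/pool_put() unchanged.
+ */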
+
+#define UMA_CREATE(var, type, desc) \
+ var = uma_zcreate(desc, sizeof(type), \
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); \
+ if (var == NULL) break
+#define UMA_DESTROY(var) \
+ if(var) uma_zdestroy(var)
+
+extern struct mtx pf_task_mtx;
+
+#define PF_ASSERT(h) mtx_assert(&pf_task_mtx, (h))
+
+#define PF_LOCK() do { \
+ PF_ASSERT(MA_NOTOWNED); \
+ mtx_lock(&pf_task_mtx); \
+} while(0)
+#define PF_UNLOCK() do { \
+ PF_ASSERT(MA_OWNED); \
+ mtx_unlock(&pf_task_mtx); \
+} while(0)
+
+#define PF_COPYIN(uaddr, kaddr, len, r) do { \
+ PF_UNLOCK(); \
+ r = copyin((uaddr), (kaddr), (len)); \
+ PF_LOCK(); \
+} while(0)
+
+#define PF_COPYOUT(kaddr, uaddr, len, r) do { \
+ PF_UNLOCK(); \
+ r = copyout((kaddr), (uaddr), (len)); \
+ PF_LOCK(); \
+} while(0)
+
+extern void init_pf_mutex(void);
+extern void destroy_pf_mutex(void);
+
+#define PF_MODVER 1
+#define PFLOG_MODVER 1
+#define PFSYNC_MODVER 1
+
+#define PFLOG_MINVER 1
+#define PFLOG_PREFVER PFLOG_MODVER
+#define PFLOG_MAXVER 1
+#define PFSYNC_MINVER 1
+#define PFSYNC_PREFVER PFSYNC_MODVER
+#define PFSYNC_MAXVER 1
+#endif /* __FreeBSD__ */
+
+#ifdef INET
+#ifndef INET6
+#define PF_INET_ONLY
+#endif /* ! INET6 */
+#endif /* INET */
+
+#ifdef INET6
+#ifndef INET
+#define PF_INET6_ONLY
+#endif /* ! INET */
+#endif /* INET6 */
+
+#ifdef INET
+#ifdef INET6
+#define PF_INET_INET6
+#endif /* INET6 */
+#endif /* INET */
+
+#else
+
+#define PF_INET_INET6
+
+#endif /* _KERNEL */
+
+/* Both IPv4 and IPv6 */
+#ifdef PF_INET_INET6
+
+#define PF_AEQ(a, b, c) \
+ ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \
+ ((a)->addr32[3] == (b)->addr32[3] && \
+ (a)->addr32[2] == (b)->addr32[2] && \
+ (a)->addr32[1] == (b)->addr32[1] && \
+ (a)->addr32[0] == (b)->addr32[0])) \
+
+#define PF_ANEQ(a, b, c) \
+ ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \
+ ((a)->addr32[3] != (b)->addr32[3] || \
+ (a)->addr32[2] != (b)->addr32[2] || \
+ (a)->addr32[1] != (b)->addr32[1] || \
+ (a)->addr32[0] != (b)->addr32[0])) \
+
+#define PF_AZERO(a, c) \
+ ((c == AF_INET && !(a)->addr32[0]) || \
+ (!(a)->addr32[0] && !(a)->addr32[1] && \
+ !(a)->addr32[2] && !(a)->addr32[3] )) \
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ pf_addrcpy(a, b, f)
+
+#define PF_AINC(a, f) \
+ pf_addr_inc(a, f)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ pf_poolmask(a, b, c, d, f)
+
+#else
+
+/* Just IPv6 */
+
+#ifdef PF_INET6_ONLY
+
+#define PF_AEQ(a, b, c) \
+ ((a)->addr32[3] == (b)->addr32[3] && \
+ (a)->addr32[2] == (b)->addr32[2] && \
+ (a)->addr32[1] == (b)->addr32[1] && \
+ (a)->addr32[0] == (b)->addr32[0]) \
+
+#define PF_ANEQ(a, b, c) \
+ ((a)->addr32[3] != (b)->addr32[3] || \
+ (a)->addr32[2] != (b)->addr32[2] || \
+ (a)->addr32[1] != (b)->addr32[1] || \
+ (a)->addr32[0] != (b)->addr32[0]) \
+
+#define PF_AZERO(a, c) \
+ (!(a)->addr32[0] && \
+ !(a)->addr32[1] && \
+ !(a)->addr32[2] && \
+ !(a)->addr32[3] ) \
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ pf_addrcpy(a, b, f)
+
+#define PF_AINC(a, f) \
+ pf_addr_inc(a, f)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ pf_poolmask(a, b, c, d, f)
+
+#else
+
+/* Just IPv4 */
+#ifdef PF_INET_ONLY
+
+#define PF_AEQ(a, b, c) \
+ ((a)->addr32[0] == (b)->addr32[0])
+
+#define PF_ANEQ(a, b, c) \
+ ((a)->addr32[0] != (b)->addr32[0])
+
+#define PF_AZERO(a, c) \
+ (!(a)->addr32[0])
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ (a)->v4.s_addr = (b)->v4.s_addr
+
+#define PF_AINC(a, f) \
+ do { \
+ (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \
+ } while (0)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ do { \
+ (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \
+ (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \
+ } while (0)
+
+#endif /* PF_INET_ONLY */
+#endif /* PF_INET6_ONLY */
+#endif /* PF_INET_INET6 */
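+
+/*
+ * Editorial sketch: the macros above give address-family-aware
+ * comparisons without branching at each call site, e.g. (fields from
+ * struct pf_state, defined later in this header):
+ */
+#if 0
+	if (PF_AEQ(&s->lan.addr, &s->gwy.addr, s->af) &&
+	    !PF_AZERO(&s->ext.addr, s->af))
+		;	/* lan == gwy under s->af, and ext is non-zero */
+#endif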
+
+#define PF_MISMATCHAW(aw, x, af, neg, ifp) \
+ ( \
+ (((aw)->type == PF_ADDR_NOROUTE && \
+ pf_routable((x), (af), NULL)) || \
+ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \
+ pf_routable((x), (af), (ifp))) || \
+ ((aw)->type == PF_ADDR_RTLABEL && \
+ !pf_rtlabel_match((x), (af), (aw))) || \
+ ((aw)->type == PF_ADDR_TABLE && \
+ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \
+ ((aw)->type == PF_ADDR_DYNIFTL && \
+ !pfi_match_addr((aw)->p.dyn, (x), (af))) || \
+ ((aw)->type == PF_ADDR_ADDRMASK && \
+ !PF_AZERO(&(aw)->v.a.mask, (af)) && \
+ !PF_MATCHA(0, &(aw)->v.a.addr, \
+ &(aw)->v.a.mask, (x), (af))))) != \
+ (neg) \
+ )
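+
+/*
+ * Editorial note: PF_MISMATCHAW() is true when the wrapped address (aw)
+ * fails to match (x), XOR-ed against the rule's negation flag (neg), so
+ * negated rule addresses reuse the same expression.
+ */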
+
+
+struct pf_rule_uid {
+ uid_t uid[2];
+ u_int8_t op;
+};
+
+struct pf_rule_gid {
+	gid_t		gid[2];
+ u_int8_t op;
+};
+
+struct pf_rule_addr {
+ struct pf_addr_wrap addr;
+ u_int16_t port[2];
+ u_int8_t neg;
+ u_int8_t port_op;
+};
+
+struct pf_pooladdr {
+ struct pf_addr_wrap addr;
+ TAILQ_ENTRY(pf_pooladdr) entries;
+ char ifname[IFNAMSIZ];
+ struct pfi_kif *kif;
+};
+
+TAILQ_HEAD(pf_palist, pf_pooladdr);
+
+struct pf_poolhashkey {
+ union {
+ u_int8_t key8[16];
+ u_int16_t key16[8];
+ u_int32_t key32[4];
+ } pfk; /* 128-bit hash key */
+#define key8 pfk.key8
+#define key16 pfk.key16
+#define key32 pfk.key32
+};
+
+struct pf_pool {
+ struct pf_palist list;
+ struct pf_pooladdr *cur;
+ struct pf_poolhashkey key;
+ struct pf_addr counter;
+ int tblidx;
+ u_int16_t proxy_port[2];
+ u_int8_t port_op;
+ u_int8_t opts;
+};
+
+
+/* A packed Operating System description for fingerprinting */
+typedef u_int32_t pf_osfp_t;
+#define PF_OSFP_ANY ((pf_osfp_t)0)
+#define PF_OSFP_UNKNOWN ((pf_osfp_t)-1)
+#define PF_OSFP_NOMATCH ((pf_osfp_t)-2)
+
+struct pf_osfp_entry {
+ SLIST_ENTRY(pf_osfp_entry) fp_entry;
+ pf_osfp_t fp_os;
+ int fp_enflags;
+#define PF_OSFP_EXPANDED 0x001 /* expanded entry */
+#define PF_OSFP_GENERIC 0x002 /* generic signature */
+#define PF_OSFP_NODETAIL 0x004 /* no p0f details */
+#define PF_OSFP_LEN 32
+ char fp_class_nm[PF_OSFP_LEN];
+ char fp_version_nm[PF_OSFP_LEN];
+ char fp_subtype_nm[PF_OSFP_LEN];
+};
+#define PF_OSFP_ENTRY_EQ(a, b) \
+ ((a)->fp_os == (b)->fp_os && \
+ memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \
+ memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \
+ memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0)
+
+/* handle pf_osfp_t packing */
+#define _FP_RESERVED_BIT 1 /* For the special negative #defines */
+#define _FP_UNUSED_BITS 1
+#define _FP_CLASS_BITS 10 /* OS Class (Windows, Linux) */
+#define _FP_VERSION_BITS 10 /* OS version (95, 98, NT, 2.4.54, 3.2) */
+#define _FP_SUBTYPE_BITS 10 /* patch level (NT SP4, SP3, ECN patch) */
+#define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \
+ (class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \
+ ((1 << _FP_CLASS_BITS) - 1); \
+ (version) = ((osfp) >> _FP_SUBTYPE_BITS) & \
+ ((1 << _FP_VERSION_BITS) - 1);\
+ (subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \
+} while(0)
+#define PF_OSFP_PACK(osfp, class, version, subtype) do { \
+ (osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \
+ + _FP_SUBTYPE_BITS); \
+ (osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \
+ _FP_SUBTYPE_BITS; \
+ (osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \
+} while(0)
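+
+/*
+ * Editorial sketch: the three 10-bit fields round-trip through one
+ * 32-bit pf_osfp_t; the values here are arbitrary examples.
+ */
+#if 0
+	pf_osfp_t osfp;
+	int class, version, subtype;
+
+	PF_OSFP_PACK(osfp, 4, 2, 1);
+	PF_OSFP_UNPACK(osfp, class, version, subtype);
+	/* now class == 4, version == 2, subtype == 1 */
+#endif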
+
+/* the fingerprint of an OS's TCP SYN packet */
+typedef u_int64_t pf_tcpopts_t;
+struct pf_os_fingerprint {
+ SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */
+ pf_tcpopts_t fp_tcpopts; /* packed TCP options */
+ u_int16_t fp_wsize; /* TCP window size */
+ u_int16_t fp_psize; /* ip->ip_len */
+ u_int16_t fp_mss; /* TCP MSS */
+ u_int16_t fp_flags;
+#define PF_OSFP_WSIZE_MOD 0x0001 /* Window modulus */
+#define PF_OSFP_WSIZE_DC 0x0002 /* Window don't care */
+#define PF_OSFP_WSIZE_MSS 0x0004 /* Window multiple of MSS */
+#define PF_OSFP_WSIZE_MTU 0x0008 /* Window multiple of MTU */
+#define PF_OSFP_PSIZE_MOD 0x0010 /* packet size modulus */
+#define PF_OSFP_PSIZE_DC 0x0020 /* packet size don't care */
+#define PF_OSFP_WSCALE 0x0040 /* TCP window scaling */
+#define PF_OSFP_WSCALE_MOD 0x0080 /* TCP window scale modulus */
+#define PF_OSFP_WSCALE_DC	0x0100	/* TCP window scale don't-care */
+#define PF_OSFP_MSS 0x0200 /* TCP MSS */
+#define PF_OSFP_MSS_MOD 0x0400 /* TCP MSS modulus */
+#define PF_OSFP_MSS_DC	0x0800	/* TCP MSS don't-care */
+#define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */
+#define PF_OSFP_TS0 0x2000 /* Zero timestamp */
+#define PF_OSFP_INET6 0x4000 /* IPv6 */
+ u_int8_t fp_optcnt; /* TCP option count */
+ u_int8_t fp_wscale; /* TCP window scaling */
+ u_int8_t fp_ttl; /* IPv4 TTL */
+#define PF_OSFP_MAXTTL_OFFSET 40
+/* TCP options packing */
+#define PF_OSFP_TCPOPT_NOP 0x0 /* TCP NOP option */
+#define PF_OSFP_TCPOPT_WSCALE 0x1 /* TCP window scaling option */
+#define PF_OSFP_TCPOPT_MSS 0x2 /* TCP max segment size opt */
+#define PF_OSFP_TCPOPT_SACK 0x3 /* TCP SACK OK option */
+#define PF_OSFP_TCPOPT_TS 0x4 /* TCP timestamp option */
+#define PF_OSFP_TCPOPT_BITS 3 /* bits used by each option */
+#define PF_OSFP_MAX_OPTS \
+ (sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \
+ / PF_OSFP_TCPOPT_BITS
+
+ SLIST_ENTRY(pf_os_fingerprint) fp_next;
+};
+
+struct pf_osfp_ioctl {
+ struct pf_osfp_entry fp_os;
+ pf_tcpopts_t fp_tcpopts; /* packed TCP options */
+ u_int16_t fp_wsize; /* TCP window size */
+ u_int16_t fp_psize; /* ip->ip_len */
+ u_int16_t fp_mss; /* TCP MSS */
+ u_int16_t fp_flags;
+ u_int8_t fp_optcnt; /* TCP option count */
+ u_int8_t fp_wscale; /* TCP window scaling */
+ u_int8_t fp_ttl; /* IPv4 TTL */
+
+ int fp_getnum; /* DIOCOSFPGET number */
+};
+
+
+union pf_rule_ptr {
+ struct pf_rule *ptr;
+ u_int32_t nr;
+};
+
+#define PF_ANCHOR_NAME_SIZE 64
+
+struct pf_rule {
+ struct pf_rule_addr src;
+ struct pf_rule_addr dst;
+#define PF_SKIP_IFP 0
+#define PF_SKIP_DIR 1
+#define PF_SKIP_AF 2
+#define PF_SKIP_PROTO 3
+#define PF_SKIP_SRC_ADDR 4
+#define PF_SKIP_SRC_PORT 5
+#define PF_SKIP_DST_ADDR 6
+#define PF_SKIP_DST_PORT 7
+#define PF_SKIP_COUNT 8
+ union pf_rule_ptr skip[PF_SKIP_COUNT];
+#define PF_RULE_LABEL_SIZE 64
+ char label[PF_RULE_LABEL_SIZE];
+#define PF_QNAME_SIZE 64
+ char ifname[IFNAMSIZ];
+ char qname[PF_QNAME_SIZE];
+ char pqname[PF_QNAME_SIZE];
+#define PF_TAG_NAME_SIZE 64
+ char tagname[PF_TAG_NAME_SIZE];
+ char match_tagname[PF_TAG_NAME_SIZE];
+
+ char overload_tblname[PF_TABLE_NAME_SIZE];
+
+ TAILQ_ENTRY(pf_rule) entries;
+ struct pf_pool rpool;
+
+ u_int64_t evaluations;
+ u_int64_t packets[2];
+ u_int64_t bytes[2];
+
+ struct pfi_kif *kif;
+ struct pf_anchor *anchor;
+ struct pfr_ktable *overload_tbl;
+
+ pf_osfp_t os_fingerprint;
+
+ int rtableid;
+ u_int32_t timeout[PFTM_MAX];
+ u_int32_t states;
+ u_int32_t max_states;
+ u_int32_t src_nodes;
+ u_int32_t max_src_nodes;
+ u_int32_t max_src_states;
+ u_int32_t spare1; /* netgraph */
+ u_int32_t max_src_conn;
+ struct {
+ u_int32_t limit;
+ u_int32_t seconds;
+ } max_src_conn_rate;
+ u_int32_t qid;
+ u_int32_t pqid;
+ u_int32_t rt_listid;
+ u_int32_t nr;
+ u_int32_t prob;
+ uid_t cuid;
+ pid_t cpid;
+
+ u_int16_t return_icmp;
+ u_int16_t return_icmp6;
+ u_int16_t max_mss;
+ u_int16_t tag;
+ u_int16_t match_tag;
+ u_int16_t spare2; /* netgraph */
+
+ struct pf_rule_uid uid;
+ struct pf_rule_gid gid;
+
+ u_int32_t rule_flag;
+ u_int8_t action;
+ u_int8_t direction;
+ u_int8_t log;
+ u_int8_t logif;
+ u_int8_t quick;
+ u_int8_t ifnot;
+ u_int8_t match_tag_not;
+ u_int8_t natpass;
+
+#define PF_STATE_NORMAL 0x1
+#define PF_STATE_MODULATE 0x2
+#define PF_STATE_SYNPROXY 0x3
+ u_int8_t keep_state;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t type;
+ u_int8_t code;
+ u_int8_t flags;
+ u_int8_t flagset;
+ u_int8_t min_ttl;
+ u_int8_t allow_opts;
+ u_int8_t rt;
+ u_int8_t return_ttl;
+ u_int8_t tos;
+ u_int8_t anchor_relative;
+ u_int8_t anchor_wildcard;
+
+#define PF_FLUSH 0x01
+#define PF_FLUSH_GLOBAL 0x02
+ u_int8_t flush;
+};
+
+/* rule flags */
+#define PFRULE_DROP 0x0000
+#define PFRULE_RETURNRST 0x0001
+#define PFRULE_FRAGMENT 0x0002
+#define PFRULE_RETURNICMP 0x0004
+#define PFRULE_RETURN 0x0008
+#define PFRULE_NOSYNC 0x0010
+#define PFRULE_SRCTRACK 0x0020 /* track source states */
+#define PFRULE_RULESRCTRACK 0x0040 /* per rule */
+
+/* scrub flags */
+#define PFRULE_NODF 0x0100
+#define PFRULE_FRAGCROP 0x0200 /* non-buffering frag cache */
+#define PFRULE_FRAGDROP 0x0400 /* drop funny fragments */
+#define PFRULE_RANDOMID 0x0800
+#define PFRULE_REASSEMBLE_TCP 0x1000
+
+/* rule flags again */
+#define PFRULE_IFBOUND 0x00010000 /* if-bound */
+#define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */
+
+#define PFSTATE_HIWAT 10000 /* default state table size */
+#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */
+#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */
+
+
+struct pf_threshold {
+ u_int32_t limit;
+#define PF_THRESHOLD_MULT 1000
+#define PF_THRESHOLD_MAX 0xffffffff / PF_THRESHOLD_MULT
+ u_int32_t seconds;
+ u_int32_t count;
+ u_int32_t last;
+};
+
+struct pf_src_node {
+ RB_ENTRY(pf_src_node) entry;
+ struct pf_addr addr;
+ struct pf_addr raddr;
+ union pf_rule_ptr rule;
+ struct pfi_kif *kif;
+ u_int64_t bytes[2];
+ u_int64_t packets[2];
+ u_int32_t states;
+ u_int32_t conn;
+ struct pf_threshold conn_rate;
+ u_int32_t creation;
+ u_int32_t expire;
+ sa_family_t af;
+ u_int8_t ruletype;
+};
+
+#define PFSNODE_HIWAT 10000 /* default source node table size */
+
+struct pf_state_scrub {
+ struct timeval pfss_last; /* time received last packet */
+ u_int32_t pfss_tsecr; /* last echoed timestamp */
+ u_int32_t pfss_tsval; /* largest timestamp */
+ u_int32_t pfss_tsval0; /* original timestamp */
+ u_int16_t pfss_flags;
+#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */
+#define PFSS_PAWS 0x0010 /* stricter PAWS checks */
+#define PFSS_PAWS_IDLED 0x0020 /* was idle too long. no PAWS */
+#define PFSS_DATA_TS 0x0040 /* timestamp on data packets */
+#define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */
+ u_int8_t pfss_ttl; /* stashed TTL */
+ u_int8_t pad;
+ u_int32_t pfss_ts_mod; /* timestamp modulation */
+};
+
+struct pf_state_host {
+ struct pf_addr addr;
+ u_int16_t port;
+ u_int16_t pad;
+};
+
+struct pf_state_peer {
+ u_int32_t seqlo; /* Max sequence number sent */
+ u_int32_t seqhi; /* Max the other end ACKd + win */
+ u_int32_t seqdiff; /* Sequence number modulator */
+ u_int16_t max_win; /* largest window (pre scaling) */
+ u_int8_t state; /* active state level */
+ u_int8_t wscale; /* window scaling factor */
+ u_int16_t mss; /* Maximum segment size option */
+ u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */
+ struct pf_state_scrub *scrub; /* state is scrubbed */
+ u_int8_t pad[3];
+};
+
+TAILQ_HEAD(pf_state_queue, pf_state);
+
+/* keep synced with struct pf_state, used in RB_FIND */
+struct pf_state_cmp {
+ u_int64_t id;
+ u_int32_t creatorid;
+ struct pf_state_host lan;
+ struct pf_state_host gwy;
+ struct pf_state_host ext;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t direction;
+ u_int8_t pad;
+};
+
+struct pf_state {
+ u_int64_t id;
+ u_int32_t creatorid;
+ struct pf_state_host lan;
+ struct pf_state_host gwy;
+ struct pf_state_host ext;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t direction;
+#ifdef __FreeBSD__
+ u_int8_t local_flags;
+#define PFSTATE_EXPIRING 0x01
+#else
+ u_int8_t pad;
+#endif
+ u_int8_t log;
+ u_int8_t state_flags;
+#define PFSTATE_ALLOWOPTS 0x01
+#define PFSTATE_SLOPPY 0x02
+ u_int8_t timeout;
+ u_int8_t sync_flags;
+#define PFSTATE_NOSYNC 0x01
+#define PFSTATE_FROMSYNC 0x02
+#define PFSTATE_STALE 0x04
+ union {
+ struct {
+ RB_ENTRY(pf_state) entry_lan_ext;
+ RB_ENTRY(pf_state) entry_ext_gwy;
+ RB_ENTRY(pf_state) entry_id;
+ TAILQ_ENTRY(pf_state) entry_list;
+ struct pfi_kif *kif;
+ } s;
+ char ifname[IFNAMSIZ];
+ } u;
+ struct pf_state_peer src;
+ struct pf_state_peer dst;
+ union pf_rule_ptr rule;
+ union pf_rule_ptr anchor;
+ union pf_rule_ptr nat_rule;
+ struct pf_addr rt_addr;
+ struct pfi_kif *rt_kif;
+ struct pf_src_node *src_node;
+ struct pf_src_node *nat_src_node;
+ u_int64_t packets[2];
+ u_int64_t bytes[2];
+ u_int32_t creation;
+ u_int32_t expire;
+ u_int32_t pfsync_time;
+ u_int16_t tag;
+};
+
+TAILQ_HEAD(pf_rulequeue, pf_rule);
+
+struct pf_anchor;
+
+struct pf_ruleset {
+ struct {
+ struct pf_rulequeue queues[2];
+ struct {
+ struct pf_rulequeue *ptr;
+ struct pf_rule **ptr_array;
+ u_int32_t rcount;
+ u_int32_t ticket;
+ int open;
+ } active, inactive;
+ } rules[PF_RULESET_MAX];
+ struct pf_anchor *anchor;
+ u_int32_t tticket;
+ int tables;
+ int topen;
+};
+
+RB_HEAD(pf_anchor_global, pf_anchor);
+RB_HEAD(pf_anchor_node, pf_anchor);
+struct pf_anchor {
+ RB_ENTRY(pf_anchor) entry_global;
+ RB_ENTRY(pf_anchor) entry_node;
+ struct pf_anchor *parent;
+ struct pf_anchor_node children;
+ char name[PF_ANCHOR_NAME_SIZE];
+ char path[MAXPATHLEN];
+ struct pf_ruleset ruleset;
+ int refcnt; /* anchor rules */
+ int match;
+};
+RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare);
+RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare);
+
+#define PF_RESERVED_ANCHOR "_pf"
+
+#define PFR_TFLAG_PERSIST 0x00000001
+#define PFR_TFLAG_CONST 0x00000002
+#define PFR_TFLAG_ACTIVE 0x00000004
+#define PFR_TFLAG_INACTIVE 0x00000008
+#define PFR_TFLAG_REFERENCED 0x00000010
+#define PFR_TFLAG_REFDANCHOR 0x00000020
+#define PFR_TFLAG_USRMASK 0x00000003
+#define PFR_TFLAG_SETMASK 0x0000003C
+#define PFR_TFLAG_ALLMASK 0x0000003F
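+
+/*
+ * Editorial note: PFR_TFLAG_USRMASK collects the user-settable bits
+ * (PERSIST|CONST) and PFR_TFLAG_SETMASK the kernel-managed ones
+ * (ACTIVE|INACTIVE|REFERENCED|REFDANCHOR); ALLMASK is their union.
+ */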
+
+struct pfr_table {
+ char pfrt_anchor[MAXPATHLEN];
+ char pfrt_name[PF_TABLE_NAME_SIZE];
+ u_int32_t pfrt_flags;
+ u_int8_t pfrt_fback;
+};
+
+enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED,
+ PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE,
+ PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_MAX };
+
+struct pfr_addr {
+ union {
+ struct in_addr _pfra_ip4addr;
+ struct in6_addr _pfra_ip6addr;
+ } pfra_u;
+ u_int8_t pfra_af;
+ u_int8_t pfra_net;
+ u_int8_t pfra_not;
+ u_int8_t pfra_fback;
+};
+#define pfra_ip4addr pfra_u._pfra_ip4addr
+#define pfra_ip6addr pfra_u._pfra_ip6addr
+
+enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX };
+enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX };
+#define PFR_OP_XPASS PFR_OP_ADDR_MAX
+
+struct pfr_astats {
+ struct pfr_addr pfras_a;
+ u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ long pfras_tzero;
+};
+
+enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX };
+
+struct pfr_tstats {
+ struct pfr_table pfrts_t;
+ u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ u_int64_t pfrts_match;
+ u_int64_t pfrts_nomatch;
+ long pfrts_tzero;
+ int pfrts_cnt;
+ int pfrts_refcnt[PFR_REFCNT_MAX];
+};
+#define pfrts_name pfrts_t.pfrt_name
+#define pfrts_flags pfrts_t.pfrt_flags
+
+#ifndef _SOCKADDR_UNION_DEFINED
+#define _SOCKADDR_UNION_DEFINED
+union sockaddr_union {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+};
+#endif /* _SOCKADDR_UNION_DEFINED */
+
+SLIST_HEAD(pfr_kentryworkq, pfr_kentry);
+struct pfr_kentry {
+ struct radix_node pfrke_node[2];
+ union sockaddr_union pfrke_sa;
+ u_int64_t pfrke_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ u_int64_t pfrke_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ SLIST_ENTRY(pfr_kentry) pfrke_workq;
+ long pfrke_tzero;
+ u_int8_t pfrke_af;
+ u_int8_t pfrke_net;
+ u_int8_t pfrke_not;
+ u_int8_t pfrke_mark;
+ u_int8_t pfrke_intrpool;
+};
+
+SLIST_HEAD(pfr_ktableworkq, pfr_ktable);
+RB_HEAD(pfr_ktablehead, pfr_ktable);
+struct pfr_ktable {
+ struct pfr_tstats pfrkt_ts;
+ RB_ENTRY(pfr_ktable) pfrkt_tree;
+ SLIST_ENTRY(pfr_ktable) pfrkt_workq;
+ struct radix_node_head *pfrkt_ip4;
+ struct radix_node_head *pfrkt_ip6;
+ struct pfr_ktable *pfrkt_shadow;
+ struct pfr_ktable *pfrkt_root;
+ struct pf_ruleset *pfrkt_rs;
+ long pfrkt_larg;
+ int pfrkt_nflags;
+};
+#define pfrkt_t pfrkt_ts.pfrts_t
+#define pfrkt_name pfrkt_t.pfrt_name
+#define pfrkt_anchor pfrkt_t.pfrt_anchor
+#define pfrkt_ruleset pfrkt_t.pfrt_ruleset
+#define pfrkt_flags pfrkt_t.pfrt_flags
+#define pfrkt_cnt pfrkt_ts.pfrts_cnt
+#define pfrkt_refcnt pfrkt_ts.pfrts_refcnt
+#define pfrkt_packets pfrkt_ts.pfrts_packets
+#define pfrkt_bytes pfrkt_ts.pfrts_bytes
+#define pfrkt_match pfrkt_ts.pfrts_match
+#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch
+#define pfrkt_tzero pfrkt_ts.pfrts_tzero
+
+RB_HEAD(pf_state_tree_lan_ext, pf_state);
+RB_PROTOTYPE(pf_state_tree_lan_ext, pf_state,
+ u.s.entry_lan_ext, pf_state_compare_lan_ext);
+
+RB_HEAD(pf_state_tree_ext_gwy, pf_state);
+RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state,
+ u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
+
+TAILQ_HEAD(pfi_statehead, pfi_kif);
+RB_HEAD(pfi_ifhead, pfi_kif);
+
+/* keep synced with pfi_kif, used in RB_FIND */
+struct pfi_kif_cmp {
+ char pfik_name[IFNAMSIZ];
+};
+
+struct pfi_kif {
+ char pfik_name[IFNAMSIZ];
+ RB_ENTRY(pfi_kif) pfik_tree;
+ u_int64_t pfik_packets[2][2][2];
+ u_int64_t pfik_bytes[2][2][2];
+ u_int32_t pfik_tzero;
+ int pfik_flags;
+ struct pf_state_tree_lan_ext pfik_lan_ext;
+ struct pf_state_tree_ext_gwy pfik_ext_gwy;
+ TAILQ_ENTRY(pfi_kif) pfik_w_states;
+#ifndef __FreeBSD__
+ void *pfik_ah_cookie;
+#endif
+ struct ifnet *pfik_ifp;
+ struct ifg_group *pfik_group;
+ int pfik_states;
+ int pfik_rules;
+ TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs;
+};
+
+enum pfi_kif_refs {
+ PFI_KIF_REF_NONE,
+ PFI_KIF_REF_STATE,
+ PFI_KIF_REF_RULE
+};
+
+#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */
+/* XXX: revisit */
+#define PFI_IFLAG_SETABLE_MASK	0x0100	/* settable via DIOC{SET,CLR}IFFLAG */
+#define PFI_IFLAG_PLACEHOLDER 0x8000 /* placeholder group/interface */
+
+struct pf_pdesc {
+ struct {
+ int done;
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+ } lookup;
+ u_int64_t tot_len; /* Make Mickey money */
+ union {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct icmp *icmp;
+#ifdef INET6
+ struct icmp6_hdr *icmp6;
+#endif /* INET6 */
+ void *any;
+ } hdr;
+ struct pf_addr baddr; /* address before translation */
+ struct pf_addr naddr; /* address after translation */
+ struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */
+ struct pf_addr *src;
+ struct pf_addr *dst;
+ struct ether_header
+ *eh;
+ struct pf_mtag *pf_mtag;
+ u_int16_t *ip_sum;
+ u_int32_t p_len; /* total length of payload */
+ u_int16_t flags; /* Let SCRUB trigger behavior in
+ * state code. Easier than tags */
+#define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */
+#define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t tos;
+};
+
+/* flags for RDR options */
+#define PF_DPORT_RANGE 0x01 /* Dest port uses range */
+#define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */
+
+/* Reason codes for passing/dropping a packet */
+#define PFRES_MATCH 0 /* Explicit match of a rule */
+#define PFRES_BADOFF 1 /* Bad offset for pull_hdr */
+#define PFRES_FRAG 2 /* Dropping following fragment */
+#define PFRES_SHORT 3 /* Dropping short packet */
+#define PFRES_NORM 4 /* Dropping by normalizer */
+#define PFRES_MEMORY 5 /* Dropped due to lacking mem */
+#define PFRES_TS 6 /* Bad TCP Timestamp (RFC1323) */
+#define PFRES_CONGEST 7 /* Congestion (of ipintrq) */
+#define PFRES_IPOPTIONS 8 /* IP option */
+#define PFRES_PROTCKSUM 9 /* Protocol checksum invalid */
+#define PFRES_BADSTATE 10 /* State mismatch */
+#define PFRES_STATEINS 11 /* State insertion failure */
+#define PFRES_MAXSTATES 12 /* State limit */
+#define PFRES_SRCLIMIT 13 /* Source node/conn limit */
+#define PFRES_SYNPROXY 14 /* SYN proxy */
+#define PFRES_MAX 15 /* total+1 */
+
+#define PFRES_NAMES { \
+ "match", \
+ "bad-offset", \
+ "fragment", \
+ "short", \
+ "normalize", \
+ "memory", \
+ "bad-timestamp", \
+ "congestion", \
+ "ip-option", \
+ "proto-cksum", \
+ "state-mismatch", \
+ "state-insert", \
+ "state-limit", \
+ "src-limit", \
+ "synproxy", \
+ NULL \
+}
+
+/* Counters for other things we want to keep track of */
+#define LCNT_STATES 0 /* states */
+#define LCNT_SRCSTATES 1 /* max-src-states */
+#define LCNT_SRCNODES 2 /* max-src-nodes */
+#define LCNT_SRCCONN 3 /* max-src-conn */
+#define LCNT_SRCCONNRATE 4 /* max-src-conn-rate */
+#define LCNT_OVERLOAD_TABLE 5 /* entry added to overload table */
+#define LCNT_OVERLOAD_FLUSH 6 /* state entries flushed */
+#define LCNT_MAX 7 /* total+1 */
+
+#define LCNT_NAMES { \
+ "max states per rule", \
+ "max-src-states", \
+ "max-src-nodes", \
+ "max-src-conn", \
+ "max-src-conn-rate", \
+ "overload table insertion", \
+ "overload flush states", \
+ NULL \
+}
+
+/* UDP state enumeration */
+#define PFUDPS_NO_TRAFFIC 0
+#define PFUDPS_SINGLE 1
+#define PFUDPS_MULTIPLE 2
+
+#define PFUDPS_NSTATES 3 /* number of state levels */
+
+#define PFUDPS_NAMES { \
+ "NO_TRAFFIC", \
+ "SINGLE", \
+ "MULTIPLE", \
+ NULL \
+}
+
+/* Other protocol state enumeration */
+#define PFOTHERS_NO_TRAFFIC 0
+#define PFOTHERS_SINGLE 1
+#define PFOTHERS_MULTIPLE 2
+
+#define PFOTHERS_NSTATES 3 /* number of state levels */
+
+#define PFOTHERS_NAMES { \
+ "NO_TRAFFIC", \
+ "SINGLE", \
+ "MULTIPLE", \
+ NULL \
+}
+
+#define FCNT_STATE_SEARCH 0
+#define FCNT_STATE_INSERT 1
+#define FCNT_STATE_REMOVALS 2
+#define FCNT_MAX 3
+
+#define SCNT_SRC_NODE_SEARCH 0
+#define SCNT_SRC_NODE_INSERT 1
+#define SCNT_SRC_NODE_REMOVALS 2
+#define SCNT_MAX 3
+
+#define ACTION_SET(a, x) \
+ do { \
+ if ((a) != NULL) \
+ *(a) = (x); \
+ } while (0)
+
+#define REASON_SET(a, x) \
+ do { \
+ if ((a) != NULL) \
+ *(a) = (x); \
+ if (x < PFRES_MAX) \
+ pf_status.counters[x]++; \
+ } while (0)
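+
+/*
+ * Editorial sketch: pf.c pairs these when rejecting a packet, e.g.
+ * (local variables hypothetical):
+ */
+#if 0
+	REASON_SET(&reason, PFRES_MEMORY);
+	ACTION_SET(&action, PF_DROP);
+#endif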
+
+struct pf_status {
+ u_int64_t counters[PFRES_MAX];
+ u_int64_t lcounters[LCNT_MAX]; /* limit counters */
+ u_int64_t fcounters[FCNT_MAX];
+ u_int64_t scounters[SCNT_MAX];
+ u_int64_t pcounters[2][2][3];
+ u_int64_t bcounters[2][2];
+ u_int64_t stateid;
+ u_int32_t running;
+ u_int32_t states;
+ u_int32_t src_nodes;
+ u_int32_t since;
+ u_int32_t debug;
+ u_int32_t hostid;
+ char ifname[IFNAMSIZ];
+ u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH];
+};
+
+struct cbq_opts {
+ u_int minburst;
+ u_int maxburst;
+ u_int pktsize;
+ u_int maxpktsize;
+ u_int ns_per_byte;
+ u_int maxidle;
+ int minidle;
+ u_int offtime;
+ int flags;
+};
+
+struct priq_opts {
+ int flags;
+};
+
+struct hfsc_opts {
+ /* real-time service curve */
+ u_int rtsc_m1; /* slope of the 1st segment in bps */
+ u_int rtsc_d; /* the x-projection of m1 in msec */
+ u_int rtsc_m2; /* slope of the 2nd segment in bps */
+ /* link-sharing service curve */
+ u_int lssc_m1;
+ u_int lssc_d;
+ u_int lssc_m2;
+ /* upper-limit service curve */
+ u_int ulsc_m1;
+ u_int ulsc_d;
+ u_int ulsc_m2;
+ int flags;
+};
+
+struct pf_altq {
+ char ifname[IFNAMSIZ];
+
+ void *altq_disc; /* discipline-specific state */
+ TAILQ_ENTRY(pf_altq) entries;
+
+ /* scheduler spec */
+ u_int8_t scheduler; /* scheduler type */
+ u_int16_t tbrsize; /* tokenbucket regulator size */
+ u_int32_t ifbandwidth; /* interface bandwidth */
+
+ /* queue spec */
+ char qname[PF_QNAME_SIZE]; /* queue name */
+ char parent[PF_QNAME_SIZE]; /* parent name */
+ u_int32_t parent_qid; /* parent queue id */
+ u_int32_t bandwidth; /* queue bandwidth */
+ u_int8_t priority; /* priority */
+#ifdef __FreeBSD__
+ u_int8_t local_flags; /* dynamic interface */
+#define PFALTQ_FLAG_IF_REMOVED 0x01
+#endif
+ u_int16_t qlimit; /* queue size limit */
+ u_int16_t flags; /* misc flags */
+ union {
+ struct cbq_opts cbq_opts;
+ struct priq_opts priq_opts;
+ struct hfsc_opts hfsc_opts;
+ } pq_u;
+
+ u_int32_t qid; /* return value */
+};
+
+#ifndef __FreeBSD__
+
+#define PF_TAG_GENERATED 0x01
+#define PF_TAG_FRAGCACHE 0x02
+#define PF_TAG_TRANSLATE_LOCALHOST 0x04
+
+struct pf_mtag {
+ void *hdr; /* saved hdr pos in mbuf, for ECN */
+ u_int rtableid; /* alternate routing table id */
+ u_int32_t qid; /* queue id */
+ u_int16_t tag; /* tag id */
+ u_int8_t flags;
+ u_int8_t routed;
+ sa_family_t af; /* for ECN */
+};
+#endif
+
+struct pf_tag {
+ u_int16_t tag; /* tag id */
+};
+
+struct pf_tagname {
+ TAILQ_ENTRY(pf_tagname) entries;
+ char name[PF_TAG_NAME_SIZE];
+ u_int16_t tag;
+ int ref;
+};
+
+#define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */
+#define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */
+#define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */
+#define PFFRAG_FRCACHE_HIWAT 10000 /* Number of fragment descriptors */
+
+#define PFR_KTABLE_HIWAT 1000 /* Number of tables */
+#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */
+#define PFR_KENTRY_HIWAT_SMALL 100000 /* Number of table entries (tiny hosts) */
+
+/*
+ * ioctl parameter structures
+ */
+
+struct pfioc_pooladdr {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t nr;
+ u_int32_t r_num;
+ u_int8_t r_action;
+ u_int8_t r_last;
+ u_int8_t af;
+ char anchor[MAXPATHLEN];
+ struct pf_pooladdr addr;
+};
+
+struct pfioc_rule {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t pool_ticket;
+ u_int32_t nr;
+ char anchor[MAXPATHLEN];
+ char anchor_call[MAXPATHLEN];
+ struct pf_rule rule;
+};
+
+struct pfioc_natlook {
+ struct pf_addr saddr;
+ struct pf_addr daddr;
+ struct pf_addr rsaddr;
+ struct pf_addr rdaddr;
+ u_int16_t sport;
+ u_int16_t dport;
+ u_int16_t rsport;
+ u_int16_t rdport;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t direction;
+};
+
+struct pfioc_state {
+ u_int32_t nr;
+ struct pf_state state;
+};
+
+struct pfioc_src_node_kill {
+ /* XXX returns the number of src nodes killed in psnk_af */
+ sa_family_t psnk_af;
+ struct pf_rule_addr psnk_src;
+ struct pf_rule_addr psnk_dst;
+};
+
+struct pfioc_state_kill {
+ /* XXX returns the number of states killed in psk_af */
+ sa_family_t psk_af;
+ int psk_proto;
+ struct pf_rule_addr psk_src;
+ struct pf_rule_addr psk_dst;
+ char psk_ifname[IFNAMSIZ];
+};
+
+struct pfioc_states {
+ int ps_len;
+ union {
+ caddr_t psu_buf;
+ struct pf_state *psu_states;
+ } ps_u;
+#define ps_buf ps_u.psu_buf
+#define ps_states ps_u.psu_states
+};
+
+struct pfioc_src_nodes {
+ int psn_len;
+ union {
+ caddr_t psu_buf;
+ struct pf_src_node *psu_src_nodes;
+ } psn_u;
+#define psn_buf psn_u.psu_buf
+#define psn_src_nodes psn_u.psu_src_nodes
+};
+
+struct pfioc_if {
+ char ifname[IFNAMSIZ];
+};
+
+struct pfioc_tm {
+ int timeout;
+ int seconds;
+};
+
+struct pfioc_limit {
+ int index;
+ unsigned limit;
+};
+
+struct pfioc_altq {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t nr;
+ struct pf_altq altq;
+};
+
+struct pfioc_qstats {
+ u_int32_t ticket;
+ u_int32_t nr;
+ void *buf;
+ int nbytes;
+ u_int8_t scheduler;
+};
+
+struct pfioc_ruleset {
+ u_int32_t nr;
+ char path[MAXPATHLEN];
+ char name[PF_ANCHOR_NAME_SIZE];
+};
+
+#define PF_RULESET_ALTQ (PF_RULESET_MAX)
+#define PF_RULESET_TABLE (PF_RULESET_MAX+1)
+struct pfioc_trans {
+ int size; /* number of elements */
+ int esize; /* size of each element in bytes */
+ struct pfioc_trans_e {
+ int rs_num;
+ char anchor[MAXPATHLEN];
+ u_int32_t ticket;
+ } *array;
+};
+
+#define PFR_FLAG_ATOMIC 0x00000001
+#define PFR_FLAG_DUMMY 0x00000002
+#define PFR_FLAG_FEEDBACK 0x00000004
+#define PFR_FLAG_CLSTATS 0x00000008
+#define PFR_FLAG_ADDRSTOO 0x00000010
+#define PFR_FLAG_REPLACE 0x00000020
+#define PFR_FLAG_ALLRSETS 0x00000040
+#define PFR_FLAG_ALLMASK 0x0000007F
+#ifdef _KERNEL
+#define PFR_FLAG_USERIOCTL 0x10000000
+#endif
+
+struct pfioc_table {
+ struct pfr_table pfrio_table;
+ void *pfrio_buffer;
+ int pfrio_esize;
+ int pfrio_size;
+ int pfrio_size2;
+ int pfrio_nadd;
+ int pfrio_ndel;
+ int pfrio_nchange;
+ int pfrio_flags;
+ u_int32_t pfrio_ticket;
+};
+#define pfrio_exists pfrio_nadd
+#define pfrio_nzero pfrio_nadd
+#define pfrio_nmatch pfrio_nadd
+#define pfrio_naddr pfrio_size2
+#define pfrio_setflag pfrio_size2
+#define pfrio_clrflag pfrio_nadd
+
+struct pfioc_iface {
+ char pfiio_name[IFNAMSIZ];
+ void *pfiio_buffer;
+ int pfiio_esize;
+ int pfiio_size;
+ int pfiio_nzero;
+ int pfiio_flags;
+};
+
+
+/*
+ * ioctl operations
+ */
+
+#define DIOCSTART _IO ('D', 1)
+#define DIOCSTOP _IO ('D', 2)
+#define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule)
+#define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule)
+#define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule)
+/* XXX cut 8 - 17 */
+#define DIOCCLRSTATES _IOWR('D', 18, struct pfioc_state_kill)
+#define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state)
+#define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if)
+#define DIOCGETSTATUS _IOWR('D', 21, struct pf_status)
+#define DIOCCLRSTATUS _IO ('D', 22)
+#define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook)
+#define DIOCSETDEBUG _IOWR('D', 24, u_int32_t)
+#define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states)
+#define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule)
+/* XXX cut 26 - 28 */
+#define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm)
+#define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm)
+#define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state)
+#define DIOCCLRRULECTRS _IO ('D', 38)
+#define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit)
+#define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit)
+#define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill)
+#define DIOCSTARTALTQ _IO ('D', 42)
+#define DIOCSTOPALTQ _IO ('D', 43)
+#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq)
+#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq)
+#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq)
+#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq)
+#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats)
+#define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr)
+#define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr)
+#define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr)
+#define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr)
+#define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr)
+/* XXX cut 56 - 57 */
+#define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset)
+#define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset)
+#define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table)
+#define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table)
+#define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table)
+#define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table)
+#define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table)
+#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table)
+#define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table)
+#define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table)
+#define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table)
+#define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table)
+#define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table)
+#define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table)
+#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table)
+#define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table)
+#define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table)
+#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table)
+#define DIOCOSFPFLUSH	_IO ('D', 78)
+#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl)
+#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl)
+#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans)
+#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans)
+#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans)
+#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes)
+#define DIOCCLRSRCNODES	_IO ('D', 85)
+#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t)
+#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface)
+#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface)
+#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface)
+#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill)
+#ifdef __FreeBSD__
+struct pf_ifspeed {
+ char ifname[IFNAMSIZ];
+ u_int32_t baudrate;
+};
+#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed)
+#endif
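+/*
+ * Usage sketch (illustration only, not part of the imported pf code):
+ * consumers such as pfctl(8) open the /dev/pf pseudo-device and drive
+ * it with the ioctls above.  Kept under #if 0 so the header is
+ * unaffected; the helper name is hypothetical.
+ */
+#if 0
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static int
+pf_is_running(void)
+{
+	struct pf_status status;
+	int dev, running;
+
+	if ((dev = open("/dev/pf", O_RDWR)) == -1)
+		return (-1);		/* pf device not available */
+	if (ioctl(dev, DIOCGETSTATUS, &status) == -1) {
+		(void)close(dev);
+		return (-1);
+	}
+	running = status.running ? 1 : 0;	/* struct pf_status.running */
+	(void)close(dev);
+	return (running);
+}
+#endif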
+
+#ifdef _KERNEL
+RB_HEAD(pf_src_tree, pf_src_node);
+RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare);
+extern struct pf_src_tree tree_src_tracking;
+
+RB_HEAD(pf_state_tree_id, pf_state);
+RB_PROTOTYPE(pf_state_tree_id, pf_state,
+ entry_id, pf_state_compare_id);
+extern struct pf_state_tree_id tree_id;
+extern struct pf_state_queue state_list;
+
+TAILQ_HEAD(pf_poolqueue, pf_pool);
+extern struct pf_poolqueue pf_pools[2];
+TAILQ_HEAD(pf_altqqueue, pf_altq);
+extern struct pf_altqqueue pf_altqs[2];
+extern struct pf_palist pf_pabuf;
+
+extern u_int32_t ticket_altqs_active;
+extern u_int32_t ticket_altqs_inactive;
+extern int altqs_inactive_open;
+extern u_int32_t ticket_pabuf;
+extern struct pf_altqqueue *pf_altqs_active;
+extern struct pf_altqqueue *pf_altqs_inactive;
+extern struct pf_poolqueue *pf_pools_active;
+extern struct pf_poolqueue *pf_pools_inactive;
+extern int pf_tbladdr_setup(struct pf_ruleset *,
+ struct pf_addr_wrap *);
+extern void pf_tbladdr_remove(struct pf_addr_wrap *);
+extern void pf_tbladdr_copyout(struct pf_addr_wrap *);
+extern void pf_calc_skip_steps(struct pf_rulequeue *);
+#ifdef __FreeBSD__
+#ifdef ALTQ
+extern void pf_altq_ifnet_event(struct ifnet *, int);
+#endif
+extern uma_zone_t pf_src_tree_pl, pf_rule_pl;
+extern uma_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
+extern uma_zone_t pfr_ktable_pl, pfr_kentry_pl, pfr_kentry_pl2;
+extern uma_zone_t pf_cache_pl, pf_cent_pl;
+extern uma_zone_t pf_state_scrub_pl;
+extern uma_zone_t pfi_addr_pl;
+#else
+extern struct pool pf_src_tree_pl, pf_rule_pl;
+extern struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
+extern struct pool pf_state_scrub_pl;
+#endif
+extern void pf_purge_thread(void *);
+#ifdef __FreeBSD__
+extern int pf_purge_expired_src_nodes(int);
+extern int pf_purge_expired_states(u_int32_t, int);
+#else
+extern void pf_purge_expired_src_nodes(int);
+extern void pf_purge_expired_states(u_int32_t);
+#endif
+extern void pf_unlink_state(struct pf_state *);
+extern void pf_free_state(struct pf_state *);
+extern int pf_insert_state(struct pfi_kif *,
+ struct pf_state *);
+extern int pf_insert_src_node(struct pf_src_node **,
+ struct pf_rule *, struct pf_addr *,
+ sa_family_t);
+void pf_src_tree_remove_state(struct pf_state *);
+extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *);
+extern struct pf_state *pf_find_state_all(struct pf_state_cmp *key,
+ u_int8_t tree, int *more);
+extern void pf_print_state(struct pf_state *);
+extern void pf_print_flags(u_int8_t);
+extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
+ u_int8_t);
+
+extern struct ifnet *sync_ifp;
+extern struct pf_rule pf_default_rule;
+extern void pf_addrcpy(struct pf_addr *, struct pf_addr *,
+ u_int8_t);
+void pf_rm_rule(struct pf_rulequeue *,
+ struct pf_rule *);
+
+#ifdef INET
+#ifdef __FreeBSD__
+int pf_test(int, struct ifnet *, struct mbuf **, struct ether_header *,
+ struct inpcb *);
+#else
+int pf_test(int, struct ifnet *, struct mbuf **, struct ether_header *);
+#endif
+#endif /* INET */
+
+#ifdef INET6
+#ifdef __FreeBSD__
+int pf_test6(int, struct ifnet *, struct mbuf **, struct ether_header *,
+ struct inpcb *);
+#else
+int pf_test6(int, struct ifnet *, struct mbuf **, struct ether_header *);
+#endif
+void pf_poolmask(struct pf_addr *, struct pf_addr*,
+ struct pf_addr *, struct pf_addr *, u_int8_t);
+void pf_addr_inc(struct pf_addr *, sa_family_t);
+#endif /* INET6 */
+
+#ifdef __FreeBSD__
+u_int32_t pf_new_isn(struct pf_state *);
+#endif
+void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
+ sa_family_t);
+void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
+int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t,
+ u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *,
+ struct pf_pdesc *);
+int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, sa_family_t);
+int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t);
+int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t);
+int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t);
+int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t);
+
+void pf_normalize_init(void);
+int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *,
+ struct pf_pdesc *);
+int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *,
+ struct pf_pdesc *);
+int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *,
+ struct pf_pdesc *);
+void pf_normalize_tcp_cleanup(struct pf_state *);
+int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *,
+ struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *);
+int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *,
+ u_short *, struct tcphdr *, struct pf_state *,
+ struct pf_state_peer *, struct pf_state_peer *, int *);
+u_int32_t
+ pf_state_expires(const struct pf_state *);
+void pf_purge_expired_fragments(void);
+int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *);
+int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *);
+#ifdef __FreeBSD__
+int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *);
+#else
+int pf_socket_lookup(int, struct pf_pdesc *);
+#endif
+void pfr_initialize(void);
+int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t);
+void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t,
+ u_int64_t, int, int, int);
+int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *,
+ struct pf_addr **, struct pf_addr **, sa_family_t);
+void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *);
+struct pfr_ktable *
+ pfr_attach_table(struct pf_ruleset *, char *);
+void pfr_detach_table(struct pfr_ktable *);
+int pfr_clr_tables(struct pfr_table *, int *, int);
+int pfr_add_tables(struct pfr_table *, int, int *, int);
+int pfr_del_tables(struct pfr_table *, int, int *, int);
+int pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int);
+int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int);
+int pfr_clr_tstats(struct pfr_table *, int, int *, int);
+int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int);
+int pfr_clr_addrs(struct pfr_table *, int *, int);
+int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long);
+int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int *, int *, int *, int, u_int32_t);
+int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int);
+int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int);
+int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int);
+int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int);
+int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int);
+int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *,
+ int *, u_int32_t, int);
+
+extern struct pfi_statehead pfi_statehead;
+extern struct pfi_kif *pfi_all;
+
+void pfi_initialize(void);
+#ifdef __FreeBSD__
+void pfi_cleanup(void);
+#endif
+struct pfi_kif *pfi_kif_get(const char *);
+void pfi_kif_ref(struct pfi_kif *, enum pfi_kif_refs);
+void pfi_kif_unref(struct pfi_kif *, enum pfi_kif_refs);
+int pfi_kif_match(struct pfi_kif *, struct pfi_kif *);
+void pfi_attach_ifnet(struct ifnet *);
+void pfi_detach_ifnet(struct ifnet *);
+void pfi_attach_ifgroup(struct ifg_group *);
+void pfi_detach_ifgroup(struct ifg_group *);
+void pfi_group_change(const char *);
+int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *,
+ sa_family_t);
+int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t);
+void pfi_dynaddr_remove(struct pf_addr_wrap *);
+void pfi_dynaddr_copyout(struct pf_addr_wrap *);
+void pfi_fill_oldstatus(struct pf_status *);
+int pfi_clr_istats(const char *);
+int pfi_get_ifaces(const char *, struct pfi_kif *, int *);
+int pfi_set_flags(const char *, int);
+int pfi_clear_flags(const char *, int);
+
+u_int16_t pf_tagname2tag(char *);
+void pf_tag2tagname(u_int16_t, char *);
+void pf_tag_ref(u_int16_t);
+void pf_tag_unref(u_int16_t);
+int pf_tag_packet(struct mbuf *, struct pf_mtag *, int, int);
+u_int32_t pf_qname2qid(char *);
+void pf_qid2qname(u_int32_t, char *);
+void pf_qid_unref(u_int32_t);
+#ifndef __FreeBSD__
+struct pf_mtag *pf_find_mtag(struct mbuf *);
+struct pf_mtag *pf_get_mtag(struct mbuf *);
+#endif
+
+extern struct pf_status pf_status;
+
+#ifdef __FreeBSD__
+extern uma_zone_t pf_frent_pl, pf_frag_pl;
+extern struct sx pf_consistency_lock;
+#else
+extern struct pool pf_frent_pl, pf_frag_pl;
+extern struct rwlock pf_consistency_lock;
+#endif
+
+struct pf_pool_limit {
+ void *pp;
+ unsigned limit;
+};
+extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
+
+#ifdef __FreeBSD__
+struct pf_frent {
+ LIST_ENTRY(pf_frent) fr_next;
+ struct ip *fr_ip;
+ struct mbuf *fr_m;
+};
+
+struct pf_frcache {
+ LIST_ENTRY(pf_frcache) fr_next;
+ uint16_t fr_off;
+ uint16_t fr_end;
+};
+
+struct pf_fragment {
+ RB_ENTRY(pf_fragment) fr_entry;
+ TAILQ_ENTRY(pf_fragment) frag_next;
+ struct in_addr fr_src;
+ struct in_addr fr_dst;
+ u_int8_t fr_p; /* protocol of this fragment */
+ u_int8_t fr_flags; /* status flags */
+ u_int16_t fr_id; /* fragment id for reassemble */
+ u_int16_t fr_max; /* fragment data max */
+ u_int32_t fr_timeout;
+#define fr_queue fr_u.fru_queue
+#define fr_cache fr_u.fru_cache
+ union {
+ LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */
+ LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */
+ } fr_u;
+};
+#endif /* __FreeBSD__ */
+
+#endif /* _KERNEL */
+
+extern struct pf_anchor_global pf_anchors;
+extern struct pf_anchor pf_main_anchor;
+#define pf_main_ruleset pf_main_anchor.ruleset
+
+/* These ruleset functions can be linked into userland programs (pfctl) */
+int pf_get_ruleset_number(u_int8_t);
+void pf_init_ruleset(struct pf_ruleset *);
+int pf_anchor_setup(struct pf_rule *,
+ const struct pf_ruleset *, const char *);
+int pf_anchor_copyout(const struct pf_ruleset *,
+ const struct pf_rule *, struct pfioc_rule *);
+void pf_anchor_remove(struct pf_rule *);
+void pf_remove_if_empty_ruleset(struct pf_ruleset *);
+struct pf_anchor *pf_find_anchor(const char *);
+struct pf_ruleset *pf_find_ruleset(const char *);
+struct pf_ruleset *pf_find_or_create_ruleset(const char *);
+void pf_rs_initialize(void);
+
+#ifndef __FreeBSD__
+/* XXX duplicates of the pf_anchor_copyout()/pf_anchor_remove() prototypes above */
+#ifdef _KERNEL
+int pf_anchor_copyout(const struct pf_ruleset *,
+ const struct pf_rule *, struct pfioc_rule *);
+void pf_anchor_remove(struct pf_rule *);
+
+#endif /* _KERNEL */
+#endif
+
+/* The fingerprint functions can be linked into userland programs (tcpdump) */
+int pf_osfp_add(struct pf_osfp_ioctl *);
+#ifdef _KERNEL
+struct pf_osfp_enlist *
+ pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int,
+ const struct tcphdr *);
+#endif /* _KERNEL */
+struct pf_osfp_enlist *
+ pf_osfp_fingerprint_hdr(const struct ip *, const struct ip6_hdr *,
+ const struct tcphdr *);
+void pf_osfp_flush(void);
+int pf_osfp_get(struct pf_osfp_ioctl *);
+#ifdef __FreeBSD__
+int pf_osfp_initialize(void);
+void pf_osfp_cleanup(void);
+#else
+void pf_osfp_initialize(void);
+#endif
+int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t);
+struct pf_os_fingerprint *
+ pf_osfp_validate(void);
+
+#endif /* _NET_PFVAR_HH_ */
diff --git a/freebsd/sys/contrib/pf/netinet/in4_cksum.c b/freebsd/sys/contrib/pf/netinet/in4_cksum.c
new file mode 100644
index 00000000..105f26f0
--- /dev/null
+++ b/freebsd/sys/contrib/pf/netinet/in4_cksum.c
@@ -0,0 +1,122 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $OpenBSD: in4_cksum.c,v 1.7 2003/06/02 23:28:13 millert Exp $ */
+/* $KAME: in4_cksum.c,v 1.10 2001/11/30 10:06:15 itojun Exp $ */
+/* $NetBSD: in_cksum.c,v 1.13 1996/10/13 02:03:03 christos Exp $ */
+
+/*
+ * Copyright (C) 1999 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1988, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+
+#include <freebsd/machine/in_cksum.h>
+
+#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
+#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
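+/*
+ * Worked example: for sum = 0x0002fffe, REDUCE adds the two 16-bit
+ * halves (0x0002 + 0xfffe = 0x10000) and ADDCARRY folds the carry back
+ * in (0x10000 - 0xffff = 0x0001), the usual one's complement
+ * end-around carry.
+ */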
+
+int in4_cksum(struct mbuf *, u_int8_t, int, int);
+
+int
+in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len)
+{
+ union {
+ struct ipovly ipov;
+ u_int16_t w[10];
+ } u;
+ union {
+ u_int16_t s[2];
+ u_int32_t l;
+ } l_util;
+
+ u_int16_t *w;
+ int psum;
+ int sum = 0;
+
+ if (nxt != 0) {
+ /* pseudo header */
+ if (off < sizeof(struct ipovly))
+ panic("in4_cksum: offset too short");
+ if (m->m_len < sizeof(struct ip))
+ panic("in4_cksum: bad mbuf chain");
+ bzero(&u.ipov, sizeof(u.ipov));
+ u.ipov.ih_len = htons(len);
+ u.ipov.ih_pr = nxt;
+ u.ipov.ih_src = mtod(m, struct ip *)->ip_src;
+ u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
+ w = u.w;
+ /* assumes sizeof(ipov) == 20 */
+ sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
+ sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
+ }
+
+ psum = in_cksum_skip(m, len + off, off);
+ psum = ~psum & 0xffff;
+ sum += psum;
+ REDUCE;
+ return (~sum & 0xffff);
+}
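+
+/*
+ * Callers in pf's checksum verification path are expected to invoke
+ * this as, e.g., in4_cksum(m, IPPROTO_TCP, off, len), where a return
+ * value of 0 means the checksum over the pseudo header plus payload is
+ * valid; nxt == 0 skips the pseudo header entirely.
+ */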