author		Sebastian Huber <sebastian.huber@embedded-brains.de>	2016-10-07 15:10:20 +0200
committer	Sebastian Huber <sebastian.huber@embedded-brains.de>	2017-01-10 09:53:31 +0100
commit		c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f (patch)
tree		ad4f2519067709f00ab98b3c591186c26dc3a21f /freebsd/sys/netpfil
parent		userspace-header-gen.py: Simplify program ports (diff)
download	rtems-libbsd-c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f.tar.bz2
Update to FreeBSD head 2016-08-23
Git mirror commit 9fe7c416e6abb28b1398fd3e5687099846800cfd.
Diffstat (limited to 'freebsd/sys/netpfil')
58 files changed, 44729 insertions, 7825 deletions
diff --git a/freebsd/sys/netpfil/ipfw/dn_aqm.h b/freebsd/sys/netpfil/ipfw/dn_aqm.h new file mode 100644 index 00000000..d01e98eb --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/dn_aqm.h @@ -0,0 +1,167 @@ +/*- + * Copyright (C) 2016 Centre for Advanced Internet Architectures, + * Swinburne University of Technology, Melbourne, Australia. + * Portions of this code were made possible in part by a gift from + * The Comcast Innovation Fund. + * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * API for writing an Active Queue Management algorithm for Dummynet + * + * $FreeBSD$ + */ + +#ifndef _IP_DN_AQM_H +#define _IP_DN_AQM_H + + +/* NOW is the current time in millisecond*/ +#define NOW ((dn_cfg.curr_time * tick) / 1000) + +#define AQM_UNOW (dn_cfg.curr_time * tick) +#define AQM_TIME_1US ((aqm_time_t)(1)) +#define AQM_TIME_1MS ((aqm_time_t)(1000)) +#define AQM_TIME_1S ((aqm_time_t)(AQM_TIME_1MS * 1000)) + +/* aqm time allows to store up to 4294 seconds */ +typedef uint32_t aqm_time_t; +typedef int32_t aqm_stime_t; + +#define DN_AQM_MTAG_TS 55345 + +/* Macro for variable bounding */ +#define BOUND_VAR(x,l,h) ((x) > (h)? (h) : ((x) > (l)? (x) : (l))) + +/* sysctl variable to count number of dropped packets */ +extern unsigned long io_pkt_drop; + +/* + * Structure for holding data and function pointers that together represent a + * AQM algorithm. + */ + struct dn_aqm { +#define DN_AQM_NAME_MAX 50 + char name[DN_AQM_NAME_MAX]; /* name of AQM algorithm */ + uint32_t type; /* AQM type number */ + + /* Methods implemented by AQM algorithm: + * + * enqueue enqueue packet 'm' on queue 'q'. + * Return 0 on success, 1 on drop. + * + * dequeue dequeue a packet from queue 'q'. + * Return a packet, NULL if no packet available. + * + * config configure AQM algorithm + * If required, this function should allocate space to store + * the configurations and set 'fs->aqmcfg' to point to this space. + * 'dn_extra_parms' includes array of parameters send + * from ipfw userland command. + * Return 0 on success, non-zero otherwise. + * + * deconfig deconfigure AQM algorithm. + * The allocated configuration memory space should be freed here. + * Return 0 on success, non-zero otherwise. 
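A minimal usage sketch of the time and bounding macros above (the variable names are hypothetical, not from this header):

	/* clamp a hypothetical user-supplied interval into [1ms, 5s] */
	aqm_time_t ival = BOUND_VAR(user_ival, AQM_TIME_1MS, 5 * AQM_TIME_1S);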
+ * + * init initialise AQM status variables of queue 'q' + * This function is used to allocate space and init AQM status for a + * queue and q->aqm_status to point to this space. + * Return 0 on success, non-zero otherwise. + * + * cleanup cleanup AQM status variables of queue 'q' + * The allocated memory space for AQM status should be freed here. + * Return 0 on success, non-zero otherwise. + * + * getconfig retrieve AQM configurations + * This function is used to return AQM parameters to userland + * command. The function should fill 'dn_extra_parms' struct with + * the AQM configurations using 'par' array. + * + */ + + int (*enqueue)(struct dn_queue *, struct mbuf *); + struct mbuf * (*dequeue)(struct dn_queue *); + int (*config)(struct dn_fsk *, struct dn_extra_parms *ep, int); + int (*deconfig)(struct dn_fsk *); + int (*init)(struct dn_queue *); + int (*cleanup)(struct dn_queue *); + int (*getconfig)(struct dn_fsk *, struct dn_extra_parms *); + + int ref_count; /*Number of queues instances in the system */ + int cfg_ref_count; /*Number of AQM instances in the system */ + SLIST_ENTRY (dn_aqm) next; /* Next AQM in the list */ +}; + +/* Helper function to update queue and scheduler statistics. + * negative len + drop -> drop + * negative len -> dequeue + * positive len -> enqueue + * positive len + drop -> drop during enqueue + */ +__inline static void +update_stats(struct dn_queue *q, int len, int drop) +{ + int inc = 0; + struct dn_flow *sni; + struct dn_flow *qni; + + sni = &q->_si->ni; + qni = &q->ni; + + if (len < 0) + inc = -1; + else if(len > 0) + inc = 1; + + if (drop) { + qni->drops++; + sni->drops++; + io_pkt_drop++; + } else { + /*update queue stats */ + qni->length += inc; + qni->len_bytes += len; + + /*update scheduler instance stats */ + sni->length += inc; + sni->len_bytes += len; + } + /* tot_pkts is updated in dn_enqueue function */ +} + + +/* kernel module related function */ +int +dn_aqm_modevent(module_t mod, int cmd, void *arg); + +#define DECLARE_DNAQM_MODULE(name, dnaqm) \ + static moduledata_t name##_mod = { \ + #name, dn_aqm_modevent, dnaqm \ + }; \ + DECLARE_MODULE(name, name##_mod, \ + SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \ + MODULE_DEPEND(name, dummynet, 3, 3, 3) + +#endif diff --git a/freebsd/sys/netpfil/ipfw/dn_aqm_codel.h b/freebsd/sys/netpfil/ipfw/dn_aqm_codel.h new file mode 100644 index 00000000..f5618e76 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/dn_aqm_codel.h @@ -0,0 +1,222 @@ +/* + * Codel - The Controlled-Delay Active Queue Management algorithm. + * + * $FreeBSD$ + * + * Copyright (C) 2016 Centre for Advanced Internet Architectures, + * Swinburne University of Technology, Melbourne, Australia. + * Portions of this code were made possible in part by a gift from + * The Comcast Innovation Fund. + * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> + * + * Copyright (C) 2011-2014 Kathleen Nichols <nichols@pollere.com>. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * o Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * + * o Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
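The sign convention documented above for update_stats() translates into three call shapes; the first two bracket normal traffic, and the last one (len == 0 with drop set) is the form the CoDel code later in this diff uses when discarding an already-dequeued packet:

	update_stats(q, m->m_pkthdr.len, 0);	/* enqueue: lengths grow */
	update_stats(q, -m->m_pkthdr.len, 0);	/* dequeue: lengths shrink */
	update_stats(q, 0, 1);			/* count a drop only */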
+ * + * o The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General Public + * License ("GPL") version 2, in which case the provisions of the GPL + * apply INSTEAD OF those given above. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _IP_DN_AQM_CODEL_H +#define _IP_DN_AQM_CODEL_H + + +// XXX How to choose MTAG? +#define FIX_POINT_BITS 16 + +enum { + CODEL_ECN_ENABLED = 1 +}; + +/* Codel parameters */ +struct dn_aqm_codel_parms { + aqm_time_t target; + aqm_time_t interval; + uint32_t flags; +}; + +/* codel status variables */ +struct codel_status { + uint32_t count; /* number of dropped pkts since entering drop state */ + uint16_t dropping; /* dropping state */ + aqm_time_t drop_next_time; /* time for next drop */ + aqm_time_t first_above_time; /* time for first ts over target we observed */ + uint16_t isqrt; /* last isqrt for control low */ + uint16_t maxpkt_size; /* max packet size seen so far */ +}; + +struct mbuf *codel_extract_head(struct dn_queue *, aqm_time_t *); +aqm_time_t control_law(struct codel_status *, + struct dn_aqm_codel_parms *, aqm_time_t ); + +__inline static struct mbuf * +codel_dodequeue(struct dn_queue *q, aqm_time_t now, uint16_t *ok_to_drop) +{ + struct mbuf * m; + struct dn_aqm_codel_parms *cprms; + struct codel_status *cst; + aqm_time_t pkt_ts, sojourn_time; + + *ok_to_drop = 0; + m = codel_extract_head(q, &pkt_ts); + + cst = q->aqm_status; + + if (m == NULL) { + /* queue is empty - we can't be above target */ + cst->first_above_time= 0; + return m; + } + + cprms = q->fs->aqmcfg; + + /* To span a large range of bandwidths, CoDel runs two + * different AQMs in parallel. One is sojourn-time-based + * and takes effect when the time to send an MTU-sized + * packet is less than target. The 1st term of the "if" + * below does this. The other is backlog-based and takes + * effect when the time to send an MTU-sized packet is >= + * target. The goal here is to keep the output link + * utilization high by never allowing the queue to get + * smaller than the amount that arrives in a typical + * interarrival time (MTU-sized packets arriving spaced + * by the amount of time it takes to send such a packet on + * the bottleneck). The 2nd term of the "if" does this. + */ + sojourn_time = now - pkt_ts; + if (sojourn_time < cprms->target || q->ni.len_bytes <= cst->maxpkt_size) { + /* went below - stay below for at least interval */ + cst->first_above_time = 0; + } else { + if (cst->first_above_time == 0) { + /* just went above from below. 
if still above at
+			 * first_above_time, will say it's ok to drop. */
+			cst->first_above_time = now + cprms->interval;
+		} else if (now >= cst->first_above_time) {
+			*ok_to_drop = 1;
+		}
+	}
+	return m;
+}
+
+/*
+ * Dequeue a packet from queue 'q'
+ */
+__inline static struct mbuf *
+codel_dequeue(struct dn_queue *q)
+{
+	struct mbuf *m;
+	struct dn_aqm_codel_parms *cprms;
+	struct codel_status *cst;
+	aqm_time_t now;
+	uint16_t ok_to_drop;
+
+	cst = q->aqm_status;
+	cprms = q->fs->aqmcfg;
+	now = AQM_UNOW;
+
+	m = codel_dodequeue(q, now, &ok_to_drop);
+	if (cst->dropping) {
+		if (!ok_to_drop) {
+			/* sojourn time below target - leave dropping state */
+			cst->dropping = false;
+		}
+		/*
+		 * Time for the next drop. Drop current packet and dequeue
+		 * next. If the dequeue doesn't take us out of dropping
+		 * state, schedule the next drop. A large backlog might
+		 * result in drop rates so high that the next drop should
+		 * happen now, hence the 'while' loop.
+		 */
+		while (now >= cst->drop_next_time && cst->dropping) {
+
+			/* mark the packet */
+			if (cprms->flags & CODEL_ECN_ENABLED && ecn_mark(m)) {
+				cst->count++;
+				/* schedule the next mark. */
+				cst->drop_next_time = control_law(cst, cprms,
+					cst->drop_next_time);
+				return m;
+			}
+
+			/* drop the packet */
+			update_stats(q, 0, 1);
+			FREE_PKT(m);
+			m = codel_dodequeue(q, now, &ok_to_drop);
+
+			if (!ok_to_drop) {
+				/* leave dropping state */
+				cst->dropping = false;
+			} else {
+				cst->count++;
+				/* schedule the next drop. */
+				cst->drop_next_time = control_law(cst, cprms,
+					cst->drop_next_time);
+			}
+		}
+	/* If we get here we're not in dropping state. The 'ok_to_drop'
+	 * return from dodequeue means that the sojourn time has been
+	 * above 'target' for 'interval' so enter dropping state.
+	 */
+	} else if (ok_to_drop) {
+
+		/* if ECN option is disabled or the packet cannot be marked,
+		 * drop the packet and extract another.
+		 */
+		if (!(cprms->flags & CODEL_ECN_ENABLED) || !ecn_mark(m)) {
+			update_stats(q, 0, 1);
+			FREE_PKT(m);
+			m = codel_dodequeue(q, now, &ok_to_drop);
+		}
+
+		cst->dropping = true;
+
+		/* If min went above target close to when it last went
+		 * below, assume that the drop rate that controlled the
+		 * queue on the last cycle is a good starting point to
+		 * control it now. ('drop_next' will be at most 'interval'
+		 * later than the time of the last drop so 'now - drop_next'
+		 * is a good approximation of the time from the last drop
+		 * until now.)
+		 */
+		cst->count = (cst->count > 2 && ((aqm_stime_t)now -
+			(aqm_stime_t)cst->drop_next_time) < 8 * cprms->interval) ?
+			cst->count - 2 : 1;
+		/* we don't have to set an initial guess for Newton's method
+		 * isqrt as we initialize isqrt in the control_law function
+		 * when count == 1 */
+		cst->drop_next_time = control_law(cst, cprms, now);
+	}
+
+	return m;
+}
+
+#endif
diff --git a/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h b/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h
new file mode 100644
index 00000000..aa2fceba
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h
@@ -0,0 +1,153 @@
+/*
+ * PIE - Proportional Integral controller Enhanced AQM algorithm.
+ *
+ * $FreeBSD$
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
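control_law() is only declared in the CoDel header; its definition is not part of this hunk. Per the comments above, it schedules the next drop interval/sqrt(count) after the previous one, using a fixed-point Newton's-method isqrt. Below is a floating-point sketch of that schedule for illustration only (it assumes <math.h>, which the kernel code cannot use):

	static aqm_time_t
	control_law_sketch(struct codel_status *cst,
	    struct dn_aqm_codel_parms *cprms, aqm_time_t t)
	{
		/* the next drop time moves closer as the drop count grows */
		return (t + (aqm_time_t)(cprms->interval /
		    sqrt((double)cst->count)));
	}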
+ * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _IP_DN_AQM_PIE_H
+#define _IP_DN_AQM_PIE_H
+
+#define DN_AQM_PIE 2
+#define PIE_DQ_THRESHOLD_BITS 14
+/* 2^14 = 16KB */
+#define PIE_DQ_THRESHOLD (1UL << PIE_DQ_THRESHOLD_BITS)
+#define MEAN_PKTSIZE 800
+
+/* 31 bits because random() generates values in the range 0..(2**31)-1 */
+#define PIE_PROB_BITS 31
+#define PIE_MAX_PROB ((1ULL<<PIE_PROB_BITS) -1)
+
+/* for 16 bits, we have 3 bits for the integer part and 13 bits for the fraction */
+#define PIE_FIX_POINT_BITS 13
+#define PIE_SCALE (1UL<<PIE_FIX_POINT_BITS)
+
+
+/* PIE options */
+enum {
+	PIE_ECN_ENABLED = 1,
+	PIE_CAPDROP_ENABLED = 2,
+	PIE_ON_OFF_MODE_ENABLED = 4,
+	PIE_DEPRATEEST_ENABLED = 8,
+	PIE_DERAND_ENABLED = 16
+};
+
+/* PIE parameters */
+struct dn_aqm_pie_parms {
+	aqm_time_t	qdelay_ref;	/* AQM Latency Target (default: 15ms) */
+	aqm_time_t	tupdate;	/* drop probability update period (default: 15ms) */
+	aqm_time_t	max_burst;	/* AQM Max Burst Allowance (default: 150ms) */
+	uint16_t	max_ecnth;	/* AQM Max ECN Marking Threshold (default: 10%) */
+	uint16_t	alpha;		/* (default: 1/8) */
+	uint16_t	beta;		/* (default: 1+1/4) */
+	uint32_t	flags;		/* PIE options */
+};
+
+/* PIE status variables */
+struct pie_status {
+	struct callout	aqm_pie_callout;
+	aqm_time_t	burst_allowance;
+	uint32_t	drop_prob;
+	aqm_time_t	current_qdelay;
+	aqm_time_t	qdelay_old;
+	uint64_t	accu_prob;
+	aqm_time_t	measurement_start;
+	aqm_time_t	avg_dq_time;
+	uint32_t	dq_count;
+	uint32_t	sflags;
+	struct dn_aqm_pie_parms *parms;	/* pointer to PIE configurations */
+	/* pointer to parent queue of FQ-PIE sub-queues, or queue of owner fs. */
+	struct dn_queue	*pq;
+	struct mtx	lock_mtx;
+	uint32_t	one_third_q_size; /* 1/3 of queue size, for speed optimization */
+};
+
+enum {
+	ENQUE = 1,
+	DROP,
+	MARKECN
+};
+
+/* PIE current state */
+enum {
+	PIE_ACTIVE = 1,
+	PIE_INMEASUREMENT = 2
+};
+
+/*
+ * Check whether enqueue should drop the packet to control delay,
+ * based on the PIE algorithm.
+ * Return DROP if it is time to drop, or ENQUE otherwise.
+ * This function is used by PIE and FQ-PIE.
+ */ +__inline static int +drop_early(struct pie_status *pst, uint32_t qlen) +{ + struct dn_aqm_pie_parms *pprms; + + pprms = pst->parms; + + /* queue is not congested */ + + if ((pst->qdelay_old < (pprms->qdelay_ref >> 1) + && pst->drop_prob < PIE_MAX_PROB / 5 ) + || qlen <= 2 * MEAN_PKTSIZE) + return ENQUE; + + + if (pst->drop_prob == 0) + pst->accu_prob = 0; + + /* increment accu_prob */ + if (pprms->flags & PIE_DERAND_ENABLED) + pst->accu_prob += pst->drop_prob; + + /* De-randomize option + * if accu_prob < 0.85 -> enqueue + * if accu_prob>8.5 ->drop + * between 0.85 and 8.5 || !De-randomize --> drop on prob + * + * (0.85 = 17/20 ,8.5 = 17/2) + */ + if (pprms->flags & PIE_DERAND_ENABLED) { + if(pst->accu_prob < (uint64_t) (PIE_MAX_PROB * 17 / 20)) + return ENQUE; + if( pst->accu_prob >= (uint64_t) (PIE_MAX_PROB * 17 / 2)) + return DROP; + } + + if (random() < pst->drop_prob) { + pst->accu_prob = 0; + return DROP; + } + + return ENQUE; +} + +#endif diff --git a/freebsd/sys/netpfil/ipfw/dn_heap.c b/freebsd/sys/netpfil/ipfw/dn_heap.c deleted file mode 100644 index 15e2870d..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_heap.c +++ /dev/null @@ -1,554 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * Binary heap and hash tables, used in dummynet - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -#include <rtems/bsd/sys/param.h> -#ifdef _KERNEL -__FBSDID("$FreeBSD$"); -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <netpfil/ipfw/dn_heap.h> -#ifndef log -#define log(x, arg...) -#endif - -#else /* !_KERNEL */ - -#include <stdio.h> -#include <dn_test.h> -#include <strings.h> -#include <stdlib.h> - -#include "dn_heap.h" -#define log(x, arg...) fprintf(stderr, ## arg) -#define panic(x...) fprintf(stderr, ## x), exit(1) -#define MALLOC_DEFINE(a, b, c) -static void *my_malloc(int s) { return malloc(s); } -static void my_free(void *p) { free(p); } -#define malloc(s, t, w) my_malloc(s) -#define free(p, t) my_free(p) -#endif /* !_KERNEL */ - -MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap"); - -/* - * Heap management functions. - * - * In the heap, first node is element 0. 
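The fixed-point thresholds in drop_early() above scale probabilities so that 1.0 corresponds to PIE_MAX_PROB; 0.85 and 8.5 therefore appear as 17/20 and 17/2 of that constant. A worked example with an illustrative drop probability of 0.1:

	uint64_t accu = 0;
	uint32_t p = PIE_MAX_PROB / 10;	/* a drop_prob of 0.1 */
	/* through 8 enqueues: accu = 0.8 < 0.85, packets are force-enqueued;
	 * at the 9th enqueue: accu = 0.9 > 0.85, random dropping may begin;
	 * at the 85th enqueue: accu = 8.5, the drop becomes unconditional. */
	accu += p;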
Children of i are 2i+1 and 2i+2. - * Some macros help finding parent/children so we can optimize them. - * - * heap_init() is called to expand the heap when needed. - * Increment size in blocks of 16 entries. - * Returns 1 on error, 0 on success - */ -#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) -#define HEAP_LEFT(x) ( (x)+(x) + 1 ) -#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } -#define HEAP_INCREMENT 15 - -static int -heap_resize(struct dn_heap *h, unsigned int new_size) -{ - struct dn_heap_entry *p; - - if (h->size >= new_size ) /* have enough room */ - return 0; -#if 1 /* round to the next power of 2 */ - new_size |= new_size >> 1; - new_size |= new_size >> 2; - new_size |= new_size >> 4; - new_size |= new_size >> 8; - new_size |= new_size >> 16; -#else - new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT; -#endif - p = malloc(new_size * sizeof(*p), M_DN_HEAP, M_NOWAIT); - if (p == NULL) { - printf("--- %s, resize %d failed\n", __func__, new_size ); - return 1; /* error */ - } - if (h->size > 0) { - bcopy(h->p, p, h->size * sizeof(*p) ); - free(h->p, M_DN_HEAP); - } - h->p = p; - h->size = new_size; - return 0; -} - -int -heap_init(struct dn_heap *h, int size, int ofs) -{ - if (heap_resize(h, size)) - return 1; - h->elements = 0; - h->ofs = ofs; - return 0; -} - -/* - * Insert element in heap. Normally, p != NULL, we insert p in - * a new position and bubble up. If p == NULL, then the element is - * already in place, and key is the position where to start the - * bubble-up. - * Returns 1 on failure (cannot allocate new heap entry) - * - * If ofs > 0 the position (index, int) of the element in the heap is - * also stored in the element itself at the given offset in bytes. - */ -#define SET_OFFSET(h, i) do { \ - if (h->ofs > 0) \ - *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = i; \ - } while (0) -/* - * RESET_OFFSET is used for sanity checks. It sets ofs - * to an invalid value. - */ -#define RESET_OFFSET(h, i) do { \ - if (h->ofs > 0) \ - *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = -16; \ - } while (0) - -int -heap_insert(struct dn_heap *h, uint64_t key1, void *p) -{ - int son = h->elements; - - //log("%s key %llu p %p\n", __FUNCTION__, key1, p); - if (p == NULL) { /* data already there, set starting point */ - son = key1; - } else { /* insert new element at the end, possibly resize */ - son = h->elements; - if (son == h->size) /* need resize... */ - // XXX expand by 16 or so - if (heap_resize(h, h->elements+16) ) - return 1; /* failure... 
*/ - h->p[son].object = p; - h->p[son].key = key1; - h->elements++; - } - /* make sure that son >= father along the path */ - while (son > 0) { - int father = HEAP_FATHER(son); - struct dn_heap_entry tmp; - - if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) - break; /* found right position */ - /* son smaller than father, swap and repeat */ - HEAP_SWAP(h->p[son], h->p[father], tmp); - SET_OFFSET(h, son); - son = father; - } - SET_OFFSET(h, son); - return 0; -} - -/* - * remove top element from heap, or obj if obj != NULL - */ -void -heap_extract(struct dn_heap *h, void *obj) -{ - int child, father, max = h->elements - 1; - - if (max < 0) { - printf("--- %s: empty heap 0x%p\n", __FUNCTION__, h); - return; - } - if (obj == NULL) - father = 0; /* default: move up smallest child */ - else { /* extract specific element, index is at offset */ - if (h->ofs <= 0) - panic("%s: extract from middle not set on %p\n", - __FUNCTION__, h); - father = *((int *)((char *)obj + h->ofs)); - if (father < 0 || father >= h->elements) { - panic("%s: father %d out of bound 0..%d\n", - __FUNCTION__, father, h->elements); - } - } - /* - * below, father is the index of the empty element, which - * we replace at each step with the smallest child until we - * reach the bottom level. - */ - // XXX why removing RESET_OFFSET increases runtime by 10% ? - RESET_OFFSET(h, father); - while ( (child = HEAP_LEFT(father)) <= max ) { - if (child != max && - DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) - child++; /* take right child, otherwise left */ - h->p[father] = h->p[child]; - SET_OFFSET(h, father); - father = child; - } - h->elements--; - if (father != max) { - /* - * Fill hole with last entry and bubble up, - * reusing the insert code - */ - h->p[father] = h->p[max]; - heap_insert(h, father, NULL); - } -} - -#if 0 -/* - * change object position and update references - * XXX this one is never used! - */ -static void -heap_move(struct dn_heap *h, uint64_t new_key, void *object) -{ - int temp, i, max = h->elements-1; - struct dn_heap_entry *p, buf; - - if (h->ofs <= 0) - panic("cannot move items on this heap"); - p = h->p; /* shortcut */ - - i = *((int *)((char *)object + h->ofs)); - if (DN_KEY_LT(new_key, p[i].key) ) { /* must move up */ - p[i].key = new_key; - for (; i>0 && - DN_KEY_LT(new_key, p[(temp = HEAP_FATHER(i))].key); - i = temp ) { /* bubble up */ - HEAP_SWAP(p[i], p[temp], buf); - SET_OFFSET(h, i); - } - } else { /* must move down */ - p[i].key = new_key; - while ( (temp = HEAP_LEFT(i)) <= max ) { - /* found left child */ - if (temp != max && - DN_KEY_LT(p[temp+1].key, p[temp].key)) - temp++; /* select child with min key */ - if (DN_KEY_LT(>p[temp].key, new_key)) { - /* go down */ - HEAP_SWAP(p[i], p[temp], buf); - SET_OFFSET(h, i); - } else - break; - i = temp; - } - } - SET_OFFSET(h, i); -} -#endif /* heap_move, unused */ - -/* - * heapify() will reorganize data inside an array to maintain the - * heap property. It is needed when we delete a bunch of entries. 
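A sketch of the offset mechanism described above, using a hypothetical caller type: the object reserves an int32_t field for its heap index, hands that field's byte offset to heap_init(), and heap_extract() can then remove the object from the middle of the heap.

	#include <stddef.h>			/* offsetof */

	struct my_event {			/* hypothetical user object */
		uint64_t deadline;
		int32_t  heap_pos;		/* maintained via SET_OFFSET() */
	};

	struct my_event ev = { .deadline = 1000 };
	struct dn_heap h = { 0 };

	heap_init(&h, 16, offsetof(struct my_event, heap_pos));
	heap_insert(&h, ev.deadline, &ev);	/* key + pointer */
	heap_extract(&h, &ev);			/* remove it from anywhere */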
- */ -static void -heapify(struct dn_heap *h) -{ - int i; - - for (i = 0; i < h->elements; i++ ) - heap_insert(h, i , NULL); -} - -int -heap_scan(struct dn_heap *h, int (*fn)(void *, uintptr_t), - uintptr_t arg) -{ - int i, ret, found; - - for (i = found = 0 ; i < h->elements ;) { - ret = fn(h->p[i].object, arg); - if (ret & HEAP_SCAN_DEL) { - h->elements-- ; - h->p[i] = h->p[h->elements] ; - found++ ; - } else - i++ ; - if (ret & HEAP_SCAN_END) - break; - } - if (found) - heapify(h); - return found; -} - -/* - * cleanup the heap and free data structure - */ -void -heap_free(struct dn_heap *h) -{ - if (h->size >0 ) - free(h->p, M_DN_HEAP); - bzero(h, sizeof(*h) ); -} - -/* - * hash table support. - */ - -struct dn_ht { - int buckets; /* how many buckets, really buckets - 1*/ - int entries; /* how many entries */ - int ofs; /* offset of link field */ - uint32_t (*hash)(uintptr_t, int, void *arg); - int (*match)(void *_el, uintptr_t key, int, void *); - void *(*newh)(uintptr_t, int, void *); - void **ht; /* bucket heads */ -}; -/* - * Initialize, allocating bucket pointers inline. - * Recycle previous record if possible. - * If the 'newh' function is not supplied, we assume that the - * key passed to ht_find is the same object to be stored in. - */ -struct dn_ht * -dn_ht_init(struct dn_ht *ht, int buckets, int ofs, - uint32_t (*h)(uintptr_t, int, void *), - int (*match)(void *, uintptr_t, int, void *), - void *(*newh)(uintptr_t, int, void *)) -{ - int l; - - /* - * Notes about rounding bucket size to a power of two. - * Given the original bucket size, we compute the nearest lower and - * higher power of two, minus 1 (respectively b_min and b_max) because - * this value will be used to do an AND with the index returned - * by hash function. - * To choice between these two values, the original bucket size is - * compared with b_min. If the original size is greater than 4/3 b_min, - * we round the bucket size to b_max, else to b_min. - * This ratio try to round to the nearest power of two, advantaging - * the greater size if the different between two power is relatively - * big. - * Rounding the bucket size to a power of two avoid the use of - * module when calculating the correct bucket. - * The ht->buckets variable store the bucket size - 1 to simply - * do an AND between the index returned by hash function and ht->bucket - * instead of a module. 
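The bucket-rounding rule in the comment above, restated as a standalone sketch (illustrative, not the committed code): the requested size is rounded to the nearest power of two, and the stored mask (buckets - 1) replaces a modulo with an AND.

	static int
	round_buckets_sketch(int want)
	{
		int b = want;

		b |= b >> 1; b |= b >> 2; b |= b >> 4;
		b |= b >> 8; b |= b >> 16;	/* b = next (2^k)-1 >= want */
		/* keep the smaller power unless 'want' exceeds it by > 4/3 */
		return ((b >> 1) * 4 / 3 < want) ? b : (b >> 1);
	}

A lookup then computes its bucket as hash(key) & mask instead of hash(key) % nbuckets.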
- */ - int b_min; /* min buckets */ - int b_max; /* max buckets */ - int b_ori; /* original buckets */ - - if (h == NULL || match == NULL) { - printf("--- missing hash or match function"); - return NULL; - } - if (buckets < 1 || buckets > 65536) - return NULL; - - b_ori = buckets; - /* calculate next power of 2, - 1*/ - buckets |= buckets >> 1; - buckets |= buckets >> 2; - buckets |= buckets >> 4; - buckets |= buckets >> 8; - buckets |= buckets >> 16; - - b_max = buckets; /* Next power */ - b_min = buckets >> 1; /* Previous power */ - - /* Calculate the 'nearest' bucket size */ - if (b_min * 4000 / 3000 < b_ori) - buckets = b_max; - else - buckets = b_min; - - if (ht) { /* see if we can reuse */ - if (buckets <= ht->buckets) { - ht->buckets = buckets; - } else { - /* free pointers if not allocated inline */ - if (ht->ht != (void *)(ht + 1)) - free(ht->ht, M_DN_HEAP); - free(ht, M_DN_HEAP); - ht = NULL; - } - } - if (ht == NULL) { - /* Allocate buckets + 1 entries because buckets is use to - * do the AND with the index returned by hash function - */ - l = sizeof(*ht) + (buckets + 1) * sizeof(void **); - ht = malloc(l, M_DN_HEAP, M_NOWAIT | M_ZERO); - } - if (ht) { - ht->ht = (void **)(ht + 1); - ht->buckets = buckets; - ht->ofs = ofs; - ht->hash = h; - ht->match = match; - ht->newh = newh; - } - return ht; -} - -/* dummy callback for dn_ht_free to unlink all */ -static int -do_del(void *obj, void *arg) -{ - return DNHT_SCAN_DEL; -} - -void -dn_ht_free(struct dn_ht *ht, int flags) -{ - if (ht == NULL) - return; - if (flags & DNHT_REMOVE) { - (void)dn_ht_scan(ht, do_del, NULL); - } else { - if (ht->ht && ht->ht != (void *)(ht + 1)) - free(ht->ht, M_DN_HEAP); - free(ht, M_DN_HEAP); - } -} - -int -dn_ht_entries(struct dn_ht *ht) -{ - return ht ? ht->entries : 0; -} - -/* lookup and optionally create or delete element */ -void * -dn_ht_find(struct dn_ht *ht, uintptr_t key, int flags, void *arg) -{ - int i; - void **pp, *p; - - if (ht == NULL) /* easy on an empty hash */ - return NULL; - i = (ht->buckets == 1) ? 0 : - (ht->hash(key, flags, arg) & ht->buckets); - - for (pp = &ht->ht[i]; (p = *pp); pp = (void **)((char *)p + ht->ofs)) { - if (flags & DNHT_MATCH_PTR) { - if (key == (uintptr_t)p) - break; - } else if (ht->match(p, key, flags, arg)) /* found match */ - break; - } - if (p) { - if (flags & DNHT_REMOVE) { - /* link in the next element */ - *pp = *(void **)((char *)p + ht->ofs); - *(void **)((char *)p + ht->ofs) = NULL; - ht->entries--; - } - } else if (flags & DNHT_INSERT) { - // printf("%s before calling new, bucket %d ofs %d\n", - // __FUNCTION__, i, ht->ofs); - p = ht->newh ? ht->newh(key, flags, arg) : (void *)key; - // printf("%s newh returns %p\n", __FUNCTION__, p); - if (p) { - ht->entries++; - *(void **)((char *)p + ht->ofs) = ht->ht[i]; - ht->ht[i] = p; - } - } - return p; -} - -/* - * do a scan with the option to delete the object. Extract next before - * running the callback because the element may be destroyed there. 
- */ -int -dn_ht_scan(struct dn_ht *ht, int (*fn)(void *, void *), void *arg) -{ - int i, ret, found = 0; - void **curp, *cur, *next; - - if (ht == NULL || fn == NULL) - return 0; - for (i = 0; i <= ht->buckets; i++) { - curp = &ht->ht[i]; - while ( (cur = *curp) != NULL) { - next = *(void **)((char *)cur + ht->ofs); - ret = fn(cur, arg); - if (ret & DNHT_SCAN_DEL) { - found++; - ht->entries--; - *curp = next; - } else { - curp = (void **)((char *)cur + ht->ofs); - } - if (ret & DNHT_SCAN_END) - return found; - } - } - return found; -} - -/* - * Similar to dn_ht_scan(), except that the scan is performed only - * in the bucket 'bucket'. The function returns a correct bucket number if - * the original is invalid. - * If the callback returns DNHT_SCAN_END, the function move the ht->ht[i] - * pointer to the last entry processed. Moreover, the bucket number passed - * by caller is decremented, because usually the caller increment it. - */ -int -dn_ht_scan_bucket(struct dn_ht *ht, int *bucket, int (*fn)(void *, void *), - void *arg) -{ - int i, ret, found = 0; - void **curp, *cur, *next; - - if (ht == NULL || fn == NULL) - return 0; - if (*bucket > ht->buckets) - *bucket = 0; - i = *bucket; - - curp = &ht->ht[i]; - while ( (cur = *curp) != NULL) { - next = *(void **)((char *)cur + ht->ofs); - ret = fn(cur, arg); - if (ret & DNHT_SCAN_DEL) { - found++; - ht->entries--; - *curp = next; - } else { - curp = (void **)((char *)cur + ht->ofs); - } - if (ret & DNHT_SCAN_END) - return found; - } - return found; -} diff --git a/freebsd/sys/netpfil/ipfw/dn_heap.h b/freebsd/sys/netpfil/ipfw/dn_heap.h index c95473ad..cb6e03ef 100644 --- a/freebsd/sys/netpfil/ipfw/dn_heap.h +++ b/freebsd/sys/netpfil/ipfw/dn_heap.h @@ -83,7 +83,7 @@ enum { * heap_insert() adds a key-pointer pair to the heap * * HEAP_TOP() returns a pointer to the top element of the heap, - * but makes no checks on its existance (XXX should we change ?) + * but makes no checks on its existence (XXX should we change ?) * * heap_extract() removes the entry at the top, returing the pointer. * (the key should have been read before). @@ -146,7 +146,7 @@ int heap_scan(struct dn_heap *, int (*)(void *, uintptr_t), uintptr_t); * of the dn_ht_find(), and of the callbacks: * * DNHT_KEY_IS_OBJ means the key is the object pointer. - * It is usally of interest for the hash and match functions. + * It is usually of interest for the hash and match functions. * * DNHT_MATCH_PTR during a lookup, match pointers instead * of calling match(). 
Normally used when removing specific diff --git a/freebsd/sys/netpfil/ipfw/dn_sched.h b/freebsd/sys/netpfil/ipfw/dn_sched.h index ab823fe7..ab32771b 100644 --- a/freebsd/sys/netpfil/ipfw/dn_sched.h +++ b/freebsd/sys/netpfil/ipfw/dn_sched.h @@ -132,6 +132,10 @@ struct dn_alg { int (*free_fsk)(struct dn_fsk *f); int (*new_queue)(struct dn_queue *q); int (*free_queue)(struct dn_queue *q); +#ifdef NEW_AQM + /* Getting scheduler extra parameters */ + int (*getconfig)(struct dn_schk *, struct dn_extra_parms *); +#endif /* run-time fields */ int ref_count; /* XXX number of instances in the system */ @@ -165,7 +169,13 @@ dn_dequeue(struct dn_queue *q) struct mbuf *m = q->mq.head; if (m == NULL) return NULL; +#ifdef NEW_AQM + /* Call AQM dequeue function */ + if (q->fs->aqmfp && q->fs->aqmfp->dequeue ) + return q->fs->aqmfp->dequeue(q); +#endif q->mq.head = m->m_nextpkt; + q->mq.count--; /* Update stats for the queue */ q->ni.length--; @@ -186,6 +196,6 @@ int dn_sched_modevent(module_t mod, int cmd, void *arg); #name, dn_sched_modevent, dnsched \ }; \ DECLARE_MODULE(name, name##_mod, \ - SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \ - MODULE_DEPEND(name, dummynet, 3, 3, 3); + SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); \ + MODULE_DEPEND(name, dummynet, 3, 3, 3) #endif /* _DN_SCHED_H */ diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_fifo.c b/freebsd/sys/netpfil/ipfw/dn_sched_fifo.c deleted file mode 100644 index 154a7ac6..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_sched_fifo.c +++ /dev/null @@ -1,122 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/* - * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -/* - * $FreeBSD$ - */ - -#ifdef _KERNEL -#include <sys/malloc.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <net/if.h> /* IFNAMSIZ */ -#include <netinet/in.h> -#include <netinet/ip_var.h> /* ipfw_rule_ref */ -#include <netinet/ip_fw.h> /* flow_id */ -#include <netinet/ip_dummynet.h> -#include <netpfil/ipfw/dn_heap.h> -#include <netpfil/ipfw/ip_dn_private.h> -#include <netpfil/ipfw/dn_sched.h> -#else -#include <dn_test.h> -#endif - -/* - * This file implements a FIFO scheduler for a single queue. - * The queue is allocated as part of the scheduler instance, - * and there is a single flowset is in the template which stores - * queue size and policy. - * Enqueue and dequeue use the default library functions. - */ -static int -fifo_enqueue(struct dn_sch_inst *si, struct dn_queue *q, struct mbuf *m) -{ - /* XXX if called with q != NULL and m=NULL, this is a - * re-enqueue from an existing scheduler, which we should - * handle. - */ - return dn_enqueue((struct dn_queue *)(si+1), m, 0); -} - -static struct mbuf * -fifo_dequeue(struct dn_sch_inst *si) -{ - return dn_dequeue((struct dn_queue *)(si + 1)); -} - -static int -fifo_new_sched(struct dn_sch_inst *si) -{ - /* This scheduler instance contains the queue */ - struct dn_queue *q = (struct dn_queue *)(si + 1); - - set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q)); - q->_si = si; - q->fs = si->sched->fs; - return 0; -} - -static int -fifo_free_sched(struct dn_sch_inst *si) -{ - struct dn_queue *q = (struct dn_queue *)(si + 1); - dn_free_pkts(q->mq.head); - bzero(q, sizeof(*q)); - return 0; -} - -/* - * FIFO scheduler descriptor - * contains the type of the scheduler, the name, the size of extra - * data structures, and function pointers. - */ -static struct dn_alg fifo_desc = { - _SI( .type = ) DN_SCHED_FIFO, - _SI( .name = ) "FIFO", - _SI( .flags = ) 0, - - _SI( .schk_datalen = ) 0, - _SI( .si_datalen = ) sizeof(struct dn_queue), - _SI( .q_datalen = ) 0, - - _SI( .enqueue = ) fifo_enqueue, - _SI( .dequeue = ) fifo_dequeue, - _SI( .config = ) NULL, - _SI( .destroy = ) NULL, - _SI( .new_sched = ) fifo_new_sched, - _SI( .free_sched = ) fifo_free_sched, - _SI( .new_fsk = ) NULL, - _SI( .free_fsk = ) NULL, - _SI( .new_queue = ) NULL, - _SI( .free_queue = ) NULL, -}; - -DECLARE_DNSCHED_MODULE(dn_fifo, &fifo_desc); diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.h b/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.h new file mode 100644 index 00000000..4b65781e --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.h @@ -0,0 +1,167 @@ +/*- + * Copyright (C) 2016 Centre for Advanced Internet Architectures, + * Swinburne University of Technology, Melbourne, Australia. + * Portions of this code were made possible in part by a gift from + * The Comcast Innovation Fund. + * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * FQ_Codel Structures and helper functions + * + * $FreeBSD$ + */ + +#ifndef _IP_DN_SCHED_FQ_CODEL_H +#define _IP_DN_SCHED_FQ_CODEL_H + +/* list of queues */ +STAILQ_HEAD(fq_codel_list, fq_codel_flow) ; + +/* fq_codel parameters including codel */ +struct dn_sch_fq_codel_parms { + struct dn_aqm_codel_parms ccfg; /* CoDel Parameters */ + /* FQ_CODEL Parameters */ + uint32_t flows_cnt; /* number of flows */ + uint32_t limit; /* hard limit of fq_codel queue size*/ + uint32_t quantum; +}; /* defaults */ + +/* flow (sub-queue) stats */ +struct flow_stats { + uint64_t tot_pkts; /* statistics counters */ + uint64_t tot_bytes; + uint32_t length; /* Queue length, in packets */ + uint32_t len_bytes; /* Queue length, in bytes */ + uint32_t drops; +}; + +/* A flow of packets (sub-queue).*/ +struct fq_codel_flow { + struct mq mq; /* list of packets */ + struct flow_stats stats; /* statistics */ + int deficit; + int active; /* 1: flow is active (in a list) */ + struct codel_status cst; + STAILQ_ENTRY(fq_codel_flow) flowchain; +}; + +/* extra fq_codel scheduler configurations */ +struct fq_codel_schk { + struct dn_sch_fq_codel_parms cfg; +}; + +/* fq_codel scheduler instance */ +struct fq_codel_si { + struct dn_sch_inst _si; /* standard scheduler instance */ + struct dn_queue main_q; /* main queue is after si directly */ + + struct fq_codel_flow *flows; /* array of flows (queues) */ + uint32_t perturbation; /* random value */ + struct fq_codel_list newflows; /* list of new queues */ + struct fq_codel_list oldflows; /* list of old queues */ +}; + +/* Helper function to update queue&main-queue and scheduler statistics. 
+ * negative len + drop -> drop + * negative len -> dequeue + * positive len -> enqueue + * positive len + drop -> drop during enqueue + */ +__inline static void +fq_update_stats(struct fq_codel_flow *q, struct fq_codel_si *si, int len, + int drop) +{ + int inc = 0; + + if (len < 0) + inc = -1; + else if (len > 0) + inc = 1; + + if (drop) { + si->main_q.ni.drops ++; + q->stats.drops ++; + si->_si.ni.drops ++; + io_pkt_drop ++; + } + + if (!drop || (drop && len < 0)) { + /* Update stats for the main queue */ + si->main_q.ni.length += inc; + si->main_q.ni.len_bytes += len; + + /*update sub-queue stats */ + q->stats.length += inc; + q->stats.len_bytes += len; + + /*update scheduler instance stats */ + si->_si.ni.length += inc; + si->_si.ni.len_bytes += len; + } + + if (inc > 0) { + si->main_q.ni.tot_bytes += len; + si->main_q.ni.tot_pkts ++; + + q->stats.tot_bytes +=len; + q->stats.tot_pkts++; + + si->_si.ni.tot_bytes +=len; + si->_si.ni.tot_pkts ++; + } + +} + +/* extract the head of fq_codel sub-queue */ +__inline static struct mbuf * +fq_codel_extract_head(struct fq_codel_flow *q, aqm_time_t *pkt_ts, struct fq_codel_si *si) +{ + struct mbuf *m = q->mq.head; + + if (m == NULL) + return m; + q->mq.head = m->m_nextpkt; + + fq_update_stats(q, si, -m->m_pkthdr.len, 0); + + if (si->main_q.ni.length == 0) /* queue is now idle */ + si->main_q.q_time = dn_cfg.curr_time; + + /* extract packet timestamp*/ + struct m_tag *mtag; + mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL); + if (mtag == NULL){ + D("timestamp tag is not found!"); + *pkt_ts = 0; + } else { + *pkt_ts = *(aqm_time_t *)(mtag + 1); + m_tag_delete(m,mtag); + } + + return m; +} + + +#endif diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h b/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h new file mode 100644 index 00000000..da663dc8 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h @@ -0,0 +1,187 @@ +/* + * Codel - The Controlled-Delay Active Queue Management algorithm. + * + * $FreeBSD$ + * + * Copyright (C) 2016 Centre for Advanced Internet Architectures, + * Swinburne University of Technology, Melbourne, Australia. + * Portions of this code were made possible in part by a gift from + * The Comcast Innovation Fund. + * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> + * + * Copyright (C) 2011-2014 Kathleen Nichols <nichols@pollere.com>. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * o Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * + * o Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * o The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General Public + * License ("GPL") version 2, in which case the provisions of the GPL + * apply INSTEAD OF those given above. 
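fq_codel_extract_head() later in this hunk reads an arrival timestamp out of an mbuf tag; the enqueue side that attaches that tag is not part of this header. A sketch of that producer side for an mbuf m being enqueued, assuming the standard mbuf-tag API (illustrative, not the committed enqueue code):

	struct m_tag *mtag;

	mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
	    sizeof(aqm_time_t), M_NOWAIT);
	if (mtag != NULL) {
		*(aqm_time_t *)(mtag + 1) = AQM_UNOW;	/* stamp arrival time */
		m_tag_prepend(m, mtag);
	}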
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _IP_DN_SCHED_FQ_CODEL_HELPER_H +#define _IP_DN_SCHED_FQ_CODEL_HELPER_H + +__inline static struct mbuf * +fqc_dodequeue(struct fq_codel_flow *q, aqm_time_t now, uint16_t *ok_to_drop, + struct fq_codel_si *si) +{ + struct mbuf * m; + struct fq_codel_schk *schk = (struct fq_codel_schk *)(si->_si.sched+1); + aqm_time_t pkt_ts, sojourn_time; + + *ok_to_drop = 0; + m = fq_codel_extract_head(q, &pkt_ts, si); + + if (m == NULL) { + /*queue is empty - we can't be above target*/ + q->cst.first_above_time= 0; + return m; + } + + /* To span a large range of bandwidths, CoDel runs two + * different AQMs in parallel. One is sojourn-time-based + * and takes effect when the time to send an MTU-sized + * packet is less than target. The 1st term of the "if" + * below does this. The other is backlog-based and takes + * effect when the time to send an MTU-sized packet is >= + * target. The goal here is to keep the output link + * utilization high by never allowing the queue to get + * smaller than the amount that arrives in a typical + * interarrival time (MTU-sized packets arriving spaced + * by the amount of time it takes to send such a packet on + * the bottleneck). The 2nd term of the "if" does this. + */ + sojourn_time = now - pkt_ts; + if (sojourn_time < schk->cfg.ccfg.target || q->stats.len_bytes <= q->cst.maxpkt_size) { + /* went below - stay below for at least interval */ + q->cst.first_above_time = 0; + } else { + if (q->cst.first_above_time == 0) { + /* just went above from below. if still above at + * first_above_time, will say it's ok to drop. */ + q->cst.first_above_time = now + schk->cfg.ccfg.interval; + } else if (now >= q->cst.first_above_time) { + *ok_to_drop = 1; + } + } + return m; +} + +/* Codel dequeue function */ +__inline static struct mbuf * +fqc_codel_dequeue(struct fq_codel_flow *q, struct fq_codel_si *si) +{ + struct mbuf *m; + struct dn_aqm_codel_parms *cprms; + struct codel_status *cst; + aqm_time_t now; + uint16_t ok_to_drop; + struct fq_codel_schk *schk = (struct fq_codel_schk *)(si->_si.sched+1); + + cst = &q->cst; + cprms = &schk->cfg.ccfg; + + now = AQM_UNOW; + m = fqc_dodequeue(q, now, &ok_to_drop, si); + + if (cst->dropping) { + if (!ok_to_drop) { + /* sojourn time below target - leave dropping state */ + cst->dropping = false; + } + + /* Time for the next drop. Drop current packet and dequeue + * next. If the dequeue doesn't take us out of dropping + * state, schedule the next drop. A large backlog might + * result in drop rates so high that the next drop should + * happen now, hence the 'while' loop. 
+ */ + while (now >= cst->drop_next_time && cst->dropping) { + + /* mark the packet */ + if (cprms->flags & CODEL_ECN_ENABLED && ecn_mark(m)) { + cst->count++; + /* schedule the next mark. */ + cst->drop_next_time = control_law(cst, cprms, cst->drop_next_time); + return m; + } + + /* drop the packet */ + fq_update_stats(q, si, 0, 1); + m_freem(m); + m = fqc_dodequeue(q, now, &ok_to_drop, si); + + if (!ok_to_drop) { + /* leave dropping state */ + cst->dropping = false; + } else { + cst->count++; + /* schedule the next drop. */ + cst->drop_next_time = control_law(cst, cprms, cst->drop_next_time); + } + } + /* If we get here we're not in dropping state. The 'ok_to_drop' + * return from dodequeue means that the sojourn time has been + * above 'target' for 'interval' so enter dropping state. + */ + } else if (ok_to_drop) { + + /* if ECN option is disabled or the packet cannot be marked, + * drop the packet and extract another. + */ + if (!(cprms->flags & CODEL_ECN_ENABLED) || !ecn_mark(m)) { + fq_update_stats(q, si, 0, 1); + m_freem(m); + m = fqc_dodequeue(q, now, &ok_to_drop,si); + } + + cst->dropping = true; + + /* If min went above target close to when it last went + * below, assume that the drop rate that controlled the + * queue on the last cycle is a good starting point to + * control it now. ('drop_next' will be at most 'interval' + * later than the time of the last drop so 'now - drop_next' + * is a good approximation of the time from the last drop + * until now.) + */ + cst->count = (cst->count > 2 && ((aqm_stime_t)now - + (aqm_stime_t)cst->drop_next_time) < 8* cprms->interval)? cst->count - 2 : 1; + + /* we don't have to set initial guess for Newton's method isqrt as + * we initilaize isqrt in control_law function when count == 1 */ + cst->drop_next_time = control_law(cst, cprms, now); + } + + return m; +} + +#endif diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_prio.c b/freebsd/sys/netpfil/ipfw/dn_sched_prio.c deleted file mode 100644 index 0679db9d..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_sched_prio.c +++ /dev/null @@ -1,231 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/* - * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -/* - * $FreeBSD$ - */ -#ifdef _KERNEL -#include <sys/malloc.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <net/if.h> /* IFNAMSIZ */ -#include <netinet/in.h> -#include <netinet/ip_var.h> /* ipfw_rule_ref */ -#include <netinet/ip_fw.h> /* flow_id */ -#include <netinet/ip_dummynet.h> -#include <netpfil/ipfw/dn_heap.h> -#include <netpfil/ipfw/ip_dn_private.h> -#include <netpfil/ipfw/dn_sched.h> -#else -#include <dn_test.h> -#endif - -#define DN_SCHED_PRIO 5 //XXX - -#if !defined(_KERNEL) || !defined(__linux__) -#define test_bit(ix, pData) ((*pData) & (1<<(ix))) -#define __set_bit(ix, pData) (*pData) |= (1<<(ix)) -#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix)) -#endif - -#ifdef __MIPSEL__ -#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix)) -#endif - -/* Size of the array of queues pointers. */ -#define BITMAP_T unsigned long -#define MAXPRIO (sizeof(BITMAP_T) * 8) - -/* - * The scheduler instance contains an array of pointers to queues, - * one for each priority, and a bitmap listing backlogged queues. - */ -struct prio_si { - BITMAP_T bitmap; /* array bitmap */ - struct dn_queue *q_array[MAXPRIO]; /* Array of queues pointers */ -}; - -/* - * If a queue with the same priority is already backlogged, use - * that one instead of the queue passed as argument. - */ -static int -prio_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) -{ - struct prio_si *si = (struct prio_si *)(_si + 1); - int prio = q->fs->fs.par[0]; - - if (test_bit(prio, &si->bitmap) == 0) { - /* No queue with this priority, insert */ - __set_bit(prio, &si->bitmap); - si->q_array[prio] = q; - } else { /* use the existing queue */ - q = si->q_array[prio]; - } - if (dn_enqueue(q, m, 0)) - return 1; - return 0; -} - -/* - * Packets are dequeued only from the highest priority queue. - * The function ffs() return the lowest bit in the bitmap that rapresent - * the array index (-1) which contains the pointer to the highest priority - * queue. - * After the dequeue, if this queue become empty, it is index is removed - * from the bitmap. - * Scheduler is idle if the bitmap is empty - * - * NOTE: highest priority is 0, lowest is sched->max_prio_q - */ -static struct mbuf * -prio_dequeue(struct dn_sch_inst *_si) -{ - struct prio_si *si = (struct prio_si *)(_si + 1); - struct mbuf *m; - struct dn_queue *q; - int prio; - - if (si->bitmap == 0) /* scheduler idle */ - return NULL; - - prio = ffs(si->bitmap) - 1; - - /* Take the highest priority queue in the scheduler */ - q = si->q_array[prio]; - // assert(q) - - m = dn_dequeue(q); - if (q->mq.head == NULL) { - /* Queue is now empty, remove from scheduler - * and mark it - */ - si->q_array[prio] = NULL; - __clear_bit(prio, &si->bitmap); - } - return m; -} - -static int -prio_new_sched(struct dn_sch_inst *_si) -{ - struct prio_si *si = (struct prio_si *)(_si + 1); - - bzero(si->q_array, sizeof(si->q_array)); - si->bitmap = 0; - - return 0; -} - -static int -prio_new_fsk(struct dn_fsk *fs) -{ - /* Check if the prioritiy is between 0 and MAXPRIO-1 */ - ipdn_bound_var(&fs->fs.par[0], 0, 0, MAXPRIO - 1, "PRIO priority"); - return 0; -} - -static int -prio_new_queue(struct dn_queue *q) -{ - struct prio_si *si = (struct prio_si *)(q->_si + 1); - int prio = q->fs->fs.par[0]; - struct dn_queue *oldq; - - q->ni.oid.subtype = DN_SCHED_PRIO; - - if (q->mq.head == NULL) - return 0; - - /* Queue already full, must insert in the scheduler or append - * mbufs to existing queue. 
This partly duplicates prio_enqueue - */ - if (test_bit(prio, &si->bitmap) == 0) { - /* No queue with this priority, insert */ - __set_bit(prio, &si->bitmap); - si->q_array[prio] = q; - } else if ( (oldq = si->q_array[prio]) != q) { - /* must append to the existing queue. - * can simply append q->mq.head to q2->... - * and add the counters to those of q2 - */ - oldq->mq.tail->m_nextpkt = q->mq.head; - oldq->mq.tail = q->mq.tail; - oldq->ni.length += q->ni.length; - q->ni.length = 0; - oldq->ni.len_bytes += q->ni.len_bytes; - q->ni.len_bytes = 0; - q->mq.tail = q->mq.head = NULL; - } - return 0; -} - -static int -prio_free_queue(struct dn_queue *q) -{ - int prio = q->fs->fs.par[0]; - struct prio_si *si = (struct prio_si *)(q->_si + 1); - - if (si->q_array[prio] == q) { - si->q_array[prio] = NULL; - __clear_bit(prio, &si->bitmap); - } - return 0; -} - - -static struct dn_alg prio_desc = { - _SI( .type = ) DN_SCHED_PRIO, - _SI( .name = ) "PRIO", - _SI( .flags = ) DN_MULTIQUEUE, - - /* we need extra space in the si and the queue */ - _SI( .schk_datalen = ) 0, - _SI( .si_datalen = ) sizeof(struct prio_si), - _SI( .q_datalen = ) 0, - - _SI( .enqueue = ) prio_enqueue, - _SI( .dequeue = ) prio_dequeue, - - _SI( .config = ) NULL, - _SI( .destroy = ) NULL, - _SI( .new_sched = ) prio_new_sched, - _SI( .free_sched = ) NULL, - - _SI( .new_fsk = ) prio_new_fsk, - _SI( .free_fsk = ) NULL, - - _SI( .new_queue = ) prio_new_queue, - _SI( .free_queue = ) prio_free_queue, -}; - - -DECLARE_DNSCHED_MODULE(dn_prio, &prio_desc); diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_qfq.c b/freebsd/sys/netpfil/ipfw/dn_sched_qfq.c deleted file mode 100644 index 461c40a5..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_sched_qfq.c +++ /dev/null @@ -1,866 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/* - * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -/* - * $FreeBSD$ - */ - -#ifdef _KERNEL -#include <sys/malloc.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <net/if.h> /* IFNAMSIZ */ -#include <netinet/in.h> -#include <netinet/ip_var.h> /* ipfw_rule_ref */ -#include <netinet/ip_fw.h> /* flow_id */ -#include <netinet/ip_dummynet.h> -#include <netpfil/ipfw/dn_heap.h> -#include <netpfil/ipfw/ip_dn_private.h> -#include <netpfil/ipfw/dn_sched.h> -#else -#include <dn_test.h> -#endif - -#ifdef QFQ_DEBUG -struct qfq_sched; -static void dump_sched(struct qfq_sched *q, const char *msg); -#define NO(x) x -#else -#define NO(x) -#endif -#define DN_SCHED_QFQ 4 // XXX Where? -typedef unsigned long bitmap; - -/* - * bitmaps ops are critical. Some linux versions have __fls - * and the bitmap ops. Some machines have ffs - */ -#if defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24)) -int fls(unsigned int n) -{ - int i = 0; - for (i = 0; n > 0; n >>= 1, i++) - ; - return i; -} -#endif - -#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24)) -static inline unsigned long __fls(unsigned long word) -{ - return fls(word) - 1; -} -#endif - -#if !defined(_KERNEL) || !defined(__linux__) -#ifdef QFQ_DEBUG -int test_bit(int ix, bitmap *p) -{ - if (ix < 0 || ix > 31) - D("bad index %d", ix); - return *p & (1<<ix); -} -void __set_bit(int ix, bitmap *p) -{ - if (ix < 0 || ix > 31) - D("bad index %d", ix); - *p |= (1<<ix); -} -void __clear_bit(int ix, bitmap *p) -{ - if (ix < 0 || ix > 31) - D("bad index %d", ix); - *p &= ~(1<<ix); -} -#else /* !QFQ_DEBUG */ -/* XXX do we have fast version, or leave it to the compiler ? */ -#define test_bit(ix, pData) ((*pData) & (1<<(ix))) -#define __set_bit(ix, pData) (*pData) |= (1<<(ix)) -#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix)) -#endif /* !QFQ_DEBUG */ -#endif /* !__linux__ */ - -#ifdef __MIPSEL__ -#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix)) -#endif - -/*-------------------------------------------*/ -/* - -Virtual time computations. - -S, F and V are all computed in fixed point arithmetic with -FRAC_BITS decimal bits. - - QFQ_MAX_INDEX is the maximum index allowed for a group. We need - one bit per index. - QFQ_MAX_WSHIFT is the maximum power of two supported as a weight. - The layout of the bits is as below: - - [ MTU_SHIFT ][ FRAC_BITS ] - [ MAX_INDEX ][ MIN_SLOT_SHIFT ] - ^.__grp->index = 0 - *.__grp->slot_shift - - where MIN_SLOT_SHIFT is derived by difference from the others. - -The max group index corresponds to Lmax/w_min, where -Lmax=1<<MTU_SHIFT, w_min = 1 . -From this, and knowing how many groups (MAX_INDEX) we want, -we can derive the shift corresponding to each group. - -Because we often need to compute - F = S + len/w_i and V = V + len/wsum -instead of storing w_i store the value - inv_w = (1<<FRAC_BITS)/w_i -so we can do F = S + len * inv_w * wsum. -We use W_TOT in the formulas so we can easily move between -static and adaptive weight sum. - -The per-scheduler-instance data contain all the data structures -for the scheduler: bitmaps and bucket lists. - - */ -/* - * Maximum number of consecutive slots occupied by backlogged classes - * inside a group. This is approx lmax/lmin + 5. - * XXX check because it poses constraints on MAX_INDEX - */ -#define QFQ_MAX_SLOTS 32 -/* - * Shifts used for class<->group mapping. 
Class weights are - * in the range [1, QFQ_MAX_WEIGHT], we to map each class i to the - * group with the smallest index that can support the L_i / r_i - * configured for the class. - * - * grp->index is the index of the group; and grp->slot_shift - * is the shift for the corresponding (scaled) sigma_i. - * - * When computing the group index, we do (len<<FP_SHIFT)/weight, - * then compute an FLS (which is like a log2()), and if the result - * is below the MAX_INDEX region we use 0 (which is the same as - * using a larger len). - */ -#define QFQ_MAX_INDEX 19 -#define QFQ_MAX_WSHIFT 16 /* log2(max_weight) */ - -#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT) -#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT) -//#define IWSUM (q->i_wsum) -#define IWSUM ((1<<FRAC_BITS)/QFQ_MAX_WSUM) - -#define FRAC_BITS 30 /* fixed point arithmetic */ -#define ONE_FP (1UL << FRAC_BITS) - -#define QFQ_MTU_SHIFT 11 /* log2(max_len) */ -#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX) - -/* - * Possible group states, also indexes for the bitmaps array in - * struct qfq_queue. We rely on ER, IR, EB, IB being numbered 0..3 - */ -enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE }; - -struct qfq_group; -/* - * additional queue info. Some of this info should come from - * the flowset, we copy them here for faster processing. - * This is an overlay of the struct dn_queue - */ -struct qfq_class { - struct dn_queue _q; - uint64_t S, F; /* flow timestamps (exact) */ - struct qfq_class *next; /* Link for the slot list. */ - - /* group we belong to. In principle we would need the index, - * which is log_2(lmax/weight), but we never reference it - * directly, only the group. - */ - struct qfq_group *grp; - - /* these are copied from the flowset. */ - uint32_t inv_w; /* ONE_FP/weight */ - uint32_t lmax; /* Max packet size for this flow. */ -}; - -/* Group descriptor, see the paper for details. - * Basically this contains the bucket lists - */ -struct qfq_group { - uint64_t S, F; /* group timestamps (approx). */ - unsigned int slot_shift; /* Slot shift. */ - unsigned int index; /* Group index. */ - unsigned int front; /* Index of the front slot. */ - bitmap full_slots; /* non-empty slots */ - - /* Array of lists of active classes. */ - struct qfq_class *slots[QFQ_MAX_SLOTS]; -}; - -/* scheduler instance descriptor. */ -struct qfq_sched { - uint64_t V; /* Precise virtual time. */ - uint32_t wsum; /* weight sum */ - NO(uint32_t i_wsum; /* ONE_FP/w_sum */ - uint32_t _queued; /* debugging */ - uint32_t loops; /* debugging */) - bitmap bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ - struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ -}; - -/*---- support functions ----------------------------*/ - -/* Generic comparison function, handling wraparound. */ -static inline int qfq_gt(uint64_t a, uint64_t b) -{ - return (int64_t)(a - b) > 0; -} - -/* Round a precise timestamp to its slotted value. */ -static inline uint64_t qfq_round_down(uint64_t ts, unsigned int shift) -{ - return ts & ~((1ULL << shift) - 1); -} - -/* return the pointer to the group with lowest index in the bitmap */ -static inline struct qfq_group *qfq_ffs(struct qfq_sched *q, - unsigned long bitmap) -{ - int index = ffs(bitmap) - 1; // zero-based - return &q->groups[index]; -} - -/* - * Calculate a flow index, given its weight and maximum packet length. - * index = log_2(maxlen/weight) but we need to apply the scaling. - * This is used only once at flow creation. 
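The group-index computation this comment describes, restated as a runnable sketch: slot_size = maxlen * inv_w in FRAC_BITS fixed point, then a find-last-set gives roughly log2(), and exact powers of two drop into the group below. The GCC/Clang builtin __builtin_clzll stands in for the kernel's __fls(), and calc_index is an illustrative stand-in for qfq_calc_index():

#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS		30
#define ONE_FP			(1UL << FRAC_BITS)
#define QFQ_MTU_SHIFT		11
#define QFQ_MAX_INDEX		19
#define QFQ_MIN_SLOT_SHIFT	(FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)

/* index ~ floor(log2(maxlen/weight)), scaled by FRAC_BITS, clamped at 0 */
static int
calc_index(uint32_t weight, unsigned int maxlen)
{
	uint64_t slot_size = (uint64_t)maxlen * (ONE_FP / weight);
	uint64_t size_map = slot_size >> QFQ_MIN_SLOT_SHIFT;
	int index;

	if (size_map == 0)
		return 0;
	index = 63 - __builtin_clzll(size_map) + 1;	/* ~ log2() + 1 */
	/* an exact power of two falls back into the group below */
	index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
	return index < 0 ? 0 : index;
}

int
main(void)
{
	printf("w=1    len=1500 -> group %d\n", calc_index(1, 1500));    /* 19 */
	printf("w=64   len=1500 -> group %d\n", calc_index(64, 1500));   /* 13 */
	printf("w=1024 len=64   -> group %d\n", calc_index(1024, 64));   /* 4 */
	return 0;
}

The last case shows the correction term: weight 1024 with 64-byte packets gives slot_size exactly 2^26, so the flow lands in group 4 rather than 5.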
- */ -static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen) -{ - uint64_t slot_size = (uint64_t)maxlen *inv_w; - unsigned long size_map; - int index = 0; - - size_map = (unsigned long)(slot_size >> QFQ_MIN_SLOT_SHIFT); - if (!size_map) - goto out; - - index = __fls(size_map) + 1; // basically a log_2() - index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1))); - - if (index < 0) - index = 0; - -out: - ND("W = %d, L = %d, I = %d\n", ONE_FP/inv_w, maxlen, index); - return index; -} -/*---- end support functions ----*/ - -/*-------- API calls --------------------------------*/ -/* - * Validate and copy parameters from flowset. - */ -static int -qfq_new_queue(struct dn_queue *_q) -{ - struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1); - struct qfq_class *cl = (struct qfq_class *)_q; - int i; - uint32_t w; /* approximated weight */ - - /* import parameters from the flowset. They should be correct - * already. - */ - w = _q->fs->fs.par[0]; - cl->lmax = _q->fs->fs.par[1]; - if (!w || w > QFQ_MAX_WEIGHT) { - w = 1; - D("rounding weight to 1"); - } - cl->inv_w = ONE_FP/w; - w = ONE_FP/cl->inv_w; - if (q->wsum + w > QFQ_MAX_WSUM) - return EINVAL; - - i = qfq_calc_index(cl->inv_w, cl->lmax); - cl->grp = &q->groups[i]; - q->wsum += w; - // XXX cl->S = q->V; ? - // XXX compute q->i_wsum - return 0; -} - -/* remove an empty queue */ -static int -qfq_free_queue(struct dn_queue *_q) -{ - struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1); - struct qfq_class *cl = (struct qfq_class *)_q; - if (cl->inv_w) { - q->wsum -= ONE_FP/cl->inv_w; - cl->inv_w = 0; /* reset weight to avoid run twice */ - } - return 0; -} - -/* Calculate a mask to mimic what would be ffs_from(). */ -static inline unsigned long -mask_from(unsigned long bitmap, int from) -{ - return bitmap & ~((1UL << from) - 1); -} - -/* - * The state computation relies on ER=0, IR=1, EB=2, IB=3 - * First compute eligibility comparing grp->S, q->V, - * then check if someone is blocking us and possibly add EB - */ -static inline unsigned int -qfq_calc_state(struct qfq_sched *q, struct qfq_group *grp) -{ - /* if S > V we are not eligible */ - unsigned int state = qfq_gt(grp->S, q->V); - unsigned long mask = mask_from(q->bitmaps[ER], grp->index); - struct qfq_group *next; - - if (mask) { - next = qfq_ffs(q, mask); - if (qfq_gt(grp->F, next->F)) - state |= EB; - } - - return state; -} - -/* - * In principle - * q->bitmaps[dst] |= q->bitmaps[src] & mask; - * q->bitmaps[src] &= ~mask; - * but we should make sure that src != dst - */ -static inline void -qfq_move_groups(struct qfq_sched *q, unsigned long mask, int src, int dst) -{ - q->bitmaps[dst] |= q->bitmaps[src] & mask; - q->bitmaps[src] &= ~mask; -} - -static inline void -qfq_unblock_groups(struct qfq_sched *q, int index, uint64_t old_finish) -{ - unsigned long mask = mask_from(q->bitmaps[ER], index + 1); - struct qfq_group *next; - - if (mask) { - next = qfq_ffs(q, mask); - if (!qfq_gt(next->F, old_finish)) - return; - } - - mask = (1UL << index) - 1; - qfq_move_groups(q, mask, EB, ER); - qfq_move_groups(q, mask, IB, IR); -} - -/* - * perhaps - * - old_V ^= q->V; - old_V >>= QFQ_MIN_SLOT_SHIFT; - if (old_V) { - ... 
- } - * - */ -static inline void -qfq_make_eligible(struct qfq_sched *q, uint64_t old_V) -{ - unsigned long mask, vslot, old_vslot; - - vslot = q->V >> QFQ_MIN_SLOT_SHIFT; - old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT; - - if (vslot != old_vslot) { - mask = (2UL << (__fls(vslot ^ old_vslot))) - 1; - qfq_move_groups(q, mask, IR, ER); - qfq_move_groups(q, mask, IB, EB); - } -} - -/* - * XXX we should make sure that slot becomes less than 32. - * This is guaranteed by the input values. - * roundedS is always cl->S rounded on grp->slot_shift bits. - */ -static inline void -qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, uint64_t roundedS) -{ - uint64_t slot = (roundedS - grp->S) >> grp->slot_shift; - unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS; - - cl->next = grp->slots[i]; - grp->slots[i] = cl; - __set_bit(slot, &grp->full_slots); -} - -/* - * remove the entry from the slot - */ -static inline void -qfq_front_slot_remove(struct qfq_group *grp) -{ - struct qfq_class **h = &grp->slots[grp->front]; - - *h = (*h)->next; - if (!*h) - __clear_bit(0, &grp->full_slots); -} - -/* - * Returns the first full queue in a group. As a side effect, - * adjust the bucket list so the first non-empty bucket is at - * position 0 in full_slots. - */ -static inline struct qfq_class * -qfq_slot_scan(struct qfq_group *grp) -{ - int i; - - ND("grp %d full %x", grp->index, grp->full_slots); - if (!grp->full_slots) - return NULL; - - i = ffs(grp->full_slots) - 1; // zero-based - if (i > 0) { - grp->front = (grp->front + i) % QFQ_MAX_SLOTS; - grp->full_slots >>= i; - } - - return grp->slots[grp->front]; -} - -/* - * adjust the bucket list. When the start time of a group decreases, - * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to - * move the objects. The mask of occupied slots must be shifted - * because we use ffs() to find the first non-empty slot. - * This covers decreases in the group's start time, but what about - * increases of the start time ? - * Here too we should make sure that i is less than 32 - */ -static inline void -qfq_slot_rotate(struct qfq_sched *q, struct qfq_group *grp, uint64_t roundedS) -{ - unsigned int i = (grp->S - roundedS) >> grp->slot_shift; - - grp->full_slots <<= i; - grp->front = (grp->front - i) % QFQ_MAX_SLOTS; -} - - -static inline void -qfq_update_eligible(struct qfq_sched *q, uint64_t old_V) -{ - bitmap ineligible; - - ineligible = q->bitmaps[IR] | q->bitmaps[IB]; - if (ineligible) { - if (!q->bitmaps[ER]) { - struct qfq_group *grp; - grp = qfq_ffs(q, ineligible); - if (qfq_gt(grp->S, q->V)) - q->V = grp->S; - } - qfq_make_eligible(q, old_V); - } -} - -/* - * Updates the class, returns true if also the group needs to be updated. 
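The constant-time update named above, new S equals old F and F advances by len * inv_w, is the heart of QFQ's per-packet work. A small sketch of the fixed-point arithmetic (struct flow and serve are illustrative stand-ins for qfq_class and qfq_update_class):

#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS	30
#define ONE_FP		(1ULL << FRAC_BITS)

struct flow {
	uint64_t S, F;		/* start and finish timestamps */
	uint32_t inv_w;		/* ONE_FP / weight */
};

/* After serving a packet, the new start time is the old finish time,
 * and the finish time advances by len/weight in fixed point. */
static void
serve(struct flow *f, unsigned int next_len)
{
	f->S = f->F;		/* cl->S = cl->F */
	if (next_len != 0)	/* still backlogged: set next finish time */
		f->F = f->S + (uint64_t)next_len * f->inv_w;
}

int
main(void)
{
	struct flow f = { .S = 0, .F = 0, .inv_w = ONE_FP / 2 };

	f.F = f.S + 1500ULL * f.inv_w;	/* head packet, 1500 bytes, w = 2 */
	serve(&f, 1500);		/* serve it; another packet queued */
	printf("F - S = %llu bytes of virtual service\n",	/* 750 */
	    (unsigned long long)((f.F - f.S) >> FRAC_BITS));
	return 0;
}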
- */ -static inline int -qfq_update_class(struct qfq_sched *q, struct qfq_group *grp, - struct qfq_class *cl) -{ - - cl->S = cl->F; - if (cl->_q.mq.head == NULL) { - qfq_front_slot_remove(grp); - } else { - unsigned int len; - uint64_t roundedS; - - len = cl->_q.mq.head->m_pkthdr.len; - cl->F = cl->S + (uint64_t)len * cl->inv_w; - roundedS = qfq_round_down(cl->S, grp->slot_shift); - if (roundedS == grp->S) - return 0; - - qfq_front_slot_remove(grp); - qfq_slot_insert(grp, cl, roundedS); - } - return 1; -} - -static struct mbuf * -qfq_dequeue(struct dn_sch_inst *si) -{ - struct qfq_sched *q = (struct qfq_sched *)(si + 1); - struct qfq_group *grp; - struct qfq_class *cl; - struct mbuf *m; - uint64_t old_V; - - NO(q->loops++;) - if (!q->bitmaps[ER]) { - NO(if (q->queued) - dump_sched(q, "start dequeue");) - return NULL; - } - - grp = qfq_ffs(q, q->bitmaps[ER]); - - cl = grp->slots[grp->front]; - /* extract from the first bucket in the bucket list */ - m = dn_dequeue(&cl->_q); - - if (!m) { - D("BUG/* non-workconserving leaf */"); - return NULL; - } - NO(q->queued--;) - old_V = q->V; - q->V += (uint64_t)m->m_pkthdr.len * IWSUM; - ND("m is %p F 0x%llx V now 0x%llx", m, cl->F, q->V); - - if (qfq_update_class(q, grp, cl)) { - uint64_t old_F = grp->F; - cl = qfq_slot_scan(grp); - if (!cl) { /* group gone, remove from ER */ - __clear_bit(grp->index, &q->bitmaps[ER]); - // grp->S = grp->F + 1; // XXX debugging only - } else { - uint64_t roundedS = qfq_round_down(cl->S, grp->slot_shift); - unsigned int s; - - if (grp->S == roundedS) - goto skip_unblock; - grp->S = roundedS; - grp->F = roundedS + (2ULL << grp->slot_shift); - /* remove from ER and put in the new set */ - __clear_bit(grp->index, &q->bitmaps[ER]); - s = qfq_calc_state(q, grp); - __set_bit(grp->index, &q->bitmaps[s]); - } - /* we need to unblock even if the group has gone away */ - qfq_unblock_groups(q, grp->index, old_F); - } - -skip_unblock: - qfq_update_eligible(q, old_V); - NO(if (!q->bitmaps[ER] && q->queued) - dump_sched(q, "end dequeue");) - - return m; -} - -/* - * Assign a reasonable start time for a new flow k in group i. - * Admissible values for \hat(F) are multiples of \sigma_i - * no greater than V+\sigma_i . Larger values mean that - * we had a wraparound so we consider the timestamp to be stale. - * - * If F is not stale and F >= V then we set S = F. - * Otherwise we should assign S = V, but this may violate - * the ordering in ER. So, if we have groups in ER, set S to - * the F_j of the first group j which would be blocking us. - * We are guaranteed not to move S backward because - * otherwise our group i would still be blocked. 
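Stripped of the slot rounding and the ER-bitmap scan, the policy in this comment reduces to: reuse F as the new start time when it is neither stale nor behind V, otherwise restart from V. A sketch of just that rule (new_start is an illustrative name, and 'stale' stands for the wraparound test on the rounded finish time against V plus one slot):

#include <stdint.h>
#include <stdio.h>

static uint64_t
new_start(uint64_t V, uint64_t F, int stale)
{
	if (!stale && F >= V)
		return F;	/* keep the old finish time as new start */
	return V;		/* stale or behind: restart at virtual time */
}

int
main(void)
{
	printf("%llu\n", (unsigned long long)new_start(100, 120, 0)); /* 120 */
	printf("%llu\n", (unsigned long long)new_start(100, 80, 0));  /* 100 */
	printf("%llu\n", (unsigned long long)new_start(100, 900, 1)); /* 100 */
	return 0;
}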
- */ -static inline void -qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) -{ - unsigned long mask; - uint32_t limit, roundedF; - int slot_shift = cl->grp->slot_shift; - - roundedF = qfq_round_down(cl->F, slot_shift); - limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift); - - if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) { - /* timestamp was stale */ - mask = mask_from(q->bitmaps[ER], cl->grp->index); - if (mask) { - struct qfq_group *next = qfq_ffs(q, mask); - if (qfq_gt(roundedF, next->F)) { - cl->S = next->F; - return; - } - } - cl->S = q->V; - } else { /* timestamp is not stale */ - cl->S = cl->F; - } -} - -static int -qfq_enqueue(struct dn_sch_inst *si, struct dn_queue *_q, struct mbuf *m) -{ - struct qfq_sched *q = (struct qfq_sched *)(si + 1); - struct qfq_group *grp; - struct qfq_class *cl = (struct qfq_class *)_q; - uint64_t roundedS; - int s; - - NO(q->loops++;) - DX(4, "len %d flow %p inv_w 0x%x grp %d", m->m_pkthdr.len, - _q, cl->inv_w, cl->grp->index); - /* XXX verify that the packet obeys the parameters */ - if (m != _q->mq.head) { - if (dn_enqueue(_q, m, 0)) /* packet was dropped */ - return 1; - NO(q->queued++;) - if (m != _q->mq.head) - return 0; - } - /* If reach this point, queue q was idle */ - grp = cl->grp; - qfq_update_start(q, cl); /* adjust start time */ - /* compute new finish time and rounded start. */ - cl->F = cl->S + (uint64_t)(m->m_pkthdr.len) * cl->inv_w; - roundedS = qfq_round_down(cl->S, grp->slot_shift); - - /* - * insert cl in the correct bucket. - * If cl->S >= grp->S we don't need to adjust the - * bucket list and simply go to the insertion phase. - * Otherwise grp->S is decreasing, we must make room - * in the bucket list, and also recompute the group state. - * Finally, if there were no flows in this group and nobody - * was in ER make sure to adjust V. - */ - if (grp->full_slots) { - if (!qfq_gt(grp->S, cl->S)) - goto skip_update; - /* create a slot for this cl->S */ - qfq_slot_rotate(q, grp, roundedS); - /* group was surely ineligible, remove */ - __clear_bit(grp->index, &q->bitmaps[IR]); - __clear_bit(grp->index, &q->bitmaps[IB]); - } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V)) - q->V = roundedS; - - grp->S = roundedS; - grp->F = roundedS + (2ULL << grp->slot_shift); // i.e. 2\sigma_i - s = qfq_calc_state(q, grp); - __set_bit(grp->index, &q->bitmaps[s]); - ND("new state %d 0x%x", s, q->bitmaps[s]); - ND("S %llx F %llx V %llx", cl->S, cl->F, q->V); -skip_update: - qfq_slot_insert(grp, cl, roundedS); - - return 0; -} - - -#if 0 -static inline void -qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp, - struct qfq_class *cl, struct qfq_class **pprev) -{ - unsigned int i, offset; - uint64_t roundedS; - - roundedS = qfq_round_down(cl->S, grp->slot_shift); - offset = (roundedS - grp->S) >> grp->slot_shift; - i = (grp->front + offset) % QFQ_MAX_SLOTS; - -#ifdef notyet - if (!pprev) { - pprev = &grp->slots[i]; - while (*pprev && *pprev != cl) - pprev = &(*pprev)->next; - } -#endif - - *pprev = cl->next; - if (!grp->slots[i]) - __clear_bit(offset, &grp->full_slots); -} - -/* - * called to forcibly destroy a queue. - * If the queue is not in the front bucket, or if it has - * other queues in the front bucket, we can simply remove - * the queue with no other side effects. - * Otherwise we must propagate the event up. - * XXX description to be completed. 
- */ -static void -qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl, - struct qfq_class **pprev) -{ - struct qfq_group *grp = &q->groups[cl->index]; - unsigned long mask; - uint64_t roundedS; - int s; - - cl->F = cl->S; // not needed if the class goes away. - qfq_slot_remove(q, grp, cl, pprev); - - if (!grp->full_slots) { - /* nothing left in the group, remove from all sets. - * Do ER last because if we were blocking other groups - * we must unblock them. - */ - __clear_bit(grp->index, &q->bitmaps[IR]); - __clear_bit(grp->index, &q->bitmaps[EB]); - __clear_bit(grp->index, &q->bitmaps[IB]); - - if (test_bit(grp->index, &q->bitmaps[ER]) && - !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) { - mask = q->bitmaps[ER] & ((1UL << grp->index) - 1); - if (mask) - mask = ~((1UL << __fls(mask)) - 1); - else - mask = ~0UL; - qfq_move_groups(q, mask, EB, ER); - qfq_move_groups(q, mask, IB, IR); - } - __clear_bit(grp->index, &q->bitmaps[ER]); - } else if (!grp->slots[grp->front]) { - cl = qfq_slot_scan(grp); - roundedS = qfq_round_down(cl->S, grp->slot_shift); - if (grp->S != roundedS) { - __clear_bit(grp->index, &q->bitmaps[ER]); - __clear_bit(grp->index, &q->bitmaps[IR]); - __clear_bit(grp->index, &q->bitmaps[EB]); - __clear_bit(grp->index, &q->bitmaps[IB]); - grp->S = roundedS; - grp->F = roundedS + (2ULL << grp->slot_shift); - s = qfq_calc_state(q, grp); - __set_bit(grp->index, &q->bitmaps[s]); - } - } - qfq_update_eligible(q, q->V); -} -#endif - -static int -qfq_new_fsk(struct dn_fsk *f) -{ - ipdn_bound_var(&f->fs.par[0], 1, 1, QFQ_MAX_WEIGHT, "qfq weight"); - ipdn_bound_var(&f->fs.par[1], 1500, 1, 2000, "qfq maxlen"); - ND("weight %d len %d\n", f->fs.par[0], f->fs.par[1]); - return 0; -} - -/* - * initialize a new scheduler instance - */ -static int -qfq_new_sched(struct dn_sch_inst *si) -{ - struct qfq_sched *q = (struct qfq_sched *)(si + 1); - struct qfq_group *grp; - int i; - - for (i = 0; i <= QFQ_MAX_INDEX; i++) { - grp = &q->groups[i]; - grp->index = i; - grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS - - (QFQ_MAX_INDEX - i); - } - return 0; -} - -/* - * QFQ scheduler descriptor - */ -static struct dn_alg qfq_desc = { - _SI( .type = ) DN_SCHED_QFQ, - _SI( .name = ) "QFQ", - _SI( .flags = ) DN_MULTIQUEUE, - - _SI( .schk_datalen = ) 0, - _SI( .si_datalen = ) sizeof(struct qfq_sched), - _SI( .q_datalen = ) sizeof(struct qfq_class) - sizeof(struct dn_queue), - - _SI( .enqueue = ) qfq_enqueue, - _SI( .dequeue = ) qfq_dequeue, - - _SI( .config = ) NULL, - _SI( .destroy = ) NULL, - _SI( .new_sched = ) qfq_new_sched, - _SI( .free_sched = ) NULL, - _SI( .new_fsk = ) qfq_new_fsk, - _SI( .free_fsk = ) NULL, - _SI( .new_queue = ) qfq_new_queue, - _SI( .free_queue = ) qfq_free_queue, -}; - -DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc); - -#ifdef QFQ_DEBUG -static void -dump_groups(struct qfq_sched *q, uint32_t mask) -{ - int i, j; - - for (i = 0; i < QFQ_MAX_INDEX + 1; i++) { - struct qfq_group *g = &q->groups[i]; - - if (0 == (mask & (1<<i))) - continue; - for (j = 0; j < QFQ_MAX_SLOTS; j++) { - if (g->slots[j]) - D(" bucket %d %p", j, g->slots[j]); - } - D("full_slots 0x%x", g->full_slots); - D(" %2d S 0x%20llx F 0x%llx %c", i, - g->S, g->F, - mask & (1<<i) ? 
'1' : '0'); - } -} - -static void -dump_sched(struct qfq_sched *q, const char *msg) -{ - D("--- in %s: ---", msg); - ND("loops %d queued %d V 0x%llx", q->loops, q->queued, q->V); - D(" ER 0x%08x", q->bitmaps[ER]); - D(" EB 0x%08x", q->bitmaps[EB]); - D(" IR 0x%08x", q->bitmaps[IR]); - D(" IB 0x%08x", q->bitmaps[IB]); - dump_groups(q, 0xffffffff); -}; -#endif /* QFQ_DEBUG */ diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_rr.c b/freebsd/sys/netpfil/ipfw/dn_sched_rr.c deleted file mode 100644 index c1862ab0..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_sched_rr.c +++ /dev/null @@ -1,309 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/* - * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - */ - -#ifdef _KERNEL -#include <sys/malloc.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <net/if.h> /* IFNAMSIZ */ -#include <netinet/in.h> -#include <netinet/ip_var.h> /* ipfw_rule_ref */ -#include <netinet/ip_fw.h> /* flow_id */ -#include <netinet/ip_dummynet.h> -#include <netpfil/ipfw/dn_heap.h> -#include <netpfil/ipfw/ip_dn_private.h> -#include <netpfil/ipfw/dn_sched.h> -#else -#include <dn_test.h> -#endif - -#define DN_SCHED_RR 3 // XXX Where? 
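The round-robin scheduler deleted below is a classic deficit round robin: each queue carries a byte credit, a head packet larger than the remaining credit adds one quantum and passes the turn, and a queue keeps the turn while its credit lasts. A compact sketch of that credit loop over a fixed array instead of the kernel's circular list (struct q and rr_next are illustrative, not dummynet API):

#include <stdio.h>

#define NQ	2

struct q {
	int credit, quantum;
	int pkts[4], n, head;	/* pending packet lengths */
};

static int
rr_next(struct q *qs, int *cur)
{
	for (int guard = 0; guard < 64; guard++) {	/* demo bound */
		struct q *q = &qs[*cur];

		if (q->head == q->n) {			/* empty, skip */
			*cur = (*cur + 1) % NQ;
		} else if (q->pkts[q->head] > q->credit) {
			q->credit += q->quantum;	/* too big, wait */
			*cur = (*cur + 1) % NQ;
		} else {
			q->credit -= q->pkts[q->head];
			return q->pkts[q->head++];	/* keep the turn */
		}
	}
	return -1;					/* nothing to send */
}

int
main(void)
{
	struct q qs[NQ] = {
		{ .credit = 500, .quantum = 500, .pkts = { 1500, 100 }, .n = 2 },
		{ .credit = 500, .quantum = 500, .pkts = { 300, 300 }, .n = 2 },
	};
	int cur = 0;

	for (int i = 0; i < 4; i++)
		printf("sent %d bytes\n", rr_next(qs, &cur));
	return 0;
}

The run prints 300, 300, 1500, 100: the big packet waits until its queue has banked three quantums of credit, which is exactly how the scheduler below keeps long-term shares proportional to the quantum.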
- -struct rr_queue { - struct dn_queue q; /* Standard queue */ - int status; /* 1: queue is in the list */ - int credit; /* Number of bytes to transmit */ - int quantum; /* quantum * C */ - struct rr_queue *qnext; /* */ -}; - -/* struct rr_schk contains global config parameters - * and is right after dn_schk - */ -struct rr_schk { - int min_q; /* Min quantum */ - int max_q; /* Max quantum */ - int q_bytes; /* Bytes per quantum */ -}; - -/* per-instance round robin list, right after dn_sch_inst */ -struct rr_si { - struct rr_queue *head, *tail; /* Pointer to current queue */ -}; - -/* Append a queue to the rr list */ -static inline void -rr_append(struct rr_queue *q, struct rr_si *si) -{ - q->status = 1; /* mark as in-rr_list */ - q->credit = q->quantum; /* initialize credit */ - - /* append to the tail */ - if (si->head == NULL) - si->head = q; - else - si->tail->qnext = q; - si->tail = q; /* advance the tail pointer */ - q->qnext = si->head; /* make it circular */ -} - -/* Remove the head queue from circular list. */ -static inline void -rr_remove_head(struct rr_si *si) -{ - if (si->head == NULL) - return; /* empty queue */ - si->head->status = 0; - - if (si->head == si->tail) { - si->head = si->tail = NULL; - return; - } - - si->head = si->head->qnext; - si->tail->qnext = si->head; -} - -/* Remove a queue from circular list. - * XXX see if ti can be merge with remove_queue() - */ -static inline void -remove_queue_q(struct rr_queue *q, struct rr_si *si) -{ - struct rr_queue *prev; - - if (q->status != 1) - return; - if (q == si->head) { - rr_remove_head(si); - return; - } - - for (prev = si->head; prev; prev = prev->qnext) { - if (prev->qnext != q) - continue; - prev->qnext = q->qnext; - if (q == si->tail) - si->tail = prev; - q->status = 0; - break; - } -} - - -static inline void -next_pointer(struct rr_si *si) -{ - if (si->head == NULL) - return; /* empty queue */ - - si->head = si->head->qnext; - si->tail = si->tail->qnext; -} - -static int -rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) -{ - struct rr_si *si; - struct rr_queue *rrq; - - if (m != q->mq.head) { - if (dn_enqueue(q, m, 0)) /* packet was dropped */ - return 1; - if (m != q->mq.head) - return 0; - } - - /* If reach this point, queue q was idle */ - si = (struct rr_si *)(_si + 1); - rrq = (struct rr_queue *)q; - - if (rrq->status == 1) /* Queue is already in the queue list */ - return 0; - - /* Insert the queue in the queue list */ - rr_append(rrq, si); - - return 0; -} - -static struct mbuf * -rr_dequeue(struct dn_sch_inst *_si) -{ - /* Access scheduler instance private data */ - struct rr_si *si = (struct rr_si *)(_si + 1); - struct rr_queue *rrq; - uint64_t len; - - while ( (rrq = si->head) ) { - struct mbuf *m = rrq->q.mq.head; - if ( m == NULL) { - /* empty queue, remove from list */ - rr_remove_head(si); - continue; - } - len = m->m_pkthdr.len; - - if (len > rrq->credit) { - /* Packet too big */ - rrq->credit += rrq->quantum; - /* Try next queue */ - next_pointer(si); - } else { - rrq->credit -= len; - return dn_dequeue(&rrq->q); - } - } - - /* no packet to dequeue*/ - return NULL; -} - -static int -rr_config(struct dn_schk *_schk) -{ - struct rr_schk *schk = (struct rr_schk *)(_schk + 1); - ND("called"); - - /* use reasonable quantums (64..2k bytes, default 1500) */ - schk->min_q = 64; - schk->max_q = 2048; - schk->q_bytes = 1500; /* quantum */ - - return 0; -} - -static int -rr_new_sched(struct dn_sch_inst *_si) -{ - struct rr_si *si = (struct rr_si *)(_si + 1); - - ND("called"); - si->head = 
si->tail = NULL; - - return 0; -} - -static int -rr_free_sched(struct dn_sch_inst *_si) -{ - ND("called"); - /* Nothing to do? */ - return 0; -} - -static int -rr_new_fsk(struct dn_fsk *fs) -{ - struct rr_schk *schk = (struct rr_schk *)(fs->sched + 1); - /* par[0] is the weight, par[1] is the quantum step */ - ipdn_bound_var(&fs->fs.par[0], 1, - 1, 65536, "RR weight"); - ipdn_bound_var(&fs->fs.par[1], schk->q_bytes, - schk->min_q, schk->max_q, "RR quantum"); - return 0; -} - -static int -rr_new_queue(struct dn_queue *_q) -{ - struct rr_queue *q = (struct rr_queue *)_q; - - _q->ni.oid.subtype = DN_SCHED_RR; - - q->quantum = _q->fs->fs.par[0] * _q->fs->fs.par[1]; - ND("called, q->quantum %d", q->quantum); - q->credit = q->quantum; - q->status = 0; - - if (_q->mq.head != NULL) { - /* Queue NOT empty, insert in the queue list */ - rr_append(q, (struct rr_si *)(_q->_si + 1)); - } - return 0; -} - -static int -rr_free_queue(struct dn_queue *_q) -{ - struct rr_queue *q = (struct rr_queue *)_q; - - ND("called"); - if (q->status == 1) { - struct rr_si *si = (struct rr_si *)(_q->_si + 1); - remove_queue_q(q, si); - } - return 0; -} - -/* - * RR scheduler descriptor - * contains the type of the scheduler, the name, the size of the - * structures and function pointers. - */ -static struct dn_alg rr_desc = { - _SI( .type = ) DN_SCHED_RR, - _SI( .name = ) "RR", - _SI( .flags = ) DN_MULTIQUEUE, - - _SI( .schk_datalen = ) 0, - _SI( .si_datalen = ) sizeof(struct rr_si), - _SI( .q_datalen = ) sizeof(struct rr_queue) - sizeof(struct dn_queue), - - _SI( .enqueue = ) rr_enqueue, - _SI( .dequeue = ) rr_dequeue, - - _SI( .config = ) rr_config, - _SI( .destroy = ) NULL, - _SI( .new_sched = ) rr_new_sched, - _SI( .free_sched = ) rr_free_sched, - _SI( .new_fsk = ) rr_new_fsk, - _SI( .free_fsk = ) NULL, - _SI( .new_queue = ) rr_new_queue, - _SI( .free_queue = ) rr_free_queue, -}; - - -DECLARE_DNSCHED_MODULE(dn_rr, &rr_desc); diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c b/freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c deleted file mode 100644 index 77c4bbad..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c +++ /dev/null @@ -1,375 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/* - * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa - * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - */ - -#ifdef _KERNEL -#include <sys/malloc.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <net/if.h> /* IFNAMSIZ */ -#include <netinet/in.h> -#include <netinet/ip_var.h> /* ipfw_rule_ref */ -#include <netinet/ip_fw.h> /* flow_id */ -#include <netinet/ip_dummynet.h> -#include <netpfil/ipfw/dn_heap.h> -#include <netpfil/ipfw/ip_dn_private.h> -#include <netpfil/ipfw/dn_sched.h> -#else -#include <dn_test.h> -#endif - -#ifndef MAX64 -#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) -#endif - -/* - * timestamps are computed on 64 bit using fixed point arithmetic. - * LMAX_BITS, WMAX_BITS are the max number of bits for the packet len - * and sum of weights, respectively. FRAC_BITS is the number of - * fractional bits. We want FRAC_BITS >> WMAX_BITS to avoid too large - * errors when computing the inverse, FRAC_BITS < 32 so we can do 1/w - * using an unsigned 32-bit division, and to avoid wraparounds we need - * LMAX_BITS + WMAX_BITS + FRAC_BITS << 64 - * As an example - * FRAC_BITS = 26, LMAX_BITS=14, WMAX_BITS = 19 - */ -#ifndef FRAC_BITS -#define FRAC_BITS 28 /* shift for fixed point arithmetic */ -#define ONE_FP (1UL << FRAC_BITS) -#endif - -/* - * Private information for the scheduler instance: - * sch_heap (key is Finish time) returns the next queue to serve - * ne_heap (key is Start time) stores not-eligible queues - * idle_heap (key=start/finish time) stores idle flows. It must - * support extract-from-middle. - * A flow is only in 1 of the three heaps. - * XXX todo: use a more efficient data structure, e.g. a tree sorted - * by F with min_subtree(S) in each node - */ -struct wf2qp_si { - struct dn_heap sch_heap; /* top extract - key Finish time */ - struct dn_heap ne_heap; /* top extract - key Start time */ - struct dn_heap idle_heap; /* random extract - key Start=Finish time */ - uint64_t V; /* virtual time */ - uint32_t inv_wsum; /* inverse of sum of weights */ - uint32_t wsum; /* sum of weights */ -}; - -struct wf2qp_queue { - struct dn_queue _q; - uint64_t S, F; /* start time, finish time */ - uint32_t inv_w; /* ONE_FP / weight */ - int32_t heap_pos; /* position (index) of struct in heap */ -}; - -/* - * This file implements a WF2Q+ scheduler as it has been in dummynet - * since 2000. - * The scheduler supports per-flow queues and has O(log N) complexity. - * - * WF2Q+ needs to drain entries from the idle heap so that we - * can keep the sum of weights up to date. We can do it whenever - * we get a chance, or periodically, or following some other - * strategy. The function idle_check() drains at most N elements - * from the idle heap. 
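Behind the three heaps sits the usual WF2Q+ virtual clock: V advances by len/wsum for every packet sent, and a flow whose start time S is still ahead of V is not eligible and waits in ne_heap. A sketch of that bookkeeping in the same fixed-point style (struct state and transmit are illustrative; wsum = 4 keeps ONE_FP/wsum exact so the demo reaches eligibility precisely):

#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS	28
#define ONE_FP		(1ULL << FRAC_BITS)

struct state {
	uint64_t V;			/* virtual time */
	uint32_t wsum, inv_wsum;	/* ONE_FP / wsum */
};

/* V advances by len * inv_wsum, i.e. len/wsum in fixed point */
static void
transmit(struct state *st, unsigned int len)
{
	st->V += (uint64_t)len * st->inv_wsum;
}

int
main(void)
{
	struct state st = { .V = 0, .wsum = 4 };
	const uint64_t S = 1500ULL * ONE_FP;	/* our flow's start time */

	st.inv_wsum = ONE_FP / st.wsum;
	while (S > st.V) {			/* S > V: not eligible yet */
		transmit(&st, 1500);		/* other flows send */
		printf("V=%llu eligible=%d\n",
		    (unsigned long long)(st.V >> FRAC_BITS), S <= st.V);
	}
	return 0;
}

V climbs 375, 750, 1125, 1500; the flow becomes eligible exactly when V catches up with S, which is the condition that moves a queue from ne_heap into sch_heap.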
- */ -static void -idle_check(struct wf2qp_si *si, int n, int force) -{ - struct dn_heap *h = &si->idle_heap; - while (n-- > 0 && h->elements > 0 && - (force || DN_KEY_LT(HEAP_TOP(h)->key, si->V))) { - struct dn_queue *q = HEAP_TOP(h)->object; - struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q; - - heap_extract(h, NULL); - /* XXX to let the flowset delete the queue we should - * mark it as 'unused' by the scheduler. - */ - alg_fq->S = alg_fq->F + 1; /* Mark timestamp as invalid. */ - si->wsum -= q->fs->fs.par[0]; /* adjust sum of weights */ - if (si->wsum > 0) - si->inv_wsum = ONE_FP/si->wsum; - } -} - -static int -wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) -{ - struct dn_fsk *fs = q->fs; - struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); - struct wf2qp_queue *alg_fq; - uint64_t len = m->m_pkthdr.len; - - if (m != q->mq.head) { - if (dn_enqueue(q, m, 0)) /* packet was dropped */ - return 1; - if (m != q->mq.head) /* queue was already busy */ - return 0; - } - - /* If reach this point, queue q was idle */ - alg_fq = (struct wf2qp_queue *)q; - - if (DN_KEY_LT(alg_fq->F, alg_fq->S)) { - /* F<S means timestamps are invalid ->brand new queue. */ - alg_fq->S = si->V; /* init start time */ - si->wsum += fs->fs.par[0]; /* add weight of new queue. */ - si->inv_wsum = ONE_FP/si->wsum; - } else { /* if it was idle then it was in the idle heap */ - heap_extract(&si->idle_heap, q); - alg_fq->S = MAX64(alg_fq->F, si->V); /* compute new S */ - } - alg_fq->F = alg_fq->S + len * alg_fq->inv_w; - - /* if nothing is backlogged, make sure this flow is eligible */ - if (si->ne_heap.elements == 0 && si->sch_heap.elements == 0) - si->V = MAX64(alg_fq->S, si->V); - - /* - * Look at eligibility. A flow is not eligibile if S>V (when - * this happens, it means that there is some other flow already - * scheduled for the same pipe, so the sch_heap cannot be - * empty). If the flow is not eligible we just store it in the - * ne_heap. Otherwise, we store in the sch_heap. - * Note that for all flows in sch_heap (SCH), S_i <= V, - * and for all flows in ne_heap (NEH), S_i > V. - * So when we need to compute max(V, min(S_i)) forall i in - * SCH+NEH, we only need to look into NEH. - */ - if (DN_KEY_LT(si->V, alg_fq->S)) { - /* S>V means flow Not eligible. */ - if (si->sch_heap.elements == 0) - D("++ ouch! not eligible but empty scheduler!"); - heap_insert(&si->ne_heap, alg_fq->S, q); - } else { - heap_insert(&si->sch_heap, alg_fq->F, q); - } - return 0; -} - -/* XXX invariant: sch > 0 || V >= min(S in neh) */ -static struct mbuf * -wf2qp_dequeue(struct dn_sch_inst *_si) -{ - /* Access scheduler instance private data */ - struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); - struct mbuf *m; - struct dn_queue *q; - struct dn_heap *sch = &si->sch_heap; - struct dn_heap *neh = &si->ne_heap; - struct wf2qp_queue *alg_fq; - - if (sch->elements == 0 && neh->elements == 0) { - /* we have nothing to do. We could kill the idle heap - * altogether and reset V - */ - idle_check(si, 0x7fffffff, 1); - si->V = 0; - si->wsum = 0; /* should be set already */ - return NULL; /* quick return if nothing to do */ - } - idle_check(si, 1, 0); /* drain something from the idle heap */ - - /* make sure at least one element is eligible, bumping V - * and moving entries that have become eligible. - * We need to repeat the first part twice, before and - * after extracting the candidate, or enqueue() will - * find the data structure in a wrong state. 
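The eligibility pass described above, bump V to the smallest start time when nothing is eligible and then migrate every flow with S <= V, can be shown in isolation. In this sketch small sorted arrays stand in for the dn_heap structures and bare keys stand in for queues (make_eligible is an illustrative name):

#include <stdint.h>
#include <stdio.h>

#define N 4

static uint64_t V;
static uint64_t neh[N], sch[N];	/* start-time keys, kept sorted */
static int nne, nsch;

static void
make_eligible(void)
{
	if (nsch == 0 && nne > 0)
		V = V > neh[0] ? V : neh[0];	/* V = max(V, min S) */
	while (nne > 0 && neh[0] <= V) {
		sch[nsch++] = neh[0];		/* flow becomes eligible */
		for (int i = 1; i < nne; i++)	/* pop the front key */
			neh[i - 1] = neh[i];
		nne--;
	}
}

int
main(void)
{
	neh[nne++] = 10;	/* two ineligible flows, sorted by S */
	neh[nne++] = 30;
	V = 0;
	make_eligible();
	printf("V=%llu eligible=%d pending=%d\n",	/* V=10 1 1 */
	    (unsigned long long)V, nsch, nne);
	return 0;
}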
- */ - m = NULL; - for(;;) { - /* - * Compute V = max(V, min(S_i)). Remember that all elements - * in sch have by definition S_i <= V so if sch is not empty, - * V is surely the max and we must not update it. Conversely, - * if sch is empty we only need to look at neh. - * We don't need to move the queues, as it will be done at the - * next enqueue - */ - if (sch->elements == 0 && neh->elements > 0) { - si->V = MAX64(si->V, HEAP_TOP(neh)->key); - } - while (neh->elements > 0 && - DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) { - q = HEAP_TOP(neh)->object; - alg_fq = (struct wf2qp_queue *)q; - heap_extract(neh, NULL); - heap_insert(sch, alg_fq->F, q); - } - if (m) /* pkt found in previous iteration */ - break; - /* ok we have at least one eligible pkt */ - q = HEAP_TOP(sch)->object; - alg_fq = (struct wf2qp_queue *)q; - m = dn_dequeue(q); - heap_extract(sch, NULL); /* Remove queue from heap. */ - si->V += (uint64_t)(m->m_pkthdr.len) * si->inv_wsum; - alg_fq->S = alg_fq->F; /* Update start time. */ - if (q->mq.head == 0) { /* not backlogged any more. */ - heap_insert(&si->idle_heap, alg_fq->F, q); - } else { /* Still backlogged. */ - /* Update F, store in neh or sch */ - uint64_t len = q->mq.head->m_pkthdr.len; - alg_fq->F += len * alg_fq->inv_w; - if (DN_KEY_LEQ(alg_fq->S, si->V)) { - heap_insert(sch, alg_fq->F, q); - } else { - heap_insert(neh, alg_fq->S, q); - } - } - } - return m; -} - -static int -wf2qp_new_sched(struct dn_sch_inst *_si) -{ - struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); - int ofs = offsetof(struct wf2qp_queue, heap_pos); - - /* all heaps support extract from middle */ - if (heap_init(&si->idle_heap, 16, ofs) || - heap_init(&si->sch_heap, 16, ofs) || - heap_init(&si->ne_heap, 16, ofs)) { - heap_free(&si->ne_heap); - heap_free(&si->sch_heap); - heap_free(&si->idle_heap); - return ENOMEM; - } - return 0; -} - -static int -wf2qp_free_sched(struct dn_sch_inst *_si) -{ - struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); - - heap_free(&si->sch_heap); - heap_free(&si->ne_heap); - heap_free(&si->idle_heap); - - return 0; -} - -static int -wf2qp_new_fsk(struct dn_fsk *fs) -{ - ipdn_bound_var(&fs->fs.par[0], 1, - 1, 100, "WF2Q+ weight"); - return 0; -} - -static int -wf2qp_new_queue(struct dn_queue *_q) -{ - struct wf2qp_queue *q = (struct wf2qp_queue *)_q; - - _q->ni.oid.subtype = DN_SCHED_WF2QP; - q->F = 0; /* not strictly necessary */ - q->S = q->F + 1; /* mark timestamp as invalid. */ - q->inv_w = ONE_FP / _q->fs->fs.par[0]; - if (_q->mq.head != NULL) { - wf2qp_enqueue(_q->_si, _q, _q->mq.head); - } - return 0; -} - -/* - * Called when the infrastructure removes a queue (e.g. flowset - * is reconfigured). Nothing to do if we did not 'own' the queue, - * otherwise remove it from the right heap and adjust the sum - * of weights. - */ -static int -wf2qp_free_queue(struct dn_queue *q) -{ - struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q; - struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1); - - if (alg_fq->S >= alg_fq->F + 1) - return 0; /* nothing to do, not in any heap */ - si->wsum -= q->fs->fs.par[0]; - if (si->wsum > 0) - si->inv_wsum = ONE_FP/si->wsum; - - /* extract from the heap. XXX TODO we may need to adjust V - * to make sure the invariants hold. 
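The extraction below leans on an invariant worth stating plainly: a queue is in exactly one heap at a time, the idle heap if empty, ne_heap if backlogged with S > V, and sch_heap otherwise. A sketch of that membership rule (which_heap is an illustrative name):

#include <stdint.h>
#include <stdio.h>

enum heap { IDLE, NOT_ELIGIBLE, SCHED };

/* Mirror of the test used by wf2qp_free_queue() below. */
static enum heap
which_heap(int backlogged, uint64_t S, uint64_t V)
{
	if (!backlogged)
		return IDLE;		/* empty: idle_heap */
	return S > V ? NOT_ELIGIBLE : SCHED;
}

int
main(void)
{
	printf("%d %d %d\n",
	    which_heap(0, 5, 10),	/* 0: IDLE */
	    which_heap(1, 20, 10),	/* 1: NOT_ELIGIBLE */
	    which_heap(1, 5, 10));	/* 2: SCHED */
	return 0;
}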
- */ - if (q->mq.head == NULL) { - heap_extract(&si->idle_heap, q); - } else if (DN_KEY_LT(si->V, alg_fq->S)) { - heap_extract(&si->ne_heap, q); - } else { - heap_extract(&si->sch_heap, q); - } - return 0; -} - -/* - * WF2Q+ scheduler descriptor - * contains the type of the scheduler, the name, the size of the - * structures and function pointers. - */ -static struct dn_alg wf2qp_desc = { - _SI( .type = ) DN_SCHED_WF2QP, - _SI( .name = ) "WF2Q+", - _SI( .flags = ) DN_MULTIQUEUE, - - /* we need extra space in the si and the queue */ - _SI( .schk_datalen = ) 0, - _SI( .si_datalen = ) sizeof(struct wf2qp_si), - _SI( .q_datalen = ) sizeof(struct wf2qp_queue) - - sizeof(struct dn_queue), - - _SI( .enqueue = ) wf2qp_enqueue, - _SI( .dequeue = ) wf2qp_dequeue, - - _SI( .config = ) NULL, - _SI( .destroy = ) NULL, - _SI( .new_sched = ) wf2qp_new_sched, - _SI( .free_sched = ) wf2qp_free_sched, - - _SI( .new_fsk = ) wf2qp_new_fsk, - _SI( .free_fsk = ) NULL, - - _SI( .new_queue = ) wf2qp_new_queue, - _SI( .free_queue = ) wf2qp_free_queue, -}; - - -DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc); diff --git a/freebsd/sys/netpfil/ipfw/ip_dn_glue.c b/freebsd/sys/netpfil/ipfw/ip_dn_glue.c deleted file mode 100644 index 8e0cc36d..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_dn_glue.c +++ /dev/null @@ -1,848 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
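The last deletion, ip_dn_glue.c, translated the RELENG_7/RELENG_8 binary sockopt layouts into the current request format, which is packed as a chain of variable-length objects that each begin with a length/type header. A sketch of the oid_fill()/o_next() packing pattern that appears further down, with struct hdr as a simplified stand-in for struct dn_id:

#include <stdint.h>
#include <stdio.h>

struct hdr {
	uint16_t len;
	uint16_t type;
};

/* Fill the header at *p and advance the cursor past the object,
 * mirroring the o_next() helper below. */
static void *
o_next(void **p, int len, int type)
{
	struct hdr *h = *p;

	h->len = (uint16_t)len;
	h->type = (uint16_t)type;
	*p = (char *)*p + len;		/* bump past this object */
	return h;
}

int
main(void)
{
	uint16_t buf[128];		/* aligned backing store */
	void *p = buf;
	struct hdr *cmd, *sch;

	cmd = o_next(&p, sizeof(struct hdr), 1);	/* command header */
	sch = o_next(&p, sizeof(struct hdr) + 32, 2);	/* scheduler blob */
	printf("packed %ld bytes: types %u then %u\n",
	    (long)((char *)p - (char *)buf), cmd->type, sch->type);
	return 0;
}

This is how dn_compat_configure() below turns one old-style pipe into a DN_CMD_CONFIG header followed by DN_SCH, DN_LINK, DN_FS and, optionally, DN_PROFILE objects in a single buffer.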
- */ - -/* - * $FreeBSD$ - * - * Binary compatibility support for /sbin/ipfw RELENG_7 and RELENG_8 - */ - -#include <rtems/bsd/local/opt_inet6.h> - -#include <rtems/bsd/sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/kernel.h> -#include <rtems/bsd/sys/lock.h> -#include <sys/module.h> -#include <sys/priv.h> -#include <sys/proc.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/time.h> -#include <sys/taskqueue.h> -#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ -#include <netinet/in.h> -#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ -#include <netinet/ip_fw.h> -#include <netinet/ip_dummynet.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/dn_heap.h> -#include <netpfil/ipfw/ip_dn_private.h> -#include <netpfil/ipfw/dn_sched.h> - -/* FREEBSD7.2 ip_dummynet.h r191715*/ - -struct dn_heap_entry7 { - int64_t key; /* sorting key. Topmost element is smallest one */ - void *object; /* object pointer */ -}; - -struct dn_heap7 { - int size; - int elements; - int offset; /* XXX if > 0 this is the offset of direct ptr to obj */ - struct dn_heap_entry7 *p; /* really an array of "size" entries */ -}; - -/* Common to 7.2 and 8 */ -struct dn_flow_set { - SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */ - - u_short fs_nr ; /* flow_set number */ - u_short flags_fs; -#define DNOLD_HAVE_FLOW_MASK 0x0001 -#define DNOLD_IS_RED 0x0002 -#define DNOLD_IS_GENTLE_RED 0x0004 -#define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ -#define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */ -#define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */ -#define DNOLD_IS_PIPE 0x4000 -#define DNOLD_IS_QUEUE 0x8000 - - struct dn_pipe7 *pipe ; /* pointer to parent pipe */ - u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ - - int weight ; /* WFQ queue weight */ - int qsize ; /* queue size in slots or bytes */ - int plr ; /* pkt loss rate (2^31-1 means 100%) */ - - struct ipfw_flow_id flow_mask ; - - /* hash table of queues onto this flow_set */ - int rq_size ; /* number of slots */ - int rq_elements ; /* active elements */ - struct dn_flow_queue7 **rq; /* array of rq_size entries */ - - u_int32_t last_expired ; /* do not expire too frequently */ - int backlogged ; /* #active queues for this flowset */ - - /* RED parameters */ -#define SCALE_RED 16 -#define SCALE(x) ( (x) << SCALE_RED ) -#define SCALE_VAL(x) ( (x) >> SCALE_RED ) -#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) - int w_q ; /* queue weight (scaled) */ - int max_th ; /* maximum threshold for queue (scaled) */ - int min_th ; /* minimum threshold for queue (scaled) */ - int max_p ; /* maximum value for p_b (scaled) */ - u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ - u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ - u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ - u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ - u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ - u_int lookup_depth ; /* depth of lookup table */ - int lookup_step ; /* granularity inside the lookup table */ - int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ - int avg_pkt_size ; /* medium packet size */ - int max_pkt_size ; /* max packet size */ -}; -SLIST_HEAD(dn_flow_set_head, dn_flow_set); - -#define DN_IS_PIPE 0x4000 -#define DN_IS_QUEUE 0x8000 -struct dn_flow_queue7 { - struct dn_flow_queue7 *next ; - struct ipfw_flow_id id ; - - 
struct mbuf *head, *tail ; /* queue of packets */ - u_int len ; - u_int len_bytes ; - - u_long numbytes; - - u_int64_t tot_pkts ; /* statistics counters */ - u_int64_t tot_bytes ; - u_int32_t drops ; - - int hash_slot ; /* debugging/diagnostic */ - - /* RED parameters */ - int avg ; /* average queue length est. (scaled) */ - int count ; /* arrivals since last RED drop */ - int random ; /* random value (scaled) */ - u_int32_t q_time; /* start of queue idle time */ - - /* WF2Q+ support */ - struct dn_flow_set *fs ; /* parent flow set */ - int heap_pos ; /* position (index) of struct in heap */ - int64_t sched_time ; /* current time when queue enters ready_heap */ - - int64_t S,F ; /* start time, finish time */ -}; - -struct dn_pipe7 { /* a pipe */ - SLIST_ENTRY(dn_pipe7) next; /* linked list in a hash slot */ - - int pipe_nr ; /* number */ - int bandwidth; /* really, bytes/tick. */ - int delay ; /* really, ticks */ - - struct mbuf *head, *tail ; /* packets in delay line */ - - /* WF2Q+ */ - struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/ - struct dn_heap7 not_eligible_heap; /* top extract- key Start time */ - struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */ - - int64_t V ; /* virtual time */ - int sum; /* sum of weights of all active sessions */ - - int numbytes; - - int64_t sched_time ; /* time pipe was scheduled in ready_heap */ - - /* - * When the tx clock come from an interface (if_name[0] != '\0'), its name - * is stored below, whereas the ifp is filled when the rule is configured. - */ - char if_name[IFNAMSIZ]; - struct ifnet *ifp ; - int ready ; /* set if ifp != NULL and we got a signal from it */ - - struct dn_flow_set fs ; /* used with fixed-rate flows */ -}; -SLIST_HEAD(dn_pipe_head7, dn_pipe7); - - -/* FREEBSD8 ip_dummynet.h r196045 */ -struct dn_flow_queue8 { - struct dn_flow_queue8 *next ; - struct ipfw_flow_id id ; - - struct mbuf *head, *tail ; /* queue of packets */ - u_int len ; - u_int len_bytes ; - - uint64_t numbytes ; /* credit for transmission (dynamic queues) */ - int64_t extra_bits; /* extra bits simulating unavailable channel */ - - u_int64_t tot_pkts ; /* statistics counters */ - u_int64_t tot_bytes ; - u_int32_t drops ; - - int hash_slot ; /* debugging/diagnostic */ - - /* RED parameters */ - int avg ; /* average queue length est. (scaled) */ - int count ; /* arrivals since last RED drop */ - int random ; /* random value (scaled) */ - int64_t idle_time; /* start of queue idle time */ - - /* WF2Q+ support */ - struct dn_flow_set *fs ; /* parent flow set */ - int heap_pos ; /* position (index) of struct in heap */ - int64_t sched_time ; /* current time when queue enters ready_heap */ - - int64_t S,F ; /* start time, finish time */ -}; - -struct dn_pipe8 { /* a pipe */ - SLIST_ENTRY(dn_pipe8) next; /* linked list in a hash slot */ - - int pipe_nr ; /* number */ - int bandwidth; /* really, bytes/tick. */ - int delay ; /* really, ticks */ - - struct mbuf *head, *tail ; /* packets in delay line */ - - /* WF2Q+ */ - struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/ - struct dn_heap7 not_eligible_heap; /* top extract- key Start time */ - struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */ - - int64_t V ; /* virtual time */ - int sum; /* sum of weights of all active sessions */ - - /* Same as in dn_flow_queue, numbytes can become large */ - int64_t numbytes; /* bits I can transmit (more or less). 
*/ - uint64_t burst; /* burst size, scaled: bits * hz */ - - int64_t sched_time ; /* time pipe was scheduled in ready_heap */ - int64_t idle_time; /* start of pipe idle time */ - - char if_name[IFNAMSIZ]; - struct ifnet *ifp ; - int ready ; /* set if ifp != NULL and we got a signal from it */ - - struct dn_flow_set fs ; /* used with fixed-rate flows */ - - /* fields to simulate a delay profile */ -#define ED_MAX_NAME_LEN 32 - char name[ED_MAX_NAME_LEN]; - int loss_level; - int samples_no; - int *samples; -}; - -#define ED_MAX_SAMPLES_NO 1024 -struct dn_pipe_max8 { - struct dn_pipe8 pipe; - int samples[ED_MAX_SAMPLES_NO]; -}; -SLIST_HEAD(dn_pipe_head8, dn_pipe8); - -/* - * Changes from 7.2 to 8: - * dn_pipe: - * numbytes from int to int64_t - * add burst (int64_t) - * add idle_time (int64_t) - * add profile - * add struct dn_pipe_max - * add flag DN_HAS_PROFILE - * - * dn_flow_queue - * numbytes from u_long to int64_t - * add extra_bits (int64_t) - * q_time from u_int32_t to int64_t and name idle_time - * - * dn_flow_set unchanged - * - */ - -/* NOTE:XXX copied from dummynet.c */ -#define O_NEXT(p, len) ((void *)((char *)p + len)) -static void -oid_fill(struct dn_id *oid, int len, int type, uintptr_t id) -{ - oid->len = len; - oid->type = type; - oid->subtype = 0; - oid->id = id; -} -/* make room in the buffer and move the pointer forward */ -static void * -o_next(struct dn_id **o, int len, int type) -{ - struct dn_id *ret = *o; - oid_fill(ret, len, type, 0); - *o = O_NEXT(*o, len); - return ret; -} - - -static size_t pipesize7 = sizeof(struct dn_pipe7); -static size_t pipesize8 = sizeof(struct dn_pipe8); -static size_t pipesizemax8 = sizeof(struct dn_pipe_max8); - -/* Indicate 'ipfw' version - * 1: from FreeBSD 7.2 - * 0: from FreeBSD 8 - * -1: unknow (for now is unused) - * - * It is update when a IP_DUMMYNET_DEL or IP_DUMMYNET_CONFIGURE request arrives - * NOTE: if a IP_DUMMYNET_GET arrives and the 'ipfw' version is unknow, - * it is suppose to be the FreeBSD 8 version. - */ -static int is7 = 0; - -static int -convertflags2new(int src) -{ - int dst = 0; - - if (src & DNOLD_HAVE_FLOW_MASK) - dst |= DN_HAVE_MASK; - if (src & DNOLD_QSIZE_IS_BYTES) - dst |= DN_QSIZE_BYTES; - if (src & DNOLD_NOERROR) - dst |= DN_NOERROR; - if (src & DNOLD_IS_RED) - dst |= DN_IS_RED; - if (src & DNOLD_IS_GENTLE_RED) - dst |= DN_IS_GENTLE_RED; - if (src & DNOLD_HAS_PROFILE) - dst |= DN_HAS_PROFILE; - - return dst; -} - -static int -convertflags2old(int src) -{ - int dst = 0; - - if (src & DN_HAVE_MASK) - dst |= DNOLD_HAVE_FLOW_MASK; - if (src & DN_IS_RED) - dst |= DNOLD_IS_RED; - if (src & DN_IS_GENTLE_RED) - dst |= DNOLD_IS_GENTLE_RED; - if (src & DN_NOERROR) - dst |= DNOLD_NOERROR; - if (src & DN_HAS_PROFILE) - dst |= DNOLD_HAS_PROFILE; - if (src & DN_QSIZE_BYTES) - dst |= DNOLD_QSIZE_IS_BYTES; - - return dst; -} - -static int -dn_compat_del(void *v) -{ - struct dn_pipe7 *p = (struct dn_pipe7 *) v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *) v; - struct { - struct dn_id oid; - uintptr_t a[1]; /* add more if we want a list */ - } cmd; - - /* XXX DN_API_VERSION ??? 
-static int -dn_compat_del(void *v) -{ - struct dn_pipe7 *p = (struct dn_pipe7 *) v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *) v; - struct { - struct dn_id oid; - uintptr_t a[1]; /* add more if we want a list */ - } cmd; - - /* XXX DN_API_VERSION ??? */ - oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION); - - if (is7) { - if (p->pipe_nr == 0 && p->fs.fs_nr == 0) - return EINVAL; - if (p->pipe_nr != 0 && p->fs.fs_nr != 0) - return EINVAL; - } else { - if (p8->pipe_nr == 0 && p8->fs.fs_nr == 0) - return EINVAL; - if (p8->pipe_nr != 0 && p8->fs.fs_nr != 0) - return EINVAL; - } - - if (p->pipe_nr != 0) { /* pipe x delete */ - cmd.a[0] = p->pipe_nr; - cmd.oid.subtype = DN_LINK; - } else { /* queue x delete */ - cmd.oid.subtype = DN_FS; - cmd.a[0] = (is7) ? p->fs.fs_nr : p8->fs.fs_nr; - } - - return do_config(&cmd, cmd.oid.len); -} - -static int -dn_compat_config_queue(struct dn_fs *fs, void* v) -{ - struct dn_pipe7 *p7 = (struct dn_pipe7 *)v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - struct dn_flow_set *f; - - if (is7) - f = &p7->fs; - else - f = &p8->fs; - - fs->fs_nr = f->fs_nr; - fs->sched_nr = f->parent_nr; - fs->flow_mask = f->flow_mask; - fs->buckets = f->rq_size; - fs->qsize = f->qsize; - fs->plr = f->plr; - fs->par[0] = f->weight; - fs->flags = convertflags2new(f->flags_fs); - if (fs->flags & DN_IS_GENTLE_RED || fs->flags & DN_IS_RED) { - fs->w_q = f->w_q; - fs->max_th = f->max_th; - fs->min_th = f->min_th; - fs->max_p = f->max_p; - } - - return 0; -} - -static int -dn_compat_config_pipe(struct dn_sch *sch, struct dn_link *p, - struct dn_fs *fs, void* v) -{ - struct dn_pipe7 *p7 = (struct dn_pipe7 *)v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - int i = p7->pipe_nr; - - sch->sched_nr = i; - sch->oid.subtype = 0; - p->link_nr = i; - fs->fs_nr = i + 2*DN_MAX_ID; - fs->sched_nr = i + DN_MAX_ID; - - /* Common to 7 and 8 */ - p->bandwidth = p7->bandwidth; - p->delay = p7->delay; - if (!is7) { - /* FreeBSD 8 has burst */ - p->burst = p8->burst; - } - - /* fill the FIFO flowset */ - dn_compat_config_queue(fs, v); - fs->fs_nr = i + 2*DN_MAX_ID; - fs->sched_nr = i + DN_MAX_ID; - - /* Move scheduler-related parameters from fs to sch */ - sch->buckets = fs->buckets; /*XXX*/ - fs->buckets = 0; - if (fs->flags & DN_HAVE_MASK) { - sch->flags |= DN_HAVE_MASK; - fs->flags &= ~DN_HAVE_MASK; - sch->sched_mask = fs->flow_mask; - bzero(&fs->flow_mask, sizeof(struct ipfw_flow_id)); - } - - return 0; -} -
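For reference, dn_compat_config_pipe() above spreads one legacy pipe number i across the three new-style objects using DN_MAX_ID offsets. This is just a summary of the assignments in the code, not new behavior:

/*
 * Number-space layout for legacy 'pipe i config':
 *
 *   scheduler   sch->sched_nr = i
 *   link        p->link_nr    = i
 *   FIFO fs     fs->fs_nr     = i + 2*DN_MAX_ID
 *   fs parent   fs->sched_nr  = i + DN_MAX_ID
 *
 * Plain 'queue x config' flowsets keep fs_nr < DN_MAX_ID, which is how
 * copy_data_helper_compat() later tells the two kinds apart.
 */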
-static int -dn_compat_config_profile(struct dn_profile *pf, struct dn_link *p, - void *v) -{ - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - - p8->samples = &(((struct dn_pipe_max8 *)p8)->samples[0]); - - pf->link_nr = p->link_nr; - pf->loss_level = p8->loss_level; -// pf->bandwidth = p->bandwidth; //XXX bandwidth redundant? - pf->samples_no = p8->samples_no; - strncpy(pf->name, p8->name,sizeof(pf->name)); - bcopy(p8->samples, pf->samples, sizeof(pf->samples)); - - return 0; -} - -/* - * If p->pipe_nr != 0 the command is 'pipe x config', so we need to create - * the three main structs, else only a flowset is created - */ -static int -dn_compat_configure(void *v) -{ - struct dn_id *buf = NULL, *base; - struct dn_sch *sch = NULL; - struct dn_link *p = NULL; - struct dn_fs *fs = NULL; - struct dn_profile *pf = NULL; - int lmax; - int error; - - struct dn_pipe7 *p7 = (struct dn_pipe7 *)v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - - int i; /* number of objects to configure */ - - lmax = sizeof(struct dn_id); /* command header */ - lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) + - sizeof(struct dn_fs) + sizeof(struct dn_profile); - - base = buf = malloc(lmax, M_DUMMYNET, M_WAIT|M_ZERO); - o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG); - base->id = DN_API_VERSION; - - /* pipe_nr is the same in p7 and p8 */ - i = p7->pipe_nr; - if (i != 0) { /* pipe config */ - sch = o_next(&buf, sizeof(*sch), DN_SCH); - p = o_next(&buf, sizeof(*p), DN_LINK); - fs = o_next(&buf, sizeof(*fs), DN_FS); - - error = dn_compat_config_pipe(sch, p, fs, v); - if (error) { - free(buf, M_DUMMYNET); - return error; - } - if (!is7 && p8->samples_no > 0) { - /* Add profiles */ - pf = o_next(&buf, sizeof(*pf), DN_PROFILE); - error = dn_compat_config_profile(pf, p, v); - if (error) { - free(buf, M_DUMMYNET); - return error; - } - } - } else { /* queue config */ - fs = o_next(&buf, sizeof(*fs), DN_FS); - error = dn_compat_config_queue(fs, v); - if (error) { - free(buf, M_DUMMYNET); - return error; - } - } - error = do_config(base, (char *)buf - (char *)base); - - if (buf) - free(buf, M_DUMMYNET); - return error; -} - -int -dn_compat_calc_size(void) -{ - int need = 0; - /* XXX use FreeBSD 8 struct size */ - /* NOTE: - * - half scheduler: schk_count/2 - * - all flowset: fsk_count - * - all flowset queues: queue_count - * - all pipe queue: si_count - */ - need += dn_cfg.schk_count * sizeof(struct dn_pipe8) / 2; - need += dn_cfg.fsk_count * sizeof(struct dn_flow_set); - need += dn_cfg.si_count * sizeof(struct dn_flow_queue8); - need += dn_cfg.queue_count * sizeof(struct dn_flow_queue8); - - return need; -} - -int -dn_c_copy_q (void *_ni, void *arg) -{ - struct copy_args *a = arg; - struct dn_flow_queue7 *fq7 = (struct dn_flow_queue7 *)*a->start; - struct dn_flow_queue8 *fq8 = (struct dn_flow_queue8 *)*a->start; - struct dn_flow *ni = (struct dn_flow *)_ni; - int size = 0; - - /* XXX hash slot not set */ - /* No difference between 7.2/8 */ - fq7->len = ni->length; - fq7->len_bytes = ni->len_bytes; - fq7->id = ni->fid; - - if (is7) { - size = sizeof(struct dn_flow_queue7); - fq7->tot_pkts = ni->tot_pkts; - fq7->tot_bytes = ni->tot_bytes; - fq7->drops = ni->drops; - } else { - size = sizeof(struct dn_flow_queue8); - fq8->tot_pkts = ni->tot_pkts; - fq8->tot_bytes = ni->tot_bytes; - fq8->drops = ni->drops; - } - - *a->start += size; - return 0; -} - -int -dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq) -{ - struct dn_link *l = &s->link; - struct dn_fsk *f = s->fs; - - struct dn_pipe7 *pipe7 = (struct dn_pipe7 *)*a->start; - struct dn_pipe8 *pipe8 = (struct dn_pipe8 *)*a->start; - struct dn_flow_set *fs; - int size = 0; - - if (is7) { - fs = &pipe7->fs; - size = sizeof(struct dn_pipe7); - } else { - fs = &pipe8->fs; - size = sizeof(struct dn_pipe8); - } - - /* These 4 fields are the same in pipe7 and pipe8 */ - pipe7->next.sle_next = 
(struct dn_pipe7 *)DN_IS_PIPE; - pipe7->bandwidth = l->bandwidth; - pipe7->delay = l->delay * 1000 / hz; - pipe7->pipe_nr = l->link_nr - DN_MAX_ID; - - if (!is7) { - if (s->profile) { - struct dn_profile *pf = s->profile; - strncpy(pipe8->name, pf->name, sizeof(pf->name)); - pipe8->loss_level = pf->loss_level; - pipe8->samples_no = pf->samples_no; - } - pipe8->burst = div64(l->burst , 8 * hz); - } - - fs->flow_mask = s->sch.sched_mask; - fs->rq_size = s->sch.buckets ? s->sch.buckets : 1; - - fs->parent_nr = l->link_nr - DN_MAX_ID; - fs->qsize = f->fs.qsize; - fs->plr = f->fs.plr; - fs->w_q = f->fs.w_q; - fs->max_th = f->max_th; - fs->min_th = f->min_th; - fs->max_p = f->fs.max_p; - fs->rq_elements = nq; - - fs->flags_fs = convertflags2old(f->fs.flags); - - *a->start += size; - return 0; -} - - -int -dn_compat_copy_pipe(struct copy_args *a, void *_o) -{ - int have = a->end - *a->start; - int need = 0; - int pipe_size = sizeof(struct dn_pipe8); - int queue_size = sizeof(struct dn_flow_queue8); - int n_queue = 0; /* number of queues */ - - struct dn_schk *s = (struct dn_schk *)_o; - /* calculate needed space: - * - struct dn_pipe - * - if there are instances, dn_queue * n_instances - */ - n_queue = (s->sch.flags & DN_HAVE_MASK ? dn_ht_entries(s->siht) : - (s->siht ? 1 : 0)); - need = pipe_size + queue_size * n_queue; - if (have < need) { - D("have %d < need %d", have, need); - return 1; - } - /* copy pipe */ - dn_c_copy_pipe(s, a, n_queue); - - /* copy queues */ - if (s->sch.flags & DN_HAVE_MASK) - dn_ht_scan(s->siht, dn_c_copy_q, a); - else if (s->siht) - dn_c_copy_q(s->siht, a); - return 0; -} - -int -dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq) -{ - struct dn_flow_set *fs = (struct dn_flow_set *)*a->start; - - fs->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE; - fs->fs_nr = f->fs.fs_nr; - fs->qsize = f->fs.qsize; - fs->plr = f->fs.plr; - fs->w_q = f->fs.w_q; - fs->max_th = f->max_th; - fs->min_th = f->min_th; - fs->max_p = f->fs.max_p; - fs->flow_mask = f->fs.flow_mask; - fs->rq_elements = nq; - fs->rq_size = (f->fs.buckets ? f->fs.buckets : 1); - fs->parent_nr = f->fs.sched_nr; - fs->weight = f->fs.par[0]; - - fs->flags_fs = convertflags2old(f->fs.flags); - *a->start += sizeof(struct dn_flow_set); - return 0; -} - -int -dn_compat_copy_queue(struct copy_args *a, void *_o) -{ - int have = a->end - *a->start; - int need = 0; - int fs_size = sizeof(struct dn_flow_set); - int queue_size = sizeof(struct dn_flow_queue8); - - struct dn_fsk *fs = (struct dn_fsk *)_o; - int n_queue = 0; /* number of queues */ - - n_queue = (fs->fs.flags & DN_HAVE_MASK ? dn_ht_entries(fs->qht) : - (fs->qht ? 1 : 0)); - - need = fs_size + queue_size * n_queue; - if (have < need) { - D("have < need"); - return 1; - } - - /* copy flowset */ - dn_c_copy_fs(fs, a, n_queue); - - /* copy queues */ - if (fs->fs.flags & DN_HAVE_MASK) - dn_ht_scan(fs->qht, dn_c_copy_q, a); - else if (fs->qht) - dn_c_copy_q(fs->qht, a); - - return 0; -} - -int -copy_data_helper_compat(void *_o, void *_arg) -{ - struct copy_args *a = _arg; - - if (a->type == DN_COMPAT_PIPE) { - struct dn_schk *s = _o; - if (s->sch.oid.subtype != 1 || s->sch.sched_nr <= DN_MAX_ID) { - return 0; /* not old type */ - } - /* copy pipe parameters, and if instance exists, copy - * other parameters and eventually queues. 
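dn_c_copy_pipe() above exports kernel units back to the legacy ABI: link delay is held in ticks internally but in milliseconds in the old structs, and burst is stored scaled as bits * hz. A worked example, assuming hz = 1000 (numbers are illustrative):

/*
 * Assumed hz = 1000:
 *   delay: 50 ticks         -> pipe7->delay = 50 * 1000 / hz = 50 ms
 *   burst: 10000 user bytes -> held internally as 10000 * 8 * hz
 *          (bits * hz), so the export div64(l->burst, 8 * hz)
 *          returns 10000 bytes again.
 */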
- */ - if(dn_compat_copy_pipe(a, _o)) - return DNHT_SCAN_END; - } else if (a->type == DN_COMPAT_QUEUE) { - struct dn_fsk *fs = _o; - if (fs->fs.fs_nr >= DN_MAX_ID) - return 0; - if (dn_compat_copy_queue(a, _o)) - return DNHT_SCAN_END; - } - return 0; -} - -/* Main function to manage old requests */ -int -ip_dummynet_compat(struct sockopt *sopt) -{ - int error=0; - void *v = NULL; - struct dn_id oid; - - /* Length of data, used to find the ipfw version... */ - int len = sopt->sopt_valsize; - - /* len can be 0 if command was dummynet_flush */ - if (len == pipesize7) { - D("setting compatibility with FreeBSD 7.2"); - is7 = 1; - } - else if (len == pipesize8 || len == pipesizemax8) { - D("setting compatibility with FreeBSD 8"); - is7 = 0; - } - - switch (sopt->sopt_name) { - default: - printf("dummynet: -- unknown option %d", sopt->sopt_name); - error = EINVAL; - break; - - case IP_DUMMYNET_FLUSH: - oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION); - do_config(&oid, oid.len); - break; - - case IP_DUMMYNET_DEL: - v = malloc(len, M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, v, len, len); - if (error) - break; - error = dn_compat_del(v); - free(v, M_TEMP); - break; - - case IP_DUMMYNET_CONFIGURE: - v = malloc(len, M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, v, len, len); - if (error) - break; - error = dn_compat_configure(v); - free(v, M_TEMP); - break; - - case IP_DUMMYNET_GET: { - void *buf; - int ret; - int original_size = sopt->sopt_valsize; - int size; - - ret = dummynet_get(sopt, &buf); - if (ret) - return 0;//XXX ? - size = sopt->sopt_valsize; - sopt->sopt_valsize = original_size; - D("size=%d, buf=%p", size, buf); - ret = sooptcopyout(sopt, buf, size); - if (ret) - printf(" %s ERROR sooptcopyout\n", __FUNCTION__); - if (buf) - free(buf, M_DUMMYNET); - } - } - - return error; -} - - diff --git a/freebsd/sys/netpfil/ipfw/ip_dn_io.c b/freebsd/sys/netpfil/ipfw/ip_dn_io.c deleted file mode 100644 index 23392a55..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_dn_io.c +++ /dev/null @@ -1,852 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * Dummynet portions related to packet handling. 
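For reference, ip_dummynet_compat() above is reached through raw-IP socket options, and the payload size is exactly what drives the is7 sniffing. A hypothetical minimal userland sketch (struct dn_pipe7 stands in for the legacy struct dn_pipe that an old ipfw binary would use; all names outside the sockopt are illustrative):

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <err.h>

static void
legacy_pipe_config(void)
{
	struct dn_pipe7 p;	/* sizeof(p) == pipesize7 => kernel sets is7 = 1 */
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

	if (s < 0)
		err(1, "socket");
	memset(&p, 0, sizeof(p));
	p.pipe_nr = 10;		/* 'pipe 10 config ...' */
	p.bandwidth = 500000;
	if (setsockopt(s, IPPROTO_IP, IP_DUMMYNET_CONFIGURE,
	    &p, sizeof(p)) < 0)
		err(1, "IP_DUMMYNET_CONFIGURE");
}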
- */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <rtems/bsd/local/opt_inet6.h> - -#include <rtems/bsd/sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/kernel.h> -#include <rtems/bsd/sys/lock.h> -#include <sys/module.h> -#include <sys/priv.h> -#include <sys/proc.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/time.h> -#include <sys/sysctl.h> - -#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ -#include <net/netisr.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/ip.h> /* ip_len, ip_off */ -#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ -#include <netinet/ip_fw.h> -#include <netinet/ip_dummynet.h> -#include <netinet/if_ether.h> /* various ether_* routines */ -#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */ -#include <netinet6/ip6_var.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/dn_heap.h> -#include <netpfil/ipfw/ip_dn_private.h> -#include <netpfil/ipfw/dn_sched.h> - -/* - * We keep a private variable for the simulation time, but we could - * probably use an existing one ("softticks" in sys/kern/kern_timeout.c) - * instead of dn_cfg.curr_time - */ - -struct dn_parms dn_cfg; -//VNET_DEFINE(struct dn_parms, _base_dn_cfg); - -static long tick_last; /* Last tick duration (usec). */ -static long tick_delta; /* Last vs standard tick diff (usec). */ -static long tick_delta_sum; /* Accumulated tick difference (usec).*/ -static long tick_adjustment; /* Tick adjustments done. */ -static long tick_lost; /* Lost(coalesced) ticks number. */ -/* Adjusted vs non-adjusted curr_time difference (ticks). */ -static long tick_diff; - -static unsigned long io_pkt; -static unsigned long io_pkt_fast; -static unsigned long io_pkt_drop; - -/* - * We use a heap to store entities for which we have pending timer events. - * The heap is checked at every tick and all entities with expired events - * are extracted. 
- */ - -MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); - -extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *); - -#ifdef SYSCTL_NODE - -SYSBEGIN(f4) - -SYSCTL_DECL(_net_inet); -SYSCTL_DECL(_net_inet_ip); -static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); - -/* wrapper to pass dn_cfg fields to SYSCTL_* */ -//#define DC(x) (&(VNET_NAME(_base_dn_cfg).x)) -#define DC(x) (&(dn_cfg.x)) -/* parameters */ - -static int -sysctl_hash_size(SYSCTL_HANDLER_ARGS) -{ - int error, value; - - value = dn_cfg.hash_size; - error = sysctl_handle_int(oidp, &value, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (value < 16 || value > 65536) - return (EINVAL); - dn_cfg.hash_size = value; - return (0); -} - -SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size, - CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_hash_size, - "I", "Default hash table size"); - -static int -sysctl_limits(SYSCTL_HANDLER_ARGS) -{ - int error; - long value; - - if (arg2 != 0) - value = dn_cfg.slot_limit; - else - value = dn_cfg.byte_limit; - error = sysctl_handle_long(oidp, &value, 0, req); - - if (error != 0 || req->newptr == NULL) - return (error); - if (arg2 != 0) { - if (value < 1) - return (EINVAL); - dn_cfg.slot_limit = value; - } else { - if (value < 1500) - return (EINVAL); - dn_cfg.byte_limit = value; - } - return (0); -} - -SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit, - CTLTYPE_LONG | CTLFLAG_RW, 0, 1, sysctl_limits, - "L", "Upper limit in slots for pipe queue."); -SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit, - CTLTYPE_LONG | CTLFLAG_RW, 0, 0, sysctl_limits, - "L", "Upper limit in bytes for pipe queue."); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast, - CTLFLAG_RW, DC(io_fast), 0, "Enable fast dummynet io."); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, - CTLFLAG_RW, DC(debug), 0, "Dummynet debug level"); - -/* RED parameters */ -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, - CTLFLAG_RD, DC(red_lookup_depth), 0, "Depth of RED lookup table"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size, - CTLFLAG_RD, DC(red_avg_pkt_size), 0, "RED Medium packet size"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, - CTLFLAG_RD, DC(red_max_pkt_size), 0, "RED Max packet size"); - -/* time adjustment */ -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta, - CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec)."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum, - CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec)."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment, - CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff, - CTLFLAG_RD, &tick_diff, 0, - "Adjusted vs non-adjusted curr_time difference (ticks)."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost, - CTLFLAG_RD, &tick_lost, 0, - "Number of ticks coalesced by dummynet taskqueue."); - -/* Drain parameters */ -SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire, - CTLFLAG_RW, DC(expire), 0, "Expire empty queues/pipes"); -SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle, - CTLFLAG_RD, DC(expire_cycle), 0, "Expire cycle for queues/pipes"); - -/* statistics */ -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, schk_count, - CTLFLAG_RD, DC(schk_count), 0, "Number of schedulers"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, si_count, - CTLFLAG_RD, DC(si_count), 0, "Number of scheduler instances"); 
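These knobs are reachable through the standard sysctl(3) interface; a minimal hypothetical userland sketch, with the OID names taken from the declarations above:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int v = 0, newv = 256;
	size_t len = sizeof(v);

	/* read the current hash table size */
	if (sysctlbyname("net.inet.ip.dummynet.hash_size",
	    &v, &len, NULL, 0) == 0)
		printf("hash_size = %d\n", v);
	/* sysctl_hash_size() above rejects values outside [16, 65536] */
	return (sysctlbyname("net.inet.ip.dummynet.hash_size",
	    NULL, NULL, &newv, sizeof(newv)));
}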
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, fsk_count, - CTLFLAG_RD, DC(fsk_count), 0, "Number of flowsets"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, queue_count, - CTLFLAG_RD, DC(queue_count), 0, "Number of queues"); -SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt, - CTLFLAG_RD, &io_pkt, 0, - "Number of packets passed to dummynet."); -SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast, - CTLFLAG_RD, &io_pkt_fast, 0, - "Number of packets bypassed dummynet scheduler."); -SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop, - CTLFLAG_RD, &io_pkt_drop, 0, - "Number of packets dropped by dummynet."); -#undef DC -SYSEND - -#endif - -static void dummynet_send(struct mbuf *); - -/* - * Packets processed by dummynet have an mbuf tag associated with - * them that carries their dummynet state. - * Outside dummynet, only the 'rule' field is relevant, and it must - * be at the beginning of the structure. - */ -struct dn_pkt_tag { - struct ipfw_rule_ref rule; /* matching rule */ - - /* second part, dummynet specific */ - int dn_dir; /* action when packet comes out.*/ - /* see ip_fw_private.h */ - uint64_t output_time; /* when the pkt is due for delivery*/ - struct ifnet *ifp; /* interface, for ip_output */ - struct _ip6dn_args ip6opt; /* XXX ipv6 options */ -}; - -/* - * Return the mbuf tag holding the dummynet state (it should - * be the first one on the list). - */ -static struct dn_pkt_tag * -dn_tag_get(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_first(m); - KASSERT(mtag != NULL && - mtag->m_tag_cookie == MTAG_ABI_COMPAT && - mtag->m_tag_id == PACKET_TAG_DUMMYNET, - ("packet on dummynet queue w/o dummynet tag!")); - return (struct dn_pkt_tag *)(mtag+1); -} - -static inline void -mq_append(struct mq *q, struct mbuf *m) -{ - if (q->head == NULL) - q->head = m; - else - q->tail->m_nextpkt = m; - q->tail = m; - m->m_nextpkt = NULL; -} - -/* - * Dispose of a list of packets. Use a function so if we need to do - * more work, this is a central point to do it. - */ -void dn_free_pkts(struct mbuf *mnext) -{ - struct mbuf *m; - - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - FREE_PKT(m); - } -} - -static int -red_drops (struct dn_queue *q, int len) -{ - /* - * RED algorithm - * - * RED calculates the average queue size (avg) using a low-pass filter - * with an exponential weighted (w_q) moving average: - * avg <- (1-w_q) * avg + w_q * q_size - * where q_size is the queue length (measured in bytes or packets). - * - * If q_size == 0, we compute the idle time for the link, and set - * avg = (1 - w_q)^(idle/s) - * where s is the time needed for transmitting a medium-sized packet. - * - * Now, if avg < min_th the packet is enqueued. - * If avg > max_th the packet is dropped. Otherwise, the packet is - * dropped with probability P, a function of avg. - */ - - struct dn_fsk *fs = q->fs; - int64_t p_b = 0; - - /* Queue in bytes or packets? */ - uint32_t q_size = (fs->fs.flags & DN_QSIZE_BYTES) ? - q->ni.len_bytes : q->ni.length; - - /* Average queue size estimation. */ - if (q_size != 0) { - /* Queue is not empty, avg <- avg + (q_size - avg) * w_q */ - int diff = SCALE(q_size) - q->avg; - int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q); - - q->avg += (int)v; - } else { - /* - * Queue is empty, find for how long the queue has been - * empty and use a lookup table for computing - * (1 - w_q)^(idle_time/s) where s is the time to send a - * (small) packet. - * XXX check wraps... 
- */ - if (q->avg) { - u_int t = div64((dn_cfg.curr_time - q->q_time), fs->lookup_step); - - q->avg = (t < fs->lookup_depth) ? - SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; - } - } - - /* Should I drop? */ - if (q->avg < fs->min_th) { - q->count = -1; - return (0); /* accept packet */ - } - if (q->avg >= fs->max_th) { /* average queue >= max threshold */ - if (fs->fs.flags & DN_IS_GENTLE_RED) { - /* - * According to Gentle-RED, if avg is greater than - * max_th the packet is dropped with a probability - * p_b = c_3 * avg - c_4 - * where c_3 = (1 - max_p) / max_th - * c_4 = 1 - 2 * max_p - */ - p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) - - fs->c_4; - } else { - q->count = -1; - return (1); - } - } else if (q->avg > fs->min_th) { - /* - * We compute p_b using the linear dropping function - * p_b = c_1 * avg - c_2 - * where c_1 = max_p / (max_th - min_th) - * c_2 = max_p * min_th / (max_th - min_th) - */ - p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2; - } - - if (fs->fs.flags & DN_QSIZE_BYTES) - p_b = div64((p_b * len) , fs->max_pkt_size); - if (++q->count == 0) - q->random = random() & 0xffff; - else { - /* - * q->count counts packets arrived since last drop, so a greater - * value of q->count means a greater packet drop probability. - */ - if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) { - q->count = 0; - /* After a drop we calculate a new random value. */ - q->random = random() & 0xffff; - return (1); /* drop */ - } - } - /* End of RED algorithm. */ - - return (0); /* accept */ - -} -
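A small worked example may help decode the scaled arithmetic in red_drops() above (illustrative numbers, not from the source):

/*
 * Assume min_th = 5 and max_th = 15 (packets), max_p = 0.1:
 *   c_1 = max_p / (max_th - min_th)          = 0.01
 *   c_2 = max_p * min_th / (max_th - min_th) = 0.05
 * At avg = 10 packets the linear branch gives
 *   p_b = c_1 * avg - c_2 = 0.05
 * and the final test scales p_b by q->count, so after e.g. 10
 * arrivals without a drop the comparison against q->random uses
 * roughly 0.5 -- persistent congestion is dropped sooner.
 */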
-/* - * Enqueue a packet in q, subject to space and queue management policy - * (whose parameters are in q->fs). - * Update stats for the queue and the scheduler. - * Return 0 on success, 1 on drop. The packet is consumed anyway. - */ -int -dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop) -{ - struct dn_fs *f; - struct dn_flow *ni; /* stats for scheduler instance */ - uint64_t len; - - if (q->fs == NULL || q->_si == NULL) { - printf("%s fs %p si %p, dropping\n", - __FUNCTION__, q->fs, q->_si); - FREE_PKT(m); - return 1; - } - f = &(q->fs->fs); - ni = &q->_si->ni; - len = m->m_pkthdr.len; - /* Update statistics, then check reasons to drop pkt. */ - q->ni.tot_bytes += len; - q->ni.tot_pkts++; - ni->tot_bytes += len; - ni->tot_pkts++; - if (drop) - goto drop; - if (f->plr && random() < f->plr) - goto drop; - if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len)) - goto drop; - if (f->flags & DN_QSIZE_BYTES) { - if (q->ni.len_bytes > f->qsize) - goto drop; - } else if (q->ni.length >= f->qsize) { - goto drop; - } - mq_append(&q->mq, m); - q->ni.length++; - q->ni.len_bytes += len; - ni->length++; - ni->len_bytes += len; - return 0; - -drop: - io_pkt_drop++; - q->ni.drops++; - ni->drops++; - FREE_PKT(m); - return 1; -} - -/* - * Fetch packets from the delay line which are due now. If there are - * leftover packets, reinsert the delay line in the heap. - * Runs under scheduler lock. - */ -static void -transmit_event(struct mq *q, struct delay_line *dline, uint64_t now) -{ - struct mbuf *m; - struct dn_pkt_tag *pkt = NULL; - - dline->oid.subtype = 0; /* not in heap */ - while ((m = dline->mq.head) != NULL) { - pkt = dn_tag_get(m); - if (!DN_KEY_LEQ(pkt->output_time, now)) - break; - dline->mq.head = m->m_nextpkt; - mq_append(q, m); - } - if (m != NULL) { - dline->oid.subtype = 1; /* in heap */ - heap_insert(&dn_cfg.evheap, pkt->output_time, dline); - } -} - -/* - * Convert the additional MAC overheads/delays into an equivalent - * number of bits for the given data rate. The samples are - * in milliseconds so we need to divide by 1000. - */ -static uint64_t -extra_bits(struct mbuf *m, struct dn_schk *s) -{ - int index; - uint64_t bits; - struct dn_profile *pf = s->profile; - - if (!pf || pf->samples_no == 0) - return 0; - index = random() % pf->samples_no; - bits = div64((uint64_t)pf->samples[index] * s->link.bandwidth, 1000); - if (index >= pf->loss_level) { - struct dn_pkt_tag *dt = dn_tag_get(m); - if (dt) - dt->dn_dir = DIR_DROP; - } - return bits; -} - -/* - * Send traffic from a scheduler instance due by 'now'. - * Return a pointer to the head of the queue. - */ -static struct mbuf * -serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now) -{ - struct mq def_q; - struct dn_schk *s = si->sched; - struct mbuf *m = NULL; - int delay_line_idle = (si->dline.mq.head == NULL); - int done, bw; - - if (q == NULL) { - q = &def_q; - q->head = NULL; - } - - bw = s->link.bandwidth; - si->kflags &= ~DN_ACTIVE; - - if (bw > 0) - si->credit += (now - si->sched_time) * bw; - else - si->credit = 0; - si->sched_time = now; - done = 0; - while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) { - uint64_t len_scaled; - - done++; - len_scaled = (bw == 0) ? 0 : hz * - (m->m_pkthdr.len * 8 + extra_bits(m, s)); - si->credit -= len_scaled; - /* Move packet in the delay line */ - dn_tag_get(m)->output_time = dn_cfg.curr_time + s->link.delay ; - mq_append(&si->dline.mq, m); - } - - /* - * If credit >= 0 the instance is idle, mark time. - * Otherwise put back in the heap, and adjust the output - * time of the last inserted packet, m, which was too early. - */ - if (si->credit >= 0) { - si->idle_time = now; - } else { - uint64_t t; - KASSERT (bw > 0, ("bw=0 and credit<0 ?")); - t = div64(bw - 1 - si->credit, bw); - if (m) - dn_tag_get(m)->output_time += t; - si->kflags |= DN_ACTIVE; - heap_insert(&dn_cfg.evheap, now + t, si); - } - if (delay_line_idle && done) - transmit_event(q, &si->dline, now); - return q->head; -} - -/* - * The timer handler for dummynet. Time is computed in ticks, but - * the code is tolerant to the actual rate at which this is called. - * Once complete, the function reschedules itself for the next tick. - */ -void -dummynet_task(void *context, int pending) -{ - struct timeval t; - struct mq q = { NULL, NULL }; /* queue to accumulate results */ - - CURVNET_SET((struct vnet *)context); - - DN_BH_WLOCK(); - - /* Update number of lost(coalesced) ticks. */ - tick_lost += pending - 1; - - getmicrouptime(&t); - /* Last tick duration (usec). */ - tick_last = (t.tv_sec - dn_cfg.prev_t.tv_sec) * 1000000 + - (t.tv_usec - dn_cfg.prev_t.tv_usec); - /* Last tick vs standard tick difference (usec). */ - tick_delta = (tick_last * hz - 1000000) / hz; - /* Accumulated tick difference (usec). */ - tick_delta_sum += tick_delta; - - dn_cfg.prev_t = t; - - /* - * Adjust curr_time if the accumulated tick difference is - * greater than the 'standard' tick. Since curr_time should - * be monotonically increasing, we do positive adjustments - * as required, and throttle curr_time in case of negative - * adjustment. 
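To make the drift correction below concrete, here is a worked example with an assumed hz of 1000, so one tick is nominally 1000 us (numbers are illustrative):

/*
 * Assume hz = 1000. If getmicrouptime() shows the last tick really
 * took 1002 us, then
 *   tick_delta = (1002 * 1000 - 1000000) / 1000 = 2 us
 * and after 500 such ticks tick_delta_sum reaches 1000 us >= tick,
 * so diff = 1: curr_time advances by one extra tick and the modulo
 * resets tick_delta_sum to 0.
 */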
- */ - dn_cfg.curr_time++; - if (tick_delta_sum - tick >= 0) { - int diff = tick_delta_sum / tick; - - dn_cfg.curr_time += diff; - tick_diff += diff; - tick_delta_sum %= tick; - tick_adjustment++; - } else if (tick_delta_sum + tick <= 0) { - dn_cfg.curr_time--; - tick_diff--; - tick_delta_sum += tick; - tick_adjustment++; - } - - /* serve pending events, accumulate in q */ - for (;;) { - struct dn_id *p; /* generic parameter to handler */ - - if (dn_cfg.evheap.elements == 0 || - DN_KEY_LT(dn_cfg.curr_time, HEAP_TOP(&dn_cfg.evheap)->key)) - break; - p = HEAP_TOP(&dn_cfg.evheap)->object; - heap_extract(&dn_cfg.evheap, NULL); - - if (p->type == DN_SCH_I) { - serve_sched(&q, (struct dn_sch_inst *)p, dn_cfg.curr_time); - } else { /* extracted a delay line */ - transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time); - } - } - if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) { - dn_cfg.expire_cycle = 0; - dn_drain_scheduler(); - dn_drain_queue(); - } - - DN_BH_WUNLOCK(); - dn_reschedule(); - if (q.head != NULL) - dummynet_send(q.head); - CURVNET_RESTORE(); -} - -/* - * forward a chain of packets to the proper destination. - * This runs outside the dummynet lock. - */ -static void -dummynet_send(struct mbuf *m) -{ - struct mbuf *n; - - for (; m != NULL; m = n) { - struct ifnet *ifp = NULL; /* gcc 3.4.6 complains */ - struct m_tag *tag; - int dst; - - n = m->m_nextpkt; - m->m_nextpkt = NULL; - tag = m_tag_first(m); - if (tag == NULL) { /* should not happen */ - dst = DIR_DROP; - } else { - struct dn_pkt_tag *pkt = dn_tag_get(m); - /* extract the dummynet info, rename the tag - * to carry reinject info. - */ - dst = pkt->dn_dir; - ifp = pkt->ifp; - tag->m_tag_cookie = MTAG_IPFW_RULE; - tag->m_tag_id = 0; - } - - switch (dst) { - case DIR_OUT: - SET_HOST_IPLEN(mtod(m, struct ip *)); - ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); - break ; - - case DIR_IN : - /* put header in network format for ip_input() */ - //SET_NET_IPLEN(mtod(m, struct ip *)); - netisr_dispatch(NETISR_IP, m); - break; - -#ifdef INET6 - case DIR_IN | PROTO_IPV6: - netisr_dispatch(NETISR_IPV6, m); - break; - - case DIR_OUT | PROTO_IPV6: - ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); - break; -#endif - - case DIR_FWD | PROTO_IFB: /* DN_TO_IFB_FWD: */ - if (bridge_dn_p != NULL) - ((*bridge_dn_p)(m, ifp)); - else - printf("dummynet: if_bridge not loaded\n"); - - break; - - case DIR_IN | PROTO_LAYER2: /* DN_TO_ETH_DEMUX: */ - /* - * The Ethernet code assumes the Ethernet header is - * contiguous in the first mbuf header. - * Insure this is true. - */ - if (m->m_len < ETHER_HDR_LEN && - (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { - printf("dummynet/ether: pullup failed, " - "dropping packet\n"); - break; - } - ether_demux(m->m_pkthdr.rcvif, m); - break; - - case DIR_OUT | PROTO_LAYER2: /* N_TO_ETH_OUT: */ - ether_output_frame(ifp, m); - break; - - case DIR_DROP: - /* drop the packet after some time */ - FREE_PKT(m); - break; - - default: - printf("dummynet: bad switch %d!\n", dst); - FREE_PKT(m); - break; - } - } -} - -static inline int -tag_mbuf(struct mbuf *m, int dir, struct ip_fw_args *fwa) -{ - struct dn_pkt_tag *dt; - struct m_tag *mtag; - - mtag = m_tag_get(PACKET_TAG_DUMMYNET, - sizeof(*dt), M_NOWAIT | M_ZERO); - if (mtag == NULL) - return 1; /* Cannot allocate packet header. */ - m_tag_prepend(m, mtag); /* Attach to mbuf chain. 
*/ - dt = (struct dn_pkt_tag *)(mtag + 1); - dt->rule = fwa->rule; - dt->rule.info &= IPFW_ONEPASS; /* only keep this info */ - dt->dn_dir = dir; - dt->ifp = fwa->oif; - /* dt->output_time is updated as we move through */ - dt->output_time = dn_cfg.curr_time; - return 0; -} - - -/* - * dummynet hook for packets. - * We use the argument to locate the flowset fs and the sched_set sch - * associated to it. Then we apply flow_mask and sched_mask to - * determine the queue and scheduler instances. - * - * dir where shall we send the packet after dummynet. - * *m0 the mbuf with the packet - * ifp the 'ifp' parameter from the caller. - * NULL in ip_input, destination interface in ip_output, - */ -int -dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) -{ - struct mbuf *m = *m0; - struct dn_fsk *fs = NULL; - struct dn_sch_inst *si; - struct dn_queue *q = NULL; /* default */ - - int fs_id = (fwa->rule.info & IPFW_INFO_MASK) + - ((fwa->rule.info & IPFW_IS_PIPE) ? 2*DN_MAX_ID : 0); - DN_BH_WLOCK(); - io_pkt++; - /* we could actually tag outside the lock, but who cares... */ - if (tag_mbuf(m, dir, fwa)) - goto dropit; - if (dn_cfg.busy) { - /* if the upper half is busy doing something expensive, - * let's queue the packet and move forward - */ - mq_append(&dn_cfg.pending, m); - m = *m0 = NULL; /* consumed */ - goto done; /* already active, nothing to do */ - } - /* XXX locate_flowset could be optimised with a direct ref. */ - fs = dn_ht_find(dn_cfg.fshash, fs_id, 0, NULL); - if (fs == NULL) - goto dropit; /* This queue/pipe does not exist! */ - if (fs->sched == NULL) /* should not happen */ - goto dropit; - /* find scheduler instance, possibly applying sched_mask */ - si = ipdn_si_find(fs->sched, &(fwa->f_id)); - if (si == NULL) - goto dropit; - /* - * If the scheduler supports multiple queues, find the right one - * (otherwise it will be ignored by enqueue). - */ - if (fs->sched->fp->flags & DN_MULTIQUEUE) { - q = ipdn_q_find(fs, si, &(fwa->f_id)); - if (q == NULL) - goto dropit; - } - if (fs->sched->fp->enqueue(si, q, m)) { - /* packet was dropped by enqueue() */ - m = *m0 = NULL; - goto dropit; - } - - if (si->kflags & DN_ACTIVE) { - m = *m0 = NULL; /* consumed */ - goto done; /* already active, nothing to do */ - } - - /* compute the initial allowance */ - if (si->idle_time < dn_cfg.curr_time) { - /* Do this only on the first packet on an idle pipe */ - struct dn_link *p = &fs->sched->link; - - si->sched_time = dn_cfg.curr_time; - si->credit = dn_cfg.io_fast ? p->bandwidth : 0; - if (p->burst) { - uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth; - if (burst > p->burst) - burst = p->burst; - si->credit += burst; - } - } - /* pass through scheduler and delay line */ - m = serve_sched(NULL, si, dn_cfg.curr_time); - - /* optimization -- pass it back to ipfw for immediate send */ - /* XXX Don't call dummynet_send() if the scheduler returns the packet - * just enqueued. This avoids a lock order reversal. - * - */ - if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) { - /* fast io, rename the tag to carry reinject info. 
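The tag dance above gives each packet a two-phase life: PACKET_TAG_DUMMYNET while inside dummynet, then the same m_tag renamed to MTAG_IPFW_RULE so ipfw sees reinject information on the way out. A hedged sketch of how a consumer could test for the second phase (illustrative helper, not in the file):

static int
was_reinjected(struct mbuf *m)
{
	/* after the rename, the tag is found under the ipfw cookie */
	return (m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL) != NULL);
}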
*/ - struct m_tag *tag = m_tag_first(m); - - tag->m_tag_cookie = MTAG_IPFW_RULE; - tag->m_tag_id = 0; - io_pkt_fast++; - if (m->m_nextpkt != NULL) { - printf("dummynet: fast io: pkt chain detected!\n"); - m->m_nextpkt = NULL; - } - m = NULL; - } else { - *m0 = NULL; - } -done: - DN_BH_WUNLOCK(); - if (m) - dummynet_send(m); - return 0; - -dropit: - io_pkt_drop++; - DN_BH_WUNLOCK(); - if (m) - FREE_PKT(m); - *m0 = NULL; - return (fs && (fs->fs.flags & DN_NOERROR)) ? 0 : ENOBUFS; -} diff --git a/freebsd/sys/netpfil/ipfw/ip_dn_private.h b/freebsd/sys/netpfil/ipfw/ip_dn_private.h index 159ddc9a..2fce1366 100644 --- a/freebsd/sys/netpfil/ipfw/ip_dn_private.h +++ b/freebsd/sys/netpfil/ipfw/ip_dn_private.h @@ -81,8 +81,13 @@ SLIST_HEAD(dn_fsk_head, dn_fsk); SLIST_HEAD(dn_queue_head, dn_queue); SLIST_HEAD(dn_alg_head, dn_alg); +#ifdef NEW_AQM +SLIST_HEAD(dn_aqm_head, dn_aqm); /* for new AQMs */ +#endif + struct mq { /* a basic queue of packets*/ struct mbuf *head, *tail; + int count; }; static inline void @@ -91,7 +96,7 @@ set_oid(struct dn_id *o, int type, int len) o->type = type; o->len = len; o->subtype = 0; -}; +} /* * configuration and global data for a dummynet instance @@ -135,6 +140,9 @@ struct dn_parms { /* list of flowsets without a scheduler -- use sch_chain */ struct dn_fsk_head fsu; /* list of unlinked flowsets */ struct dn_alg_head schedlist; /* list of algorithms */ +#ifdef NEW_AQM + struct dn_aqm_head aqmlist; /* list of AQMs */ +#endif /* Store the fs/sch to scan when draining. The value is the * bucket number of the hash table. Expire can be disabled @@ -231,6 +239,10 @@ struct dn_fsk { /* kernel side of a flowset */ int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ int avg_pkt_size ; /* medium packet size */ int max_pkt_size ; /* max packet size */ +#ifdef NEW_AQM + struct dn_aqm *aqmfp; /* Pointer to AQM functions */ + void *aqmcfg; /* configuration parameters for AQM */ +#endif }; /* @@ -253,6 +265,9 @@ struct dn_queue { int count; /* arrivals since last RED drop */ int random; /* random value (scaled) */ uint64_t q_time; /* start of queue idle time */ +#ifdef NEW_AQM + void *aqm_status; /* per-queue status variables*/ +#endif }; @@ -400,4 +415,49 @@ int do_config(void *p, int l); void dn_drain_scheduler(void); void dn_drain_queue(void); +#ifdef NEW_AQM +int ecn_mark(struct mbuf* m); + +/* moved from ip_dn_io.c to here to be available for AQMs modules*/ +static inline void +mq_append(struct mq *q, struct mbuf *m) +{ +#ifdef USERSPACE + // buffers from netmap need to be copied + // XXX note that the routine is not expected to fail + ND("append %p to %p", m, q); + if (m->m_flags & M_STACK) { + struct mbuf *m_new; + void *p; + int l, ofs; + + ofs = m->m_data - m->__m_extbuf; + // XXX allocate + MGETHDR(m_new, M_NOWAIT, MT_DATA); + ND("*** WARNING, volatile buf %p ext %p %d dofs %d m_new %p", + m, m->__m_extbuf, m->__m_extlen, ofs, m_new); + p = m_new->__m_extbuf; /* new pointer */ + l = m_new->__m_extlen; /* new len */ + if (l <= m->__m_extlen) { + panic("extlen too large"); + } + + *m_new = *m; // copy + m_new->m_flags &= ~M_STACK; + m_new->__m_extbuf = p; // point to new buffer + _pkt_copy(m->__m_extbuf, p, m->__m_extlen); + m_new->m_data = p + ofs; + m = m_new; + } +#endif /* USERSPACE */ + if (q->head == NULL) + q->head = m; + else + q->tail->m_nextpkt = m; + q->count++; + q->tail = m; + m->m_nextpkt = NULL; +} +#endif /* NEW_AQM */ + #endif /* _IP_DN_PRIVATE_H */ diff --git a/freebsd/sys/netpfil/ipfw/ip_dummynet.c b/freebsd/sys/netpfil/ipfw/ip_dummynet.c deleted 
file mode 100644 index 40c37d80..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_dummynet.c +++ /dev/null @@ -1,2309 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa - * Portions Copyright (c) 2000 Akamba Corp. - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * Configuration and internal object management for dummynet. - */ - -#include <rtems/bsd/local/opt_inet6.h> - -#include <rtems/bsd/sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/kernel.h> -#include <rtems/bsd/sys/lock.h> -#include <sys/module.h> -#include <sys/priv.h> -#include <sys/proc.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/time.h> -#include <sys/taskqueue.h> -#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ -#include <netinet/in.h> -#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ -#include <netinet/ip_fw.h> -#include <netinet/ip_dummynet.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/dn_heap.h> -#include <netpfil/ipfw/ip_dn_private.h> -#include <netpfil/ipfw/dn_sched.h> - -/* which objects to copy */ -#define DN_C_LINK 0x01 -#define DN_C_SCH 0x02 -#define DN_C_FLOW 0x04 -#define DN_C_FS 0x08 -#define DN_C_QUEUE 0x10 - -/* we use this argument in case of a schk_new */ -struct schk_new_arg { - struct dn_alg *fp; - struct dn_sch *sch; -}; - -/*---- callout hooks. ----*/ -static struct callout dn_timeout; -static struct task dn_task; -static struct taskqueue *dn_tq = NULL; - -static void -dummynet(void * __unused unused) -{ - - taskqueue_enqueue(dn_tq, &dn_task); -} - -void -dn_reschedule(void) -{ - callout_reset(&dn_timeout, 1, dummynet, NULL); -} -/*----- end of callout hooks -----*/ - -/* Return a scheduler descriptor given the type or name. 
*/ -static struct dn_alg * -find_sched_type(int type, char *name) -{ - struct dn_alg *d; - - SLIST_FOREACH(d, &dn_cfg.schedlist, next) { - if (d->type == type || (name && !strcasecmp(d->name, name))) - return d; - } - return NULL; /* not found */ -} - -int -ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) -{ - int oldv = *v; - const char *op = NULL; - if (dflt < lo) - dflt = lo; - if (dflt > hi) - dflt = hi; - if (oldv < lo) { - *v = dflt; - op = "Bump"; - } else if (oldv > hi) { - *v = hi; - op = "Clamp"; - } else - return *v; - if (op && msg) - printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); - return *v; -} - -/*---- flow_id mask, hash and compare functions ---*/ -/* - * The flow_id includes the 5-tuple, the queue/pipe number - * which we store in the extra area in host order, - * and for ipv6 also the flow_id6. - * XXX see if we want the tos byte (can store in 'flags') - */ -static struct ipfw_flow_id * -flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) -{ - int is_v6 = IS_IP6_FLOW_ID(id); - - id->dst_port &= mask->dst_port; - id->src_port &= mask->src_port; - id->proto &= mask->proto; - id->extra &= mask->extra; - if (is_v6) { - APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); - APPLY_MASK(&id->src_ip6, &mask->src_ip6); - id->flow_id6 &= mask->flow_id6; - } else { - id->dst_ip &= mask->dst_ip; - id->src_ip &= mask->src_ip; - } - return id; -} - -/* computes an OR of two masks, result in dst and also returned */ -static struct ipfw_flow_id * -flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst) -{ - int is_v6 = IS_IP6_FLOW_ID(dst); - - dst->dst_port |= src->dst_port; - dst->src_port |= src->src_port; - dst->proto |= src->proto; - dst->extra |= src->extra; - if (is_v6) { -#define OR_MASK(_d, _s) \ - (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \ - (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \ - (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \ - (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3]; - OR_MASK(&dst->dst_ip6, &src->dst_ip6); - OR_MASK(&dst->src_ip6, &src->src_ip6); -#undef OR_MASK - dst->flow_id6 |= src->flow_id6; - } else { - dst->dst_ip |= src->dst_ip; - dst->src_ip |= src->src_ip; - } - return dst; -} - -static int -nonzero_mask(struct ipfw_flow_id *m) -{ - if (m->dst_port || m->src_port || m->proto || m->extra) - return 1; - if (IS_IP6_FLOW_ID(m)) { - return - m->dst_ip6.__u6_addr.__u6_addr32[0] || - m->dst_ip6.__u6_addr.__u6_addr32[1] || - m->dst_ip6.__u6_addr.__u6_addr32[2] || - m->dst_ip6.__u6_addr.__u6_addr32[3] || - m->src_ip6.__u6_addr.__u6_addr32[0] || - m->src_ip6.__u6_addr.__u6_addr32[1] || - m->src_ip6.__u6_addr.__u6_addr32[2] || - m->src_ip6.__u6_addr.__u6_addr32[3] || - m->flow_id6; - } else { - return m->dst_ip || m->src_ip; - } -} - -/* XXX we may want a better hash function */ -static uint32_t -flow_id_hash(struct ipfw_flow_id *id) -{ - uint32_t i; - - if (IS_IP6_FLOW_ID(id)) { - uint32_t *d = (uint32_t *)&id->dst_ip6; - uint32_t *s = (uint32_t *)&id->src_ip6; - i = (d[0] ) ^ (d[1]) ^ - (d[2] ) ^ (d[3]) ^ - (d[0] >> 15) ^ (d[1] >> 15) ^ - (d[2] >> 15) ^ (d[3] >> 15) ^ - (s[0] << 1) ^ (s[1] << 1) ^ - (s[2] << 1) ^ (s[3] << 1) ^ - (s[0] << 16) ^ (s[1] << 16) ^ - (s[2] << 16) ^ (s[3] << 16) ^ - (id->dst_port << 1) ^ (id->src_port) ^ - (id->extra) ^ - (id->proto ) ^ (id->flow_id6); - } else { - i = (id->dst_ip) ^ (id->dst_ip >> 15) ^ - (id->src_ip << 1) ^ (id->src_ip >> 16) ^ - (id->extra) ^ - (id->dst_port << 1) ^ (id->src_port) ^ (id->proto); - } 
- return i; -} - -/* Like bcmp, returns 0 if ids match, 1 otherwise. */ -static int -flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2) -{ - int is_v6 = IS_IP6_FLOW_ID(id1); - - if (!is_v6) { - if (IS_IP6_FLOW_ID(id2)) - return 1; /* different address families */ - - return (id1->dst_ip == id2->dst_ip && - id1->src_ip == id2->src_ip && - id1->dst_port == id2->dst_port && - id1->src_port == id2->src_port && - id1->proto == id2->proto && - id1->extra == id2->extra) ? 0 : 1; - } - /* the ipv6 case */ - return ( - !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) && - !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) && - id1->dst_port == id2->dst_port && - id1->src_port == id2->src_port && - id1->proto == id2->proto && - id1->extra == id2->extra && - id1->flow_id6 == id2->flow_id6) ? 0 : 1; -} -/*--------- end of flow-id mask, hash and compare ---------*/ - -/*--- support functions for the qht hashtable ---- - * Entries are hashed by flow-id - */ -static uint32_t -q_hash(uintptr_t key, int flags, void *arg) -{ - /* compute the hash slot from the flow id */ - struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? - &((struct dn_queue *)key)->ni.fid : - (struct ipfw_flow_id *)key; - - return flow_id_hash(id); -} - -static int -q_match(void *obj, uintptr_t key, int flags, void *arg) -{ - struct dn_queue *o = (struct dn_queue *)obj; - struct ipfw_flow_id *id2; - - if (flags & DNHT_KEY_IS_OBJ) { - /* compare pointers */ - id2 = &((struct dn_queue *)key)->ni.fid; - } else { - id2 = (struct ipfw_flow_id *)key; - } - return (0 == flow_id_cmp(&o->ni.fid, id2)); -} - -/* - * create a new queue instance for the given 'key'. - */ -static void * -q_new(uintptr_t key, int flags, void *arg) -{ - struct dn_queue *q, *template = arg; - struct dn_fsk *fs = template->fs; - int size = sizeof(*q) + fs->sched->fp->q_datalen; - - q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO); - if (q == NULL) { - D("no memory for new queue"); - return NULL; - } - - set_oid(&q->ni.oid, DN_QUEUE, size); - if (fs->fs.flags & DN_QHT_HASH) - q->ni.fid = *(struct ipfw_flow_id *)key; - q->fs = fs; - q->_si = template->_si; - q->_si->q_count++; - - if (fs->sched->fp->new_queue) - fs->sched->fp->new_queue(q); - dn_cfg.queue_count++; - return q; -} - -/* - * Notify schedulers that a queue is going away. - * If (flags & DN_DESTROY), also free the packets. - * The version for callbacks is called q_delete_cb(). - */ -static void -dn_delete_queue(struct dn_queue *q, int flags) -{ - struct dn_fsk *fs = q->fs; - - // D("fs %p si %p\n", fs, q->_si); - /* notify the parent scheduler that the queue is going away */ - if (fs && fs->sched->fp->free_queue) - fs->sched->fp->free_queue(q); - q->_si->q_count--; - q->_si = NULL; - if (flags & DN_DESTROY) { - if (q->mq.head) - dn_free_pkts(q->mq.head); - bzero(q, sizeof(*q)); // safety - free(q, M_DUMMYNET); - dn_cfg.queue_count--; - } -} - -static int -q_delete_cb(void *q, void *arg) -{ - int flags = (int)(uintptr_t)arg; - dn_delete_queue(q, flags); - return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0; -} - -/* - * calls dn_delete_queue/q_delete_cb on all queues, - * which notifies the parent scheduler and possibly drains packets. 
- * flags & DN_DESTROY: drains queues and destroy qht; - */ -static void -qht_delete(struct dn_fsk *fs, int flags) -{ - ND("fs %d start flags %d qht %p", - fs->fs.fs_nr, flags, fs->qht); - if (!fs->qht) - return; - if (fs->fs.flags & DN_QHT_HASH) { - dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags); - if (flags & DN_DESTROY) { - dn_ht_free(fs->qht, 0); - fs->qht = NULL; - } - } else { - dn_delete_queue((struct dn_queue *)(fs->qht), flags); - if (flags & DN_DESTROY) - fs->qht = NULL; - } -} - -/* - * Find and possibly create the queue for a MULTIQUEUE scheduler. - * We never call it for !MULTIQUEUE (the queue is in the sch_inst). - */ -struct dn_queue * -ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si, - struct ipfw_flow_id *id) -{ - struct dn_queue template; - - template._si = si; - template.fs = fs; - - if (fs->fs.flags & DN_QHT_HASH) { - struct ipfw_flow_id masked_id; - if (fs->qht == NULL) { - fs->qht = dn_ht_init(NULL, fs->fs.buckets, - offsetof(struct dn_queue, q_next), - q_hash, q_match, q_new); - if (fs->qht == NULL) - return NULL; - } - masked_id = *id; - flow_id_mask(&fs->fsk_mask, &masked_id); - return dn_ht_find(fs->qht, (uintptr_t)&masked_id, - DNHT_INSERT, &template); - } else { - if (fs->qht == NULL) - fs->qht = q_new(0, 0, &template); - return (struct dn_queue *)fs->qht; - } -} -/*--- end of queue hash table ---*/ - -/*--- support functions for the sch_inst hashtable ---- - * - * These are hashed by flow-id - */ -static uint32_t -si_hash(uintptr_t key, int flags, void *arg) -{ - /* compute the hash slot from the flow id */ - struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? - &((struct dn_sch_inst *)key)->ni.fid : - (struct ipfw_flow_id *)key; - - return flow_id_hash(id); -} - -static int -si_match(void *obj, uintptr_t key, int flags, void *arg) -{ - struct dn_sch_inst *o = obj; - struct ipfw_flow_id *id2; - - id2 = (flags & DNHT_KEY_IS_OBJ) ? - &((struct dn_sch_inst *)key)->ni.fid : - (struct ipfw_flow_id *)key; - return flow_id_cmp(&o->ni.fid, id2) == 0; -} - -/* - * create a new instance for the given 'key' - * Allocate memory for instance, delay line and scheduler private data. - */ -static void * -si_new(uintptr_t key, int flags, void *arg) -{ - struct dn_schk *s = arg; - struct dn_sch_inst *si; - int l = sizeof(*si) + s->fp->si_datalen; - - si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); - if (si == NULL) - goto error; - - /* Set length only for the part passed up to userland. */ - set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow)); - set_oid(&(si->dline.oid), DN_DELAY_LINE, - sizeof(struct delay_line)); - /* mark si and dline as outside the event queue */ - si->ni.oid.id = si->dline.oid.id = -1; - - si->sched = s; - si->dline.si = si; - - if (s->fp->new_sched && s->fp->new_sched(si)) { - D("new_sched error"); - goto error; - } - if (s->sch.flags & DN_HAVE_MASK) - si->ni.fid = *(struct ipfw_flow_id *)key; - - dn_cfg.si_count++; - return si; - -error: - if (si) { - bzero(si, sizeof(*si)); // safety - free(si, M_DUMMYNET); - } - return NULL; -} - -/* - * Callback from siht to delete all scheduler instances. Remove - * si and delay line from the system heap, destroy all queues. - * We assume that all flowset have been notified and do not - * point to us anymore. 
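The DNHT_KEY_IS_OBJ branching in the hash/match callbacks above lets the table code pass either a bare flow id or an already-stored object through the same function. A hedged sketch of the resulting invariant (illustrative helper, not from the file):

static void
q_hash_equivalence(struct dn_queue *q)
{
	/* hashing the object or its embedded flow id must land in the
	 * same bucket, which is what makes rehashing safe */
	uint32_t a = q_hash((uintptr_t)q, DNHT_KEY_IS_OBJ, NULL);
	uint32_t b = q_hash((uintptr_t)&q->ni.fid, 0, NULL);

	KASSERT(a == b, ("q_hash: object/key disagree"));
}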
- */ -static int -si_destroy(void *_si, void *arg) -{ - struct dn_sch_inst *si = _si; - struct dn_schk *s = si->sched; - struct delay_line *dl = &si->dline; - - if (dl->oid.subtype) /* remove delay line from event heap */ - heap_extract(&dn_cfg.evheap, dl); - dn_free_pkts(dl->mq.head); /* drain delay line */ - if (si->kflags & DN_ACTIVE) /* remove si from event heap */ - heap_extract(&dn_cfg.evheap, si); - if (s->fp->free_sched) - s->fp->free_sched(si); - bzero(si, sizeof(*si)); /* safety */ - free(si, M_DUMMYNET); - dn_cfg.si_count--; - return DNHT_SCAN_DEL; -} - -/* - * Find the scheduler instance for this packet. If we need to apply - * a mask, do on a local copy of the flow_id to preserve the original. - * Assume siht is always initialized if we have a mask. - */ -struct dn_sch_inst * -ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id) -{ - - if (s->sch.flags & DN_HAVE_MASK) { - struct ipfw_flow_id id_t = *id; - flow_id_mask(&s->sch.sched_mask, &id_t); - return dn_ht_find(s->siht, (uintptr_t)&id_t, - DNHT_INSERT, s); - } - if (!s->siht) - s->siht = si_new(0, 0, s); - return (struct dn_sch_inst *)s->siht; -} - -/* callback to flush credit for the scheduler instance */ -static int -si_reset_credit(void *_si, void *arg) -{ - struct dn_sch_inst *si = _si; - struct dn_link *p = &si->sched->link; - - si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0); - return 0; -} - -static void -schk_reset_credit(struct dn_schk *s) -{ - if (s->sch.flags & DN_HAVE_MASK) - dn_ht_scan(s->siht, si_reset_credit, NULL); - else if (s->siht) - si_reset_credit(s->siht, NULL); -} -/*---- end of sch_inst hashtable ---------------------*/ - -/*------------------------------------------------------- - * flowset hash (fshash) support. Entries are hashed by fs_nr. - * New allocations are put in the fsunlinked list, from which - * they are removed when they point to a specific scheduler. - */ -static uint32_t -fsk_hash(uintptr_t key, int flags, void *arg) -{ - uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : - ((struct dn_fsk *)key)->fs.fs_nr; - - return ( (i>>8)^(i>>4)^i ); -} - -static int -fsk_match(void *obj, uintptr_t key, int flags, void *arg) -{ - struct dn_fsk *fs = obj; - int i = !(flags & DNHT_KEY_IS_OBJ) ? key : - ((struct dn_fsk *)key)->fs.fs_nr; - - return (fs->fs.fs_nr == i); -} - -static void * -fsk_new(uintptr_t key, int flags, void *arg) -{ - struct dn_fsk *fs; - - fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO); - if (fs) { - set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs)); - dn_cfg.fsk_count++; - fs->drain_bucket = 0; - SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); - } - return fs; -} - -/* - * detach flowset from its current scheduler. Flags as follows: - * DN_DETACH removes from the fsk_list - * DN_DESTROY deletes individual queues - * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked). - */ -static void -fsk_detach(struct dn_fsk *fs, int flags) -{ - if (flags & DN_DELETE_FS) - flags |= DN_DESTROY; - ND("fs %d from sched %d flags %s %s %s", - fs->fs.fs_nr, fs->fs.sched_nr, - (flags & DN_DELETE_FS) ? "DEL_FS":"", - (flags & DN_DESTROY) ? "DEL":"", - (flags & DN_DETACH) ? "DET":""); - if (flags & DN_DETACH) { /* detach from the list */ - struct dn_fsk_head *h; - h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu; - SLIST_REMOVE(h, fs, dn_fsk, sch_chain); - } - /* Free the RED parameters, they will be recomputed on - * subsequent attach if needed. 
- */ - if (fs->w_q_lookup) - free(fs->w_q_lookup, M_DUMMYNET); - fs->w_q_lookup = NULL; - qht_delete(fs, flags); - if (fs->sched && fs->sched->fp->free_fsk) - fs->sched->fp->free_fsk(fs); - fs->sched = NULL; - if (flags & DN_DELETE_FS) { - bzero(fs, sizeof(*fs)); /* safety */ - free(fs, M_DUMMYNET); - dn_cfg.fsk_count--; - } else { - SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); - } -} - -/* - * Detach or destroy all flowsets in a list. - * flags specifies what to do: - * DN_DESTROY: flush all queues - * DN_DELETE_FS: DN_DESTROY + destroy flowset - * DN_DELETE_FS implies DN_DESTROY - */ -static void -fsk_detach_list(struct dn_fsk_head *h, int flags) -{ - struct dn_fsk *fs; - int n = 0; /* only for stats */ - - ND("head %p flags %x", h, flags); - while ((fs = SLIST_FIRST(h))) { - SLIST_REMOVE_HEAD(h, sch_chain); - n++; - fsk_detach(fs, flags); - } - ND("done %d flowsets", n); -} - -/* - * called on 'queue X delete' -- removes the flowset from fshash, - * deletes all queues for the flowset, and removes the flowset. - */ -static int -delete_fs(int i, int locked) -{ - struct dn_fsk *fs; - int err = 0; - - if (!locked) - DN_BH_WLOCK(); - fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL); - ND("fs %d found %p", i, fs); - if (fs) { - fsk_detach(fs, DN_DETACH | DN_DELETE_FS); - err = 0; - } else - err = EINVAL; - if (!locked) - DN_BH_WUNLOCK(); - return err; -} - -/*----- end of flowset hashtable support -------------*/ - -/*------------------------------------------------------------ - * Scheduler hash. When searching by index we pass sched_nr, - * otherwise we pass struct dn_sch * which is the first field in - * struct dn_schk so we can cast between the two. We use this trick - * because in the create phase we only have a struct dn_sch to - * search on (but it should be fixed). - */ -static uint32_t -schk_hash(uintptr_t key, int flags, void *_arg) -{ - uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : - ((struct dn_schk *)key)->sch.sched_nr; - return ( (i>>8)^(i>>4)^i ); -} - -static int -schk_match(void *obj, uintptr_t key, int flags, void *_arg) -{ - struct dn_schk *s = (struct dn_schk *)obj; - int i = !(flags & DNHT_KEY_IS_OBJ) ? key : - ((struct dn_schk *)key)->sch.sched_nr; - return (s->sch.sched_nr == i); -} - -/* - * Create the entry and initialize with the sched hash if needed. - * Leave s->fp unset so we can tell whether a dn_ht_find() returns - * a new object or a previously existing one. - */ -static void * -schk_new(uintptr_t key, int flags, void *arg) -{ - struct schk_new_arg *a = arg; - struct dn_schk *s; - int l = sizeof(*s) +a->fp->schk_datalen; - - s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); - if (s == NULL) - return NULL; - set_oid(&s->link.oid, DN_LINK, sizeof(s->link)); - s->sch = *a->sch; // copy initial values - s->link.link_nr = s->sch.sched_nr; - SLIST_INIT(&s->fsk_list); - /* initialize the hash table or create the single instance */ - s->fp = a->fp; /* si_new needs this */ - s->drain_bucket = 0; - if (s->sch.flags & DN_HAVE_MASK) { - s->siht = dn_ht_init(NULL, s->sch.buckets, - offsetof(struct dn_sch_inst, si_next), - si_hash, si_match, si_new); - if (s->siht == NULL) { - free(s, M_DUMMYNET); - return NULL; - } - } - s->fp = NULL; /* mark as a new scheduler */ - dn_cfg.schk_count++; - return s; -} - -/* - * Callback for sched delete. Notify all attached flowsets to - * detach from the scheduler, destroy the internal flowset, and - * all instances. The scheduler goes away too. 
- * arg is 0 (only detach flowsets and destroy instances) - * DN_DESTROY (detach & delete queues, delete schk) - * or DN_DELETE_FS (delete queues and flowsets, delete schk) - */ -static int -schk_delete_cb(void *obj, void *arg) -{ - struct dn_schk *s = obj; -#if 0 - int a = (int)arg; - ND("sched %d arg %s%s", - s->sch.sched_nr, - a&DN_DESTROY ? "DEL ":"", - a&DN_DELETE_FS ? "DEL_FS":""); -#endif - fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0); - /* no more flowset pointing to us now */ - if (s->sch.flags & DN_HAVE_MASK) { - dn_ht_scan(s->siht, si_destroy, NULL); - dn_ht_free(s->siht, 0); - } else if (s->siht) - si_destroy(s->siht, NULL); - if (s->profile) { - free(s->profile, M_DUMMYNET); - s->profile = NULL; - } - s->siht = NULL; - if (s->fp->destroy) - s->fp->destroy(s); - bzero(s, sizeof(*s)); // safety - free(obj, M_DUMMYNET); - dn_cfg.schk_count--; - return DNHT_SCAN_DEL; -} - -/* - * called on a 'sched X delete' command. Deletes a single scheduler. - * This is done by removing from the schedhash, unlinking all - * flowsets and deleting their traffic. - */ -static int -delete_schk(int i) -{ - struct dn_schk *s; - - s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL); - ND("%d %p", i, s); - if (!s) - return EINVAL; - delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */ - /* then detach flowsets, delete traffic */ - schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY); - return 0; -} -/*--- end of schk hashtable support ---*/ - -static int -copy_obj(char **start, char *end, void *_o, const char *msg, int i) -{ - struct dn_id *o = _o; - int have = end - *start; - - if (have < o->len || o->len == 0 || o->type == 0) { - D("(WARN) type %d %s %d have %d need %d", - o->type, msg, i, have, o->len); - return 1; - } - ND("type %d %s %d len %d", o->type, msg, i, o->len); - bcopy(_o, *start, o->len); - if (o->type == DN_LINK) { - /* Adjust burst parameter for link */ - struct dn_link *l = (struct dn_link *)*start; - l->burst = div64(l->burst, 8 * hz); - l->delay = l->delay * 1000 / hz; - } else if (o->type == DN_SCH) { - /* Set id->id to the number of instances */ - struct dn_schk *s = _o; - struct dn_id *id = (struct dn_id *)(*start); - id->id = (s->sch.flags & DN_HAVE_MASK) ? - dn_ht_entries(s->siht) : (s->siht ? 1 : 0); - } - *start += o->len; - return 0; -} - -/* Specific function to copy a queue. - * Copies only the user-visible part of a queue (which is in - * a struct dn_flow), and sets len accordingly. 
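copy_obj() above undoes, on export, the unit conversions that config_link() applies on input: a link's burst is kept internally as bytes * 8 * hz and its delay as clock ticks, while userland speaks bytes and milliseconds. A round-trip sketch of the arithmetic; the hz value is an assumption for illustration only:

#include <stdio.h>
#include <stdint.h>

#define HZ 1000     /* assumed tick rate for illustration */

int
main(void)
{
    /* userland -> kernel, as in config_link() */
    uint64_t burst_bytes = 10000, delay_ms = 20;
    uint64_t kburst = burst_bytes * 8 * HZ;     /* bytes -> bits * hz */
    uint64_t kdelay = delay_ms * HZ / 1000;     /* ms -> ticks */

    /* kernel -> userland, as in copy_obj() for a DN_LINK */
    printf("burst %llu bytes -> %llu -> %llu bytes\n",
        (unsigned long long)burst_bytes, (unsigned long long)kburst,
        (unsigned long long)(kburst / (8 * HZ)));
    printf("delay %llu ms -> %llu ticks -> %llu ms\n",
        (unsigned long long)delay_ms, (unsigned long long)kdelay,
        (unsigned long long)(kdelay * 1000 / HZ));
    return 0;
}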
- */ -static int -copy_obj_q(char **start, char *end, void *_o, const char *msg, int i) -{ - struct dn_id *o = _o; - int have = end - *start; - int len = sizeof(struct dn_flow); /* see above comment */ - - if (have < len || o->len == 0 || o->type != DN_QUEUE) { - D("ERROR type %d %s %d have %d need %d", - o->type, msg, i, have, len); - return 1; - } - ND("type %d %s %d len %d", o->type, msg, i, len); - bcopy(_o, *start, len); - ((struct dn_id*)(*start))->len = len; - *start += len; - return 0; -} - -static int -copy_q_cb(void *obj, void *arg) -{ - struct dn_queue *q = obj; - struct copy_args *a = arg; - struct dn_flow *ni = (struct dn_flow *)(*a->start); - if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1)) - return DNHT_SCAN_END; - ni->oid.type = DN_FLOW; /* override the DN_QUEUE */ - ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL); - return 0; -} - -static int -copy_q(struct copy_args *a, struct dn_fsk *fs, int flags) -{ - if (!fs->qht) - return 0; - if (fs->fs.flags & DN_QHT_HASH) - dn_ht_scan(fs->qht, copy_q_cb, a); - else - copy_q_cb(fs->qht, a); - return 0; -} - -/* - * This routine only copies the initial part of a profile ? XXX - */ -static int -copy_profile(struct copy_args *a, struct dn_profile *p) -{ - int have = a->end - *a->start; - /* XXX here we check for max length */ - int profile_len = sizeof(struct dn_profile) - - ED_MAX_SAMPLES_NO*sizeof(int); - - if (p == NULL) - return 0; - if (have < profile_len) { - D("error have %d need %d", have, profile_len); - return 1; - } - bcopy(p, *a->start, profile_len); - ((struct dn_id *)(*a->start))->len = profile_len; - *a->start += profile_len; - return 0; -} - -static int -copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags) -{ - struct dn_fs *ufs = (struct dn_fs *)(*a->start); - if (!fs) - return 0; - ND("flowset %d", fs->fs.fs_nr); - if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr)) - return DNHT_SCAN_END; - ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ? - dn_ht_entries(fs->qht) : (fs->qht ? 
1 : 0); - if (flags) { /* copy queues */ - copy_q(a, fs, 0); - } - return 0; -} - -static int -copy_si_cb(void *obj, void *arg) -{ - struct dn_sch_inst *si = obj; - struct copy_args *a = arg; - struct dn_flow *ni = (struct dn_flow *)(*a->start); - if (copy_obj(a->start, a->end, &si->ni, "inst", - si->sched->sch.sched_nr)) - return DNHT_SCAN_END; - ni->oid.type = DN_FLOW; /* override the DN_SCH_I */ - ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL); - return 0; -} - -static int -copy_si(struct copy_args *a, struct dn_schk *s, int flags) -{ - if (s->sch.flags & DN_HAVE_MASK) - dn_ht_scan(s->siht, copy_si_cb, a); - else if (s->siht) - copy_si_cb(s->siht, a); - return 0; -} - -/* - * compute a list of children of a scheduler and copy up - */ -static int -copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags) -{ - struct dn_fsk *fs; - struct dn_id *o; - uint32_t *p; - - int n = 0, space = sizeof(*o); - SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { - if (fs->fs.fs_nr < DN_MAX_ID) - n++; - } - space += n * sizeof(uint32_t); - DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n); - if (a->end - *(a->start) < space) - return DNHT_SCAN_END; - o = (struct dn_id *)(*(a->start)); - o->len = space; - *a->start += o->len; - o->type = DN_TEXT; - p = (uint32_t *)(o+1); - SLIST_FOREACH(fs, &s->fsk_list, sch_chain) - if (fs->fs.fs_nr < DN_MAX_ID) - *p++ = fs->fs.fs_nr; - return 0; -} - -static int -copy_data_helper(void *_o, void *_arg) -{ - struct copy_args *a = _arg; - uint32_t *r = a->extra->r; /* start of first range */ - uint32_t *lim; /* first invalid pointer */ - int n; - - lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len); - - if (a->type == DN_LINK || a->type == DN_SCH) { - /* pipe|sched show, we receive a dn_schk */ - struct dn_schk *s = _o; - - n = s->sch.sched_nr; - if (a->type == DN_SCH && n >= DN_MAX_ID) - return 0; /* not a scheduler */ - if (a->type == DN_LINK && n <= DN_MAX_ID) - return 0; /* not a pipe */ - - /* see if the object is within one of our ranges */ - for (;r < lim; r += 2) { - if (n < r[0] || n > r[1]) - continue; - /* Found a valid entry, copy and we are done */ - if (a->flags & DN_C_LINK) { - if (copy_obj(a->start, a->end, - &s->link, "link", n)) - return DNHT_SCAN_END; - if (copy_profile(a, s->profile)) - return DNHT_SCAN_END; - if (copy_flowset(a, s->fs, 0)) - return DNHT_SCAN_END; - } - if (a->flags & DN_C_SCH) { - if (copy_obj(a->start, a->end, - &s->sch, "sched", n)) - return DNHT_SCAN_END; - /* list all attached flowsets */ - if (copy_fsk_list(a, s, 0)) - return DNHT_SCAN_END; - } - if (a->flags & DN_C_FLOW) - copy_si(a, s, 0); - break; - } - } else if (a->type == DN_FS) { - /* queue show, skip internal flowsets */ - struct dn_fsk *fs = _o; - - n = fs->fs.fs_nr; - if (n >= DN_MAX_ID) - return 0; - /* see if the object is within one of our ranges */ - for (;r < lim; r += 2) { - if (n < r[0] || n > r[1]) - continue; - if (copy_flowset(a, fs, 0)) - return DNHT_SCAN_END; - copy_q(a, fs, 0); - break; /* we are done */ - } - } - return 0; -} - -static inline struct dn_schk * -locate_scheduler(int i) -{ - return dn_ht_find(dn_cfg.schedhash, i, 0, NULL); -} - -/* - * red parameters are in fixed point arithmetic. - */ -static int -config_red(struct dn_fsk *fs) -{ - int64_t s, idle, weight, w0; - int t, i; - - fs->w_q = fs->fs.w_q; - fs->max_p = fs->fs.max_p; - ND("called"); - /* Doing stuff that was in userland */ - i = fs->sched->link.bandwidth; - s = (i <= 0) ? 
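copy_data_helper() above filters objects through the {low, high} pairs that follow the request header. A sketch of just that range test, with a hand-built pair array standing in for struct copy_range:

#include <stdio.h>
#include <stdint.h>

/* the {low, high} pair scan from copy_data_helper(), isolated */
static int
in_ranges(uint32_t n, const uint32_t *r, const uint32_t *lim)
{
    for (; r < lim; r += 2)
        if (n >= r[0] && n <= r[1])
            return (1);
    return (0);
}

int
main(void)
{
    uint32_t ranges[] = { 1, 10, 100, 199 };    /* two ranges */
    const uint32_t *lim = ranges + 4;

    printf("7 in ranges: %d\n", in_ranges(7, ranges, lim));
    printf("50 in ranges: %d\n", in_ranges(50, ranges, lim));
    return 0;
}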
0 : - hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i; - - idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */ - fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth); - /* fs->lookup_step not scaled, */ - if (!fs->lookup_step) - fs->lookup_step = 1; - w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled - - for (t = fs->lookup_step; t > 1; --t) - weight = SCALE_MUL(weight, w0); - fs->lookup_weight = (int)(weight); // scaled - - /* Now doing stuff that was in kerneland */ - fs->min_th = SCALE(fs->fs.min_th); - fs->max_th = SCALE(fs->fs.max_th); - - fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th); - fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th)); - - if (fs->fs.flags & DN_IS_GENTLE_RED) { - fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th; - fs->c_4 = SCALE(1) - 2 * fs->max_p; - } - - /* If the lookup table already exist, free and create it again. */ - if (fs->w_q_lookup) { - free(fs->w_q_lookup, M_DUMMYNET); - fs->w_q_lookup = NULL; - } - if (dn_cfg.red_lookup_depth == 0) { - printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth" - "must be > 0\n"); - fs->fs.flags &= ~DN_IS_RED; - fs->fs.flags &= ~DN_IS_GENTLE_RED; - return (EINVAL); - } - fs->lookup_depth = dn_cfg.red_lookup_depth; - fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int), - M_DUMMYNET, M_NOWAIT); - if (fs->w_q_lookup == NULL) { - printf("dummynet: sorry, cannot allocate red lookup table\n"); - fs->fs.flags &= ~DN_IS_RED; - fs->fs.flags &= ~DN_IS_GENTLE_RED; - return(ENOSPC); - } - - /* Fill the lookup table with (1 - w_q)^x */ - fs->w_q_lookup[0] = SCALE(1) - fs->w_q; - - for (i = 1; i < fs->lookup_depth; i++) - fs->w_q_lookup[i] = - SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight); - - if (dn_cfg.red_avg_pkt_size < 1) - dn_cfg.red_avg_pkt_size = 512; - fs->avg_pkt_size = dn_cfg.red_avg_pkt_size; - if (dn_cfg.red_max_pkt_size < 1) - dn_cfg.red_max_pkt_size = 1500; - fs->max_pkt_size = dn_cfg.red_max_pkt_size; - ND("exit"); - return 0; -} - -/* Scan all flowset attached to this scheduler and update red */ -static void -update_red(struct dn_schk *s) -{ - struct dn_fsk *fs; - SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { - if (fs && (fs->fs.flags & DN_IS_RED)) - config_red(fs); - } -} - -/* attach flowset to scheduler s, possibly requeue */ -static void -fsk_attach(struct dn_fsk *fs, struct dn_schk *s) -{ - ND("remove fs %d from fsunlinked, link to sched %d", - fs->fs.fs_nr, s->sch.sched_nr); - SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain); - fs->sched = s; - SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain); - if (s->fp->new_fsk) - s->fp->new_fsk(fs); - /* XXX compute fsk_mask */ - fs->fsk_mask = fs->fs.flow_mask; - if (fs->sched->sch.flags & DN_HAVE_MASK) - flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask); - if (fs->qht) { - /* - * we must drain qht according to the old - * type, and reinsert according to the new one. - * The requeue is complex -- in general we need to - * reclassify every single packet. - * For the time being, let's hope qht is never set - * when we reach this point. - */ - D("XXX TODO requeue from fs %d to sch %d", - fs->fs.fs_nr, s->sch.sched_nr); - fs->qht = NULL; - } - /* set the new type for qht */ - if (nonzero_mask(&fs->fsk_mask)) - fs->fs.flags |= DN_QHT_HASH; - else - fs->fs.flags &= ~DN_QHT_HASH; - - /* XXX config_red() can fail... 
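config_red() above precomputes powers of (1 - w_q) in fixed point so the forwarding path never touches floating point. A standalone sketch of how such a table decays, assuming a 16-bit shift for the SCALE macros and lookup_step == 1 for simplicity (the real code raises the weight to lookup_step per slot):

#include <stdio.h>
#include <stdint.h>

#define FP_BITS 16      /* assumed precision of the SCALE macros */
#define SCALE(x)        ((int64_t)(x) << FP_BITS)
#define SCALE_MUL(a, b) (((a) * (b)) >> FP_BITS)

int
main(void)
{
    double w_q = 0.002;                 /* RED queue weight */
    int64_t w_q_fp = (int64_t)(w_q * SCALE(1));
    int64_t w0 = SCALE(1) - w_q_fp;     /* (1 - w_q), scaled */
    int64_t lut[8];

    /* lut[i] = (1 - w_q)^(i+1), as w_q_lookup[] is filled above */
    lut[0] = w0;
    for (int i = 1; i < 8; i++)
        lut[i] = SCALE_MUL(lut[i - 1], w0);
    for (int i = 0; i < 8; i++)
        printf("(1-w_q)^%d ~ %.6f\n", i + 1, (double)lut[i] / SCALE(1));
    return 0;
}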
*/ - if (fs->fs.flags & DN_IS_RED) - config_red(fs); -} - -/* update all flowsets which may refer to this scheduler */ -static void -update_fs(struct dn_schk *s) -{ - struct dn_fsk *fs, *tmp; - - SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) { - if (s->sch.sched_nr != fs->fs.sched_nr) { - D("fs %d for sch %d not %d still unlinked", - fs->fs.fs_nr, fs->fs.sched_nr, - s->sch.sched_nr); - continue; - } - fsk_attach(fs, s); - } -} - -/* - * Configuration -- to preserve backward compatibility we use - * the following scheme (N is 65536) - * NUMBER SCHED LINK FLOWSET - * 1 .. N-1 (1)WFQ (2)WFQ (3)queue - * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1 - * 2N+1 .. 3N-1 -- -- (7)FIFO for sched N+1..2N-1 - * - * "pipe i config" configures #1, #2 and #3 - * "sched i config" configures #1 and possibly #6 - * "queue i config" configures #3 - * #1 is configured with 'pipe i config' or 'sched i config' - * #2 is configured with 'pipe i config', and created if not - * existing with 'sched i config' - * #3 is configured with 'queue i config' - * #4 is automatically configured after #1, can only be FIFO - * #5 is automatically configured after #2 - * #6 is automatically created when #1 is !MULTIQUEUE, - * and can be updated. - * #7 is automatically configured after #2 - */ - -/* - * configure a link (and its FIFO instance) - */ -static int -config_link(struct dn_link *p, struct dn_id *arg) -{ - int i; - - if (p->oid.len != sizeof(*p)) { - D("invalid pipe len %d", p->oid.len); - return EINVAL; - } - i = p->link_nr; - if (i <= 0 || i >= DN_MAX_ID) - return EINVAL; - /* - * The config program passes parameters as follows: - * bw = bits/second (0 means no limits), - * delay = ms, must be translated into ticks. - * qsize = slots/bytes - * burst ??? - */ - p->delay = (p->delay * hz) / 1000; - /* Scale burst size: bytes -> bits * hz */ - p->burst *= 8 * hz; - - DN_BH_WLOCK(); - /* do it twice, base link and FIFO link */ - for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { - struct dn_schk *s = locate_scheduler(i); - if (s == NULL) { - DN_BH_WUNLOCK(); - D("sched %d not found", i); - return EINVAL; - } - /* remove profile if exists */ - if (s->profile) { - free(s->profile, M_DUMMYNET); - s->profile = NULL; - } - /* copy all parameters */ - s->link.oid = p->oid; - s->link.link_nr = i; - s->link.delay = p->delay; - if (s->link.bandwidth != p->bandwidth) { - /* XXX bandwidth changes, need to update red params */ - s->link.bandwidth = p->bandwidth; - update_red(s); - } - s->link.burst = p->burst; - schk_reset_credit(s); - } - dn_cfg.id++; - DN_BH_WUNLOCK(); - return 0; -} - -/* - * configure a flowset. 
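To make the numbering scheme above concrete, here is how the derived object numbers fall out for one pipe. A small sketch using the N = 65536 stated in the comment (DN_MAX_ID); output formatting is illustrative only:

#include <stdio.h>

#define DN_MAX_ID 65536     /* the "N" in the scheme above */

int
main(void)
{
    int i = 100;    /* "ipfw pipe 100 config ..." */

    printf("sched/link:              %d\n", i);
    printf("internal fs of %d:       %d\n", i, i + DN_MAX_ID);
    printf("FIFO sched/link:         %d\n", i + DN_MAX_ID);
    printf("internal fs of FIFO %d:  %d\n", i + DN_MAX_ID,
        i + 2 * DN_MAX_ID);
    return 0;
}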
Can be called from inside with locked=1, - */ -static struct dn_fsk * -config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked) -{ - int i; - struct dn_fsk *fs; - - if (nfs->oid.len != sizeof(*nfs)) { - D("invalid flowset len %d", nfs->oid.len); - return NULL; - } - i = nfs->fs_nr; - if (i <= 0 || i >= 3*DN_MAX_ID) - return NULL; - ND("flowset %d", i); - /* XXX other sanity checks */ - if (nfs->flags & DN_QSIZE_BYTES) { - ipdn_bound_var(&nfs->qsize, 16384, - 1500, dn_cfg.byte_limit, NULL); // "queue byte size"); - } else { - ipdn_bound_var(&nfs->qsize, 50, - 1, dn_cfg.slot_limit, NULL); // "queue slot size"); - } - if (nfs->flags & DN_HAVE_MASK) { - /* make sure we have some buckets */ - ipdn_bound_var(&nfs->buckets, dn_cfg.hash_size, - 1, dn_cfg.max_hash_size, "flowset buckets"); - } else { - nfs->buckets = 1; /* we only need 1 */ - } - if (!locked) - DN_BH_WLOCK(); - do { /* exit with break when done */ - struct dn_schk *s; - int flags = nfs->sched_nr ? DNHT_INSERT : 0; - int j; - int oldc = dn_cfg.fsk_count; - fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL); - if (fs == NULL) { - D("missing sched for flowset %d", i); - break; - } - /* grab some defaults from the existing one */ - if (nfs->sched_nr == 0) /* reuse */ - nfs->sched_nr = fs->fs.sched_nr; - for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) { - if (nfs->par[j] == -1) /* reuse */ - nfs->par[j] = fs->fs.par[j]; - } - if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) { - ND("flowset %d unchanged", i); - break; /* no change, nothing to do */ - } - if (oldc != dn_cfg.fsk_count) /* new item */ - dn_cfg.id++; - s = locate_scheduler(nfs->sched_nr); - /* detach from old scheduler if needed, preserving - * queues if we need to reattach. Then update the - * configuration, and possibly attach to the new sched. - */ - DX(2, "fs %d changed sched %d@%p to %d@%p", - fs->fs.fs_nr, - fs->fs.sched_nr, fs->sched, nfs->sched_nr, s); - if (fs->sched) { - int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY); - flags |= DN_DESTROY; /* XXX temporary */ - fsk_detach(fs, flags); - } - fs->fs = *nfs; /* copy configuration */ - if (s != NULL) - fsk_attach(fs, s); - } while (0); - if (!locked) - DN_BH_WUNLOCK(); - return fs; -} - -/* - * config/reconfig a scheduler and its FIFO variant. - * For !MULTIQUEUE schedulers, also set up the flowset. - * - * On reconfigurations (detected because s->fp is set), - * detach existing flowsets preserving traffic, preserve link, - * and delete the old scheduler creating a new one. - */ -static int -config_sched(struct dn_sch *_nsch, struct dn_id *arg) -{ - struct dn_schk *s; - struct schk_new_arg a; /* argument for schk_new */ - int i; - struct dn_link p; /* copy of oldlink */ - struct dn_profile *pf = NULL; /* copy of old link profile */ - /* Used to preserv mask parameter */ - struct ipfw_flow_id new_mask; - int new_buckets = 0; - int new_flags = 0; - int pipe_cmd; - int err = ENOMEM; - - a.sch = _nsch; - if (a.sch->oid.len != sizeof(*a.sch)) { - D("bad sched len %d", a.sch->oid.len); - return EINVAL; - } - i = a.sch->sched_nr; - if (i <= 0 || i >= DN_MAX_ID) - return EINVAL; - /* make sure we have some buckets */ - if (a.sch->flags & DN_HAVE_MASK) - ipdn_bound_var(&a.sch->buckets, dn_cfg.hash_size, - 1, dn_cfg.max_hash_size, "sched buckets"); - /* XXX other sanity checks */ - bzero(&p, sizeof(p)); - - pipe_cmd = a.sch->flags & DN_PIPE_CMD; - a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set? 
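config_fs() above clamps qsize and the bucket count through ipdn_bound_var(). A sketch of the assumed fall-back-to-default semantics; the kernel helper also logs via the message argument, which is omitted here:

#include <stdio.h>

/* assumed semantics: out-of-range values fall back to a default */
static int
bound_var(int *v, int dflt, int lo, int hi)
{
    if (*v < lo || *v > hi)
        *v = dflt;
    return (*v);
}

int
main(void)
{
    int qsize = 0;      /* invalid: a queue needs at least one slot */

    bound_var(&qsize, 50, 1, 100);      /* cf. the qsize clamp above */
    printf("qsize -> %d slots\n", qsize);
    return 0;
}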
- if (pipe_cmd) { - /* Copy mask parameter */ - new_mask = a.sch->sched_mask; - new_buckets = a.sch->buckets; - new_flags = a.sch->flags; - } - DN_BH_WLOCK(); -again: /* run twice, for wfq and fifo */ - /* - * lookup the type. If not supplied, use the previous one - * or default to WF2Q+. Otherwise, return an error. - */ - dn_cfg.id++; - a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name); - if (a.fp != NULL) { - /* found. Lookup or create entry */ - s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a); - } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) { - /* No type. search existing s* or retry with WF2Q+ */ - s = dn_ht_find(dn_cfg.schedhash, i, 0, &a); - if (s != NULL) { - a.fp = s->fp; - /* Scheduler exists, skip to FIFO scheduler - * if command was pipe config... - */ - if (pipe_cmd) - goto next; - } else { - /* New scheduler, create a wf2q+ with no mask - * if command was pipe config... - */ - if (pipe_cmd) { - /* clear mask parameter */ - bzero(&a.sch->sched_mask, sizeof(new_mask)); - a.sch->buckets = 0; - a.sch->flags &= ~DN_HAVE_MASK; - } - a.sch->oid.subtype = DN_SCHED_WF2QP; - goto again; - } - } else { - D("invalid scheduler type %d %s", - a.sch->oid.subtype, a.sch->name); - err = EINVAL; - goto error; - } - /* normalize name and subtype */ - a.sch->oid.subtype = a.fp->type; - bzero(a.sch->name, sizeof(a.sch->name)); - strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name)); - if (s == NULL) { - D("cannot allocate scheduler %d", i); - goto error; - } - /* restore existing link if any */ - if (p.link_nr) { - s->link = p; - if (!pf || pf->link_nr != p.link_nr) { /* no saved value */ - s->profile = NULL; /* XXX maybe not needed */ - } else { - s->profile = malloc(sizeof(struct dn_profile), - M_DUMMYNET, M_NOWAIT | M_ZERO); - if (s->profile == NULL) { - D("cannot allocate profile"); - goto error; //XXX - } - bcopy(pf, s->profile, sizeof(*pf)); - } - } - p.link_nr = 0; - if (s->fp == NULL) { - DX(2, "sched %d new type %s", i, a.fp->name); - } else if (s->fp != a.fp || - bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) { - /* already existing. */ - DX(2, "sched %d type changed from %s to %s", - i, s->fp->name, a.fp->name); - DX(4, " type/sub %d/%d -> %d/%d", - s->sch.oid.type, s->sch.oid.subtype, - a.sch->oid.type, a.sch->oid.subtype); - if (s->link.link_nr == 0) - D("XXX WARNING link 0 for sched %d", i); - p = s->link; /* preserve link */ - if (s->profile) {/* preserve profile */ - if (!pf) - pf = malloc(sizeof(*pf), - M_DUMMYNET, M_NOWAIT | M_ZERO); - if (pf) /* XXX should issue a warning otherwise */ - bcopy(s->profile, pf, sizeof(*pf)); - } - /* remove from the hash */ - dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL); - /* Detach flowsets, preserve queues. 
*/ - // schk_delete_cb(s, NULL); - // XXX temporarily, kill queues - schk_delete_cb(s, (void *)DN_DESTROY); - goto again; - } else { - DX(4, "sched %d unchanged type %s", i, a.fp->name); - } - /* complete initialization */ - s->sch = *a.sch; - s->fp = a.fp; - s->cfg = arg; - // XXX schk_reset_credit(s); - /* create the internal flowset if needed, - * trying to reuse existing ones if available - */ - if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) { - s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL); - if (!s->fs) { - struct dn_fs fs; - bzero(&fs, sizeof(fs)); - set_oid(&fs.oid, DN_FS, sizeof(fs)); - fs.fs_nr = i + DN_MAX_ID; - fs.sched_nr = i; - s->fs = config_fs(&fs, NULL, 1 /* locked */); - } - if (!s->fs) { - schk_delete_cb(s, (void *)DN_DESTROY); - D("error creating internal fs for %d", i); - goto error; - } - } - /* call init function after the flowset is created */ - if (s->fp->config) - s->fp->config(s); - update_fs(s); -next: - if (i < DN_MAX_ID) { /* now configure the FIFO instance */ - i += DN_MAX_ID; - if (pipe_cmd) { - /* Restore mask parameter for FIFO */ - a.sch->sched_mask = new_mask; - a.sch->buckets = new_buckets; - a.sch->flags = new_flags; - } else { - /* sched config shouldn't modify the FIFO scheduler */ - if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) { - /* FIFO already exist, don't touch it */ - err = 0; /* and this is not an error */ - goto error; - } - } - a.sch->sched_nr = i; - a.sch->oid.subtype = DN_SCHED_FIFO; - bzero(a.sch->name, sizeof(a.sch->name)); - goto again; - } - err = 0; -error: - DN_BH_WUNLOCK(); - if (pf) - free(pf, M_DUMMYNET); - return err; -} - -/* - * attach a profile to a link - */ -static int -config_profile(struct dn_profile *pf, struct dn_id *arg) -{ - struct dn_schk *s; - int i, olen, err = 0; - - if (pf->oid.len < sizeof(*pf)) { - D("short profile len %d", pf->oid.len); - return EINVAL; - } - i = pf->link_nr; - if (i <= 0 || i >= DN_MAX_ID) - return EINVAL; - /* XXX other sanity checks */ - DN_BH_WLOCK(); - for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { - s = locate_scheduler(i); - - if (s == NULL) { - err = EINVAL; - break; - } - dn_cfg.id++; - /* - * If we had a profile and the new one does not fit, - * or it is deleted, then we need to free memory. - */ - if (s->profile && (pf->samples_no == 0 || - s->profile->oid.len < pf->oid.len)) { - free(s->profile, M_DUMMYNET); - s->profile = NULL; - } - if (pf->samples_no == 0) - continue; - /* - * new profile, possibly allocate memory - * and copy data. - */ - if (s->profile == NULL) - s->profile = malloc(pf->oid.len, - M_DUMMYNET, M_NOWAIT | M_ZERO); - if (s->profile == NULL) { - D("no memory for profile %d", i); - err = ENOMEM; - break; - } - /* preserve larger length XXX double check */ - olen = s->profile->oid.len; - if (olen < pf->oid.len) - olen = pf->oid.len; - bcopy(pf, s->profile, pf->oid.len); - s->profile->oid.len = olen; - } - DN_BH_WUNLOCK(); - return err; -} - -/* - * Delete all objects: - */ -static void -dummynet_flush(void) -{ - - /* delete all schedulers and related links/queues/flowsets */ - dn_ht_scan(dn_cfg.schedhash, schk_delete_cb, - (void *)(uintptr_t)DN_DELETE_FS); - /* delete all remaining (unlinked) flowsets */ - DX(4, "still %d unlinked fs", dn_cfg.fsk_count); - dn_ht_free(dn_cfg.fshash, DNHT_REMOVE); - fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS); - /* Reinitialize system heap... */ - heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id)); -} - -/* - * Main handler for configuration. We are guaranteed to be called - * with an oid which is at least a dn_id. 
- * - the first object is the command (config, delete, flush, ...) - * - config_link must be issued after the corresponding config_sched - * - parameters (DN_TXT) for an object must preceed the object - * processed on a config_sched. - */ -int -do_config(void *p, int l) -{ - struct dn_id *next, *o; - int err = 0, err2 = 0; - struct dn_id *arg = NULL; - uintptr_t *a; - - o = p; - if (o->id != DN_API_VERSION) { - D("invalid api version got %d need %d", - o->id, DN_API_VERSION); - return EINVAL; - } - for (; l >= sizeof(*o); o = next) { - struct dn_id *prev = arg; - if (o->len < sizeof(*o) || l < o->len) { - D("bad len o->len %d len %d", o->len, l); - err = EINVAL; - break; - } - l -= o->len; - next = (struct dn_id *)((char *)o + o->len); - err = 0; - switch (o->type) { - default: - D("cmd %d not implemented", o->type); - break; - -#ifdef EMULATE_SYSCTL - /* sysctl emulation. - * if we recognize the command, jump to the correct - * handler and return - */ - case DN_SYSCTL_SET: - err = kesysctl_emu_set(p, l); - return err; -#endif - - case DN_CMD_CONFIG: /* simply a header */ - break; - - case DN_CMD_DELETE: - /* the argument is in the first uintptr_t after o */ - a = (uintptr_t *)(o+1); - if (o->len < sizeof(*o) + sizeof(*a)) { - err = EINVAL; - break; - } - switch (o->subtype) { - case DN_LINK: - /* delete base and derived schedulers */ - DN_BH_WLOCK(); - err = delete_schk(*a); - err2 = delete_schk(*a + DN_MAX_ID); - DN_BH_WUNLOCK(); - if (!err) - err = err2; - break; - - default: - D("invalid delete type %d", - o->subtype); - err = EINVAL; - break; - - case DN_FS: - err = (*a <1 || *a >= DN_MAX_ID) ? - EINVAL : delete_fs(*a, 0) ; - break; - } - break; - - case DN_CMD_FLUSH: - DN_BH_WLOCK(); - dummynet_flush(); - DN_BH_WUNLOCK(); - break; - case DN_TEXT: /* store argument the next block */ - prev = NULL; - arg = o; - break; - case DN_LINK: - err = config_link((struct dn_link *)o, arg); - break; - case DN_PROFILE: - err = config_profile((struct dn_profile *)o, arg); - break; - case DN_SCH: - err = config_sched((struct dn_sch *)o, arg); - break; - case DN_FS: - err = (NULL==config_fs((struct dn_fs *)o, arg, 0)); - break; - } - if (prev) - arg = NULL; - if (err != 0) - break; - } - return err; -} - -static int -compute_space(struct dn_id *cmd, struct copy_args *a) -{ - int x = 0, need = 0; - int profile_size = sizeof(struct dn_profile) - - ED_MAX_SAMPLES_NO*sizeof(int); - - /* NOTE about compute space: - * NP = dn_cfg.schk_count - * NSI = dn_cfg.si_count - * NF = dn_cfg.fsk_count - * NQ = dn_cfg.queue_count - * - ipfw pipe show - * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler - * link, scheduler template, flowset - * integrated in scheduler and header - * for flowset list - * (NSI)*(dn_flow) all scheduler instance (includes - * the queue instance) - * - ipfw sched show - * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler - * link, scheduler template, flowset - * integrated in scheduler and header - * for flowset list - * (NSI * dn_flow) all scheduler instances - * (NF * sizeof(uint_32)) space for flowset list linked to scheduler - * (NQ * dn_queue) all queue [XXXfor now not listed] - * - ipfw queue show - * (NF * dn_fs) all flowset - * (NQ * dn_queue) all queues - */ - switch (cmd->subtype) { - default: - return -1; - /* XXX where do LINK and SCH differ ? */ - /* 'ipfw sched show' could list all queues associated to - * a scheduler. 
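do_config() above treats its input as a stream of length-prefixed objects and steps through them by o->len. A minimal userland walk of such a stream; the 8-byte header below is a stand-in for struct dn_id, kept only close enough to show the traversal and the length sanity check:

#include <stdio.h>
#include <stdint.h>

/* stand-in header, close enough to struct dn_id for the traversal */
struct hdr {
    uint16_t len;       /* total object length, header included */
    uint8_t  type;
    uint8_t  subtype;
    uint32_t id;
};

static void
walk(void *p, int l)
{
    struct hdr *o, *next;

    for (o = p; l >= (int)sizeof(*o); o = next) {
        if (o->len < sizeof(*o) || l < o->len) {
            printf("bad len %u, stop\n", o->len);
            return;
        }
        printf("object type %u, len %u\n", o->type, o->len);
        l -= o->len;
        next = (struct hdr *)((char *)o + o->len);
    }
}

int
main(void)
{
    uint32_t buf[8] = { 0 };
    struct hdr *o = (struct hdr *)buf;

    o->len = 16; o->type = 1;               /* command + payload */
    o = (struct hdr *)((char *)buf + 16);
    o->len = 16; o->type = 2;               /* following object */
    walk(buf, (int)sizeof(buf));
    return 0;
}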
This feature for now is disabled - */ - case DN_LINK: /* pipe show */ - x = DN_C_LINK | DN_C_SCH | DN_C_FLOW; - need += dn_cfg.schk_count * - (sizeof(struct dn_fs) + profile_size) / 2; - need += dn_cfg.fsk_count * sizeof(uint32_t); - break; - case DN_SCH: /* sched show */ - need += dn_cfg.schk_count * - (sizeof(struct dn_fs) + profile_size) / 2; - need += dn_cfg.fsk_count * sizeof(uint32_t); - x = DN_C_SCH | DN_C_LINK | DN_C_FLOW; - break; - case DN_FS: /* queue show */ - x = DN_C_FS | DN_C_QUEUE; - break; - case DN_GET_COMPAT: /* compatibility mode */ - need = dn_compat_calc_size(); - break; - } - a->flags = x; - if (x & DN_C_SCH) { - need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2; - /* NOT also, each fs might be attached to a sched */ - need += dn_cfg.schk_count * sizeof(struct dn_id) / 2; - } - if (x & DN_C_FS) - need += dn_cfg.fsk_count * sizeof(struct dn_fs); - if (x & DN_C_LINK) { - need += dn_cfg.schk_count * sizeof(struct dn_link) / 2; - } - /* - * When exporting a queue to userland, only pass up the - * struct dn_flow, which is the only visible part. - */ - - if (x & DN_C_QUEUE) - need += dn_cfg.queue_count * sizeof(struct dn_flow); - if (x & DN_C_FLOW) - need += dn_cfg.si_count * (sizeof(struct dn_flow)); - return need; -} - -/* - * If compat != NULL dummynet_get is called in compatibility mode. - * *compat will be the pointer to the buffer to pass to ipfw - */ -int -dummynet_get(struct sockopt *sopt, void **compat) -{ - int have, i, need, error; - char *start = NULL, *buf; - size_t sopt_valsize; - struct dn_id *cmd; - struct copy_args a; - struct copy_range r; - int l = sizeof(struct dn_id); - - bzero(&a, sizeof(a)); - bzero(&r, sizeof(r)); - - /* save and restore original sopt_valsize around copyin */ - sopt_valsize = sopt->sopt_valsize; - - cmd = &r.o; - - if (!compat) { - /* copy at least an oid, and possibly a full object */ - error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); - sopt->sopt_valsize = sopt_valsize; - if (error) - goto done; - l = cmd->len; -#ifdef EMULATE_SYSCTL - /* sysctl emulation. */ - if (cmd->type == DN_SYSCTL_GET) - return kesysctl_emu_get(sopt); -#endif - if (l > sizeof(r)) { - /* request larger than default, allocate buffer */ - cmd = malloc(l, M_DUMMYNET, M_WAITOK); - error = sooptcopyin(sopt, cmd, l, l); - sopt->sopt_valsize = sopt_valsize; - if (error) - goto done; - } - } else { /* compatibility */ - error = 0; - cmd->type = DN_CMD_GET; - cmd->len = sizeof(struct dn_id); - cmd->subtype = DN_GET_COMPAT; - // cmd->id = sopt_valsize; - D("compatibility mode"); - } - a.extra = (struct copy_range *)cmd; - if (cmd->len == sizeof(*cmd)) { /* no range, create a default */ - uint32_t *rp = (uint32_t *)(cmd + 1); - cmd->len += 2* sizeof(uint32_t); - rp[0] = 1; - rp[1] = DN_MAX_ID - 1; - if (cmd->subtype == DN_LINK) { - rp[0] += DN_MAX_ID; - rp[1] += DN_MAX_ID; - } - } - /* Count space (under lock) and allocate (outside lock). - * Exit with lock held if we manage to get enough buffer. - * Try a few times then give up. 
- */ - for (have = 0, i = 0; i < 10; i++) { - DN_BH_WLOCK(); - need = compute_space(cmd, &a); - - /* if there is a range, ignore value from compute_space() */ - if (l > sizeof(*cmd)) - need = sopt_valsize - sizeof(*cmd); - - if (need < 0) { - DN_BH_WUNLOCK(); - error = EINVAL; - goto done; - } - need += sizeof(*cmd); - cmd->id = need; - if (have >= need) - break; - - DN_BH_WUNLOCK(); - if (start) - free(start, M_DUMMYNET); - start = NULL; - if (need > sopt_valsize) - break; - - have = need; - start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO); - } - - if (start == NULL) { - if (compat) { - *compat = NULL; - error = 1; // XXX - } else { - error = sooptcopyout(sopt, cmd, sizeof(*cmd)); - } - goto done; - } - ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, " - "%d:%d si %d, %d:%d queues %d", - dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH, - dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK, - dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS, - dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I, - dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE); - sopt->sopt_valsize = sopt_valsize; - a.type = cmd->subtype; - - if (compat == NULL) { - bcopy(cmd, start, sizeof(*cmd)); - ((struct dn_id*)(start))->len = sizeof(struct dn_id); - buf = start + sizeof(*cmd); - } else - buf = start; - a.start = &buf; - a.end = start + have; - /* start copying other objects */ - if (compat) { - a.type = DN_COMPAT_PIPE; - dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a); - a.type = DN_COMPAT_QUEUE; - dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a); - } else if (a.type == DN_FS) { - dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a); - } else { - dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a); - } - DN_BH_WUNLOCK(); - - if (compat) { - *compat = start; - sopt->sopt_valsize = buf - start; - /* free() is done by ip_dummynet_compat() */ - start = NULL; //XXX hack - } else { - error = sooptcopyout(sopt, start, buf - start); - } -done: - if (cmd && cmd != &r.o) - free(cmd, M_DUMMYNET); - if (start) - free(start, M_DUMMYNET); - return error; -} - -/* Callback called on scheduler instance to delete it if idle */ -static int -drain_scheduler_cb(void *_si, void *arg) -{ - struct dn_sch_inst *si = _si; - - if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL) - return 0; - - if (si->sched->fp->flags & DN_MULTIQUEUE) { - if (si->q_count == 0) - return si_destroy(si, NULL); - else - return 0; - } else { /* !DN_MULTIQUEUE */ - if ((si+1)->ni.length == 0) - return si_destroy(si, NULL); - else - return 0; - } - return 0; /* unreachable */ -} - -/* Callback called on scheduler to check if it has instances */ -static int -drain_scheduler_sch_cb(void *_s, void *arg) -{ - struct dn_schk *s = _s; - - if (s->sch.flags & DN_HAVE_MASK) { - dn_ht_scan_bucket(s->siht, &s->drain_bucket, - drain_scheduler_cb, NULL); - s->drain_bucket++; - } else { - if (s->siht) { - if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL) - s->siht = NULL; - } - } - return 0; -} - -/* Called every tick, try to delete a 'bucket' of scheduler */ -void -dn_drain_scheduler(void) -{ - dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch, - drain_scheduler_sch_cb, NULL); - dn_cfg.drain_sch++; -} - -/* Callback called on queue to delete if it is idle */ -static int -drain_queue_cb(void *_q, void *arg) -{ - struct dn_queue *q = _q; - - if (q->ni.length == 0) { - dn_delete_queue(q, DN_DESTROY); - return DNHT_SCAN_DEL; /* queue is deleted */ - } - - return 0; /* queue isn't deleted */ -} - -/* Callback called on flowset used to 
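dummynet_get() above sizes the reply under the lock but allocates with M_WAITOK outside it, looping because objects may come and go while the lock is dropped. The shape of that loop, reduced to userland; the lock calls are left as comments and the growing sizes are faked:

#include <stdio.h>
#include <stdlib.h>

/* pretend the object set grew once between lock drops */
static int
compute_need(void)
{
    static const int sizes[] = { 1500, 2000, 2000 };
    static int call;

    return (sizes[call < 2 ? call++ : 2]);
}

int
main(void)
{
    char *buf = NULL;
    int have = 0, need = 0, i;

    for (i = 0; i < 10; i++) {
        /* DN_BH_WLOCK(); */
        need = compute_need();
        if (have >= need)
            break;              /* keep the lock and start copying */
        /* DN_BH_WUNLOCK(); */
        free(buf);
        if ((buf = malloc(need)) == NULL)
            return (1);         /* may sleep, hence outside the lock */
        have = need;
    }
    printf("settled after %d pass(es) with %d bytes\n", i + 1, have);
    free(buf);
    return 0;
}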
check if it has queues */ -static int -drain_queue_fs_cb(void *_fs, void *arg) -{ - struct dn_fsk *fs = _fs; - - if (fs->fs.flags & DN_QHT_HASH) { - /* Flowset has a hash table for queues */ - dn_ht_scan_bucket(fs->qht, &fs->drain_bucket, - drain_queue_cb, NULL); - fs->drain_bucket++; - } else { - /* No hash table for this flowset, null the pointer - * if the queue is deleted - */ - if (fs->qht) { - if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL) - fs->qht = NULL; - } - } - return 0; -} - -/* Called every tick, try to delete a 'bucket' of queue */ -void -dn_drain_queue(void) -{ - /* scan a bucket of flowset */ - dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs, - drain_queue_fs_cb, NULL); - dn_cfg.drain_fs++; -} - -/* - * Handler for the various dummynet socket options - */ -static int -ip_dn_ctl(struct sockopt *sopt) -{ - void *p = NULL; - int error, l; - - error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET); - if (error) - return (error); - - /* Disallow sets in really-really secure mode. */ - if (sopt->sopt_dir == SOPT_SET) { - error = securelevel_ge(sopt->sopt_td->td_ucred, 3); - if (error) - return (error); - } - - switch (sopt->sopt_name) { - default : - D("dummynet: unknown option %d", sopt->sopt_name); - error = EINVAL; - break; - - case IP_DUMMYNET_FLUSH: - case IP_DUMMYNET_CONFIGURE: - case IP_DUMMYNET_DEL: /* remove a pipe or queue */ - case IP_DUMMYNET_GET: - D("dummynet: compat option %d", sopt->sopt_name); - error = ip_dummynet_compat(sopt); - break; - - case IP_DUMMYNET3 : - if (sopt->sopt_dir == SOPT_GET) { - error = dummynet_get(sopt, NULL); - break; - } - l = sopt->sopt_valsize; - if (l < sizeof(struct dn_id) || l > 12000) { - D("argument len %d invalid", l); - break; - } - p = malloc(l, M_TEMP, M_WAITOK); // XXX can it fail ? - error = sooptcopyin(sopt, p, l, l); - if (error) - break ; - error = do_config(p, l); - break; - } - - if (p != NULL) - free(p, M_TEMP); - - return error ; -} - - -static void -ip_dn_init(void) -{ - if (dn_cfg.init_done) - return; - printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet); - dn_cfg.init_done = 1; - /* Set defaults here. MSVC does not accept initializers, - * and this is also useful for vimages - */ - /* queue limits */ - dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */ - dn_cfg.byte_limit = 1024 * 1024; - dn_cfg.expire = 1; - - /* RED parameters */ - dn_cfg.red_lookup_depth = 256; /* default lookup table depth */ - dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */ - dn_cfg.red_max_pkt_size = 1500; /* default max packet size */ - - /* hash tables */ - dn_cfg.max_hash_size = 65536; /* max in the hash tables */ - dn_cfg.hash_size = 64; /* default hash size */ - - /* create hash tables for schedulers and flowsets. - * In both we search by key and by pointer. 
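The dn_drain_scheduler()/dn_drain_queue() routines above bound cleanup cost by visiting one hash bucket per tick through a rotating cursor (drain_sch and drain_fs in dn_cfg). The idea in miniature, with a plain counter standing in for the cursor and the bucket scan:

#include <stdio.h>

#define NBUCKETS 16     /* stand-in for the hash table width */

int
main(void)
{
    unsigned cursor = 0;    /* cf. dn_cfg.drain_sch / drain_fs */

    for (int tick = 0; tick < 5; tick++)
        printf("tick %d: scan bucket %u for idle entries\n",
            tick, cursor++ % NBUCKETS);
    return 0;
}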
- */ - dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size, - offsetof(struct dn_schk, schk_next), - schk_hash, schk_match, schk_new); - dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size, - offsetof(struct dn_fsk, fsk_next), - fsk_hash, fsk_match, fsk_new); - - /* bucket index to drain object */ - dn_cfg.drain_fs = 0; - dn_cfg.drain_sch = 0; - - heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id)); - SLIST_INIT(&dn_cfg.fsu); - SLIST_INIT(&dn_cfg.schedlist); - - DN_LOCK_INIT(); - - TASK_INIT(&dn_task, 0, dummynet_task, curvnet); - dn_tq = taskqueue_create("dummynet", M_WAITOK, - taskqueue_thread_enqueue, &dn_tq); - taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); - - callout_init(&dn_timeout, CALLOUT_MPSAFE); - callout_reset(&dn_timeout, 1, dummynet, NULL); - - /* Initialize curr_time adjustment mechanics. */ - getmicrouptime(&dn_cfg.prev_t); -} - -static void -ip_dn_destroy(int last) -{ - callout_drain(&dn_timeout); - - DN_BH_WLOCK(); - if (last) { - ND("removing last instance\n"); - ip_dn_ctl_ptr = NULL; - ip_dn_io_ptr = NULL; - } - - dummynet_flush(); - DN_BH_WUNLOCK(); - taskqueue_drain(dn_tq, &dn_task); - taskqueue_free(dn_tq); - - dn_ht_free(dn_cfg.schedhash, 0); - dn_ht_free(dn_cfg.fshash, 0); - heap_free(&dn_cfg.evheap); - - DN_LOCK_DESTROY(); -} - -static int -dummynet_modevent(module_t mod, int type, void *data) -{ - - if (type == MOD_LOAD) { - if (ip_dn_io_ptr) { - printf("DUMMYNET already loaded\n"); - return EEXIST ; - } - ip_dn_init(); - ip_dn_ctl_ptr = ip_dn_ctl; - ip_dn_io_ptr = dummynet_io; - return 0; - } else if (type == MOD_UNLOAD) { - ip_dn_destroy(1 /* last */); - return 0; - } else - return EOPNOTSUPP; -} - -/* modevent helpers for the modules */ -static int -load_dn_sched(struct dn_alg *d) -{ - struct dn_alg *s; - - if (d == NULL) - return 1; /* error */ - ip_dn_init(); /* just in case, we need the lock */ - - /* Check that mandatory funcs exists */ - if (d->enqueue == NULL || d->dequeue == NULL) { - D("missing enqueue or dequeue for %s", d->name); - return 1; - } - - /* Search if scheduler already exists */ - DN_BH_WLOCK(); - SLIST_FOREACH(s, &dn_cfg.schedlist, next) { - if (strcmp(s->name, d->name) == 0) { - D("%s already loaded", d->name); - break; /* scheduler already exists */ - } - } - if (s == NULL) - SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next); - DN_BH_WUNLOCK(); - D("dn_sched %s %sloaded", d->name, s ? "not ":""); - return s ? 1 : 0; -} - -static int -unload_dn_sched(struct dn_alg *s) -{ - struct dn_alg *tmp, *r; - int err = EINVAL; - - ND("called for %s", s->name); - - DN_BH_WLOCK(); - SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) { - if (strcmp(s->name, r->name) != 0) - continue; - ND("ref_count = %d", r->ref_count); - err = (r->ref_count != 0) ? EBUSY : 0; - if (err == 0) - SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next); - break; - } - DN_BH_WUNLOCK(); - D("dn_sched %s %sunloaded", s->name, err ? "not ":""); - return err; -} - -int -dn_sched_modevent(module_t mod, int cmd, void *arg) -{ - struct dn_alg *sch = arg; - - if (cmd == MOD_LOAD) - return load_dn_sched(sch); - else if (cmd == MOD_UNLOAD) - return unload_dn_sched(sch); - else - return EINVAL; -} - -static moduledata_t dummynet_mod = { - "dummynet", dummynet_modevent, NULL -}; - -#define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN -#define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ -DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); -MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); -MODULE_VERSION(dummynet, 3); - -/* - * Starting up. 
Done in order after dummynet_modevent() has been called. - * VNET_SYSINIT is also called for each existing vnet and each new vnet. - */ -//VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL); - -/* - * Shutdown handlers up shop. These are done in REVERSE ORDER, but still - * after dummynet_modevent() has been called. Not called on reboot. - * VNET_SYSUNINIT is also called for each exiting vnet as it exits. - * or when the module is unloaded. - */ -//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL); - -/* end of file */ diff --git a/freebsd/sys/netpfil/ipfw/ip_fw2.c b/freebsd/sys/netpfil/ipfw/ip_fw2.c index 224ba937..a3a11819 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw2.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw2.c @@ -36,7 +36,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_ipdivert.h> #include <rtems/bsd/local/opt_inet.h> #ifndef INET -#error IPFIREWALL requires INET. +#error "IPFIREWALL requires INET" #endif /* INET */ #include <rtems/bsd/local/opt_inet6.h> #include <rtems/bsd/local/opt_ipsec.h> @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <sys/systm.h> #include <sys/condvar.h> +#include <sys/counter.h> #include <sys/eventhandler.h> #include <sys/malloc.h> #include <sys/mbuf.h> @@ -54,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include <sys/priv.h> #include <sys/proc.h> #include <sys/rwlock.h> +#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/sysctl.h> @@ -61,11 +63,13 @@ __FBSDID("$FreeBSD$"); #include <sys/ucred.h> #include <net/ethernet.h> /* for ETHERTYPE_IP */ #include <net/if.h> +#include <net/if_var.h> #include <net/route.h> -#include <net/pf_mtag.h> #include <net/pfil.h> #include <net/vnet.h> +#include <netpfil/pf/pf_mtag.h> + #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/in_pcb.h> @@ -82,7 +86,9 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip6.h> #include <netinet/icmp6.h> +#include <netinet/in_fib.h> #ifdef INET6 +#include <netinet6/in6_fib.h> #include <netinet6/in6_pcb.h> #include <netinet6/scope6_var.h> #include <netinet6/ip6_var.h> @@ -101,10 +107,6 @@ __FBSDID("$FreeBSD$"); * All ipfw global variables are here. */ -/* ipfw_vnet_ready controls when we are open for business */ -static VNET_DEFINE(int, ipfw_vnet_ready) = 0; -#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready) - static VNET_DEFINE(int, fw_deny_unknown_exthdrs); #define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs) @@ -121,9 +123,20 @@ VNET_DEFINE(int, autoinc_step); VNET_DEFINE(int, fw_one_pass) = 1; VNET_DEFINE(unsigned int, fw_tables_max); +VNET_DEFINE(unsigned int, fw_tables_sets) = 0; /* Don't use set-aware tables */ /* Use 128 tables by default */ static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT; +#ifndef LINEAR_SKIPTO +static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, + int tablearg, int jump_backwards); +#define JUMP(ch, f, num, targ, back) jump_fast(ch, f, num, targ, back) +#else +static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, + int tablearg, int jump_backwards); +#define JUMP(ch, f, num, targ, back) jump_linear(ch, f, num, targ, back) +#endif + /* * Each rule belongs to one of 32 different sets (0..31). * The variable set_disable contains one bit per set. 
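The new JUMP() selection above picks between two skipto strategies: jump_fast() binary-searches the sorted rule map (caching the result in the rule), while jump_linear() indexes a rulenum-to-position array when LINEAR_SKIPTO is set. A sketch contrasting the two lookups over stand-in data (the kernel uses chain->map and chain->idxmap):

#include <stdio.h>

static int rulenums[] = { 100, 200, 300, 65535 };   /* sorted rules */
static int idxmap[65536];       /* jump_linear: rulenum -> position */

static int
find_rule(int num)              /* ipfw_find_rule(), simplified */
{
    int lo = 0, hi = 3, mid;

    while (lo < hi) {
        mid = (lo + hi) / 2;
        if (rulenums[mid] < num)
            lo = mid + 1;
        else
            hi = mid;
    }
    return (lo);
}

int
main(void)
{
    for (int n = 0, pos = 0; n < 65536; n++) {
        while (rulenums[pos] < n)
            pos++;
        idxmap[n] = pos;        /* first rule with number >= n */
    }
    printf("skipto 250: search -> pos %d, idxmap -> pos %d\n",
        find_rule(250), idxmap[250]);
    return 0;
}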
@@ -144,6 +157,9 @@ VNET_DEFINE(int, verbose_limit); /* layer3_chain contains the list of rules for layer 3 */ VNET_DEFINE(struct ip_fw_chain, layer3_chain); +/* ipfw_vnet_ready controls when we are open for business */ +VNET_DEFINE(int, ipfw_vnet_ready) = 0; + VNET_DEFINE(int, ipfw_nat_ready) = 0; ipfw_nat_t *ipfw_nat_ptr = NULL; @@ -156,45 +172,51 @@ ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; #ifdef SYSCTL_NODE uint32_t dummy_def = IPFW_DEFAULT_RULE; static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS); +static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS); SYSBEGIN(f3) SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass, - CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, + CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, "Only do a single pass through ipfw when using dummynet(4)"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, - CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, "Rule number auto-increment step"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose, - CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0, +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, + CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0, "Log matches to ipfw rules"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, - CTLFLAG_RW, &VNET_NAME(verbose_limit), 0, +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0, "Set upper limit of matches of ipfw rules logged"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, &dummy_def, 0, "The default/max possible rule number."); -SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, tables_max, - CTLTYPE_UINT|CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU", - "Maximum number of tables"); +SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_max, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU", + "Maximum number of concurrently used tables"); +SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, + 0, 0, sysctl_ipfw_tables_sets, "IU", + "Use per-set namespace for tables"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, &default_to_accept, 0, "Make the default rule accept all packets."); -TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept); -TUNABLE_INT("net.inet.ip.fw.tables_max", &default_fw_tables); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count, - CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0, +TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, + CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0, "Number of static rules"); #ifdef INET6 SYSCTL_DECL(_net_inet6_ip6); SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); -SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, - CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0, +SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, + CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE, + &VNET_NAME(fw_deny_unknown_exthdrs), 0, "Deny packets with unknown IPv6 Extension Headers"); -SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6, - CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_permit_single_frag6), 0, 
+SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6, + CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE, + &VNET_NAME(fw_permit_single_frag6), 0, "Permit single packet IPv6 fragments"); #endif /* INET6 */ @@ -352,15 +374,18 @@ tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) } static int -iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uint32_t *tablearg) +iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, + uint32_t *tablearg) { + if (ifp == NULL) /* no iface with this packet, match fails */ - return 0; + return (0); + /* Check by name or by IP address */ if (cmd->name[0] != '\0') { /* match by name */ if (cmd->name[0] == '\1') /* use tablearg to match */ - return ipfw_lookup_table_extended(chain, cmd->p.glob, - ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE); + return ipfw_lookup_table_extended(chain, cmd->p.kidx, 0, + &ifp->if_index, tablearg); /* Check name */ if (cmd->p.glob) { if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) @@ -370,7 +395,7 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uin return(1); } } else { -#ifdef __FreeBSD__ /* and OSX too ? */ +#if !defined(USERSPACE) && defined(__FreeBSD__) /* and OSX too ? */ struct ifaddr *ia; if_addr_rlock(ifp); @@ -413,50 +438,33 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uin static int verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) { -#ifndef __FreeBSD__ +#if defined(USERSPACE) || !defined(__FreeBSD__) return 0; #else - struct route ro; - struct sockaddr_in *dst; - - bzero(&ro, sizeof(ro)); - - dst = (struct sockaddr_in *)&(ro.ro_dst); - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr = src; - in_rtalloc_ign(&ro, 0, fib); + struct nhop4_basic nh4; - if (ro.ro_rt == NULL) - return 0; + if (fib4_lookup_nh_basic(fib, src, NHR_IFAIF, 0, &nh4) != 0) + return (0); /* * If ifp is provided, check for equality with rtentry. * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, * in order to pass packets injected back by if_simloop(): - * if useloopback == 1 routing entry (via lo0) for our own address + * routing entry (via lo0) for our own address * may exist, so we need to handle routing assymetry. 
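The verify_path() rewrite here (its remaining checks continue just below) replaces the rtalloc/RTFREE dance with a single nexthop lookup plus three tests. A sketch of the resulting decision logic with stand-in nexthop and interface types; the flag values and names are illustrative, not the kernel's:

#include <stdio.h>
#include <string.h>

#define NHF_DEFAULT   0x1       /* illustrative flag values */
#define NHF_REJECT    0x2
#define NHF_BLACKHOLE 0x4

struct nhop {                   /* stand-in for struct nhop4_basic */
    int nh_flags;
    const char *nh_ifp;
};

static int
rpf_ok(const struct nhop *nh, const char *ifp)
{
    if (nh == NULL)
        return (0);             /* no route back to the source */
    if (ifp != NULL)            /* antispoof: must match that iface */
        return (strcmp(nh->nh_ifp, ifp) == 0);
    if (nh->nh_flags & NHF_DEFAULT)
        return (0);             /* default route does not count */
    if (nh->nh_flags & (NHF_REJECT | NHF_BLACKHOLE))
        return (0);
    return (1);                 /* found a usable route */
}

int
main(void)
{
    struct nhop via_em0 = { 0, "em0" };

    printf("routed via em0, arrived on em0: %d\n",
        rpf_ok(&via_em0, "em0"));
    printf("routed via em0, arrived on em1: %d\n",
        rpf_ok(&via_em0, "em1"));
    return 0;
}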
*/ - if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { - RTFREE(ro.ro_rt); - return 0; - } + if (ifp != NULL && ifp != nh4.nh_ifp) + return (0); /* if no ifp provided, check if rtentry is not default route */ - if (ifp == NULL && - satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) { - RTFREE(ro.ro_rt); - return 0; - } + if (ifp == NULL && (nh4.nh_flags & NHF_DEFAULT) != 0) + return (0); /* or if this is a blackhole/reject route */ - if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { - RTFREE(ro.ro_rt); - return 0; - } + if (ifp == NULL && (nh4.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0) + return (0); /* found valid route */ - RTFREE(ro.ro_rt); return 1; #endif /* __FreeBSD__ */ } @@ -482,79 +490,62 @@ flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) } /* support for IP6_*_ME opcodes */ +static const struct in6_addr lla_mask = {{{ + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}}}; + static int -search_ip6_addr_net (struct in6_addr * ip6_addr) +ipfw_localip6(struct in6_addr *in6) { - struct ifnet *mdc; - struct ifaddr *mdc2; - struct in6_ifaddr *fdm; - struct in6_addr copia; - - TAILQ_FOREACH(mdc, &V_ifnet, if_link) { - if_addr_rlock(mdc); - TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) { - if (mdc2->ifa_addr->sa_family == AF_INET6) { - fdm = (struct in6_ifaddr *)mdc2; - copia = fdm->ia_addr.sin6_addr; - /* need for leaving scope_id in the sock_addr */ - in6_clearscope(&copia); - if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) { - if_addr_runlock(mdc); - return 1; - } - } + struct rm_priotracker in6_ifa_tracker; + struct in6_ifaddr *ia; + + if (IN6_IS_ADDR_MULTICAST(in6)) + return (0); + + if (!IN6_IS_ADDR_LINKLOCAL(in6)) + return (in6_localip(in6)); + + IN6_IFADDR_RLOCK(&in6_ifa_tracker); + TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + if (!IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) + continue; + if (IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr, + in6, &lla_mask)) { + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); + return (1); } - if_addr_runlock(mdc); } - return 0; + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); + return (0); } static int verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) { - struct route_in6 ro; - struct sockaddr_in6 *dst; + struct nhop6_basic nh6; - bzero(&ro, sizeof(ro)); - - dst = (struct sockaddr_in6 * )&(ro.ro_dst); - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof(*dst); - dst->sin6_addr = *src; + if (IN6_IS_SCOPE_LINKLOCAL(src)) + return (1); - in6_rtalloc_ign(&ro, 0, fib); - if (ro.ro_rt == NULL) - return 0; + if (fib6_lookup_nh_basic(fib, src, 0, NHR_IFAIF, 0, &nh6) != 0) + return (0); - /* - * if ifp is provided, check for equality with rtentry - * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, - * to support the case of sending packets to an address of our own. - * (where the former interface is the first argument of if_simloop() - * (=ifp), the latter is lo0) - */ - if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { - RTFREE(ro.ro_rt); - return 0; - } + /* If ifp is provided, check for equality with route table. 
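ipfw_localip6() above compares link-local addresses through lla_mask because bytes 2-3 of a stored link-local address carry the embedded zone id, which must not affect the match. A byte-wise sketch of that masked comparison; raw arrays stand in for struct in6_addr:

#include <stdio.h>
#include <stdint.h>

/* byte-wise equivalent of IN6_ARE_MASKED_ADDR_EQUAL() */
static int
masked_equal(const uint8_t *a, const uint8_t *b, const uint8_t *m)
{
    for (int i = 0; i < 16; i++)
        if ((a[i] & m[i]) != (b[i] & m[i]))
            return (0);
    return (1);
}

int
main(void)
{
    /* lla_mask: bytes 2-3 (the embedded zone id) are ignored */
    uint8_t mask[16] = { 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
    uint8_t a[16] = { 0xfe, 0x80, 0x00, 0x01 };     /* fe80::1, zone 1 */
    uint8_t b[16] = { 0xfe, 0x80, 0x00, 0x02 };     /* fe80::1, zone 2 */

    a[15] = b[15] = 1;      /* both are ...::1 */
    printf("same address once the zone id is masked: %d\n",
        masked_equal(a, b, mask));
    return 0;
}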
*/ + if (ifp != NULL && ifp != nh6.nh_ifp) + return (0); /* if no ifp provided, check if rtentry is not default route */ - if (ifp == NULL && - IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) { - RTFREE(ro.ro_rt); - return 0; - } + if (ifp == NULL && (nh6.nh_flags & NHF_DEFAULT) != 0) + return (0); /* or if this is a blackhole/reject route */ - if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { - RTFREE(ro.ro_rt); - return 0; - } + if (ifp == NULL && (nh6.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0) + return (0); /* found valid route */ - RTFREE(ro.ro_rt); return 1; - } static int @@ -632,8 +623,6 @@ send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip) m_adj(m, args->L3offset); #endif if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ - /* We need the IP header in host order for icmp_error(). */ - SET_HOST_IPLEN(ip); icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); } else if (args->f_id.proto == IPPROTO_TCP) { struct tcphdr *const tcp = @@ -666,6 +655,9 @@ static int check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp, struct ucred **uc) { +#if defined(USERSPACE) + return 0; // not supported in userspace +#else #ifndef __FreeBSD__ /* XXX */ return cred_check(insn, proto, oif, @@ -776,6 +768,7 @@ check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp, #endif /* __rtems__ */ return (match); #endif /* __FreeBSD__ */ +#endif /* not supported in userspace */ } /* @@ -793,9 +786,10 @@ set_match(struct ip_fw_args *args, int slot, args->rule.rulenum = chain->map[slot]->rulenum; } +#ifndef LINEAR_SKIPTO /* * Helper function to enable cached rule lookups using - * x_next and next_rule fields in ipfw rule. + * cached_id and cached_pos fields in ipfw rule. */ static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, @@ -803,28 +797,51 @@ jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, { int f_pos; - /* If possible use cached f_pos (in f->next_rule), - * whose version is written in f->next_rule + /* If possible use cached f_pos (in f->cached_pos), + * whose version is written in f->cached_id * (horrible hacks to avoid changing the ABI). */ - if (num != IP_FW_TABLEARG && (uintptr_t)f->x_next == chain->id) - f_pos = (uintptr_t)f->next_rule; + if (num != IP_FW_TARG && f->cached_id == chain->id) + f_pos = f->cached_pos; else { - int i = IP_FW_ARG_TABLEARG(num); + int i = IP_FW_ARG_TABLEARG(chain, num, skipto); /* make sure we do not jump backward */ if (jump_backwards == 0 && i <= f->rulenum) i = f->rulenum + 1; - f_pos = ipfw_find_rule(chain, i, 0); + if (chain->idxmap != NULL) + f_pos = chain->idxmap[i]; + else + f_pos = ipfw_find_rule(chain, i, 0); /* update the cache */ - if (num != IP_FW_TABLEARG) { - f->next_rule = (void *)(uintptr_t)f_pos; - f->x_next = (void *)(uintptr_t)chain->id; + if (num != IP_FW_TARG) { + f->cached_id = chain->id; + f->cached_pos = f_pos; } } return (f_pos); } +#else +/* + * Helper function to enable real fast rule lookups. + */ +static int +jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, + int tablearg, int jump_backwards) +{ + int f_pos; + + num = IP_FW_ARG_TABLEARG(chain, num, skipto); + /* make sure we do not jump backward */ + if (jump_backwards == 0 && num <= f->rulenum) + num = f->rulenum + 1; + f_pos = chain->idxmap[num]; + + return (f_pos); +} +#endif +#define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f) /* * The main check routine for the firewall. 
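The TARG() wrapper defined above resolves an opcode argument either to its literal value or, when it equals the tablearg sentinel, to the value produced by the last table lookup. A sketch of the substitution; the 65535 sentinel value is an assumption for illustration:

#include <stdio.h>
#include <stdint.h>

#define IP_FW_TARG 65535    /* assumed "use tablearg" sentinel */

static uint32_t
targ(uint16_t arg1, uint32_t tablearg)
{
    return (arg1 == IP_FW_TARG ? tablearg : arg1);
}

int
main(void)
{
    uint32_t tablearg = 42;     /* value from the last table lookup */

    printf("pipe tablearg -> pipe %u\n",
        (unsigned)targ(IP_FW_TARG, tablearg));
    printf("pipe 7        -> pipe %u\n",
        (unsigned)targ(7, tablearg));
    return 0;
}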
* @@ -929,7 +946,7 @@ ipfw_chk(struct ip_fw_args *args) * offset == 0 means that (if this is an IPv4 packet) * this is the first or only fragment. * For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header - * or there is a single packet fragement (fragement header added + * or there is a single packet fragment (fragment header added * without needed). We will treat a single packet fragment as if * there was no fragment header (or log/block depending on the * V_fw_permit_single_frag6 sysctl setting). @@ -964,6 +981,7 @@ ipfw_chk(struct ip_fw_args *args) * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) */ int dyn_dir = MATCH_UNKNOWN; + uint16_t dyn_name = 0; ipfw_dyn_rule *q = NULL; struct ip_fw_chain *chain = &V_layer3_chain; @@ -984,6 +1002,7 @@ ipfw_chk(struct ip_fw_args *args) int is_ipv4 = 0; int done = 0; /* flag to exit the outer loop */ + IPFW_RLOCK_TRACKER; if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) return (IP_FW_PASS); /* accept */ @@ -1249,9 +1268,9 @@ do { \ args->f_id.dst_port = dst_port = ntohs(dst_port); } - IPFW_RLOCK(chain); + IPFW_PF_RLOCK(chain); if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ - IPFW_RUNLOCK(chain); + IPFW_PF_RUNLOCK(chain); return (IP_FW_PASS); /* accept */ } if (args->rule.slot) { @@ -1471,9 +1490,10 @@ do { \ proto != IPPROTO_UDP) break; else if (v == 2) - key = htonl(dst_port); + key = dst_port; else if (v == 3) - key = htonl(src_port); + key = src_port; +#ifndef USERSPACE else if (v == 4 || v == 5) { check_uidgid( (ipfw_insn_u32 *)cmd, @@ -1499,8 +1519,9 @@ do { \ else if (v == 5 /* O_JAIL */) key = ucred_cache.xid; #endif /* !__FreeBSD__ */ - key = htonl(key); - } else + } +#endif /* !USERSPACE */ + else break; } match = ipfw_lookup_table(chain, @@ -1517,8 +1538,9 @@ do { \ void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ? 
&args->f_id.dst_ip6: &args->f_id.src_ip6; match = ipfw_lookup_table_extended(chain, - cmd->arg1, pkey, &v, - IPFW_TABLE_CIDR); + cmd->arg1, + sizeof(struct in6_addr), + pkey, &v); if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) match = ((ipfw_insn_u32 *)cmd)->d[0] == v; if (match) @@ -1526,6 +1548,17 @@ do { \ } break; + case O_IP_FLOW_LOOKUP: + { + uint32_t v = 0; + match = ipfw_lookup_table_extended(chain, + cmd->arg1, 0, &args->f_id, &v); + if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) + match = ((ipfw_insn_u32 *)cmd)->d[0] == v; + if (match) + tablearg = v; + } + break; case O_IP_SRC_MASK: case O_IP_DST_MASK: if (is_ipv4) { @@ -1551,7 +1584,7 @@ do { \ #ifdef INET6 /* FALLTHROUGH */ case O_IP6_SRC_ME: - match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6); + match= is_ipv6 && ipfw_localip6(&args->f_id.src_ip6); #endif break; @@ -1590,7 +1623,7 @@ do { \ #ifdef INET6 /* FALLTHROUGH */ case O_IP6_DST_ME: - match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6); + match= is_ipv6 && ipfw_localip6(&args->f_id.dst_ip6); #endif break; @@ -1697,7 +1730,7 @@ do { \ break; /* DSCP bitmask is stored as low_u32 high_u32 */ - if (x > 32) + if (x >= 32) match = *(p + 1) & (1 << (x - 32)); else match = *p & (1 << x); @@ -1732,9 +1765,11 @@ do { \ break; case O_TCPOPTS: - PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2)); - match = (proto == IPPROTO_TCP && offset == 0 && - tcpopts_match(TCP(ulp), cmd)); + if (proto == IPPROTO_TCP && offset == 0 && ulp){ + PULLUP_LEN(hlen, ulp, + (TCP(ulp)->th_off << 2)); + match = tcpopts_match(TCP(ulp), cmd); + } break; case O_TCPSEQ: @@ -1778,27 +1813,37 @@ do { \ case O_ALTQ: { struct pf_mtag *at; + struct m_tag *mtag; ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; + /* + * ALTQ uses mbuf tags from another + * packet filtering system - pf(4). + * We allocate a tag in its format + * and fill it in, pretending to be pf(4). + */ match = 1; at = pf_find_mtag(m); if (at != NULL && at->qid != 0) break; - at = pf_get_mtag(m); - if (at == NULL) { + mtag = m_tag_get(PACKET_TAG_PF, + sizeof(struct pf_mtag), M_NOWAIT | M_ZERO); + if (mtag == NULL) { /* * Let the packet fall back to the * default ALTQ. */ break; } + m_tag_prepend(m, mtag); + at = (struct pf_mtag *)(mtag + 1); at->qid = altq->qid; at->hdr = ip; break; } case O_LOG: - ipfw_log(f, hlen, args, m, + ipfw_log(chain, f, hlen, args, m, oif, offset | ip6f_mf, tablearg, ip); match = 1; break; @@ -1920,7 +1965,7 @@ do { \ case O_TAG: { struct m_tag *mtag; - uint32_t tag = IP_FW_ARG_TABLEARG(cmd->arg1); + uint32_t tag = TARG(cmd->arg1, tag); /* Packet is already tagged with this tag? */ mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); @@ -1954,6 +1999,7 @@ do { \ break; case O_SOCKARG: { +#ifndef USERSPACE /* not supported in userspace */ struct inpcb *inp = args->inp; struct inpcbinfo *pi; @@ -1972,7 +2018,7 @@ do { \ * certainly be inp_user_cookie? 
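The one-character change from 'x > 32' to 'x >= 32' in the O_DSCP case is a real off-by-one fix: the 64-bit DSCP membership mask is stored as two 32-bit words, low word first, so code point 32 lives in bit 0 of the high word, and the old test sent x == 32 into the low-word branch, where a 32-bit shift by 32 is undefined behavior. A standalone check of the corrected logic:

#include <assert.h>
#include <stdint.h>

/* DSCP bitmask stored as low_u32 high_u32, as in the rule body */
static int
dscp_match(const uint32_t *p, unsigned x)
{
	if (x >= 32)			/* code points 32..63 */
		return ((p[1] & (1u << (x - 32))) != 0);
	return ((p[0] & (1u << x)) != 0);	/* code points 0..31 */
}

int
main(void)
{
	uint32_t mask[2] = { 0, 1 };	/* only code point 32 is set */

	assert(dscp_match(mask, 32) == 1);	/* bit 0 of the high word */
	assert(dscp_match(mask, 31) == 0);
	assert(dscp_match(mask, 33) == 0);
	/* the pre-fix 'x > 32' test evaluated p[0] & (1 << 32) for
	 * x == 32, an undefined shift that missed the high word */
	return (0);
}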
*/ - /* For incomming packet, lookup up the + /* For incoming packet, lookup up the inpcb using the src/dest ip/port tuple */ if (inp == NULL) { inp = in_pcblookup(pi, @@ -1994,12 +2040,13 @@ do { \ match = 1; } } +#endif /* !USERSPACE */ break; } case O_TAGGED: { struct m_tag *mtag; - uint32_t tag = IP_FW_ARG_TABLEARG(cmd->arg1); + uint32_t tag = TARG(cmd->arg1, tag); if (cmdlen == 1) { match = m_tag_locate(m, MTAG_IPFW, @@ -2070,7 +2117,7 @@ do { \ */ case O_LIMIT: case O_KEEP_STATE: - if (ipfw_install_state(f, + if (ipfw_install_state(chain, f, (ipfw_insn_limit *)cmd, args, tablearg)) { /* error or limit violation */ retval = IP_FW_DENY; @@ -2085,17 +2132,35 @@ do { \ /* * dynamic rules are checked at the first * keep-state or check-state occurrence, - * with the result being stored in dyn_dir. + * with the result being stored in dyn_dir + * and dyn_name. * The compiler introduces a PROBE_STATE * instruction for us when we have a * KEEP_STATE (because PROBE_STATE needs * to be run first). + * + * (dyn_dir == MATCH_UNKNOWN) means this is + * first lookup for such f_id. Do lookup. + * + * (dyn_dir != MATCH_UNKNOWN && + * dyn_name != 0 && dyn_name != cmd->arg1) + * means previous lookup didn't find dynamic + * rule for specific state name and current + * lookup will search rule with another state + * name. Redo lookup. + * + * (dyn_dir != MATCH_UNKNOWN && dyn_name == 0) + * means previous lookup was for `any' name + * and it didn't find rule. No need to do + * lookup again. */ - if (dyn_dir == MATCH_UNKNOWN && + if ((dyn_dir == MATCH_UNKNOWN || + (dyn_name != 0 && + dyn_name != cmd->arg1)) && (q = ipfw_lookup_dyn_rule(&args->f_id, &dyn_dir, proto == IPPROTO_TCP ? - TCP(ulp) : NULL)) - != NULL) { + TCP(ulp): NULL, + (dyn_name = cmd->arg1))) != NULL) { /* * Found dynamic entry, update stats * and jump to the 'action' part of @@ -2137,7 +2202,7 @@ do { \ case O_PIPE: case O_QUEUE: set_match(args, f_pos, chain); - args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1); + args->rule.info = TARG(cmd->arg1, pipe); if (cmd->opcode == O_PIPE) args->rule.info |= IPFW_IS_PIPE; if (V_fw_one_pass) @@ -2157,7 +2222,7 @@ do { \ retval = (cmd->opcode == O_DIVERT) ? IP_FW_DIVERT : IP_FW_TEE; set_match(args, f_pos, chain); - args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1); + args->rule.info = TARG(cmd->arg1, divert); break; case O_COUNT: @@ -2167,7 +2232,7 @@ do { \ case O_SKIPTO: IPFW_INC_RULE_COUNTER(f, pktlen); - f_pos = jump_fast(chain, f, cmd->arg1, tablearg, 0); + f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0); /* * Skip disabled rules, and re-enter * the inner loop with the correct @@ -2256,7 +2321,7 @@ do { \ if (IS_CALL) { stack[mtag->m_tag_id] = f->rulenum; mtag->m_tag_id++; - f_pos = jump_fast(chain, f, cmd->arg1, + f_pos = JUMP(chain, f, cmd->arg1, tablearg, 1); } else { /* `return' action */ mtag->m_tag_id--; @@ -2328,13 +2393,48 @@ do { \ if (q == NULL || q->rule != f || dyn_dir == MATCH_FORWARD) { struct sockaddr_in *sa; + sa = &(((ipfw_insn_sa *)cmd)->sa); if (sa->sin_addr.s_addr == INADDR_ANY) { - bcopy(sa, &args->hopstore, - sizeof(*sa)); - args->hopstore.sin_addr.s_addr = - htonl(tablearg); - args->next_hop = &args->hopstore; +#ifdef INET6 + /* + * We use O_FORWARD_IP opcode for + * fwd rule with tablearg, but tables + * now support IPv6 addresses. And + * when we are inspecting IPv6 packet, + * we can use nh6 field from + * table_value as next_hop6 address. 
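The expanded comment above enumerates three lookup states; compressed into a predicate, the dynamic lookup for a PROBE_STATE/CHECK_STATE opcode is redone only when this flow id was never looked up, or when the previous lookup used a different non-zero state name. A toy model of that decision; MATCH_UNKNOWN is given an illustrative value here, and dyn_name == 0 stands for the wildcard 'any' lookup:

#include <stdint.h>
#include <stdio.h>

#define MATCH_UNKNOWN	-1	/* illustrative value, not the kernel's */

/*
 * Decide whether ipfw_chk() must (re)run the dynamic-state lookup
 * for an opcode naming state 'want'.
 */
static int
need_dyn_lookup(int dyn_dir, uint16_t dyn_name, uint16_t want)
{
	if (dyn_dir == MATCH_UNKNOWN)
		return (1);	/* never looked up this f_id */
	if (dyn_name != 0 && dyn_name != want)
		return (1);	/* previous lookup used another name */
	/* previous lookup was 'any' (and missed) or used this name */
	return (0);
}

int
main(void)
{
	printf("%d\n", need_dyn_lookup(MATCH_UNKNOWN, 0, 5));	/* 1 */
	printf("%d\n", need_dyn_lookup(0, 3, 5));		/* 1 */
	printf("%d\n", need_dyn_lookup(0, 0, 5));		/* 0 */
	return (0);
}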
+ */ + if (is_ipv6) { + struct sockaddr_in6 *sa6; + + sa6 = args->next_hop6 = + &args->hopstore6; + sa6->sin6_family = AF_INET6; + sa6->sin6_len = sizeof(*sa6); + sa6->sin6_addr = TARG_VAL( + chain, tablearg, nh6); + /* + * Set sin6_scope_id only for + * link-local unicast addresses. + */ + if (IN6_IS_ADDR_LINKLOCAL( + &sa6->sin6_addr)) + sa6->sin6_scope_id = + TARG_VAL(chain, + tablearg, + zoneid); + } else +#endif + { + sa = args->next_hop = + &args->hopstore; + sa->sin_family = AF_INET; + sa->sin_len = sizeof(*sa); + sa->sin_addr.s_addr = htonl( + TARG_VAL(chain, tablearg, + nh4)); + } } else { args->next_hop = sa; } @@ -2364,7 +2464,7 @@ do { \ case O_NETGRAPH: case O_NGTEE: set_match(args, f_pos, chain); - args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1); + args->rule.info = TARG(cmd->arg1, netgraph); if (V_fw_one_pass) args->rule.info |= IPFW_ONEPASS; retval = (cmd->opcode == O_NETGRAPH) ? @@ -2377,7 +2477,7 @@ do { \ uint32_t fib; IPFW_INC_RULE_COUNTER(f, pktlen); - fib = IP_FW_ARG_TABLEARG(cmd->arg1); + fib = TARG(cmd->arg1, fib) & 0x7FFF; if (fib >= rt_numfibs) fib = 0; M_SETFIB(m, fib); @@ -2389,15 +2489,16 @@ do { \ case O_SETDSCP: { uint16_t code; - code = IP_FW_ARG_TABLEARG(cmd->arg1) & 0x3F; + code = TARG(cmd->arg1, dscp) & 0x3F; l = 0; /* exit inner loop */ if (is_ipv4) { - uint16_t a; + uint16_t old; - a = ip->ip_tos; - ip->ip_tos = (code << 2) | (ip->ip_tos & 0x03); - a += ntohs(ip->ip_sum) - ip->ip_tos; - ip->ip_sum = htons(a); + old = *(uint16_t *)ip; + ip->ip_tos = (code << 2) | + (ip->ip_tos & 0x03); + ip->ip_sum = cksum_adjust(ip->ip_sum, + old, *(uint16_t *)ip); } else if (is_ipv6) { uint8_t *v; @@ -2425,20 +2526,20 @@ do { \ set_match(args, f_pos, chain); /* Check if this is 'global' nat rule */ - if (cmd->arg1 == 0) { + if (cmd->arg1 == IP_FW_NAT44_GLOBAL) { retval = ipfw_nat_ptr(args, NULL, m); break; } t = ((ipfw_insn_nat *)cmd)->nat; if (t == NULL) { - nat_id = IP_FW_ARG_TABLEARG(cmd->arg1); + nat_id = TARG(cmd->arg1, nat); t = (*lookup_nat_ptr)(&chain->nat, nat_id); if (t == NULL) { retval = IP_FW_DENY; break; } - if (cmd->arg1 != IP_FW_TABLEARG) + if (cmd->arg1 != IP_FW_TARG) ((ipfw_insn_nat *)cmd)->nat = t; } retval = ipfw_nat_ptr(args, t, m); @@ -2454,11 +2555,6 @@ do { \ /* if not fragmented, go to next rule */ if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) break; - /* - * ip_reass() expects len & off in host - * byte order. - */ - SET_HOST_IPLEN(ip); args->m = m = ip_reass(m); @@ -2472,7 +2568,6 @@ do { \ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; - SET_NET_IPLEN(ip); ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); @@ -2484,6 +2579,11 @@ do { \ done = 1; /* exit outer loop */ break; } + case O_EXTERNAL_ACTION: + l = 0; /* in any case exit inner loop */ + retval = ipfw_run_eaction(chain, args, + cmd, &done); + break; default: panic("-- unknown opcode %d\n", cmd->opcode); @@ -2521,7 +2621,7 @@ do { \ retval = IP_FW_DENY; printf("ipfw: ouch!, skip past end of rules, denying packet\n"); } - IPFW_RUNLOCK(chain); + IPFW_PF_RUNLOCK(chain); #ifdef __FreeBSD__ if (ucred_cache != NULL) crfree(ucred_cache); @@ -2553,7 +2653,27 @@ sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) return (ipfw_resize_tables(&V_layer3_chain, ntables)); } + +/* + * Switches table namespace between global and per-set. 
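The O_SETDSCP hunk stops byte-swapping header fields and instead patches the IPv4 checksum incrementally from the 16-bit word that covers the tos byte, in the style of RFC 1624. The sketch below models that update rule and cross-checks it against a full recompute; it illustrates the technique only and is not the kernel's cksum_adjust() macro:

#include <assert.h>
#include <stdint.h>

/*
 * RFC 1624 incremental update (eq. 3: HC' = ~(~HC + ~m + m')):
 * adjust checksum 'csum' for one 16-bit word going oldw -> neww.
 */
static uint16_t
cksum_adj(uint16_t csum, uint16_t oldw, uint16_t neww)
{
	uint32_t sum;

	sum = (uint32_t)(uint16_t)~csum + (uint16_t)~oldw + neww;
	sum = (sum >> 16) + (sum & 0xffff);	/* fold the carries */
	sum += sum >> 16;
	return ((uint16_t)~sum);
}

/* plain one's complement checksum over n 16-bit words, for checking */
static uint16_t
cksum_full(const uint16_t *p, int n)
{
	uint32_t sum = 0;

	while (n-- > 0)
		sum += *p++;
	sum = (sum >> 16) + (sum & 0xffff);
	sum += sum >> 16;
	return ((uint16_t)~sum);
}

int
main(void)
{
	/* a 20-byte IPv4 header as ten 16-bit words, checksum zeroed */
	uint16_t h[10] = { 0x4500, 0x0054, 0x1c46, 0x4000,
	    0x4011, 0x0000, 0x0a00, 0x0001, 0x0a00, 0x0002 };
	uint16_t w;

	h[5] = cksum_full(h, 10);	/* seed a valid checksum */
	w = h[0];
	h[0] = 0x45b8;			/* rewrite the tos byte (DSCP 46) */
	h[5] = cksum_adj(h[5], w, h[0]);

	w = h[5];			/* verify against full recompute */
	h[5] = 0;
	assert(w == cksum_full(h, 10));
	return (0);
}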
+ */ +static int +sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS) +{ + int error; + unsigned int sets; + + sets = V_fw_tables_sets; + + error = sysctl_handle_int(oidp, &sets, 0, req); + /* Read operation or some error */ + if ((error != 0) || (req->newptr == NULL)) + return (error); + + return (ipfw_switch_tables_namespace(&V_layer3_chain, sets)); +} #endif + /* * Module and VNET glue */ @@ -2607,7 +2727,8 @@ ipfw_init(void) if (default_fw_tables > IPFW_TABLES_MAX) default_fw_tables = IPFW_TABLES_MAX; - ipfw_log_bpf(1); /* init */ + ipfw_init_sopt_handler(); + ipfw_iface_init(); return (error); } @@ -2619,7 +2740,8 @@ static void ipfw_destroy(void) { - ipfw_log_bpf(0); /* uninit */ + ipfw_iface_destroy(); + ipfw_destroy_sopt_handler(); printf("IP firewall unloaded\n"); } #endif /* __rtems__ */ @@ -2631,12 +2753,14 @@ ipfw_destroy(void) static int vnet_ipfw_init(const void *unused) { - int error; + int error, first; struct ip_fw *rule = NULL; struct ip_fw_chain *chain; chain = &V_layer3_chain; + first = IS_DEFAULT_VNET(curvnet) ? 1 : 0; + /* First set up some values that are compile time options */ V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ V_fw_deny_unknown_exthdrs = 1; @@ -2650,16 +2774,19 @@ vnet_ipfw_init(const void *unused) LIST_INIT(&chain->nat); #endif + /* Init shared services hash table */ + ipfw_init_srv(chain); + + ipfw_init_obj_rewriter(); + ipfw_init_counters(); /* insert the default rule and create the initial map */ chain->n_rules = 1; - chain->static_len = sizeof(struct ip_fw); chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO); - if (chain->map) - rule = malloc(chain->static_len, M_IPFW, M_WAITOK | M_ZERO); + rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw)); /* Set initial number of tables */ V_fw_tables_max = default_fw_tables; - error = ipfw_init_tables(chain); + error = ipfw_init_tables(chain, first); if (error) { printf("ipfw2: setting up tables failed\n"); free(chain->map, M_IPFW); @@ -2676,18 +2803,24 @@ vnet_ipfw_init(const void *unused) rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY; chain->default_rule = chain->map[0] = rule; chain->id = rule->id = 1; + /* Pre-calculate rules length for legacy dump format */ + chain->static_len = sizeof(struct ip_fw_rule0); IPFW_LOCK_INIT(chain); ipfw_dyn_init(chain); + ipfw_eaction_init(chain, first); +#ifdef LINEAR_SKIPTO + ipfw_init_skipto_cache(chain); +#endif + ipfw_bpf_init(first); /* First set up some values that are compile time options */ V_ipfw_vnet_ready = 1; /* Open for business */ /* - * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr) - * and pfil hooks for ipv4 and ipv6. Even if the latter two fail - * we still keep the module alive because the sockopt and - * layer2 paths are still useful. + * Hook the sockopt handler and pfil hooks for ipv4 and ipv6. + * Even if the latter two fail we still keep the module alive + * because the sockopt and layer2 paths are still useful. * ipfw[6]_hook return 0 on success, ENOENT on failure, * so we can ignore the exact return value and just set a flag. * @@ -2697,8 +2830,7 @@ vnet_ipfw_init(const void *unused) * In layer2 we have the same behaviour, except that V_ether_ipfw * is checked on each packet because there are no pfil hooks. 
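vnet_ipfw_init() and vnet_ipfw_uninit() now thread a first/last flag through the per-VNET constructors, so process-wide state (the BPF clones, opcode rewriters, counters) is created exactly once by the default VNET while per-instance state is rebuilt for every VNET. A trivial model of that split, with invented function names:

#include <stdio.h>

static int global_ready;	/* stands in for once-only shared state */

static void
instance_init(int first)
{
	if (first) {
		global_ready = 1;	/* once-only, like ipfw_bpf_init(1) */
		printf("global init\n");
	}
	printf("per-instance init\n");	/* like ipfw_init_srv() */
}

static void
instance_uninit(int last)
{
	printf("per-instance teardown\n");
	if (last) {
		global_ready = 0;	/* like ipfw_bpf_uninit(1) */
		printf("global teardown\n");
	}
}

int
main(void)
{
	instance_init(1);	/* default VNET: IS_DEFAULT_VNET() true */
	instance_init(0);	/* a later VNET reuses the shared state */
	instance_uninit(0);
	instance_uninit(1);
	return (0);
}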
*/ - V_ip_fw_ctl_ptr = ipfw_ctl; - V_ip_fw_chk_ptr = ipfw_chk; + V_ip_fw_ctl_ptr = ipfw_ctl3; error = ipfw_attach_hooks(1); return (error); } @@ -2710,9 +2842,9 @@ vnet_ipfw_init(const void *unused) static int vnet_ipfw_uninit(const void *unused) { - struct ip_fw *reap, *rule; + struct ip_fw *reap; struct ip_fw_chain *chain = &V_layer3_chain; - int i; + int i, last; V_ipfw_vnet_ready = 0; /* tell new callers to go away */ /* @@ -2721,33 +2853,39 @@ vnet_ipfw_uninit(const void *unused) * sure the update is propagated and nobody will be in. */ (void)ipfw_attach_hooks(0 /* detach */); - V_ip_fw_chk_ptr = NULL; V_ip_fw_ctl_ptr = NULL; + + last = IS_DEFAULT_VNET(curvnet) ? 1 : 0; + IPFW_UH_WLOCK(chain); IPFW_UH_WUNLOCK(chain); - IPFW_UH_WLOCK(chain); - IPFW_WLOCK(chain); ipfw_dyn_uninit(0); /* run the callout_drain */ - IPFW_WUNLOCK(chain); - ipfw_destroy_tables(chain); + IPFW_UH_WLOCK(chain); + reap = NULL; IPFW_WLOCK(chain); - for (i = 0; i < chain->n_rules; i++) { - rule = chain->map[i]; - rule->x_next = reap; - reap = rule; - } - if (chain->map) - free(chain->map, M_IPFW); + for (i = 0; i < chain->n_rules; i++) + ipfw_reap_add(chain, &reap, chain->map[i]); + free(chain->map, M_IPFW); +#ifdef LINEAR_SKIPTO + ipfw_destroy_skipto_cache(chain); +#endif IPFW_WUNLOCK(chain); IPFW_UH_WUNLOCK(chain); + ipfw_destroy_tables(chain, last); + ipfw_eaction_uninit(chain, last); if (reap != NULL) ipfw_reap_rules(reap); + vnet_ipfw_iface_destroy(chain); + ipfw_destroy_srv(chain); IPFW_LOCK_DESTROY(chain); ipfw_dyn_uninit(1); /* free the remaining parts */ - return 0; + ipfw_destroy_counters(); + ipfw_destroy_obj_rewriter(); + ipfw_bpf_uninit(last); + return (0); } #endif /* __rtems__ */ @@ -2793,13 +2931,14 @@ static moduledata_t ipfwmod = { }; /* Define startup order. */ -#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN +#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL #define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */ #define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */ #define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */ DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER); -MODULE_VERSION(ipfw, 2); +FEATURE(ipfw_ctl3, "ipfw new sockopt calls"); +MODULE_VERSION(ipfw, 3); /* should declare some dependencies here */ /* diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_bpf.c b/freebsd/sys/netpfil/ipfw/ip_fw_bpf.c new file mode 100644 index 00000000..3127809b --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/ip_fw_bpf.c @@ -0,0 +1,211 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/rmlock.h> +#include <sys/socket.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <net/if_pflog.h> +#include <net/if_var.h> +#include <net/if_clone.h> +#include <net/if_types.h> +#include <net/vnet.h> +#include <net/bpf.h> + +#include <netinet/in.h> +#include <netinet/ip_fw.h> +#include <netinet/ip_var.h> +#include <netpfil/ipfw/ip_fw_private.h> + +static VNET_DEFINE(struct ifnet *, log_if); +static VNET_DEFINE(struct ifnet *, pflog_if); +static VNET_DEFINE(struct if_clone *, ipfw_cloner); +static VNET_DEFINE(struct if_clone *, ipfwlog_cloner); +#define V_ipfw_cloner VNET(ipfw_cloner) +#define V_ipfwlog_cloner VNET(ipfwlog_cloner) +#define V_log_if VNET(log_if) +#define V_pflog_if VNET(pflog_if) + +static struct rmlock log_if_lock; +#define LOGIF_LOCK_INIT(x) rm_init(&log_if_lock, "ipfw log_if lock") +#define LOGIF_LOCK_DESTROY(x) rm_destroy(&log_if_lock) +#define LOGIF_RLOCK_TRACKER struct rm_priotracker _log_tracker +#define LOGIF_RLOCK(x) rm_rlock(&log_if_lock, &_log_tracker) +#define LOGIF_RUNLOCK(x) rm_runlock(&log_if_lock, &_log_tracker) +#define LOGIF_WLOCK(x) rm_wlock(&log_if_lock) +#define LOGIF_WUNLOCK(x) rm_wunlock(&log_if_lock) + +static const char ipfwname[] = "ipfw"; +static const char ipfwlogname[] = "ipfwlog"; + +static int +ipfw_bpf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) +{ + + return (EINVAL); +} + +static int +ipfw_bpf_output(struct ifnet *ifp, struct mbuf *m, + const struct sockaddr *dst, struct route *ro) +{ + + if (m != NULL) + FREE_PKT(m); + return (0); +} + +static void +ipfw_clone_destroy(struct ifnet *ifp) +{ + + LOGIF_WLOCK(); + if (ifp->if_hdrlen == ETHER_HDR_LEN) + V_log_if = NULL; + else + V_pflog_if = NULL; + LOGIF_WUNLOCK(); + + bpfdetach(ifp); + if_detach(ifp); + if_free(ifp); +} + +static int +ipfw_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + struct ifnet *ifp; + + ifp = if_alloc(IFT_PFLOG); + if (ifp == NULL) + return (ENOSPC); + if_initname(ifp, ipfwname, unit); + ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_mtu = 65536; + ifp->if_ioctl = ipfw_bpf_ioctl; + ifp->if_output = ipfw_bpf_output; + ifp->if_hdrlen = ETHER_HDR_LEN; + if_attach(ifp); + bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); + LOGIF_WLOCK(); + if (V_log_if != NULL) { + LOGIF_WUNLOCK(); + bpfdetach(ifp); + if_detach(ifp); + if_free(ifp); + return (EEXIST); + } + V_log_if = ifp; + LOGIF_WUNLOCK(); + return (0); +} + +static int +ipfwlog_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + struct ifnet *ifp; + + ifp = if_alloc(IFT_PFLOG); + if (ifp == NULL) + return (ENOSPC); + if_initname(ifp, ipfwlogname, unit); + ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_mtu = 65536; + ifp->if_ioctl = ipfw_bpf_ioctl; + ifp->if_output = ipfw_bpf_output; + ifp->if_hdrlen = PFLOG_HDRLEN; 
+ if_attach(ifp); + bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN); + LOGIF_WLOCK(); + if (V_pflog_if != NULL) { + LOGIF_WUNLOCK(); + bpfdetach(ifp); + if_detach(ifp); + if_free(ifp); + return (EEXIST); + } + V_pflog_if = ifp; + LOGIF_WUNLOCK(); + return (0); +} + +void +ipfw_bpf_mtap2(void *data, u_int dlen, struct mbuf *m) +{ + LOGIF_RLOCK_TRACKER; + + LOGIF_RLOCK(); + if (dlen == ETHER_HDR_LEN) { + if (V_log_if == NULL) { + LOGIF_RUNLOCK(); + return; + } + BPF_MTAP2(V_log_if, data, dlen, m); + } else if (dlen == PFLOG_HDRLEN) { + if (V_pflog_if == NULL) { + LOGIF_RUNLOCK(); + return; + } + BPF_MTAP2(V_pflog_if, data, dlen, m); + } + LOGIF_RUNLOCK(); +} + +void +ipfw_bpf_init(int first) +{ + + if (first) { + LOGIF_LOCK_INIT(); + V_log_if = NULL; + V_pflog_if = NULL; + } + V_ipfw_cloner = if_clone_simple(ipfwname, ipfw_clone_create, + ipfw_clone_destroy, 0); + V_ipfwlog_cloner = if_clone_simple(ipfwlogname, ipfwlog_clone_create, + ipfw_clone_destroy, 0); +} + +void +ipfw_bpf_uninit(int last) +{ + + if_clone_detach(V_ipfw_cloner); + if_clone_detach(V_ipfwlog_cloner); + if (last) + LOGIF_LOCK_DESTROY(); +} + diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c b/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c new file mode 100644 index 00000000..4696faac --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -0,0 +1,1822 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#define DEB(x) +#define DDB(x) x + +/* + * Dynamic rule support for ipfw + */ + +#include <rtems/bsd/local/opt_ipfw.h> +#include <rtems/bsd/local/opt_inet.h> +#ifndef INET +#error IPFIREWALL requires INET. 
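Both clone constructors in ip_fw_bpf.c follow the same publish-once pattern: build the interface completely, then take the write lock and install it in V_log_if or V_pflog_if only if no other creator won the race; a loser tears its interface back down and returns EEXIST. The sketch below models the same idea with a C11 compare-and-swap instead of the kernel's rmlock, purely for illustration:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_if { int unit; };	/* toy stand-in for struct ifnet */

static _Atomic(struct toy_if *) log_if;	/* the published singleton */

static int
clone_create(int unit)
{
	struct toy_if *ifp, *expected = NULL;

	ifp = malloc(sizeof(*ifp));	/* build it fully first */
	if (ifp == NULL)
		return (1);		/* kernel returns ENOSPC */
	ifp->unit = unit;

	/* publish only if nobody beat us to it */
	if (!atomic_compare_exchange_strong(&log_if, &expected, ifp)) {
		free(ifp);		/* lost the race: undo everything */
		return (2);		/* kernel returns EEXIST */
	}
	return (0);
}

int
main(void)
{
	printf("%d\n", clone_create(0));	/* 0: published */
	printf("%d\n", clone_create(1));	/* 2: already exists */
	return (0);
}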
+#endif /* INET */ +#include <rtems/bsd/local/opt_inet6.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/rmlock.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <net/ethernet.h> /* for ETHERTYPE_IP */ +#include <net/if.h> +#include <net/if_var.h> +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> /* ip_defttl */ +#include <netinet/ip_fw.h> +#include <netinet/tcp_var.h> +#include <netinet/udp.h> + +#include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */ +#ifdef INET6 +#include <netinet6/in6_var.h> +#include <netinet6/ip6_var.h> +#endif + +#include <netpfil/ipfw/ip_fw_private.h> + +#include <machine/in_cksum.h> /* XXX for in_cksum */ + +#ifdef MAC +#include <security/mac/mac_framework.h> +#endif + +/* + * Description of dynamic rules. + * + * Dynamic rules are stored in lists accessed through a hash table + * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can + * be modified through the sysctl variable dyn_buckets which is + * updated when the table becomes empty. + * + * XXX currently there is only one list, ipfw_dyn. + * + * When a packet is received, its address fields are first masked + * with the mask defined for the rule, then hashed, then matched + * against the entries in the corresponding list. + * Dynamic rules can be used for different purposes: + * + stateful rules; + * + enforcing limits on the number of sessions; + * + in-kernel NAT (not implemented yet) + * + * The lifetime of dynamic rules is regulated by dyn_*_lifetime, + * measured in seconds and depending on the flags. + * + * The total number of dynamic rules is equal to UMA zone items count. + * The max number of dynamic rules is dyn_max. When we reach + * the maximum number of rules we do not create anymore. This is + * done to avoid consuming too much memory, but also too much + * time when searching on each packet (ideally, we should try instead + * to put a limit on the length of the list on each bucket...). + * + * Each dynamic rule holds a pointer to the parent ipfw rule so + * we know what action to perform. Dynamic rules are removed when + * the parent rule is deleted. This can be changed by dyn_keep_states + * sysctl. + * + * There are some limitations with dynamic rules -- we do not + * obey the 'randomized match', and we do not do multiple + * passes through the firewall. XXX check the latter!!! 
+ */ + +struct ipfw_dyn_bucket { + struct mtx mtx; /* Bucket protecting lock */ + ipfw_dyn_rule *head; /* Pointer to first rule */ +}; + +/* + * Static variables followed by global ones + */ +static VNET_DEFINE(struct ipfw_dyn_bucket *, ipfw_dyn_v); +static VNET_DEFINE(u_int32_t, dyn_buckets_max); +static VNET_DEFINE(u_int32_t, curr_dyn_buckets); +static VNET_DEFINE(struct callout, ipfw_timeout); +#define V_ipfw_dyn_v VNET(ipfw_dyn_v) +#define V_dyn_buckets_max VNET(dyn_buckets_max) +#define V_curr_dyn_buckets VNET(curr_dyn_buckets) +#define V_ipfw_timeout VNET(ipfw_timeout) + +static VNET_DEFINE(uma_zone_t, ipfw_dyn_rule_zone); +#define V_ipfw_dyn_rule_zone VNET(ipfw_dyn_rule_zone) + +#define IPFW_BUCK_LOCK_INIT(b) \ + mtx_init(&(b)->mtx, "IPFW dynamic bucket", NULL, MTX_DEF) +#define IPFW_BUCK_LOCK_DESTROY(b) \ + mtx_destroy(&(b)->mtx) +#define IPFW_BUCK_LOCK(i) mtx_lock(&V_ipfw_dyn_v[(i)].mtx) +#define IPFW_BUCK_UNLOCK(i) mtx_unlock(&V_ipfw_dyn_v[(i)].mtx) +#define IPFW_BUCK_ASSERT(i) mtx_assert(&V_ipfw_dyn_v[(i)].mtx, MA_OWNED) + + +static VNET_DEFINE(int, dyn_keep_states); +#define V_dyn_keep_states VNET(dyn_keep_states) + +/* + * Timeouts for various events in handing dynamic rules. + */ +static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); +static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); +static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); +static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); +static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); +static VNET_DEFINE(u_int32_t, dyn_short_lifetime); + +#define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) +#define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) +#define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) +#define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) +#define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) +#define V_dyn_short_lifetime VNET(dyn_short_lifetime) + +/* + * Keepalives are sent if dyn_keepalive is set. They are sent every + * dyn_keepalive_period seconds, in the last dyn_keepalive_interval + * seconds of lifetime of a rule. + * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower + * than dyn_keepalive_period. + */ + +static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); +static VNET_DEFINE(u_int32_t, dyn_keepalive_period); +static VNET_DEFINE(u_int32_t, dyn_keepalive); +static VNET_DEFINE(time_t, dyn_keepalive_last); + +#define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) +#define V_dyn_keepalive_period VNET(dyn_keepalive_period) +#define V_dyn_keepalive VNET(dyn_keepalive) +#define V_dyn_keepalive_last VNET(dyn_keepalive_last) + +static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ + +#define DYN_COUNT uma_zone_get_cur(V_ipfw_dyn_rule_zone) +#define V_dyn_max VNET(dyn_max) + +/* for userspace, we emulate the uma_zone_counter with ipfw_dyn_count */ +static int ipfw_dyn_count; /* number of objects */ + +#ifdef USERSPACE /* emulation of UMA object counters for userspace */ +#define uma_zone_get_cur(x) ipfw_dyn_count +#endif /* USERSPACE */ + +static int last_log; /* Log ratelimiting */ + +static void ipfw_dyn_tick(void *vnetx); +static void check_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *, int, int); +#ifdef SYSCTL_NODE + +static int sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS); +static int sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS); + +SYSBEGIN(f2) + +SYSCTL_DECL(_net_inet_ip_fw); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_buckets_max), 0, + "Max number of dyn. 
buckets"); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, + CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, + "Current Number of dyn. buckets"); +SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RD, 0, 0, sysctl_ipfw_dyn_count, "IU", + "Number of dyn. rules"); +SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_dyn_max, "IU", + "Max number of dyn. rules"); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, + "Lifetime of dyn. rules for acks"); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, + "Lifetime of dyn. rules for syn"); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, + "Lifetime of dyn. rules for fin"); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, + "Lifetime of dyn. rules for rst"); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, + "Lifetime of dyn. rules for UDP"); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, + "Lifetime of dyn. rules for other situations"); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, + "Enable keepalives for dyn. rules"); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keep_states, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keep_states), 0, + "Do not flush dynamic states on rule deletion"); + +SYSEND + +#endif /* SYSCTL_NODE */ + + +#ifdef INET6 +static __inline int +hash_packet6(struct ipfw_flow_id *id) +{ + u_int32_t i; + i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ + (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ + (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ + (id->src_ip6.__u6_addr.__u6_addr32[3]) ^ + (id->dst_port) ^ (id->src_port); + return i; +} +#endif + +/* + * IMPORTANT: the hash function for dynamic rules must be commutative + * in source and destination (ip,port), because rules are bidirectional + * and we want to find both in the same bucket. + */ +static __inline int +hash_packet(struct ipfw_flow_id *id, int buckets) +{ + u_int32_t i; + +#ifdef INET6 + if (IS_IP6_FLOW_ID(id)) + i = hash_packet6(id); + else +#endif /* INET6 */ + i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); + i &= (buckets - 1); + return i; +} + +#if 0 +#define DYN_DEBUG(fmt, ...) do { \ + printf("%s: " fmt "\n", __func__, __VA_ARGS__); \ +} while (0) +#else +#define DYN_DEBUG(fmt, ...) +#endif + +static char *default_state_name = "default"; +struct dyn_state_obj { + struct named_object no; + char name[64]; +}; + +#define DYN_STATE_OBJ(ch, cmd) \ + ((struct dyn_state_obj *)SRV_OBJECT(ch, (cmd)->arg1)) +/* + * Classifier callback. + * Return 0 if opcode contains object that should be referenced + * or rewritten. 
+ */ +static int +dyn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) +{ + + DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); + /* Don't rewrite "check-state any" */ + if (cmd->arg1 == 0 && + cmd->opcode == O_CHECK_STATE) + return (1); + + *puidx = cmd->arg1; + *ptype = 0; + return (0); +} + +static void +dyn_update(ipfw_insn *cmd, uint16_t idx) +{ + + cmd->arg1 = idx; + DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); +} + +static int +dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, + struct named_object **pno) +{ + ipfw_obj_ntlv *ntlv; + const char *name; + + DYN_DEBUG("uidx %d", ti->uidx); + if (ti->uidx != 0) { + if (ti->tlvs == NULL) + return (EINVAL); + /* Search ntlv in the buffer provided by user */ + ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, + IPFW_TLV_STATE_NAME); + if (ntlv == NULL) + return (EINVAL); + name = ntlv->name; + } else + name = default_state_name; + /* + * Search named object with corresponding name. + * Since states objects are global - ignore the set value + * and use zero instead. + */ + *pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0, + IPFW_TLV_STATE_NAME, name); + /* + * We always return success here. + * The caller will check *pno and mark object as unresolved, + * then it will automatically create "default" object. + */ + return (0); +} + +static struct named_object * +dyn_findbykidx(struct ip_fw_chain *ch, uint16_t idx) +{ + + DYN_DEBUG("kidx %d", idx); + return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx)); +} + +static int +dyn_create(struct ip_fw_chain *ch, struct tid_info *ti, + uint16_t *pkidx) +{ + struct namedobj_instance *ni; + struct dyn_state_obj *obj; + struct named_object *no; + ipfw_obj_ntlv *ntlv; + char *name; + + DYN_DEBUG("uidx %d", ti->uidx); + if (ti->uidx != 0) { + if (ti->tlvs == NULL) + return (EINVAL); + ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, + IPFW_TLV_STATE_NAME); + if (ntlv == NULL) + return (EINVAL); + name = ntlv->name; + } else + name = default_state_name; + + ni = CHAIN_TO_SRV(ch); + obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO); + obj->no.name = obj->name; + obj->no.etlv = IPFW_TLV_STATE_NAME; + strlcpy(obj->name, name, sizeof(obj->name)); + + IPFW_UH_WLOCK(ch); + no = ipfw_objhash_lookup_name_type(ni, 0, + IPFW_TLV_STATE_NAME, name); + if (no != NULL) { + /* + * Object is already created. + * Just return its kidx and bump refcount. 
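dyn_create() above follows the standard named-object life cycle used throughout this import: look the name up first and just bump the refcount on a hit, otherwise allocate a kernel index and insert the new object. A toy registry showing that find-or-create shape, with a fixed-size array in place of the kernel's objhash and invented names:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct state_obj {		/* toy dyn_state_obj */
	char name[64];
	uint16_t kidx;
	int refcnt;
};

static struct state_obj objs[16];
static int nobjs;

/* find-or-create by name; returns the index via *pkidx */
static int
obj_ref(const char *name, uint16_t *pkidx)
{
	int i;

	for (i = 0; i < nobjs; i++)
		if (strcmp(objs[i].name, name) == 0) {
			objs[i].refcnt++;	/* already created */
			*pkidx = objs[i].kidx;
			return (0);
		}
	if (nobjs == 16)
		return (1);		/* kernel returns ENOSPC */
	snprintf(objs[nobjs].name, sizeof(objs[nobjs].name), "%s", name);
	objs[nobjs].kidx = (uint16_t)(nobjs + 1);
	objs[nobjs].refcnt = 1;
	*pkidx = objs[nobjs].kidx;
	nobjs++;
	return (0);
}

int
main(void)
{
	uint16_t k1, k2;

	obj_ref("default", &k1);
	obj_ref("default", &k2);	/* same kidx, refcnt now 2 */
	printf("%u %u refcnt=%d\n", k1, k2, objs[0].refcnt);
	return (0);
}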
+ */ + *pkidx = no->kidx; + no->refcnt++; + IPFW_UH_WUNLOCK(ch); + free(obj, M_IPFW); + DYN_DEBUG("\tfound kidx %d", *pkidx); + return (0); + } + if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) { + DYN_DEBUG("\talloc_idx failed for %s", name); + IPFW_UH_WUNLOCK(ch); + free(obj, M_IPFW); + return (ENOSPC); + } + ipfw_objhash_add(ni, &obj->no); + IPFW_WLOCK(ch); + SRV_OBJECT(ch, obj->no.kidx) = obj; + IPFW_WUNLOCK(ch); + obj->no.refcnt++; + *pkidx = obj->no.kidx; + IPFW_UH_WUNLOCK(ch); + DYN_DEBUG("\tcreated kidx %d", *pkidx); + return (0); +} + +static void +dyn_destroy(struct ip_fw_chain *ch, struct named_object *no) +{ + struct dyn_state_obj *obj; + + IPFW_UH_WLOCK_ASSERT(ch); + + KASSERT(no->refcnt == 1, + ("Destroying object '%s' (type %u, idx %u) with refcnt %u", + no->name, no->etlv, no->kidx, no->refcnt)); + + DYN_DEBUG("kidx %d", no->kidx); + IPFW_WLOCK(ch); + obj = SRV_OBJECT(ch, no->kidx); + SRV_OBJECT(ch, no->kidx) = NULL; + IPFW_WUNLOCK(ch); + ipfw_objhash_del(CHAIN_TO_SRV(ch), no); + ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), no->kidx); + + free(obj, M_IPFW); +} + +static struct opcode_obj_rewrite dyn_opcodes[] = { + { + O_KEEP_STATE, IPFW_TLV_STATE_NAME, + dyn_classify, dyn_update, + dyn_findbyname, dyn_findbykidx, + dyn_create, dyn_destroy + }, + { + O_CHECK_STATE, IPFW_TLV_STATE_NAME, + dyn_classify, dyn_update, + dyn_findbyname, dyn_findbykidx, + dyn_create, dyn_destroy + }, + { + O_PROBE_STATE, IPFW_TLV_STATE_NAME, + dyn_classify, dyn_update, + dyn_findbyname, dyn_findbykidx, + dyn_create, dyn_destroy + }, + { + O_LIMIT, IPFW_TLV_STATE_NAME, + dyn_classify, dyn_update, + dyn_findbyname, dyn_findbykidx, + dyn_create, dyn_destroy + }, +}; +/** + * Print customizable flow id description via log(9) facility. + */ +static void +print_dyn_rule_flags(struct ipfw_flow_id *id, int dyn_type, int log_flags, + char *prefix, char *postfix) +{ + struct in_addr da; +#ifdef INET6 + char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; +#else + char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; +#endif + +#ifdef INET6 + if (IS_IP6_FLOW_ID(id)) { + ip6_sprintf(src, &id->src_ip6); + ip6_sprintf(dst, &id->dst_ip6); + } else +#endif + { + da.s_addr = htonl(id->src_ip); + inet_ntop(AF_INET, &da, src, sizeof(src)); + da.s_addr = htonl(id->dst_ip); + inet_ntop(AF_INET, &da, dst, sizeof(dst)); + } + log(log_flags, "ipfw: %s type %d %s %d -> %s %d, %d %s\n", + prefix, dyn_type, src, id->src_port, dst, + id->dst_port, DYN_COUNT, postfix); +} + +#define print_dyn_rule(id, dtype, prefix, postfix) \ + print_dyn_rule_flags(id, dtype, LOG_DEBUG, prefix, postfix) + +#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) +#define TIME_LE(a,b) ((int)((a)-(b)) < 0) + +static void +dyn_update_proto_state(ipfw_dyn_rule *q, const struct ipfw_flow_id *id, + const struct tcphdr *tcp, int dir) +{ + uint32_t ack; + u_char flags; + + if (id->proto == IPPROTO_TCP) { + flags = id->_flags & (TH_FIN | TH_SYN | TH_RST); +#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) +#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) +#define TCP_FLAGS (TH_FLAGS | (TH_FLAGS << 8)) +#define ACK_FWD 0x10000 /* fwd ack seen */ +#define ACK_REV 0x20000 /* rev ack seen */ + + q->state |= (dir == MATCH_FORWARD) ? 
flags : (flags << 8); + switch (q->state & TCP_FLAGS) { + case TH_SYN: /* opening */ + q->expire = time_uptime + V_dyn_syn_lifetime; + break; + + case BOTH_SYN: /* move to established */ + case BOTH_SYN | TH_FIN: /* one side tries to close */ + case BOTH_SYN | (TH_FIN << 8): +#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) + if (tcp == NULL) + break; + + ack = ntohl(tcp->th_ack); + if (dir == MATCH_FORWARD) { + if (q->ack_fwd == 0 || + _SEQ_GE(ack, q->ack_fwd)) { + q->ack_fwd = ack; + q->state |= ACK_FWD; + } + } else { + if (q->ack_rev == 0 || + _SEQ_GE(ack, q->ack_rev)) { + q->ack_rev = ack; + q->state |= ACK_REV; + } + } + if ((q->state & (ACK_FWD | ACK_REV)) == + (ACK_FWD | ACK_REV)) { + q->expire = time_uptime + V_dyn_ack_lifetime; + q->state &= ~(ACK_FWD | ACK_REV); + } + break; + + case BOTH_SYN | BOTH_FIN: /* both sides closed */ + if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) + V_dyn_fin_lifetime = + V_dyn_keepalive_period - 1; + q->expire = time_uptime + V_dyn_fin_lifetime; + break; + + default: +#if 0 + /* + * reset or some invalid combination, but can also + * occur if we use keep-state the wrong way. + */ + if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) + printf("invalid state: 0x%x\n", q->state); +#endif + if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) + V_dyn_rst_lifetime = + V_dyn_keepalive_period - 1; + q->expire = time_uptime + V_dyn_rst_lifetime; + break; + } + } else if (id->proto == IPPROTO_UDP) { + q->expire = time_uptime + V_dyn_udp_lifetime; + } else { + /* other protocols */ + q->expire = time_uptime + V_dyn_short_lifetime; + } +} + +/* + * Lookup a dynamic rule, locked version. + */ +static ipfw_dyn_rule * +lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int i, int *match_direction, + struct tcphdr *tcp, uint16_t kidx) +{ + /* + * Stateful ipfw extensions. + * Lookup into dynamic session queue. 
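dyn_update_proto_state() tracks a TCP session by folding forward-direction flags into the low byte of q->state and reverse-direction flags into the next byte, so one switch can tell opening, established, half-closed and closed sessions apart. A reduced model of that folding, using the real tcp.h flag values but a toy direction encoding, and ignoring the lifetime and ACK bookkeeping:

#include <stdint.h>
#include <stdio.h>

#define TH_FIN	0x01		/* as in netinet/tcp.h */
#define TH_SYN	0x02
#define DIR_FWD	0		/* toy direction encoding */
#define DIR_REV	1
#define BOTH_SYN (TH_SYN | (TH_SYN << 8))
#define BOTH_FIN (TH_FIN | (TH_FIN << 8))

static const char *
tcp_session_phase(uint32_t *state, uint8_t flags, int dir)
{
	*state |= (dir == DIR_FWD) ? flags : (uint32_t)flags << 8;
	if ((*state & BOTH_SYN) != BOTH_SYN)
		return ("opening");	/* only one SYN seen */
	if ((*state & BOTH_FIN) == BOTH_FIN)
		return ("closed");	/* both sides sent FIN */
	if (*state & BOTH_FIN)
		return ("half-closed");
	return ("established");
}

int
main(void)
{
	uint32_t state = 0;

	printf("%s\n", tcp_session_phase(&state, TH_SYN, DIR_FWD));
	printf("%s\n", tcp_session_phase(&state, TH_SYN, DIR_REV));
	printf("%s\n", tcp_session_phase(&state, TH_FIN, DIR_REV));
	printf("%s\n", tcp_session_phase(&state, TH_FIN, DIR_FWD));
	return (0);
}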
+ */ + ipfw_dyn_rule *prev, *q = NULL; + int dir; + + IPFW_BUCK_ASSERT(i); + + dir = MATCH_NONE; + for (prev = NULL, q = V_ipfw_dyn_v[i].head; q; prev = q, q = q->next) { + if (q->dyn_type == O_LIMIT_PARENT) + continue; + + if (pkt->proto != q->id.proto) + continue; + + if (kidx != 0 && kidx != q->kidx) + continue; + + if (IS_IP6_FLOW_ID(pkt)) { + if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.src_ip6) && + IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.dst_ip6) && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port) { + dir = MATCH_FORWARD; + break; + } + if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.dst_ip6) && + IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.src_ip6) && + pkt->src_port == q->id.dst_port && + pkt->dst_port == q->id.src_port) { + dir = MATCH_REVERSE; + break; + } + } else { + if (pkt->src_ip == q->id.src_ip && + pkt->dst_ip == q->id.dst_ip && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port) { + dir = MATCH_FORWARD; + break; + } + if (pkt->src_ip == q->id.dst_ip && + pkt->dst_ip == q->id.src_ip && + pkt->src_port == q->id.dst_port && + pkt->dst_port == q->id.src_port) { + dir = MATCH_REVERSE; + break; + } + } + } + if (q == NULL) + goto done; /* q = NULL, not found */ + + if (prev != NULL) { /* found and not in front */ + prev->next = q->next; + q->next = V_ipfw_dyn_v[i].head; + V_ipfw_dyn_v[i].head = q; + } + + /* update state according to flags */ + dyn_update_proto_state(q, pkt, tcp, dir); +done: + if (match_direction != NULL) + *match_direction = dir; + return (q); +} + +ipfw_dyn_rule * +ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, + struct tcphdr *tcp, uint16_t kidx) +{ + ipfw_dyn_rule *q; + int i; + + i = hash_packet(pkt, V_curr_dyn_buckets); + + IPFW_BUCK_LOCK(i); + q = lookup_dyn_rule_locked(pkt, i, match_direction, tcp, kidx); + if (q == NULL) + IPFW_BUCK_UNLOCK(i); + /* NB: return table locked when q is not NULL */ + return q; +} + +/* + * Unlock bucket mtx + * @p - pointer to dynamic rule + */ +void +ipfw_dyn_unlock(ipfw_dyn_rule *q) +{ + + IPFW_BUCK_UNLOCK(q->bucket); +} + +static int +resize_dynamic_table(struct ip_fw_chain *chain, int nbuckets) +{ + int i, k, nbuckets_old; + ipfw_dyn_rule *q; + struct ipfw_dyn_bucket *dyn_v, *dyn_v_old; + + /* Check if given number is power of 2 and less than 64k */ + if ((nbuckets > 65536) || (!powerof2(nbuckets))) + return 1; + + CTR3(KTR_NET, "%s: resize dynamic hash: %d -> %d", __func__, + V_curr_dyn_buckets, nbuckets); + + /* Allocate and initialize new hash */ + dyn_v = malloc(nbuckets * sizeof(*dyn_v), M_IPFW, + M_WAITOK | M_ZERO); + + for (i = 0 ; i < nbuckets; i++) + IPFW_BUCK_LOCK_INIT(&dyn_v[i]); + + /* + * Call upper half lock, as get_map() do to ease + * read-only access to dynamic rules hash from sysctl + */ + IPFW_UH_WLOCK(chain); + + /* + * Acquire chain write lock to permit hash access + * for main traffic path without additional locks + */ + IPFW_WLOCK(chain); + + /* Save old values */ + nbuckets_old = V_curr_dyn_buckets; + dyn_v_old = V_ipfw_dyn_v; + + /* Skip relinking if array is not set up */ + if (V_ipfw_dyn_v == NULL) + V_curr_dyn_buckets = 0; + + /* Re-link all dynamic states */ + for (i = 0 ; i < V_curr_dyn_buckets ; i++) { + while (V_ipfw_dyn_v[i].head != NULL) { + /* Remove from current chain */ + q = V_ipfw_dyn_v[i].head; + V_ipfw_dyn_v[i].head = q->next; + + /* Get new hash value */ + k = hash_packet(&q->id, nbuckets); + q->bucket = k; + /* Add to the new head */ + q->next = dyn_v[k].head; + dyn_v[k].head = q; + } + } + + /* Update current 
pointers/buckets values */ + V_curr_dyn_buckets = nbuckets; + V_ipfw_dyn_v = dyn_v; + + IPFW_WUNLOCK(chain); + + IPFW_UH_WUNLOCK(chain); + + /* Start periodic callout on initial creation */ + if (dyn_v_old == NULL) { + callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, curvnet, 0); + return (0); + } + + /* Destroy all mutexes */ + for (i = 0 ; i < nbuckets_old ; i++) + IPFW_BUCK_LOCK_DESTROY(&dyn_v_old[i]); + + /* Free old hash */ + free(dyn_v_old, M_IPFW); + + return 0; +} + +/** + * Install state of type 'type' for a dynamic session. + * The hash table contains two type of rules: + * - regular rules (O_KEEP_STATE) + * - rules for sessions with limited number of sess per user + * (O_LIMIT). When they are created, the parent is + * increased by 1, and decreased on delete. In this case, + * the third parameter is the parent rule and not the chain. + * - "parent" rules for the above (O_LIMIT_PARENT). + */ +static ipfw_dyn_rule * +add_dyn_rule(struct ipfw_flow_id *id, int i, uint8_t dyn_type, + struct ip_fw *rule, uint16_t kidx) +{ + ipfw_dyn_rule *r; + + IPFW_BUCK_ASSERT(i); + + r = uma_zalloc(V_ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); + if (r == NULL) { + if (last_log != time_uptime) { + last_log = time_uptime; + log(LOG_DEBUG, + "ipfw: Cannot allocate dynamic state, " + "consider increasing net.inet.ip.fw.dyn_max\n"); + } + return NULL; + } + ipfw_dyn_count++; + + /* + * refcount on parent is already incremented, so + * it is safe to use parent unlocked. + */ + if (dyn_type == O_LIMIT) { + ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; + if ( parent->dyn_type != O_LIMIT_PARENT) + panic("invalid parent"); + r->parent = parent; + rule = parent->rule; + } + + r->id = *id; + r->expire = time_uptime + V_dyn_syn_lifetime; + r->rule = rule; + r->dyn_type = dyn_type; + IPFW_ZERO_DYN_COUNTER(r); + r->count = 0; + r->kidx = kidx; + r->bucket = i; + r->next = V_ipfw_dyn_v[i].head; + V_ipfw_dyn_v[i].head = r; + DEB(print_dyn_rule(id, dyn_type, "add dyn entry", "total");) + return r; +} + +/** + * lookup dynamic parent rule using pkt and rule as search keys. + * If the lookup fails, then install one. + */ +static ipfw_dyn_rule * +lookup_dyn_parent(struct ipfw_flow_id *pkt, int *pindex, struct ip_fw *rule, + uint16_t kidx) +{ + ipfw_dyn_rule *q; + int i, is_v6; + + is_v6 = IS_IP6_FLOW_ID(pkt); + i = hash_packet( pkt, V_curr_dyn_buckets ); + *pindex = i; + IPFW_BUCK_LOCK(i); + for (q = V_ipfw_dyn_v[i].head ; q != NULL ; q=q->next) + if (q->dyn_type == O_LIMIT_PARENT && + kidx == q->kidx && + rule == q->rule && + pkt->proto == q->id.proto && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port && + ( + (is_v6 && + IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), + &(q->id.src_ip6)) && + IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), + &(q->id.dst_ip6))) || + (!is_v6 && + pkt->src_ip == q->id.src_ip && + pkt->dst_ip == q->id.dst_ip) + ) + ) { + q->expire = time_uptime + V_dyn_short_lifetime; + DEB(print_dyn_rule(pkt, q->dyn_type, + "lookup_dyn_parent found", "");) + return q; + } + + /* Add virtual limiting rule */ + return add_dyn_rule(pkt, i, O_LIMIT_PARENT, rule, kidx); +} + +/** + * Install dynamic state for rule type cmd->o.opcode + * + * Returns 1 (failure) if state is not installed because of errors or because + * session limitations are enforced. 
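resize_dynamic_table() above depends on bucket counts being powers of two: relinking a state into the grown table is just hash & (nbuckets - 1). The kernel recomputes hash_packet() per entry; the sketch below stores the full hash with each toy entry instead, which is structurally the same relink:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct state {
	uint32_t hash;		/* full hash kept with the entry */
	struct state *next;
};

/* move every entry from src[sbuckets] into dst[dbuckets] */
static void
relink(struct state **src, int sbuckets, struct state **dst, int dbuckets)
{
	struct state *q;
	int i, k;

	for (i = 0; i < sbuckets; i++)
		while ((q = src[i]) != NULL) {
			src[i] = q->next;		/* unlink old head */
			k = q->hash & (dbuckets - 1);	/* new bucket */
			q->next = dst[k];		/* push new head */
			dst[k] = q;
		}
}

int
main(void)
{
	struct state a = { 0x12345678, NULL };
	struct state *src[4] = { NULL }, *dst[16] = { NULL };

	src[a.hash & 3] = &a;
	relink(src, 4, dst, 16);
	assert(dst[a.hash & 15] == &a);
	printf("moved to bucket %u\n", (unsigned)(a.hash & 15));
	return (0);
}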
+ */ +int +ipfw_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, + ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg) +{ + ipfw_dyn_rule *q; + int i; + + DEB(print_dyn_rule(&args->f_id, cmd->o.opcode, "install_state", + (cmd->o.arg1 == 0 ? "": DYN_STATE_OBJ(chain, &cmd->o)->name));) + + i = hash_packet(&args->f_id, V_curr_dyn_buckets); + + IPFW_BUCK_LOCK(i); + + q = lookup_dyn_rule_locked(&args->f_id, i, NULL, NULL, cmd->o.arg1); + if (q != NULL) { /* should never occur */ + DEB( + if (last_log != time_uptime) { + last_log = time_uptime; + printf("ipfw: %s: entry already present, done\n", + __func__); + }) + IPFW_BUCK_UNLOCK(i); + return (0); + } + + /* + * State limiting is done via uma(9) zone limiting. + * Save pointer to newly-installed rule and reject + * packet if add_dyn_rule() returned NULL. + * Note q is currently set to NULL. + */ + + switch (cmd->o.opcode) { + case O_KEEP_STATE: /* bidir rule */ + q = add_dyn_rule(&args->f_id, i, O_KEEP_STATE, rule, + cmd->o.arg1); + break; + + case O_LIMIT: { /* limit number of sessions */ + struct ipfw_flow_id id; + ipfw_dyn_rule *parent; + uint32_t conn_limit; + uint16_t limit_mask = cmd->limit_mask; + int pindex; + + conn_limit = IP_FW_ARG_TABLEARG(chain, cmd->conn_limit, limit); + + DEB( + if (cmd->conn_limit == IP_FW_TARG) + printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " + "(tablearg)\n", __func__, conn_limit); + else + printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", + __func__, conn_limit); + ) + + id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; + id.proto = args->f_id.proto; + id.addr_type = args->f_id.addr_type; + id.fib = M_GETFIB(args->m); + + if (IS_IP6_FLOW_ID (&(args->f_id))) { + bzero(&id.src_ip6, sizeof(id.src_ip6)); + bzero(&id.dst_ip6, sizeof(id.dst_ip6)); + + if (limit_mask & DYN_SRC_ADDR) + id.src_ip6 = args->f_id.src_ip6; + if (limit_mask & DYN_DST_ADDR) + id.dst_ip6 = args->f_id.dst_ip6; + } else { + if (limit_mask & DYN_SRC_ADDR) + id.src_ip = args->f_id.src_ip; + if (limit_mask & DYN_DST_ADDR) + id.dst_ip = args->f_id.dst_ip; + } + if (limit_mask & DYN_SRC_PORT) + id.src_port = args->f_id.src_port; + if (limit_mask & DYN_DST_PORT) + id.dst_port = args->f_id.dst_port; + + /* + * We have to release lock for previous bucket to + * avoid possible deadlock + */ + IPFW_BUCK_UNLOCK(i); + + parent = lookup_dyn_parent(&id, &pindex, rule, cmd->o.arg1); + if (parent == NULL) { + printf("ipfw: %s: add parent failed\n", __func__); + IPFW_BUCK_UNLOCK(pindex); + return (1); + } + + if (parent->count >= conn_limit) { + if (V_fw_verbose && last_log != time_uptime) { + last_log = time_uptime; + char sbuf[24]; + last_log = time_uptime; + snprintf(sbuf, sizeof(sbuf), + "%d drop session", + parent->rule->rulenum); + print_dyn_rule_flags(&args->f_id, + cmd->o.opcode, + LOG_SECURITY | LOG_DEBUG, + sbuf, "too many entries"); + } + IPFW_BUCK_UNLOCK(pindex); + return (1); + } + /* Increment counter on parent */ + parent->count++; + IPFW_BUCK_UNLOCK(pindex); + + IPFW_BUCK_LOCK(i); + q = add_dyn_rule(&args->f_id, i, O_LIMIT, + (struct ip_fw *)parent, cmd->o.arg1); + if (q == NULL) { + /* Decrement index and notify caller */ + IPFW_BUCK_UNLOCK(i); + IPFW_BUCK_LOCK(pindex); + parent->count--; + IPFW_BUCK_UNLOCK(pindex); + return (1); + } + break; + } + default: + printf("ipfw: %s: unknown dynamic rule type %u\n", + __func__, cmd->o.opcode); + } + + if (q == NULL) { + IPFW_BUCK_UNLOCK(i); + return (1); /* Notify caller about failure */ + } + + dyn_update_proto_state(q, &args->f_id, NULL, MATCH_FORWARD); + 
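The O_LIMIT branch above derives the parent key by zeroing a flow id and copying back only the fields selected by limit_mask, so for example 'limit src-addr N' folds all sessions from one source onto a single parent counter. A compact sketch of that key derivation; the types are toys and the mask bit values are chosen for illustration, not taken from ip_fw.h:

#include <stdint.h>
#include <stdio.h>

#define DYN_SRC_ADDR	0x1	/* illustrative bit assignments */
#define DYN_DST_ADDR	0x2
#define DYN_SRC_PORT	0x4
#define DYN_DST_PORT	0x8

struct flow {			/* toy ipfw_flow_id */
	uint32_t src_ip, dst_ip;
	uint16_t src_port, dst_port;
};

/* derive the limit key: keep only the masked fields */
static struct flow
limit_key(const struct flow *f, unsigned mask)
{
	struct flow id = { 0, 0, 0, 0 };

	if (mask & DYN_SRC_ADDR) id.src_ip = f->src_ip;
	if (mask & DYN_DST_ADDR) id.dst_ip = f->dst_ip;
	if (mask & DYN_SRC_PORT) id.src_port = f->src_port;
	if (mask & DYN_DST_PORT) id.dst_port = f->dst_port;
	return (id);
}

int
main(void)
{
	struct flow f = { 0x0a000001, 0x0a000002, 1234, 80 };
	struct flow k = limit_key(&f, DYN_SRC_ADDR);

	/* every flow from 10.0.0.1 maps to the same parent key */
	printf("%08x %08x %u %u\n", k.src_ip, k.dst_ip,
	    k.src_port, k.dst_port);
	return (0);
}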
IPFW_BUCK_UNLOCK(i); + return (0); +} + +/* + * Generate a TCP packet, containing either a RST or a keepalive. + * When flags & TH_RST, we are sending a RST packet, because of a + * "reset" action matched the packet. + * Otherwise we are sending a keepalive, and flags & TH_ + * The 'replyto' mbuf is the mbuf being replied to, if any, and is required + * so that MAC can label the reply appropriately. + */ +struct mbuf * +ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, + u_int32_t ack, int flags) +{ + struct mbuf *m = NULL; /* stupid compiler */ + int len, dir; + struct ip *h = NULL; /* stupid compiler */ +#ifdef INET6 + struct ip6_hdr *h6 = NULL; +#endif + struct tcphdr *th = NULL; + + MGETHDR(m, M_NOWAIT, MT_DATA); + if (m == NULL) + return (NULL); + + M_SETFIB(m, id->fib); +#ifdef MAC + if (replyto != NULL) + mac_netinet_firewall_reply(replyto, m); + else + mac_netinet_firewall_send(m); +#else + (void)replyto; /* don't warn about unused arg */ +#endif + + switch (id->addr_type) { + case 4: + len = sizeof(struct ip) + sizeof(struct tcphdr); + break; +#ifdef INET6 + case 6: + len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); + break; +#endif + default: + /* XXX: log me?!? */ + FREE_PKT(m); + return (NULL); + } + dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); + + m->m_data += max_linkhdr; + m->m_flags |= M_SKIP_FIREWALL; + m->m_pkthdr.len = m->m_len = len; + m->m_pkthdr.rcvif = NULL; + bzero(m->m_data, len); + + switch (id->addr_type) { + case 4: + h = mtod(m, struct ip *); + + /* prepare for checksum */ + h->ip_p = IPPROTO_TCP; + h->ip_len = htons(sizeof(struct tcphdr)); + if (dir) { + h->ip_src.s_addr = htonl(id->src_ip); + h->ip_dst.s_addr = htonl(id->dst_ip); + } else { + h->ip_src.s_addr = htonl(id->dst_ip); + h->ip_dst.s_addr = htonl(id->src_ip); + } + + th = (struct tcphdr *)(h + 1); + break; +#ifdef INET6 + case 6: + h6 = mtod(m, struct ip6_hdr *); + + /* prepare for checksum */ + h6->ip6_nxt = IPPROTO_TCP; + h6->ip6_plen = htons(sizeof(struct tcphdr)); + if (dir) { + h6->ip6_src = id->src_ip6; + h6->ip6_dst = id->dst_ip6; + } else { + h6->ip6_src = id->dst_ip6; + h6->ip6_dst = id->src_ip6; + } + + th = (struct tcphdr *)(h6 + 1); + break; +#endif + } + + if (dir) { + th->th_sport = htons(id->src_port); + th->th_dport = htons(id->dst_port); + } else { + th->th_sport = htons(id->dst_port); + th->th_dport = htons(id->src_port); + } + th->th_off = sizeof(struct tcphdr) >> 2; + + if (flags & TH_RST) { + if (flags & TH_ACK) { + th->th_seq = htonl(ack); + th->th_flags = TH_RST; + } else { + if (flags & TH_SYN) + seq++; + th->th_ack = htonl(seq); + th->th_flags = TH_RST | TH_ACK; + } + } else { + /* + * Keepalive - use caller provided sequence numbers + */ + th->th_seq = htonl(seq); + th->th_ack = htonl(ack); + th->th_flags = TH_ACK; + } + + switch (id->addr_type) { + case 4: + th->th_sum = in_cksum(m, len); + + /* finish the ip header */ + h->ip_v = 4; + h->ip_hl = sizeof(*h) >> 2; + h->ip_tos = IPTOS_LOWDELAY; + h->ip_off = htons(0); + h->ip_len = htons(len); + h->ip_ttl = V_ip_defttl; + h->ip_sum = 0; + break; +#ifdef INET6 + case 6: + th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), + sizeof(struct tcphdr)); + + /* finish the ip6 header */ + h6->ip6_vfc |= IPV6_VERSION; + h6->ip6_hlim = IPV6_DEFHLIM; + break; +#endif + } + + return (m); +} + +/* + * Queue keepalive packets for given dynamic rule + */ +static struct mbuf ** +ipfw_dyn_send_ka(struct mbuf **mtailp, ipfw_dyn_rule *q) +{ + struct mbuf *m_rev, *m_fwd; + + m_rev = (q->state & ACK_REV) ? 
NULL : + ipfw_send_pkt(NULL, &(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN); + m_fwd = (q->state & ACK_FWD) ? NULL : + ipfw_send_pkt(NULL, &(q->id), q->ack_fwd - 1, q->ack_rev, 0); + + if (m_rev != NULL) { + *mtailp = m_rev; + mtailp = &(*mtailp)->m_nextpkt; + } + if (m_fwd != NULL) { + *mtailp = m_fwd; + mtailp = &(*mtailp)->m_nextpkt; + } + + return (mtailp); +} + +/* + * This procedure is used to perform various maintenance + * on dynamic hash list. Currently it is called every second. + */ +static void +ipfw_dyn_tick(void * vnetx) +{ + struct ip_fw_chain *chain; + int check_ka = 0; +#ifdef VIMAGE + struct vnet *vp = vnetx; +#endif + + CURVNET_SET(vp); + + chain = &V_layer3_chain; + + /* Run keepalive checks every keepalive_period iff ka is enabled */ + if ((V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) && + (V_dyn_keepalive != 0)) { + V_dyn_keepalive_last = time_uptime; + check_ka = 1; + } + + check_dyn_rules(chain, NULL, check_ka, 1); + + callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, vnetx, 0); + + CURVNET_RESTORE(); +} + + +/* + * Walk through all dynamic states doing generic maintenance: + * 1) free expired states + * 2) free all states based on deleted rule / set + * 3) send keepalives for states if needed + * + * @chain - pointer to current ipfw rules chain + * @rule - delete all states originated by given rule if != NULL + * @set - delete all states originated by any rule in set @set if != RESVD_SET + * @check_ka - perform checking/sending keepalives + * @timer - indicate call from timer routine. + * + * Timer routine must call this function unlocked to permit + * sending keepalives/resizing table. + * + * Others has to call function with IPFW_UH_WLOCK held. + * Additionally, function assume that dynamic rule/set is + * ALREADY deleted so no new states can be generated by + * 'deleted' rules. + * + * Write lock is needed to ensure that unused parent rules + * are not freed by other instance (see stage 2, 3) + */ +static void +check_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt, + int check_ka, int timer) +{ + struct mbuf *m0, *m, *mnext, **mtailp; + struct ip *h; + int i, dyn_count, new_buckets = 0, max_buckets; + int expired = 0, expired_limits = 0, parents = 0, total = 0; + ipfw_dyn_rule *q, *q_prev, *q_next; + ipfw_dyn_rule *exp_head, **exptailp; + ipfw_dyn_rule *exp_lhead, **expltailp; + + KASSERT(V_ipfw_dyn_v != NULL, ("%s: dynamic table not allocated", + __func__)); + + /* Avoid possible LOR */ + KASSERT(!check_ka || timer, ("%s: keepalive check with lock held", + __func__)); + + /* + * Do not perform any checks if we currently have no dynamic states + */ + if (DYN_COUNT == 0) + return; + + /* Expired states */ + exp_head = NULL; + exptailp = &exp_head; + + /* Expired limit states */ + exp_lhead = NULL; + expltailp = &exp_lhead; + + /* + * We make a chain of packets to go out here -- not deferring + * until after we drop the IPFW dynamic rule lock would result + * in a lock order reversal with the normal packet input -> ipfw + * call stack. 
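Keepalive generation is throttled twice: ipfw_dyn_tick() enables the check only once per dyn_keepalive_period seconds, and check_dyn_rules() then selects just the established states whose expiry falls within the next dyn_keepalive_interval seconds, using the wrap-safe TIME_LEQ() comparison defined earlier in this file. A standalone check of that window test:

#include <assert.h>
#include <stdint.h>

/* wrap-safe comparison, as in the TIME_LEQ macro above */
#define TIME_LEQ(a, b)	((int32_t)((a) - (b)) <= 0)

static int
needs_keepalive(uint32_t expire, uint32_t now, uint32_t interval)
{
	return (TIME_LEQ(expire, now + interval));
}

int
main(void)
{
	assert(needs_keepalive(105, 100, 20) == 1);	/* expiring soon */
	assert(needs_keepalive(300, 100, 20) == 0);	/* plenty of time */
	/* still correct when the tick counter wraps around */
	assert(needs_keepalive(2, 0xfffffff0u, 20) == 1);
	return (0);
}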
+ */ + m0 = NULL; + mtailp = &m0; + + /* Protect from hash resizing */ + if (timer != 0) + IPFW_UH_WLOCK(chain); + else + IPFW_UH_WLOCK_ASSERT(chain); + +#define NEXT_RULE() { q_prev = q; q = q->next ; continue; } + + /* Stage 1: perform requested deletion */ + for (i = 0 ; i < V_curr_dyn_buckets ; i++) { + IPFW_BUCK_LOCK(i); + for (q = V_ipfw_dyn_v[i].head, q_prev = q; q ; ) { + /* account every rule */ + total++; + + /* Skip parent rules entirely */ + if (q->dyn_type == O_LIMIT_PARENT) { + parents++; + NEXT_RULE(); + } + + /* + * Remove rules which are: + * 1) expired + * 2) matching the deletion range + */ + if ((TIME_LEQ(q->expire, time_uptime)) || + (rt != NULL && ipfw_match_range(q->rule, rt))) { + if (TIME_LE(time_uptime, q->expire) && + q->dyn_type == O_KEEP_STATE && + V_dyn_keep_states != 0) { + /* + * Do not delete state if + * it is not expired and + * dyn_keep_states is ON. + * However we need to re-link it + * to any other stable rule + */ + q->rule = chain->default_rule; + NEXT_RULE(); + } + + /* Unlink q from current list */ + q_next = q->next; + if (q == V_ipfw_dyn_v[i].head) + V_ipfw_dyn_v[i].head = q_next; + else + q_prev->next = q_next; + + q->next = NULL; + + /* queue q to expire list */ + if (q->dyn_type != O_LIMIT) { + *exptailp = q; + exptailp = &(*exptailp)->next; + DEB(print_dyn_rule(&q->id, q->dyn_type, + "unlink entry", "left"); + ) + } else { + /* Separate list for limit rules */ + *expltailp = q; + expltailp = &(*expltailp)->next; + expired_limits++; + DEB(print_dyn_rule(&q->id, q->dyn_type, + "unlink limit entry", "left"); + ) + } + + q = q_next; + expired++; + continue; + } + + /* + * Check if we need to send keepalive: + * we need to ensure it is time to do KA, + * this is established TCP session, and + * expire time is within keepalive interval + */ + if ((check_ka != 0) && (q->id.proto == IPPROTO_TCP) && + ((q->state & BOTH_SYN) == BOTH_SYN) && + (TIME_LEQ(q->expire, time_uptime + + V_dyn_keepalive_interval))) + mtailp = ipfw_dyn_send_ka(mtailp, q); + + NEXT_RULE(); + } + IPFW_BUCK_UNLOCK(i); + } + + /* Stage 2: decrement counters from O_LIMIT parents */ + if (expired_limits != 0) { + /* + * XXX: Note that deleting a set with more than one + * heavily-used LIMIT rule can result in overwhelming + * locking due to lack of per-hash value sorting + * + * We should probably think about: + * 1) pre-allocating hash of size, say, + * MAX(16, V_curr_dyn_buckets / 1024) + * 2) checking if expired_limits is large enough + * 3) If yes, init hash (or its part), re-link + * current list and start decrementing procedure in + * each bucket separately + */ + + /* + * Small optimization: do not unlock bucket until + * we see the next item resides in a different bucket + */ + if (exp_lhead != NULL) { + i = exp_lhead->parent->bucket; + IPFW_BUCK_LOCK(i); + } + for (q = exp_lhead; q != NULL; q = q->next) { + if (i != q->parent->bucket) { + IPFW_BUCK_UNLOCK(i); + i = q->parent->bucket; + IPFW_BUCK_LOCK(i); + } + + /* Decrease parent refcount */ + q->parent->count--; + } + if (exp_lhead != NULL) + IPFW_BUCK_UNLOCK(i); + } + + /* + * We protect ourselves from unused parent deletion + * (from the timer function) by holding UH write lock. 
+
+	/* Stage 3: remove unused parent rules */
+	if ((parents != 0) && (expired != 0)) {
+		for (i = 0 ; i < V_curr_dyn_buckets ; i++) {
+			IPFW_BUCK_LOCK(i);
+			for (q = V_ipfw_dyn_v[i].head, q_prev = q ; q ; ) {
+				if (q->dyn_type != O_LIMIT_PARENT)
+					NEXT_RULE();
+
+				if (q->count != 0)
+					NEXT_RULE();
+
+				/* Parent rule without consumers */
+
+				/* Unlink q from current list */
+				q_next = q->next;
+				if (q == V_ipfw_dyn_v[i].head)
+					V_ipfw_dyn_v[i].head = q_next;
+				else
+					q_prev->next = q_next;
+
+				q->next = NULL;
+
+				/* Add to expired list */
+				*exptailp = q;
+				exptailp = &(*exptailp)->next;
+
+				DEB(print_dyn_rule(&q->id, q->dyn_type,
+				    "unlink parent entry", "left");
+				)
+
+				expired++;
+
+				q = q_next;
+			}
+			IPFW_BUCK_UNLOCK(i);
+		}
+	}
+
+#undef NEXT_RULE
+
+	if (timer != 0) {
+		/*
+		 * Check if we need to resize the hash:
+		 * if the current number of states exceeds twice the number
+		 * of buckets in the hash, grow the hash size to the minimum
+		 * power of 2 which is bigger than the current state count.
+		 * Limit hash size to 64k.
+		 */
+		max_buckets = (V_dyn_buckets_max > 65536) ?
+		    65536 : V_dyn_buckets_max;
+
+		dyn_count = DYN_COUNT;
+
+		if ((dyn_count > V_curr_dyn_buckets * 2) &&
+		    (dyn_count < max_buckets)) {
+			new_buckets = V_curr_dyn_buckets;
+			while (new_buckets < dyn_count) {
+				new_buckets *= 2;
+
+				if (new_buckets >= max_buckets)
+					break;
+			}
+		}
+
+		IPFW_UH_WUNLOCK(chain);
+	}
+
+	/* Finally delete old states and limits, if any */
+	for (q = exp_head; q != NULL; q = q_next) {
+		q_next = q->next;
+		uma_zfree(V_ipfw_dyn_rule_zone, q);
+		ipfw_dyn_count--;
+	}
+
+	for (q = exp_lhead; q != NULL; q = q_next) {
+		q_next = q->next;
+		uma_zfree(V_ipfw_dyn_rule_zone, q);
+		ipfw_dyn_count--;
+	}
+
+	/*
+	 * The rest of this code MUST be called from the timer routine only,
+	 * without holding any locks.
+	 */
+	if (timer == 0)
+		return;
+
+	/* Send keepalive packets if any */
+	for (m = m0; m != NULL; m = mnext) {
+		mnext = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+		h = mtod(m, struct ip *);
+		if (h->ip_v == 4)
+			ip_output(m, NULL, NULL, 0, NULL, NULL);
+#ifdef INET6
+		else
+			ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
+#endif
+	}
+
+	/* Run table resize without holding any locks */
+	if (new_buckets != 0)
+		resize_dynamic_table(chain, new_buckets);
+}
+
+/*
+ * Deletes all dynamic states originated by rules in the matched range.
+ *
+ * @chain - pointer to current ipfw rules chain
+ * @rt - delete all states originated by rules in matched range.
+ *
+ * Function has to be called with IPFW_UH_WLOCK held.
+ * Additionally, the function assumes that the dynamic rule/set is
+ * ALREADY deleted so no new states can be generated by
+ * 'deleted' rules.
+ */
+void
+ipfw_expire_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
+{
+
+	check_dyn_rules(chain, rt, 0, 0);
+}
+
+/*
+ * Check if rule contains at least one dynamic opcode.
+ *
+ * Returns 1 if such opcode is found, 0 otherwise.
+ */ +int +ipfw_is_dyn_rule(struct ip_fw *rule) +{ + int cmdlen, l; + ipfw_insn *cmd; + + l = rule->cmd_len; + cmd = rule->cmd; + cmdlen = 0; + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + switch (cmd->opcode) { + case O_LIMIT: + case O_KEEP_STATE: + case O_PROBE_STATE: + case O_CHECK_STATE: + return (1); + } + } + + return (0); +} + +void +ipfw_dyn_init(struct ip_fw_chain *chain) +{ + + V_ipfw_dyn_v = NULL; + V_dyn_buckets_max = 256; /* must be power of 2 */ + V_curr_dyn_buckets = 256; /* must be power of 2 */ + + V_dyn_ack_lifetime = 300; + V_dyn_syn_lifetime = 20; + V_dyn_fin_lifetime = 1; + V_dyn_rst_lifetime = 1; + V_dyn_udp_lifetime = 10; + V_dyn_short_lifetime = 5; + + V_dyn_keepalive_interval = 20; + V_dyn_keepalive_period = 5; + V_dyn_keepalive = 1; /* do send keepalives */ + V_dyn_keepalive_last = time_uptime; + + V_dyn_max = 16384; /* max # of dynamic rules */ + + V_ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", + sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + + /* Enforce limit on dynamic rules */ + uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max); + + callout_init(&V_ipfw_timeout, 1); + + /* + * This can potentially be done on first dynamic rule + * being added to chain. + */ + resize_dynamic_table(chain, V_curr_dyn_buckets); + IPFW_ADD_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); +} + +void +ipfw_dyn_uninit(int pass) +{ + int i; + + if (pass == 0) { + callout_drain(&V_ipfw_timeout); + return; + } + IPFW_DEL_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); + + if (V_ipfw_dyn_v != NULL) { + /* + * Skip deleting all dynamic states - + * uma_zdestroy() does this more efficiently; + */ + + /* Destroy all mutexes */ + for (i = 0 ; i < V_curr_dyn_buckets ; i++) + IPFW_BUCK_LOCK_DESTROY(&V_ipfw_dyn_v[i]); + free(V_ipfw_dyn_v, M_IPFW); + V_ipfw_dyn_v = NULL; + } + + uma_zdestroy(V_ipfw_dyn_rule_zone); +} + +#ifdef SYSCTL_NODE +/* + * Get/set maximum number of dynamic states in given VNET instance. + */ +static int +sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS) +{ + int error; + unsigned int nstates; + + nstates = V_dyn_max; + + error = sysctl_handle_int(oidp, &nstates, 0, req); + /* Read operation or some error */ + if ((error != 0) || (req->newptr == NULL)) + return (error); + + V_dyn_max = nstates; + uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max); + + return (0); +} + +/* + * Get current number of dynamic states in given VNET instance. + */ +static int +sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS) +{ + int error; + unsigned int nstates; + + nstates = DYN_COUNT; + + error = sysctl_handle_int(oidp, &nstates, 0, req); + + return (error); +} +#endif + +/* + * Returns size of dynamic states in legacy format + */ +int +ipfw_dyn_len(void) +{ + + return (V_ipfw_dyn_v == NULL) ? 0 : + (DYN_COUNT * sizeof(ipfw_dyn_rule)); +} + +/* + * Returns number of dynamic states. + * Used by dump format v1 (current). + */ +int +ipfw_dyn_get_count(void) +{ + + return (V_ipfw_dyn_v == NULL) ? 0 : DYN_COUNT; +} + +static void +export_dyn_rule(ipfw_dyn_rule *src, ipfw_dyn_rule *dst) +{ + + memcpy(dst, src, sizeof(*src)); + memcpy(&(dst->rule), &(src->rule->rulenum), sizeof(src->rule->rulenum)); + /* + * store set number into high word of + * dst->rule pointer. + */ + memcpy((char *)&dst->rule + sizeof(src->rule->rulenum), + &(src->rule->set), sizeof(src->rule->set)); + /* + * store a non-null value in "next". + * The userland code will interpret a + * NULL here as a marker + * for the last dynamic rule. 
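+	 *
+	 * (Editor's note: the v1 dump format, see ipfw_dump_states()
+	 * below, marks the last state with an explicit IPFW_DF_LAST
+	 * flag instead.)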
+	 */
+	memcpy(&dst->next, &dst, sizeof(dst));
+	dst->expire =
+	    TIME_LEQ(dst->expire, time_uptime) ? 0 : dst->expire - time_uptime;
+}
+
+/*
+ * Fills in the buffer given by @sd with dynamic states.
+ * Used by dump format v1 (current).
+ *
+ * Returns 0 on success.
+ */
+int
+ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd)
+{
+	ipfw_dyn_rule *p;
+	ipfw_obj_dyntlv *dst, *last;
+	ipfw_obj_ctlv *ctlv;
+	int i;
+	size_t sz;
+
+	if (V_ipfw_dyn_v == NULL)
+		return (0);
+
+	IPFW_UH_RLOCK_ASSERT(chain);
+
+	ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv));
+	if (ctlv == NULL)
+		return (ENOMEM);
+	sz = sizeof(ipfw_obj_dyntlv);
+	ctlv->head.type = IPFW_TLV_DYNSTATE_LIST;
+	ctlv->objsize = sz;
+	last = NULL;
+
+	for (i = 0 ; i < V_curr_dyn_buckets; i++) {
+		IPFW_BUCK_LOCK(i);
+		for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) {
+			dst = (ipfw_obj_dyntlv *)ipfw_get_sopt_space(sd, sz);
+			if (dst == NULL) {
+				IPFW_BUCK_UNLOCK(i);
+				return (ENOMEM);
+			}
+
+			export_dyn_rule(p, &dst->state);
+			dst->head.length = sz;
+			dst->head.type = IPFW_TLV_DYN_ENT;
+			last = dst;
+		}
+		IPFW_BUCK_UNLOCK(i);
+	}
+
+	if (last != NULL) /* mark last dynamic rule */
+		last->head.flags = IPFW_DF_LAST;
+
+	return (0);
+}
+
+/*
+ * Fills the given buffer with dynamic states (legacy format).
+ * IPFW_UH_RLOCK has to be held while calling.
+ */
+void
+ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep)
+{
+	ipfw_dyn_rule *p, *last = NULL;
+	char *bp;
+	int i;
+
+	if (V_ipfw_dyn_v == NULL)
+		return;
+	bp = *pbp;
+
+	IPFW_UH_RLOCK_ASSERT(chain);
+
+	for (i = 0 ; i < V_curr_dyn_buckets; i++) {
+		IPFW_BUCK_LOCK(i);
+		for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) {
+			if (bp + sizeof *p <= ep) {
+				ipfw_dyn_rule *dst =
+				    (ipfw_dyn_rule *)bp;
+
+				export_dyn_rule(p, dst);
+				last = dst;
+				bp += sizeof(ipfw_dyn_rule);
+			}
+		}
+		IPFW_BUCK_UNLOCK(i);
+	}
+
+	if (last != NULL) /* mark last dynamic rule */
+		bzero(&last->next, sizeof(last));
+	*pbp = bp;
+}
+/* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c b/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c
new file mode 100644
index 00000000..2c6ba8b9
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c
@@ -0,0 +1,383 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/hash.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/queue.h>
+#include <net/pfil.h>
+
+#include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+
+#include <rtems/bsd/local/opt_ipfw.h>
+
+/*
+ * External actions support for ipfw.
+ *
+ * This code provides a KPI for implementing loadable modules that
+ * can provide handlers for external action opcodes in ipfw rules.
+ * A module should implement an opcode handler of type ipfw_eaction_t.
+ * This handler will be called by the ipfw_chk() function when an
+ * O_EXTERNAL_ACTION opcode is matched. The handler must return a
+ * value to be used as the return value of ipfw_chk(), i.e. IP_FW_PASS,
+ * IP_FW_DENY (see ip_fw_private.h).
+ * Also, the last argument must be set by the handler. If it is zero,
+ * the search continues with the next rule. If it is non-zero,
+ * the search terminates.
+ *
+ * A module that implements an external action should register its
+ * handler and name with the ipfw_add_eaction() function.
+ * This function returns an eaction_id that can be used by the module.
+ *
+ * It is possible to pass some additional information to an external
+ * action handler via the O_EXTERNAL_INSTANCE opcode. This opcode
+ * comes right after the O_EXTERNAL_ACTION opcode. cmd->arg1 will
+ * contain the index of the named object related to the instance of
+ * the external action.
+ *
+ * In case an eaction module uses named instances, it should register
+ * opcode rewriting routines for the O_EXTERNAL_INSTANCE opcode. The
+ * classifier callback can look back into the O_EXTERNAL_ACTION opcode
+ * (it must be in (ipfw_insn *)(cmd - 1)). From the arg1 of
+ * O_EXTERNAL_ACTION it can determine the eaction_id and compare it
+ * with its own. The macro IPFW_TLV_EACTION_NAME(eaction_id) can be
+ * used to determine the type of named_object related to the external
+ * action instance.
+ *
+ * On module unload, the handler should be deregistered with the
+ * ipfw_del_eaction() function using the known eaction_id.
+ */
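+/*
+ * (Editor's sketch, not part of the original source.)  A minimal
+ * hypothetical module using this KPI; "myact", the handler body and
+ * the variable names are invented for illustration:
+ *
+ *	static uint16_t myact_id;
+ *
+ *	static int
+ *	myact_handler(struct ip_fw_chain *ch, struct ip_fw_args *args,
+ *	    ipfw_insn *cmd, int *done)
+ *	{
+ *
+ *		*done = 1;		// terminate the rule search
+ *		return (IP_FW_PASS);	// used as ipfw_chk() return value
+ *	}
+ *
+ *	// on module load (0 would indicate failure):
+ *	myact_id = ipfw_add_eaction(&V_layer3_chain, myact_handler, "myact");
+ *	// on module unload:
+ *	ipfw_del_eaction(&V_layer3_chain, myact_id);
+ */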
+struct eaction_obj {
+	struct named_object	no;
+	ipfw_eaction_t		*handler;
+	char			name[64];
+};
+
+#define	EACTION_OBJ(ch, cmd)			\
+    ((struct eaction_obj *)SRV_OBJECT((ch), (cmd)->arg1))
+
+#if 0
+#define	EACTION_DEBUG(fmt, ...)	do {			\
+	printf("%s: " fmt "\n", __func__, ## __VA_ARGS__);	\
+} while (0)
+#else
+#define	EACTION_DEBUG(fmt, ...)
+#endif
+
+const char *default_eaction_typename = "drop";
+static int
+default_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args,
+    ipfw_insn *cmd, int *done)
+{
+
+	*done = 1; /* terminate the search */
+	return (IP_FW_DENY);
+}
+
+/*
+ * Opcode rewriting callbacks.
+ */
+static int
+eaction_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+
+	EACTION_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
+	*puidx = cmd->arg1;
+	*ptype = 0;
+	return (0);
+}
+
+static void
+eaction_update(ipfw_insn *cmd, uint16_t idx)
+{
+
+	cmd->arg1 = idx;
+	EACTION_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1);
+}
+
+static int
+eaction_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct named_object **pno)
+{
+	ipfw_obj_ntlv *ntlv;
+
+	if (ti->tlvs == NULL)
+		return (EINVAL);
+
+	/* Search ntlv in the buffer provided by user */
+	ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
+	    IPFW_TLV_EACTION);
+	if (ntlv == NULL)
+		return (EINVAL);
+	EACTION_DEBUG("name %s, uidx %u, type %u", ntlv->name,
+	    ti->uidx, ti->type);
+	/*
+	 * Search named object with corresponding name.
+	 * Since eaction objects are global - ignore the set value
+	 * and use zero instead.
+	 */
+	*pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch),
+	    0, IPFW_TLV_EACTION, ntlv->name);
+	if (*pno == NULL)
+		return (ESRCH);
+	return (0);
+}
+
+static struct named_object *
+eaction_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+
+	EACTION_DEBUG("kidx %u", idx);
+	return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx));
+}
+
+static struct opcode_obj_rewrite eaction_opcodes[] = {
+	{
+		.opcode = O_EXTERNAL_ACTION,
+		.etlv = IPFW_TLV_EACTION,
+		.classifier = eaction_classify,
+		.update = eaction_update,
+		.find_byname = eaction_findbyname,
+		.find_bykidx = eaction_findbykidx,
+	},
+};
+
+static int
+create_eaction_obj(struct ip_fw_chain *ch, ipfw_eaction_t handler,
+    const char *name, uint16_t *eaction_id)
+{
+	struct namedobj_instance *ni;
+	struct eaction_obj *obj;
+
+	IPFW_UH_UNLOCK_ASSERT(ch);
+
+	ni = CHAIN_TO_SRV(ch);
+	obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO);
+	obj->no.name = obj->name;
+	obj->no.etlv = IPFW_TLV_EACTION;
+	obj->handler = handler;
+	strlcpy(obj->name, name, sizeof(obj->name));
+
+	IPFW_UH_WLOCK(ch);
+	if (ipfw_objhash_lookup_name_type(ni, 0, IPFW_TLV_EACTION,
+	    name) != NULL) {
+		/*
+		 * Object is already created.
+		 * We don't allow eactions with the same name.
+		 */
+		IPFW_UH_WUNLOCK(ch);
+		free(obj, M_IPFW);
+		EACTION_DEBUG("External action with typename "
+		    "'%s' already exists", name);
+		return (EEXIST);
+	}
+	if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		free(obj, M_IPFW);
+		EACTION_DEBUG("alloc_idx failed");
+		return (ENOSPC);
+	}
+	ipfw_objhash_add(ni, &obj->no);
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, obj->no.kidx) = obj;
+	IPFW_WUNLOCK(ch);
+	obj->no.refcnt++;
+	IPFW_UH_WUNLOCK(ch);
+
+	if (eaction_id != NULL)
+		*eaction_id = obj->no.kidx;
+	return (0);
+}
+
+static void
+destroy_eaction_obj(struct ip_fw_chain *ch, struct named_object *no)
+{
+	struct namedobj_instance *ni;
+	struct eaction_obj *obj;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	ni = CHAIN_TO_SRV(ch);
+	IPFW_WLOCK(ch);
+	obj = SRV_OBJECT(ch, no->kidx);
+	SRV_OBJECT(ch, no->kidx) = NULL;
+	IPFW_WUNLOCK(ch);
+	ipfw_objhash_del(ni, no);
+	ipfw_objhash_free_idx(ni, no->kidx);
+	free(obj, M_IPFW);
+}
+
+/*
+ * Resets all eaction opcodes to default handlers.
+ */ +static void +reset_eaction_obj(struct ip_fw_chain *ch, uint16_t eaction_id) +{ + struct named_object *no; + struct ip_fw *rule; + ipfw_insn *cmd; + int i; + + IPFW_UH_WLOCK_ASSERT(ch); + + no = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0, + IPFW_TLV_EACTION, default_eaction_typename); + if (no == NULL) + panic("Default external action handler is not found"); + if (eaction_id == no->kidx) + panic("Wrong eaction_id"); + EACTION_DEBUG("replace id %u with %u", eaction_id, no->kidx); + IPFW_WLOCK(ch); + for (i = 0; i < ch->n_rules; i++) { + rule = ch->map[i]; + cmd = ACTION_PTR(rule); + if (cmd->opcode != O_EXTERNAL_ACTION) + continue; + if (cmd->arg1 != eaction_id) + continue; + cmd->arg1 = no->kidx; /* Set to default id */ + /* + * XXX: we only bump refcount on default_eaction. + * Refcount on the original object will be just + * ignored on destroy. But on default_eaction it + * will be decremented on rule deletion. + */ + no->refcnt++; + /* + * Since named_object related to this instance will be + * also destroyed, truncate the chain of opcodes to + * remove O_EXTERNAL_INSTANCE opcode. + */ + if (rule->act_ofs < rule->cmd_len - 1) { + EACTION_DEBUG("truncate rule %d", rule->rulenum); + rule->cmd_len--; + } + } + IPFW_WUNLOCK(ch); +} + +/* + * Initialize external actions framework. + * Create object with default eaction handler "drop". + */ +int +ipfw_eaction_init(struct ip_fw_chain *ch, int first) +{ + int error; + + error = create_eaction_obj(ch, default_eaction, + default_eaction_typename, NULL); + if (error != 0) + return (error); + IPFW_ADD_OBJ_REWRITER(first, eaction_opcodes); + EACTION_DEBUG("External actions support initialized"); + return (0); +} + +void +ipfw_eaction_uninit(struct ip_fw_chain *ch, int last) +{ + struct namedobj_instance *ni; + struct named_object *no; + + ni = CHAIN_TO_SRV(ch); + + IPFW_UH_WLOCK(ch); + no = ipfw_objhash_lookup_name_type(ni, 0, IPFW_TLV_EACTION, + default_eaction_typename); + if (no != NULL) + destroy_eaction_obj(ch, no); + IPFW_UH_WUNLOCK(ch); + IPFW_DEL_OBJ_REWRITER(last, eaction_opcodes); + EACTION_DEBUG("External actions support uninitialized"); +} + +/* + * Registers external action handler to the global array. + * On success it returns eaction id, otherwise - zero. + */ +uint16_t +ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler, + const char *name) +{ + uint16_t eaction_id; + + eaction_id = 0; + if (ipfw_check_object_name_generic(name) == 0) { + create_eaction_obj(ch, handler, name, &eaction_id); + EACTION_DEBUG("Registered external action '%s' with id %u", + name, eaction_id); + } + return (eaction_id); +} + +/* + * Deregisters external action handler with id eaction_id. 
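+ *
+ * (Editor's note, derived from the code below.)  If the eaction is
+ * still referenced by rules (refcnt > 1), those rules are first reset
+ * to point at the default "drop" handler via reset_eaction_obj(), and
+ * only then is the named object destroyed.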
+ */ +int +ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id) +{ + struct named_object *no; + + IPFW_UH_WLOCK(ch); + no = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), eaction_id); + if (no == NULL || no->etlv != IPFW_TLV_EACTION) { + IPFW_UH_WUNLOCK(ch); + return (EINVAL); + } + if (no->refcnt > 1) + reset_eaction_obj(ch, eaction_id); + EACTION_DEBUG("External action '%s' with id %u unregistered", + no->name, eaction_id); + destroy_eaction_obj(ch, no); + IPFW_UH_WUNLOCK(ch); + return (0); +} + +int +ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args, + ipfw_insn *cmd, int *done) +{ + + return (EACTION_OBJ(ch, cmd)->handler(ch, args, cmd, done)); +} diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_iface.c b/freebsd/sys/netpfil/ipfw/ip_fw_iface.c new file mode 100644 index 00000000..f8973a91 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/ip_fw_iface.c @@ -0,0 +1,541 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2014 Yandex LLC. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * Kernel interface tracking API. + * + */ + +#include <rtems/bsd/local/opt_ipfw.h> +#include <rtems/bsd/local/opt_inet.h> +#ifndef INET +#error IPFIREWALL requires INET. 
+#endif /* INET */ +#include <rtems/bsd/local/opt_inet6.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/rwlock.h> +#include <sys/rmlock.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <sys/eventhandler.h> +#include <net/if.h> +#include <net/if_var.h> +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ +#include <netinet/ip_fw.h> + +#include <netpfil/ipfw/ip_fw_private.h> + +#define CHAIN_TO_II(ch) ((struct namedobj_instance *)ch->ifcfg) + +#define DEFAULT_IFACES 128 + +static void handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif, + uint16_t ifindex); +static void handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif, + uint16_t ifindex); +static int list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); + +static struct ipfw_sopt_handler scodes[] = { + { IP_FW_XIFLIST, 0, HDIR_GET, list_ifaces }, +}; + +/* + * FreeBSD Kernel interface. + */ +static void ipfw_kifhandler(void *arg, struct ifnet *ifp); +static int ipfw_kiflookup(char *name); +static void iface_khandler_register(void); +static void iface_khandler_deregister(void); + +static eventhandler_tag ipfw_ifdetach_event, ipfw_ifattach_event; +static int num_vnets = 0; +static struct mtx vnet_mtx; + +/* + * Checks if kernel interface is contained in our tracked + * interface list and calls attach/detach handler. + */ +static void +ipfw_kifhandler(void *arg, struct ifnet *ifp) +{ + struct ip_fw_chain *ch; + struct ipfw_iface *iif; + struct namedobj_instance *ii; + uintptr_t htype; + + if (V_ipfw_vnet_ready == 0) + return; + + ch = &V_layer3_chain; + htype = (uintptr_t)arg; + + IPFW_UH_WLOCK(ch); + ii = CHAIN_TO_II(ch); + if (ii == NULL) { + IPFW_UH_WUNLOCK(ch); + return; + } + iif = (struct ipfw_iface*)ipfw_objhash_lookup_name(ii, 0, + if_name(ifp)); + if (iif != NULL) { + if (htype == 1) + handle_ifattach(ch, iif, ifp->if_index); + else + handle_ifdetach(ch, iif, ifp->if_index); + } + IPFW_UH_WUNLOCK(ch); +} + +/* + * Reference current VNET as iface tracking API user. + * Registers interface tracking handlers for first VNET. + */ +static void +iface_khandler_register() +{ + int create; + + create = 0; + + mtx_lock(&vnet_mtx); + if (num_vnets == 0) + create = 1; + num_vnets++; + mtx_unlock(&vnet_mtx); + + if (create == 0) + return; + + printf("IPFW: starting up interface tracker\n"); + + ipfw_ifdetach_event = EVENTHANDLER_REGISTER( + ifnet_departure_event, ipfw_kifhandler, NULL, + EVENTHANDLER_PRI_ANY); + ipfw_ifattach_event = EVENTHANDLER_REGISTER( + ifnet_arrival_event, ipfw_kifhandler, (void*)((uintptr_t)1), + EVENTHANDLER_PRI_ANY); +} + +/* + * + * Detach interface event handlers on last VNET instance + * detach. + */ +static void +iface_khandler_deregister() +{ + int destroy; + + destroy = 0; + mtx_lock(&vnet_mtx); + if (num_vnets == 1) + destroy = 1; + num_vnets--; + mtx_unlock(&vnet_mtx); + + if (destroy == 0) + return; + + EVENTHANDLER_DEREGISTER(ifnet_arrival_event, + ipfw_ifattach_event); + EVENTHANDLER_DEREGISTER(ifnet_departure_event, + ipfw_ifdetach_event); +} + +/* + * Retrieves ifindex for given @name. + * + * Returns ifindex or 0. + */ +static int +ipfw_kiflookup(char *name) +{ + struct ifnet *ifp; + int ifindex; + + ifindex = 0; + + if ((ifp = ifunit_ref(name)) != NULL) { + ifindex = ifp->if_index; + if_rele(ifp); + } + + return (ifindex); +} + +/* + * Global ipfw startup hook. 
+ * Since we perform lazy initialization, do nothing except + * mutex init. + */ +int +ipfw_iface_init() +{ + + mtx_init(&vnet_mtx, "IPFW ifhandler mtx", NULL, MTX_DEF); + IPFW_ADD_SOPT_HANDLER(1, scodes); + return (0); +} + +/* + * Global ipfw destroy hook. + * Unregister khandlers iff init has been done. + */ +void +ipfw_iface_destroy() +{ + + IPFW_DEL_SOPT_HANDLER(1, scodes); + mtx_destroy(&vnet_mtx); +} + +/* + * Perform actual init on internal request. + * Inits both namehash and global khandler. + */ +static void +vnet_ipfw_iface_init(struct ip_fw_chain *ch) +{ + struct namedobj_instance *ii; + + ii = ipfw_objhash_create(DEFAULT_IFACES); + IPFW_UH_WLOCK(ch); + if (ch->ifcfg == NULL) { + ch->ifcfg = ii; + ii = NULL; + } + IPFW_UH_WUNLOCK(ch); + + if (ii != NULL) { + /* Already initialized. Free namehash. */ + ipfw_objhash_destroy(ii); + } else { + /* We're the first ones. Init kernel hooks. */ + iface_khandler_register(); + } +} + +static int +destroy_iface(struct namedobj_instance *ii, struct named_object *no, + void *arg) +{ + + /* Assume all consumers have been already detached */ + free(no, M_IPFW); + return (0); +} + +/* + * Per-VNET ipfw detach hook. + * + */ +void +vnet_ipfw_iface_destroy(struct ip_fw_chain *ch) +{ + struct namedobj_instance *ii; + + IPFW_UH_WLOCK(ch); + ii = CHAIN_TO_II(ch); + ch->ifcfg = NULL; + IPFW_UH_WUNLOCK(ch); + + if (ii != NULL) { + ipfw_objhash_foreach(ii, destroy_iface, ch); + ipfw_objhash_destroy(ii); + iface_khandler_deregister(); + } +} + +/* + * Notify the subsystem that we are interested in tracking + * interface @name. This function has to be called without + * holding any locks to permit allocating the necessary states + * for proper interface tracking. + * + * Returns 0 on success. + */ +int +ipfw_iface_ref(struct ip_fw_chain *ch, char *name, + struct ipfw_ifc *ic) +{ + struct namedobj_instance *ii; + struct ipfw_iface *iif, *tmp; + + if (strlen(name) >= sizeof(iif->ifname)) + return (EINVAL); + + IPFW_UH_WLOCK(ch); + + ii = CHAIN_TO_II(ch); + if (ii == NULL) { + + /* + * First request to subsystem. + * Let's perform init. + */ + IPFW_UH_WUNLOCK(ch); + vnet_ipfw_iface_init(ch); + IPFW_UH_WLOCK(ch); + ii = CHAIN_TO_II(ch); + } + + iif = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name); + + if (iif != NULL) { + iif->no.refcnt++; + ic->iface = iif; + IPFW_UH_WUNLOCK(ch); + return (0); + } + + IPFW_UH_WUNLOCK(ch); + + /* Not found. Let's create one */ + iif = malloc(sizeof(struct ipfw_iface), M_IPFW, M_WAITOK | M_ZERO); + TAILQ_INIT(&iif->consumers); + iif->no.name = iif->ifname; + strlcpy(iif->ifname, name, sizeof(iif->ifname)); + + /* + * Ref & link to the list. + * + * We assume ifnet_arrival_event / ifnet_departure_event + * are not holding any locks. + */ + iif->no.refcnt = 1; + IPFW_UH_WLOCK(ch); + + tmp = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name); + if (tmp != NULL) { + /* Interface has been created since unlock. Ref and return */ + tmp->no.refcnt++; + ic->iface = tmp; + IPFW_UH_WUNLOCK(ch); + free(iif, M_IPFW); + return (0); + } + + iif->ifindex = ipfw_kiflookup(name); + if (iif->ifindex != 0) + iif->resolved = 1; + + ipfw_objhash_add(ii, &iif->no); + ic->iface = iif; + + IPFW_UH_WUNLOCK(ch); + + return (0); +} + +/* + * Adds @ic to the list of iif interface consumers. + * Must be called with holding both UH+WLOCK. + * Callback may be immediately called (if interface exists). 
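+ *
+ * (Editor's sketch, hypothetical consumer; field names taken from the
+ * code below.)  A typical consumer first resolves the tracker without
+ * any locks held, then subscribes under both locks:
+ *
+ *	struct ipfw_ifc ic;
+ *
+ *	error = ipfw_iface_ref(ch, "em0", &ic);	// no locks held here
+ *	...
+ *	ic.cb = my_ifindex_cb;		// invoked as cb(ch, cbdata, ifindex)
+ *	ic.cbdata = arg;
+ *	IPFW_UH_WLOCK(ch);
+ *	IPFW_WLOCK(ch);
+ *	ipfw_iface_add_notify(ch, &ic);
+ *	IPFW_WUNLOCK(ch);
+ *	IPFW_UH_WUNLOCK(ch);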
+ */ +void +ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic) +{ + struct ipfw_iface *iif; + + IPFW_UH_WLOCK_ASSERT(ch); + IPFW_WLOCK_ASSERT(ch); + + iif = ic->iface; + + TAILQ_INSERT_TAIL(&iif->consumers, ic, next); + if (iif->resolved != 0) + ic->cb(ch, ic->cbdata, iif->ifindex); +} + +/* + * Unlinks interface tracker object @ic from interface. + * Must be called while holding UH lock. + */ +void +ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic) +{ + struct ipfw_iface *iif; + + IPFW_UH_WLOCK_ASSERT(ch); + + iif = ic->iface; + TAILQ_REMOVE(&iif->consumers, ic, next); +} + +/* + * Unreference interface specified by @ic. + * Must be called while holding UH lock. + */ +void +ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic) +{ + struct ipfw_iface *iif; + + IPFW_UH_WLOCK_ASSERT(ch); + + iif = ic->iface; + ic->iface = NULL; + + iif->no.refcnt--; + /* TODO: check for references & delete */ +} + +/* + * Interface arrival handler. + */ +static void +handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif, + uint16_t ifindex) +{ + struct ipfw_ifc *ic; + + IPFW_UH_WLOCK_ASSERT(ch); + + iif->gencnt++; + iif->resolved = 1; + iif->ifindex = ifindex; + + IPFW_WLOCK(ch); + TAILQ_FOREACH(ic, &iif->consumers, next) + ic->cb(ch, ic->cbdata, iif->ifindex); + IPFW_WUNLOCK(ch); +} + +/* + * Interface departure handler. + */ +static void +handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif, + uint16_t ifindex) +{ + struct ipfw_ifc *ic; + + IPFW_UH_WLOCK_ASSERT(ch); + + IPFW_WLOCK(ch); + TAILQ_FOREACH(ic, &iif->consumers, next) + ic->cb(ch, ic->cbdata, 0); + IPFW_WUNLOCK(ch); + + iif->gencnt++; + iif->resolved = 0; + iif->ifindex = 0; +} + +struct dump_iface_args { + struct ip_fw_chain *ch; + struct sockopt_data *sd; +}; + +static int +export_iface_internal(struct namedobj_instance *ii, struct named_object *no, + void *arg) +{ + ipfw_iface_info *i; + struct dump_iface_args *da; + struct ipfw_iface *iif; + + da = (struct dump_iface_args *)arg; + + i = (ipfw_iface_info *)ipfw_get_sopt_space(da->sd, sizeof(*i)); + KASSERT(i != NULL, ("previously checked buffer is not enough")); + + iif = (struct ipfw_iface *)no; + + strlcpy(i->ifname, iif->ifname, sizeof(i->ifname)); + if (iif->resolved) + i->flags |= IPFW_IFFLAG_RESOLVED; + i->ifindex = iif->ifindex; + i->refcnt = iif->no.refcnt; + i->gencnt = iif->gencnt; + return (0); +} + +/* + * Lists all interface currently tracked by ipfw. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size + * Reply: [ ipfw_obj_lheader ipfw_iface_info x N ] + * + * Returns 0 on success + */ +static int +list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + struct namedobj_instance *ii; + struct _ipfw_obj_lheader *olh; + struct dump_iface_args da; + uint32_t count, size; + + olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); + if (olh == NULL) + return (EINVAL); + if (sd->valsize < olh->size) + return (EINVAL); + + IPFW_UH_RLOCK(ch); + ii = CHAIN_TO_II(ch); + if (ii != NULL) + count = ipfw_objhash_count(ii); + else + count = 0; + size = count * sizeof(ipfw_iface_info) + sizeof(ipfw_obj_lheader); + + /* Fill in header regadless of buffer size */ + olh->count = count; + olh->objsize = sizeof(ipfw_iface_info); + + if (size > olh->size) { + olh->size = size; + IPFW_UH_RUNLOCK(ch); + return (ENOMEM); + } + olh->size = size; + + da.ch = ch; + da.sd = sd; + + if (ii != NULL) + ipfw_objhash_foreach(ii, export_iface_internal, &da); + IPFW_UH_RUNLOCK(ch); + + return (0); +} + diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_log.c b/freebsd/sys/netpfil/ipfw/ip_fw_log.c index 60b0df7d..658e1256 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_log.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_log.c @@ -41,16 +41,15 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <sys/systm.h> -#include <sys/mbuf.h> #include <sys/kernel.h> +#include <sys/mbuf.h> #include <sys/socket.h> #include <sys/sysctl.h> #include <sys/syslog.h> #include <net/ethernet.h> /* for ETHERTYPE_IP */ #include <net/if.h> +#include <net/if_var.h> #include <net/vnet.h> -#include <net/if_types.h> /* for IFT_ETHER */ -#include <net/bpf.h> /* for BPF */ #include <netinet/in.h> #include <netinet/ip.h> @@ -83,111 +82,48 @@ __FBSDID("$FreeBSD$"); #define ICMP(p) ((struct icmphdr *)(p)) #define ICMP6(p) ((struct icmp6_hdr *)(p)) +#ifdef __APPLE__ +#undef snprintf +#define snprintf sprintf +#define SNPARGS(buf, len) buf + len +#define SNP(buf) buf +#else /* !__APPLE__ */ #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? 
sizeof(buf) - len : 0 #define SNP(buf) buf, sizeof(buf) +#endif /* !__APPLE__ */ -#ifdef WITHOUT_BPF -void -ipfw_log_bpf(int onoff) -{ -} -#else /* !WITHOUT_BPF */ -static struct ifnet *log_if; /* hook to attach to bpf */ - -/* we use this dummy function for all ifnet callbacks */ -static int -log_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr) -{ - return EINVAL; -} - -static int -ipfw_log_output(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, struct route *ro) -{ - if (m != NULL) - m_freem(m); - return EINVAL; -} - -static void -ipfw_log_start(struct ifnet* ifp) -{ - panic("ipfw_log_start() must not be called"); -} - -static const u_char ipfwbroadcastaddr[6] = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - -void -ipfw_log_bpf(int onoff) -{ - struct ifnet *ifp; - - if (onoff) { - if (log_if) - return; - ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) - return; - if_initname(ifp, "ipfw", 0); - ifp->if_mtu = 65536; - ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_init = (void *)log_dummy; - ifp->if_ioctl = log_dummy; - ifp->if_start = ipfw_log_start; - ifp->if_output = ipfw_log_output; - ifp->if_addrlen = 6; - ifp->if_hdrlen = 14; - if_attach(ifp); - ifp->if_broadcastaddr = ipfwbroadcastaddr; - ifp->if_baudrate = IF_Mbps(10); - bpfattach(ifp, DLT_EN10MB, 14); - log_if = ifp; - } else { - if (log_if) { - ether_ifdetach(log_if); - if_free(log_if); - } - log_if = NULL; - } -} -#endif /* !WITHOUT_BPF */ - +#define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f) /* * We enter here when we have a rule with O_LOG. * XXX this function alone takes about 2Kbytes of code! */ void -ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, - struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg, - struct ip *ip) +ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, + struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif, + u_short offset, uint32_t tablearg, struct ip *ip) { char *action; int limit_reached = 0; char action2[92], proto[128], fragment[32]; if (V_fw_verbose == 0) { -#ifndef WITHOUT_BPF - - if (log_if == NULL || log_if->if_bpf == NULL) - return; - if (args->eh) /* layer2, use orig hdr */ - BPF_MTAP2(log_if, args->eh, ETHER_HDR_LEN, m); + ipfw_bpf_mtap2(args->eh, ETHER_HDR_LEN, m); else { /* Add fake header. Later we will store * more info in the header. */ if (ip->ip_v == 4) - BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m); - else if (ip->ip_v == 6) - BPF_MTAP2(log_if, "DDDDDDSSSSSS\x86\xdd", ETHER_HDR_LEN, m); + ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00", + ETHER_HDR_LEN, m); + else if (ip->ip_v == 6) + ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd", + ETHER_HDR_LEN, m); else /* Obviously bogus EtherType. 
*/ - BPF_MTAP2(log_if, "DDDDDDSSSSSS\xff\xff", ETHER_HDR_LEN, m); + ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff", + ETHER_HDR_LEN, m); } -#endif /* !WITHOUT_BPF */ return; } /* the old 'log' function */ @@ -254,27 +190,27 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, break; case O_DIVERT: snprintf(SNPARGS(action2, 0), "Divert %d", - cmd->arg1); + TARG(cmd->arg1, divert)); break; case O_TEE: snprintf(SNPARGS(action2, 0), "Tee %d", - cmd->arg1); + TARG(cmd->arg1, divert)); break; case O_SETFIB: snprintf(SNPARGS(action2, 0), "SetFib %d", - IP_FW_ARG_TABLEARG(cmd->arg1)); + TARG(cmd->arg1, fib) & 0x7FFF); break; case O_SKIPTO: snprintf(SNPARGS(action2, 0), "SkipTo %d", - IP_FW_ARG_TABLEARG(cmd->arg1)); + TARG(cmd->arg1, skipto)); break; case O_PIPE: snprintf(SNPARGS(action2, 0), "Pipe %d", - IP_FW_ARG_TABLEARG(cmd->arg1)); + TARG(cmd->arg1, pipe)); break; case O_QUEUE: snprintf(SNPARGS(action2, 0), "Queue %d", - IP_FW_ARG_TABLEARG(cmd->arg1)); + TARG(cmd->arg1, pipe)); break; case O_FORWARD_IP: { ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; @@ -435,7 +371,7 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, #ifdef INET6 if (IS_IP6_FLOW_ID(&(args->f_id))) { - if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG)) + if (offset || ip6f_mf) snprintf(SNPARGS(fragment, 0), " (frag %08x:%d@%d%s)", args->f_id.extra, diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_nat.c b/freebsd/sys/netpfil/ipfw/ip_fw_nat.c index 5d4dcc9f..58bc1f3c 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_nat.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_nat.c @@ -33,17 +33,18 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/eventhandler.h> #include <sys/malloc.h> +#include <sys/mbuf.h> #include <sys/kernel.h> #include <rtems/bsd/sys/lock.h> #include <sys/module.h> #include <sys/rwlock.h> - -#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */ +#include <sys/rmlock.h> #include <netinet/libalias/alias.h> #include <netinet/libalias/alias_local.h> #include <net/if.h> +#include <net/if_var.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_var.h> @@ -55,6 +56,45 @@ __FBSDID("$FreeBSD$"); #include <machine/in_cksum.h> /* XXX for in_cksum */ +struct cfg_spool { + LIST_ENTRY(cfg_spool) _next; /* chain of spool instances */ + struct in_addr addr; + uint16_t port; +}; + +/* Nat redirect configuration. */ +struct cfg_redir { + LIST_ENTRY(cfg_redir) _next; /* chain of redir instances */ + uint16_t mode; /* type of redirect mode */ + uint16_t proto; /* protocol: tcp/udp */ + struct in_addr laddr; /* local ip address */ + struct in_addr paddr; /* public ip address */ + struct in_addr raddr; /* remote ip address */ + uint16_t lport; /* local port */ + uint16_t pport; /* public port */ + uint16_t rport; /* remote port */ + uint16_t pport_cnt; /* number of public ports */ + uint16_t rport_cnt; /* number of remote ports */ + struct alias_link **alink; + u_int16_t spool_cnt; /* num of entry in spool chain */ + /* chain of spool instances */ + LIST_HEAD(spool_chain, cfg_spool) spool_chain; +}; + +/* Nat configuration data struct. 
*/ +struct cfg_nat { + /* chain of nat instances */ + LIST_ENTRY(cfg_nat) _next; + int id; /* nat id */ + struct in_addr ip; /* nat ip address */ + struct libalias *lib; /* libalias instance */ + int mode; /* aliasing mode */ + int redir_cnt; /* number of entry in spool chain */ + /* chain of redir instances */ + LIST_HEAD(redir_chain, cfg_redir) redir_chain; + char if_name[IF_NAMESIZE]; /* interface name */ +}; + static eventhandler_tag ifaddr_event_tag; static void @@ -66,8 +106,12 @@ ifaddr_change(void *arg __unused, struct ifnet *ifp) KASSERT(curvnet == ifp->if_vnet, ("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet)); + + if (V_ipfw_vnet_ready == 0 || V_ipfw_nat_ready == 0) + return; + chain = &V_layer3_chain; - IPFW_WLOCK(chain); + IPFW_UH_WLOCK(chain); /* Check every nat entry... */ LIST_FOREACH(ptr, &chain->nat, _next) { /* ...using nic 'ifp->if_xname' as dynamic alias address. */ @@ -79,13 +123,15 @@ ifaddr_change(void *arg __unused, struct ifnet *ifp) continue; if (ifa->ifa_addr->sa_family != AF_INET) continue; + IPFW_WLOCK(chain); ptr->ip = ((struct sockaddr_in *) (ifa->ifa_addr))->sin_addr; LibAliasSetAddress(ptr->lib, ptr->ip); + IPFW_WUNLOCK(chain); } if_addr_runlock(ifp); } - IPFW_WUNLOCK(chain); + IPFW_UH_WUNLOCK(chain); } /* @@ -117,11 +163,11 @@ del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head) LIST_FOREACH_SAFE(r, head, _next, tmp_r) { num = 1; /* Number of alias_link to delete. */ switch (r->mode) { - case REDIR_PORT: + case NAT44_REDIR_PORT: num = r->pport_cnt; /* FALLTHROUGH */ - case REDIR_ADDR: - case REDIR_PROTO: + case NAT44_REDIR_ADDR: + case NAT44_REDIR_PROTO: /* Delete all libalias redirect entry. */ for (i = 0; i < num; i++) LibAliasRedirectDelete(n->lib, r->alink[i]); @@ -142,27 +188,41 @@ del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head) } } -static void +static int add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) { - struct cfg_redir *r, *ser_r; - struct cfg_spool *s, *ser_s; + struct cfg_redir *r; + struct cfg_spool *s; + struct nat44_cfg_redir *ser_r; + struct nat44_cfg_spool *ser_s; + int cnt, off, i; for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) { - ser_r = (struct cfg_redir *)&buf[off]; - r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); - memcpy(r, ser_r, SOF_REDIR); + ser_r = (struct nat44_cfg_redir *)&buf[off]; + r = malloc(sizeof(*r), M_IPFW, M_WAITOK | M_ZERO); + r->mode = ser_r->mode; + r->laddr = ser_r->laddr; + r->paddr = ser_r->paddr; + r->raddr = ser_r->raddr; + r->lport = ser_r->lport; + r->pport = ser_r->pport; + r->rport = ser_r->rport; + r->pport_cnt = ser_r->pport_cnt; + r->rport_cnt = ser_r->rport_cnt; + r->proto = ser_r->proto; + r->spool_cnt = ser_r->spool_cnt; + //memcpy(r, ser_r, SOF_REDIR); LIST_INIT(&r->spool_chain); - off += SOF_REDIR; + off += sizeof(struct nat44_cfg_redir); r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt, M_IPFW, M_WAITOK | M_ZERO); switch (r->mode) { - case REDIR_ADDR: + case NAT44_REDIR_ADDR: r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr, r->paddr); break; - case REDIR_PORT: + case NAT44_REDIR_PORT: for (i = 0 ; i < r->pport_cnt; i++) { /* If remotePort is all ports, set it to 0. 
*/ u_short remotePortCopy = r->rport + i; @@ -178,7 +238,7 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) } } break; - case REDIR_PROTO: + case NAT44_REDIR_PROTO: r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr, r->raddr, r->paddr, r->proto); break; @@ -186,25 +246,41 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) printf("unknown redirect mode: %u\n", r->mode); break; } - /* XXX perhaps return an error instead of panic ? */ - if (r->alink[0] == NULL) - panic("LibAliasRedirect* returned NULL"); + if (r->alink[0] == NULL) { + printf("LibAliasRedirect* returned NULL\n"); + free(r->alink, M_IPFW); + free(r, M_IPFW); + return (EINVAL); + } /* LSNAT handling. */ for (i = 0; i < r->spool_cnt; i++) { - ser_s = (struct cfg_spool *)&buf[off]; - s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); - memcpy(s, ser_s, SOF_SPOOL); + ser_s = (struct nat44_cfg_spool *)&buf[off]; + s = malloc(sizeof(*s), M_IPFW, M_WAITOK | M_ZERO); + s->addr = ser_s->addr; + s->port = ser_s->port; LibAliasAddServer(ptr->lib, r->alink[0], s->addr, htons(s->port)); - off += SOF_SPOOL; + off += sizeof(struct nat44_cfg_spool); /* Hook spool entry. */ LIST_INSERT_HEAD(&r->spool_chain, s, _next); } /* And finally hook this redir entry. */ LIST_INSERT_HEAD(&ptr->redir_chain, r, _next); } + + return (0); +} + +static void +free_nat_instance(struct cfg_nat *ptr) +{ + + del_redir_spool_cfg(ptr, &ptr->redir_chain); + LibAliasUninit(ptr->lib); + free(ptr, M_IPFW); } + /* * ipfw_nat - perform mbuf header translation. * @@ -345,11 +421,11 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) if (ldt) { struct tcphdr *th; struct udphdr *uh; - u_short cksum; + uint16_t ip_len, cksum; - ip->ip_len = ntohs(ip->ip_len); + ip_len = ntohs(ip->ip_len); cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2))); + htons(ip->ip_p + ip_len - (ip->ip_hl << 2))); switch (ip->ip_p) { case IPPROTO_TCP: @@ -375,7 +451,6 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) in_delayed_cksum(mcl); mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } - ip->ip_len = htons(ip->ip_len); } args->m = mcl; return (IP_FW_NAT); @@ -393,60 +468,68 @@ lookup_nat(struct nat_list *l, int nat_id) return res; } -static int -ipfw_nat_cfg(struct sockopt *sopt) +static struct cfg_nat * +lookup_nat_name(struct nat_list *l, char *name) { - struct cfg_nat *cfg, *ptr; - char *buf; - struct ip_fw_chain *chain = &V_layer3_chain; - size_t len; - int gencnt, error = 0; + struct cfg_nat *res; + int id; + char *errptr; - len = sopt->sopt_valsize; - buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO); - if ((error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat))) != 0) - goto out; + id = strtol(name, &errptr, 10); + if (id == 0 || *errptr != '\0') + return (NULL); - cfg = (struct cfg_nat *)buf; - if (cfg->id < 0) { - error = EINVAL; - goto out; + LIST_FOREACH(res, l, _next) { + if (res->id == id) + break; } + return (res); +} + +/* IP_FW3 configuration routines */ + +static void +nat44_config(struct ip_fw_chain *chain, struct nat44_cfg_nat *ucfg) +{ + struct cfg_nat *ptr, *tcfg; + int gencnt; /* * Find/create nat rule. */ - IPFW_WLOCK(chain); + IPFW_UH_WLOCK(chain); gencnt = chain->gencnt; - ptr = lookup_nat(&chain->nat, cfg->id); + ptr = lookup_nat_name(&chain->nat, ucfg->name); if (ptr == NULL) { - IPFW_WUNLOCK(chain); + IPFW_UH_WUNLOCK(chain); /* New rule: allocate and init new instance. 
*/ ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO); ptr->lib = LibAliasInit(NULL); LIST_INIT(&ptr->redir_chain); } else { /* Entry already present: temporarily unhook it. */ + IPFW_WLOCK(chain); LIST_REMOVE(ptr, _next); - flush_nat_ptrs(chain, cfg->id); + flush_nat_ptrs(chain, ptr->id); IPFW_WUNLOCK(chain); + IPFW_UH_WUNLOCK(chain); } /* - * Basic nat configuration. + * Basic nat (re)configuration. */ - ptr->id = cfg->id; + ptr->id = strtol(ucfg->name, NULL, 10); /* * XXX - what if this rule doesn't nat any ip and just * redirect? * do we set aliasaddress to 0.0.0.0? */ - ptr->ip = cfg->ip; - ptr->redir_cnt = cfg->redir_cnt; - ptr->mode = cfg->mode; - LibAliasSetMode(ptr->lib, cfg->mode, ~0); + ptr->ip = ucfg->ip; + ptr->redir_cnt = ucfg->redir_cnt; + ptr->mode = ucfg->mode; + strlcpy(ptr->if_name, ucfg->if_name, sizeof(ptr->if_name)); + LibAliasSetMode(ptr->lib, ptr->mode, ~0); LibAliasSetAddress(ptr->lib, ptr->ip); - memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE); /* * Redir and LSNAT configuration. @@ -454,16 +537,453 @@ ipfw_nat_cfg(struct sockopt *sopt) /* Delete old cfgs. */ del_redir_spool_cfg(ptr, &ptr->redir_chain); /* Add new entries. */ - add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr); + add_redir_spool_cfg((char *)(ucfg + 1), ptr); + IPFW_UH_WLOCK(chain); - IPFW_WLOCK(chain); /* Extra check to avoid race with another ipfw_nat_cfg() */ - if (gencnt != chain->gencnt && - ((cfg = lookup_nat(&chain->nat, ptr->id)) != NULL)) - LIST_REMOVE(cfg, _next); + tcfg = NULL; + if (gencnt != chain->gencnt) + tcfg = lookup_nat_name(&chain->nat, ucfg->name); + IPFW_WLOCK(chain); + if (tcfg != NULL) + LIST_REMOVE(tcfg, _next); LIST_INSERT_HEAD(&chain->nat, ptr, _next); + IPFW_WUNLOCK(chain); chain->gencnt++; + + IPFW_UH_WUNLOCK(chain); + + if (tcfg != NULL) + free_nat_instance(ptr); +} + +/* + * Creates/configure nat44 instance + * Data layout (v0)(current): + * Request: [ ipfw_obj_header nat44_cfg_nat .. ] + * + * Returns 0 on success + */ +static int +nat44_cfg(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_header *oh; + struct nat44_cfg_nat *ucfg; + int id; + size_t read; + char *errptr; + + /* Check minimum header size */ + if (sd->valsize < (sizeof(*oh) + sizeof(*ucfg))) + return (EINVAL); + + oh = (ipfw_obj_header *)sd->kbuf; + + /* Basic length checks for TLVs */ + if (oh->ntlv.head.length != sizeof(oh->ntlv)) + return (EINVAL); + + ucfg = (struct nat44_cfg_nat *)(oh + 1); + + /* Check if name is properly terminated and looks like number */ + if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name)) + return (EINVAL); + id = strtol(ucfg->name, &errptr, 10); + if (id == 0 || *errptr != '\0') + return (EINVAL); + + read = sizeof(*oh) + sizeof(*ucfg); + /* Check number of redirs */ + if (sd->valsize < read + ucfg->redir_cnt*sizeof(struct nat44_cfg_redir)) + return (EINVAL); + + nat44_config(chain, ucfg); + return (0); +} + +/* + * Destroys given nat instances. 
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat44_destroy(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	struct cfg_nat *ptr;
+	ipfw_obj_ntlv *ntlv;
+
+	/* Check minimum header size */
+	if (sd->valsize < sizeof(*oh))
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)sd->kbuf;
+
+	/* Basic length checks for TLVs */
+	if (oh->ntlv.head.length != sizeof(oh->ntlv))
+		return (EINVAL);
+
+	ntlv = &oh->ntlv;
+	/* Check if name is properly terminated */
+	if (strnlen(ntlv->name, sizeof(ntlv->name)) == sizeof(ntlv->name))
+		return (EINVAL);
+
+	IPFW_UH_WLOCK(chain);
+	ptr = lookup_nat_name(&chain->nat, ntlv->name);
+	if (ptr == NULL) {
+		IPFW_UH_WUNLOCK(chain);
+		return (ESRCH);
+	}
+	IPFW_WLOCK(chain);
+	LIST_REMOVE(ptr, _next);
+	flush_nat_ptrs(chain, ptr->id);
+	IPFW_WUNLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
+
+	free_nat_instance(ptr);
+
+	return (0);
+}
+
+static void
+export_nat_cfg(struct cfg_nat *ptr, struct nat44_cfg_nat *ucfg)
+{
+
+	snprintf(ucfg->name, sizeof(ucfg->name), "%d", ptr->id);
+	ucfg->ip = ptr->ip;
+	ucfg->redir_cnt = ptr->redir_cnt;
+	ucfg->mode = ptr->mode;
+	strlcpy(ucfg->if_name, ptr->if_name, sizeof(ucfg->if_name));
+}
+
+/*
+ * Gets config for given nat instance
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header nat44_cfg_nat .. ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat44_get_cfg(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	struct nat44_cfg_nat *ucfg;
+	struct cfg_nat *ptr;
+	struct cfg_redir *r;
+	struct cfg_spool *s;
+	struct nat44_cfg_redir *ser_r;
+	struct nat44_cfg_spool *ser_s;
+	size_t sz;
+
+	sz = sizeof(*oh) + sizeof(*ucfg);
+	/* Check minimum header size */
+	if (sd->valsize < sz)
+		return (EINVAL);
+
+	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+
+	/* Basic length checks for TLVs */
+	if (oh->ntlv.head.length != sizeof(oh->ntlv))
+		return (EINVAL);
+
+	ucfg = (struct nat44_cfg_nat *)(oh + 1);
+
+	/* Check if name is properly terminated */
+	if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name))
+		return (EINVAL);
+
+	IPFW_UH_RLOCK(chain);
+	ptr = lookup_nat_name(&chain->nat, ucfg->name);
+	if (ptr == NULL) {
+		IPFW_UH_RUNLOCK(chain);
+		return (ESRCH);
+	}
+
+	export_nat_cfg(ptr, ucfg);
+
+	/* Estimate memory amount */
+	sz = sizeof(ipfw_obj_header) + sizeof(struct nat44_cfg_nat);
+	LIST_FOREACH(r, &ptr->redir_chain, _next) {
+		sz += sizeof(struct nat44_cfg_redir);
+		LIST_FOREACH(s, &r->spool_chain, _next)
+			sz += sizeof(struct nat44_cfg_spool);
+	}
+
+	ucfg->size = sz;
+	if (sd->valsize < sz) {
+
+		/*
+		 * Submitted buffer size is not enough.
+		 * We've already filled in the @ucfg structure with
+		 * the relevant info including size, so we
+		 * can return. Buffer will be flushed automatically.
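+		 *
+		 * (Editor's note, assumed userland behaviour.)  The caller
+		 * is expected to retry with the size reported back in
+		 * ucfg->size, roughly:
+		 *
+		 *	do {
+		 *		optlen = cfg->size;
+		 *		error = getsockopt(s, IPPROTO_IP, IP_FW3,
+		 *		    buf, &optlen);
+		 *	} while (error != 0 && errno == ENOMEM);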
+		 */
+		IPFW_UH_RUNLOCK(chain);
+		return (ENOMEM);
+	}
+
+	/* Size OK, let's copy data */
+	LIST_FOREACH(r, &ptr->redir_chain, _next) {
+		ser_r = (struct nat44_cfg_redir *)ipfw_get_sopt_space(sd,
+		    sizeof(*ser_r));
+		ser_r->mode = r->mode;
+		ser_r->laddr = r->laddr;
+		ser_r->paddr = r->paddr;
+		ser_r->raddr = r->raddr;
+		ser_r->lport = r->lport;
+		ser_r->pport = r->pport;
+		ser_r->rport = r->rport;
+		ser_r->pport_cnt = r->pport_cnt;
+		ser_r->rport_cnt = r->rport_cnt;
+		ser_r->proto = r->proto;
+		ser_r->spool_cnt = r->spool_cnt;
+
+		LIST_FOREACH(s, &r->spool_chain, _next) {
+			ser_s = (struct nat44_cfg_spool *)ipfw_get_sopt_space(
+			    sd, sizeof(*ser_s));
+
+			ser_s->addr = s->addr;
+			ser_s->port = s->port;
+		}
+	}
+
+	IPFW_UH_RUNLOCK(chain);
+
+	return (0);
+}
+
+/*
+ * Lists all nat44 instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader nat44_cfg_nat x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat44_list_nat(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_lheader *olh;
+	struct nat44_cfg_nat *ucfg;
+	struct cfg_nat *ptr;
+	int nat_count;
+
+	/* Check minimum header size */
+	if (sd->valsize < sizeof(ipfw_obj_lheader))
+		return (EINVAL);
+
+	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
+	IPFW_UH_RLOCK(chain);
+	nat_count = 0;
+	LIST_FOREACH(ptr, &chain->nat, _next)
+		nat_count++;
+
+	olh->count = nat_count;
+	olh->objsize = sizeof(struct nat44_cfg_nat);
+	olh->size = sizeof(*olh) + olh->count * olh->objsize;
+
+	if (sd->valsize < olh->size) {
+		IPFW_UH_RUNLOCK(chain);
+		return (ENOMEM);
+	}
+
+	LIST_FOREACH(ptr, &chain->nat, _next) {
+		ucfg = (struct nat44_cfg_nat *)ipfw_get_sopt_space(sd,
+		    sizeof(*ucfg));
+		export_nat_cfg(ptr, ucfg);
+	}
+
+	IPFW_UH_RUNLOCK(chain);
+
+	return (0);
+}
+
+/*
+ * Gets log for given nat instance
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header nat44_cfg_nat ]
+ * Reply: [ ipfw_obj_header nat44_cfg_nat LOGBUFFER ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat44_get_log(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	struct nat44_cfg_nat *ucfg;
+	struct cfg_nat *ptr;
+	void *pbuf;
+	size_t sz;
+
+	sz = sizeof(*oh) + sizeof(*ucfg);
+	/* Check minimum header size */
+	if (sd->valsize < sz)
+		return (EINVAL);
+
+	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+
+	/* Basic length checks for TLVs */
+	if (oh->ntlv.head.length != sizeof(oh->ntlv))
+		return (EINVAL);
+
+	ucfg = (struct nat44_cfg_nat *)(oh + 1);
+
+	/* Check if name is properly terminated */
+	if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name))
+		return (EINVAL);
+
+	IPFW_UH_RLOCK(chain);
+	ptr = lookup_nat_name(&chain->nat, ucfg->name);
+	if (ptr == NULL) {
+		IPFW_UH_RUNLOCK(chain);
+		return (ESRCH);
+	}
+
+	if (ptr->lib->logDesc == NULL) {
+		IPFW_UH_RUNLOCK(chain);
+		return (ENOENT);
+	}
+
+	export_nat_cfg(ptr, ucfg);
+
+	/* Estimate memory amount */
+	ucfg->size = sizeof(struct nat44_cfg_nat) + LIBALIAS_BUF_SIZE;
+	if (sd->valsize < sz + sizeof(*oh)) {
+
+		/*
+		 * Submitted buffer size is not enough.
+		 * We've already filled in the @ucfg structure with
+		 * the relevant info including size, so we
+		 * can return. Buffer will be flushed automatically.
+ */ + IPFW_UH_RUNLOCK(chain); + return (ENOMEM); + } + + pbuf = (void *)ipfw_get_sopt_space(sd, LIBALIAS_BUF_SIZE); + memcpy(pbuf, ptr->lib->logDesc, LIBALIAS_BUF_SIZE); + + IPFW_UH_RUNLOCK(chain); + + return (0); +} + +static struct ipfw_sopt_handler scodes[] = { + { IP_FW_NAT44_XCONFIG, 0, HDIR_SET, nat44_cfg }, + { IP_FW_NAT44_DESTROY, 0, HDIR_SET, nat44_destroy }, + { IP_FW_NAT44_XGETCONFIG, 0, HDIR_GET, nat44_get_cfg }, + { IP_FW_NAT44_LIST_NAT, 0, HDIR_GET, nat44_list_nat }, + { IP_FW_NAT44_XGETLOG, 0, HDIR_GET, nat44_get_log }, +}; + + +/* + * Legacy configuration routines + */ + +struct cfg_spool_legacy { + LIST_ENTRY(cfg_spool_legacy) _next; + struct in_addr addr; + u_short port; +}; + +struct cfg_redir_legacy { + LIST_ENTRY(cfg_redir) _next; + u_int16_t mode; + struct in_addr laddr; + struct in_addr paddr; + struct in_addr raddr; + u_short lport; + u_short pport; + u_short rport; + u_short pport_cnt; + u_short rport_cnt; + int proto; + struct alias_link **alink; + u_int16_t spool_cnt; + LIST_HEAD(, cfg_spool_legacy) spool_chain; +}; + +struct cfg_nat_legacy { + LIST_ENTRY(cfg_nat_legacy) _next; + int id; + struct in_addr ip; + char if_name[IF_NAMESIZE]; + int mode; + struct libalias *lib; + int redir_cnt; + LIST_HEAD(, cfg_redir_legacy) redir_chain; +}; + +static int +ipfw_nat_cfg(struct sockopt *sopt) +{ + struct cfg_nat_legacy *cfg; + struct nat44_cfg_nat *ucfg; + struct cfg_redir_legacy *rdir; + struct nat44_cfg_redir *urdir; + char *buf; + size_t len, len2; + int error, i; + + len = sopt->sopt_valsize; + len2 = len + 128; + + /* + * Allocate 2x buffer to store converted structures. + * new redir_cfg has shrunk, so we're sure that + * new buffer size is enough. + */ + buf = malloc(roundup2(len, 8) + len2, M_TEMP, M_WAITOK | M_ZERO); + error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat_legacy)); + if (error != 0) + goto out; + + cfg = (struct cfg_nat_legacy *)buf; + if (cfg->id < 0) { + error = EINVAL; + goto out; + } + + ucfg = (struct nat44_cfg_nat *)&buf[roundup2(len, 8)]; + snprintf(ucfg->name, sizeof(ucfg->name), "%d", cfg->id); + strlcpy(ucfg->if_name, cfg->if_name, sizeof(ucfg->if_name)); + ucfg->ip = cfg->ip; + ucfg->mode = cfg->mode; + ucfg->redir_cnt = cfg->redir_cnt; + + if (len < sizeof(*cfg) + cfg->redir_cnt * sizeof(*rdir)) { + error = EINVAL; + goto out; + } + + urdir = (struct nat44_cfg_redir *)(ucfg + 1); + rdir = (struct cfg_redir_legacy *)(cfg + 1); + for (i = 0; i < cfg->redir_cnt; i++) { + urdir->mode = rdir->mode; + urdir->laddr = rdir->laddr; + urdir->paddr = rdir->paddr; + urdir->raddr = rdir->raddr; + urdir->lport = rdir->lport; + urdir->pport = rdir->pport; + urdir->rport = rdir->rport; + urdir->pport_cnt = rdir->pport_cnt; + urdir->rport_cnt = rdir->rport_cnt; + urdir->proto = rdir->proto; + urdir->spool_cnt = rdir->spool_cnt; + + urdir++; + rdir++; + } + + nat44_config(&V_layer3_chain, ucfg); out: free(buf, M_TEMP); @@ -479,18 +999,18 @@ ipfw_nat_del(struct sockopt *sopt) sooptcopyin(sopt, &i, sizeof i, sizeof i); /* XXX validate i */ - IPFW_WLOCK(chain); + IPFW_UH_WLOCK(chain); ptr = lookup_nat(&chain->nat, i); if (ptr == NULL) { - IPFW_WUNLOCK(chain); + IPFW_UH_WUNLOCK(chain); return (EINVAL); } + IPFW_WLOCK(chain); LIST_REMOVE(ptr, _next); flush_nat_ptrs(chain, i); IPFW_WUNLOCK(chain); - del_redir_spool_cfg(ptr, &ptr->redir_chain); - LibAliasUninit(ptr->lib); - free(ptr, M_IPFW); + IPFW_UH_WUNLOCK(chain); + free_nat_instance(ptr); return (0); } @@ -499,28 +1019,31 @@ ipfw_nat_get_cfg(struct sockopt *sopt) { struct ip_fw_chain *chain = 
&V_layer3_chain; struct cfg_nat *n; + struct cfg_nat_legacy *ucfg; struct cfg_redir *r; struct cfg_spool *s; + struct cfg_redir_legacy *ser_r; + struct cfg_spool_legacy *ser_s; char *data; int gencnt, nat_cnt, len, error; nat_cnt = 0; len = sizeof(nat_cnt); - IPFW_RLOCK(chain); + IPFW_UH_RLOCK(chain); retry: gencnt = chain->gencnt; /* Estimate memory amount */ LIST_FOREACH(n, &chain->nat, _next) { nat_cnt++; - len += sizeof(struct cfg_nat); + len += sizeof(struct cfg_nat_legacy); LIST_FOREACH(r, &n->redir_chain, _next) { - len += sizeof(struct cfg_redir); + len += sizeof(struct cfg_redir_legacy); LIST_FOREACH(s, &r->spool_chain, _next) - len += sizeof(struct cfg_spool); + len += sizeof(struct cfg_spool_legacy); } } - IPFW_RUNLOCK(chain); + IPFW_UH_RUNLOCK(chain); data = malloc(len, M_TEMP, M_WAITOK | M_ZERO); bcopy(&nat_cnt, data, sizeof(nat_cnt)); @@ -528,25 +1051,43 @@ retry: nat_cnt = 0; len = sizeof(nat_cnt); - IPFW_RLOCK(chain); + IPFW_UH_RLOCK(chain); if (gencnt != chain->gencnt) { free(data, M_TEMP); goto retry; } /* Serialize all the data. */ LIST_FOREACH(n, &chain->nat, _next) { - bcopy(n, &data[len], sizeof(struct cfg_nat)); - len += sizeof(struct cfg_nat); + ucfg = (struct cfg_nat_legacy *)&data[len]; + ucfg->id = n->id; + ucfg->ip = n->ip; + ucfg->redir_cnt = n->redir_cnt; + ucfg->mode = n->mode; + strlcpy(ucfg->if_name, n->if_name, sizeof(ucfg->if_name)); + len += sizeof(struct cfg_nat_legacy); LIST_FOREACH(r, &n->redir_chain, _next) { - bcopy(r, &data[len], sizeof(struct cfg_redir)); - len += sizeof(struct cfg_redir); + ser_r = (struct cfg_redir_legacy *)&data[len]; + ser_r->mode = r->mode; + ser_r->laddr = r->laddr; + ser_r->paddr = r->paddr; + ser_r->raddr = r->raddr; + ser_r->lport = r->lport; + ser_r->pport = r->pport; + ser_r->rport = r->rport; + ser_r->pport_cnt = r->pport_cnt; + ser_r->rport_cnt = r->rport_cnt; + ser_r->proto = r->proto; + ser_r->spool_cnt = r->spool_cnt; + len += sizeof(struct cfg_redir_legacy); LIST_FOREACH(s, &r->spool_chain, _next) { - bcopy(s, &data[len], sizeof(struct cfg_spool)); - len += sizeof(struct cfg_spool); + ser_s = (struct cfg_spool_legacy *)&data[len]; + ser_s->addr = s->addr; + ser_s->port = s->port; + len += sizeof(struct cfg_spool_legacy); } } } - IPFW_RUNLOCK(chain); + IPFW_UH_RUNLOCK(chain); error = sooptcopyout(sopt, data, len); free(data, M_TEMP); @@ -561,6 +1102,7 @@ ipfw_nat_get_log(struct sockopt *sopt) struct cfg_nat *ptr; int i, size; struct ip_fw_chain *chain; + IPFW_RLOCK_TRACKER; chain = &V_layer3_chain; @@ -609,14 +1151,12 @@ vnet_ipfw_nat_uninit(const void *arg __unused) chain = &V_layer3_chain; IPFW_WLOCK(chain); + V_ipfw_nat_ready = 0; LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { LIST_REMOVE(ptr, _next); - del_redir_spool_cfg(ptr, &ptr->redir_chain); - LibAliasUninit(ptr->lib); - free(ptr, M_IPFW); + free_nat_instance(ptr); } flush_nat_ptrs(chain, -1 /* flush all */); - V_ipfw_nat_ready = 0; IPFW_WUNLOCK(chain); return (0); } @@ -632,6 +1172,7 @@ ipfw_nat_init(void) ipfw_nat_del_ptr = ipfw_nat_del; ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg; ipfw_nat_get_log_ptr = ipfw_nat_get_log; + IPFW_ADD_SOPT_HANDLER(1, scodes); ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change, NULL, EVENTHANDLER_PRI_ANY); @@ -643,6 +1184,7 @@ ipfw_nat_destroy(void) EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag); /* deregister ipfw_nat */ + IPFW_DEL_SOPT_HANDLER(1, scodes); ipfw_nat_ptr = NULL; lookup_nat_ptr = NULL; ipfw_nat_cfg_ptr = NULL; @@ -677,14 +1219,14 @@ static moduledata_t ipfw_nat_mod = { }; /* 
Define startup order. */ -#define IPFW_NAT_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN -#define IPFW_NAT_MODEVENT_ORDER (SI_ORDER_ANY - 128) +#define IPFW_NAT_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL +#define IPFW_NAT_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */ #define IPFW_NAT_MODULE_ORDER (IPFW_NAT_MODEVENT_ORDER + 1) #define IPFW_NAT_VNET_ORDER (IPFW_NAT_MODEVENT_ORDER + 2) DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, IPFW_NAT_SI_SUB_FIREWALL, SI_ORDER_ANY); MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1); -MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2); +MODULE_DEPEND(ipfw_nat, ipfw, 3, 3, 3); MODULE_VERSION(ipfw_nat, 1); SYSINIT(ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER, diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c b/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c index d2e1b448..59c13aa5 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/route.h> +#include <net/ethernet.h> #include <net/pfil.h> #include <net/vnet.h> @@ -60,6 +61,7 @@ __FBSDID("$FreeBSD$"); #ifdef INET6 #include <netinet/ip6.h> #include <netinet6/ip6_var.h> +#include <netinet6/scope6_var.h> #endif #include <netgraph/ng_ipfw.h> @@ -76,26 +78,39 @@ static VNET_DEFINE(int, fw6_enable) = 1; #define V_fw6_enable VNET(fw6_enable) #endif +static VNET_DEFINE(int, fwlink_enable) = 0; +#define V_fwlink_enable VNET(fwlink_enable) + int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); /* Forward declarations. */ static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int); +int ipfw_check_packet(void *, struct mbuf **, struct ifnet *, int, + struct inpcb *); +int ipfw_check_frame(void *, struct mbuf **, struct ifnet *, int, + struct inpcb *); #ifdef SYSCTL_NODE SYSBEGIN(f1) SYSCTL_DECL(_net_inet_ip_fw); -SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, enable, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0, - ipfw_chg_hook, "I", "Enable ipfw"); +SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, + &VNET_NAME(fw_enable), 0, ipfw_chg_hook, "I", "Enable ipfw"); #ifdef INET6 SYSCTL_DECL(_net_inet6_ip6_fw); -SYSCTL_VNET_PROC(_net_inet6_ip6_fw, OID_AUTO, enable, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0, - ipfw_chg_hook, "I", "Enable ipfw+6"); +SYSCTL_PROC(_net_inet6_ip6_fw, OID_AUTO, enable, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, + &VNET_NAME(fw6_enable), 0, ipfw_chg_hook, "I", "Enable ipfw+6"); #endif /* INET6 */ +SYSCTL_DECL(_net_link_ether); +SYSCTL_PROC(_net_link_ether, OID_AUTO, ipfw, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, + &VNET_NAME(fwlink_enable), 0, ipfw_chg_hook, "I", + "Pass ether pkts through firewall"); + SYSEND #endif /* SYSCTL_NODE */ @@ -106,7 +121,7 @@ SYSEND * The packet may be consumed. */ int -ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, +ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, struct inpcb *inp) { struct ip_fw_args args; @@ -114,10 +129,6 @@ ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, int ipfw; int ret; - /* all the processing now uses ip_len in net format */ - if (mtod(*m0, struct ip *)->ip_v == 4) - SET_NET_IPLEN(mtod(*m0, struct ip *)); - /* convert dir to IPFW values */ dir = (dir == PFIL_IN) ? 
DIR_IN : DIR_OUT; bzero(&args, sizeof(args)); @@ -131,11 +142,8 @@ again: if (tag != NULL) { args.rule = *((struct ipfw_rule_ref *)(tag+1)); m_tag_delete(*m0, tag); - if (args.rule.info & IPFW_ONEPASS) { - if (mtod(*m0, struct ip *)->ip_v == 4) - SET_HOST_IPLEN(mtod(*m0, struct ip *)); + if (args.rule.info & IPFW_ONEPASS) return (0); - } } args.m = *m0; @@ -192,8 +200,20 @@ again: } #ifdef INET6 if (args.next_hop6 != NULL) { - bcopy(args.next_hop6, (fwd_tag+1), len); - if (in6_localip(&args.next_hop6->sin6_addr)) + struct sockaddr_in6 *sa6; + + sa6 = (struct sockaddr_in6 *)(fwd_tag + 1); + bcopy(args.next_hop6, sa6, len); + /* + * If nh6 address is link-local we should convert + * it to kernel internal form before doing any + * comparisons. + */ + if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) { + ret = EACCES; + break; + } + if (in6_localip(&sa6->sin6_addr)) (*m0)->m_flags |= M_FASTFWD_OURS; (*m0)->m_flags |= M_IP6_NEXTHOP; } @@ -279,8 +299,112 @@ again: FREE_PKT(*m0); *m0 = NULL; } - if (*m0 && mtod(*m0, struct ip *)->ip_v == 4) - SET_HOST_IPLEN(mtod(*m0, struct ip *)); + + return ret; +} + +/* + * ipfw processing for ethernet packets (in and out). + * Interface is NULL from ether_demux, and ifp from + * ether_output_frame. + */ +int +ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir, + struct inpcb *inp) +{ + struct ether_header *eh; + struct ether_header save_eh; + struct mbuf *m; + int i, ret; + struct ip_fw_args args; + struct m_tag *mtag; + + /* fetch start point from rule, if any */ + mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL); + if (mtag == NULL) { + args.rule.slot = 0; + } else { + /* dummynet packet, already partially processed */ + struct ipfw_rule_ref *r; + + /* XXX can we free it after use ? */ + mtag->m_tag_id = PACKET_TAG_NONE; + r = (struct ipfw_rule_ref *)(mtag + 1); + if (r->info & IPFW_ONEPASS) + return (0); + args.rule = *r; + } + + /* I need some amt of data to be contiguous */ + m = *m0; + i = min(m->m_pkthdr.len, max_protohdr); + if (m->m_len < i) { + m = m_pullup(m, i); + if (m == NULL) { + *m0 = m; + return (0); + } + } + eh = mtod(m, struct ether_header *); + save_eh = *eh; /* save copy for restore below */ + m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */ + + args.m = m; /* the packet we are looking at */ + args.oif = dir == PFIL_OUT ? dst: NULL; /* destination, if any */ + args.next_hop = NULL; /* we do not support forward yet */ + args.next_hop6 = NULL; /* we do not support forward yet */ + args.eh = &save_eh; /* MAC header for bridged/MAC packets */ + args.inp = NULL; /* used by ipfw uid/gid/jail rules */ + i = ipfw_chk(&args); + m = args.m; + if (m != NULL) { + /* + * Restore Ethernet header, as needed, in case the + * mbuf chain was replaced by ipfw. + */ + M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); + if (m == NULL) { + *m0 = NULL; + return (0); + } + if (eh != mtod(m, struct ether_header *)) + bcopy(&save_eh, mtod(m, struct ether_header *), + ETHER_HDR_LEN); + } + *m0 = m; + + ret = 0; + /* Check result of ipfw_chk() */ + switch (i) { + case IP_FW_PASS: + break; + + case IP_FW_DENY: + ret = EACCES; + break; /* i.e. drop */ + + case IP_FW_DUMMYNET: + ret = EACCES; + int dir; + + if (ip_dn_io_ptr == NULL) + break; /* i.e. drop */ + + *m0 = NULL; + dir = PROTO_LAYER2 | (dst ?
DIR_OUT : DIR_IN); + ip_dn_io_ptr(&m, dir, &args); + return 0; + + default: + KASSERT(0, ("%s: unknown retval", __func__)); + } + + if (ret != 0) { + if (*m0) + FREE_PKT(*m0); + *m0 = NULL; + } + return ret; } @@ -303,7 +427,7 @@ ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule, clone = *m0; /* use the original mbuf */ *m0 = NULL; } else { - clone = m_dup(*m0, M_DONTWAIT); + clone = m_dup(*m0, M_NOWAIT); /* If we cannot duplicate the mbuf, we sacrifice the divert * chain and continue with the tee-ed packet. */ @@ -325,7 +449,6 @@ ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule, int hlen; struct mbuf *reass; - SET_HOST_IPLEN(ip); /* ip_reass wants host order */ reass = ip_reass(clone); /* Reassemble packet. */ if (reass == NULL) return 0; /* not an error */ @@ -336,7 +459,6 @@ ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule, */ ip = mtod(reass, struct ip *); hlen = ip->ip_hl << 2; - SET_NET_IPLEN(ip); ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); @@ -385,13 +507,16 @@ static int ipfw_hook(int onoff, int pf) { struct pfil_head *pfh; + pfil_func_t hook_func; pfh = pfil_head_get(PFIL_TYPE_AF, pf); if (pfh == NULL) return ENOENT; + hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet; + (void) (onoff ? pfil_add_hook : pfil_remove_hook) - (ipfw_check_hook, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); + (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); return 0; } @@ -415,51 +540,50 @@ ipfw_attach_hooks(int arg) printf("ipfw6_hook() error\n"); } #endif + if (arg == 0) /* detach */ + ipfw_hook(0, AF_LINK); + else if (V_fwlink_enable && ipfw_hook(1, AF_LINK) != 0) { + error = ENOENT; + printf("ipfw_link_hook() error\n"); + } return error; } int ipfw_chg_hook(SYSCTL_HANDLER_ARGS) { - int enable; - int oldenable; + int newval; int error; int af; - if (arg1 == &VNET_NAME(fw_enable)) { - enable = V_fw_enable; + if (arg1 == &V_fw_enable) af = AF_INET; - } #ifdef INET6 - else if (arg1 == &VNET_NAME(fw6_enable)) { - enable = V_fw6_enable; + else if (arg1 == &V_fw6_enable) af = AF_INET6; - } #endif + else if (arg1 == &V_fwlink_enable) + af = AF_LINK; else return (EINVAL); - oldenable = enable; - - error = sysctl_handle_int(oidp, &enable, 0, req); + newval = *(int *)arg1; + /* Handle sysctl change */ + error = sysctl_handle_int(oidp, &newval, 0, req); if (error) return (error); - enable = (enable) ? 1 : 0; + /* Formalize new value */ + newval = (newval) ? 
1 : 0; - if (enable == oldenable) + if (*(int *)arg1 == newval) return (0); - error = ipfw_hook(enable, af); + error = ipfw_hook(newval, af); if (error) return (error); - if (af == AF_INET) - V_fw_enable = enable; -#ifdef INET6 - else if (af == AF_INET6) - V_fw6_enable = enable; -#endif + *(int *)arg1 = newval; return (0); } diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_private.h b/freebsd/sys/netpfil/ipfw/ip_fw_private.h index ceabf88d..3b483625 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_private.h +++ b/freebsd/sys/netpfil/ipfw/ip_fw_private.h @@ -66,14 +66,12 @@ enum { */ struct _ip6dn_args { struct ip6_pktopts *opt_or; - struct route_in6 ro_or; int flags_or; struct ip6_moptions *im6o_or; struct ifnet *origifp_or; struct ifnet *ifp_or; struct sockaddr_in6 dst_or; u_long mtu_or; - struct route_in6 ro_pmtu_or; }; @@ -104,7 +102,10 @@ struct ip_fw_args { struct inpcb *inp; struct _ip6dn_args dummypar; /* dummynet->ip6_output */ - struct sockaddr_in hopstore; /* store here if cannot use a pointer */ + union { /* store here if cannot use a pointer */ + struct sockaddr_in hopstore; + struct sockaddr_in6 hopstore6; + }; }; MALLOC_DECLARE(M_IPFW); @@ -152,10 +153,13 @@ void ipfw_nat_destroy(void); /* In ip_fw_log.c */ struct ip; -void ipfw_log_bpf(int); -void ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, - struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg, - struct ip *ip); +struct ip_fw_chain; +void ipfw_bpf_init(int); +void ipfw_bpf_uninit(int); +void ipfw_bpf_mtap2(void *, u_int, struct mbuf *); +void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, + struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif, + u_short offset, uint32_t tablearg, struct ip *ip); VNET_DECLARE(u_int64_t, norule_counter); #define V_norule_counter VNET(norule_counter) VNET_DECLARE(int, verbose_limit); @@ -176,22 +180,26 @@ enum { /* result for matching dynamic rules */ * Eventually we may implement it with a callback on the function. 
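 * A minimal usage sketch (assuming the ip_fw2.c call sites): * q = ipfw_lookup_dyn_rule(&args->f_id, &dir, tcp, kidx); * if (q != NULL) { * ... act on the parent rule ... * ipfw_dyn_unlock(q); * } * i.e. the caller releases the lookup result explicitly.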
*/ struct ip_fw_chain; -void ipfw_expire_dyn_rules(struct ip_fw_chain *, struct ip_fw *, int); +struct sockopt_data; +int ipfw_is_dyn_rule(struct ip_fw *rule); +void ipfw_expire_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *); void ipfw_dyn_unlock(ipfw_dyn_rule *q); struct tcphdr; struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *, u_int32_t, u_int32_t, int); -int ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, - struct ip_fw_args *args, uint32_t tablearg); +int ipfw_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, + ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg); ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, - int *match_direction, struct tcphdr *tcp); + int *match_direction, struct tcphdr *tcp, uint16_t kidx); void ipfw_remove_dyn_children(struct ip_fw *rule); void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep); +int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd); void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */ void ipfw_dyn_uninit(int); /* per-vnet deinitialization */ int ipfw_dyn_len(void); +int ipfw_dyn_get_count(void); /* common variables */ VNET_DECLARE(int, fw_one_pass); @@ -203,6 +211,9 @@ VNET_DECLARE(int, fw_verbose); VNET_DECLARE(struct ip_fw_chain, layer3_chain); #define V_layer3_chain VNET(layer3_chain) +VNET_DECLARE(int, ipfw_vnet_ready); +#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready) + VNET_DECLARE(u_int32_t, set_disable); #define V_set_disable VNET(set_disable) @@ -212,23 +223,66 @@ VNET_DECLARE(int, autoinc_step); VNET_DECLARE(unsigned int, fw_tables_max); #define V_fw_tables_max VNET(fw_tables_max) +VNET_DECLARE(unsigned int, fw_tables_sets); +#define V_fw_tables_sets VNET(fw_tables_sets) + +struct tables_config; + +#ifdef _KERNEL +/* + * Here we have the structure representing an ipfw rule. + * + * It starts with a general area + * followed by an array of one or more instructions, which the code + * accesses as an array of 32-bit values. + * + * Given a rule pointer r: + * + * r->cmd is the start of the first instruction. + * ACTION_PTR(r) is the start of the first action (things to do + * once a rule matched). 
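+ * A typical walk over the opcode array (the same pattern the + * import/export helpers in ip_fw_sockopt.c use) is: + * for (l = r->cmd_len, cmd = r->cmd, cmdlen = 0; l > 0; + * l -= cmdlen, cmd += cmdlen) { + * cmdlen = F_LEN(cmd); + * ... inspect cmd->opcode ... + * }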
+ */ + +struct ip_fw { + uint16_t act_ofs; /* offset of action in 32-bit units */ + uint16_t cmd_len; /* # of 32-bit words in cmd */ + uint16_t rulenum; /* rule number */ + uint8_t set; /* rule set (0..31) */ + uint8_t flags; /* currently unused */ + counter_u64_t cntr; /* Pointer to rule counters */ + uint32_t timestamp; /* tv_sec of last match */ + uint32_t id; /* rule id */ + uint32_t cached_id; /* used by jump_fast */ + uint32_t cached_pos; /* used by jump_fast */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; + +#define IPFW_RULE_CNTR_SIZE (2 * sizeof(uint64_t)) + +#endif + struct ip_fw_chain { struct ip_fw **map; /* array of rule ptrs to ease lookup */ uint32_t id; /* ruleset id */ int n_rules; /* number of static rules */ - LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */ - struct radix_node_head **tables; /* IPv4 tables */ - struct radix_node_head **xtables; /* extended tables */ - uint8_t *tabletype; /* Array of table types */ + void *tablestate; /* runtime table info */ + void *valuestate; /* runtime table value info */ + int *idxmap; /* skipto array of rules */ + void **srvstate; /* runtime service mappings */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; #else - struct rwlock rwmtx; + struct rmlock rwmtx; #endif - int static_len; /* total len of static rules */ + int static_len; /* total len of static rules (v0) */ uint32_t gencnt; /* NAT generation count */ - struct ip_fw *reap; /* list of rules to reap */ + LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */ struct ip_fw *default_rule; + struct tables_config *tblcfg; /* tables module data */ + void *ifcfg; /* interface module data */ + int *idxmap_back; /* standby skipto array of rules */ + struct namedobj_instance *srvmap; /* cfg name->number mappings */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t uh_lock; #else @@ -236,13 +290,81 @@ struct ip_fw_chain { #endif }; +/* 64-byte structure representing multi-field table value */ +struct table_value { + uint32_t tag; /* O_TAG/O_TAGGED */ + uint32_t pipe; /* O_PIPE/O_QUEUE */ + uint16_t divert; /* O_DIVERT/O_TEE */ + uint16_t skipto; /* skipto, CALLRET */ + uint32_t netgraph; /* O_NETGRAPH/O_NGTEE */ + uint32_t fib; /* O_SETFIB */ + uint32_t nat; /* O_NAT */ + uint32_t nh4; + uint8_t dscp; + uint8_t spare0; + uint16_t spare1; + /* -- 32 bytes -- */ + struct in6_addr nh6; + uint32_t limit; /* O_LIMIT */ + uint32_t zoneid; /* scope zone id for nh6 */ + uint64_t refcnt; /* Number of references */ +}; + + +struct named_object { + TAILQ_ENTRY(named_object) nn_next; /* namehash */ + TAILQ_ENTRY(named_object) nv_next; /* valuehash */ + char *name; /* object name */ + uint16_t etlv; /* Export TLV id */ + uint8_t subtype;/* object subtype within class */ + uint8_t set; /* set object belongs to */ + uint16_t kidx; /* object kernel index */ + uint16_t spare; + uint32_t ocnt; /* object counter for internal use */ + uint32_t refcnt; /* number of references */ +}; +TAILQ_HEAD(namedobjects_head, named_object); + struct sockopt; /* used by tcp_var.h */ +struct sockopt_data { + caddr_t kbuf; /* allocated buffer */ + size_t ksize; /* given buffer size */ + size_t koff; /* data already used */ + size_t kavail; /* number of bytes available */ + size_t ktotal; /* total bytes pushed */ + struct sockopt *sopt; /* socket data */ + caddr_t sopt_val; /* sopt user buffer */ + size_t valsize; /* original data size */ +}; + +struct ipfw_ifc; + +typedef void (ipfw_ifc_cb)(struct ip_fw_chain *ch, void *cbdata, + uint16_t ifindex); + +struct ipfw_iface { + 
struct named_object no; + char ifname[64]; + int resolved; + uint16_t ifindex; + uint16_t spare; + uint64_t gencnt; + TAILQ_HEAD(, ipfw_ifc) consumers; +}; + +struct ipfw_ifc { + TAILQ_ENTRY(ipfw_ifc) next; + struct ipfw_iface *iface; + ipfw_ifc_cb *cb; + void *cbdata; +}; /* Macro for working with various counters */ #define IPFW_INC_RULE_COUNTER(_cntr, _bytes) do { \ - (_cntr)->pcnt++; \ - (_cntr)->bcnt += _bytes; \ - (_cntr)->timestamp = time_uptime; \ + counter_u64_add((_cntr)->cntr, 1); \ + counter_u64_add((_cntr)->cntr + 1, _bytes); \ + if ((_cntr)->timestamp != time_uptime) \ + (_cntr)->timestamp = time_uptime; \ } while (0) #define IPFW_INC_DYN_COUNTER(_cntr, _bytes) do { \ @@ -251,8 +373,8 @@ struct sockopt; /* used by tcp_var.h */ } while (0) #define IPFW_ZERO_RULE_COUNTER(_cntr) do { \ - (_cntr)->pcnt = 0; \ - (_cntr)->bcnt = 0; \ + counter_u64_zero((_cntr)->cntr); \ + counter_u64_zero((_cntr)->cntr + 1); \ (_cntr)->timestamp = 0; \ } while (0) @@ -261,12 +383,15 @@ struct sockopt; /* used by tcp_var.h */ (_cntr)->bcnt = 0; \ } while (0) -#define IP_FW_ARG_TABLEARG(a) ((a) == IP_FW_TABLEARG) ? tablearg : (a) +#define TARG_VAL(ch, k, f) ((struct table_value *)((ch)->valuestate))[k].f +#define IP_FW_ARG_TABLEARG(ch, a, f) \ + (((a) == IP_FW_TARG) ? TARG_VAL(ch, tablearg, f) : (a)) /* * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c * so the variable and the macros must be here. */ +#if defined( __linux__ ) || defined( _WIN32 ) #define IPFW_LOCK_INIT(_chain) do { \ rw_init(&(_chain)->rwmtx, "IPFW static rules"); \ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ @@ -280,49 +405,354 @@ struct sockopt; /* used by tcp_var.h */ #define IPFW_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_RLOCKED) #define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED) -#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx) -#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx) -#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx) -#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx) +#define IPFW_RLOCK_TRACKER +#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx) +#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx) +#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx) +#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx) +#define IPFW_PF_RLOCK(p) IPFW_RLOCK(p) +#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) +#else /* FreeBSD */ +#define IPFW_LOCK_INIT(_chain) do { \ + rm_init(&(_chain)->rwmtx, "IPFW static rules"); \ + rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ + } while (0) + +#define IPFW_LOCK_DESTROY(_chain) do { \ + rm_destroy(&(_chain)->rwmtx); \ + rw_destroy(&(_chain)->uh_lock); \ + } while (0) + +#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_RLOCKED) +#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_WLOCKED) + +#define IPFW_RLOCK_TRACKER struct rm_priotracker _tracker +#define IPFW_RLOCK(p) rm_rlock(&(p)->rwmtx, &_tracker) +#define IPFW_RUNLOCK(p) rm_runlock(&(p)->rwmtx, &_tracker) +#define IPFW_WLOCK(p) rm_wlock(&(p)->rwmtx) +#define IPFW_WUNLOCK(p) rm_wunlock(&(p)->rwmtx) +#define IPFW_PF_RLOCK(p) IPFW_RLOCK(p) +#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) +#endif #define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED) #define IPFW_UH_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_WLOCKED) +#define IPFW_UH_UNLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_UNLOCKED) #define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock) #define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock) #define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock) #define IPFW_UH_WUNLOCK(p) 
rw_wunlock(&(p)->uh_lock) +struct obj_idx { + uint16_t uidx; /* internal index supplied by userland */ + uint16_t kidx; /* kernel object index */ + uint16_t off; /* tlv offset from rule end in 4-byte words */ + uint8_t spare; + uint8_t type; /* object type within its category */ +}; + +struct rule_check_info { + uint16_t flags; /* rule-specific check flags */ + uint16_t object_opcodes; /* num of opcodes referencing objects */ + uint16_t urule_numoff; /* offset of rulenum in bytes */ + uint8_t version; /* rule version */ + uint8_t spare; + ipfw_obj_ctlv *ctlv; /* name TLV container */ + struct ip_fw *krule; /* resulting rule pointer */ + caddr_t urule; /* original rule pointer */ + struct obj_idx obuf[8]; /* table references storage */ +}; + +/* Legacy interface support */ +/* + * FreeBSD 8 export rule format + */ +struct ip_fw_rule0 { + struct ip_fw *x_next; /* linked list of rules */ + struct ip_fw *next_rule; /* ptr to next [skipto] rule */ + /* 'next_rule' is used to pass up 'set_disable' status */ + + uint16_t act_ofs; /* offset of action in 32-bit units */ + uint16_t cmd_len; /* # of 32-bit words in cmd */ + uint16_t rulenum; /* rule number */ + uint8_t set; /* rule set (0..31) */ + uint8_t _pad; /* padding */ + uint32_t id; /* rule id */ + + /* These fields are present in all rules. */ + uint64_t pcnt; /* Packet counter */ + uint64_t bcnt; /* Byte counter */ + uint32_t timestamp; /* tv_sec of last match */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; + +struct ip_fw_bcounter0 { + uint64_t pcnt; /* Packet counter */ + uint64_t bcnt; /* Byte counter */ + uint32_t timestamp; /* tv_sec of last match */ +}; + +/* Kernel rule length */ +/* + * RULE _K_ SIZE _V_ -> + * get kernel size from userland rule version _V_. + * RULE _U_ SIZE _V_ -> + * get user size version _V_ from kernel rule + * RULESIZE _V_ -> + * get user size rule length + */ +/* FreeBSD8 <> current kernel format */ +#define RULEUSIZE0(r) (sizeof(struct ip_fw_rule0) + (r)->cmd_len * 4 - 4) +#define RULEKSIZE0(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8) +/* FreeBSD11 <> current kernel format */ +#define RULEUSIZE1(r) (roundup2(sizeof(struct ip_fw_rule) + \ + (r)->cmd_len * 4 - 4, 8)) +#define RULEKSIZE1(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8) + +/* + * Tables/Objects index rewriting code + */ + +/* Default and maximum number of ipfw tables/objects. */ +#define IPFW_TABLES_MAX 65536 +#define IPFW_TABLES_DEFAULT 128 +#define IPFW_OBJECTS_MAX 65536 +#define IPFW_OBJECTS_DEFAULT 1024 + +#define CHAIN_TO_SRV(ch) ((ch)->srvmap) +#define SRV_OBJECT(ch, idx) ((ch)->srvstate[(idx)]) + +struct tid_info { + uint32_t set; /* table set */ + uint16_t uidx; /* table index */ + uint8_t type; /* table type */ + uint8_t atype; + uint8_t spare; + int tlen; /* Total TLV size block */ + void *tlvs; /* Pointer to first TLV */ +}; + +/* + * Classifier callback. Checks if @cmd opcode contains kernel object reference. + * If true, returns its index and type. + * Returns 0 if match is found, 1 otherwise. + */ +typedef int (ipfw_obj_rw_cl)(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype); +/* + * Updater callback. Sets kernel object reference index to @puidx + */ +typedef void (ipfw_obj_rw_upd)(ipfw_insn *cmd, uint16_t puidx); +/* + * Finder callback. Tries to find named object by name (specified via @ti). + * Stores found named object pointer in @pno. + * If object was not found, NULL is stored. + * + * Return 0 if input data was valid.
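+ * A conforming finder can be a thin wrapper (sketch only, with + * MY_ETLV standing in for the module's export TLV id): + * static int + * example_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, + * struct named_object **pno) + * { + * return (ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, + * MY_ETLV, pno)); + * }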
+ */ +typedef int (ipfw_obj_fname_cb)(struct ip_fw_chain *ch, + struct tid_info *ti, struct named_object **pno); +/* + * Another finder callback. Tries to find named object by kernel index. + * + * Returns pointer to named object or NULL. + */ +typedef struct named_object *(ipfw_obj_fidx_cb)(struct ip_fw_chain *ch, + uint16_t kidx); +/* + * Object creator callback. Tries to create object specified by @ti. + * Stores newly-allocated object index in @pkidx. + * + * Returns 0 on success. + */ +typedef int (ipfw_obj_create_cb)(struct ip_fw_chain *ch, struct tid_info *ti, + uint16_t *pkidx); +/* + * Object destroy callback. Intended to free resources allocated by + * create_object callback. + */ +typedef void (ipfw_obj_destroy_cb)(struct ip_fw_chain *ch, + struct named_object *no); +/* + * Sets handler callback. Handles moving and swapping sets of named objects. + * SWAP_ALL moves all named objects from set `set' to `new_set' and vice versa; + * TEST_ALL checks that there aren't any named objects with conflicting names; + * MOVE_ALL moves all named objects from set `set' to `new_set'; + * COUNT_ONE used to count number of references used by object with kidx `set'; + * TEST_ONE checks that named object with kidx `set' can be moved to `new_set'; + * MOVE_ONE moves named object with kidx `set' to set `new_set'. + */ +enum ipfw_sets_cmd { + SWAP_ALL = 0, TEST_ALL, MOVE_ALL, COUNT_ONE, TEST_ONE, MOVE_ONE +}; +typedef int (ipfw_obj_sets_cb)(struct ip_fw_chain *ch, + uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd); + + +struct opcode_obj_rewrite { + uint32_t opcode; /* Opcode to act upon */ + uint32_t etlv; /* Relevant export TLV id */ + ipfw_obj_rw_cl *classifier; /* Check if rewrite is needed */ + ipfw_obj_rw_upd *update; /* update cmd with new value */ + ipfw_obj_fname_cb *find_byname; /* Find named object by name */ + ipfw_obj_fidx_cb *find_bykidx; /* Find named object by kidx */ + ipfw_obj_create_cb *create_object; /* Create named object */ + ipfw_obj_destroy_cb *destroy_object;/* Destroy named object */ + ipfw_obj_sets_cb *manage_sets; /* Swap or move sets */ +}; + +#define IPFW_ADD_OBJ_REWRITER(f, c) do { \ + if ((f) != 0) \ + ipfw_add_obj_rewriter(c, \ + sizeof(c) / sizeof(c[0])); \ + } while(0) +#define IPFW_DEL_OBJ_REWRITER(l, c) do { \ + if ((l) != 0) \ + ipfw_del_obj_rewriter(c, \ + sizeof(c) / sizeof(c[0])); \ + } while(0) + +/* In ip_fw_iface.c */ +int ipfw_iface_init(void); +void ipfw_iface_destroy(void); +void vnet_ipfw_iface_destroy(struct ip_fw_chain *ch); +int ipfw_iface_ref(struct ip_fw_chain *ch, char *name, + struct ipfw_ifc *ic); +void ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic); +void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); +void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); + /* In ip_fw_sockopt.c */ +void ipfw_init_skipto_cache(struct ip_fw_chain *chain); +void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain); int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id); -int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule); -int ipfw_ctl(struct sockopt *sopt); +int ipfw_ctl3(struct sockopt *sopt); int ipfw_chk(struct ip_fw_args *args); +void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head, + struct ip_fw *rule); void ipfw_reap_rules(struct ip_fw *head); - -/* In ip_fw_pfil */ -int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, - struct inpcb *inp); +void ipfw_init_counters(void); +void ipfw_destroy_counters(void); +struct ip_fw
*ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize); +int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt); + +typedef int (sopt_handler_f)(struct ip_fw_chain *ch, + ip_fw3_opheader *op3, struct sockopt_data *sd); +struct ipfw_sopt_handler { + uint16_t opcode; + uint8_t version; + uint8_t dir; + sopt_handler_f *handler; + uint64_t refcnt; +}; +#define HDIR_SET 0x01 /* Handler is used to set some data */ +#define HDIR_GET 0x02 /* Handler is used to retrieve data */ +#define HDIR_BOTH HDIR_GET|HDIR_SET + +void ipfw_init_sopt_handler(void); +void ipfw_destroy_sopt_handler(void); +void ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count); +int ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count); +caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed); +caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed); +#define IPFW_ADD_SOPT_HANDLER(f, c) do { \ + if ((f) != 0) \ + ipfw_add_sopt_handler(c, \ + sizeof(c) / sizeof(c[0])); \ + } while(0) +#define IPFW_DEL_SOPT_HANDLER(l, c) do { \ + if ((l) != 0) \ + ipfw_del_sopt_handler(c, \ + sizeof(c) / sizeof(c[0])); \ + } while(0) + +struct namedobj_instance; +typedef int (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *, + void *arg); +typedef uint32_t (objhash_hash_f)(struct namedobj_instance *ni, const void *key, + uint32_t kopt); +typedef int (objhash_cmp_f)(struct named_object *no, const void *key, + uint32_t kopt); +struct namedobj_instance *ipfw_objhash_create(uint32_t items); +void ipfw_objhash_destroy(struct namedobj_instance *); +void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks); +void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, + void **idx, int *blocks); +void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, + void **idx, int *blocks); +void ipfw_objhash_bitmap_free(void *idx, int blocks); +void ipfw_objhash_set_hashf(struct namedobj_instance *ni, objhash_hash_f *f); +struct named_object *ipfw_objhash_lookup_name(struct namedobj_instance *ni, + uint32_t set, char *name); +struct named_object *ipfw_objhash_lookup_name_type(struct namedobj_instance *ni, + uint32_t set, uint32_t type, const char *name); +struct named_object *ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, + uint16_t idx); +int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, + struct named_object *b); +void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no); +void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no); +uint32_t ipfw_objhash_count(struct namedobj_instance *ni); +uint32_t ipfw_objhash_count_type(struct namedobj_instance *ni, uint16_t type); +int ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, + void *arg); +int ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f, + void *arg, uint16_t type); +int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx); +int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx); +void ipfw_objhash_set_funcs(struct namedobj_instance *ni, + objhash_hash_f *hash_f, objhash_cmp_f *cmp_f); +int ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti, + uint32_t etlv, struct named_object **pno); +void ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv); +ipfw_obj_ntlv *ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx, + uint32_t etlv); +void ipfw_init_obj_rewriter(void); +void ipfw_destroy_obj_rewriter(void); +void ipfw_add_obj_rewriter(struct 
opcode_obj_rewrite *rw, size_t count); +int ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count); + +int create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd, + struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti); +void update_opcode_kidx(ipfw_insn *cmd, uint16_t idx); +int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx); +void ipfw_init_srv(struct ip_fw_chain *ch); +void ipfw_destroy_srv(struct ip_fw_chain *ch); +int ipfw_check_object_name_generic(const char *name); +int ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type, + uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd); + +/* In ip_fw_eaction.c */ +typedef int (ipfw_eaction_t)(struct ip_fw_chain *ch, struct ip_fw_args *args, + ipfw_insn *cmd, int *done); +int ipfw_eaction_init(struct ip_fw_chain *ch, int first); +void ipfw_eaction_uninit(struct ip_fw_chain *ch, int last); + +uint16_t ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler, + const char *name); +int ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id); +int ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args, + ipfw_insn *cmd, int *done); /* In ip_fw_table.c */ -struct radix_node; +struct table_info; + +typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val); + int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val); -int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint32_t *val, int type); -int ipfw_init_tables(struct ip_fw_chain *ch); -void ipfw_destroy_tables(struct ip_fw_chain *ch); -int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl); -int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value); -int ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type); -int ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); -int ipfw_dump_table_entry(struct radix_node *rn, void *arg); -int ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl); -int ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); -int ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl); +int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, + uint16_t plen, void *paddr, uint32_t *val); +struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, + uint16_t kidx); +int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx); +void ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx); +int ipfw_init_tables(struct ip_fw_chain *ch, int first); int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables); +int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets); +void ipfw_destroy_tables(struct ip_fw_chain *ch, int last); /* In ip_fw_nat.c -- XXX to be moved to ip_var.h */ @@ -341,5 +771,22 @@ extern ipfw_nat_cfg_t *ipfw_nat_del_ptr; extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; +/* Helper functions for IP checksum adjustment */ +static __inline uint16_t +cksum_add(uint16_t sum, uint16_t a) +{ + uint16_t res; + + res = sum + a; + return (res + (res < a)); +} + +static __inline uint16_t +cksum_adjust(uint16_t oldsum, uint16_t old, uint16_t new) +{ + + return (~cksum_add(cksum_add(~oldsum, ~old), new)); +} + #endif /* _KERNEL */ #endif /* _IPFW2_PRIVATE_H */ diff --git 
a/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c b/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c index 95cd8c81..468e4ad4 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -2,6 +2,8 @@ /*- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa + * Copyright (c) 2014 Yandex LLC + * Copyright (c) 2014 Alexander V. Chernikov * * Supported by: Valeria Paoli * @@ -31,8 +33,8 @@ __FBSDID("$FreeBSD$"); /* - * Sockopt support for ipfw. The routines here implement - * the upper half of the ipfw code. + * Control socket and rule management routines for ipfw. + * Control is currently implemented via IP_FW3 setsockopt() code. */ #include <rtems/bsd/local/opt_ipfw.h> @@ -51,30 +53,174 @@ __FBSDID("$FreeBSD$"); #include <sys/priv.h> #include <sys/proc.h> #include <sys/rwlock.h> +#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/sysctl.h> #include <sys/syslog.h> +#include <sys/fnv_hash.h> #include <net/if.h> #include <net/route.h> #include <net/vnet.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> #include <netinet/in.h> #include <netinet/ip_var.h> /* hooks */ #include <netinet/ip_fw.h> #include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_fw_table.h> #ifdef MAC #include <security/mac/mac_framework.h> #endif +static int ipfw_ctl(struct sockopt *sopt); +static int check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, + struct rule_check_info *ci); +static int check_ipfw_rule1(struct ip_fw_rule *rule, int size, + struct rule_check_info *ci); +static int check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, + struct rule_check_info *ci); +static int rewrite_rule_uidx(struct ip_fw_chain *chain, + struct rule_check_info *ci); + +#define NAMEDOBJ_HASH_SIZE 32 + +struct namedobj_instance { + struct namedobjects_head *names; + struct namedobjects_head *values; + uint32_t nn_size; /* names hash size */ + uint32_t nv_size; /* number hash size */ + u_long *idx_mask; /* used items bitmask */ + uint32_t max_blocks; /* number of "long" blocks in bitmask */ + uint32_t count; /* number of items */ + uint16_t free_off[IPFW_MAX_SETS]; /* first possible free offset */ + objhash_hash_f *hash_f; + objhash_cmp_f *cmp_f; +}; +#define BLOCK_ITEMS (8 * sizeof(u_long)) /* Number of items for ffsl() */ + +static uint32_t objhash_hash_name(struct namedobj_instance *ni, + const void *key, uint32_t kopt); +static uint32_t objhash_hash_idx(struct namedobj_instance *ni, uint32_t val); +static int objhash_cmp_name(struct named_object *no, const void *name, + uint32_t set); + MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); +static int dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd); +static int add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd); +static int del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd); +static int clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd); +static int move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd); +static int manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd); +static int dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd); +static int dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd); + +/* ctl3 handler data */ +struct mtx ctl3_lock; +#define CTL3_LOCK_INIT() mtx_init(&ctl3_lock, "ctl3_lock", NULL, 
MTX_DEF) +#define CTL3_LOCK_DESTROY() mtx_destroy(&ctl3_lock) +#define CTL3_LOCK() mtx_lock(&ctl3_lock) +#define CTL3_UNLOCK() mtx_unlock(&ctl3_lock) + +static struct ipfw_sopt_handler *ctl3_handlers; +static size_t ctl3_hsize; +static uint64_t ctl3_refct, ctl3_gencnt; +#define CTL3_SMALLBUF 4096 /* small page-size write buffer */ +#define CTL3_LARGEBUF 16 * 1024 * 1024 /* handle large rulesets */ + +static int ipfw_flush_sopt_data(struct sockopt_data *sd); + +static struct ipfw_sopt_handler scodes[] = { + { IP_FW_XGET, 0, HDIR_GET, dump_config }, + { IP_FW_XADD, 0, HDIR_BOTH, add_rules }, + { IP_FW_XDEL, 0, HDIR_BOTH, del_rules }, + { IP_FW_XZERO, 0, HDIR_SET, clear_rules }, + { IP_FW_XRESETLOG, 0, HDIR_SET, clear_rules }, + { IP_FW_XMOVE, 0, HDIR_SET, move_rules }, + { IP_FW_SET_SWAP, 0, HDIR_SET, manage_sets }, + { IP_FW_SET_MOVE, 0, HDIR_SET, manage_sets }, + { IP_FW_SET_ENABLE, 0, HDIR_SET, manage_sets }, + { IP_FW_DUMP_SOPTCODES, 0, HDIR_GET, dump_soptcodes }, + { IP_FW_DUMP_SRVOBJECTS,0, HDIR_GET, dump_srvobjects }, +}; + +static int +set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule); +static struct opcode_obj_rewrite *find_op_rw(ipfw_insn *cmd, + uint16_t *puidx, uint8_t *ptype); +static int mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule, + uint32_t *bmask); +static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, + struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti); +static int ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, + struct tid_info *ti, struct obj_idx *pidx, int *unresolved); +static void unref_rule_objects(struct ip_fw_chain *chain, struct ip_fw *rule); +static void unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, + struct obj_idx *oib, struct obj_idx *end); +static int export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx, + struct sockopt_data *sd); + +/* + * Opcode object rewriter variables + */ +struct opcode_obj_rewrite *ctl3_rewriters; +static size_t ctl3_rsize; + /* - * static variables followed by global ones (none in this file) + * static variables followed by global ones */ +static VNET_DEFINE(uma_zone_t, ipfw_cntr_zone); +#define V_ipfw_cntr_zone VNET(ipfw_cntr_zone) + +void +ipfw_init_counters() +{ + + V_ipfw_cntr_zone = uma_zcreate("IPFW counters", + IPFW_RULE_CNTR_SIZE, NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_PCPU); +} + +void +ipfw_destroy_counters() +{ + + uma_zdestroy(V_ipfw_cntr_zone); +} + +struct ip_fw * +ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize) +{ + struct ip_fw *rule; + + rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO); + rule->cntr = uma_zalloc(V_ipfw_cntr_zone, M_WAITOK | M_ZERO); + + return (rule); +} + +static void +free_rule(struct ip_fw *rule) +{ + + uma_zfree(V_ipfw_cntr_zone, rule->cntr); + free(rule, M_IPFW); +} + + /* * Find the smallest rule >= key, id. * We could use bsearch but it is so simple that we code it directly @@ -96,11 +242,109 @@ ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id) lo = i + 1; /* continue from the next one */ else /* r->id >= id */ hi = i; /* this might be good */ - }; + } return hi; } /* + * Builds skipto cache on rule set @map. 
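+ * The cache maps each possible 16-bit rule number to the map[] + * slot of the first rule numbered >= it, so a skipto target can + * be resolved in O(1), roughly (sketch; the real consumer is the + * jump_fast path in ip_fw2.c): + * f_pos = chain->idxmap[rulenum]; + * instead of binary-searching via ipfw_find_rule().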
+ */ +static void +update_skipto_cache(struct ip_fw_chain *chain, struct ip_fw **map) +{ + int *smap, rulenum; + int i, mi; + + IPFW_UH_WLOCK_ASSERT(chain); + + mi = 0; + rulenum = map[mi]->rulenum; + smap = chain->idxmap_back; + + if (smap == NULL) + return; + + for (i = 0; i < 65536; i++) { + smap[i] = mi; + /* Use the same rule index until i < rulenum */ + if (i != rulenum || i == 65535) + continue; + /* Find next rule with num > i */ + rulenum = map[++mi]->rulenum; + while (rulenum == i) + rulenum = map[++mi]->rulenum; + } +} + +/* + * Swaps prepared (backup) index with current one. + */ +static void +swap_skipto_cache(struct ip_fw_chain *chain) +{ + int *map; + + IPFW_UH_WLOCK_ASSERT(chain); + IPFW_WLOCK_ASSERT(chain); + + map = chain->idxmap; + chain->idxmap = chain->idxmap_back; + chain->idxmap_back = map; +} + +/* + * Allocate and initialize skipto cache. + */ +void +ipfw_init_skipto_cache(struct ip_fw_chain *chain) +{ + int *idxmap, *idxmap_back; + + idxmap = malloc(65536 * sizeof(uint32_t *), M_IPFW, + M_WAITOK | M_ZERO); + idxmap_back = malloc(65536 * sizeof(uint32_t *), M_IPFW, + M_WAITOK | M_ZERO); + + /* + * Note we may be called at any time after initialization, + * for example, on first skipto rule, so we need to + * provide valid chain->idxmap on return + */ + + IPFW_UH_WLOCK(chain); + if (chain->idxmap != NULL) { + IPFW_UH_WUNLOCK(chain); + free(idxmap, M_IPFW); + free(idxmap_back, M_IPFW); + return; + } + + /* Set backup pointer first to permit building cache */ + chain->idxmap_back = idxmap_back; + update_skipto_cache(chain, chain->map); + IPFW_WLOCK(chain); + /* It is now safe to set chain->idxmap ptr */ + chain->idxmap = idxmap; + swap_skipto_cache(chain); + IPFW_WUNLOCK(chain); + IPFW_UH_WUNLOCK(chain); +} + +/* + * Destroys skipto cache. + */ +void +ipfw_destroy_skipto_cache(struct ip_fw_chain *chain) +{ + + if (chain->idxmap != NULL) + free(chain->idxmap, M_IPFW); + if (chain->idxmap != NULL) + free(chain->idxmap_back, M_IPFW); +} + + +/* * allocate a new map, returns the chain locked. extra is the number * of entries to add or delete. */ @@ -110,11 +354,12 @@ get_map(struct ip_fw_chain *chain, int extra, int locked) for (;;) { struct ip_fw **map; - int i; + int i, mflags; + + mflags = M_ZERO | ((locked != 0) ? M_NOWAIT : M_WAITOK); i = chain->n_rules + extra; - map = malloc(i * sizeof(struct ip_fw *), M_IPFW, - locked ? 
M_NOWAIT : M_WAITOK); + map = malloc(i * sizeof(struct ip_fw *), M_IPFW, mflags); if (map == NULL) { printf("%s: cannot allocate map\n", __FUNCTION__); return NULL; @@ -143,69 +388,403 @@ swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len) chain->n_rules = new_len; old_map = chain->map; chain->map = new_map; + swap_skipto_cache(chain); IPFW_WUNLOCK(chain); return old_map; } + +static void +export_cntr1_base(struct ip_fw *krule, struct ip_fw_bcounter *cntr) +{ + struct timeval boottime; + + cntr->size = sizeof(*cntr); + + if (krule->cntr != NULL) { + cntr->pcnt = counter_u64_fetch(krule->cntr); + cntr->bcnt = counter_u64_fetch(krule->cntr + 1); + cntr->timestamp = krule->timestamp; + } + if (cntr->timestamp > 0) { + getboottime(&boottime); + cntr->timestamp += boottime.tv_sec; + } +} + +static void +export_cntr0_base(struct ip_fw *krule, struct ip_fw_bcounter0 *cntr) +{ + struct timeval boottime; + + if (krule->cntr != NULL) { + cntr->pcnt = counter_u64_fetch(krule->cntr); + cntr->bcnt = counter_u64_fetch(krule->cntr + 1); + cntr->timestamp = krule->timestamp; + } + if (cntr->timestamp > 0) { + getboottime(&boottime); + cntr->timestamp += boottime.tv_sec; + } +} + +/* + * Copies rule @urule from v1 userland format (current). + * to kernel @krule. + * Assume @krule is zeroed. + */ +static void +import_rule1(struct rule_check_info *ci) +{ + struct ip_fw_rule *urule; + struct ip_fw *krule; + + urule = (struct ip_fw_rule *)ci->urule; + krule = (struct ip_fw *)ci->krule; + + /* copy header */ + krule->act_ofs = urule->act_ofs; + krule->cmd_len = urule->cmd_len; + krule->rulenum = urule->rulenum; + krule->set = urule->set; + krule->flags = urule->flags; + + /* Save rulenum offset */ + ci->urule_numoff = offsetof(struct ip_fw_rule, rulenum); + + /* Copy opcodes */ + memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t)); +} + +/* + * Export rule into v1 format (Current). + * Layout: + * [ ipfw_obj_tlv(IPFW_TLV_RULE_ENT) + * [ ip_fw_rule ] OR + * [ ip_fw_bcounter ip_fw_rule] (depends on rcntrs). + * ] + * Assume @data is zeroed. + */ +static void +export_rule1(struct ip_fw *krule, caddr_t data, int len, int rcntrs) +{ + struct ip_fw_bcounter *cntr; + struct ip_fw_rule *urule; + ipfw_obj_tlv *tlv; + + /* Fill in TLV header */ + tlv = (ipfw_obj_tlv *)data; + tlv->type = IPFW_TLV_RULE_ENT; + tlv->length = len; + + if (rcntrs != 0) { + /* Copy counters */ + cntr = (struct ip_fw_bcounter *)(tlv + 1); + urule = (struct ip_fw_rule *)(cntr + 1); + export_cntr1_base(krule, cntr); + } else + urule = (struct ip_fw_rule *)(tlv + 1); + + /* copy header */ + urule->act_ofs = krule->act_ofs; + urule->cmd_len = krule->cmd_len; + urule->rulenum = krule->rulenum; + urule->set = krule->set; + urule->flags = krule->flags; + urule->id = krule->id; + + /* Copy opcodes */ + memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t)); +} + + +/* + * Copies rule @urule from FreeBSD8 userland format (v0) + * to kernel @krule. + * Assume @krule is zeroed. 
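+ * (The caller is assumed to have sized and obtained @krule via + * the kernel-size macro, roughly: + * ci->krule = ipfw_alloc_rule(chain, RULEKSIZE0(urule)); + * a sketch of the allocation step in the IP_FW_ADD path, so the + * opcode copy below fits.)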
+ */ +static void +import_rule0(struct rule_check_info *ci) +{ + struct ip_fw_rule0 *urule; + struct ip_fw *krule; + int cmdlen, l; + ipfw_insn *cmd; + ipfw_insn_limit *lcmd; + ipfw_insn_if *cmdif; + + urule = (struct ip_fw_rule0 *)ci->urule; + krule = (struct ip_fw *)ci->krule; + + /* copy header */ + krule->act_ofs = urule->act_ofs; + krule->cmd_len = urule->cmd_len; + krule->rulenum = urule->rulenum; + krule->set = urule->set; + if ((urule->_pad & 1) != 0) + krule->flags |= IPFW_RULE_NOOPT; + + /* Save rulenum offset */ + ci->urule_numoff = offsetof(struct ip_fw_rule0, rulenum); + + /* Copy opcodes */ + memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t)); + + /* + * Alter opcodes: + * 1) convert tablearg value from 65535 to 0 + * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room + * for targ). + * 3) convert table number in iface opcodes to u16 + * 4) convert old `nat global` into new 65535 + */ + l = krule->cmd_len; + cmd = krule->cmd; + cmdlen = 0; + + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + switch (cmd->opcode) { + /* Opcodes supporting tablearg */ + case O_TAG: + case O_TAGGED: + case O_PIPE: + case O_QUEUE: + case O_DIVERT: + case O_TEE: + case O_SKIPTO: + case O_CALLRETURN: + case O_NETGRAPH: + case O_NGTEE: + case O_NAT: + if (cmd->arg1 == IP_FW_TABLEARG) + cmd->arg1 = IP_FW_TARG; + else if (cmd->arg1 == 0) + cmd->arg1 = IP_FW_NAT44_GLOBAL; + break; + case O_SETFIB: + case O_SETDSCP: + if (cmd->arg1 == IP_FW_TABLEARG) + cmd->arg1 = IP_FW_TARG; + else + cmd->arg1 |= 0x8000; + break; + case O_LIMIT: + lcmd = (ipfw_insn_limit *)cmd; + if (lcmd->conn_limit == IP_FW_TABLEARG) + lcmd->conn_limit = IP_FW_TARG; + break; + /* Interface tables */ + case O_XMIT: + case O_RECV: + case O_VIA: + /* Interface table, possibly */ + cmdif = (ipfw_insn_if *)cmd; + if (cmdif->name[0] != '\1') + break; + + cmdif->p.kidx = (uint16_t)cmdif->p.glob; + break; + } + } +} + +/* + * Copies rule @krule from kernel to FreeBSD8 userland format (v0) + */ +static void +export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len) +{ + int cmdlen, l; + ipfw_insn *cmd; + ipfw_insn_limit *lcmd; + ipfw_insn_if *cmdif; + + /* copy header */ + memset(urule, 0, len); + urule->act_ofs = krule->act_ofs; + urule->cmd_len = krule->cmd_len; + urule->rulenum = krule->rulenum; + urule->set = krule->set; + if ((krule->flags & IPFW_RULE_NOOPT) != 0) + urule->_pad |= 1; + + /* Copy opcodes */ + memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t)); + + /* Export counters */ + export_cntr0_base(krule, (struct ip_fw_bcounter0 *)&urule->pcnt); + + /* + * Alter opcodes: + * 1) convert tablearg value from 0 to 65535 + * 2) Remove highest bit from O_SETFIB/O_SETDSCP values. 
+ * 3) convert table number in iface opcodes to int + */ + l = urule->cmd_len; + cmd = urule->cmd; + cmdlen = 0; + + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + switch (cmd->opcode) { + /* Opcodes supporting tablearg */ + case O_TAG: + case O_TAGGED: + case O_PIPE: + case O_QUEUE: + case O_DIVERT: + case O_TEE: + case O_SKIPTO: + case O_CALLRETURN: + case O_NETGRAPH: + case O_NGTEE: + case O_NAT: + if (cmd->arg1 == IP_FW_TARG) + cmd->arg1 = IP_FW_TABLEARG; + else if (cmd->arg1 == IP_FW_NAT44_GLOBAL) + cmd->arg1 = 0; + break; + case O_SETFIB: + case O_SETDSCP: + if (cmd->arg1 == IP_FW_TARG) + cmd->arg1 = IP_FW_TABLEARG; + else + cmd->arg1 &= ~0x8000; + break; + case O_LIMIT: + lcmd = (ipfw_insn_limit *)cmd; + if (lcmd->conn_limit == IP_FW_TARG) + lcmd->conn_limit = IP_FW_TABLEARG; + break; + /* Interface tables */ + case O_XMIT: + case O_RECV: + case O_VIA: + /* Interface table, possibly */ + cmdif = (ipfw_insn_if *)cmd; + if (cmdif->name[0] != '\1') + break; + + cmdif->p.glob = cmdif->p.kidx; + break; + } + } +} + /* - * Add a new rule to the list. Copy the rule into a malloc'ed area, then - * possibly create a rule number and add the rule to the list. + * Add new rule(s) to the list possibly creating rule number for each. * Update the rule_number in the input struct so the caller knows it as well. - * XXX DO NOT USE FOR THE DEFAULT RULE. * Must be called without IPFW_UH held */ -int -ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) +static int +commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count) { - struct ip_fw *rule; - int i, l, insert_before; + int error, i, insert_before, tcount; + uint16_t rulenum, *pnum; + struct rule_check_info *ci; + struct ip_fw *krule; struct ip_fw **map; /* the new array of pointers */ - if (chain->map == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE - 1) - return (EINVAL); + /* Check if we need to do table/obj index remap */ + tcount = 0; + for (ci = rci, i = 0; i < count; ci++, i++) { + if (ci->object_opcodes == 0) + continue; + + /* + * Rule has some object opcodes. + * We need to find (and create non-existing) + * kernel objects, and reference existing ones. + */ + error = rewrite_rule_uidx(chain, ci); + if (error != 0) { + + /* + * rewrite failed, state for current rule + * has been reverted. Check if we need to + * revert more. + */ + if (tcount > 0) { + + /* + * We have some more table rules + * we need to rollback. 
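+ * Every earlier ci->krule that carried object opcodes + * already holds object references, and each of them + * must be released again under IPFW_UH_WLOCK; that is + * what the loop below does.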
+ */ + + IPFW_UH_WLOCK(chain); + while (ci != rci) { + ci--; + if (ci->object_opcodes == 0) + continue; + unref_rule_objects(chain,ci->krule); + + } + IPFW_UH_WUNLOCK(chain); + + } + + return (error); + } + + tcount++; + } - l = RULESIZE(input_rule); - rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO); - if (rule == NULL) - return (ENOSPC); /* get_map returns with IPFW_UH_WLOCK if successful */ - map = get_map(chain, 1, 0 /* not locked */); + map = get_map(chain, count, 0 /* not locked */); if (map == NULL) { - free(rule, M_IPFW); - return ENOSPC; - } + if (tcount > 0) { + /* Unbind tables */ + IPFW_UH_WLOCK(chain); + for (ci = rci, i = 0; i < count; ci++, i++) { + if (ci->object_opcodes == 0) + continue; + + unref_rule_objects(chain, ci->krule); + } + IPFW_UH_WUNLOCK(chain); + } - bcopy(input_rule, rule, l); - /* clear fields not settable from userland */ - rule->x_next = NULL; - rule->next_rule = NULL; - IPFW_ZERO_RULE_COUNTER(rule); + return (ENOSPC); + } if (V_autoinc_step < 1) V_autoinc_step = 1; else if (V_autoinc_step > 1000) V_autoinc_step = 1000; + + /* FIXME: Handle count > 1 */ + ci = rci; + krule = ci->krule; + rulenum = krule->rulenum; + /* find the insertion point, we will insert before */ - insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE; + insert_before = rulenum ? rulenum + 1 : IPFW_DEFAULT_RULE; i = ipfw_find_rule(chain, insert_before, 0); /* duplicate first part */ if (i > 0) bcopy(chain->map, map, i * sizeof(struct ip_fw *)); - map[i] = rule; + map[i] = krule; /* duplicate remaining part, we always have the default rule */ bcopy(chain->map + i, map + i + 1, sizeof(struct ip_fw *) *(chain->n_rules - i)); - if (rule->rulenum == 0) { - /* write back the number */ - rule->rulenum = i > 0 ? map[i-1]->rulenum : 0; - if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step) - rule->rulenum += V_autoinc_step; - input_rule->rulenum = rule->rulenum; + if (rulenum == 0) { + /* Compute rule number and write it back */ + rulenum = i > 0 ? map[i-1]->rulenum : 0; + if (rulenum < IPFW_DEFAULT_RULE - V_autoinc_step) + rulenum += V_autoinc_step; + krule->rulenum = rulenum; + /* Save number to userland rule */ + pnum = (uint16_t *)((caddr_t)ci->urule + ci->urule_numoff); + *pnum = rulenum; } - rule->id = chain->id + 1; + krule->id = chain->id + 1; + update_skipto_cache(chain, map); map = swap_map(chain, map, chain->n_rules + 1); - chain->static_len += l; + chain->static_len += RULEUSIZE0(krule); IPFW_UH_WUNLOCK(chain); if (map) free(map, M_IPFW); @@ -213,6 +792,23 @@ ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) } /* + * Adds @rule to the list of rules to reap + */ +void +ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head, + struct ip_fw *rule) +{ + + IPFW_UH_WLOCK_ASSERT(chain); + + /* Unlink rule from everywhere */ + unref_rule_objects(chain, rule); + + *((struct ip_fw **)rule) = *head; + *head = rule; +} + +/* * Reclaim storage associated with a list of rules. This is * typically the list created using remove_rule. * A NULL pointer on input is handled correctly. @@ -223,22 +819,12 @@ ipfw_reap_rules(struct ip_fw *head) struct ip_fw *rule; while ((rule = head) != NULL) { - head = head->x_next; - free(rule, M_IPFW); + head = *((struct ip_fw **)head); + free_rule(rule); } } /* - * Used by del_entry() to check if a rule should be kept. - * Returns 1 if the rule must be kept, 0 otherwise. - * - * Called with cmd = {0,1,5}. 
- * cmd == 0 matches on rule numbers, excludes rules in RESVD_SET if n == 0 ; - * cmd == 1 matches on set numbers only, rule numbers are ignored; - * cmd == 5 matches on rule and set numbers. - * - * n == 0 is a wildcard for rule numbers, there is no wildcard for sets. - * * Rules to keep are * (default || reserved || !match_set || !match_number) * where @@ -255,14 +841,608 @@ ipfw_reap_rules(struct ip_fw *head) * // number is ignored for cmd == 1 or n == 0 * */ +int +ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt) +{ + + /* Don't match default rule for modification queries */ + if (rule->rulenum == IPFW_DEFAULT_RULE && + (rt->flags & IPFW_RCFLAG_DEFAULT) == 0) + return (0); + + /* Don't match rules in reserved set for flush requests */ + if ((rt->flags & IPFW_RCFLAG_ALL) != 0 && rule->set == RESVD_SET) + return (0); + + /* If we're filtering by set, don't match other sets */ + if ((rt->flags & IPFW_RCFLAG_SET) != 0 && rule->set != rt->set) + return (0); + + if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 && + (rule->rulenum < rt->start_rule || rule->rulenum > rt->end_rule)) + return (0); + + return (1); +} + +struct manage_sets_args { + uint16_t set; + uint8_t new_set; +}; + +static int +swap_sets_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct manage_sets_args *args; + + args = (struct manage_sets_args *)arg; + if (no->set == (uint8_t)args->set) + no->set = args->new_set; + else if (no->set == args->new_set) + no->set = (uint8_t)args->set; + return (0); +} + +static int +move_sets_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct manage_sets_args *args; + + args = (struct manage_sets_args *)arg; + if (no->set == (uint8_t)args->set) + no->set = args->new_set; + return (0); +} + +static int +test_sets_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct manage_sets_args *args; + + args = (struct manage_sets_args *)arg; + if (no->set != (uint8_t)args->set) + return (0); + if (ipfw_objhash_lookup_name_type(ni, args->new_set, + no->etlv, no->name) != NULL) + return (EEXIST); + return (0); +} + +/* + * Generic function to handler moving and swapping sets. + */ +int +ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type, + uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd) +{ + struct manage_sets_args args; + struct named_object *no; + + args.set = set; + args.new_set = new_set; + switch (cmd) { + case SWAP_ALL: + return (ipfw_objhash_foreach_type(ni, swap_sets_cb, + &args, type)); + case TEST_ALL: + return (ipfw_objhash_foreach_type(ni, test_sets_cb, + &args, type)); + case MOVE_ALL: + return (ipfw_objhash_foreach_type(ni, move_sets_cb, + &args, type)); + case COUNT_ONE: + /* + * @set used to pass kidx. + * When @new_set is zero - reset object counter, + * otherwise increment it. + */ + no = ipfw_objhash_lookup_kidx(ni, set); + if (new_set != 0) + no->ocnt++; + else + no->ocnt = 0; + return (0); + case TEST_ONE: + /* @set used to pass kidx */ + no = ipfw_objhash_lookup_kidx(ni, set); + /* + * First check number of references: + * when it differs, this mean other rules are holding + * reference to given object, so it is not possible to + * change its set. Note that refcnt may account references + * to some going-to-be-added rules. Since we don't know + * their numbers (and even if they will be added) it is + * perfectly OK to return error here. 
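/*
 * [Illustration, not part of the patch] Sketch of how a caller would
 * describe "rules 100..199 in set 3" for ipfw_match_range() above; it
 * assumes the userland-visible ipfw_range_tlv and IPFW_RCFLAG_* macros
 * from <netinet/ip_fw.h>, and the concrete numbers are made up.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip_fw.h>
#include <string.h>

static void
fill_example_range(ipfw_range_tlv *rt)
{
	memset(rt, 0, sizeof(*rt));
	rt->head.length = sizeof(*rt);	/* checked by check_range_tlv() */
	rt->flags = IPFW_RCFLAG_RANGE | IPFW_RCFLAG_SET;
	rt->start_rule = 100;
	rt->end_rule = 199;
	rt->set = 3;
	/*
	 * ipfw_match_range(rule, rt) then returns 1 only for rules
	 * numbered 100..199 in set 3; the default rule is matched only
	 * when IPFW_RCFLAG_DEFAULT is also set.
	 */
}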
+ */ + if (no->ocnt != no->refcnt) + return (EBUSY); + if (ipfw_objhash_lookup_name_type(ni, new_set, type, + no->name) != NULL) + return (EEXIST); + return (0); + case MOVE_ONE: + /* @set used to pass kidx */ + no = ipfw_objhash_lookup_kidx(ni, set); + no->set = new_set; + return (0); + } + return (EINVAL); +} + +/* + * Delete rules matching range @rt. + * Saves number of deleted rules in @ndel. + * + * Returns 0 on success. + */ +static int +delete_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int *ndel) +{ + struct ip_fw *reap, *rule, **map; + int end, start; + int i, n, ndyn, ofs; + + reap = NULL; + IPFW_UH_WLOCK(chain); /* arbitrate writers */ + + /* + * Stage 1: Determine range to inspect. + * Range is half-inclusive, e.g [start, end). + */ + start = 0; + end = chain->n_rules - 1; + + if ((rt->flags & IPFW_RCFLAG_RANGE) != 0) { + start = ipfw_find_rule(chain, rt->start_rule, 0); + + end = ipfw_find_rule(chain, rt->end_rule, 0); + if (rt->end_rule != IPFW_DEFAULT_RULE) + while (chain->map[end]->rulenum == rt->end_rule) + end++; + } + + /* Allocate new map of the same size */ + map = get_map(chain, 0, 1 /* locked */); + if (map == NULL) { + IPFW_UH_WUNLOCK(chain); + return (ENOMEM); + } + + n = 0; + ndyn = 0; + ofs = start; + /* 1. bcopy the initial part of the map */ + if (start > 0) + bcopy(chain->map, map, start * sizeof(struct ip_fw *)); + /* 2. copy active rules between start and end */ + for (i = start; i < end; i++) { + rule = chain->map[i]; + if (ipfw_match_range(rule, rt) == 0) { + map[ofs++] = rule; + continue; + } + + n++; + if (ipfw_is_dyn_rule(rule) != 0) + ndyn++; + } + /* 3. copy the final part of the map */ + bcopy(chain->map + end, map + ofs, + (chain->n_rules - end) * sizeof(struct ip_fw *)); + /* 4. recalculate skipto cache */ + update_skipto_cache(chain, map); + /* 5. swap the maps (under UH_WLOCK + WHLOCK) */ + map = swap_map(chain, map, chain->n_rules - n); + /* 6. Remove all dynamic states originated by deleted rules */ + if (ndyn > 0) + ipfw_expire_dyn_rules(chain, rt); + /* 7. now remove the rules deleted from the old map */ + for (i = start; i < end; i++) { + rule = map[i]; + if (ipfw_match_range(rule, rt) == 0) + continue; + chain->static_len -= RULEUSIZE0(rule); + ipfw_reap_add(chain, &reap, rule); + } + IPFW_UH_WUNLOCK(chain); + + ipfw_reap_rules(reap); + if (map != NULL) + free(map, M_IPFW); + *ndel = n; + return (0); +} + +static int +move_objects(struct ip_fw_chain *ch, ipfw_range_tlv *rt) +{ + struct opcode_obj_rewrite *rw; + struct ip_fw *rule; + ipfw_insn *cmd; + int cmdlen, i, l, c; + uint16_t kidx; + + IPFW_UH_WLOCK_ASSERT(ch); + + /* Stage 1: count number of references by given rules */ + for (c = 0, i = 0; i < ch->n_rules - 1; i++) { + rule = ch->map[i]; + if (ipfw_match_range(rule, rt) == 0) + continue; + if (rule->set == rt->new_set) /* nothing to do */ + continue; + /* Search opcodes with named objects */ + for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd; + l > 0; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + rw = find_op_rw(cmd, &kidx, NULL); + if (rw == NULL || rw->manage_sets == NULL) + continue; + /* + * When manage_sets() returns non-zero value to + * COUNT_ONE command, consider this as an object + * doesn't support sets (e.g. disabled with sysctl). + * So, skip checks for this object. 
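/*
 * [Illustration, not part of the patch] delete_range() above never edits
 * the live map: it builds a filtered copy (prefix, filtered middle window,
 * suffix) and publishes it atomically via swap_map(). The same pattern on
 * a toy integer array:
 */
#include <stdio.h>
#include <string.h>

#define N 8

int
main(void)
{
	int oldmap[N] = { 10, 20, 30, 40, 50, 60, 70, 80 };
	int newmap[N];
	int start = 2, end = 6;		/* half-open window [start, end) */
	int i, ofs;

	/* 1. copy the initial part of the map */
	memcpy(newmap, oldmap, start * sizeof(int));
	/* 2. copy window entries that do NOT "match" (here: drop 40, 60) */
	for (ofs = start, i = start; i < end; i++)
		if (oldmap[i] != 40 && oldmap[i] != 60)
			newmap[ofs++] = oldmap[i];
	/* 3. copy the final part of the map */
	memcpy(newmap + ofs, oldmap + end, (N - end) * sizeof(int));

	for (i = 0; i < ofs + N - end; i++)
		printf("%d ", newmap[i]);	/* 10 20 30 50 70 80 */
	printf("\n");
	return (0);
}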
+ */ + if (rw->manage_sets(ch, kidx, 1, COUNT_ONE) != 0) + continue; + c++; + } + } + if (c == 0) /* No objects found */ + return (0); + /* Stage 2: verify "ownership" */ + for (c = 0, i = 0; (i < ch->n_rules - 1) && c == 0; i++) { + rule = ch->map[i]; + if (ipfw_match_range(rule, rt) == 0) + continue; + if (rule->set == rt->new_set) /* nothing to do */ + continue; + /* Search opcodes with named objects */ + for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd; + l > 0 && c == 0; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + rw = find_op_rw(cmd, &kidx, NULL); + if (rw == NULL || rw->manage_sets == NULL) + continue; + /* Test for ownership and conflicting names */ + c = rw->manage_sets(ch, kidx, + (uint8_t)rt->new_set, TEST_ONE); + } + } + /* Stage 3: change set and cleanup */ + for (i = 0; i < ch->n_rules - 1; i++) { + rule = ch->map[i]; + if (ipfw_match_range(rule, rt) == 0) + continue; + if (rule->set == rt->new_set) /* nothing to do */ + continue; + /* Search opcodes with named objects */ + for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd; + l > 0; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + rw = find_op_rw(cmd, &kidx, NULL); + if (rw == NULL || rw->manage_sets == NULL) + continue; + /* cleanup object counter */ + rw->manage_sets(ch, kidx, + 0 /* reset counter */, COUNT_ONE); + if (c != 0) + continue; + /* change set */ + rw->manage_sets(ch, kidx, + (uint8_t)rt->new_set, MOVE_ONE); + } + } + return (c); +} + +/* + * Moves all rules matching range @rt to set @rt->new_set. + * + * Returns 0 on success. + */ +static int +move_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt) +{ + struct ip_fw *rule; + int i; + + IPFW_UH_WLOCK(chain); + + /* + * Move rules with matching parameters to a new set. + * This one is much more complex. We have to ensure + * that all referenced tables (if any) are referenced + * by the given rule subset only. Otherwise, we can't move + * them to the new set and have to return an error. + */ + if ((i = move_objects(chain, rt)) != 0) { + IPFW_UH_WUNLOCK(chain); + return (i); + } + + /* XXX: We have to do the swap holding WLOCK */ + for (i = 0; i < chain->n_rules; i++) { + rule = chain->map[i]; + if (ipfw_match_range(rule, rt) == 0) + continue; + rule->set = rt->new_set; + } + + IPFW_UH_WUNLOCK(chain); + + return (0); +} + +/* + * Clear counters for a specific rule. + * Normally run under IPFW_UH_RLOCK, but these are idempotent ops + * so we only care that rules do not disappear. + */ +static void +clear_counters(struct ip_fw *rule, int log_only) +{ + ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); + + if (log_only == 0) + IPFW_ZERO_RULE_COUNTER(rule); + if (l->o.opcode == O_LOG) + l->log_left = l->max_log; +} + +/* + * Flushes rule counters and/or log values on the matching range. + * + * Returns number of items cleared.
+ */ +static int +clear_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int log_only) +{ + struct ip_fw *rule; + int num; + int i; + + num = 0; + rt->flags |= IPFW_RCFLAG_DEFAULT; + + IPFW_UH_WLOCK(chain); /* arbitrate writers */ + for (i = 0; i < chain->n_rules; i++) { + rule = chain->map[i]; + if (ipfw_match_range(rule, rt) == 0) + continue; + clear_counters(rule, log_only); + num++; + } + IPFW_UH_WUNLOCK(chain); + + return (num); +} + +static int +check_range_tlv(ipfw_range_tlv *rt) +{ + + if (rt->head.length != sizeof(*rt)) + return (1); + if (rt->start_rule > rt->end_rule) + return (1); + if (rt->set >= IPFW_MAX_SETS || rt->new_set >= IPFW_MAX_SETS) + return (1); + + if ((rt->flags & IPFW_RCFLAG_USER) != rt->flags) + return (1); + + return (0); +} + +/* + * Delete rules matching specified parameters + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_range_tlv ] + * Reply: [ ipfw_obj_header ipfw_range_tlv ] + * + * Saves number of deleted rules in ipfw_range_tlv->new_set. + * + * Returns 0 on success. + */ +static int +del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_range_header *rh; + int error, ndel; + + if (sd->valsize != sizeof(*rh)) + return (EINVAL); + + rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); + + if (check_range_tlv(&rh->range) != 0) + return (EINVAL); + + ndel = 0; + if ((error = delete_range(chain, &rh->range, &ndel)) != 0) + return (error); + + /* Save number of rules deleted */ + rh->range.new_set = ndel; + return (0); +} + +/* + * Move rules/sets matching specified parameters + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_range_tlv ] + * + * Returns 0 on success. + */ +static int +move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_range_header *rh; + + if (sd->valsize != sizeof(*rh)) + return (EINVAL); + + rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); + + if (check_range_tlv(&rh->range) != 0) + return (EINVAL); + + return (move_range(chain, &rh->range)); +} + +/* + * Clear rule accounting data matching specified parameters + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_range_tlv ] + * Reply: [ ipfw_obj_header ipfw_range_tlv ] + * + * Saves number of cleared rules in ipfw_range_tlv->new_set. + * + * Returns 0 on success. + */ static int -keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n) +clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - return - (rule->rulenum == IPFW_DEFAULT_RULE) || - (cmd == 0 && n == 0 && rule->set == RESVD_SET) || - !(cmd == 0 || rule->set == set) || - !(cmd == 1 || n == 0 || n == rule->rulenum); + ipfw_range_header *rh; + int log_only, num; + char *msg; + + if (sd->valsize != sizeof(*rh)) + return (EINVAL); + + rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); + + if (check_range_tlv(&rh->range) != 0) + return (EINVAL); + + log_only = (op3->opcode == IP_FW_XRESETLOG); + + num = clear_range(chain, &rh->range, log_only); + + if (rh->range.flags & IPFW_RCFLAG_ALL) + msg = log_only ? "All logging counts reset" : + "Accounting cleared"; + else + msg = log_only ? 
"logging count reset" : "cleared"; + + if (V_fw_verbose) { + int lev = LOG_SECURITY | LOG_NOTICE; + log(lev, "ipfw: %s.\n", msg); + } + + /* Save number of rules cleared */ + rh->range.new_set = num; + return (0); +} + +static void +enable_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt) +{ + uint32_t v_set; + + IPFW_UH_WLOCK_ASSERT(chain); + + /* Change enabled/disabled sets mask */ + v_set = (V_set_disable | rt->set) & ~rt->new_set; + v_set &= ~(1 << RESVD_SET); /* set RESVD_SET always enabled */ + IPFW_WLOCK(chain); + V_set_disable = v_set; + IPFW_WUNLOCK(chain); +} + +static int +swap_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int mv) +{ + struct opcode_obj_rewrite *rw; + struct ip_fw *rule; + int i; + + IPFW_UH_WLOCK_ASSERT(chain); + + if (rt->set == rt->new_set) /* nothing to do */ + return (0); + + if (mv != 0) { + /* + * Berfore moving the rules we need to check that + * there aren't any conflicting named objects. + */ + for (rw = ctl3_rewriters; + rw < ctl3_rewriters + ctl3_rsize; rw++) { + if (rw->manage_sets == NULL) + continue; + i = rw->manage_sets(chain, (uint8_t)rt->set, + (uint8_t)rt->new_set, TEST_ALL); + if (i != 0) + return (EEXIST); + } + } + /* Swap or move two sets */ + for (i = 0; i < chain->n_rules - 1; i++) { + rule = chain->map[i]; + if (rule->set == (uint8_t)rt->set) + rule->set = (uint8_t)rt->new_set; + else if (rule->set == (uint8_t)rt->new_set && mv == 0) + rule->set = (uint8_t)rt->set; + } + for (rw = ctl3_rewriters; rw < ctl3_rewriters + ctl3_rsize; rw++) { + if (rw->manage_sets == NULL) + continue; + rw->manage_sets(chain, (uint8_t)rt->set, + (uint8_t)rt->new_set, mv != 0 ? MOVE_ALL: SWAP_ALL); + } + return (0); +} + +/* + * Swaps or moves set + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_range_tlv ] + * + * Returns 0 on success. + */ +static int +manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_range_header *rh; + int ret; + + if (sd->valsize != sizeof(*rh)) + return (EINVAL); + + rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); + + if (rh->range.head.length != sizeof(ipfw_range_tlv)) + return (1); + /* enable_sets() expects bitmasks. 
*/ + if (op3->opcode != IP_FW_SET_ENABLE && + (rh->range.set >= IPFW_MAX_SETS || + rh->range.new_set >= IPFW_MAX_SETS)) + return (EINVAL); + + ret = 0; + IPFW_UH_WLOCK(chain); + switch (op3->opcode) { + case IP_FW_SET_SWAP: + case IP_FW_SET_MOVE: + ret = swap_sets(chain, &rh->range, + op3->opcode == IP_FW_SET_MOVE); + break; + case IP_FW_SET_ENABLE: + enable_sets(chain, &rh->range); + break; + } + IPFW_UH_WUNLOCK(chain); + + return (ret); } /** @@ -282,12 +1462,11 @@ keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n) static int del_entry(struct ip_fw_chain *chain, uint32_t arg) { - struct ip_fw *rule; uint32_t num; /* rule number or old_set */ uint8_t cmd, new_set; - int start, end, i, ofs, n; - struct ip_fw **map = NULL; + int do_del, ndel; int error = 0; + ipfw_range_tlv rt; num = arg & 0xffff; cmd = (arg >> 24) & 0xff; @@ -303,149 +1482,60 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg) return EINVAL; } - IPFW_UH_WLOCK(chain); /* arbitrate writers */ - chain->reap = NULL; /* prepare for deletions */ + /* Convert old requests into new representation */ + memset(&rt, 0, sizeof(rt)); + rt.start_rule = num; + rt.end_rule = num; + rt.set = num; + rt.new_set = new_set; + do_del = 0; switch (cmd) { - case 0: /* delete rules "num" (num == 0 matches all) */ - case 1: /* delete all rules in set N */ - case 5: /* delete rules with number N and set "new_set". */ - - /* - * Locate first rule to delete (start), the rule after - * the last one to delete (end), and count how many - * rules to delete (n). Always use keep_rule() to - * determine which rules to keep. - */ - n = 0; - if (cmd == 1) { - /* look for a specific set including RESVD_SET. - * Must scan the entire range, ignore num. - */ - new_set = num; - for (start = -1, end = i = 0; i < chain->n_rules; i++) { - if (keep_rule(chain->map[i], cmd, new_set, 0)) - continue; - if (start < 0) - start = i; - end = i; - n++; - } - end++; /* first non-matching */ - } else { - /* Optimized search on rule numbers */ - start = ipfw_find_rule(chain, num, 0); - for (end = start; end < chain->n_rules; end++) { - rule = chain->map[end]; - if (num > 0 && rule->rulenum != num) - break; - if (!keep_rule(rule, cmd, new_set, num)) - n++; - } - } - - if (n == 0) { - /* A flush request (arg == 0 or cmd == 1) on empty - * ruleset returns with no error. On the contrary, - * if there is no match on a specific request, - * we return EINVAL. - */ - if (arg != 0 && cmd != 1) - error = EINVAL; - break; - } - - /* We have something to delete. Allocate the new map */ - map = get_map(chain, -n, 1 /* locked */); - if (map == NULL) { - error = EINVAL; - break; - } - - /* 1. bcopy the initial part of the map */ - if (start > 0) - bcopy(chain->map, map, start * sizeof(struct ip_fw *)); - /* 2. copy active rules between start and end */ - for (i = ofs = start; i < end; i++) { - rule = chain->map[i]; - if (keep_rule(rule, cmd, new_set, num)) - map[ofs++] = rule; - } - /* 3. copy the final part of the map */ - bcopy(chain->map + end, map + ofs, - (chain->n_rules - end) * sizeof(struct ip_fw *)); - /* 4. swap the maps (under BH_LOCK) */ - map = swap_map(chain, map, chain->n_rules - n); - /* 5. 
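/*
 * [Illustration, not part of the patch] The mask arithmetic performed by
 * enable_sets() above, reduced to a standalone program. Here rt->set is
 * read as "sets to disable" and rt->new_set as "sets to enable", with
 * RESVD_SET (31) forced to stay enabled; all values are made up.
 */
#include <stdio.h>
#include <stdint.h>

#define TOY_RESVD_SET 31		/* stands in for RESVD_SET */

int
main(void)
{
	uint32_t set_disable = 0x00000006;	/* sets 1 and 2 disabled */
	uint32_t to_disable = 1u << 3;		/* disable set 3 */
	uint32_t to_enable = 1u << 1;		/* enable set 1 */
	uint32_t v_set;

	v_set = (set_disable | to_disable) & ~to_enable;
	v_set &= ~(1u << TOY_RESVD_SET);	/* RESVD_SET stays enabled */
	printf("new disable mask: 0x%08x\n", v_set);	/* 0x0000000c */
	return (0);
}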
now remove the rules deleted from the old map */ - if (cmd == 1) - ipfw_expire_dyn_rules(chain, NULL, new_set); - for (i = start; i < end; i++) { - rule = map[i]; - if (keep_rule(rule, cmd, new_set, num)) - continue; - chain->static_len -= RULESIZE(rule); - if (cmd != 1) - ipfw_expire_dyn_rules(chain, rule, RESVD_SET); - rule->x_next = chain->reap; - chain->reap = rule; - } + case 0: /* delete rules numbered "rulenum" */ + if (num == 0) + rt.flags |= IPFW_RCFLAG_ALL; + else + rt.flags |= IPFW_RCFLAG_RANGE; + do_del = 1; break; - - /* - * In the next 3 cases the loop stops at (n_rules - 1) - * because the default rule is never eligible.. - */ - - case 2: /* move rules with given RULE number to new set */ - for (i = 0; i < chain->n_rules - 1; i++) { - rule = chain->map[i]; - if (rule->rulenum == num) - rule->set = new_set; - } + case 1: /* delete rules in set "rulenum" */ + rt.flags |= IPFW_RCFLAG_SET; + do_del = 1; break; - - case 3: /* move rules with given SET number to new set */ - for (i = 0; i < chain->n_rules - 1; i++) { - rule = chain->map[i]; - if (rule->set == num) - rule->set = new_set; - } + case 5: /* delete rules "rulenum" and set "new_set" */ + rt.flags |= IPFW_RCFLAG_RANGE | IPFW_RCFLAG_SET; + rt.set = new_set; + rt.new_set = 0; + do_del = 1; break; - - case 4: /* swap two sets */ - for (i = 0; i < chain->n_rules - 1; i++) { - rule = chain->map[i]; - if (rule->set == num) - rule->set = new_set; - else if (rule->set == new_set) - rule->set = num; - } + case 2: /* move rules "rulenum" to set "new_set" */ + rt.flags |= IPFW_RCFLAG_RANGE; break; + case 3: /* move rules from set "rulenum" to set "new_set" */ + IPFW_UH_WLOCK(chain); + error = swap_sets(chain, &rt, 1); + IPFW_UH_WUNLOCK(chain); + return (error); + case 4: /* swap sets "rulenum" and "new_set" */ + IPFW_UH_WLOCK(chain); + error = swap_sets(chain, &rt, 0); + IPFW_UH_WUNLOCK(chain); + return (error); + default: + return (ENOTSUP); } - rule = chain->reap; - chain->reap = NULL; - IPFW_UH_WUNLOCK(chain); - ipfw_reap_rules(rule); - if (map) - free(map, M_IPFW); - return error; -} + if (do_del != 0) { + if ((error = delete_range(chain, &rt, &ndel)) != 0) + return (error); -/* - * Clear counters for a specific rule. - * Normally run under IPFW_UH_RLOCK, but these are idempotent ops - * so we only care that rules do not disappear. - */ -static void -clear_counters(struct ip_fw *rule, int log_only) -{ - ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); + if (ndel == 0 && (cmd != 1 && num != 0)) + return (EINVAL); - if (log_only == 0) - IPFW_ZERO_RULE_COUNTER(rule); - if (l->o.opcode == O_LOG) - l->log_left = l->max_log; + return (0); + } + + return (move_range(chain, &rt)); } /** @@ -516,23 +1606,57 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) return (0); } + /* - * Check validity of the structure before insert. - * Rules are simple, so this mostly need to check rule sizes. 
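/*
 * [Illustration, not part of the patch] How the legacy 32-bit sockopt
 * argument decoded by del_entry() above packs its fields. The num/cmd
 * extraction matches the code shown; the new_set shift is an assumption
 * based on the surrounding context, and the values are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* cmd 5 ("delete rules N in set S"), set 4, rule number 1000 */
	uint32_t arg = (5u << 24) | (4u << 16) | 1000u;
	uint32_t num = arg & 0xffff;		/* rule number or old set */
	uint8_t cmd = (arg >> 24) & 0xff;	/* operation selector */
	uint8_t new_set = (arg >> 16) & 0xff;	/* target/new set */

	printf("cmd=%u num=%u new_set=%u\n", cmd, num, new_set);
	return (0);
}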
+ * Check rule head in FreeBSD11 format + * */ static int -check_ipfw_struct(struct ip_fw *rule, int size) +check_ipfw_rule1(struct ip_fw_rule *rule, int size, + struct rule_check_info *ci) { - int l, cmdlen = 0; - int have_action=0; - ipfw_insn *cmd; + int l; + + if (size < sizeof(*rule)) { + printf("ipfw: rule too short\n"); + return (EINVAL); + } + + /* Check for valid cmd_len */ + l = roundup2(RULESIZE(rule), sizeof(uint64_t)); + if (l != size) { + printf("ipfw: size mismatch (have %d want %d)\n", size, l); + return (EINVAL); + } + if (rule->act_ofs >= rule->cmd_len) { + printf("ipfw: bogus action offset (%u > %u)\n", + rule->act_ofs, rule->cmd_len - 1); + return (EINVAL); + } + + if (rule->rulenum > IPFW_DEFAULT_RULE - 1) + return (EINVAL); + + return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); +} + +/* + * Check rule head in FreeBSD8 format + * + */ +static int +check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, + struct rule_check_info *ci) +{ + int l; if (size < sizeof(*rule)) { printf("ipfw: rule too short\n"); return (EINVAL); } - /* first, check for valid size */ - l = RULESIZE(rule); + + /* Check for valid cmd_len */ + l = sizeof(*rule) + rule->cmd_len * 4 - 4; if (l != size) { printf("ipfw: size mismatch (have %d want %d)\n", size, l); return (EINVAL); @@ -542,12 +1666,26 @@ check_ipfw_struct(struct ip_fw *rule, int size) rule->act_ofs, rule->cmd_len - 1); return (EINVAL); } + + if (rule->rulenum > IPFW_DEFAULT_RULE - 1) + return (EINVAL); + + return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); +} + +static int +check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci) +{ + int cmdlen, l; + int have_action; + + have_action = 0; + /* * Now go for the individual checks. Very simple ones, basically only * instruction sizes. */ - for (l = rule->cmd_len, cmd = rule->cmd ; - l > 0 ; l -= cmdlen, cmd += cmdlen) { + for (l = cmd_len; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (cmdlen > l) { printf("ipfw: opcode %d size truncated\n", @@ -557,6 +1695,10 @@ check_ipfw_struct(struct ip_fw *rule, int size) switch (cmd->opcode) { case O_PROBE_STATE: case O_KEEP_STATE: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + ci->object_opcodes++; + break; case O_PROTO: case O_IP_SRC_ME: case O_IP_DST_ME: @@ -588,6 +1730,35 @@ check_ipfw_struct(struct ip_fw *rule, int size) goto bad_size; break; + case O_EXTERNAL_ACTION: + if (cmd->arg1 == 0 || + cmdlen != F_INSN_SIZE(ipfw_insn)) { + printf("ipfw: invalid external " + "action opcode\n"); + return (EINVAL); + } + ci->object_opcodes++; + /* Do we have O_EXTERNAL_INSTANCE opcode? 
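/*
 * [Illustration, not part of the patch] A worked example of the
 * FreeBSD8-format size check in check_ipfw_rule0() above: cmd_len counts
 * 32-bit instruction words, and one word is already included in
 * sizeof(struct ip_fw_rule0) (the trailing one-element cmd array), hence
 * the "- 4". The struct size below is a made-up stand-in.
 */
#include <stdio.h>

int
main(void)
{
	int rule0_size = 96;	/* hypothetical sizeof(struct ip_fw_rule0) */
	int cmd_len = 5;	/* five 32-bit instruction words */
	int l = rule0_size + cmd_len * 4 - 4;

	printf("expected payload: %d bytes\n", l);	/* 112 */
	return (0);
}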
*/ + if (l != cmdlen) { + l -= cmdlen; + cmd += cmdlen; + cmdlen = F_LEN(cmd); + if (cmd->opcode != O_EXTERNAL_INSTANCE) { + printf("ipfw: invalid opcode " + "next to external action %u\n", + cmd->opcode); + return (EINVAL); + } + if (cmd->arg1 == 0 || + cmdlen != F_INSN_SIZE(ipfw_insn)) { + printf("ipfw: invalid external " + "action instance opcode\n"); + return (EINVAL); + } + ci->object_opcodes++; + } + goto check_action; + case O_FIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; @@ -601,10 +1772,10 @@ check_ipfw_struct(struct ip_fw *rule, int size) case O_SETFIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; - if ((cmd->arg1 != IP_FW_TABLEARG) && - (cmd->arg1 >= rt_numfibs)) { + if ((cmd->arg1 != IP_FW_TARG) && + ((cmd->arg1 & 0x7FFF) >= rt_numfibs)) { printf("ipfw: invalid fib number %d\n", - cmd->arg1); + cmd->arg1 & 0x7FFF); return EINVAL; } goto check_action; @@ -625,6 +1796,7 @@ check_ipfw_struct(struct ip_fw *rule, int size) case O_LIMIT: if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) goto bad_size; + ci->object_opcodes++; break; case O_LOG: @@ -639,7 +1811,7 @@ check_ipfw_struct(struct ip_fw *rule, int size) case O_IP_SRC_MASK: case O_IP_DST_MASK: /* only odd command lengths */ - if ( !(cmdlen & 1) || cmdlen > 31) + if ((cmdlen & 1) == 0) goto bad_size; break; @@ -666,6 +1838,18 @@ check_ipfw_struct(struct ip_fw *rule, int size) cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 && cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; + ci->object_opcodes++; + break; + case O_IP_FLOW_LOOKUP: + if (cmd->arg1 >= V_fw_tables_max) { + printf("ipfw: invalid table number %d\n", + cmd->arg1); + return (EINVAL); + } + if (cmdlen != F_INSN_SIZE(ipfw_insn) && + cmdlen != F_INSN_SIZE(ipfw_insn_u32)) + goto bad_size; + ci->object_opcodes++; break; case O_MACADDR2: if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) @@ -700,6 +1884,7 @@ check_ipfw_struct(struct ip_fw *rule, int size) case O_VIA: if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) goto bad_size; + ci->object_opcodes++; break; case O_ALTQ: @@ -742,8 +1927,10 @@ check_ipfw_struct(struct ip_fw *rule, int size) if (cmdlen != F_INSN_SIZE(ipfw_insn_nat)) goto bad_size; goto check_action; - case O_FORWARD_MAC: /* XXX not implemented yet */ case O_CHECK_STATE: + ci->object_opcodes++; + /* FALLTHROUGH */ + case O_FORWARD_MAC: /* XXX not implemented yet */ case O_COUNT: case O_ACCEPT: case O_DENY: @@ -763,14 +1950,14 @@ check_action: printf("ipfw: opcode %d, multiple actions" " not allowed\n", cmd->opcode); - return EINVAL; + return (EINVAL); } have_action = 1; if (l != cmdlen) { printf("ipfw: opcode %d, action must be" " last opcode\n", cmd->opcode); - return EINVAL; + return (EINVAL); } break; #ifdef INET6 @@ -813,25 +2000,25 @@ check_action: case O_IP6_DST_MASK: case O_ICMP6TYPE: printf("ipfw: no IPv6 support in kernel\n"); - return EPROTONOSUPPORT; + return (EPROTONOSUPPORT); #endif default: printf("ipfw: opcode %d, unknown opcode\n", cmd->opcode); - return EINVAL; + return (EINVAL); } } } if (have_action == 0) { printf("ipfw: missing action\n"); - return EINVAL; + return (EINVAL); } return 0; bad_size: printf("ipfw: opcode %d size %d wrong\n", cmd->opcode, cmdlen); - return EINVAL; + return (EINVAL); } @@ -863,8 +2050,8 @@ struct ip_fw7 { ipfw_insn cmd[1]; /* storage for commands */ }; - int convert_rule_to_7(struct ip_fw *rule); -int convert_rule_to_8(struct ip_fw *rule); +static int convert_rule_to_7(struct ip_fw_rule0 *rule); +static int convert_rule_to_8(struct ip_fw_rule0 *rule); #ifndef RULESIZE7 #define RULESIZE7(rule) (sizeof(struct ip_fw7) + 
\ @@ -882,10 +2069,15 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) { char *bp = buf; char *ep = bp + space; - struct ip_fw *rule, *dst; - int l, i; + struct ip_fw *rule; + struct ip_fw_rule0 *dst; + struct timeval boottime; + int error, i, l, warnflag; time_t boot_seconds; + warnflag = 0; + + getboottime(&boottime); boot_seconds = boottime.tv_sec; for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; @@ -894,9 +2086,12 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) /* Convert rule to FreeBSd 7.2 format */ l = RULESIZE7(rule); if (bp + l + sizeof(uint32_t) <= ep) { - int error; bcopy(rule, bp, l + sizeof(uint32_t)); - error = convert_rule_to_7((struct ip_fw *) bp); + error = set_legacy_obj_kidx(chain, + (struct ip_fw_rule0 *)bp); + if (error != 0) + return (0); + error = convert_rule_to_7((struct ip_fw_rule0 *) bp); if (error) return 0; /*XXX correct? */ /* @@ -914,76 +2109,1631 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) continue; /* go to next rule */ } - /* normal mode, don't touch rules */ - l = RULESIZE(rule); + l = RULEUSIZE0(rule); if (bp + l > ep) { /* should not happen */ printf("overflow dumping static rules\n"); break; } - dst = (struct ip_fw *)bp; - bcopy(rule, dst, l); + dst = (struct ip_fw_rule0 *)bp; + export_rule0(rule, dst, l); + error = set_legacy_obj_kidx(chain, dst); + /* * XXX HACK. Store the disable mask in the "next" * pointer in a wild attempt to keep the ABI the same. * Why do we do this on EVERY rule? + * + * XXX: "ipfw set show" (ab)uses IP_FW_GET to read disabled mask + * so we need to fail _after_ saving at least one mask. */ bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable)); if (dst->timestamp) dst->timestamp += boot_seconds; bp += l; + + if (error != 0) { + if (error == 2) { + /* Non-fatal table rewrite error. */ + warnflag = 1; + continue; + } + printf("Stop on rule %d. Fail to convert table\n", + rule->rulenum); + break; + } } + if (warnflag != 0) + printf("ipfw: process %s is using legacy interfaces," + " consider rebuilding\n", ""); ipfw_get_dynamic(chain, &bp, ep); /* protected by the dynamic lock */ return (bp - (char *)buf); } -#define IP_FW3_OPLENGTH(x) ((x)->sopt_valsize - sizeof(ip_fw3_opheader)) -/** - * {set|get}sockopt parser. +struct dump_args { + uint32_t b; /* start rule */ + uint32_t e; /* end rule */ + uint32_t rcount; /* number of rules */ + uint32_t rsize; /* rules size */ + uint32_t tcount; /* number of tables */ + int rcounters; /* counters */ +}; + +void +ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv) +{ + + ntlv->head.type = no->etlv; + ntlv->head.length = sizeof(*ntlv); + ntlv->idx = no->kidx; + strlcpy(ntlv->name, no->name, sizeof(ntlv->name)); +} + +/* + * Export named object info in instance @ni, identified by @kidx + * to ipfw_obj_ntlv. TLV is allocated from @sd space. + * + * Returns 0 on success. + */ +static int +export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx, + struct sockopt_data *sd) +{ + struct named_object *no; + ipfw_obj_ntlv *ntlv; + + no = ipfw_objhash_lookup_kidx(ni, kidx); + KASSERT(no != NULL, ("invalid object kernel index passed")); + + ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); + if (ntlv == NULL) + return (ENOMEM); + + ipfw_export_obj_ntlv(no, ntlv); + return (0); +} + +/* + * Dumps static rules with table TLVs in buffer @sd. + * + * Returns 0 on success. 
+ */ +static int +dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da, + uint32_t *bmask, struct sockopt_data *sd) +{ + int error; + int i, l; + uint32_t tcount; + ipfw_obj_ctlv *ctlv; + struct ip_fw *krule; + struct namedobj_instance *ni; + caddr_t dst; + + /* Dump table names first (if any) */ + if (da->tcount > 0) { + /* Header first */ + ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); + if (ctlv == NULL) + return (ENOMEM); + ctlv->head.type = IPFW_TLV_TBLNAME_LIST; + ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) + + sizeof(*ctlv); + ctlv->count = da->tcount; + ctlv->objsize = sizeof(ipfw_obj_ntlv); + } + + i = 0; + tcount = da->tcount; + ni = ipfw_get_table_objhash(chain); + while (tcount > 0) { + if ((bmask[i / 32] & (1 << (i % 32))) == 0) { + i++; + continue; + } + + /* Jump to shared named object bitmask */ + if (i >= IPFW_TABLES_MAX) { + ni = CHAIN_TO_SRV(chain); + i -= IPFW_TABLES_MAX; + bmask += IPFW_TABLES_MAX / 32; + } + + if ((error = export_objhash_ntlv(ni, i, sd)) != 0) + return (error); + + i++; + tcount--; + } + + /* Dump rules */ + ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); + if (ctlv == NULL) + return (ENOMEM); + ctlv->head.type = IPFW_TLV_RULE_LIST; + ctlv->head.length = da->rsize + sizeof(*ctlv); + ctlv->count = da->rcount; + + for (i = da->b; i < da->e; i++) { + krule = chain->map[i]; + + l = RULEUSIZE1(krule) + sizeof(ipfw_obj_tlv); + if (da->rcounters != 0) + l += sizeof(struct ip_fw_bcounter); + dst = (caddr_t)ipfw_get_sopt_space(sd, l); + if (dst == NULL) + return (ENOMEM); + + export_rule1(krule, dst, l, da->rcounters); + } + + return (0); +} + +/* + * Marks every object index used in @rule with bit in @bmask. + * Used to generate bitmask of referenced tables/objects for given ruleset + * or its part. + * + * Returns number of newly-referenced objects. + */ +static int +mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule, + uint32_t *bmask) +{ + struct opcode_obj_rewrite *rw; + ipfw_insn *cmd; + int bidx, cmdlen, l, count; + uint16_t kidx; + uint8_t subtype; + + l = rule->cmd_len; + cmd = rule->cmd; + cmdlen = 0; + count = 0; + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + rw = find_op_rw(cmd, &kidx, &subtype); + if (rw == NULL) + continue; + + bidx = kidx / 32; + /* + * Maintain separate bitmasks for table and + * non-table objects. + */ + if (rw->etlv != IPFW_TLV_TBL_NAME) + bidx += IPFW_TABLES_MAX / 32; + + if ((bmask[bidx] & (1 << (kidx % 32))) == 0) + count++; + + bmask[bidx] |= 1 << (kidx % 32); + } + + return (count); +} + +/* + * Dumps requested objects data + * Data layout (version 0)(current): + * Request: [ ipfw_cfg_lheader ] + IPFW_CFG_GET_* flags + * size = ipfw_cfg_lheader.size + * Reply: [ ipfw_cfg_lheader + * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) + * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) + * ipfw_obj_tlv(IPFW_TLV_RULE_ENT) [ ip_fw_bcounter (optional) ip_fw_rule ] + * ] (optional) + * [ ipfw_obj_ctlv(IPFW_TLV_STATE_LIST) ipfw_obj_dyntlv x N ] (optional) + * ] + * * NOTE IPFW_TLV_STATE_LIST has the single valid field: objsize. + * The rest (size, count) are set to zero and needs to be ignored. + * + * Returns 0 on success. 
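/*
 * [Illustration, not part of the patch] The per-kidx bitmask bookkeeping
 * used by mark_object_kidx() and dump_static_rules() above: one bit per
 * kernel index, with non-table objects offset past the table region.
 * TOY_TABLES_MAX stands in for IPFW_TABLES_MAX; values are made up.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define TOY_TABLES_MAX 64

int
main(void)
{
	/* 2x space, as in dump_config(): tables first, then other objects */
	uint32_t bmask[2 * TOY_TABLES_MAX / 32];
	uint16_t kidx = 37;
	int bidx, is_table = 0, newly_seen;

	memset(bmask, 0, sizeof(bmask));
	bidx = kidx / 32;
	if (!is_table)
		bidx += TOY_TABLES_MAX / 32;	/* jump to second region */
	newly_seen = (bmask[bidx] & (1u << (kidx % 32))) == 0;
	bmask[bidx] |= 1u << (kidx % 32);
	printf("kidx %u newly referenced: %d\n", kidx, newly_seen);
	return (0);
}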
*/ +static int +dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_cfg_lheader *hdr; + struct ip_fw *rule; + size_t sz, rnum; + uint32_t hdr_flags; + int error, i; + struct dump_args da; + uint32_t *bmask; + + hdr = (ipfw_cfg_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); + if (hdr == NULL) + return (EINVAL); + + error = 0; + bmask = NULL; + /* Allocate needed state. Note we allocate 2xspace mask, for table&srv */ + if (hdr->flags & IPFW_CFG_GET_STATIC) + bmask = malloc(IPFW_TABLES_MAX / 4, M_TEMP, M_WAITOK | M_ZERO); + + IPFW_UH_RLOCK(chain); + + /* + * STAGE 1: Determine size/count for objects in range. + * Prepare used tables bitmask. + */ + sz = sizeof(ipfw_cfg_lheader); + memset(&da, 0, sizeof(da)); + + da.b = 0; + da.e = chain->n_rules; + + if (hdr->end_rule != 0) { + /* Handle custom range */ + if ((rnum = hdr->start_rule) > IPFW_DEFAULT_RULE) + rnum = IPFW_DEFAULT_RULE; + da.b = ipfw_find_rule(chain, rnum, 0); + rnum = hdr->end_rule; + rnum = (rnum < IPFW_DEFAULT_RULE) ? rnum+1 : IPFW_DEFAULT_RULE; + da.e = ipfw_find_rule(chain, rnum, 0) + 1; + } + + if (hdr->flags & IPFW_CFG_GET_STATIC) { + for (i = da.b; i < da.e; i++) { + rule = chain->map[i]; + da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv); + da.rcount++; + /* Update bitmask of used objects for given range */ + da.tcount += mark_object_kidx(chain, rule, bmask); + } + /* Add counters if requested */ + if (hdr->flags & IPFW_CFG_GET_COUNTERS) { + da.rsize += sizeof(struct ip_fw_bcounter) * da.rcount; + da.rcounters = 1; + } + + if (da.tcount > 0) + sz += da.tcount * sizeof(ipfw_obj_ntlv) + + sizeof(ipfw_obj_ctlv); + sz += da.rsize + sizeof(ipfw_obj_ctlv); + } + + if (hdr->flags & IPFW_CFG_GET_STATES) + sz += ipfw_dyn_get_count() * sizeof(ipfw_obj_dyntlv) + + sizeof(ipfw_obj_ctlv); + + + /* + * Fill header anyway. + * Note we have to save header fields to stable storage + * buffer inside @sd can be flushed after dumping rules + */ + hdr->size = sz; + hdr->set_mask = ~V_set_disable; + hdr_flags = hdr->flags; + hdr = NULL; + + if (sd->valsize < sz) { + error = ENOMEM; + goto cleanup; + } + + /* STAGE2: Store actual data */ + if (hdr_flags & IPFW_CFG_GET_STATIC) { + error = dump_static_rules(chain, &da, bmask, sd); + if (error != 0) + goto cleanup; + } + + if (hdr_flags & IPFW_CFG_GET_STATES) + error = ipfw_dump_states(chain, sd); + +cleanup: + IPFW_UH_RUNLOCK(chain); + + if (bmask != NULL) + free(bmask, M_TEMP); + + return (error); +} + int -ipfw_ctl(struct sockopt *sopt) +ipfw_check_object_name_generic(const char *name) +{ + int nsize; + + nsize = sizeof(((ipfw_obj_ntlv *)0)->name); + if (strnlen(name, nsize) == nsize) + return (EINVAL); + if (name[0] == '\0') + return (EINVAL); + return (0); +} + +/* + * Creates non-existent objects referenced by rule. + * + * Return 0 on success. + */ +int +create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd, + struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti) +{ + struct opcode_obj_rewrite *rw; + struct obj_idx *p; + uint16_t kidx; + int error; + + /* + * Compatibility stuff: do actual creation for non-existing, + * but referenced objects. 
+ */ + for (p = oib; p < pidx; p++) { + if (p->kidx != 0) + continue; + + ti->uidx = p->uidx; + ti->type = p->type; + ti->atype = 0; + + rw = find_op_rw(cmd + p->off, NULL, NULL); + KASSERT(rw != NULL, ("Unable to find handler for op %d", + (cmd + p->off)->opcode)); + + if (rw->create_object == NULL) + error = EOPNOTSUPP; + else + error = rw->create_object(ch, ti, &kidx); + if (error == 0) { + p->kidx = kidx; + continue; + } + + /* + * Error happened. We have to rollback everything. + * Drop all already acquired references. + */ + IPFW_UH_WLOCK(ch); + unref_oib_objects(ch, cmd, oib, pidx); + IPFW_UH_WUNLOCK(ch); + + return (error); + } + + return (0); +} + +/* + * Compatibility function for old ipfw(8) binaries. + * Rewrites table/nat kernel indices with userland ones. + * Convert tables matching '/^\d+$/' to their atoi() value. + * Use number 65535 for other tables. + * + * Returns 0 on success. + */ +static int +set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule) +{ + struct opcode_obj_rewrite *rw; + struct named_object *no; + ipfw_insn *cmd; + char *end; + long val; + int cmdlen, error, l; + uint16_t kidx, uidx; + uint8_t subtype; + + error = 0; + + l = rule->cmd_len; + cmd = rule->cmd; + cmdlen = 0; + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + /* Check if is index in given opcode */ + rw = find_op_rw(cmd, &kidx, &subtype); + if (rw == NULL) + continue; + + /* Try to find referenced kernel object */ + no = rw->find_bykidx(ch, kidx); + if (no == NULL) + continue; + + val = strtol(no->name, &end, 10); + if (*end == '\0' && val < 65535) { + uidx = val; + } else { + + /* + * We are called via legacy opcode. + * Save error and show table as fake number + * not to make ipfw(8) hang. + */ + uidx = 65535; + error = 2; + } + + rw->update(cmd, uidx); + } + + return (error); +} + + +/* + * Unreferences all already-referenced objects in given @cmd rule, + * using information in @oib. + * + * Used to rollback partially converted rule on error. + */ +static void +unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib, + struct obj_idx *end) +{ + struct opcode_obj_rewrite *rw; + struct named_object *no; + struct obj_idx *p; + + IPFW_UH_WLOCK_ASSERT(ch); + + for (p = oib; p < end; p++) { + if (p->kidx == 0) + continue; + + rw = find_op_rw(cmd + p->off, NULL, NULL); + KASSERT(rw != NULL, ("Unable to find handler for op %d", + (cmd + p->off)->opcode)); + + /* Find & unref by existing idx */ + no = rw->find_bykidx(ch, p->kidx); + KASSERT(no != NULL, ("Ref'd object %d disappeared", p->kidx)); + no->refcnt--; + } +} + +/* + * Remove references from every object used in @rule. + * Used at rule removal code. 
+ */ +static void +unref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule) +{ + struct opcode_obj_rewrite *rw; + struct named_object *no; + ipfw_insn *cmd; + int cmdlen, l; + uint16_t kidx; + uint8_t subtype; + + IPFW_UH_WLOCK_ASSERT(ch); + + l = rule->cmd_len; + cmd = rule->cmd; + cmdlen = 0; + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + rw = find_op_rw(cmd, &kidx, &subtype); + if (rw == NULL) + continue; + no = rw->find_bykidx(ch, kidx); + + KASSERT(no != NULL, ("table id %d not found", kidx)); + KASSERT(no->subtype == subtype, + ("wrong type %d (%d) for table id %d", + no->subtype, subtype, kidx)); + KASSERT(no->refcnt > 0, ("refcount for table %d is %d", + kidx, no->refcnt)); + + if (no->refcnt == 1 && rw->destroy_object != NULL) + rw->destroy_object(ch, no); + else + no->refcnt--; + } +} + + +/* + * Find and reference object (if any) stored in instruction @cmd. + * + * Saves object info in @pidx, sets + * - @unresolved to 1 if the object should exist but was not found + * + * Returns non-zero value in case of error. + */ +static int +ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct tid_info *ti, + struct obj_idx *pidx, int *unresolved) +{ + struct named_object *no; + struct opcode_obj_rewrite *rw; + int error; + + /* Check if this opcode is a candidate for rewrite */ + rw = find_op_rw(cmd, &ti->uidx, &ti->type); + if (rw == NULL) + return (0); + + /* Need to rewrite. Save necessary fields */ + pidx->uidx = ti->uidx; + pidx->type = ti->type; + + /* Try to find referenced kernel object */ + error = rw->find_byname(ch, ti, &no); + if (error != 0) + return (error); + if (no == NULL) { + /* + * Report the unresolved object for automatic + * creation. + */ + *unresolved = 1; + return (0); + } + + /* Found. Bump refcount and update kidx. */ + no->refcnt++; + rw->update(cmd, no->kidx); + return (0); +} + +/* + * Finds and bumps refcount for objects referenced by given @rule. + * Auto-creates non-existing tables. + * Fills in @oib array with userland/kernel indexes. + * + * Returns 0 on success. + */ +static int +ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, + struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti) +{ + struct obj_idx *pidx; + ipfw_insn *cmd; + int cmdlen, error, l, unresolved; + + pidx = oib; + l = rule->cmd_len; + cmd = rule->cmd; + cmdlen = 0; + error = 0; + + IPFW_UH_WLOCK(ch); + + /* Increase refcount on each existing referenced table. */ + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + unresolved = 0; + + error = ref_opcode_object(ch, cmd, ti, pidx, &unresolved); + if (error != 0) + break; + /* + * Compatibility stuff for old clients: + * prepare to automatically create non-existing objects. + */ + if (unresolved != 0) { + pidx->off = rule->cmd_len - l; + pidx++; + } + } + + if (error != 0) { + /* Unref everything we have already done */ + unref_oib_objects(ch, rule->cmd, oib, pidx); + IPFW_UH_WUNLOCK(ch); + return (error); + } + IPFW_UH_WUNLOCK(ch); + + /* Perform auto-creation for non-existing objects */ + if (pidx != oib) + error = create_objects_compat(ch, rule->cmd, oib, pidx, ti); + + /* Calculate real number of dynamic objects */ + ci->object_opcodes = (uint16_t)(pidx - oib); + + return (error); +} + +/* + * Checks if the opcode is referencing a table of the appropriate type. + * Takes a reference on the table if one is found. + * Rewrites user-supplied opcode values with kernel ones. + * + * Returns 0 on success and appropriate error code otherwise.
+ */ +static int +rewrite_rule_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci) +{ + int error; + ipfw_insn *cmd; + uint8_t type; + struct obj_idx *p, *pidx_first, *pidx_last; + struct tid_info ti; + + /* + * Prepare an array for storing opcode indices. + * Use stack allocation by default. + */ + if (ci->object_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) { + /* Stack */ + pidx_first = ci->obuf; + } else + pidx_first = malloc( + ci->object_opcodes * sizeof(struct obj_idx), + M_IPFW, M_WAITOK | M_ZERO); + + error = 0; + type = 0; + memset(&ti, 0, sizeof(ti)); + + /* Use set rule is assigned to. */ + ti.set = ci->krule->set; + if (ci->ctlv != NULL) { + ti.tlvs = (void *)(ci->ctlv + 1); + ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv); + } + + /* Reference all used tables and other objects */ + error = ref_rule_objects(chain, ci->krule, ci, pidx_first, &ti); + if (error != 0) + goto free; + /* + * Note that ref_rule_objects() might have updated ci->object_opcodes + * to reflect actual number of object opcodes. + */ + + /* Perform rewrite of remaining opcodes */ + p = pidx_first; + pidx_last = pidx_first + ci->object_opcodes; + for (p = pidx_first; p < pidx_last; p++) { + cmd = ci->krule->cmd + p->off; + update_opcode_kidx(cmd, p->kidx); + } + +free: + if (pidx_first != ci->obuf) + free(pidx_first, M_IPFW); + + return (error); +} + +/* + * Adds one or more rules to ipfw @chain. + * Data layout (version 0)(current): + * Request: + * [ + * ip_fw3_opheader + * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional *1) + * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] (*2) (*3) + * ] + * Reply: + * [ + * ip_fw3_opheader + * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) + * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] + * ] + * + * Rules in reply are modified to store their actual ruleset number. + * + * (*1) TLVs inside IPFW_TLV_TBL_LIST needs to be sorted ascending + * according to their idx field and there has to be no duplicates. + * (*2) Numbered rules inside IPFW_TLV_RULE_LIST needs to be sorted ascending. + * (*3) Each ip_fw structure needs to be aligned to u64 boundary. + * + * Returns 0 on success. + */ +static int +add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_ctlv *ctlv, *rtlv, *tstate; + ipfw_obj_ntlv *ntlv; + int clen, error, idx; + uint32_t count, read; + struct ip_fw_rule *r; + struct rule_check_info rci, *ci, *cbuf; + int i, rsize; + + op3 = (ip_fw3_opheader *)ipfw_get_sopt_space(sd, sd->valsize); + ctlv = (ipfw_obj_ctlv *)(op3 + 1); + + read = sizeof(ip_fw3_opheader); + rtlv = NULL; + tstate = NULL; + cbuf = NULL; + memset(&rci, 0, sizeof(struct rule_check_info)); + + if (read + sizeof(*ctlv) > sd->valsize) + return (EINVAL); + + if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) { + clen = ctlv->head.length; + /* Check size and alignment */ + if (clen > sd->valsize || clen < sizeof(*ctlv)) + return (EINVAL); + if ((clen % sizeof(uint64_t)) != 0) + return (EINVAL); + + /* + * Some table names or other named objects. + * Check for validness. + */ + count = (ctlv->head.length - sizeof(*ctlv)) / sizeof(*ntlv); + if (ctlv->count != count || ctlv->objsize != sizeof(*ntlv)) + return (EINVAL); + + /* + * Check each TLV. + * Ensure TLVs are sorted ascending and + * there are no duplicates. 
+ */ + idx = -1; + ntlv = (ipfw_obj_ntlv *)(ctlv + 1); + while (count > 0) { + if (ntlv->head.length != sizeof(ipfw_obj_ntlv)) + return (EINVAL); + + error = ipfw_check_object_name_generic(ntlv->name); + if (error != 0) + return (error); + + if (ntlv->idx <= idx) + return (EINVAL); + + idx = ntlv->idx; + count--; + ntlv++; + } + + tstate = ctlv; + read += ctlv->head.length; + ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); + } + + if (read + sizeof(*ctlv) > sd->valsize) + return (EINVAL); + + if (ctlv->head.type == IPFW_TLV_RULE_LIST) { + clen = ctlv->head.length; + if (clen + read > sd->valsize || clen < sizeof(*ctlv)) + return (EINVAL); + if ((clen % sizeof(uint64_t)) != 0) + return (EINVAL); + + /* + * TODO: Permit adding multiple rules at once + */ + if (ctlv->count != 1) + return (ENOTSUP); + + clen -= sizeof(*ctlv); + + if (ctlv->count > clen / sizeof(struct ip_fw_rule)) + return (EINVAL); + + /* Allocate state for each rule or use stack */ + if (ctlv->count == 1) { + memset(&rci, 0, sizeof(struct rule_check_info)); + cbuf = &rci; + } else + cbuf = malloc(ctlv->count * sizeof(*ci), M_TEMP, + M_WAITOK | M_ZERO); + ci = cbuf; + + /* + * Check each rule for validness. + * Ensure numbered rules are sorted ascending + * and properly aligned + */ + idx = 0; + r = (struct ip_fw_rule *)(ctlv + 1); + count = 0; + error = 0; + while (clen > 0) { + rsize = roundup2(RULESIZE(r), sizeof(uint64_t)); + if (rsize > clen || ctlv->count <= count) { + error = EINVAL; + break; + } + + ci->ctlv = tstate; + error = check_ipfw_rule1(r, rsize, ci); + if (error != 0) + break; + + /* Check sorting */ + if (r->rulenum != 0 && r->rulenum < idx) { + printf("rulenum %d idx %d\n", r->rulenum, idx); + error = EINVAL; + break; + } + idx = r->rulenum; + + ci->urule = (caddr_t)r; + + rsize = roundup2(rsize, sizeof(uint64_t)); + clen -= rsize; + r = (struct ip_fw_rule *)((caddr_t)r + rsize); + count++; + ci++; + } + + if (ctlv->count != count || error != 0) { + if (cbuf != &rci) + free(cbuf, M_TEMP); + return (EINVAL); + } + + rtlv = ctlv; + read += ctlv->head.length; + ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); + } + + if (read != sd->valsize || rtlv == NULL || rtlv->count == 0) { + if (cbuf != NULL && cbuf != &rci) + free(cbuf, M_TEMP); + return (EINVAL); + } + + /* + * Passed rules seems to be valid. + * Allocate storage and try to add them to chain. + */ + for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) { + clen = RULEKSIZE1((struct ip_fw_rule *)ci->urule); + ci->krule = ipfw_alloc_rule(chain, clen); + import_rule1(ci); + } + + if ((error = commit_rules(chain, cbuf, rtlv->count)) != 0) { + /* Free allocate krules */ + for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) + free_rule(ci->krule); + } + + if (cbuf != NULL && cbuf != &rci) + free(cbuf, M_TEMP); + + return (error); +} + +/* + * Lists all sopts currently registered. 
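/*
 * [Illustration, not part of the patch] The "sorted ascending, no
 * duplicates" validation add_rules() applies to the name-TLV list above,
 * reduced to its core; toy data.
 */
#include <stdio.h>

int
main(void)
{
	int idx[] = { 1, 2, 2, 5 };	/* duplicate on purpose */
	int i, prev = -1;

	for (i = 0; i < 4; i++) {
		if (idx[i] <= prev) {
			/* add_rules() returns EINVAL here */
			printf("reject: idx %d not above %d\n", idx[i], prev);
			return (1);
		}
		prev = idx[i];
	}
	printf("accepted\n");
	return (0);
}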
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size + * Reply: [ ipfw_obj_lheader ipfw_sopt_info x N ] + * + * Returns 0 on success + */ +static int +dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + struct _ipfw_obj_lheader *olh; + ipfw_sopt_info *i; + struct ipfw_sopt_handler *sh; + uint32_t count, n, size; + + olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); + if (olh == NULL) + return (EINVAL); + if (sd->valsize < olh->size) + return (EINVAL); + + CTL3_LOCK(); + count = ctl3_hsize; + size = count * sizeof(ipfw_sopt_info) + sizeof(ipfw_obj_lheader); + + /* Fill in header regadless of buffer size */ + olh->count = count; + olh->objsize = sizeof(ipfw_sopt_info); + + if (size > olh->size) { + olh->size = size; + CTL3_UNLOCK(); + return (ENOMEM); + } + olh->size = size; + + for (n = 1; n <= count; n++) { + i = (ipfw_sopt_info *)ipfw_get_sopt_space(sd, sizeof(*i)); + KASSERT(i != NULL, ("previously checked buffer is not enough")); + sh = &ctl3_handlers[n]; + i->opcode = sh->opcode; + i->version = sh->version; + i->refcnt = sh->refcnt; + } + CTL3_UNLOCK(); + + return (0); +} + +/* + * Compares two opcodes. + * Used both in qsort() and bsearch(). + * + * Returns 0 if match is found. + */ +static int +compare_opcodes(const void *_a, const void *_b) +{ + const struct opcode_obj_rewrite *a, *b; + + a = (const struct opcode_obj_rewrite *)_a; + b = (const struct opcode_obj_rewrite *)_b; + + if (a->opcode < b->opcode) + return (-1); + else if (a->opcode > b->opcode) + return (1); + + return (0); +} + +/* + * XXX: Rewrite bsearch() + */ +static int +find_op_rw_range(uint16_t op, struct opcode_obj_rewrite **plo, + struct opcode_obj_rewrite **phi) +{ + struct opcode_obj_rewrite *ctl3_max, *lo, *hi, h, *rw; + + memset(&h, 0, sizeof(h)); + h.opcode = op; + + rw = (struct opcode_obj_rewrite *)bsearch(&h, ctl3_rewriters, + ctl3_rsize, sizeof(h), compare_opcodes); + if (rw == NULL) + return (1); + + /* Find the first element matching the same opcode */ + lo = rw; + for ( ; lo > ctl3_rewriters && (lo - 1)->opcode == op; lo--) + ; + + /* Find the last element matching the same opcode */ + hi = rw; + ctl3_max = ctl3_rewriters + ctl3_rsize; + for ( ; (hi + 1) < ctl3_max && (hi + 1)->opcode == op; hi++) + ; + + *plo = lo; + *phi = hi; + + return (0); +} + +/* + * Finds opcode object rewriter based on @code. + * + * Returns pointer to handler or NULL. 
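/*
 * [Illustration, not part of the patch] bsearch() may land on any element
 * equal to the key, which is why find_op_rw_range() above walks left and
 * right from the hit to find the first and last match. The same technique
 * with toy integer keys:
 */
#include <stdio.h>
#include <stdlib.h>

static int
cmp_int(const void *a, const void *b)
{
	int x = *(const int *)a, y = *(const int *)b;

	return ((x > y) - (x < y));
}

int
main(void)
{
	int v[] = { 1, 3, 3, 3, 7, 9 };	/* sorted, key 3 duplicated */
	int key = 3, n = 6;
	int *hit, *lo, *hi;

	hit = bsearch(&key, v, n, sizeof(int), cmp_int);
	if (hit == NULL)
		return (1);
	for (lo = hit; lo > v && lo[-1] == key; lo--)
		;
	for (hi = hit; hi + 1 < v + n && hi[1] == key; hi++)
		;
	printf("key %d spans indices %td..%td\n", key, lo - v, hi - v);
	return (0);
}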
+ */ +static struct opcode_obj_rewrite * +find_op_rw(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) +{ + struct opcode_obj_rewrite *rw, *lo, *hi; + uint16_t uidx; + uint8_t subtype; + + if (find_op_rw_range(cmd->opcode, &lo, &hi) != 0) + return (NULL); + + for (rw = lo; rw <= hi; rw++) { + if (rw->classifier(cmd, &uidx, &subtype) == 0) { + if (puidx != NULL) + *puidx = uidx; + if (ptype != NULL) + *ptype = subtype; + return (rw); + } + } + + return (NULL); +} +int +classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx) +{ + + if (find_op_rw(cmd, puidx, NULL) == 0) + return (1); + return (0); +} + +void +update_opcode_kidx(ipfw_insn *cmd, uint16_t idx) +{ + struct opcode_obj_rewrite *rw; + + rw = find_op_rw(cmd, NULL, NULL); + KASSERT(rw != NULL, ("No handler to update opcode %d", cmd->opcode)); + rw->update(cmd, idx); +} + +void +ipfw_init_obj_rewriter() +{ + + ctl3_rewriters = NULL; + ctl3_rsize = 0; +} + +void +ipfw_destroy_obj_rewriter() +{ + + if (ctl3_rewriters != NULL) + free(ctl3_rewriters, M_IPFW); + ctl3_rewriters = NULL; + ctl3_rsize = 0; +} + +/* + * Adds one or more opcode object rewrite handlers to the global array. + * Function may sleep. + */ +void +ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count) +{ + size_t sz; + struct opcode_obj_rewrite *tmp; + + CTL3_LOCK(); + + for (;;) { + sz = ctl3_rsize + count; + CTL3_UNLOCK(); + tmp = malloc(sizeof(*rw) * sz, M_IPFW, M_WAITOK | M_ZERO); + CTL3_LOCK(); + if (ctl3_rsize + count <= sz) + break; + + /* Retry */ + free(tmp, M_IPFW); + } + + /* Merge old & new arrays */ + sz = ctl3_rsize + count; + memcpy(tmp, ctl3_rewriters, ctl3_rsize * sizeof(*rw)); + memcpy(&tmp[ctl3_rsize], rw, count * sizeof(*rw)); + qsort(tmp, sz, sizeof(*rw), compare_opcodes); + /* Switch new and free old */ + if (ctl3_rewriters != NULL) + free(ctl3_rewriters, M_IPFW); + ctl3_rewriters = tmp; + ctl3_rsize = sz; + + CTL3_UNLOCK(); +} + +/* + * Removes one or more object rewrite handlers from the global array. + */ +int +ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count) +{ + size_t sz; + struct opcode_obj_rewrite *ctl3_max, *ktmp, *lo, *hi; + int i; + + CTL3_LOCK(); + + for (i = 0; i < count; i++) { + if (find_op_rw_range(rw[i].opcode, &lo, &hi) != 0) + continue; + + for (ktmp = lo; ktmp <= hi; ktmp++) { + if (ktmp->classifier != rw[i].classifier) + continue; + + ctl3_max = ctl3_rewriters + ctl3_rsize; + sz = (ctl3_max - (ktmp + 1)) * sizeof(*ktmp); + memmove(ktmp, ktmp + 1, sz); + ctl3_rsize--; + break; + } + + } + + if (ctl3_rsize == 0) { + if (ctl3_rewriters != NULL) + free(ctl3_rewriters, M_IPFW); + ctl3_rewriters = NULL; + } + + CTL3_UNLOCK(); + + return (0); +} + +static int +export_objhash_ntlv_internal(struct namedobj_instance *ni, + struct named_object *no, void *arg) +{ + struct sockopt_data *sd; + ipfw_obj_ntlv *ntlv; + + sd = (struct sockopt_data *)arg; + ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); + if (ntlv == NULL) + return (ENOMEM); + ipfw_export_obj_ntlv(no, ntlv); + return (0); +} + +/* + * Lists all service objects. 
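/*
 * [Illustration, not part of the patch] ipfw_add_obj_rewriter() above must
 * not sleep in malloc() while holding CTL3_LOCK, so it sizes the array,
 * drops the lock to allocate, retakes it, and retries if the array grew in
 * the meantime. A self-contained pthread sketch of the same pattern
 * (hypothetical names):
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int *arr;
static size_t arr_size;

static void
add_items(const int *items, size_t count)
{
	size_t sz;
	int *tmp;

	pthread_mutex_lock(&lock);
	for (;;) {
		sz = arr_size + count;
		pthread_mutex_unlock(&lock);
		tmp = malloc(sz * sizeof(int));	/* may block; lock is free */
		pthread_mutex_lock(&lock);
		if (arr_size + count <= sz)	/* still fits: proceed */
			break;
		free(tmp);	/* someone grew the array; retry */
	}
	if (arr != NULL)
		memcpy(tmp, arr, arr_size * sizeof(int));
	memcpy(tmp + arr_size, items, count * sizeof(int));
	free(arr);
	arr = tmp;
	arr_size += count;
	pthread_mutex_unlock(&lock);
}

int
main(void)
{
	int a[] = { 3, 1 }, b[] = { 2 };
	size_t i;

	add_items(a, 2);
	add_items(b, 1);
	for (i = 0; i < arr_size; i++)
		printf("%d ", arr[i]);	/* 3 1 2 */
	printf("\n");
	return (0);
}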
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ] size = ipfw_obj_lheader.size + * Reply: [ ipfw_obj_lheader [ ipfw_obj_ntlv x N ] (optional) ] + * Returns 0 on success + */ +static int +dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_lheader *hdr; + int count; + + hdr = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); + if (hdr == NULL) + return (EINVAL); + + IPFW_UH_RLOCK(chain); + count = ipfw_objhash_count(CHAIN_TO_SRV(chain)); + hdr->size = sizeof(ipfw_obj_lheader) + count * sizeof(ipfw_obj_ntlv); + if (sd->valsize < hdr->size) { + IPFW_UH_RUNLOCK(chain); + return (ENOMEM); + } + hdr->count = count; + hdr->objsize = sizeof(ipfw_obj_ntlv); + if (count > 0) + ipfw_objhash_foreach(CHAIN_TO_SRV(chain), + export_objhash_ntlv_internal, sd); + IPFW_UH_RUNLOCK(chain); + return (0); +} + +/* + * Compares two sopt handlers (code, version and handler ptr). + * Used both as qsort() and bsearch(). + * Does not compare handler for latter case. + * + * Returns 0 if match is found. + */ +static int +compare_sh(const void *_a, const void *_b) +{ + const struct ipfw_sopt_handler *a, *b; + + a = (const struct ipfw_sopt_handler *)_a; + b = (const struct ipfw_sopt_handler *)_b; + + if (a->opcode < b->opcode) + return (-1); + else if (a->opcode > b->opcode) + return (1); + + if (a->version < b->version) + return (-1); + else if (a->version > b->version) + return (1); + + /* bsearch helper */ + if (a->handler == NULL) + return (0); + + if ((uintptr_t)a->handler < (uintptr_t)b->handler) + return (-1); + else if ((uintptr_t)a->handler > (uintptr_t)b->handler) + return (1); + + return (0); +} + +/* + * Finds sopt handler based on @code and @version. + * + * Returns pointer to handler or NULL. + */ +static struct ipfw_sopt_handler * +find_sh(uint16_t code, uint8_t version, sopt_handler_f *handler) +{ + struct ipfw_sopt_handler *sh, h; + + memset(&h, 0, sizeof(h)); + h.opcode = code; + h.version = version; + h.handler = handler; + + sh = (struct ipfw_sopt_handler *)bsearch(&h, ctl3_handlers, + ctl3_hsize, sizeof(h), compare_sh); + + return (sh); +} + +static int +find_ref_sh(uint16_t opcode, uint8_t version, struct ipfw_sopt_handler *psh) +{ + struct ipfw_sopt_handler *sh; + + CTL3_LOCK(); + if ((sh = find_sh(opcode, version, NULL)) == NULL) { + CTL3_UNLOCK(); + printf("ipfw: ipfw_ctl3 invalid option %d""v""%d\n", + opcode, version); + return (EINVAL); + } + sh->refcnt++; + ctl3_refct++; + /* Copy handler data to requested buffer */ + *psh = *sh; + CTL3_UNLOCK(); + + return (0); +} + +static void +find_unref_sh(struct ipfw_sopt_handler *psh) +{ + struct ipfw_sopt_handler *sh; + + CTL3_LOCK(); + sh = find_sh(psh->opcode, psh->version, NULL); + KASSERT(sh != NULL, ("ctl3 handler disappeared")); + sh->refcnt--; + ctl3_refct--; + CTL3_UNLOCK(); +} + +void +ipfw_init_sopt_handler() +{ + + CTL3_LOCK_INIT(); + IPFW_ADD_SOPT_HANDLER(1, scodes); +} + +void +ipfw_destroy_sopt_handler() +{ + + IPFW_DEL_SOPT_HANDLER(1, scodes); + CTL3_LOCK_DESTROY(); +} + +/* + * Adds one or more sockopt handlers to the global array. + * Function may sleep. 
+ */ +void +ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count) +{ + size_t sz; + struct ipfw_sopt_handler *tmp; + + CTL3_LOCK(); + + for (;;) { + sz = ctl3_hsize + count; + CTL3_UNLOCK(); + tmp = malloc(sizeof(*sh) * sz, M_IPFW, M_WAITOK | M_ZERO); + CTL3_LOCK(); + if (ctl3_hsize + count <= sz) + break; + + /* Retry */ + free(tmp, M_IPFW); + } + + /* Merge old & new arrays */ + sz = ctl3_hsize + count; + memcpy(tmp, ctl3_handlers, ctl3_hsize * sizeof(*sh)); + memcpy(&tmp[ctl3_hsize], sh, count * sizeof(*sh)); + qsort(tmp, sz, sizeof(*sh), compare_sh); + /* Switch new and free old */ + if (ctl3_handlers != NULL) + free(ctl3_handlers, M_IPFW); + ctl3_handlers = tmp; + ctl3_hsize = sz; + ctl3_gencnt++; + + CTL3_UNLOCK(); +} + +/* + * Removes one or more sockopt handlers from the global array. + */ +int +ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count) +{ + size_t sz; + struct ipfw_sopt_handler *tmp, *h; + int i; + + CTL3_LOCK(); + + for (i = 0; i < count; i++) { + tmp = &sh[i]; + h = find_sh(tmp->opcode, tmp->version, tmp->handler); + if (h == NULL) + continue; + + sz = (ctl3_handlers + ctl3_hsize - (h + 1)) * sizeof(*h); + memmove(h, h + 1, sz); + ctl3_hsize--; + } + + if (ctl3_hsize == 0) { + if (ctl3_handlers != NULL) + free(ctl3_handlers, M_IPFW); + ctl3_handlers = NULL; + } + + ctl3_gencnt++; + + CTL3_UNLOCK(); + + return (0); +} + +/* + * Writes data accumulated in @sd to sockopt buffer. + * Zeroes internal @sd buffer. + */ +static int +ipfw_flush_sopt_data(struct sockopt_data *sd) +{ + struct sockopt *sopt; + int error; + size_t sz; + + sz = sd->koff; + if (sz == 0) + return (0); + + sopt = sd->sopt; + + if (sopt->sopt_dir == SOPT_GET) { + error = copyout(sd->kbuf, sopt->sopt_val, sz); + if (error != 0) + return (error); + } + + memset(sd->kbuf, 0, sd->ksize); + sd->ktotal += sz; + sd->koff = 0; + if (sd->ktotal + sd->ksize < sd->valsize) + sd->kavail = sd->ksize; + else + sd->kavail = sd->valsize - sd->ktotal; + + /* Update sopt buffer data */ + sopt->sopt_valsize = sd->ktotal; + sopt->sopt_val = sd->sopt_val + sd->ktotal; + + return (0); +} + +/* + * Ensures that @sd buffer has contiguous @neeeded number of + * bytes. + * + * Returns pointer to requested space or NULL. + */ +caddr_t +ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed) { -#define RULE_MAXSIZE (256*sizeof(u_int32_t)) int error; - size_t size, len, valsize; - struct ip_fw *buf, *rule; + caddr_t addr; + + if (sd->kavail < needed) { + /* + * Flush data and try another time. + */ + error = ipfw_flush_sopt_data(sd); + + if (sd->kavail < needed || error != 0) + return (NULL); + } + + addr = sd->kbuf + sd->koff; + sd->koff += needed; + sd->kavail -= needed; + return (addr); +} + +/* + * Requests @needed contiguous bytes from @sd buffer. + * Function is used to notify subsystem that we are + * interesed in first @needed bytes (request header) + * and the rest buffer can be safely zeroed. + * + * Returns pointer to requested space or NULL. + */ +caddr_t +ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed) +{ + caddr_t addr; + + if ((addr = ipfw_get_sopt_space(sd, needed)) == NULL) + return (NULL); + + if (sd->kavail > 0) + memset(sd->kbuf + sd->koff, 0, sd->kavail); + + return (addr); +} + +/* + * New sockopt handler. 
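ipfw_get_sopt_space() hides the sliding window completely: when the current chunk is exhausted it calls ipfw_flush_sopt_data(), which copies the finished chunk out to the (possibly wired) user buffer and rewinds the kernel buffer. A GET handler therefore just keeps asking for per-record space; a minimal sketch modeled on export_objhash_ntlv_internal() above:

static int
export_one(struct sockopt_data *sd, struct named_object *no)
{
	ipfw_obj_ntlv *ntlv;

	ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
	if (ntlv == NULL)
		return (ENOMEM);	/* user buffer exhausted */
	ipfw_export_obj_ntlv(no, ntlv);
	return (0);
}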
+ */ +int +ipfw_ctl3(struct sockopt *sopt) +{ + int error, locked; + size_t size, valsize; struct ip_fw_chain *chain; - u_int32_t rulenum[2]; - uint32_t opt; - char xbuf[128]; + char xbuf[256]; + struct sockopt_data sdata; + struct ipfw_sopt_handler h; ip_fw3_opheader *op3 = NULL; error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); - if (error) + if (error != 0) + return (error); + + if (sopt->sopt_name != IP_FW3) + return (ipfw_ctl(sopt)); + + chain = &V_layer3_chain; + error = 0; + + /* Save original valsize before it is altered via sooptcopyin() */ + valsize = sopt->sopt_valsize; + memset(&sdata, 0, sizeof(sdata)); + /* Read op3 header first to determine actual operation */ + op3 = (ip_fw3_opheader *)xbuf; + error = sooptcopyin(sopt, op3, sizeof(*op3), sizeof(*op3)); + if (error != 0) + return (error); + sopt->sopt_valsize = valsize; + + /* + * Find and reference command. + */ + error = find_ref_sh(op3->opcode, op3->version, &h); + if (error != 0) return (error); /* * Disallow modifications in really-really secure mode, but still allow * the logging counters to be reset. */ - if (sopt->sopt_name == IP_FW_ADD || - (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) { + if ((h.dir & HDIR_SET) != 0 && h.opcode != IP_FW_XRESETLOG) { error = securelevel_ge(sopt->sopt_td->td_ucred, 3); - if (error) + if (error != 0) { + find_unref_sh(&h); return (error); + } } + /* + * Fill in sockopt_data structure that may be useful for + * IP_FW3 get requests. + */ + locked = 0; + if (valsize <= sizeof(xbuf)) { + /* use on-stack buffer */ + sdata.kbuf = xbuf; + sdata.ksize = sizeof(xbuf); + sdata.kavail = valsize; + } else { + + /* + * Determine opcode type/buffer size: + * allocate sliding-window buf for data export or + * contiguous buffer for special ops. + */ + if ((h.dir & HDIR_SET) != 0) { + /* Set request. Allocate contigous buffer. */ + if (valsize > CTL3_LARGEBUF) { + find_unref_sh(&h); + return (EFBIG); + } + + size = valsize; + } else { + /* Get request. Allocate sliding window buffer */ + size = (valsize<CTL3_SMALLBUF) ? valsize:CTL3_SMALLBUF; + + if (size < valsize) { + /* We have to wire user buffer */ + error = vslock(sopt->sopt_val, valsize); + if (error != 0) + return (error); + locked = 1; + } + } + + sdata.kbuf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); + sdata.ksize = size; + sdata.kavail = size; + } + + sdata.sopt = sopt; + sdata.sopt_val = sopt->sopt_val; + sdata.valsize = valsize; + + /* + * Copy either all request (if valsize < bsize_max) + * or first bsize_max bytes to guarantee most consumers + * that all necessary data has been copied). + * Anyway, copy not less than sizeof(ip_fw3_opheader). + */ + if ((error = sooptcopyin(sopt, sdata.kbuf, sdata.ksize, + sizeof(ip_fw3_opheader))) != 0) + return (error); + op3 = (ip_fw3_opheader *)sdata.kbuf; + + /* Finally, run handler */ + error = h.handler(chain, op3, &sdata); + find_unref_sh(&h); + + /* Flush state and free buffers */ + if (error == 0) + error = ipfw_flush_sopt_data(&sdata); + else + ipfw_flush_sopt_data(&sdata); + + if (locked != 0) + vsunlock(sdata.sopt_val, valsize); + + /* Restore original pointer and set number of bytes written */ + sopt->sopt_val = sdata.sopt_val; + sopt->sopt_valsize = sdata.ktotal; + if (sdata.kbuf != xbuf) + free(sdata.kbuf, M_TEMP); + + return (error); +} + +/** + * {set|get}sockopt parser. 
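From userland the whole machinery is driven through a single socket option: the buffer passed to getsockopt()/setsockopt() starts with an ip_fw3_opheader whose opcode/version pair selects the kernel handler via find_ref_sh(). A hedged sketch of the GET side; the raw-socket setup mirrors what ipfw(8) does:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip_fw.h>
#include <string.h>

static int
ipfw3_get(int sock, uint16_t opcode, void *buf, socklen_t len)
{
	ip_fw3_opheader *op3;

	/* opcode/version sit at the front of the option buffer;
	 * len must be at least sizeof(*op3) */
	op3 = (ip_fw3_opheader *)buf;
	memset(op3, 0, sizeof(*op3));
	op3->opcode = opcode;

	return (getsockopt(sock, IPPROTO_IP, IP_FW3, buf, &len));
}

/* int sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); */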
+ */ +int +ipfw_ctl(struct sockopt *sopt) +{ +#define RULE_MAXSIZE (512*sizeof(u_int32_t)) + int error; + size_t size, valsize; + struct ip_fw *buf; + struct ip_fw_rule0 *rule; + struct ip_fw_chain *chain; + u_int32_t rulenum[2]; + uint32_t opt; + struct rule_check_info ci; + IPFW_RLOCK_TRACKER; + chain = &V_layer3_chain; error = 0; /* Save original valsize before it is altered via sooptcopyin() */ valsize = sopt->sopt_valsize; - if ((opt = sopt->sopt_name) == IP_FW3) { - /* - * Copy not less than sizeof(ip_fw3_opheader). - * We hope any IP_FW3 command will fit into 128-byte buffer. - */ - if ((error = sooptcopyin(sopt, xbuf, sizeof(xbuf), - sizeof(ip_fw3_opheader))) != 0) + opt = sopt->sopt_name; + + /* + * Disallow modifications in really-really secure mode, but still allow + * the logging counters to be reset. + */ + if (opt == IP_FW_ADD || + (sopt->sopt_dir == SOPT_SET && opt != IP_FW_RESETLOG)) { + error = securelevel_ge(sopt->sopt_td->td_ucred, 3); + if (error != 0) return (error); - op3 = (ip_fw3_opheader *)xbuf; - opt = op3->opcode; } switch (opt) { @@ -1006,9 +3756,7 @@ ipfw_ctl(struct sockopt *sopt) size += ipfw_dyn_len(); if (size >= sopt->sopt_valsize) break; - buf = malloc(size, M_TEMP, M_WAITOK); - if (buf == NULL) - break; + buf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); IPFW_UH_RLOCK(chain); /* check again how much space we need */ want = chain->static_len + ipfw_dyn_len(); @@ -1033,6 +3781,8 @@ ipfw_ctl(struct sockopt *sopt) error = sooptcopyin(sopt, rule, RULE_MAXSIZE, sizeof(struct ip_fw7) ); + memset(&ci, 0, sizeof(struct rule_check_info)); + /* * If the size of commands equals RULESIZE7 then we assume * a FreeBSD7.2 binary is talking to us (set is7=1). @@ -1042,25 +3792,30 @@ ipfw_ctl(struct sockopt *sopt) * the first ipfw command is 'ipfw [pipe] list') * the ipfw binary may crash or loop infinitly... 
*/ - if (sopt->sopt_valsize == RULESIZE7(rule)) { + size = sopt->sopt_valsize; + if (size == RULESIZE7(rule)) { is7 = 1; error = convert_rule_to_8(rule); if (error) { free(rule, M_TEMP); return error; } - if (error == 0) - error = check_ipfw_struct(rule, RULESIZE(rule)); - } else { + size = RULESIZE(rule); + } else is7 = 0; if (error == 0) - error = check_ipfw_struct(rule, sopt->sopt_valsize); - } + error = check_ipfw_rule0(rule, size, &ci); if (error == 0) { - /* locking is done within ipfw_add_rule() */ - error = ipfw_add_rule(chain, rule); - size = RULESIZE(rule); - if (!error && sopt->sopt_dir == SOPT_GET) { + /* locking is done within add_rule() */ + struct ip_fw *krule; + krule = ipfw_alloc_rule(chain, RULEKSIZE0(rule)); + ci.urule = (caddr_t)rule; + ci.krule = krule; + import_rule0(&ci); + error = commit_rules(chain, &ci, 1); + if (error != 0) + free_rule(ci.krule); + else if (sopt->sopt_dir == SOPT_GET) { if (is7) { error = convert_rule_to_7(rule); size = RULESIZE7(rule); @@ -1119,82 +3874,64 @@ ipfw_ctl(struct sockopt *sopt) sopt->sopt_name == IP_FW_RESETLOG); break; - /*--- TABLE manipulations are protected by the IPFW_LOCK ---*/ + /*--- TABLE opcodes ---*/ case IP_FW_TABLE_ADD: - { - ipfw_table_entry ent; - - error = sooptcopyin(sopt, &ent, - sizeof(ent), sizeof(ent)); - if (error) - break; - error = ipfw_add_table_entry(chain, ent.tbl, - &ent.addr, sizeof(ent.addr), ent.masklen, - IPFW_TABLE_CIDR, ent.value); - } - break; - case IP_FW_TABLE_DEL: { ipfw_table_entry ent; + struct tentry_info tei; + struct tid_info ti; + struct table_value v; error = sooptcopyin(sopt, &ent, sizeof(ent), sizeof(ent)); if (error) break; - error = ipfw_del_table_entry(chain, ent.tbl, - &ent.addr, sizeof(ent.addr), ent.masklen, IPFW_TABLE_CIDR); - } - break; - - case IP_FW_TABLE_XADD: /* IP_FW3 */ - case IP_FW_TABLE_XDEL: /* IP_FW3 */ - { - ipfw_table_xentry *xent = (ipfw_table_xentry *)(op3 + 1); - - /* Check minimum header size */ - if (IP_FW3_OPLENGTH(sopt) < offsetof(ipfw_table_xentry, k)) { - error = EINVAL; - break; - } - /* Check if len field is valid */ - if (xent->len > sizeof(ipfw_table_xentry)) { - error = EINVAL; - break; - } - - len = xent->len - offsetof(ipfw_table_xentry, k); - - error = (opt == IP_FW_TABLE_XADD) ? - ipfw_add_table_entry(chain, xent->tbl, &xent->k, - len, xent->masklen, xent->type, xent->value) : - ipfw_del_table_entry(chain, xent->tbl, &xent->k, - len, xent->masklen, xent->type); + memset(&tei, 0, sizeof(tei)); + tei.paddr = &ent.addr; + tei.subtype = AF_INET; + tei.masklen = ent.masklen; + ipfw_import_table_value_legacy(ent.value, &v); + tei.pvalue = &v; + memset(&ti, 0, sizeof(ti)); + ti.uidx = ent.tbl; + ti.type = IPFW_TABLE_CIDR; + + error = (opt == IP_FW_TABLE_ADD) ? 
+ add_table_entry(chain, &ti, &tei, 0, 1) : + del_table_entry(chain, &ti, &tei, 0, 1); } break; + case IP_FW_TABLE_FLUSH: { u_int16_t tbl; + struct tid_info ti; error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)); if (error) break; - error = ipfw_flush_table(chain, tbl); + memset(&ti, 0, sizeof(ti)); + ti.uidx = tbl; + error = flush_table(chain, &ti); } break; case IP_FW_TABLE_GETSIZE: { u_int32_t tbl, cnt; + struct tid_info ti; if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)))) break; + memset(&ti, 0, sizeof(ti)); + ti.uidx = tbl; IPFW_RLOCK(chain); - error = ipfw_count_table(chain, tbl, &cnt); + error = ipfw_count_table(chain, &ti, &cnt); IPFW_RUNLOCK(chain); if (error) break; @@ -1205,6 +3942,7 @@ ipfw_ctl(struct sockopt *sopt) case IP_FW_TABLE_LIST: { ipfw_table *tbl; + struct tid_info ti; if (sopt->sopt_valsize < sizeof(*tbl)) { error = EINVAL; @@ -1219,8 +3957,10 @@ ipfw_ctl(struct sockopt *sopt) } tbl->size = (size - sizeof(*tbl)) / sizeof(ipfw_table_entry); + memset(&ti, 0, sizeof(ti)); + ti.uidx = tbl->tbl; IPFW_RLOCK(chain); - error = ipfw_dump_table(chain, tbl); + error = ipfw_dump_table_legacy(chain, &ti, tbl); IPFW_RUNLOCK(chain); if (error) { free(tbl, M_TEMP); @@ -1231,62 +3971,6 @@ ipfw_ctl(struct sockopt *sopt) } break; - case IP_FW_TABLE_XGETSIZE: /* IP_FW3 */ - { - uint32_t *tbl; - - if (IP_FW3_OPLENGTH(sopt) < sizeof(uint32_t)) { - error = EINVAL; - break; - } - - tbl = (uint32_t *)(op3 + 1); - - IPFW_RLOCK(chain); - error = ipfw_count_xtable(chain, *tbl, tbl); - IPFW_RUNLOCK(chain); - if (error) - break; - error = sooptcopyout(sopt, op3, sopt->sopt_valsize); - } - break; - - case IP_FW_TABLE_XLIST: /* IP_FW3 */ - { - ipfw_xtable *tbl; - - if ((size = valsize) < sizeof(ipfw_xtable)) { - error = EINVAL; - break; - } - - tbl = malloc(size, M_TEMP, M_ZERO | M_WAITOK); - memcpy(tbl, op3, sizeof(ipfw_xtable)); - - /* Get maximum number of entries we can store */ - tbl->size = (size - sizeof(ipfw_xtable)) / - sizeof(ipfw_table_xentry); - IPFW_RLOCK(chain); - error = ipfw_dump_xtable(chain, tbl); - IPFW_RUNLOCK(chain); - if (error) { - free(tbl, M_TEMP); - break; - } - - /* Revert size field back to bytes */ - tbl->size = tbl->size * sizeof(ipfw_table_xentry) + - sizeof(ipfw_table); - /* - * Since we call sooptcopyin() with small buffer, sopt_valsize is - * decreased to reflect supplied buffer size. 
Set it back to original value - */ - sopt->sopt_valsize = valsize; - error = sooptcopyout(sopt, tbl, size); - free(tbl, M_TEMP); - } - break; - /*--- NAT operations are protected by the IPFW_LOCK ---*/ case IP_FW_NAT_CFG: if (IPFW_NAT_LOADED) @@ -1336,18 +4020,16 @@ ipfw_ctl(struct sockopt *sopt) return (error); #undef RULE_MAXSIZE } - - #define RULE_MAXSIZE (256*sizeof(u_int32_t)) /* Functions to convert rules 7.2 <==> 8.0 */ -int -convert_rule_to_7(struct ip_fw *rule) +static int +convert_rule_to_7(struct ip_fw_rule0 *rule) { /* Used to modify original rule */ struct ip_fw7 *rule7 = (struct ip_fw7 *)rule; /* copy of original rule, version 8 */ - struct ip_fw *tmp; + struct ip_fw_rule0 *tmp; /* Used to copy commands */ ipfw_insn *ccmd, *dst; @@ -1360,13 +4042,12 @@ convert_rule_to_7(struct ip_fw *rule) bcopy(rule, tmp, RULE_MAXSIZE); /* Copy fields */ - rule7->_pad = tmp->_pad; + //rule7->_pad = tmp->_pad; rule7->set = tmp->set; rule7->rulenum = tmp->rulenum; rule7->cmd_len = tmp->cmd_len; rule7->act_ofs = tmp->act_ofs; rule7->next_rule = (struct ip_fw7 *)tmp->next_rule; - rule7->next = (struct ip_fw7 *)tmp->x_next; rule7->cmd_len = tmp->cmd_len; rule7->pcnt = tmp->pcnt; rule7->bcnt = tmp->bcnt; @@ -1396,8 +4077,8 @@ convert_rule_to_7(struct ip_fw *rule) return 0; } -int -convert_rule_to_8(struct ip_fw *rule) +static int +convert_rule_to_8(struct ip_fw_rule0 *rule) { /* Used to modify original rule */ struct ip_fw7 *rule7 = (struct ip_fw7 *) rule; @@ -1439,7 +4120,6 @@ convert_rule_to_8(struct ip_fw *rule) rule->cmd_len = tmp->cmd_len; rule->act_ofs = tmp->act_ofs; rule->next_rule = (struct ip_fw *)tmp->next_rule; - rule->x_next = (struct ip_fw *)tmp->next; rule->cmd_len = tmp->cmd_len; rule->id = 0; /* XXX see if is ok = 0 */ rule->pcnt = tmp->pcnt; @@ -1450,4 +4130,486 @@ convert_rule_to_8(struct ip_fw *rule) return 0; } +/* + * Named object api + * + */ + +void +ipfw_init_srv(struct ip_fw_chain *ch) +{ + + ch->srvmap = ipfw_objhash_create(IPFW_OBJECTS_DEFAULT); + ch->srvstate = malloc(sizeof(void *) * IPFW_OBJECTS_DEFAULT, + M_IPFW, M_WAITOK | M_ZERO); +} + +void +ipfw_destroy_srv(struct ip_fw_chain *ch) +{ + + free(ch->srvstate, M_IPFW); + ipfw_objhash_destroy(ch->srvmap); +} + +/* + * Allocate new bitmask which can be used to enlarge/shrink + * named instance index. + */ +void +ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks) +{ + size_t size; + int max_blocks; + u_long *idx_mask; + + KASSERT((items % BLOCK_ITEMS) == 0, + ("bitmask size needs to power of 2 and greater or equal to %zu", + BLOCK_ITEMS)); + + max_blocks = items / BLOCK_ITEMS; + size = items / 8; + idx_mask = malloc(size * IPFW_MAX_SETS, M_IPFW, M_WAITOK); + /* Mark all as free */ + memset(idx_mask, 0xFF, size * IPFW_MAX_SETS); + *idx_mask &= ~(u_long)1; /* Skip index 0 */ + + *idx = idx_mask; + *pblocks = max_blocks; +} + +/* + * Copy current bitmask index to new one. + */ +void +ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks) +{ + int old_blocks, new_blocks; + u_long *old_idx, *new_idx; + int i; + + old_idx = ni->idx_mask; + old_blocks = ni->max_blocks; + new_idx = *idx; + new_blocks = *blocks; + + for (i = 0; i < IPFW_MAX_SETS; i++) { + memcpy(&new_idx[new_blocks * i], &old_idx[old_blocks * i], + old_blocks * sizeof(u_long)); + } +} + +/* + * Swaps current @ni index with new one. 
+ */ +void +ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks) +{ + int old_blocks; + u_long *old_idx; + + old_idx = ni->idx_mask; + old_blocks = ni->max_blocks; + + ni->idx_mask = *idx; + ni->max_blocks = *blocks; + + /* Save old values */ + *idx = old_idx; + *blocks = old_blocks; +} + +void +ipfw_objhash_bitmap_free(void *idx, int blocks) +{ + + free(idx, M_IPFW); +} + +/* + * Creates named hash instance. + * Must be called without holding any locks. + * Return pointer to new instance. + */ +struct namedobj_instance * +ipfw_objhash_create(uint32_t items) +{ + struct namedobj_instance *ni; + int i; + size_t size; + + size = sizeof(struct namedobj_instance) + + sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE + + sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE; + + ni = malloc(size, M_IPFW, M_WAITOK | M_ZERO); + ni->nn_size = NAMEDOBJ_HASH_SIZE; + ni->nv_size = NAMEDOBJ_HASH_SIZE; + + ni->names = (struct namedobjects_head *)(ni +1); + ni->values = &ni->names[ni->nn_size]; + + for (i = 0; i < ni->nn_size; i++) + TAILQ_INIT(&ni->names[i]); + + for (i = 0; i < ni->nv_size; i++) + TAILQ_INIT(&ni->values[i]); + + /* Set default hashing/comparison functions */ + ni->hash_f = objhash_hash_name; + ni->cmp_f = objhash_cmp_name; + + /* Allocate bitmask separately due to possible resize */ + ipfw_objhash_bitmap_alloc(items, (void*)&ni->idx_mask, &ni->max_blocks); + + return (ni); +} + +void +ipfw_objhash_destroy(struct namedobj_instance *ni) +{ + + free(ni->idx_mask, M_IPFW); + free(ni, M_IPFW); +} + +void +ipfw_objhash_set_funcs(struct namedobj_instance *ni, objhash_hash_f *hash_f, + objhash_cmp_f *cmp_f) +{ + + ni->hash_f = hash_f; + ni->cmp_f = cmp_f; +} + +static uint32_t +objhash_hash_name(struct namedobj_instance *ni, const void *name, uint32_t set) +{ + + return (fnv_32_str((const char *)name, FNV1_32_INIT)); +} + +static int +objhash_cmp_name(struct named_object *no, const void *name, uint32_t set) +{ + + if ((strcmp(no->name, (const char *)name) == 0) && (no->set == set)) + return (0); + + return (1); +} + +static uint32_t +objhash_hash_idx(struct namedobj_instance *ni, uint32_t val) +{ + uint32_t v; + + v = val % (ni->nv_size - 1); + + return (v); +} + +struct named_object * +ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name) +{ + struct named_object *no; + uint32_t hash; + + hash = ni->hash_f(ni, name, set) % ni->nn_size; + + TAILQ_FOREACH(no, &ni->names[hash], nn_next) { + if (ni->cmp_f(no, name, set) == 0) + return (no); + } + + return (NULL); +} + +/* + * Find named object by @uid. + * Check @tlvs for valid data inside. + * + * Returns pointer to found TLV or NULL. + */ +ipfw_obj_ntlv * +ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx, uint32_t etlv) +{ + ipfw_obj_ntlv *ntlv; + uintptr_t pa, pe; + int l; + + pa = (uintptr_t)tlvs; + pe = pa + len; + l = 0; + for (; pa < pe; pa += l) { + ntlv = (ipfw_obj_ntlv *)pa; + l = ntlv->head.length; + + if (l != sizeof(*ntlv)) + return (NULL); + + if (ntlv->idx != uidx) + continue; + /* + * When userland has specified zero TLV type, do + * not compare it with eltv. In some cases userland + * doesn't know what type should it have. Use only + * uidx and name for search named_object. + */ + if (ntlv->head.type != 0 && + ntlv->head.type != (uint16_t)etlv) + continue; + + if (ipfw_check_object_name_generic(ntlv->name) != 0) + return (NULL); + + return (ntlv); + } + + return (NULL); +} + +/* + * Finds object config based on either legacy index + * or name in ntlv. 
+ * Note @ti structure contains unchecked data from userland. + * + * Returns 0 in success and fills in @pno with found config + */ +int +ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti, + uint32_t etlv, struct named_object **pno) +{ + char *name; + ipfw_obj_ntlv *ntlv; + uint32_t set; + + if (ti->tlvs == NULL) + return (EINVAL); + + ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, etlv); + if (ntlv == NULL) + return (EINVAL); + name = ntlv->name; + + /* + * Use set provided by @ti instead of @ntlv one. + * This is needed due to different sets behavior + * controlled by V_fw_tables_sets. + */ + set = ti->set; + *pno = ipfw_objhash_lookup_name(ni, set, name); + if (*pno == NULL) + return (ESRCH); + return (0); +} + +/* + * Find named object by name, considering also its TLV type. + */ +struct named_object * +ipfw_objhash_lookup_name_type(struct namedobj_instance *ni, uint32_t set, + uint32_t type, const char *name) +{ + struct named_object *no; + uint32_t hash; + + hash = ni->hash_f(ni, name, set) % ni->nn_size; + + TAILQ_FOREACH(no, &ni->names[hash], nn_next) { + if (ni->cmp_f(no, name, set) == 0 && + no->etlv == (uint16_t)type) + return (no); + } + + return (NULL); +} + +struct named_object * +ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t kidx) +{ + struct named_object *no; + uint32_t hash; + + hash = objhash_hash_idx(ni, kidx); + + TAILQ_FOREACH(no, &ni->values[hash], nv_next) { + if (no->kidx == kidx) + return (no); + } + + return (NULL); +} + +int +ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, + struct named_object *b) +{ + + if ((strcmp(a->name, b->name) == 0) && a->set == b->set) + return (1); + + return (0); +} + +void +ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no) +{ + uint32_t hash; + + hash = ni->hash_f(ni, no->name, no->set) % ni->nn_size; + TAILQ_INSERT_HEAD(&ni->names[hash], no, nn_next); + + hash = objhash_hash_idx(ni, no->kidx); + TAILQ_INSERT_HEAD(&ni->values[hash], no, nv_next); + + ni->count++; +} + +void +ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no) +{ + uint32_t hash; + + hash = ni->hash_f(ni, no->name, no->set) % ni->nn_size; + TAILQ_REMOVE(&ni->names[hash], no, nn_next); + + hash = objhash_hash_idx(ni, no->kidx); + TAILQ_REMOVE(&ni->values[hash], no, nv_next); + + ni->count--; +} + +uint32_t +ipfw_objhash_count(struct namedobj_instance *ni) +{ + + return (ni->count); +} + +uint32_t +ipfw_objhash_count_type(struct namedobj_instance *ni, uint16_t type) +{ + struct named_object *no; + uint32_t count; + int i; + + count = 0; + for (i = 0; i < ni->nn_size; i++) { + TAILQ_FOREACH(no, &ni->names[i], nn_next) { + if (no->etlv == type) + count++; + } + } + return (count); +} + +/* + * Runs @func for each found named object. + * It is safe to delete objects from callback + */ +int +ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg) +{ + struct named_object *no, *no_tmp; + int i, ret; + + for (i = 0; i < ni->nn_size; i++) { + TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) { + ret = f(ni, no, arg); + if (ret != 0) + return (ret); + } + } + return (0); +} + +/* + * Runs @f for each found named object with type @type. 
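The foreach walkers take an objhash_cb_t callback with the shape visible in the calls above: f(ni, no, arg). Returning non-zero stops the walk and propagates that value, and TAILQ_FOREACH_SAFE makes it legal to unlink @no from inside the callback. A minimal sketch:

static int
count_refs(struct namedobj_instance *ni, struct named_object *no,
    void *arg)
{
	uint32_t *refs = arg;

	*refs += no->refcnt;
	return (0);		/* keep walking */
}

/* uint32_t refs = 0; */
/* ipfw_objhash_foreach(ni, count_refs, &refs); */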
+ * It is safe to delete objects from callback
+ */
+int
+ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f,
+    void *arg, uint16_t type)
+{
+	struct named_object *no, *no_tmp;
+	int i, ret;
+
+	for (i = 0; i < ni->nn_size; i++) {
+		TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) {
+			if (no->etlv != type)
+				continue;
+			ret = f(ni, no, arg);
+			if (ret != 0)
+				return (ret);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Removes index from given set.
+ * Returns 0 on success.
+ */
+int
+ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx)
+{
+	u_long *mask;
+	int i, v;
+
+	i = idx / BLOCK_ITEMS;
+	v = idx % BLOCK_ITEMS;
+
+	if (i >= ni->max_blocks)
+		return (1);
+
+	mask = &ni->idx_mask[i];
+
+	if ((*mask & ((u_long)1 << v)) != 0)
+		return (1);
+
+	/* Mark as free */
+	*mask |= (u_long)1 << v;
+
+	/* Update free offset */
+	if (ni->free_off[0] > i)
+		ni->free_off[0] = i;
+
+	return (0);
+}
+
+/*
+ * Allocates new index in given instance and stores it in @pidx.
+ * Returns 0 on success.
+ */
+int
+ipfw_objhash_alloc_idx(void *n, uint16_t *pidx)
+{
+	struct namedobj_instance *ni;
+	u_long *mask;
+	int i, off, v;
+
+	ni = (struct namedobj_instance *)n;
+
+	off = ni->free_off[0];
+	mask = &ni->idx_mask[off];
+
+	for (i = off; i < ni->max_blocks; i++, mask++) {
+		if ((v = ffsl(*mask)) == 0)
+			continue;
+
+		/* Mark as busy */
+		*mask &= ~((u_long)1 << (v - 1));
+
+		ni->free_off[0] = i;
+
+		v = BLOCK_ITEMS * i + v - 1;
+
+		*pidx = v;
+		return (0);
+	}
+
+	return (1);
+}
+
 /* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table.c b/freebsd/sys/netpfil/ipfw/ip_fw_table.c
index 71579795..9d2baad2 100644
--- a/freebsd/sys/netpfil/ipfw/ip_fw_table.c
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_table.c
@@ -2,6 +2,8 @@
 /*-
  * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
+ * Copyright (c) 2014 Yandex LLC
+ * Copyright (c) 2014 Alexander V. Chernikov
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,24 +31,18 @@ __FBSDID("$FreeBSD$");
 
 /*
- * Lookup table support for ipfw
+ * Lookup table support for ipfw.
  *
- * Lookup tables are implemented (at the moment) using the radix
- * tree used for routing tables. Tables store key-value entries, where
- * keys are network prefixes (addr/masklen), and values are integers.
- * As a degenerate case we can interpret keys as 32-bit integers
- * (with a /32 mask).
+ * This file contains handlers for all generic tables' operations:
+ * add/del/flush entries, list/dump tables etc..
  *
- * The table is protected by the IPFW lock even for manipulation coming
- * from userland, because operations are typically fast.
+ * Table data modification is protected by both UH and runtime lock
+ * while reading configuration/data is protected by UH lock.
+ *
+ * Lookup algorithms for all table types are located in ip_fw_table_algo.c
 */
 
 #include <rtems/bsd/local/opt_ipfw.h>
-#include <rtems/bsd/local/opt_inet.h>
-#ifndef INET
-#error IPFIREWALL requires INET.
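The index bitmaps above keep a set bit for every free slot, so ipfw_objhash_alloc_idx() reduces to ffsl() over the blocks at or past free_off, and freeing an index is a single bit set plus a possible free_off rewind. The same scheme in isolation, as a self-contained sketch assuming BLOCK_ITEMS is the bit width of u_long:

#include <sys/param.h>	/* NBBY, u_long */
#include <stdint.h>
#include <strings.h>	/* ffsl() in userland */

#define	EX_BLOCK_ITEMS	(sizeof(u_long) * NBBY)

static u_long ex_mask[2] = {
	~(u_long)1,	/* bit 0 clear: index 0 is reserved */
	~(u_long)0,	/* all indexes in block 1 free */
};

static int
ex_alloc_idx(uint16_t *pidx)
{
	int i, v;

	for (i = 0; i < 2; i++) {
		if ((v = ffsl(ex_mask[i])) == 0)
			continue;			/* block full */
		ex_mask[i] &= ~((u_long)1 << (v - 1));	/* mark busy */
		*pidx = EX_BLOCK_ITEMS * i + v - 1;
		return (0);
	}
	return (1);					/* no free index */
}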
-#endif /* INET */ -#include <rtems/bsd/local/opt_inet6.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> @@ -54,713 +50,3296 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <rtems/bsd/sys/lock.h> #include <sys/rwlock.h> +#include <sys/rmlock.h> #include <sys/socket.h> +#include <sys/socketvar.h> #include <sys/queue.h> #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ -#include <net/radix.h> -#include <net/route.h> -#include <net/vnet.h> #include <netinet/in.h> #include <netinet/ip_var.h> /* struct ipfw_rule_ref */ #include <netinet/ip_fw.h> #include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_fw_table.h> -#ifdef MAC -#include <security/mac/mac_framework.h> -#endif + /* + * Table has the following `type` concepts: + * + * `no.type` represents lookup key type (addr, ifp, uid, etc..) + * vmask represents bitmask of table values which are present at the moment. + * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old + * single-value-for-all approach. + */ +struct table_config { + struct named_object no; + uint8_t tflags; /* type flags */ + uint8_t locked; /* 1 if locked from changes */ + uint8_t linked; /* 1 if already linked */ + uint8_t ochanged; /* used by set swapping */ + uint8_t vshared; /* 1 if using shared value array */ + uint8_t spare[3]; + uint32_t count; /* Number of records */ + uint32_t limit; /* Max number of records */ + uint32_t vmask; /* bitmask with supported values */ + uint32_t ocount; /* used by set swapping */ + uint64_t gencnt; /* generation count */ + char tablename[64]; /* table name */ + struct table_algo *ta; /* Callbacks for given algo */ + void *astate; /* algorithm state */ + struct table_info ti_copy; /* data to put to table_info */ + struct namedobj_instance *vi; +}; -MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); +static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti, + struct table_config **tc); +static struct table_config *find_table(struct namedobj_instance *ni, + struct tid_info *ti); +static struct table_config *alloc_table_config(struct ip_fw_chain *ch, + struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags); +static void free_table_config(struct namedobj_instance *ni, + struct table_config *tc); +static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, + char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref); +static void link_table(struct ip_fw_chain *ch, struct table_config *tc); +static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc); +static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti, + struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc); +#define OP_ADD 1 +#define OP_DEL 0 +static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, + struct sockopt_data *sd); +static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, + ipfw_xtable_info *i); +static int dump_table_tentry(void *e, void *arg); +static int dump_table_xentry(void *e, void *arg); + +static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a, + struct tid_info *b); + +static int check_table_name(const char *name); +static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, + struct table_config *tc, struct table_info *ti, uint32_t count); +static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti); + +static struct table_algo *find_table_algo(struct tables_config *tableconf, + struct tid_info *ti, char *name); + +static void 
objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti); +static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti); + +#define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash) +#define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k])) + +#define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */ -struct table_entry { - struct radix_node rn[2]; - struct sockaddr_in addr, mask; - u_int32_t value; -}; +void +rollback_toperation_state(struct ip_fw_chain *ch, void *object) +{ + struct tables_config *tcfg; + struct op_state *os; -struct xaddr_iface { - uint8_t if_len; /* length of this struct */ - uint8_t pad[7]; /* Align name */ - char ifname[IF_NAMESIZE]; /* Interface name */ -}; + tcfg = CHAIN_TO_TCFG(ch); + TAILQ_FOREACH(os, &tcfg->state_list, next) + os->func(object, os); +} + +void +add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) +{ + struct tables_config *tcfg; + + tcfg = CHAIN_TO_TCFG(ch); + TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next); +} + +void +del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) +{ + struct tables_config *tcfg; + + tcfg = CHAIN_TO_TCFG(ch); + TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next); +} + +void +tc_ref(struct table_config *tc) +{ + + tc->no.refcnt++; +} + +void +tc_unref(struct table_config *tc) +{ + + tc->no.refcnt--; +} + +static struct table_value * +get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx) +{ + struct table_value *pval; + + pval = (struct table_value *)ch->valuestate; + + return (&pval[kidx]); +} -struct table_xentry { - struct radix_node rn[2]; - union { -#ifdef INET6 - struct sockaddr_in6 addr6; -#endif - struct xaddr_iface iface; - } a; - union { -#ifdef INET6 - struct sockaddr_in6 mask6; -#endif - struct xaddr_iface ifmask; - } m; - u_int32_t value; -}; /* - * The radix code expects addr and mask to be array of bytes, - * with the first byte being the length of the array. rn_inithead - * is called with the offset in bits of the lookup key within the - * array. If we use a sockaddr_in as the underlying type, - * sin_len is conveniently located at offset 0, sin_addr is at - * offset 4 and normally aligned. - * But for portability, let's avoid assumption and make the code explicit + * Checks if we're able to insert/update entry @tei into table + * w.r.t @tc limits. + * May alter @tei to indicate insertion error / insert + * options. + * + * Returns 0 if operation can be performed/ */ -#define KEY_LEN(v) *((uint8_t *)&(v)) -#define KEY_OFS (8*offsetof(struct sockaddr_in, sin_addr)) +static int +check_table_limit(struct table_config *tc, struct tentry_info *tei) +{ + + if (tc->limit == 0 || tc->count < tc->limit) + return (0); + + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) { + /* Notify userland on error cause */ + tei->flags |= TEI_FLAGS_LIMIT; + return (EFBIG); + } + + /* + * We have UPDATE flag set. + * Permit updating record (if found), + * but restrict adding new one since we've + * already hit the limit. + */ + tei->flags |= TEI_FLAGS_DONTADD; + + return (0); +} + /* - * Do not require radix to compare more than actual IPv4/IPv6 address + * Convert algorithm callback return code into + * one of pre-defined states known by userland. 
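rollback_toperation_state() gives every sleeping table operation a chance to learn that the chain changed underneath it: before dropping the UH lock the operation links its tableop_state into the per-chain list, and any writer that invalidates table state walks that list and fires os->func(). A condensed sketch of the publish/check cycle, with error handling elided:

static int
example_slow_op(struct ip_fw_chain *ch, struct table_config *tc)
{
	struct tableop_state ts;

	IPFW_UH_WLOCK(ch);
	memset(&ts, 0, sizeof(ts));
	ts.ch = ch;
	ts.tc = tc;
	ts.opstate.func = rollback_add_entry;	/* sets ts.modified */
	add_toperation_state(ch, &ts);
	IPFW_UH_WUNLOCK(ch);

	/* sleepable work: M_WAITOK allocations, algo callbacks, ... */

	IPFW_UH_WLOCK(ch);
	del_toperation_state(ch, &ts);
	if (ts.modified != 0) {
		/* table was swapped/resized: roll back and restart */
	}
	IPFW_UH_WUNLOCK(ch);
	return (0);
}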
*/ -#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t)) -#define KEY_LEN_INET6 (offsetof(struct sockaddr_in6, sin6_addr) + sizeof(struct in6_addr)) -#define KEY_LEN_IFACE (offsetof(struct xaddr_iface, ifname)) +static void +store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num) +{ + int flag; -#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr)) -#define OFF_LEN_INET6 (8 * offsetof(struct sockaddr_in6, sin6_addr)) -#define OFF_LEN_IFACE (8 * offsetof(struct xaddr_iface, ifname)) + flag = 0; + switch (error) { + case 0: + if (op == OP_ADD && num != 0) + flag = TEI_FLAGS_ADDED; + if (op == OP_DEL) + flag = TEI_FLAGS_DELETED; + break; + case ENOENT: + flag = TEI_FLAGS_NOTFOUND; + break; + case EEXIST: + flag = TEI_FLAGS_EXISTS; + break; + default: + flag = TEI_FLAGS_ERROR; + } -#ifdef INET6 -static inline void -ipv6_writemask(struct in6_addr *addr6, uint8_t mask) + tei->flags |= flag; +} + +/* + * Creates and references table with default parameters. + * Saves table config, algo and allocated kidx info @ptc, @pta and + * @pkidx if non-zero. + * Used for table auto-creation to support old binaries. + * + * Returns 0 on success. + */ +static int +create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti, + uint16_t *pkidx) { - uint32_t *cp; + ipfw_xtable_info xi; + int error; + + memset(&xi, 0, sizeof(xi)); + /* Set default value mask for legacy clients */ + xi.vmask = IPFW_VTYPE_LEGACY; + + error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1); + if (error != 0) + return (error); - for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) - *cp++ = 0xFFFFFFFF; - *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); + return (0); +} + +/* + * Find and reference existing table optionally + * creating new one. + * + * Saves found table config into @ptc. + * Note function may drop/acquire UH_WLOCK. + * Returns 0 if table was found/created and referenced + * or non-zero return code. + */ +static int +find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti, + struct tentry_info *tei, uint32_t count, int op, + struct table_config **ptc) +{ + struct namedobj_instance *ni; + struct table_config *tc; + uint16_t kidx; + int error; + + IPFW_UH_WLOCK_ASSERT(ch); + + ni = CHAIN_TO_NI(ch); + tc = NULL; + if ((tc = find_table(ni, ti)) != NULL) { + /* check table type */ + if (tc->no.subtype != ti->type) + return (EINVAL); + + if (tc->locked != 0) + return (EACCES); + + /* Try to exit early on limit hit */ + if (op == OP_ADD && count == 1 && + check_table_limit(tc, tei) != 0) + return (EFBIG); + + /* Reference and return */ + tc->no.refcnt++; + *ptc = tc; + return (0); + } + + if (op == OP_DEL) + return (ESRCH); + + /* Compatibility mode: create new table for old clients */ + if ((tei->flags & TEI_FLAGS_COMPAT) == 0) + return (ESRCH); + + IPFW_UH_WUNLOCK(ch); + error = create_table_compat(ch, ti, &kidx); + IPFW_UH_WLOCK(ch); + + if (error != 0) + return (error); + + tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); + KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx)); + + /* OK, now we've got referenced table. */ + *ptc = tc; + return (0); +} + +/* + * Rolls back already @added to @tc entries using state array @ta_buf_m. + * Assume the following layout: + * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases + * 2) DEL state (ta_buf_m[count[ ... 
t_buf_m[count + added - 1]) + * for storing deleted state + */ +static void +rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc, + struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m, + uint32_t count, uint32_t added) +{ + struct table_algo *ta; + struct tentry_info *ptei; + caddr_t v, vv; + size_t ta_buf_sz; + int error, i; + uint32_t num; + + IPFW_UH_WLOCK_ASSERT(ch); + + ta = tc->ta; + ta_buf_sz = ta->ta_buf_size; + v = ta_buf_m; + vv = v + count * ta_buf_sz; + for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) { + ptei = &tei[i]; + if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) { + + /* + * We have old value stored by previous + * call in @ptei->value. Do add once again + * to restore it. + */ + error = ta->add(tc->astate, tinfo, ptei, v, &num); + KASSERT(error == 0, ("rollback UPDATE fail")); + KASSERT(num == 0, ("rollback UPDATE fail2")); + continue; + } + + error = ta->prepare_del(ch, ptei, vv); + KASSERT(error == 0, ("pre-rollback INSERT failed")); + error = ta->del(tc->astate, tinfo, ptei, vv, &num); + KASSERT(error == 0, ("rollback INSERT failed")); + tc->count -= num; + } +} + +/* + * Prepares add/del state for all @count entries in @tei. + * Uses either stack buffer (@ta_buf) or allocates a new one. + * Stores pointer to allocated buffer back to @ta_buf. + * + * Returns 0 on success. + */ +static int +prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, + struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf) +{ + caddr_t ta_buf_m, v; + size_t ta_buf_sz, sz; + struct tentry_info *ptei; + int error, i; + + error = 0; + ta_buf_sz = ta->ta_buf_size; + if (count == 1) { + /* Sigle add/delete, use on-stack buffer */ + memset(*ta_buf, 0, TA_BUF_SZ); + ta_buf_m = *ta_buf; + } else { + + /* + * Multiple adds/deletes, allocate larger buffer + * + * Note we need 2xcount buffer for add case: + * we have hold both ADD state + * and DELETE state (this may be needed + * if we need to rollback all changes) + */ + sz = count * ta_buf_sz; + ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP, + M_WAITOK | M_ZERO); + } + + v = ta_buf_m; + for (i = 0; i < count; i++, v += ta_buf_sz) { + ptei = &tei[i]; + error = (op == OP_ADD) ? + ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v); + + /* + * Some syntax error (incorrect mask, or address, or + * anything). Return error regardless of atomicity + * settings. + */ + if (error != 0) + break; + } + + *ta_buf = ta_buf_m; + return (error); } -#endif +/* + * Flushes allocated state for each @count entries in @tei. + * Frees @ta_buf_m if differs from stack buffer @ta_buf. 
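For an OP_ADD batch the buffer prepared above is laid out as two count-sized arrays of ta->ta_buf_size slots:

	ta_buf_m: [ ADD state 0 .. count-1 | DEL state 0 .. count-1 ]

The first half feeds ta->add(); the second half is only filled by rollback_added_entries() when an atomic batch has to be undone, which is why OP_ADD allocates sz * 2. A single-entry operation skips malloc() entirely and reuses the caller's TA_BUF_SZ on-stack buffer.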
+ */ +static void +flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, + struct tentry_info *tei, uint32_t count, int rollback, + caddr_t ta_buf_m, caddr_t ta_buf) +{ + caddr_t v; + struct tentry_info *ptei; + size_t ta_buf_sz; + int i; + + ta_buf_sz = ta->ta_buf_size; + + /* Run cleaning callback anyway */ + v = ta_buf_m; + for (i = 0; i < count; i++, v += ta_buf_sz) { + ptei = &tei[i]; + ta->flush_entry(ch, ptei, v); + if (ptei->ptv != NULL) { + free(ptei->ptv, M_IPFW); + ptei->ptv = NULL; + } + } + + /* Clean up "deleted" state in case of rollback */ + if (rollback != 0) { + v = ta_buf_m + count * ta_buf_sz; + for (i = 0; i < count; i++, v += ta_buf_sz) + ta->flush_entry(ch, &tei[i], v); + } + + if (ta_buf_m != ta_buf) + free(ta_buf_m, M_TEMP); +} + + +static void +rollback_add_entry(void *object, struct op_state *_state) +{ + struct ip_fw_chain *ch; + struct tableop_state *ts; + + ts = (struct tableop_state *)_state; + + if (ts->tc != object && ts->ch != object) + return; + + ch = ts->ch; + + IPFW_UH_WLOCK_ASSERT(ch); + + /* Call specifid unlockers */ + rollback_table_values(ts); + + /* Indicate we've called */ + ts->modified = 1; +} + +/* + * Adds/updates one or more entries in table @ti. + * + * Function may drop/reacquire UH wlock multiple times due to + * items alloc, algorithm callbacks (check_space), value linkage + * (new values, value storage realloc), etc.. + * Other processes like other adds (which may involve storage resize), + * table swaps (which changes table data and may change algo type), + * table modify (which may change value mask) may be executed + * simultaneously so we need to deal with it. + * + * The following approach was implemented: + * we have per-chain linked list, protected with UH lock. + * add_table_entry prepares special on-stack structure wthich is passed + * to its descendants. Users add this structure to this list before unlock. + * After performing needed operations and acquiring UH lock back, each user + * checks if structure has changed. If true, it rolls local state back and + * returns without error to the caller. + * add_table_entry() on its own checks if structure has changed and restarts + * its operation from the beginning (goto restart). + * + * Functions which are modifying fields of interest (currently + * resize_shared_value_storage() and swap_tables() ) + * traverses given list while holding UH lock immediately before + * performing their operations calling function provided be list entry + * ( currently rollback_add_entry ) which performs rollback for all necessary + * state and sets appropriate values in structure indicating rollback + * has happened. + * + * Algo interaction: + * Function references @ti first to ensure table won't + * disappear or change its type. + * After that, prepare_add callback is called for each @tei entry. + * Next, we try to add each entry under UH+WHLOCK + * using add() callback. + * Finally, we free all state by calling flush_entry callback + * for each @tei. + * + * Returns 0 on success. 
+ */ int -ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value) -{ - struct radix_node_head *rnh, **rnh_ptr; - struct table_entry *ent; - struct table_xentry *xent; - struct radix_node *rn; - in_addr_t addr; - int offset; - void *ent_ptr; - struct sockaddr *addr_ptr, *mask_ptr; - char c; - - if (tbl >= V_fw_tables_max) - return (EINVAL); +add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, + struct tentry_info *tei, uint8_t flags, uint32_t count) +{ + struct table_config *tc; + struct table_algo *ta; + uint16_t kidx; + int error, first_error, i, rollback; + uint32_t num, numadd; + struct tentry_info *ptei; + struct tableop_state ts; + char ta_buf[TA_BUF_SZ]; + caddr_t ta_buf_m, v; + + memset(&ts, 0, sizeof(ts)); + ta = NULL; + IPFW_UH_WLOCK(ch); - switch (type) { - case IPFW_TABLE_CIDR: - if (plen == sizeof(in_addr_t)) { -#ifdef INET - /* IPv4 case */ - if (mlen > 32) - return (EINVAL); - ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); - ent->value = value; - /* Set 'total' structure length */ - KEY_LEN(ent->addr) = KEY_LEN_INET; - KEY_LEN(ent->mask) = KEY_LEN_INET; - /* Set offset of IPv4 address in bits */ - offset = OFF_LEN_INET; - ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); - addr = *((in_addr_t *)paddr); - ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; - /* Set pointers */ - rnh_ptr = &ch->tables[tbl]; - ent_ptr = ent; - addr_ptr = (struct sockaddr *)&ent->addr; - mask_ptr = (struct sockaddr *)&ent->mask; -#endif -#ifdef INET6 - } else if (plen == sizeof(struct in6_addr)) { - /* IPv6 case */ - if (mlen > 128) - return (EINVAL); - xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); - xent->value = value; - /* Set 'total' structure length */ - KEY_LEN(xent->a.addr6) = KEY_LEN_INET6; - KEY_LEN(xent->m.mask6) = KEY_LEN_INET6; - /* Set offset of IPv6 address in bits */ - offset = OFF_LEN_INET6; - ipv6_writemask(&xent->m.mask6.sin6_addr, mlen); - memcpy(&xent->a.addr6.sin6_addr, paddr, sizeof(struct in6_addr)); - APPLY_MASK(&xent->a.addr6.sin6_addr, &xent->m.mask6.sin6_addr); - /* Set pointers */ - rnh_ptr = &ch->xtables[tbl]; - ent_ptr = xent; - addr_ptr = (struct sockaddr *)&xent->a.addr6; - mask_ptr = (struct sockaddr *)&xent->m.mask6; -#endif - } else { - /* Unknown CIDR type */ - return (EINVAL); + /* + * Find and reference existing table. + */ +restart: + if (ts.modified != 0) { + IPFW_UH_WUNLOCK(ch); + flush_batch_buffer(ch, ta, tei, count, rollback, + ta_buf_m, ta_buf); + memset(&ts, 0, sizeof(ts)); + ta = NULL; + IPFW_UH_WLOCK(ch); + } + + error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc); + if (error != 0) { + IPFW_UH_WUNLOCK(ch); + return (error); + } + ta = tc->ta; + + /* Fill in tablestate */ + ts.ch = ch; + ts.opstate.func = rollback_add_entry; + ts.tc = tc; + ts.vshared = tc->vshared; + ts.vmask = tc->vmask; + ts.ta = ta; + ts.tei = tei; + ts.count = count; + rollback = 0; + add_toperation_state(ch, &ts); + IPFW_UH_WUNLOCK(ch); + + /* Allocate memory and prepare record(s) */ + /* Pass stack buffer by default */ + ta_buf_m = ta_buf; + error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m); + + IPFW_UH_WLOCK(ch); + del_toperation_state(ch, &ts); + /* Drop reference we've used in first search */ + tc->no.refcnt--; + + /* Check prepare_batch_buffer() error */ + if (error != 0) + goto cleanup; + + /* + * Check if table swap has happened. + * (so table algo might be changed). 
+ * Restart operation to achieve consistent behavior. + */ + if (ts.modified != 0) + goto restart; + + /* + * Link all values values to shared/per-table value array. + * + * May release/reacquire UH_WLOCK. + */ + error = ipfw_link_table_values(ch, &ts); + if (error != 0) + goto cleanup; + if (ts.modified != 0) + goto restart; + + /* + * Ensure we are able to add all entries without additional + * memory allocations. May release/reacquire UH_WLOCK. + */ + kidx = tc->no.kidx; + error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count); + if (error != 0) + goto cleanup; + if (ts.modified != 0) + goto restart; + + /* We've got valid table in @tc. Let's try to add data */ + kidx = tc->no.kidx; + ta = tc->ta; + numadd = 0; + first_error = 0; + + IPFW_WLOCK(ch); + + v = ta_buf_m; + for (i = 0; i < count; i++, v += ta->ta_buf_size) { + ptei = &tei[i]; + num = 0; + /* check limit before adding */ + if ((error = check_table_limit(tc, ptei)) == 0) { + error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), + ptei, v, &num); + /* Set status flag to inform userland */ + store_tei_result(ptei, OP_ADD, error, num); } + if (error == 0) { + /* Update number of records to ease limit checking */ + tc->count += num; + numadd += num; + continue; + } + + if (first_error == 0) + first_error = error; + + /* + * Some error have happened. Check our atomicity + * settings: continue if atomicity is not required, + * rollback changes otherwise. + */ + if ((flags & IPFW_CTF_ATOMIC) == 0) + continue; + + rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx), + tei, ta_buf_m, count, i); + + rollback = 1; break; + } + + IPFW_WUNLOCK(ch); + + ipfw_garbage_table_values(ch, tc, tei, count, rollback); + + /* Permit post-add algorithm grow/rehash. */ + if (numadd != 0) + check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); + + /* Return first error to user, if any */ + error = first_error; + +cleanup: + IPFW_UH_WUNLOCK(ch); + + flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf); - case IPFW_TABLE_INTERFACE: - /* Check if string is terminated */ - c = ((char *)paddr)[IF_NAMESIZE - 1]; - ((char *)paddr)[IF_NAMESIZE - 1] = '\0'; - if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0')) - return (EINVAL); + return (error); +} - /* Include last \0 into comparison */ - mlen++; - - xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); - xent->value = value; - /* Set 'total' structure length */ - KEY_LEN(xent->a.iface) = KEY_LEN_IFACE + mlen; - KEY_LEN(xent->m.ifmask) = KEY_LEN_IFACE + mlen; - /* Set offset of interface name in bits */ - offset = OFF_LEN_IFACE; - memcpy(xent->a.iface.ifname, paddr, mlen); - /* Assume direct match */ - /* TODO: Add interface pattern matching */ -#if 0 - memset(xent->m.ifmask.ifname, 0xFF, IF_NAMESIZE); - mask_ptr = (struct sockaddr *)&xent->m.ifmask; -#endif - /* Set pointers */ - rnh_ptr = &ch->xtables[tbl]; - ent_ptr = xent; - addr_ptr = (struct sockaddr *)&xent->a.iface; - mask_ptr = NULL; - break; +/* + * Deletes one or more entries in table @ti. + * + * Returns 0 on success. + */ +int +del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, + struct tentry_info *tei, uint8_t flags, uint32_t count) +{ + struct table_config *tc; + struct table_algo *ta; + struct tentry_info *ptei; + uint16_t kidx; + int error, first_error, i; + uint32_t num, numdel; + char ta_buf[TA_BUF_SZ]; + caddr_t ta_buf_m, v; - default: - return (EINVAL); + /* + * Find and reference existing table. 
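Callers batch entries simply by passing an array of tentry_info; with IPFW_CTF_ATOMIC set, the first failing entry makes add_table_entry() undo everything inserted so far instead of continuing. A hedged sketch against an existing table, with key/value setup elided and the table number invented:

static int
example_atomic_add(struct ip_fw_chain *ch)
{
	struct tentry_info tei[2];
	struct tid_info ti;

	memset(tei, 0, sizeof(tei));
	memset(&ti, 0, sizeof(ti));
	ti.uidx = 10;			/* hypothetical table number */
	ti.type = IPFW_TABLE_CIDR;
	/* fill tei[i].paddr, tei[i].subtype, tei[i].masklen, tei[i].pvalue */

	/* all-or-nothing: a partial batch is rolled back on error */
	return (add_table_entry(ch, &ti, tei, IPFW_CTF_ATOMIC, 2));
}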
+ */ + IPFW_UH_WLOCK(ch); + error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc); + if (error != 0) { + IPFW_UH_WUNLOCK(ch); + return (error); + } + ta = tc->ta; + IPFW_UH_WUNLOCK(ch); + + /* Allocate memory and prepare record(s) */ + /* Pass stack buffer by default */ + ta_buf_m = ta_buf; + error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m); + if (error != 0) + goto cleanup; + + IPFW_UH_WLOCK(ch); + + /* Drop reference we've used in first search */ + tc->no.refcnt--; + + /* + * Check if table algo is still the same. + * (changed ta may be the result of table swap). + */ + if (ta != tc->ta) { + IPFW_UH_WUNLOCK(ch); + error = EINVAL; + goto cleanup; } + kidx = tc->no.kidx; + numdel = 0; + first_error = 0; + IPFW_WLOCK(ch); + v = ta_buf_m; + for (i = 0; i < count; i++, v += ta->ta_buf_size) { + ptei = &tei[i]; + num = 0; + error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v, + &num); + /* Save state for userland */ + store_tei_result(ptei, OP_DEL, error, num); + if (error != 0 && first_error == 0) + first_error = error; + tc->count -= num; + numdel += num; + } + IPFW_WUNLOCK(ch); - /* Check if tabletype is valid */ - if ((ch->tabletype[tbl] != 0) && (ch->tabletype[tbl] != type)) { - IPFW_WUNLOCK(ch); - free(ent_ptr, M_IPFW_TBL); - return (EINVAL); + /* Unlink non-used values */ + ipfw_garbage_table_values(ch, tc, tei, count, 0); + + if (numdel != 0) { + /* Run post-del hook to permit shrinking */ + check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); } - /* Check if radix tree exists */ - if ((rnh = *rnh_ptr) == NULL) { - IPFW_WUNLOCK(ch); - /* Create radix for a new table */ - if (!rn_inithead((void **)&rnh, offset)) { - free(ent_ptr, M_IPFW_TBL); - return (ENOMEM); + IPFW_UH_WUNLOCK(ch); + + /* Return first error to user, if any */ + error = first_error; + +cleanup: + flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf); + + return (error); +} + +/* + * Ensure that table @tc has enough space to add @count entries without + * need for reallocation. + * + * Callbacks order: + * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize. + * + * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags. + * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage + * 3) modify (UH_WLOCK + WLOCK) - switch pointers + * 4) flush_modify (UH_WLOCK) - free state, if needed + * + * Returns 0 on success. + */ +static int +check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, + struct table_config *tc, struct table_info *ti, uint32_t count) +{ + struct table_algo *ta; + uint64_t pflags; + char ta_buf[TA_BUF_SZ]; + int error; + + IPFW_UH_WLOCK_ASSERT(ch); + + error = 0; + ta = tc->ta; + if (ta->need_modify == NULL) + return (0); + + /* Acquire reference not to loose @tc between locks/unlocks */ + tc->no.refcnt++; + + /* + * TODO: think about avoiding race between large add/large delete + * operation on algorithm which implements shrinking along with + * growing. + */ + while (true) { + pflags = 0; + if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { + error = 0; + break; } - IPFW_WLOCK(ch); - if (*rnh_ptr != NULL) { - /* Tree is already attached by other thread */ - rn_detachhead((void **)&rnh); - rnh = *rnh_ptr; - /* Check table type another time */ - if (ch->tabletype[tbl] != type) { - IPFW_WUNLOCK(ch); - free(ent_ptr, M_IPFW_TBL); - return (EINVAL); - } - } else { - *rnh_ptr = rnh; - /* - * Set table type. 
It can be set already - * (if we have IPv6-only table) but setting - * it another time does not hurt + /* We have to shrink/grow table */ + if (ts != NULL) + add_toperation_state(ch, ts); + IPFW_UH_WUNLOCK(ch); + + memset(&ta_buf, 0, sizeof(ta_buf)); + error = ta->prepare_mod(ta_buf, &pflags); + + IPFW_UH_WLOCK(ch); + if (ts != NULL) + del_toperation_state(ch, ts); + + if (error != 0) + break; + + if (ts != NULL && ts->modified != 0) { + + /* + * Swap operation has happened + * so we're currently operating on other + * table data. Stop doing this. + */ + ta->flush_mod(ta_buf); + break; + } + + /* Check if we still need to alter table */ + ti = KIDX_TO_TI(ch, tc->no.kidx); + if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { + IPFW_UH_WUNLOCK(ch); + + /* + * Other thread has already performed resize. + * Flush our state and return. */ - ch->tabletype[tbl] = type; + ta->flush_mod(ta_buf); + break; + } + + error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags); + if (error == 0) { + /* Do actual modification */ + IPFW_WLOCK(ch); + ta->modify(tc->astate, ti, ta_buf, pflags); + IPFW_WUNLOCK(ch); } + + /* Anyway, flush data and retry */ + ta->flush_mod(ta_buf); } - rn = rnh->rnh_addaddr(addr_ptr, mask_ptr, rnh, ent_ptr); - IPFW_WUNLOCK(ch); + tc->no.refcnt--; + return (error); +} - if (rn == NULL) { - free(ent_ptr, M_IPFW_TBL); - return (EEXIST); +/* + * Adds or deletes record in table. + * Data layout (v0): + * Request: [ ip_fw3_opheader ipfw_table_xentry ] + * + * Returns 0 on success + */ +static int +manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_table_xentry *xent; + struct tentry_info tei; + struct tid_info ti; + struct table_value v; + int error, hdrlen, read; + + hdrlen = offsetof(ipfw_table_xentry, k); + + /* Check minimum header size */ + if (sd->valsize < (sizeof(*op3) + hdrlen)) + return (EINVAL); + + read = sizeof(ip_fw3_opheader); + + /* Check if xentry len field is valid */ + xent = (ipfw_table_xentry *)(op3 + 1); + if (xent->len < hdrlen || xent->len + read > sd->valsize) + return (EINVAL); + + memset(&tei, 0, sizeof(tei)); + tei.paddr = &xent->k; + tei.masklen = xent->masklen; + ipfw_import_table_value_legacy(xent->value, &v); + tei.pvalue = &v; + /* Old requests compatibility */ + tei.flags = TEI_FLAGS_COMPAT; + if (xent->type == IPFW_TABLE_ADDR) { + if (xent->len - hdrlen == sizeof(in_addr_t)) + tei.subtype = AF_INET; + else + tei.subtype = AF_INET6; } - return (0); + + memset(&ti, 0, sizeof(ti)); + ti.uidx = xent->tbl; + ti.type = xent->type; + + error = (op3->opcode == IP_FW_TABLE_XADD) ? + add_table_entry(ch, &ti, &tei, 0, 1) : + del_table_entry(ch, &ti, &tei, 0, 1); + + return (error); } -int -ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type) +/* + * Adds or deletes record in table. 
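From the algorithm side, supporting resize means implementing the five callbacks in the order check_table_space() drives them. The signatures below match the call sites above; the example state layout and all ex_* names are invented for illustration:

struct ex_mod_state {
	void	*new_storage;
	size_t	 new_size;
};

static int
ex_need_modify(void *astate, struct table_info *ti, uint32_t count,
    uint64_t *pflags)
{

	/* return non-zero if resize needed; encode target size in *pflags */
	return (0);
}

static int
ex_prepare_mod(void *ta_buf, uint64_t *pflags)
{
	struct ex_mod_state *ms = ta_buf;

	/* no locks held here, so M_WAITOK is fine */
	ms->new_size = (size_t)*pflags;
	ms->new_storage = malloc(ms->new_size, M_IPFW, M_WAITOK | M_ZERO);
	return (0);
}

static int
ex_fill_mod(void *astate, struct table_info *ti, void *ta_buf,
    uint64_t *pflags)
{

	/* UH_WLOCK held: copy old data into ms->new_storage */
	return (0);
}

static void
ex_modify(void *astate, struct table_info *ti, void *ta_buf,
    uint64_t pflags)
{

	/* UH_WLOCK + WLOCK held: switch pointers only, no slow work */
}

static void
ex_flush_mod(void *ta_buf)
{

	/* free whatever ex_modify() did not consume */
}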
+ * Data layout (v1)(current): + * Request: [ ipfw_obj_header + * ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ] + * ] + * + * Returns 0 on success + */ +static int +manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - struct radix_node_head *rnh, **rnh_ptr; - struct table_entry *ent; - in_addr_t addr; - struct sockaddr_in sa, mask; - struct sockaddr *sa_ptr, *mask_ptr; - char c; + ipfw_obj_tentry *tent, *ptent; + ipfw_obj_ctlv *ctlv; + ipfw_obj_header *oh; + struct tentry_info *ptei, tei, *tei_buf; + struct tid_info ti; + int error, i, kidx, read; + + /* Check minimum header size */ + if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv))) + return (EINVAL); - if (tbl >= V_fw_tables_max) + /* Check if passed data is too long */ + if (sd->valsize != sd->kavail) return (EINVAL); - switch (type) { - case IPFW_TABLE_CIDR: - if (plen == sizeof(in_addr_t)) { - /* Set 'total' structure length */ - KEY_LEN(sa) = KEY_LEN_INET; - KEY_LEN(mask) = KEY_LEN_INET; - mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); - addr = *((in_addr_t *)paddr); - sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; - rnh_ptr = &ch->tables[tbl]; - sa_ptr = (struct sockaddr *)&sa; - mask_ptr = (struct sockaddr *)&mask; -#ifdef INET6 - } else if (plen == sizeof(struct in6_addr)) { - /* IPv6 case */ - if (mlen > 128) - return (EINVAL); - struct sockaddr_in6 sa6, mask6; - memset(&sa6, 0, sizeof(struct sockaddr_in6)); - memset(&mask6, 0, sizeof(struct sockaddr_in6)); - /* Set 'total' structure length */ - KEY_LEN(sa6) = KEY_LEN_INET6; - KEY_LEN(mask6) = KEY_LEN_INET6; - ipv6_writemask(&mask6.sin6_addr, mlen); - memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr)); - APPLY_MASK(&sa6.sin6_addr, &mask6.sin6_addr); - rnh_ptr = &ch->xtables[tbl]; - sa_ptr = (struct sockaddr *)&sa6; - mask_ptr = (struct sockaddr *)&mask6; -#endif - } else { - /* Unknown CIDR type */ - return (EINVAL); - } - break; + oh = (ipfw_obj_header *)sd->kbuf; - case IPFW_TABLE_INTERFACE: - /* Check if string is terminated */ - c = ((char *)paddr)[IF_NAMESIZE - 1]; - ((char *)paddr)[IF_NAMESIZE - 1] = '\0'; - if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0')) - return (EINVAL); + /* Basic length checks for TLVs */ + if (oh->ntlv.head.length != sizeof(oh->ntlv)) + return (EINVAL); - struct xaddr_iface ifname, ifmask; - memset(&ifname, 0, sizeof(ifname)); - - /* Include last \0 into comparison */ - mlen++; - - /* Set 'total' structure length */ - KEY_LEN(ifname) = KEY_LEN_IFACE + mlen; - KEY_LEN(ifmask) = KEY_LEN_IFACE + mlen; - /* Assume direct match */ - /* FIXME: Add interface pattern matching */ -#if 0 - memset(ifmask.ifname, 0xFF, IF_NAMESIZE); - mask_ptr = (struct sockaddr *)&ifmask; -#endif - mask_ptr = NULL; - memcpy(ifname.ifname, paddr, mlen); - /* Set pointers */ - rnh_ptr = &ch->xtables[tbl]; - sa_ptr = (struct sockaddr *)&ifname; + read = sizeof(*oh); - break; + ctlv = (ipfw_obj_ctlv *)(oh + 1); + if (ctlv->head.length + read != sd->valsize) + return (EINVAL); - default: + read += sizeof(*ctlv); + tent = (ipfw_obj_tentry *)(ctlv + 1); + if (ctlv->count * sizeof(*tent) + read != sd->valsize) return (EINVAL); + + if (ctlv->count == 0) + return (0); + + /* + * Mark entire buffer as "read". + * This instructs sopt api write it back + * after function return. 
+ */ + ipfw_get_sopt_header(sd, sd->valsize); + + /* Perform basic checks for each entry */ + ptent = tent; + kidx = tent->idx; + for (i = 0; i < ctlv->count; i++, ptent++) { + if (ptent->head.length != sizeof(*ptent)) + return (EINVAL); + if (ptent->idx != kidx) + return (ENOTSUP); } - IPFW_WLOCK(ch); - if ((rnh = *rnh_ptr) == NULL) { - IPFW_WUNLOCK(ch); + /* Convert data into kernel request objects */ + objheader_to_ti(oh, &ti); + ti.type = oh->ntlv.type; + ti.uidx = kidx; + + /* Use on-stack buffer for single add/del */ + if (ctlv->count == 1) { + memset(&tei, 0, sizeof(tei)); + tei_buf = &tei; + } else + tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP, + M_WAITOK | M_ZERO); + + ptei = tei_buf; + ptent = tent; + for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { + ptei->paddr = &ptent->k; + ptei->subtype = ptent->subtype; + ptei->masklen = ptent->masklen; + if (ptent->head.flags & IPFW_TF_UPDATE) + ptei->flags |= TEI_FLAGS_UPDATE; + + ipfw_import_table_value_v1(&ptent->v.value); + ptei->pvalue = (struct table_value *)&ptent->v.value; + } + + error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ? + add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) : + del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count); + + /* Translate result back to userland */ + ptei = tei_buf; + ptent = tent; + for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { + if (ptei->flags & TEI_FLAGS_ADDED) + ptent->result = IPFW_TR_ADDED; + else if (ptei->flags & TEI_FLAGS_DELETED) + ptent->result = IPFW_TR_DELETED; + else if (ptei->flags & TEI_FLAGS_UPDATED) + ptent->result = IPFW_TR_UPDATED; + else if (ptei->flags & TEI_FLAGS_LIMIT) + ptent->result = IPFW_TR_LIMIT; + else if (ptei->flags & TEI_FLAGS_ERROR) + ptent->result = IPFW_TR_ERROR; + else if (ptei->flags & TEI_FLAGS_NOTFOUND) + ptent->result = IPFW_TR_NOTFOUND; + else if (ptei->flags & TEI_FLAGS_EXISTS) + ptent->result = IPFW_TR_EXISTS; + ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value); + } + + if (tei_buf != &tei) + free(tei_buf, M_TEMP); + + return (error); +} + +/* + * Looks up an entry in given table. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_obj_tentry ] + * Reply: [ ipfw_obj_header ipfw_obj_tentry ] + * + * Returns 0 on success + */ +static int +find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_tentry *tent; + ipfw_obj_header *oh; + struct tid_info ti; + struct table_config *tc; + struct table_algo *ta; + struct table_info *kti; + struct namedobj_instance *ni; + int error; + size_t sz; + + /* Check minimum header size */ + sz = sizeof(*oh) + sizeof(*tent); + if (sd->valsize != sz) + return (EINVAL); + + oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); + tent = (ipfw_obj_tentry *)(oh + 1); + + /* Basic length checks for TLVs */ + if (oh->ntlv.head.length != sizeof(oh->ntlv)) + return (EINVAL); + + objheader_to_ti(oh, &ti); + ti.type = oh->ntlv.type; + ti.uidx = tent->idx; + + IPFW_UH_RLOCK(ch); + ni = CHAIN_TO_NI(ch); + + /* + * Find existing table and check its type . 
+ */ + ta = NULL; + if ((tc = find_table(ni, &ti)) == NULL) { + IPFW_UH_RUNLOCK(ch); return (ESRCH); } - if (ch->tabletype[tbl] != type) { - IPFW_WUNLOCK(ch); + /* check table type */ + if (tc->no.subtype != ti.type) { + IPFW_UH_RUNLOCK(ch); return (EINVAL); } - ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh); - IPFW_WUNLOCK(ch); + kti = KIDX_TO_TI(ch, tc->no.kidx); + ta = tc->ta; - if (ent == NULL) - return (ESRCH); + if (ta->find_tentry == NULL) + return (ENOTSUP); - free(ent, M_IPFW_TBL); - return (0); + error = ta->find_tentry(tc->astate, kti, tent); + + IPFW_UH_RUNLOCK(ch); + + return (error); } +/* + * Flushes all entries or destroys given table. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ static int -flush_table_entry(struct radix_node *rn, void *arg) +flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - struct radix_node_head * const rnh = arg; - struct table_entry *ent; + int error; + struct _ipfw_obj_header *oh; + struct tid_info ti; - ent = (struct table_entry *) - rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); - if (ent != NULL) - free(ent, M_IPFW_TBL); - return (0); + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + + oh = (struct _ipfw_obj_header *)op3; + objheader_to_ti(oh, &ti); + + if (op3->opcode == IP_FW_TABLE_XDESTROY) + error = destroy_table(ch, &ti); + else if (op3->opcode == IP_FW_TABLE_XFLUSH) + error = flush_table(ch, &ti); + else + return (ENOTSUP); + + return (error); } +static void +restart_flush(void *object, struct op_state *_state) +{ + struct tableop_state *ts; + + ts = (struct tableop_state *)_state; + + if (ts->tc != object) + return; + + /* Indicate we've called */ + ts->modified = 1; +} + +/* + * Flushes given table. + * + * Function create new table instance with the same + * parameters, swaps it with old one and + * flushes state without holding runtime WLOCK. + * + * Returns 0 on success. + */ int -ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl) +flush_table(struct ip_fw_chain *ch, struct tid_info *ti) { - struct radix_node_head *rnh, *xrnh; + struct namedobj_instance *ni; + struct table_config *tc; + struct table_algo *ta; + struct table_info ti_old, ti_new, *tablestate; + void *astate_old, *astate_new; + char algostate[64], *pstate; + struct tableop_state ts; + int error, need_gc; + uint16_t kidx; + uint8_t tflags; - if (tbl >= V_fw_tables_max) - return (EINVAL); + /* + * Stage 1: save table algorithm. + * Reference found table to ensure it won't disappear. 
+ */
+	IPFW_UH_WLOCK(ch);
+	ni = CHAIN_TO_NI(ch);
+	if ((tc = find_table(ni, ti)) == NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		return (ESRCH);
+	}
+	need_gc = 0;
+	astate_new = NULL;
+	memset(&ti_new, 0, sizeof(ti_new));
+restart:
+	/* Set up swap handler */
+	memset(&ts, 0, sizeof(ts));
+	ts.opstate.func = restart_flush;
+	ts.tc = tc;
+
+	ta = tc->ta;
+	/* Do not flush readonly tables */
+	if ((ta->flags & TA_FLAG_READONLY) != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		return (EACCES);
+	}
+	/* Save startup algo parameters */
+	if (ta->print_config != NULL) {
+		ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
+		    algostate, sizeof(algostate));
+		pstate = algostate;
+	} else
+		pstate = NULL;
+	tflags = tc->tflags;
+	tc->no.refcnt++;
+	add_toperation_state(ch, &ts);
+	IPFW_UH_WUNLOCK(ch);
+
+	/*
+	 * Stage 1.5: if this is not the first attempt, destroy previous state
+	 */
+	if (need_gc != 0) {
+		ta->destroy(astate_new, &ti_new);
+		need_gc = 0;
+	}
 	/*
-	 * We free both (IPv4 and extended) radix trees and
-	 * clear table type here to permit table to be reused
-	 * for different type without module reload
+	 * Stage 2: allocate new table instance using same algo.
 	 */
+	memset(&ti_new, 0, sizeof(struct table_info));
+	error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
+
+	/*
+	 * Stage 3: swap old state pointers with newly-allocated ones.
+	 * Decrease refcount.
+	 */
+	IPFW_UH_WLOCK(ch);
+	tc->no.refcnt--;
+	del_toperation_state(ch, &ts);
+
+	if (error != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		return (error);
+	}
+
+	/*
+	 * Restart operation if table swap has happened:
+	 * even if algo may be the same, algo init parameters
+	 * may change. Restart operation instead of doing
+	 * complex checks.
+	 */
+	if (ts.modified != 0) {
+		/* Delay destroying data since we're holding UH lock */
+		need_gc = 1;
+		goto restart;
+	}
+
+	ni = CHAIN_TO_NI(ch);
+	kidx = tc->no.kidx;
+	tablestate = (struct table_info *)ch->tablestate;
 	IPFW_WLOCK(ch);
-	/* Set IPv4 table pointer to zero */
-	if ((rnh = ch->tables[tbl]) != NULL)
-		ch->tables[tbl] = NULL;
-	/* Set extended table pointer to zero */
-	if ((xrnh = ch->xtables[tbl]) != NULL)
-		ch->xtables[tbl] = NULL;
-	/* Zero table type */
-	ch->tabletype[tbl] = 0;
+	ti_old = tablestate[kidx];
+	tablestate[kidx] = ti_new;
 	IPFW_WUNLOCK(ch);
-	if (rnh != NULL) {
-		rnh->rnh_walktree(rnh, flush_table_entry, rnh);
-		rn_detachhead((void **)&rnh);
+	astate_old = tc->astate;
+	tc->astate = astate_new;
+	tc->ti_copy = ti_new;
+	tc->count = 0;
+
+	/* Notify algo on real @ti address */
+	if (ta->change_ti != NULL)
+		ta->change_ti(tc->astate, &tablestate[kidx]);
+
+	/*
+	 * Stage 4: unref values.
+	 */
+	ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
+	IPFW_UH_WUNLOCK(ch);
+
+	/*
+	 * Stage 5: perform real flush/destroy.
+	 */
+	ta->destroy(astate_old, &ti_old);
+
+	return (0);
+}
+
+/*
+ * Swaps two tables.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
+ *
+ * Returns 0 on success
+ */
+static int
+swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	int error;
+	struct _ipfw_obj_header *oh;
+	struct tid_info ti_a, ti_b;
+
+	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
+		return (EINVAL);
+
+	oh = (struct _ipfw_obj_header *)op3;
+	ntlv_to_ti(&oh->ntlv, &ti_a);
+	ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b);
+
+	error = swap_tables(ch, &ti_a, &ti_b);
+
+	return (error);
+}
+
+/*
+ * Swaps two tables of the same type/valtype.
+ *
+ * Checks whether the tables are compatible and their limits
+ * permit the swap, then actually performs the swap.
+ * + * Each table consists of 2 different parts: + * config: + * @tc (with name, set, kidx) and rule bindings, which is "stable". + * number of items + * table algo + * runtime: + * runtime data @ti (ch->tablestate) + * runtime cache in @tc + * algo-specific data (@tc->astate) + * + * So we switch: + * all runtime data + * number of items + * table algo + * + * After that we call @ti change handler for each table. + * + * Note that referencing @tc won't protect tc->ta from change. + * XXX: Do we need to restrict swap between locked tables? + * XXX: Do we need to exchange ftype? + * + * Returns 0 on success. + */ +static int +swap_tables(struct ip_fw_chain *ch, struct tid_info *a, + struct tid_info *b) +{ + struct namedobj_instance *ni; + struct table_config *tc_a, *tc_b; + struct table_algo *ta; + struct table_info ti, *tablestate; + void *astate; + uint32_t count; + + /* + * Stage 1: find both tables and ensure they are of + * the same type. + */ + IPFW_UH_WLOCK(ch); + ni = CHAIN_TO_NI(ch); + if ((tc_a = find_table(ni, a)) == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + if ((tc_b = find_table(ni, b)) == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + + /* It is very easy to swap between the same table */ + if (tc_a == tc_b) { + IPFW_UH_WUNLOCK(ch); + return (0); + } + + /* Check type and value are the same */ + if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) { + IPFW_UH_WUNLOCK(ch); + return (EINVAL); } - if (xrnh != NULL) { - xrnh->rnh_walktree(xrnh, flush_table_entry, xrnh); - rn_detachhead((void **)&xrnh); + /* Check limits before swap */ + if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) || + (tc_b->limit != 0 && tc_a->count > tc_b->limit)) { + IPFW_UH_WUNLOCK(ch); + return (EFBIG); } + /* Check if one of the tables is readonly */ + if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) { + IPFW_UH_WUNLOCK(ch); + return (EACCES); + } + + /* Notify we're going to swap */ + rollback_toperation_state(ch, tc_a); + rollback_toperation_state(ch, tc_b); + + /* Everything is fine, prepare to swap */ + tablestate = (struct table_info *)ch->tablestate; + ti = tablestate[tc_a->no.kidx]; + ta = tc_a->ta; + astate = tc_a->astate; + count = tc_a->count; + + IPFW_WLOCK(ch); + /* a <- b */ + tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx]; + tc_a->ta = tc_b->ta; + tc_a->astate = tc_b->astate; + tc_a->count = tc_b->count; + /* b <- a */ + tablestate[tc_b->no.kidx] = ti; + tc_b->ta = ta; + tc_b->astate = astate; + tc_b->count = count; + IPFW_WUNLOCK(ch); + + /* Ensure tc.ti copies are in sync */ + tc_a->ti_copy = tablestate[tc_a->no.kidx]; + tc_b->ti_copy = tablestate[tc_b->no.kidx]; + + /* Notify both tables on @ti change */ + if (tc_a->ta->change_ti != NULL) + tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]); + if (tc_b->ta->change_ti != NULL) + tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]); + + IPFW_UH_WUNLOCK(ch); + return (0); } -void -ipfw_destroy_tables(struct ip_fw_chain *ch) +/* + * Destroys table specified by @ti. 
+ * Data layout (v0)(current):
+ * Request: [ ip_fw3_opheader ]
+ *
+ * Returns 0 on success
+ */
+static int
+destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
 {
-	uint16_t tbl;
+	struct namedobj_instance *ni;
+	struct table_config *tc;
 
-	/* Flush all tables */
-	for (tbl = 0; tbl < V_fw_tables_max; tbl++)
-		ipfw_flush_table(ch, tbl);
+	IPFW_UH_WLOCK(ch);
 
-	/* Free pointers itself */
-	free(ch->tables, M_IPFW);
-	free(ch->xtables, M_IPFW);
-	free(ch->tabletype, M_IPFW);
+	ni = CHAIN_TO_NI(ch);
+	if ((tc = find_table(ni, ti)) == NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		return (ESRCH);
+	}
+
+	/* Do not permit destroying referenced tables */
+	if (tc->no.refcnt > 0) {
+		IPFW_UH_WUNLOCK(ch);
+		return (EBUSY);
+	}
+
+	IPFW_WLOCK(ch);
+	unlink_table(ch, tc);
+	IPFW_WUNLOCK(ch);
+
+	/* Free obj index */
+	if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
+		printf("Error unlinking kidx %d from table %s\n",
+		    tc->no.kidx, tc->tablename);
+
+	/* Unref values used in tables while holding UH lock */
+	ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
+	IPFW_UH_WUNLOCK(ch);
+
+	free_table_config(ni, tc);
+
+	return (0);
 }
 
-int
-ipfw_init_tables(struct ip_fw_chain *ch)
+static uint32_t
+roundup2p(uint32_t v)
 {
-	/* Allocate pointers */
-	ch->tables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
-	ch->xtables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
-	ch->tabletype = malloc(V_fw_tables_max * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
-	return (0);
+
+	v--;
+	v |= v >> 1;
+	v |= v >> 2;
+	v |= v >> 4;
+	v |= v >> 8;
+	v |= v >> 16;
+	v++;
+
+	return (v);
 }
 
+/*
+ * Grow tables index.
+ *
+ * Returns 0 on success.
+ */
 int
 ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
 {
-	struct radix_node_head **tables, **xtables, *rnh;
-	struct radix_node_head **tables_old, **xtables_old;
-	uint8_t *tabletype, *tabletype_old;
 	unsigned int ntables_old, tbl;
+	struct namedobj_instance *ni;
+	void *new_idx, *old_tablestate, *tablestate;
+	struct table_info *ti;
+	struct table_config *tc;
+	int i, new_blocks;
 
 	/* Check new value for validity */
+	if (ntables == 0)
+		return (EINVAL);
 	if (ntables > IPFW_TABLES_MAX)
 		ntables = IPFW_TABLES_MAX;
+	/* Align to the nearest power of 2 */
+	ntables = (unsigned int)roundup2p(ntables);
 
 	/* Allocate new pointers */
-	tables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
-	xtables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
-	tabletype = malloc(ntables * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
+	tablestate = malloc(ntables * sizeof(struct table_info),
+	    M_IPFW, M_WAITOK | M_ZERO);
 
-	IPFW_WLOCK(ch);
+	ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
+
+	IPFW_UH_WLOCK(ch);
 	tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
+	ni = CHAIN_TO_NI(ch);
 
-	/* Copy old table pointers */
-	memcpy(tables, ch->tables, sizeof(void *) * tbl);
-	memcpy(xtables, ch->xtables, sizeof(void *) * tbl);
-	memcpy(tabletype, ch->tabletype, sizeof(uint8_t) * tbl);
+	/* Temporarily restrict decreasing max_tables */
+	if (ntables < V_fw_tables_max) {
 
-	/* Change pointers and number of tables */
-	tables_old = ch->tables;
-	xtables_old = ch->xtables;
-	tabletype_old = ch->tabletype;
-	ch->tables = tables;
-	ch->xtables = xtables;
-	ch->tabletype = tabletype;
+		/*
+		 * FIXME: Check if we really can shrink
+		 */
+		IPFW_UH_WUNLOCK(ch);
+		return (EINVAL);
+	}
+
+	/* Copy table info/indices */
+	memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
+	ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
+
+	IPFW_WLOCK(ch);
+
+	/* Change pointers */
+	old_tablestate = ch->tablestate;
+	ch->tablestate = tablestate;
+	ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
 
 	ntables_old = V_fw_tables_max;
 	V_fw_tables_max = ntables;
 
 	IPFW_WUNLOCK(ch);
 
-	/* Check if we need to destroy radix trees */
-	if (ntables < ntables_old) {
-		for (tbl = ntables; tbl < ntables_old; tbl++) {
-			if ((rnh = tables_old[tbl]) != NULL) {
-				rnh->rnh_walktree(rnh, flush_table_entry, rnh);
-				rn_detachhead((void **)&rnh);
-			}
+	/* Notify all consumers that their @ti pointer has changed */
+	ti = (struct table_info *)ch->tablestate;
+	for (i = 0; i < tbl; i++, ti++) {
+		if (ti->lookup == NULL)
+			continue;
+		tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
+		if (tc == NULL || tc->ta->change_ti == NULL)
+			continue;
 
-			if ((rnh = xtables_old[tbl]) != NULL) {
-				rnh->rnh_walktree(rnh, flush_table_entry, rnh);
-				rn_detachhead((void **)&rnh);
-			}
-		}
+		tc->ta->change_ti(tc->astate, ti);
 	}
 
+	IPFW_UH_WUNLOCK(ch);
+
 	/* Free old pointers */
-	free(tables_old, M_IPFW);
-	free(xtables_old, M_IPFW);
-	free(tabletype_old, M_IPFW);
+	free(old_tablestate, M_IPFW);
+	ipfw_objhash_bitmap_free(new_idx, new_blocks);
+
+	return (0);
+}
+
+/*
+ * Lookup table's named object by its @kidx.
+ */
+struct named_object *
+ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx)
+{
+
+	return (ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx));
+}
+
+/*
+ * Take reference to table specified in @ntlv.
+ * On success return its @kidx.
+ */
+int
+ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx)
+{
+	struct tid_info ti;
+	struct table_config *tc;
+	int error;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	ntlv_to_ti(ntlv, &ti);
+	error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc);
+	if (error != 0)
+		return (error);
+
+	if (tc == NULL)
+		return (ESRCH);
+
+	tc_ref(tc);
+	*kidx = tc->no.kidx;
 	return (0);
 }
 
+void
+ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
+{
+
+	struct namedobj_instance *ni;
+	struct named_object *no;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	ni = CHAIN_TO_NI(ch);
+	no = ipfw_objhash_lookup_kidx(ni, kidx);
+	KASSERT(no != NULL, ("Table with index %d not found", kidx));
+	no->refcnt--;
+}
+
+/*
+ * Lookup an IP @addr in table @tbl.
+ * Stores found value in @val.
+ *
+ * Returns 1 if @addr was found.
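+ *
+ * A minimal sketch of the intended kernel-side use (illustrative only;
+ * @tbl must be the kidx of an existing table, since no bounds checking
+ * is performed here):
+ *
+ *	uint32_t v;
+ *
+ *	if (ipfw_lookup_table(ch, kidx, ip->ip_src.s_addr, &v) != 0)
+ *		tablearg = v;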
+ */
 int
 ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
     uint32_t *val)
 {
-	struct radix_node_head *rnh;
-	struct table_entry *ent;
-	struct sockaddr_in sa;
+	struct table_info *ti;
 
-	if (tbl >= V_fw_tables_max)
-		return (0);
-	if ((rnh = ch->tables[tbl]) == NULL)
-		return (0);
-	KEY_LEN(sa) = KEY_LEN_INET;
-	sa.sin_addr.s_addr = addr;
-	ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh));
-	if (ent != NULL) {
-		*val = ent->value;
-		return (1);
+	ti = KIDX_TO_TI(ch, tbl);
+
+	return (ti->lookup(ti, &addr, sizeof(in_addr_t), val));
+}
+
+/*
+ * Lookup an arbitrary key @paddr of length @plen in table @tbl.
+ * Stores found value in @val.
+ *
+ * Returns 1 if key was found.
+ */
+int
+ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
+    void *paddr, uint32_t *val)
+{
+	struct table_info *ti;
+
+	ti = KIDX_TO_TI(ch, tbl);
+
+	return (ti->lookup(ti, paddr, plen, val));
+}
+
+/*
+ * Info/List/dump support for tables.
+ *
+ */
+
+/*
+ * High-level 'get' cmds sysctl handlers
+ */
+
+/*
+ * Lists all tables currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
+ * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	struct _ipfw_obj_lheader *olh;
+	int error;
+
+	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
+	if (olh == NULL)
+		return (EINVAL);
+	if (sd->valsize < olh->size)
+		return (EINVAL);
+
+	IPFW_UH_RLOCK(ch);
+	error = export_tables(ch, olh, sd);
+	IPFW_UH_RUNLOCK(ch);
+
+	return (error);
+}
+
+/*
+ * Store table info to buffer provided by @sd.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
+ * Reply: [ ipfw_obj_header ipfw_xtable_info ]
+ *
+ * Returns 0 on success.
+ */
+static int
+describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	struct _ipfw_obj_header *oh;
+	struct table_config *tc;
+	struct tid_info ti;
+	size_t sz;
+
+	sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
+	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+	if (oh == NULL)
+		return (EINVAL);
+
+	objheader_to_ti(oh, &ti);
+
+	IPFW_UH_RLOCK(ch);
+	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ESRCH);
 	}
+
+	export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
+	IPFW_UH_RUNLOCK(ch);
+
 	return (0);
 }
 
-int
-ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
-    uint32_t *val, int type)
+/*
+ * Modifies existing table.
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_xtable_info ] + * + * Returns 0 on success + */ +static int +modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - struct radix_node_head *rnh; - struct table_xentry *xent; - struct sockaddr_in6 sa6; - struct xaddr_iface iface; + struct _ipfw_obj_header *oh; + ipfw_xtable_info *i; + char *tname; + struct tid_info ti; + struct namedobj_instance *ni; + struct table_config *tc; + + if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) + return (EINVAL); - if (tbl >= V_fw_tables_max) - return (0); - if ((rnh = ch->xtables[tbl]) == NULL) - return (0); + oh = (struct _ipfw_obj_header *)sd->kbuf; + i = (ipfw_xtable_info *)(oh + 1); - switch (type) { - case IPFW_TABLE_CIDR: - KEY_LEN(sa6) = KEY_LEN_INET6; - memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr)); - xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); - break; + /* + * Verify user-supplied strings. + * Check for null-terminated/zero-length strings/ + */ + tname = oh->ntlv.name; + if (check_table_name(tname) != 0) + return (EINVAL); - case IPFW_TABLE_INTERFACE: - KEY_LEN(iface) = KEY_LEN_IFACE + - strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1; - /* Assume direct match */ - /* FIXME: Add interface pattern matching */ - xent = (struct table_xentry *)(rnh->rnh_matchaddr(&iface, rnh)); - break; + objheader_to_ti(oh, &ti); + ti.type = i->type; - default: - return (0); + IPFW_UH_WLOCK(ch); + ni = CHAIN_TO_NI(ch); + if ((tc = find_table(ni, &ti)) == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); } - if (xent != NULL) { - *val = xent->value; - return (1); + /* Do not support any modifications for readonly tables */ + if ((tc->ta->flags & TA_FLAG_READONLY) != 0) { + IPFW_UH_WUNLOCK(ch); + return (EACCES); } + + if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0) + tc->limit = i->limit; + if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0) + tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0); + IPFW_UH_WUNLOCK(ch); + return (0); } +/* + * Creates new table. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_xtable_info ] + * + * Returns 0 on success + */ static int -count_table_entry(struct radix_node *rn, void *arg) +create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - u_int32_t * const cnt = arg; + struct _ipfw_obj_header *oh; + ipfw_xtable_info *i; + char *tname, *aname; + struct tid_info ti; + struct namedobj_instance *ni; + + if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) + return (EINVAL); + + oh = (struct _ipfw_obj_header *)sd->kbuf; + i = (ipfw_xtable_info *)(oh + 1); + + /* + * Verify user-supplied strings. + * Check for null-terminated/zero-length strings/ + */ + tname = oh->ntlv.name; + aname = i->algoname; + if (check_table_name(tname) != 0 || + strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname)) + return (EINVAL); + + if (aname[0] == '\0') { + /* Use default algorithm */ + aname = NULL; + } + + objheader_to_ti(oh, &ti); + ti.type = i->type; + + ni = CHAIN_TO_NI(ch); + + IPFW_UH_RLOCK(ch); + if (find_table(ni, &ti) != NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); + } + IPFW_UH_RUNLOCK(ch); + + return (create_table_internal(ch, &ti, aname, i, NULL, 0)); +} + +/* + * Creates new table based on @ti and @aname. + * + * Assume @aname to be checked and valid. + * Stores allocated table kidx inside @pkidx (if non-NULL). + * Reference created table if @compat is non-zero. + * + * Returns 0 on success. 
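+ *
+ * For example, the compat path that auto-creates a table referenced by
+ * a legacy rule invokes it roughly like this (sketch; see the
+ * create_table_compat() callback referenced in @opcodes below):
+ *
+ *	ipfw_xtable_info xi;
+ *	uint16_t kidx;
+ *
+ *	memset(&xi, 0, sizeof(xi));
+ *	error = create_table_internal(ch, ti, NULL, &xi, &kidx, 1);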
+ */ +static int +create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, + char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat) +{ + struct namedobj_instance *ni; + struct table_config *tc, *tc_new, *tmp; + struct table_algo *ta; + uint16_t kidx; + + ni = CHAIN_TO_NI(ch); + + ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname); + if (ta == NULL) + return (ENOTSUP); + + tc = alloc_table_config(ch, ti, ta, aname, i->tflags); + if (tc == NULL) + return (ENOMEM); + + tc->vmask = i->vmask; + tc->limit = i->limit; + if (ta->flags & TA_FLAG_READONLY) + tc->locked = 1; + else + tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0; + + IPFW_UH_WLOCK(ch); + + /* Check if table has been already created */ + tc_new = find_table(ni, ti); + if (tc_new != NULL) { + + /* + * Compat: do not fail if we're + * requesting to create existing table + * which has the same type + */ + if (compat == 0 || tc_new->no.subtype != tc->no.subtype) { + IPFW_UH_WUNLOCK(ch); + free_table_config(ni, tc); + return (EEXIST); + } + + /* Exchange tc and tc_new for proper refcounting & freeing */ + tmp = tc; + tc = tc_new; + tc_new = tmp; + } else { + /* New table */ + if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) { + IPFW_UH_WUNLOCK(ch); + printf("Unable to allocate table index." + " Consider increasing net.inet.ip.fw.tables_max"); + free_table_config(ni, tc); + return (EBUSY); + } + tc->no.kidx = kidx; + tc->no.etlv = IPFW_TLV_TBL_NAME; + + IPFW_WLOCK(ch); + link_table(ch, tc); + IPFW_WUNLOCK(ch); + } + + if (compat != 0) + tc->no.refcnt++; + if (pkidx != NULL) + *pkidx = tc->no.kidx; + + IPFW_UH_WUNLOCK(ch); + + if (tc_new != NULL) + free_table_config(ni, tc_new); - (*cnt)++; return (0); } +static void +ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti) +{ + + memset(ti, 0, sizeof(struct tid_info)); + ti->set = ntlv->set; + ti->uidx = ntlv->idx; + ti->tlvs = ntlv; + ti->tlen = ntlv->head.length; +} + +static void +objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti) +{ + + ntlv_to_ti(&oh->ntlv, ti); +} + +struct namedobj_instance * +ipfw_get_table_objhash(struct ip_fw_chain *ch) +{ + + return (CHAIN_TO_NI(ch)); +} + +/* + * Exports basic table info as name TLV. + * Used inside dump_static_rules() to provide info + * about all tables referenced by current ruleset. + * + * Returns 0 on success. 
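+ *
+ * A consumer walking such a dump can translate the kidx stored in rule
+ * opcodes back into a table name, e.g. (hypothetical sketch):
+ *
+ *	if (ntlv->head.type == IPFW_TLV_TBL_NAME)
+ *		printf("table %u is named %s\n", ntlv->idx, ntlv->name);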
+ */
 int
-ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
+ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
+    struct sockopt_data *sd)
+{
+	struct namedobj_instance *ni;
+	struct named_object *no;
+	ipfw_obj_ntlv *ntlv;
+
+	ni = CHAIN_TO_NI(ch);
+
+	no = ipfw_objhash_lookup_kidx(ni, kidx);
+	KASSERT(no != NULL, ("invalid table kidx passed"));
+
+	ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
+	if (ntlv == NULL)
+		return (ENOMEM);
+
+	ntlv->head.type = IPFW_TLV_TBL_NAME;
+	ntlv->head.length = sizeof(*ntlv);
+	ntlv->idx = no->kidx;
+	strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
+
+	return (0);
+}
+
+struct dump_args {
+	struct ip_fw_chain *ch;
+	struct table_info *ti;
+	struct table_config *tc;
+	struct sockopt_data *sd;
+	uint32_t cnt;
+	uint16_t uidx;
+	int error;
+	uint32_t size;
+	ipfw_table_entry *ent;
+	ta_foreach_f *f;
+	void *farg;
+	ipfw_obj_tentry tent;
+};
+
+static int
+count_ext_entries(void *e, void *arg)
 {
-	struct radix_node_head *rnh;
+	struct dump_args *da;
 
-	if (tbl >= V_fw_tables_max)
+	da = (struct dump_args *)arg;
+	da->cnt++;
+
+	return (0);
+}
+
+/*
+ * Gets number of items from table either using
+ * internal counter or calling algo callback for
+ * externally-managed tables.
+ *
+ * Returns number of records.
+ */
+static uint32_t
+table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
+{
+	struct table_info *ti;
+	struct table_algo *ta;
+	struct dump_args da;
+
+	ti = KIDX_TO_TI(ch, tc->no.kidx);
+	ta = tc->ta;
+
+	/* Use internal counter for self-managed tables */
+	if ((ta->flags & TA_FLAG_READONLY) == 0)
+		return (tc->count);
+
+	/* Use callback to quickly get number of items */
+	if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
+		return (ta->get_count(tc->astate, ti));
+
+	/* Count number of items ourselves */
+	memset(&da, 0, sizeof(da));
+	ta->foreach(tc->astate, ti, count_ext_entries, &da);
+
+	return (da.cnt);
+}
+
+/*
+ * Exports table @tc info into standard ipfw_xtable_info format.
+ */
+static void
+export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
+    ipfw_xtable_info *i)
+{
+	struct table_info *ti;
+	struct table_algo *ta;
+
+	i->type = tc->no.subtype;
+	i->tflags = tc->tflags;
+	i->vmask = tc->vmask;
+	i->set = tc->no.set;
+	i->kidx = tc->no.kidx;
+	i->refcnt = tc->no.refcnt;
+	i->count = table_get_count(ch, tc);
+	i->limit = tc->limit;
+	i->flags |= (tc->locked != 0) ?
+	    IPFW_TGFLAGS_LOCKED : 0;
+	i->size = i->count * sizeof(ipfw_obj_tentry);
+	i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
+	strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
+	ti = KIDX_TO_TI(ch, tc->no.kidx);
+	ta = tc->ta;
+	if (ta->print_config != NULL) {
+		/* Use algo function to print table config to string */
+		ta->print_config(tc->astate, ti, i->algoname,
+		    sizeof(i->algoname));
+	} else
+		strlcpy(i->algoname, ta->name, sizeof(i->algoname));
+	/* Dump algo-specific data, if possible */
+	if (ta->dump_tinfo != NULL) {
+		ta->dump_tinfo(tc->astate, ti, &i->ta_info);
+		i->ta_info.flags |= IPFW_TATFLAGS_DATA;
+	}
+}
+
+struct dump_table_args {
+	struct ip_fw_chain *ch;
+	struct sockopt_data *sd;
+};
+
+static int
+export_table_internal(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	ipfw_xtable_info *i;
+	struct dump_table_args *dta;
+
+	dta = (struct dump_table_args *)arg;
+
+	i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
+	KASSERT(i != NULL, ("previously checked buffer is not enough"));
+
+	export_table_info(dta->ch, (struct table_config *)no, i);
+	return (0);
+}
+
+/*
+ * Export all tables as ipfw_xtable_info structures to
+ * storage provided by @sd.
+ *
+ * If supplied buffer is too small, fills in required size
+ * and returns ENOMEM.
+ * Returns 0 on success.
+ */
+static int
+export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
+    struct sockopt_data *sd)
+{
+	uint32_t size;
+	uint32_t count;
+	struct dump_table_args dta;
+
+	count = ipfw_objhash_count(CHAIN_TO_NI(ch));
+	size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
+
+	/* Fill in header regardless of buffer size */
+	olh->count = count;
+	olh->objsize = sizeof(ipfw_xtable_info);
+
+	if (size > olh->size) {
+		olh->size = size;
+		return (ENOMEM);
+	}
+
+	olh->size = size;
+
+	dta.ch = ch;
+	dta.sd = sd;
+
+	ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);
+
+	return (0);
+}
+
+/*
+ * Dumps all table data
+ * Data layout (v1)(current):
+ * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
+ * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	struct _ipfw_obj_header *oh;
+	ipfw_xtable_info *i;
+	struct tid_info ti;
+	struct table_config *tc;
+	struct table_algo *ta;
+	struct dump_args da;
+	uint32_t sz;
+
+	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
+	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+	if (oh == NULL)
+		return (EINVAL);
+
+	i = (ipfw_xtable_info *)(oh + 1);
+	objheader_to_ti(oh, &ti);
+
+	IPFW_UH_RLOCK(ch);
+	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ESRCH);
+	}
+	export_table_info(ch, tc, i);
+
+	if (sd->valsize < i->size) {
+
+		/*
+		 * Submitted buffer size is not enough.
+		 * We've already filled in @i structure with
+		 * relevant table info including size, so we
+		 * can return. Buffer will be flushed automatically.
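+		 *
+		 * Userland is expected to retry with a buffer of at
+		 * least the returned size, e.g. (hypothetical sketch
+		 * using the IP_FW3 socket option, where @info points
+		 * at the ipfw_xtable_info following the header; error
+		 * handling and request re-initialization elided):
+		 *
+		 *	for (;;) {
+		 *		l = sz;
+		 *		if (getsockopt(s, IPPROTO_IP, IP_FW3,
+		 *		    oh, &l) == 0)
+		 *			break;
+		 *		if (errno != ENOMEM)
+		 *			break;
+		 *		sz = info->size;
+		 *		oh = realloc(oh, sz);
+		 *	}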
+		 */
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);
+	}
+
+	/*
+	 * Do the actual dump in eXtended format
+	 */
+	memset(&da, 0, sizeof(da));
+	da.ch = ch;
+	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
+	da.tc = tc;
+	da.sd = sd;
+
+	ta = tc->ta;
+
+	ta->foreach(tc->astate, da.ti, dump_table_tentry, &da);
+	IPFW_UH_RUNLOCK(ch);
+
+	return (da.error);
+}
+
+/*
+ * Dumps all table data
+ * Data layout (version 0)(legacy):
+ * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE()
+ * Reply: [ ipfw_xtable ipfw_table_xentry x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_xtable *xtbl;
+	struct tid_info ti;
+	struct table_config *tc;
+	struct table_algo *ta;
+	struct dump_args da;
+	size_t sz, count;
+
+	xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable));
+	if (xtbl == NULL)
 		return (EINVAL);
-	*cnt = 0;
-	if ((rnh = ch->tables[tbl]) == NULL)
+
+	memset(&ti, 0, sizeof(ti));
+	ti.uidx = xtbl->tbl;
+
+	IPFW_UH_RLOCK(ch);
+	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
+		IPFW_UH_RUNLOCK(ch);
 		return (0);
-	rnh->rnh_walktree(rnh, count_table_entry, cnt);
+	}
+	count = table_get_count(ch, tc);
+	sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable);
+
+	xtbl->cnt = count;
+	xtbl->size = sz;
+	xtbl->type = tc->no.subtype;
+	xtbl->tbl = ti.uidx;
+
+	if (sd->valsize < sz) {
+
+		/*
+		 * Submitted buffer size is not enough.
+		 * We've already filled in the @xtbl header with
+		 * relevant table info including size, so we
+		 * can return. Buffer will be flushed automatically.
+		 */
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);
+	}
+
+	/* Do the actual dump in eXtended format */
+	memset(&da, 0, sizeof(da));
+	da.ch = ch;
+	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
+	da.tc = tc;
+	da.sd = sd;
+
+	ta = tc->ta;
+
+	ta->foreach(tc->astate, da.ti, dump_table_xentry, &da);
+	IPFW_UH_RUNLOCK(ch);
+
+	return (0);
+}
+
+/*
+ * Legacy function to retrieve number of items in table.
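+ *
+ * A legacy caller passes a single uint32_t holding the table number
+ * right after the op header and receives the required byte count in
+ * the same slot, roughly (hypothetical sketch):
+ *
+ *	struct {
+ *		ip_fw3_opheader op3;
+ *		uint32_t tbl;
+ *	} req = { .tbl = 13 };
+ *	socklen_t l = sizeof(req);
+ *
+ *	req.op3.opcode = IP_FW_TABLE_XGETSIZE;
+ *	if (getsockopt(s, IPPROTO_IP, IP_FW3, &req, &l) == 0)
+ *		printf("%u bytes needed\n", req.tbl);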
+ */ +static int +get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + uint32_t *tbl; + struct tid_info ti; + size_t sz; + int error; + + sz = sizeof(*op3) + sizeof(uint32_t); + op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz); + if (op3 == NULL) + return (EINVAL); + + tbl = (uint32_t *)(op3 + 1); + memset(&ti, 0, sizeof(ti)); + ti.uidx = *tbl; + IPFW_UH_RLOCK(ch); + error = ipfw_count_xtable(ch, &ti, tbl); + IPFW_UH_RUNLOCK(ch); + return (error); +} + +/* + * Legacy IP_FW_TABLE_GETSIZE handler + */ +int +ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) +{ + struct table_config *tc; + + if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) + return (ESRCH); + *cnt = table_get_count(ch, tc); + return (0); +} + +/* + * Legacy IP_FW_TABLE_XGETSIZE handler + */ +int +ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) +{ + struct table_config *tc; + uint32_t count; + + if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) { + *cnt = 0; + return (0); /* 'table all list' requires success */ + } + + count = table_get_count(ch, tc); + *cnt = count * sizeof(ipfw_table_xentry); + if (count > 0) + *cnt += sizeof(ipfw_xtable); return (0); } static int -dump_table_entry(struct radix_node *rn, void *arg) +dump_table_entry(void *e, void *arg) { - struct table_entry * const n = (struct table_entry *)rn; - ipfw_table * const tbl = arg; + struct dump_args *da; + struct table_config *tc; + struct table_algo *ta; ipfw_table_entry *ent; + struct table_value *pval; + int error; + + da = (struct dump_args *)arg; + + tc = da->tc; + ta = tc->ta; - if (tbl->cnt == tbl->size) + /* Out of memory, returning */ + if (da->cnt == da->size) return (1); - ent = &tbl->ent[tbl->cnt]; - ent->tbl = tbl->tbl; - if (in_nullhost(n->mask.sin_addr)) - ent->masklen = 0; - else - ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); - ent->addr = n->addr.sin_addr.s_addr; - ent->value = n->value; - tbl->cnt++; + ent = da->ent++; + ent->tbl = da->uidx; + da->cnt++; + + error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); + if (error != 0) + return (error); + + ent->addr = da->tent.k.addr.s_addr; + ent->masklen = da->tent.masklen; + pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); + ent->value = ipfw_export_table_value_legacy(pval); + return (0); } +/* + * Dumps table in pre-8.1 legacy format. + */ int -ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) +ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, + ipfw_table *tbl) { - struct radix_node_head *rnh; + struct table_config *tc; + struct table_algo *ta; + struct dump_args da; - if (tbl->tbl >= V_fw_tables_max) - return (EINVAL); tbl->cnt = 0; - if ((rnh = ch->tables[tbl->tbl]) == NULL) + + if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) + return (0); /* XXX: We should return ESRCH */ + + ta = tc->ta; + + /* This dump format supports IPv4 only */ + if (tc->no.subtype != IPFW_TABLE_ADDR) return (0); - rnh->rnh_walktree(rnh, dump_table_entry, tbl); + + memset(&da, 0, sizeof(da)); + da.ch = ch; + da.ti = KIDX_TO_TI(ch, tc->no.kidx); + da.tc = tc; + da.ent = &tbl->ent[0]; + da.size = tbl->size; + + tbl->cnt = 0; + ta->foreach(tc->astate, da.ti, dump_table_entry, &da); + tbl->cnt = da.cnt; + + return (0); +} + +/* + * Dumps table entry in eXtended format (v1)(current). 
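+ *
+ * Like the other dump helpers it is driven by ta->foreach(), whose
+ * callback contract here is: return 0 to continue the walk, non-zero
+ * to stop it (compare count_ext_entries() above). A minimal callback
+ * sketch:
+ *
+ *	static int
+ *	count_cb(void *e, void *arg)
+ *	{
+ *
+ *		(*(uint32_t *)arg)++;
+ *		return (0);
+ *	}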
+ */ +static int +dump_table_tentry(void *e, void *arg) +{ + struct dump_args *da; + struct table_config *tc; + struct table_algo *ta; + struct table_value *pval; + ipfw_obj_tentry *tent; + int error; + + da = (struct dump_args *)arg; + + tc = da->tc; + ta = tc->ta; + + tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent)); + /* Out of memory, returning */ + if (tent == NULL) { + da->error = ENOMEM; + return (1); + } + tent->head.length = sizeof(ipfw_obj_tentry); + tent->idx = da->uidx; + + error = ta->dump_tentry(tc->astate, da->ti, e, tent); + if (error != 0) + return (error); + + pval = get_table_value(da->ch, da->tc, tent->v.kidx); + ipfw_export_table_value_v1(pval, &tent->v.value); + + return (0); +} + +/* + * Dumps table entry in eXtended format (v0). + */ +static int +dump_table_xentry(void *e, void *arg) +{ + struct dump_args *da; + struct table_config *tc; + struct table_algo *ta; + ipfw_table_xentry *xent; + ipfw_obj_tentry *tent; + struct table_value *pval; + int error; + + da = (struct dump_args *)arg; + + tc = da->tc; + ta = tc->ta; + + xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent)); + /* Out of memory, returning */ + if (xent == NULL) + return (1); + xent->len = sizeof(ipfw_table_xentry); + xent->tbl = da->uidx; + + memset(&da->tent, 0, sizeof(da->tent)); + tent = &da->tent; + error = ta->dump_tentry(tc->astate, da->ti, e, tent); + if (error != 0) + return (error); + + /* Convert current format to previous one */ + xent->masklen = tent->masklen; + pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); + xent->value = ipfw_export_table_value_legacy(pval); + /* Apply some hacks */ + if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) { + xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr; + xent->flags = IPFW_TCF_INET; + } else + memcpy(&xent->k, &tent->k, sizeof(xent->k)); + return (0); } +/* + * Helper function to export table algo data + * to tentry format before calling user function. + * + * Returns 0 on success. + */ static int -count_table_xentry(struct radix_node *rn, void *arg) +prepare_table_tentry(void *e, void *arg) { - uint32_t * const cnt = arg; + struct dump_args *da; + struct table_config *tc; + struct table_algo *ta; + int error; + + da = (struct dump_args *)arg; + + tc = da->tc; + ta = tc->ta; + + error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); + if (error != 0) + return (error); + + da->f(&da->tent, da->farg); - (*cnt) += sizeof(ipfw_table_xentry); return (0); } +/* + * Allow external consumers to read table entries in standard format. + */ int -ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) +ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx, + ta_foreach_f *f, void *arg) +{ + struct namedobj_instance *ni; + struct table_config *tc; + struct table_algo *ta; + struct dump_args da; + + ni = CHAIN_TO_NI(ch); + + tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); + if (tc == NULL) + return (ESRCH); + + ta = tc->ta; + + memset(&da, 0, sizeof(da)); + da.ch = ch; + da.ti = KIDX_TO_TI(ch, tc->no.kidx); + da.tc = tc; + da.f = f; + da.farg = arg; + + ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da); + + return (0); +} + +/* + * Table algorithms + */ + +/* + * Finds algorithm by index, table type or supplied name. + * + * Returns pointer to algo or NULL. 
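+ *
+ * Since only the first word of @name is compared, algorithm parameters
+ * may follow the name itself; e.g. (assuming an "addr:chash" algorithm
+ * is registered) both of these calls resolve to the same algo:
+ *
+ *	ta = find_table_algo(tcfg, ti, "addr:chash");
+ *	ta = find_table_algo(tcfg, ti, "addr:chash hsize=32");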
+ */ +static struct table_algo * +find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) { - struct radix_node_head *rnh; + int i, l; + struct table_algo *ta; + + if (ti->type > IPFW_TABLE_MAXTYPE) + return (NULL); + + /* Search by index */ + if (ti->atype != 0) { + if (ti->atype > tcfg->algo_count) + return (NULL); + return (tcfg->algo[ti->atype]); + } + + if (name == NULL) { + /* Return default algorithm for given type if set */ + return (tcfg->def_algo[ti->type]); + } + + /* Search by name */ + /* TODO: better search */ + for (i = 1; i <= tcfg->algo_count; i++) { + ta = tcfg->algo[i]; + + /* + * One can supply additional algorithm + * parameters so we compare only the first word + * of supplied name: + * 'addr:chash hsize=32' + * '^^^^^^^^^' + * + */ + l = strlen(ta->name); + if (strncmp(name, ta->name, l) != 0) + continue; + if (name[l] != '\0' && name[l] != ' ') + continue; + /* Check if we're requesting proper table type */ + if (ti->type != 0 && ti->type != ta->type) + return (NULL); + return (ta); + } - if (tbl >= V_fw_tables_max) + return (NULL); +} + +/* + * Register new table algo @ta. + * Stores algo id inside @idx. + * + * Returns 0 on success. + */ +int +ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, + int *idx) +{ + struct tables_config *tcfg; + struct table_algo *ta_new; + size_t sz; + + if (size > sizeof(struct table_algo)) return (EINVAL); - *cnt = 0; - if ((rnh = ch->tables[tbl]) != NULL) - rnh->rnh_walktree(rnh, count_table_xentry, cnt); - if ((rnh = ch->xtables[tbl]) != NULL) - rnh->rnh_walktree(rnh, count_table_xentry, cnt); - /* Return zero if table is empty */ - if (*cnt > 0) - (*cnt) += sizeof(ipfw_xtable); + + /* Check for the required on-stack size for add/del */ + sz = roundup2(ta->ta_buf_size, sizeof(void *)); + if (sz > TA_BUF_SZ) + return (EINVAL); + + KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE")); + + /* Copy algorithm data to stable storage. */ + ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO); + memcpy(ta_new, ta, size); + + tcfg = CHAIN_TO_TCFG(ch); + + KASSERT(tcfg->algo_count < 255, ("Increase algo array size")); + + tcfg->algo[++tcfg->algo_count] = ta_new; + ta_new->idx = tcfg->algo_count; + + /* Set algorithm as default one for given type */ + if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 && + tcfg->def_algo[ta_new->type] == NULL) + tcfg->def_algo[ta_new->type] = ta_new; + + *idx = ta_new->idx; + return (0); } +/* + * Unregisters table algo using @idx as id. + * XXX: It is NOT safe to call this function in any place + * other than ipfw instance destroy handler. + */ +void +ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) +{ + struct tables_config *tcfg; + struct table_algo *ta; + + tcfg = CHAIN_TO_TCFG(ch); + + KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d", + idx, tcfg->algo_count)); + ta = tcfg->algo[idx]; + KASSERT(ta != NULL, ("algo idx %d is NULL", idx)); + + if (tcfg->def_algo[ta->type] == ta) + tcfg->def_algo[ta->type] = NULL; + + free(ta, M_IPFW); +} + +/* + * Lists all table algorithms currently available. 
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
+ * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
+ *
+ * Returns 0 on success
+ */
 static int
-dump_table_xentry_base(struct radix_node *rn, void *arg)
+list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
 {
-	struct table_entry * const n = (struct table_entry *)rn;
-	ipfw_xtable * const tbl = arg;
-	ipfw_table_xentry *xent;
+	struct _ipfw_obj_lheader *olh;
+	struct tables_config *tcfg;
+	ipfw_ta_info *i;
+	struct table_algo *ta;
+	uint32_t count, n, size;
+
+	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
+	if (olh == NULL)
+		return (EINVAL);
+	if (sd->valsize < olh->size)
+		return (EINVAL);
+
+	IPFW_UH_RLOCK(ch);
+	tcfg = CHAIN_TO_TCFG(ch);
+	count = tcfg->algo_count;
+	size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
+
+	/* Fill in header regardless of buffer size */
+	olh->count = count;
+	olh->objsize = sizeof(ipfw_ta_info);
+
+	if (size > olh->size) {
+		olh->size = size;
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);
+	}
+	olh->size = size;
+
+	for (n = 1; n <= count; n++) {
+		i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i));
+		KASSERT(i != NULL, ("previously checked buffer is not enough"));
+		ta = tcfg->algo[n];
+		strlcpy(i->algoname, ta->name, sizeof(i->algoname));
+		i->type = ta->type;
+		i->refcnt = ta->refcnt;
+	}
+
+	IPFW_UH_RUNLOCK(ch);
 
-	/* Out of memory, returning */
-	if (tbl->cnt == tbl->size)
-		return (1);
-	xent = &tbl->xent[tbl->cnt];
-	xent->len = sizeof(ipfw_table_xentry);
-	xent->tbl = tbl->tbl;
-	if (in_nullhost(n->mask.sin_addr))
-		xent->masklen = 0;
-	else
-		xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
-	/* Save IPv4 address as deprecated IPv6 compatible */
-	xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr;
-	xent->value = n->value;
-	tbl->cnt++;
 	return (0);
 }
 
 static int
-dump_table_xentry_extended(struct radix_node *rn, void *arg)
+classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 {
-	struct table_xentry * const n = (struct table_xentry *)rn;
-	ipfw_xtable * const tbl = arg;
-	ipfw_table_xentry *xent;
-#ifdef INET6
-	int i;
-	uint32_t *v;
-#endif
-	/* Out of memory, returning */
-	if (tbl->cnt == tbl->size)
+	/* Basic IPv4/IPv6 or u32 lookups */
+	*puidx = cmd->arg1;
+	/* Assume ADDR by default */
+	*ptype = IPFW_TABLE_ADDR;
+	int v;
+
+	if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) {
+		/*
+		 * generic lookup. The key must be
+		 * in 32bit big-endian format.
+		 */
+		v = ((ipfw_insn_u32 *)cmd)->d[1];
+		switch (v) {
+		case 0:
+		case 1:
+			/* IPv4 src/dst */
+			break;
+		case 2:
+		case 3:
+			/* src/dst port */
+			*ptype = IPFW_TABLE_NUMBER;
+			break;
+		case 4:
+			/* uid/gid */
+			*ptype = IPFW_TABLE_NUMBER;
+			break;
+		case 5:
+			/* jid */
+			*ptype = IPFW_TABLE_NUMBER;
+			break;
+		case 6:
+			/* dscp */
+			*ptype = IPFW_TABLE_NUMBER;
+			break;
+		}
+	}
+
+	return (0);
+}
+
+static int
+classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+	ipfw_insn_if *cmdif;
+
+	/* Interface table, possibly */
+	cmdif = (ipfw_insn_if *)cmd;
+	if (cmdif->name[0] != '\1')
 		return (1);
-	xent = &tbl->xent[tbl->cnt];
-	xent->len = sizeof(ipfw_table_xentry);
-	xent->tbl = tbl->tbl;
-
-	switch (tbl->type) {
-#ifdef INET6
-	case IPFW_TABLE_CIDR:
-		/* Count IPv6 mask */
-		v = (uint32_t *)&n->m.mask6.sin6_addr;
-		for (i = 0; i < sizeof(struct in6_addr) / 4; i++, v++)
-			xent->masklen += bitcount32(*v);
-		memcpy(&xent->k, &n->a.addr6.sin6_addr, sizeof(struct in6_addr));
-		break;
-#endif
-	case IPFW_TABLE_INTERFACE:
-		/* Assume exact mask */
-		xent->masklen = 8 * IF_NAMESIZE;
-		memcpy(&xent->k, &n->a.iface.ifname, IF_NAMESIZE);
+
+	*ptype = IPFW_TABLE_INTERFACE;
+	*puidx = cmdif->p.kidx;
+
+	return (0);
+}
+
+static int
+classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+
+	*puidx = cmd->arg1;
+	*ptype = IPFW_TABLE_FLOW;
+
+	return (0);
+}
+
+static void
+update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+	cmd->arg1 = idx;
+}
+
+static void
+update_via(ipfw_insn *cmd, uint16_t idx)
+{
+	ipfw_insn_if *cmdif;
+
+	cmdif = (ipfw_insn_if *)cmd;
+	cmdif->p.kidx = idx;
+}
+
+static int
+table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct named_object **pno)
+{
+	struct table_config *tc;
+	int error;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
+	if (error != 0)
+		return (error);
+
+	*pno = &tc->no;
+	return (0);
+}
+
+/* XXX: sets-sets! */
+static struct named_object *
+table_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+	struct namedobj_instance *ni;
+	struct table_config *tc;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	ni = CHAIN_TO_NI(ch);
+	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx);
+	KASSERT(tc != NULL, ("Table with index %d not found", idx));
+
+	return (&tc->no);
+}
+
+static int
+table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
+    enum ipfw_sets_cmd cmd)
+{
+
+	switch (cmd) {
+	case SWAP_ALL:
+	case TEST_ALL:
+		/*
+		 * Return success for TEST_ALL, since nothing prevents
+		 * moving rules from one set to another. All tables are
+		 * accessible from all sets when per-set tables sysctl
+		 * is disabled.
+		 */
+	case MOVE_ALL:
+	case TEST_ONE:
+	case MOVE_ONE:
+		/*
+		 * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add
+		 * if set number will be used in hash function. Currently
+		 * we can just use generic handler that replaces set value.
+		 */
+		if (V_fw_tables_sets == 0)
+			return (0);
 		break;
-
-	default:
-		/* unknown, skip entry */
+	case COUNT_ONE:
+		/*
+		 * Return EOPNOTSUPP for COUNT_ONE when the per-set sysctl
+		 * is disabled. This allows table opcodes to be skipped
+		 * from additional checks when specific rules are moved
+		 * to another set.
+		 */
+		if (V_fw_tables_sets == 0)
+			return (EOPNOTSUPP);
+	}
+	/* Use generic sets handler when per-set sysctl is enabled.
*/ + return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME, + set, new_set, cmd)); +} + +static struct opcode_obj_rewrite opcodes[] = { + { + .opcode = O_IP_SRC_LOOKUP, + .etlv = IPFW_TLV_TBL_NAME, + .classifier = classify_srcdst, + .update = update_arg1, + .find_byname = table_findbyname, + .find_bykidx = table_findbykidx, + .create_object = create_table_compat, + .manage_sets = table_manage_sets, + }, + { + .opcode = O_IP_DST_LOOKUP, + .etlv = IPFW_TLV_TBL_NAME, + .classifier = classify_srcdst, + .update = update_arg1, + .find_byname = table_findbyname, + .find_bykidx = table_findbykidx, + .create_object = create_table_compat, + .manage_sets = table_manage_sets, + }, + { + .opcode = O_IP_FLOW_LOOKUP, + .etlv = IPFW_TLV_TBL_NAME, + .classifier = classify_flow, + .update = update_arg1, + .find_byname = table_findbyname, + .find_bykidx = table_findbykidx, + .create_object = create_table_compat, + .manage_sets = table_manage_sets, + }, + { + .opcode = O_XMIT, + .etlv = IPFW_TLV_TBL_NAME, + .classifier = classify_via, + .update = update_via, + .find_byname = table_findbyname, + .find_bykidx = table_findbykidx, + .create_object = create_table_compat, + .manage_sets = table_manage_sets, + }, + { + .opcode = O_RECV, + .etlv = IPFW_TLV_TBL_NAME, + .classifier = classify_via, + .update = update_via, + .find_byname = table_findbyname, + .find_bykidx = table_findbykidx, + .create_object = create_table_compat, + .manage_sets = table_manage_sets, + }, + { + .opcode = O_VIA, + .etlv = IPFW_TLV_TBL_NAME, + .classifier = classify_via, + .update = update_via, + .find_byname = table_findbyname, + .find_bykidx = table_findbykidx, + .create_object = create_table_compat, + .manage_sets = table_manage_sets, + }, +}; + +static int +test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no, + void *arg __unused) +{ + + /* Check that there aren't any tables in not default set */ + if (no->set != 0) + return (EBUSY); + return (0); +} + +/* + * Switch between "set 0" and "rule's set" table binding, + * Check all ruleset bindings and permits changing + * IFF each binding has both rule AND table in default set (set 0). + * + * Returns 0 on success. + */ +int +ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets) +{ + struct opcode_obj_rewrite *rw; + struct namedobj_instance *ni; + struct named_object *no; + struct ip_fw *rule; + ipfw_insn *cmd; + int cmdlen, i, l; + uint16_t kidx; + uint8_t subtype; + + IPFW_UH_WLOCK(ch); + + if (V_fw_tables_sets == sets) { + IPFW_UH_WUNLOCK(ch); return (0); } + ni = CHAIN_TO_NI(ch); + if (sets == 0) { + /* + * Prevent disabling sets support if we have some tables + * in not default sets. + */ + if (ipfw_objhash_foreach_type(ni, test_sets_cb, + NULL, IPFW_TLV_TBL_NAME) != 0) { + IPFW_UH_WUNLOCK(ch); + return (EBUSY); + } + } + /* + * Scan all rules and examine tables opcodes. 
+ */ + for (i = 0; i < ch->n_rules; i++) { + rule = ch->map[i]; + + l = rule->cmd_len; + cmd = rule->cmd; + cmdlen = 0; + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + /* Check only tables opcodes */ + for (kidx = 0, rw = opcodes; + rw < opcodes + nitems(opcodes); rw++) { + if (rw->opcode != cmd->opcode) + continue; + if (rw->classifier(cmd, &kidx, &subtype) == 0) + break; + } + if (kidx == 0) + continue; + no = ipfw_objhash_lookup_kidx(ni, kidx); + /* Check if both table object and rule has the set 0 */ + if (no->set != 0 || rule->set != 0) { + IPFW_UH_WUNLOCK(ch); + return (EBUSY); + } + + } + } + V_fw_tables_sets = sets; + IPFW_UH_WUNLOCK(ch); + return (0); +} + +/* + * Checks table name for validity. + * Enforce basic length checks, the rest + * should be done in userland. + * + * Returns 0 if name is considered valid. + */ +static int +check_table_name(const char *name) +{ + + /* + * TODO: do some more complicated checks + */ + return (ipfw_check_object_name_generic(name)); +} + +/* + * Finds table config based on either legacy index + * or name in ntlv. + * Note @ti structure contains unchecked data from userland. + * + * Returns 0 in success and fills in @tc with found config + */ +static int +find_table_err(struct namedobj_instance *ni, struct tid_info *ti, + struct table_config **tc) +{ + char *name, bname[16]; + struct named_object *no; + ipfw_obj_ntlv *ntlv; + uint32_t set; + + if (ti->tlvs != NULL) { + ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, + IPFW_TLV_TBL_NAME); + if (ntlv == NULL) + return (EINVAL); + name = ntlv->name; + + /* + * Use set provided by @ti instead of @ntlv one. + * This is needed due to different sets behavior + * controlled by V_fw_tables_sets. + */ + set = (V_fw_tables_sets != 0) ? ti->set : 0; + } else { + snprintf(bname, sizeof(bname), "%d", ti->uidx); + name = bname; + set = 0; + } + + no = ipfw_objhash_lookup_name(ni, set, name); + *tc = (struct table_config *)no; + + return (0); +} + +/* + * Finds table config based on either legacy index + * or name in ntlv. + * Note @ti structure contains unchecked data from userland. + * + * Returns pointer to table_config or NULL. + */ +static struct table_config * +find_table(struct namedobj_instance *ni, struct tid_info *ti) +{ + struct table_config *tc; + + if (find_table_err(ni, ti, &tc) != 0) + return (NULL); + + return (tc); +} + +/* + * Allocate new table config structure using + * specified @algo and @aname. + * + * Returns pointer to config or NULL. 
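+ *
+ * A typical (sketched) call sequence, mirroring create_table_internal()
+ * above, with free_table_config() used on later error paths:
+ *
+ *	tc = alloc_table_config(ch, ti, ta, aname, tflags);
+ *	if (tc == NULL)
+ *		return (ENOMEM);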
+ */ +static struct table_config * +alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, + struct table_algo *ta, char *aname, uint8_t tflags) +{ + char *name, bname[16]; + struct table_config *tc; + int error; + ipfw_obj_ntlv *ntlv; + uint32_t set; + + if (ti->tlvs != NULL) { + ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, + IPFW_TLV_TBL_NAME); + if (ntlv == NULL) + return (NULL); + name = ntlv->name; + set = ntlv->set; + } else { + /* Compat part: convert number to string representation */ + snprintf(bname, sizeof(bname), "%d", ti->uidx); + name = bname; + set = 0; + } + + tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO); + tc->no.name = tc->tablename; + tc->no.subtype = ta->type; + tc->no.set = set; + tc->tflags = tflags; + tc->ta = ta; + strlcpy(tc->tablename, name, sizeof(tc->tablename)); + /* Set "shared" value type by default */ + tc->vshared = 1; + + /* Preallocate data structures for new tables */ + error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags); + if (error != 0) { + free(tc, M_IPFW); + return (NULL); + } + + return (tc); +} + +/* + * Destroys table state and config. + */ +static void +free_table_config(struct namedobj_instance *ni, struct table_config *tc) +{ + + KASSERT(tc->linked == 0, ("free() on linked config")); + /* UH lock MUST NOT be held */ + + /* + * We're using ta without any locking/referencing. + * TODO: fix this if we're going to use unloadable algos. + */ + tc->ta->destroy(tc->astate, &tc->ti_copy); + free(tc, M_IPFW); +} + +/* + * Links @tc to @chain table named instance. + * Sets appropriate type/states in @chain table info. + */ +static void +link_table(struct ip_fw_chain *ch, struct table_config *tc) +{ + struct namedobj_instance *ni; + struct table_info *ti; + uint16_t kidx; + + IPFW_UH_WLOCK_ASSERT(ch); + IPFW_WLOCK_ASSERT(ch); + + ni = CHAIN_TO_NI(ch); + kidx = tc->no.kidx; + + ipfw_objhash_add(ni, &tc->no); + + ti = KIDX_TO_TI(ch, kidx); + *ti = tc->ti_copy; + + /* Notify algo on real @ti address */ + if (tc->ta->change_ti != NULL) + tc->ta->change_ti(tc->astate, ti); + + tc->linked = 1; + tc->ta->refcnt++; +} + +/* + * Unlinks @tc from @chain table named instance. + * Zeroes states in @chain and stores them in @tc. + */ +static void +unlink_table(struct ip_fw_chain *ch, struct table_config *tc) +{ + struct namedobj_instance *ni; + struct table_info *ti; + uint16_t kidx; + + IPFW_UH_WLOCK_ASSERT(ch); + IPFW_WLOCK_ASSERT(ch); + + ni = CHAIN_TO_NI(ch); + kidx = tc->no.kidx; + + /* Clear state. 
@ti copy is already saved inside @tc */
+	ipfw_objhash_del(ni, &tc->no);
+	ti = KIDX_TO_TI(ch, kidx);
+	memset(ti, 0, sizeof(struct table_info));
+	tc->linked = 0;
+	tc->ta->refcnt--;
+
+	/* Notify algo on real @ti address */
+	if (tc->ta->change_ti != NULL)
+		tc->ta->change_ti(tc->astate, NULL);
+}
+
+static struct ipfw_sopt_handler	scodes[] = {
+	{ IP_FW_TABLE_XCREATE,	0,	HDIR_SET,	create_table },
+	{ IP_FW_TABLE_XDESTROY,	0,	HDIR_SET,	flush_table_v0 },
+	{ IP_FW_TABLE_XFLUSH,	0,	HDIR_SET,	flush_table_v0 },
+	{ IP_FW_TABLE_XMODIFY,	0,	HDIR_BOTH,	modify_table },
+	{ IP_FW_TABLE_XINFO,	0,	HDIR_GET,	describe_table },
+	{ IP_FW_TABLES_XLIST,	0,	HDIR_GET,	list_tables },
+	{ IP_FW_TABLE_XLIST,	0,	HDIR_GET,	dump_table_v0 },
+	{ IP_FW_TABLE_XLIST,	1,	HDIR_GET,	dump_table_v1 },
+	{ IP_FW_TABLE_XADD,	0,	HDIR_BOTH,	manage_table_ent_v0 },
+	{ IP_FW_TABLE_XADD,	1,	HDIR_BOTH,	manage_table_ent_v1 },
+	{ IP_FW_TABLE_XDEL,	0,	HDIR_BOTH,	manage_table_ent_v0 },
+	{ IP_FW_TABLE_XDEL,	1,	HDIR_BOTH,	manage_table_ent_v1 },
+	{ IP_FW_TABLE_XFIND,	0,	HDIR_GET,	find_table_entry },
+	{ IP_FW_TABLE_XSWAP,	0,	HDIR_SET,	swap_table },
+	{ IP_FW_TABLES_ALIST,	0,	HDIR_GET,	list_table_algo },
+	{ IP_FW_TABLE_XGETSIZE,	0,	HDIR_GET,	get_table_size },
+};
-	xent->value = n->value;
-	tbl->cnt++;
+static int
+destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+
+	unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no);
+	if (ipfw_objhash_free_idx(ni, no->kidx) != 0)
+		printf("Error unlinking kidx %d from table %s\n",
+		    no->kidx, no->name);
+	free_table_config(ni, (struct table_config *)no);
 	return (0);
 }
 
+/*
+ * Shuts tables module down.
+ */
+void
+ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
+{
+
+	IPFW_DEL_SOPT_HANDLER(last, scodes);
+	IPFW_DEL_OBJ_REWRITER(last, opcodes);
+
+	/* Remove all tables from working set */
+	IPFW_UH_WLOCK(ch);
+	IPFW_WLOCK(ch);
+	ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
+	IPFW_WUNLOCK(ch);
+	IPFW_UH_WUNLOCK(ch);
+
+	/* Free the pointer array itself */
+	free(ch->tablestate, M_IPFW);
+
+	ipfw_table_value_destroy(ch, last);
+	ipfw_table_algo_destroy(ch);
+
+	ipfw_objhash_destroy(CHAIN_TO_NI(ch));
+	free(CHAIN_TO_TCFG(ch), M_IPFW);
+}
+
+/*
+ * Starts tables module.
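+ * Mirror image of ipfw_destroy_tables() above: allocates the per-chain
+ * table_info array and the name hash, then registers the opcode
+ * rewriters and the scodes[] sockopt handlers.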
+ */ int -ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl) +ipfw_init_tables(struct ip_fw_chain *ch, int first) { - struct radix_node_head *rnh; + struct tables_config *tcfg; - if (tbl->tbl >= V_fw_tables_max) - return (EINVAL); - tbl->cnt = 0; - tbl->type = ch->tabletype[tbl->tbl]; - if ((rnh = ch->tables[tbl->tbl]) != NULL) - rnh->rnh_walktree(rnh, dump_table_xentry_base, tbl); - if ((rnh = ch->xtables[tbl->tbl]) != NULL) - rnh->rnh_walktree(rnh, dump_table_xentry_extended, tbl); + /* Allocate pointers */ + ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info), + M_IPFW, M_WAITOK | M_ZERO); + + tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO); + tcfg->namehash = ipfw_objhash_create(V_fw_tables_max); + ch->tblcfg = tcfg; + + ipfw_table_value_init(ch, first); + ipfw_table_algo_init(ch); + + IPFW_ADD_OBJ_REWRITER(first, opcodes); + IPFW_ADD_SOPT_HANDLER(first, scodes); return (0); } -/* end of file */ + + diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table.h b/freebsd/sys/netpfil/ipfw/ip_fw_table.h new file mode 100644 index 00000000..d6578482 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/ip_fw_table.h @@ -0,0 +1,234 @@ +/*- + * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IPFW2_TABLE_H +#define _IPFW2_TABLE_H + +/* + * Internal constants and data structures used by ipfw tables + * not meant to be exported outside the kernel. 
+ */ +#ifdef _KERNEL + +struct table_algo; +struct tables_config { + struct namedobj_instance *namehash; + struct namedobj_instance *valhash; + uint32_t val_size; + uint32_t algo_count; + struct table_algo *algo[256]; + struct table_algo *def_algo[IPFW_TABLE_MAXTYPE + 1]; + TAILQ_HEAD(op_state_l,op_state) state_list; +}; +#define CHAIN_TO_TCFG(chain) ((struct tables_config *)(chain)->tblcfg) + +struct table_info { + table_lookup_t *lookup; /* Lookup function */ + void *state; /* Lookup radix/other structure */ + void *xstate; /* eXtended state */ + u_long data; /* Hints for given func */ +}; + +struct table_value; +struct tentry_info { + void *paddr; + struct table_value *pvalue; + void *ptv; /* Temporary field to hold obj */ + uint8_t masklen; /* mask length */ + uint8_t subtype; + uint16_t flags; /* record flags */ + uint32_t value; /* value index */ +}; +#define TEI_FLAGS_UPDATE 0x0001 /* Add or update rec if exists */ +#define TEI_FLAGS_UPDATED 0x0002 /* Entry has been updated */ +#define TEI_FLAGS_COMPAT 0x0004 /* Called from old ABI */ +#define TEI_FLAGS_DONTADD 0x0008 /* Do not create new rec */ +#define TEI_FLAGS_ADDED 0x0010 /* Entry was added */ +#define TEI_FLAGS_DELETED 0x0020 /* Entry was deleted */ +#define TEI_FLAGS_LIMIT 0x0040 /* Limit was hit */ +#define TEI_FLAGS_ERROR 0x0080 /* Unknown request error */ +#define TEI_FLAGS_NOTFOUND 0x0100 /* Entry was not found */ +#define TEI_FLAGS_EXISTS 0x0200 /* Entry already exists */ + +typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state, + struct table_info *ti, char *data, uint8_t tflags); +typedef void (ta_destroy)(void *ta_state, struct table_info *ti); +typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +typedef int (ta_add)(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +typedef int (ta_del)(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +typedef void (ta_flush_entry)(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); + +typedef int (ta_need_modify)(void *ta_state, struct table_info *ti, + uint32_t count, uint64_t *pflags); +typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags); +typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti, + void *ta_buf, uint64_t *pflags); +typedef void (ta_modify)(void *ta_state, struct table_info *ti, + void *ta_buf, uint64_t pflags); +typedef void (ta_flush_mod)(void *ta_buf); + +typedef void (ta_change_ti)(void *ta_state, struct table_info *ti); +typedef void (ta_print_config)(void *ta_state, struct table_info *ti, char *buf, + size_t bufsize); + +typedef int ta_foreach_f(void *node, void *arg); +typedef void ta_foreach(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg); +typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent); +typedef int ta_find_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); +typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti, + ipfw_ta_tinfo *tinfo); +typedef uint32_t ta_get_count(void *ta_state, struct table_info *ti); + +struct table_algo { + char name[16]; + uint32_t idx; + uint32_t type; + uint32_t refcnt; + uint32_t flags; + uint32_t vlimit; + size_t ta_buf_size; + ta_init *init; + ta_destroy *destroy; + ta_prepare_add *prepare_add; + ta_prepare_del *prepare_del; + ta_add *add; + ta_del *del; 
+ ta_flush_entry *flush_entry; + ta_find_tentry *find_tentry; + ta_need_modify *need_modify; + ta_prepare_mod *prepare_mod; + ta_fill_mod *fill_mod; + ta_modify *modify; + ta_flush_mod *flush_mod; + ta_change_ti *change_ti; + ta_foreach *foreach; + ta_dump_tentry *dump_tentry; + ta_print_config *print_config; + ta_dump_tinfo *dump_tinfo; + ta_get_count *get_count; +}; +#define TA_FLAG_DEFAULT 0x01 /* Algo is default for given type */ +#define TA_FLAG_READONLY 0x02 /* Algo does not support modifications*/ +#define TA_FLAG_EXTCOUNTER 0x04 /* Algo has external counter available*/ + +int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, + size_t size, int *idx); +void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx); + +void ipfw_table_algo_init(struct ip_fw_chain *chain); +void ipfw_table_algo_destroy(struct ip_fw_chain *chain); + +MALLOC_DECLARE(M_IPFW_TBL); +/* Exported to support legacy opcodes */ +int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, + struct tentry_info *tei, uint8_t flags, uint32_t count); +int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, + struct tentry_info *tei, uint8_t flags, uint32_t count); +int flush_table(struct ip_fw_chain *ch, struct tid_info *ti); +void ipfw_import_table_value_legacy(uint32_t value, struct table_value *v); +uint32_t ipfw_export_table_value_legacy(struct table_value *v); +int ipfw_get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); + +/* ipfw_table_value.c functions */ +struct table_config; +struct tableop_state; +void ipfw_table_value_init(struct ip_fw_chain *ch, int first); +void ipfw_table_value_destroy(struct ip_fw_chain *ch, int last); +int ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts); +void ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc, + struct tentry_info *tei, uint32_t count, int rollback); +void ipfw_import_table_value_v1(ipfw_table_value *iv); +void ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *iv); +void ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc, + struct table_algo *ta, void *astate, struct table_info *ti); +void rollback_table_values(struct tableop_state *ts); + +int ipfw_rewrite_table_uidx(struct ip_fw_chain *chain, + struct rule_check_info *ci); +int ipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule, + uint32_t *bmask); +int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, + struct sockopt_data *sd); +void ipfw_unref_rule_tables(struct ip_fw_chain *chain, struct ip_fw *rule); +struct namedobj_instance *ipfw_get_table_objhash(struct ip_fw_chain *ch); + +/* utility functions */ +int ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt, + uint32_t new_set); +void ipfw_swap_tables_sets(struct ip_fw_chain *ch, uint32_t old_set, + uint32_t new_set, int mv); +int ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx, + ta_foreach_f f, void *arg); + +/* internal functions */ +void tc_ref(struct table_config *tc); +void tc_unref(struct table_config *tc); + +struct op_state; +typedef void (op_rollback_f)(void *object, struct op_state *state); +struct op_state { + TAILQ_ENTRY(op_state) next; /* chain link */ + op_rollback_f *func; +}; + +struct tableop_state { + struct op_state opstate; + struct ip_fw_chain *ch; + struct table_config *tc; + struct table_algo *ta; + struct tentry_info *tei; + uint32_t count; + uint32_t vmask; + int vshared; + int modified; +}; + +void add_toperation_state(struct 
ip_fw_chain *ch, struct tableop_state *ts); +void del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts); +void rollback_toperation_state(struct ip_fw_chain *ch, void *object); + +/* Legacy interfaces */ +int ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, + uint32_t *cnt); +int ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, + uint32_t *cnt); +int ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, + ipfw_table *tbl); + + +#endif /* _KERNEL */ +#endif /* _IPFW2_TABLE_H */ diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c b/freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c new file mode 100644 index 00000000..e4c82131 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c @@ -0,0 +1,4112 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2014 Yandex LLC + * Copyright (c) 2014 Alexander V. Chernikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * Lookup table algorithms. + * + */ + +#include <rtems/bsd/local/opt_ipfw.h> +#include <rtems/bsd/local/opt_inet.h> +#ifndef INET +#error IPFIREWALL requires INET. +#endif /* INET */ +#include <rtems/bsd/local/opt_inet6.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/rwlock.h> +#include <sys/rmlock.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ +#include <net/radix.h> +#include <net/route.h> +#include <net/route_var.h> + +#include <netinet/in.h> +#include <netinet/in_fib.h> +#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ +#include <netinet/ip_fw.h> +#include <netinet6/in6_fib.h> + +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_fw_table.h> + + +/* + * IPFW table lookup algorithms. + * + * What is needed to add another table algo? + * + * Algo init: + * * struct table_algo has to be filled with: + * name: "type:algoname" format, e.g. "addr:radix". Currently + * there are the following types: "addr", "iface", "number" and "flow". + * type: one of IPFW_TABLE_* types + * flags: one or more TA_FLAGS_* + * ta_buf_size: size of structure used to store add/del item state. 
+ *    Needs to be less than TA_BUF_SZ.
+ *  callbacks: see below for description.
+ * * ipfw_add_table_algo / ipfw_del_table_algo have to be called
+ *
+ * Callbacks description:
+ *
+ * -init: request to initialize new table instance.
+ * typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state,
+ *     struct table_info *ti, char *data, uint8_t tflags);
+ * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success.
+ *
+ *  Allocate all structures needed for normal operations.
+ *  * Caller may want to parse @data for some algo-specific
+ *    options provided by userland.
+ *  * Caller may want to save configuration state pointer to @ta_state.
+ *  * Caller needs to save desired runtime structure pointer(s)
+ *    inside @ti fields. Note that it is not correct to save
+ *    @ti pointer at this moment. Use -change_ti hook for that.
+ *  * Caller has to set ti->lookup to an appropriate function pointer.
+ *
+ *
+ *
+ * -destroy: request to destroy table instance.
+ * typedef void (ta_destroy)(void *ta_state, struct table_info *ti);
+ * MANDATORY, unlocked. (M_WAITOK).
+ *
+ * Frees all table entries and all table structures allocated by -init.
+ *
+ *
+ *
+ * -prepare_add: request to allocate state for adding new entry.
+ * typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei,
+ *     void *ta_buf);
+ * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success.
+ *
+ * Allocates state and fills it in with all necessary data (EXCEPT value)
+ * from @tei to minimize operations needed to be done under WLOCK.
+ * "value" field has to be copied to new entry in @add callback.
+ * Buffer ta_buf of size ta->ta_buf_size may be used to store
+ * allocated state.
+ *
+ *
+ *
+ * -prepare_del: request to set state for deleting existing entry.
+ * typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei,
+ *     void *ta_buf);
+ * MANDATORY, locked, UH. (M_NOWAIT). Returns 0 on success.
+ *
+ * Buffer ta_buf of size ta->ta_buf_size may be used to store
+ * allocated state. Caller should use on-stack ta_buf allocation
+ * instead of doing malloc().
+ *
+ *
+ *
+ * -add: request to insert new entry into runtime/config structures.
+ * typedef int (ta_add)(void *ta_state, struct table_info *ti,
+ *     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+ * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success.
+ *
+ * Insert new entry using previously-allocated state in @ta_buf.
+ * * @tei may have the following flags:
+ *   TEI_FLAGS_UPDATE: request to add or update entry.
+ *   TEI_FLAGS_DONTADD: request to update (but not add) entry.
+ * * Caller is required to do the following:
+ *   copy real entry value from @tei
+ *   entry added: return 0, store 1 in @pnum
+ *   entry updated: return 0, store 0 in @pnum, store old value in @tei,
+ *     add TEI_FLAGS_UPDATED flag to @tei.
+ *   entry exists: return EEXIST
+ *   entry not found: return ENOENT
+ *   other error: return non-zero error code.
+ *
+ *
+ *
+ * -del: request to delete existing entry from runtime/config structures.
+ * typedef int (ta_del)(void *ta_state, struct table_info *ti,
+ *     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+ * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success.
+ *
+ * Delete entry using the state previously set up in @ta_buf.
+ * * Caller is required to do the following:
+ *   entry deleted: return 0, store 1 in @pnum, store old value in @tei.
+ *   entry not found: return ENOENT
+ *   other error: return non-zero error code.
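+ *
+ * As an illustration only (a sketch of how the core drives these hooks,
+ * not code from this file), a typical add boils down to:
+ *
+ *	error = ta->prepare_add(ch, tei, tb);	// unlocked, may sleep
+ *	IPFW_UH_WLOCK(ch);
+ *	IPFW_WLOCK(ch);
+ *	error = ta->add(ta_state, ti, tei, tb, &num);
+ *	IPFW_WUNLOCK(ch);
+ *	IPFW_UH_WUNLOCK(ch);
+ *	ta->flush_entry(ch, tei, tb);	// frees unused/replaced state
+ *
+ * where @tb is a caller-provided buffer of ta->ta_buf_size bytes.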
+ *
+ *
+ *
+ * -flush_entry: flush entry state created by -prepare_add / -del / others
+ * typedef void (ta_flush_entry)(struct ip_fw_chain *ch,
+ *     struct tentry_info *tei, void *ta_buf);
+ * MANDATORY, may be locked. (M_NOWAIT).
+ *
+ * Delete state allocated by:
+ * -prepare_add (-add returned EEXIST|UPDATED)
+ * -prepare_del (if any)
+ * -del
+ * * Caller is required to handle empty @ta_buf correctly.
+ *
+ *
+ * -find_tentry: finds entry specified by key in @tent
+ * typedef int ta_find_tentry(void *ta_state, struct table_info *ti,
+ *     ipfw_obj_tentry *tent);
+ * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 on success.
+ *
+ * Finds entry specified by given key.
+ * * Caller is required to do the following:
+ *   entry found: return 0, export entry to @tent
+ *   entry not found: return ENOENT
+ *
+ *
+ * -need_modify: checks if @ti has enough space to hold another @count items.
+ * typedef int (ta_need_modify)(void *ta_state, struct table_info *ti,
+ *     uint32_t count, uint64_t *pflags);
+ * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 if it has.
+ *
+ * Checks if given table has enough space to add @count items without
+ * resize. Caller may use @pflags to store desired modification data.
+ *
+ *
+ *
+ * -prepare_mod: allocate structures for table modification.
+ * typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags);
+ * OPTIONAL(need_modify), unlocked. (M_WAITOK). Returns 0 on success.
+ *
+ * Allocate all needed state for table modification. Caller
+ * should use `struct mod_item` to store new state in @ta_buf.
+ * Up to TA_BUF_SZ (128 bytes) can be stored in @ta_buf.
+ *
+ *
+ *
+ * -fill_mod: copy some data to the new state.
+ * typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti,
+ *     void *ta_buf, uint64_t *pflags);
+ * OPTIONAL(need_modify), locked (UH). (M_NOWAIT). Returns 0 on success.
+ *
+ * Copy as much data as we can to minimize changes under WLOCK.
+ * For example, an array can be merged inside this callback.
+ *
+ *
+ *
+ * -modify: perform final modification.
+ * typedef void (ta_modify)(void *ta_state, struct table_info *ti,
+ *     void *ta_buf, uint64_t pflags);
+ * OPTIONAL(need_modify), locked (UH+WLOCK). (M_NOWAIT).
+ *
+ * Performs all changes necessary to switch to new structures.
+ * * Caller should save old pointers to @ta_buf storage.
+ *
+ *
+ *
+ * -flush_mod: flush table modification state.
+ * typedef void (ta_flush_mod)(void *ta_buf);
+ * OPTIONAL(need_modify), unlocked. (M_WAITOK).
+ *
+ * Performs flush for the following:
+ * - prepare_mod (modification was not necessary)
+ * - modify (for the old state)
+ *
+ *
+ *
+ * -change_ti: monitor table info pointer changes
+ * typedef void (ta_change_ti)(void *ta_state, struct table_info *ti);
+ * OPTIONAL, locked (UH). (M_NOWAIT).
+ *
+ * Called when the @ti pointer changes. Called immediately after -init
+ * to set initial state.
+ *
+ *
+ *
+ * -foreach: calls @f for each table entry
+ * typedef void ta_foreach(void *ta_state, struct table_info *ti,
+ *     ta_foreach_f *f, void *arg);
+ * MANDATORY, locked(UH). (M_NOWAIT).
+ *
+ * Runs callback with specified argument for each table entry.
+ * Typically used for dumping table entries.
+ *
+ *
+ *
+ * -dump_tentry: dump table entry in current @tentry format.
+ * typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e,
+ *     ipfw_obj_tentry *tent);
+ * MANDATORY, locked(UH). (M_NOWAIT). Returns 0 on success.
+ *
+ * Dumps entry @e to @tent.
+ *
+ *
+ * -print_config: prints custom algorithm options into buffer.
+ * typedef void (ta_print_config)(void *ta_state, struct table_info *ti, + * char *buf, size_t bufsize); + * OPTIONAL. locked(UH). (M_NOWAIT). + * + * Prints custom algorithm options in the format suitable to pass + * back to -init callback. + * + * + * + * -dump_tinfo: dumps algo-specific info. + * typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti, + * ipfw_ta_tinfo *tinfo); + * OPTIONAL. locked(UH). (M_NOWAIT). + * + * Dumps options like items size/hash size, etc. + */ + +MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); + +/* + * Utility structures/functions common to more than one algo + */ + +struct mod_item { + void *main_ptr; + size_t size; + void *main_ptr6; + size_t size6; +}; + +static int badd(const void *key, void *item, void *base, size_t nmemb, + size_t size, int (*compar) (const void *, const void *)); +static int bdel(const void *key, void *base, size_t nmemb, size_t size, + int (*compar) (const void *, const void *)); + + +/* + * ADDR implementation using radix + * + */ + +/* + * The radix code expects addr and mask to be array of bytes, + * with the first byte being the length of the array. rn_inithead + * is called with the offset in bits of the lookup key within the + * array. If we use a sockaddr_in as the underlying type, + * sin_len is conveniently located at offset 0, sin_addr is at + * offset 4 and normally aligned. + * But for portability, let's avoid assumption and make the code explicit + */ +#define KEY_LEN(v) *((uint8_t *)&(v)) +/* + * Do not require radix to compare more than actual IPv4/IPv6 address + */ +#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t)) +#define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr)) + +#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr)) +#define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr)) + +struct radix_addr_entry { + struct radix_node rn[2]; + struct sockaddr_in addr; + uint32_t value; + uint8_t masklen; +}; + +struct sa_in6 { + uint8_t sin6_len; + uint8_t sin6_family; + uint8_t pad[2]; + struct in6_addr sin6_addr; +}; + +struct radix_addr_xentry { + struct radix_node rn[2]; + struct sa_in6 addr6; + uint32_t value; + uint8_t masklen; +}; + +struct radix_cfg { + struct radix_node_head *head4; + struct radix_node_head *head6; + size_t count4; + size_t count6; +}; + +struct ta_buf_radix +{ + void *ent_ptr; + struct sockaddr *addr_ptr; + struct sockaddr *mask_ptr; + union { + struct { + struct sockaddr_in sa; + struct sockaddr_in ma; + } a4; + struct { + struct sa_in6 sa; + struct sa_in6 ma; + } a6; + } addr; +}; + +static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val); +static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state, + struct table_info *ti, char *data, uint8_t tflags); +static int flush_radix_entry(struct radix_node *rn, void *arg); +static void ta_destroy_radix(void *ta_state, struct table_info *ti); +static void ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, + ipfw_ta_tinfo *tinfo); +static int ta_dump_radix_tentry(void *ta_state, struct table_info *ti, + void *e, ipfw_obj_tentry *tent); +static int ta_find_radix_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); +static void ta_foreach_radix(void *ta_state, struct table_info *ti, + ta_foreach_f *f, void *arg); +static void tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, + struct sockaddr *ma, int *set_mask); +static int ta_prepare_add_radix(struct ip_fw_chain *ch, 
struct tentry_info *tei, + void *ta_buf); +static int ta_add_radix(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +static int ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +static int ta_del_radix(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +static void ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +static int ta_need_modify_radix(void *ta_state, struct table_info *ti, + uint32_t count, uint64_t *pflags); + +static int +ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct radix_node_head *rnh; + + if (keylen == sizeof(in_addr_t)) { + struct radix_addr_entry *ent; + struct sockaddr_in sa; + KEY_LEN(sa) = KEY_LEN_INET; + sa.sin_addr.s_addr = *((in_addr_t *)key); + rnh = (struct radix_node_head *)ti->state; + ent = (struct radix_addr_entry *)(rnh->rnh_matchaddr(&sa, &rnh->rh)); + if (ent != NULL) { + *val = ent->value; + return (1); + } + } else { + struct radix_addr_xentry *xent; + struct sa_in6 sa6; + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, key, sizeof(struct in6_addr)); + rnh = (struct radix_node_head *)ti->xstate; + xent = (struct radix_addr_xentry *)(rnh->rnh_matchaddr(&sa6, &rnh->rh)); + if (xent != NULL) { + *val = xent->value; + return (1); + } + } + + return (0); +} + +/* + * New table + */ +static int +ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + struct radix_cfg *cfg; + + if (!rn_inithead(&ti->state, OFF_LEN_INET)) + return (ENOMEM); + if (!rn_inithead(&ti->xstate, OFF_LEN_INET6)) { + rn_detachhead(&ti->state); + return (ENOMEM); + } + + cfg = malloc(sizeof(struct radix_cfg), M_IPFW, M_WAITOK | M_ZERO); + + *ta_state = cfg; + ti->lookup = ta_lookup_radix; + + return (0); +} + +static int +flush_radix_entry(struct radix_node *rn, void *arg) +{ + struct radix_node_head * const rnh = arg; + struct radix_addr_entry *ent; + + ent = (struct radix_addr_entry *) + rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, &rnh->rh); + if (ent != NULL) + free(ent, M_IPFW_TBL); + return (0); +} + +static void +ta_destroy_radix(void *ta_state, struct table_info *ti) +{ + struct radix_cfg *cfg; + struct radix_node_head *rnh; + + cfg = (struct radix_cfg *)ta_state; + + rnh = (struct radix_node_head *)(ti->state); + rnh->rnh_walktree(&rnh->rh, flush_radix_entry, rnh); + rn_detachhead(&ti->state); + + rnh = (struct radix_node_head *)(ti->xstate); + rnh->rnh_walktree(&rnh->rh, flush_radix_entry, rnh); + rn_detachhead(&ti->xstate); + + free(cfg, M_IPFW); +} + +/* + * Provide algo-specific table info + */ +static void +ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct radix_cfg *cfg; + + cfg = (struct radix_cfg *)ta_state; + + tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; + tinfo->taclass4 = IPFW_TACLASS_RADIX; + tinfo->count4 = cfg->count4; + tinfo->itemsize4 = sizeof(struct radix_addr_entry); + tinfo->taclass6 = IPFW_TACLASS_RADIX; + tinfo->count6 = cfg->count6; + tinfo->itemsize6 = sizeof(struct radix_addr_xentry); +} + +static int +ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct radix_addr_entry *n; +#ifdef INET6 + struct radix_addr_xentry *xn; +#endif + + n = (struct radix_addr_entry *)e; + + /* Guess IPv4/IPv6 radix by sockaddr family */ + if (n->addr.sin_family == AF_INET) { + 
tent->k.addr.s_addr = n->addr.sin_addr.s_addr; + tent->masklen = n->masklen; + tent->subtype = AF_INET; + tent->v.kidx = n->value; +#ifdef INET6 + } else { + xn = (struct radix_addr_xentry *)e; + memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr)); + tent->masklen = xn->masklen; + tent->subtype = AF_INET6; + tent->v.kidx = xn->value; +#endif + } + + return (0); +} + +static int +ta_find_radix_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct radix_node_head *rnh; + void *e; + + e = NULL; + if (tent->subtype == AF_INET) { + struct sockaddr_in sa; + KEY_LEN(sa) = KEY_LEN_INET; + sa.sin_addr.s_addr = tent->k.addr.s_addr; + rnh = (struct radix_node_head *)ti->state; + e = rnh->rnh_matchaddr(&sa, &rnh->rh); + } else { + struct sa_in6 sa6; + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr)); + rnh = (struct radix_node_head *)ti->xstate; + e = rnh->rnh_matchaddr(&sa6, &rnh->rh); + } + + if (e != NULL) { + ta_dump_radix_tentry(ta_state, ti, e, tent); + return (0); + } + + return (ENOENT); +} + +static void +ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct radix_node_head *rnh; + + rnh = (struct radix_node_head *)(ti->state); + rnh->rnh_walktree(&rnh->rh, (walktree_f_t *)f, arg); + + rnh = (struct radix_node_head *)(ti->xstate); + rnh->rnh_walktree(&rnh->rh, (walktree_f_t *)f, arg); +} + + +#ifdef INET6 +static inline void ipv6_writemask(struct in6_addr *addr6, uint8_t mask); + +static inline void +ipv6_writemask(struct in6_addr *addr6, uint8_t mask) +{ + uint32_t *cp; + + for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) + *cp++ = 0xFFFFFFFF; + if (mask > 0) + *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); +} +#endif + +static void +tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, + struct sockaddr *ma, int *set_mask) +{ + int mlen; +#ifdef INET + struct sockaddr_in *addr, *mask; +#endif +#ifdef INET6 + struct sa_in6 *addr6, *mask6; +#endif + in_addr_t a4; + + mlen = tei->masklen; + + if (tei->subtype == AF_INET) { +#ifdef INET + addr = (struct sockaddr_in *)sa; + mask = (struct sockaddr_in *)ma; + /* Set 'total' structure length */ + KEY_LEN(*addr) = KEY_LEN_INET; + KEY_LEN(*mask) = KEY_LEN_INET; + addr->sin_family = AF_INET; + mask->sin_addr.s_addr = + htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); + a4 = *((in_addr_t *)tei->paddr); + addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr; + if (mlen != 32) + *set_mask = 1; + else + *set_mask = 0; +#endif +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + /* IPv6 case */ + addr6 = (struct sa_in6 *)sa; + mask6 = (struct sa_in6 *)ma; + /* Set 'total' structure length */ + KEY_LEN(*addr6) = KEY_LEN_INET6; + KEY_LEN(*mask6) = KEY_LEN_INET6; + addr6->sin6_family = AF_INET6; + ipv6_writemask(&mask6->sin6_addr, mlen); + memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr)); + APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr); + if (mlen != 128) + *set_mask = 1; + else + *set_mask = 0; +#endif + } +} + +static int +ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_radix *tb; + struct radix_addr_entry *ent; +#ifdef INET6 + struct radix_addr_xentry *xent; +#endif + struct sockaddr *addr, *mask; + int mlen, set_mask; + + tb = (struct ta_buf_radix *)ta_buf; + + mlen = tei->masklen; + set_mask = 0; + + if (tei->subtype == AF_INET) { +#ifdef INET + if (mlen > 32) + return (EINVAL); + ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); + ent->masklen = mlen; + + addr = (struct sockaddr *)&ent->addr; + mask = (struct sockaddr *)&tb->addr.a4.ma; + tb->ent_ptr = ent; +#endif +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + /* IPv6 case */ + if (mlen > 128) + return (EINVAL); + xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); + xent->masklen = mlen; + + addr = (struct sockaddr *)&xent->addr6; + mask = (struct sockaddr *)&tb->addr.a6.ma; + tb->ent_ptr = xent; +#endif + } else { + /* Unknown CIDR type */ + return (EINVAL); + } + + tei_to_sockaddr_ent(tei, addr, mask, &set_mask); + /* Set pointers */ + tb->addr_ptr = addr; + if (set_mask != 0) + tb->mask_ptr = mask; + + return (0); +} + +static int +ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct radix_cfg *cfg; + struct radix_node_head *rnh; + struct radix_node *rn; + struct ta_buf_radix *tb; + uint32_t *old_value, value; + + cfg = (struct radix_cfg *)ta_state; + tb = (struct ta_buf_radix *)ta_buf; + + /* Save current entry value from @tei */ + if (tei->subtype == AF_INET) { + rnh = ti->state; + ((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value; + } else { + rnh = ti->xstate; + ((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value; + } + + /* Search for an entry first */ + rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, &rnh->rh); + if (rn != NULL) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + /* Record already exists. 
Update value if we're asked to */
+		if (tei->subtype == AF_INET)
+			old_value = &((struct radix_addr_entry *)rn)->value;
+		else
+			old_value = &((struct radix_addr_xentry *)rn)->value;
+
+		value = *old_value;
+		*old_value = tei->value;
+		tei->value = value;
+
+		/* Indicate that update has happened instead of addition */
+		tei->flags |= TEI_FLAGS_UPDATED;
+		*pnum = 0;
+
+		return (0);
+	}
+
+	if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
+		return (EFBIG);
+
+	rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, &rnh->rh,tb->ent_ptr);
+	if (rn == NULL) {
+		/* Unknown error */
+		return (EINVAL);
+	}
+
+	if (tei->subtype == AF_INET)
+		cfg->count4++;
+	else
+		cfg->count6++;
+	tb->ent_ptr = NULL;
+	*pnum = 1;
+
+	return (0);
+}
+
+static int
+ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf)
+{
+	struct ta_buf_radix *tb;
+	struct sockaddr *addr, *mask;
+	int mlen, set_mask;
+
+	tb = (struct ta_buf_radix *)ta_buf;
+
+	mlen = tei->masklen;
+	set_mask = 0;
+
+	if (tei->subtype == AF_INET) {
+		if (mlen > 32)
+			return (EINVAL);
+
+		addr = (struct sockaddr *)&tb->addr.a4.sa;
+		mask = (struct sockaddr *)&tb->addr.a4.ma;
+#ifdef INET6
+	} else if (tei->subtype == AF_INET6) {
+		if (mlen > 128)
+			return (EINVAL);
+
+		addr = (struct sockaddr *)&tb->addr.a6.sa;
+		mask = (struct sockaddr *)&tb->addr.a6.ma;
+#endif
+	} else
+		return (EINVAL);
+
+	tei_to_sockaddr_ent(tei, addr, mask, &set_mask);
+	tb->addr_ptr = addr;
+	if (set_mask != 0)
+		tb->mask_ptr = mask;
+
+	return (0);
+}
+
+static int
+ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+    void *ta_buf, uint32_t *pnum)
+{
+	struct radix_cfg *cfg;
+	struct radix_node_head *rnh;
+	struct radix_node *rn;
+	struct ta_buf_radix *tb;
+
+	cfg = (struct radix_cfg *)ta_state;
+	tb = (struct ta_buf_radix *)ta_buf;
+
+	if (tei->subtype == AF_INET)
+		rnh = ti->state;
+	else
+		rnh = ti->xstate;
+
+	rn = rnh->rnh_deladdr(tb->addr_ptr, tb->mask_ptr, &rnh->rh);
+
+	if (rn == NULL)
+		return (ENOENT);
+
+	/* Save entry value to @tei */
+	if (tei->subtype == AF_INET)
+		tei->value = ((struct radix_addr_entry *)rn)->value;
+	else
+		tei->value = ((struct radix_addr_xentry *)rn)->value;
+
+	tb->ent_ptr = rn;
+
+	if (tei->subtype == AF_INET)
+		cfg->count4--;
+	else
+		cfg->count6--;
+	*pnum = 1;
+
+	return (0);
+}
+
+static void
+ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf)
+{
+	struct ta_buf_radix *tb;
+
+	tb = (struct ta_buf_radix *)ta_buf;
+
+	if (tb->ent_ptr != NULL)
+		free(tb->ent_ptr, M_IPFW_TBL);
+}
+
+static int
+ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count,
+    uint64_t *pflags)
+{
+
+	/*
+	 * The radix implementation does not require additional memory
+	 * allocations other than the nodes themselves. Adding new masks
+	 * to the tree does, but there is no API to call for that (and
+	 * we do not know which sizes would be needed).
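+	 * Returning 0 here tells the core that no resize pass is needed,
+	 * which is why addr_radix below leaves -prepare_mod, -fill_mod,
+	 * -modify and -flush_mod unset.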
+	 */
+	return (0);
+}
+
+struct table_algo addr_radix = {
+	.name		= "addr:radix",
+	.type		= IPFW_TABLE_ADDR,
+	.flags		= TA_FLAG_DEFAULT,
+	.ta_buf_size	= sizeof(struct ta_buf_radix),
+	.init		= ta_init_radix,
+	.destroy	= ta_destroy_radix,
+	.prepare_add	= ta_prepare_add_radix,
+	.prepare_del	= ta_prepare_del_radix,
+	.add		= ta_add_radix,
+	.del		= ta_del_radix,
+	.flush_entry	= ta_flush_radix_entry,
+	.foreach	= ta_foreach_radix,
+	.dump_tentry	= ta_dump_radix_tentry,
+	.find_tentry	= ta_find_radix_tentry,
+	.dump_tinfo	= ta_dump_radix_tinfo,
+	.need_modify	= ta_need_modify_radix,
+};
+
+
+/*
+ * addr:hash cmds
+ *
+ *
+ * ti->data:
+ * [inv.mask4][inv.mask6][log2hsize4][log2hsize6]
+ * [        8][        8][         8][         8]
+ *
+ * inv.mask4: 32 - mask
+ * inv.mask6:
+ * 1) _slow lookup: mask
+ * 2) _aligned: mask / 8
+ * 3) _64: 8
+ *
+ *
+ * pflags:
+ * [hsize4][hsize6]
+ * [    16][    16]
+ */
+
+struct chashentry;
+
+SLIST_HEAD(chashbhead, chashentry);
+
+struct chash_cfg {
+	struct chashbhead *head4;
+	struct chashbhead *head6;
+	size_t	size4;
+	size_t	size6;
+	size_t	items4;
+	size_t	items6;
+	uint8_t	mask4;
+	uint8_t	mask6;
+};
+
+struct chashentry {
+	SLIST_ENTRY(chashentry)	next;
+	uint32_t	value;
+	uint32_t	type;
+	union {
+		uint32_t	a4;	/* Host format */
+		struct in6_addr	a6;	/* Network format */
+	} a;
+};
+
+struct ta_buf_chash
+{
+	void *ent_ptr;
+	struct chashentry ent;
+};
+
+#ifdef INET
+static __inline uint32_t hash_ip(uint32_t addr, int hsize);
+#endif
+#ifdef INET6
+static __inline uint32_t hash_ip6(struct in6_addr *addr6, int hsize);
+static __inline uint16_t hash_ip64(struct in6_addr *addr6, int hsize);
+static __inline uint32_t hash_ip6_slow(struct in6_addr *addr6, void *key,
+    int mask, int hsize);
+static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask,
+    int hsize);
+#endif
+static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen,
+    uint32_t *val);
+static int ta_lookup_chash_aligned(struct table_info *ti, void *key,
+    uint32_t keylen, uint32_t *val);
+static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen,
+    uint32_t *val);
+static int chash_parse_opts(struct chash_cfg *cfg, char *data);
+static void ta_print_chash_config(void *ta_state, struct table_info *ti,
+    char *buf, size_t bufsize);
+static int ta_log2(uint32_t v);
+static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state,
+    struct table_info *ti, char *data, uint8_t tflags);
+static void ta_destroy_chash(void *ta_state, struct table_info *ti);
+static void ta_dump_chash_tinfo(void *ta_state, struct table_info *ti,
+    ipfw_ta_tinfo *tinfo);
+static int ta_dump_chash_tentry(void *ta_state, struct table_info *ti,
+    void *e, ipfw_obj_tentry *tent);
+static uint32_t hash_ent(struct chashentry *ent, int af, int mlen,
+    uint32_t size);
+static int tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent);
+static int ta_find_chash_tentry(void *ta_state, struct table_info *ti,
+    ipfw_obj_tentry *tent);
+static void ta_foreach_chash(void *ta_state, struct table_info *ti,
+    ta_foreach_f *f, void *arg);
+static int ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf);
+static int ta_add_chash(void *ta_state, struct table_info *ti,
+    struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static int ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf);
+static int ta_del_chash(void *ta_state, struct table_info *ti,
+    struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
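+
+/*
+ * Worked example of the ti->data layout above (illustrative values
+ * only): a table created with "masks=/24,/64" and the default
+ * 128-bucket hashes is set up by ta_init_chash() as
+ * ti->data = (32 - 24) << 24 | (128 - 64) << 16 | 7 << 8 | 7
+ *          = 0x08400707, with ta_lookup_chash_64 as the lookup hook.
+ */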
+static void ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +static int ta_need_modify_chash(void *ta_state, struct table_info *ti, + uint32_t count, uint64_t *pflags); +static int ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags); +static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t *pflags); +static void ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags); +static void ta_flush_mod_chash(void *ta_buf); + + +#ifdef INET +static __inline uint32_t +hash_ip(uint32_t addr, int hsize) +{ + + return (addr % (hsize - 1)); +} +#endif + +#ifdef INET6 +static __inline uint32_t +hash_ip6(struct in6_addr *addr6, int hsize) +{ + uint32_t i; + + i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1] ^ + addr6->s6_addr32[2] ^ addr6->s6_addr32[3]; + + return (i % (hsize - 1)); +} + + +static __inline uint16_t +hash_ip64(struct in6_addr *addr6, int hsize) +{ + uint32_t i; + + i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1]; + + return (i % (hsize - 1)); +} + + +static __inline uint32_t +hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize) +{ + struct in6_addr mask6; + + ipv6_writemask(&mask6, mask); + memcpy(addr6, key, sizeof(struct in6_addr)); + APPLY_MASK(addr6, &mask6); + return (hash_ip6(addr6, hsize)); +} + +static __inline uint32_t +hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize) +{ + uint64_t *paddr; + + paddr = (uint64_t *)addr6; + *paddr = 0; + *(paddr + 1) = 0; + memcpy(addr6, key, mask); + return (hash_ip6(addr6, hsize)); +} +#endif + +static int +ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct chashbhead *head; + struct chashentry *ent; + uint16_t hash, hsize; + uint8_t imask; + + if (keylen == sizeof(in_addr_t)) { +#ifdef INET + head = (struct chashbhead *)ti->state; + imask = ti->data >> 24; + hsize = 1 << ((ti->data & 0xFFFF) >> 8); + uint32_t a; + a = ntohl(*((in_addr_t *)key)); + a = a >> imask; + hash = hash_ip(a, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (ent->a.a4 == a) { + *val = ent->value; + return (1); + } + } +#endif + } else { +#ifdef INET6 + /* IPv6: worst scenario: non-round mask */ + struct in6_addr addr6; + head = (struct chashbhead *)ti->xstate; + imask = (ti->data & 0xFF0000) >> 16; + hsize = 1 << (ti->data & 0xFF); + hash = hash_ip6_slow(&addr6, key, imask, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (memcmp(&ent->a.a6, &addr6, 16) == 0) { + *val = ent->value; + return (1); + } + } +#endif + } + + return (0); +} + +static int +ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct chashbhead *head; + struct chashentry *ent; + uint16_t hash, hsize; + uint8_t imask; + + if (keylen == sizeof(in_addr_t)) { +#ifdef INET + head = (struct chashbhead *)ti->state; + imask = ti->data >> 24; + hsize = 1 << ((ti->data & 0xFFFF) >> 8); + uint32_t a; + a = ntohl(*((in_addr_t *)key)); + a = a >> imask; + hash = hash_ip(a, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (ent->a.a4 == a) { + *val = ent->value; + return (1); + } + } +#endif + } else { +#ifdef INET6 + /* IPv6: aligned to 8bit mask */ + struct in6_addr addr6; + uint64_t *paddr, *ptmp; + head = (struct chashbhead *)ti->xstate; + imask = (ti->data & 0xFF0000) >> 16; + hsize = 1 << (ti->data & 0xFF); + + hash = hash_ip6_al(&addr6, key, imask, hsize); + paddr = (uint64_t *)&addr6; + SLIST_FOREACH(ent, &head[hash], next) { + ptmp = (uint64_t *)&ent->a.a6; 
+ if (paddr[0] == ptmp[0] && paddr[1] == ptmp[1]) { + *val = ent->value; + return (1); + } + } +#endif + } + + return (0); +} + +static int +ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct chashbhead *head; + struct chashentry *ent; + uint16_t hash, hsize; + uint8_t imask; + + if (keylen == sizeof(in_addr_t)) { +#ifdef INET + head = (struct chashbhead *)ti->state; + imask = ti->data >> 24; + hsize = 1 << ((ti->data & 0xFFFF) >> 8); + uint32_t a; + a = ntohl(*((in_addr_t *)key)); + a = a >> imask; + hash = hash_ip(a, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (ent->a.a4 == a) { + *val = ent->value; + return (1); + } + } +#endif + } else { +#ifdef INET6 + /* IPv6: /64 */ + uint64_t a6, *paddr; + head = (struct chashbhead *)ti->xstate; + paddr = (uint64_t *)key; + hsize = 1 << (ti->data & 0xFF); + a6 = *paddr; + hash = hash_ip64((struct in6_addr *)key, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + paddr = (uint64_t *)&ent->a.a6; + if (a6 == *paddr) { + *val = ent->value; + return (1); + } + } +#endif + } + + return (0); +} + +static int +chash_parse_opts(struct chash_cfg *cfg, char *data) +{ + char *pdel, *pend, *s; + int mask4, mask6; + + mask4 = cfg->mask4; + mask6 = cfg->mask6; + + if (data == NULL) + return (0); + if ((pdel = strchr(data, ' ')) == NULL) + return (0); + while (*pdel == ' ') + pdel++; + if (strncmp(pdel, "masks=", 6) != 0) + return (EINVAL); + if ((s = strchr(pdel, ' ')) != NULL) + *s++ = '\0'; + + pdel += 6; + /* Need /XX[,/YY] */ + if (*pdel++ != '/') + return (EINVAL); + mask4 = strtol(pdel, &pend, 10); + if (*pend == ',') { + /* ,/YY */ + pdel = pend + 1; + if (*pdel++ != '/') + return (EINVAL); + mask6 = strtol(pdel, &pend, 10); + if (*pend != '\0') + return (EINVAL); + } else if (*pend != '\0') + return (EINVAL); + + if (mask4 < 0 || mask4 > 32 || mask6 < 0 || mask6 > 128) + return (EINVAL); + + cfg->mask4 = mask4; + cfg->mask6 = mask6; + + return (0); +} + +static void +ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, + size_t bufsize) +{ + struct chash_cfg *cfg; + + cfg = (struct chash_cfg *)ta_state; + + if (cfg->mask4 != 32 || cfg->mask6 != 128) + snprintf(buf, bufsize, "%s masks=/%d,/%d", "addr:hash", + cfg->mask4, cfg->mask6); + else + snprintf(buf, bufsize, "%s", "addr:hash"); +} + +static int +ta_log2(uint32_t v) +{ + uint32_t r; + + r = 0; + while (v >>= 1) + r++; + + return (r); +} + +/* + * New table. 
+ * We assume 'data' to be either NULL or the following format: + * 'addr:hash [masks=/32[,/128]]' + */ +static int +ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + int error, i; + uint32_t hsize; + struct chash_cfg *cfg; + + cfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO); + + cfg->mask4 = 32; + cfg->mask6 = 128; + + if ((error = chash_parse_opts(cfg, data)) != 0) { + free(cfg, M_IPFW); + return (error); + } + + cfg->size4 = 128; + cfg->size6 = 128; + + cfg->head4 = malloc(sizeof(struct chashbhead) * cfg->size4, M_IPFW, + M_WAITOK | M_ZERO); + cfg->head6 = malloc(sizeof(struct chashbhead) * cfg->size6, M_IPFW, + M_WAITOK | M_ZERO); + for (i = 0; i < cfg->size4; i++) + SLIST_INIT(&cfg->head4[i]); + for (i = 0; i < cfg->size6; i++) + SLIST_INIT(&cfg->head6[i]); + + + *ta_state = cfg; + ti->state = cfg->head4; + ti->xstate = cfg->head6; + + /* Store data depending on v6 mask length */ + hsize = ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6); + if (cfg->mask6 == 64) { + ti->data = (32 - cfg->mask4) << 24 | (128 - cfg->mask6) << 16| + hsize; + ti->lookup = ta_lookup_chash_64; + } else if ((cfg->mask6 % 8) == 0) { + ti->data = (32 - cfg->mask4) << 24 | + cfg->mask6 << 13 | hsize; + ti->lookup = ta_lookup_chash_aligned; + } else { + /* don't do that! */ + ti->data = (32 - cfg->mask4) << 24 | + cfg->mask6 << 16 | hsize; + ti->lookup = ta_lookup_chash_slow; + } + + return (0); +} + +static void +ta_destroy_chash(void *ta_state, struct table_info *ti) +{ + struct chash_cfg *cfg; + struct chashentry *ent, *ent_next; + int i; + + cfg = (struct chash_cfg *)ta_state; + + for (i = 0; i < cfg->size4; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next) + free(ent, M_IPFW_TBL); + + for (i = 0; i < cfg->size6; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next) + free(ent, M_IPFW_TBL); + + free(cfg->head4, M_IPFW); + free(cfg->head6, M_IPFW); + + free(cfg, M_IPFW); +} + +static void +ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct chash_cfg *cfg; + + cfg = (struct chash_cfg *)ta_state; + + tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; + tinfo->taclass4 = IPFW_TACLASS_HASH; + tinfo->size4 = cfg->size4; + tinfo->count4 = cfg->items4; + tinfo->itemsize4 = sizeof(struct chashentry); + tinfo->taclass6 = IPFW_TACLASS_HASH; + tinfo->size6 = cfg->size6; + tinfo->count6 = cfg->items6; + tinfo->itemsize6 = sizeof(struct chashentry); +} + +static int +ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct chash_cfg *cfg; + struct chashentry *ent; + + cfg = (struct chash_cfg *)ta_state; + ent = (struct chashentry *)e; + + if (ent->type == AF_INET) { + tent->k.addr.s_addr = htonl(ent->a.a4 << (32 - cfg->mask4)); + tent->masklen = cfg->mask4; + tent->subtype = AF_INET; + tent->v.kidx = ent->value; +#ifdef INET6 + } else { + memcpy(&tent->k, &ent->a.a6, sizeof(struct in6_addr)); + tent->masklen = cfg->mask6; + tent->subtype = AF_INET6; + tent->v.kidx = ent->value; +#endif + } + + return (0); +} + +static uint32_t +hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size) +{ + uint32_t hash; + + hash = 0; + + if (af == AF_INET) { +#ifdef INET + hash = hash_ip(ent->a.a4, size); +#endif + } else { +#ifdef INET6 + if (mlen == 64) + hash = hash_ip64(&ent->a.a6, size); + else + hash = hash_ip6(&ent->a.a6, size); +#endif + } + + return (hash); +} + +static int +tei_to_chash_ent(struct tentry_info *tei, struct 
chashentry *ent)
+{
+	int mlen;
+#ifdef INET6
+	struct in6_addr mask6;
+#endif
+
+
+	mlen = tei->masklen;
+
+	if (tei->subtype == AF_INET) {
+#ifdef INET
+		if (mlen > 32)
+			return (EINVAL);
+		ent->type = AF_INET;
+
+		/* Calculate masked address */
+		ent->a.a4 = ntohl(*((in_addr_t *)tei->paddr)) >> (32 - mlen);
+#endif
+#ifdef INET6
+	} else if (tei->subtype == AF_INET6) {
+		/* IPv6 case */
+		if (mlen > 128)
+			return (EINVAL);
+		ent->type = AF_INET6;
+
+		ipv6_writemask(&mask6, mlen);
+		memcpy(&ent->a.a6, tei->paddr, sizeof(struct in6_addr));
+		APPLY_MASK(&ent->a.a6, &mask6);
+#endif
+	} else {
+		/* Unknown CIDR type */
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+static int
+ta_find_chash_tentry(void *ta_state, struct table_info *ti,
+    ipfw_obj_tentry *tent)
+{
+	struct chash_cfg *cfg;
+	struct chashbhead *head;
+	struct chashentry ent, *tmp;
+	struct tentry_info tei;
+	int error;
+	uint32_t hash;
+
+	cfg = (struct chash_cfg *)ta_state;
+
+	memset(&ent, 0, sizeof(ent));
+	memset(&tei, 0, sizeof(tei));
+
+	if (tent->subtype == AF_INET) {
+		tei.paddr = &tent->k.addr;
+		tei.masklen = cfg->mask4;
+		tei.subtype = AF_INET;
+
+		if ((error = tei_to_chash_ent(&tei, &ent)) != 0)
+			return (error);
+
+		head = cfg->head4;
+		hash = hash_ent(&ent, AF_INET, cfg->mask4, cfg->size4);
+		/* Check for existence */
+		SLIST_FOREACH(tmp, &head[hash], next) {
+			if (tmp->a.a4 != ent.a.a4)
+				continue;
+
+			ta_dump_chash_tentry(ta_state, ti, tmp, tent);
+			return (0);
+		}
+	} else {
+		tei.paddr = &tent->k.addr6;
+		tei.masklen = cfg->mask6;
+		tei.subtype = AF_INET6;
+
+		if ((error = tei_to_chash_ent(&tei, &ent)) != 0)
+			return (error);
+
+		head = cfg->head6;
+		hash = hash_ent(&ent, AF_INET6, cfg->mask6, cfg->size6);
+		/* Check for existence */
+		SLIST_FOREACH(tmp, &head[hash], next) {
+			if (memcmp(&tmp->a.a6, &ent.a.a6, 16) != 0)
+				continue;
+			ta_dump_chash_tentry(ta_state, ti, tmp, tent);
+			return (0);
+		}
+	}
+
+	return (ENOENT);
+}
+
+static void
+ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f,
+    void *arg)
+{
+	struct chash_cfg *cfg;
+	struct chashentry *ent, *ent_next;
+	int i;
+
+	cfg = (struct chash_cfg *)ta_state;
+
+	for (i = 0; i < cfg->size4; i++)
+		SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next)
+			f(ent, arg);
+
+	for (i = 0; i < cfg->size6; i++)
+		SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next)
+			f(ent, arg);
+}
+
+static int
+ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf)
+{
+	struct ta_buf_chash *tb;
+	struct chashentry *ent;
+	int error;
+
+	tb = (struct ta_buf_chash *)ta_buf;
+
+	ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
+
+	error = tei_to_chash_ent(tei, ent);
+	if (error != 0) {
+		free(ent, M_IPFW_TBL);
+		return (error);
+	}
+	tb->ent_ptr = ent;
+
+	return (0);
+}
+
+static int
+ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+    void *ta_buf, uint32_t *pnum)
+{
+	struct chash_cfg *cfg;
+	struct chashbhead *head;
+	struct chashentry *ent, *tmp;
+	struct ta_buf_chash *tb;
+	int exists;
+	uint32_t hash, value;
+
+	cfg = (struct chash_cfg *)ta_state;
+	tb = (struct ta_buf_chash *)ta_buf;
+	ent = (struct chashentry *)tb->ent_ptr;
+	hash = 0;
+	exists = 0;
+
+	/* Read current value from @tei */
+	ent->value = tei->value;
+
+	/* Select hash bucket and check for existence */
+	if (tei->subtype == AF_INET) {
+		if (tei->masklen != cfg->mask4)
+			return (EINVAL);
+		head = cfg->head4;
+		hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4);
+
+		/* Check for existence */
+		SLIST_FOREACH(tmp, &head[hash], next) {
+			if
(tmp->a.a4 == ent->a.a4) { + exists = 1; + break; + } + } + } else { + if (tei->masklen != cfg->mask6) + return (EINVAL); + head = cfg->head6; + hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6); + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (memcmp(&tmp->a.a6, &ent->a.a6, 16) == 0) { + exists = 1; + break; + } + } + } + + if (exists == 1) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + /* Record already exists. Update value if we're asked to */ + value = tmp->value; + tmp->value = tei->value; + tei->value = value; + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + } else { + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + SLIST_INSERT_HEAD(&head[hash], ent, next); + tb->ent_ptr = NULL; + *pnum = 1; + + /* Update counters */ + if (tei->subtype == AF_INET) + cfg->items4++; + else + cfg->items6++; + } + + return (0); +} + +static int +ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_chash *tb; + + tb = (struct ta_buf_chash *)ta_buf; + + return (tei_to_chash_ent(tei, &tb->ent)); +} + +static int +ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct chash_cfg *cfg; + struct chashbhead *head; + struct chashentry *tmp, *tmp_next, *ent; + struct ta_buf_chash *tb; + uint32_t hash; + + cfg = (struct chash_cfg *)ta_state; + tb = (struct ta_buf_chash *)ta_buf; + ent = &tb->ent; + + if (tei->subtype == AF_INET) { + if (tei->masklen != cfg->mask4) + return (EINVAL); + head = cfg->head4; + hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4); + + SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) { + if (tmp->a.a4 != ent->a.a4) + continue; + + SLIST_REMOVE(&head[hash], tmp, chashentry, next); + cfg->items4--; + tb->ent_ptr = tmp; + tei->value = tmp->value; + *pnum = 1; + return (0); + } + } else { + if (tei->masklen != cfg->mask6) + return (EINVAL); + head = cfg->head6; + hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6); + SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) { + if (memcmp(&tmp->a.a6, &ent->a.a6, 16) != 0) + continue; + + SLIST_REMOVE(&head[hash], tmp, chashentry, next); + cfg->items6--; + tb->ent_ptr = tmp; + tei->value = tmp->value; + *pnum = 1; + return (0); + } + } + + return (ENOENT); +} + +static void +ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_chash *tb; + + tb = (struct ta_buf_chash *)ta_buf; + + if (tb->ent_ptr != NULL) + free(tb->ent_ptr, M_IPFW_TBL); +} + +/* + * Hash growing callbacks. + */ + +static int +ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct chash_cfg *cfg; + uint64_t data; + + /* + * Since we don't know exact number of IPv4/IPv6 records in @count, + * ignore non-zero @count value at all. Check current hash sizes + * and return appropriate data. + */ + + cfg = (struct chash_cfg *)ta_state; + + data = 0; + if (cfg->items4 > cfg->size4 && cfg->size4 < 65536) + data |= (cfg->size4 * 2) << 16; + if (cfg->items6 > cfg->size6 && cfg->size6 < 65536) + data |= cfg->size6 * 2; + + if (data != 0) { + *pflags = data; + return (1); + } + + return (0); +} + +/* + * Allocate new, larger chash. 
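+ *
+ * For example (illustrative numbers only): if ta_need_modify_chash()
+ * above found the 128-bucket IPv4 hash overloaded, it encoded
+ * (128 * 2) << 16 into @pflags, so this callback will see
+ * mi->size == 256 and preallocate 256 fresh SLIST heads before
+ * any lock is taken.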
+ */ +static int +ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags) +{ + struct mod_item *mi; + struct chashbhead *head; + int i; + + mi = (struct mod_item *)ta_buf; + + memset(mi, 0, sizeof(struct mod_item)); + mi->size = (*pflags >> 16) & 0xFFFF; + mi->size6 = *pflags & 0xFFFF; + if (mi->size > 0) { + head = malloc(sizeof(struct chashbhead) * mi->size, + M_IPFW, M_WAITOK | M_ZERO); + for (i = 0; i < mi->size; i++) + SLIST_INIT(&head[i]); + mi->main_ptr = head; + } + + if (mi->size6 > 0) { + head = malloc(sizeof(struct chashbhead) * mi->size6, + M_IPFW, M_WAITOK | M_ZERO); + for (i = 0; i < mi->size6; i++) + SLIST_INIT(&head[i]); + mi->main_ptr6 = head; + } + + return (0); +} + +/* + * Copy data from old runtime array to new one. + */ +static int +ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t *pflags) +{ + + /* It is not possible to do a rehash if we're not holding the WLOCK. */ + return (0); +} + +/* + * Switch old & new arrays. + */ +static void +ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags) +{ + struct mod_item *mi; + struct chash_cfg *cfg; + struct chashbhead *old_head, *new_head; + struct chashentry *ent, *ent_next; + int af, i, mlen; + uint32_t nhash; + size_t old_size, new_size; + + mi = (struct mod_item *)ta_buf; + cfg = (struct chash_cfg *)ta_state; + + /* Check which hash we need to grow and whether we still need to */ + if (mi->size > 0 && cfg->size4 < mi->size) { + new_head = (struct chashbhead *)mi->main_ptr; + new_size = mi->size; + old_size = cfg->size4; + old_head = ti->state; + mlen = cfg->mask4; + af = AF_INET; + + for (i = 0; i < old_size; i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_ent(ent, af, mlen, new_size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } + } + + ti->state = new_head; + cfg->head4 = new_head; + cfg->size4 = mi->size; + mi->main_ptr = old_head; + } + + if (mi->size6 > 0 && cfg->size6 < mi->size6) { + new_head = (struct chashbhead *)mi->main_ptr6; + new_size = mi->size6; + old_size = cfg->size6; + old_head = ti->xstate; + mlen = cfg->mask6; + af = AF_INET6; + + for (i = 0; i < old_size; i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_ent(ent, af, mlen, new_size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } + } + + ti->xstate = new_head; + cfg->head6 = new_head; + cfg->size6 = mi->size6; + mi->main_ptr6 = old_head; + } + + /* Update lower 32 bits with new values */ + ti->data &= 0xFFFFFFFF00000000; + ti->data |= ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6); +} + +/* + * Free unneeded array.
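+ *
+ * Editorial note: the three mod callbacks form one grow transaction
+ * driven by the table framework: prepare_mod() allocates the larger
+ * bucket arrays while no locks are held (M_WAITOK is safe there),
+ * modify() re-links every entry and swaps the pointers under the
+ * writer lock, and this function frees whichever arrays lost the swap.
+ * A hedged sketch of the assumed calling sequence (simplified; the
+ * wlock()/wunlock() pair stands in for the framework's real locking):
+ *
+ *    uint64_t flags;
+ *    if (ta->need_modify(state, ti, count, &flags) != 0) {
+ *        ta->prepare_mod(buf, &flags);    // may sleep
+ *        wlock();
+ *        ta->fill_mod(state, ti, buf, &flags);
+ *        ta->modify(state, ti, buf, flags);
+ *        wunlock();
+ *        ta->flush_mod(buf);              // frees the losing array
+ *    }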
+ */ +static void +ta_flush_mod_chash(void *ta_buf) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + if (mi->main_ptr != NULL) + free(mi->main_ptr, M_IPFW); + if (mi->main_ptr6 != NULL) + free(mi->main_ptr6, M_IPFW); +} + +struct table_algo addr_hash = { + .name = "addr:hash", + .type = IPFW_TABLE_ADDR, + .ta_buf_size = sizeof(struct ta_buf_chash), + .init = ta_init_chash, + .destroy = ta_destroy_chash, + .prepare_add = ta_prepare_add_chash, + .prepare_del = ta_prepare_del_chash, + .add = ta_add_chash, + .del = ta_del_chash, + .flush_entry = ta_flush_chash_entry, + .foreach = ta_foreach_chash, + .dump_tentry = ta_dump_chash_tentry, + .find_tentry = ta_find_chash_tentry, + .print_config = ta_print_chash_config, + .dump_tinfo = ta_dump_chash_tinfo, + .need_modify = ta_need_modify_chash, + .prepare_mod = ta_prepare_mod_chash, + .fill_mod = ta_fill_mod_chash, + .modify = ta_modify_chash, + .flush_mod = ta_flush_mod_chash, +}; + + +/* + * Iface table cmds. + * + * Implementation: + * + * Runtime part: + * - sorted array of "struct ifidx" pointed to by ti->state. + * Array is allocated with rounding up to IFIDX_CHUNK. Only existing + * interfaces are stored in the array; however, its allocated size is + * sufficient to hold all table records if needed. + * - current array size is stored in ti->data + * + * Table data: + * - "struct iftable_cfg" is allocated to store table state (ta_state). + * - All table records are stored inside namedobj instance. + * + */ + +struct ifidx { + uint16_t kidx; + uint16_t spare; + uint32_t value; +}; +#define DEFAULT_IFIDX_SIZE 64 + +struct iftable_cfg; + +struct ifentry { + struct named_object no; + struct ipfw_ifc ic; + struct iftable_cfg *icfg; + uint32_t value; + int linked; +}; + +struct iftable_cfg { + struct namedobj_instance *ii; + struct ip_fw_chain *ch; + struct table_info *ti; + void *main_ptr; + size_t size; /* Number of items allocated in array */ + size_t count; /* Number of all items */ + size_t used; /* Number of items _active_ now */ +}; + +struct ta_buf_ifidx +{ + struct ifentry *ife; + uint32_t value; +}; + +int compare_ifidx(const void *k, const void *v); +static struct ifidx * ifidx_find(struct table_info *ti, void *key); +static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val); +static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, + struct table_info *ti, char *data, uint8_t tflags); +static void ta_change_ti_ifidx(void *ta_state, struct table_info *ti); +static int destroy_ifidx_locked(struct namedobj_instance *ii, + struct named_object *no, void *arg); +static void ta_destroy_ifidx(void *ta_state, struct table_info *ti); +static void ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, + ipfw_ta_tinfo *tinfo); +static int ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +static int ta_add_ifidx(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +static int ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +static int ta_del_ifidx(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +static void ta_flush_ifidx_entry(struct ip_fw_chain *ch, + struct tentry_info *tei, void *ta_buf); +static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex); +static int ta_need_modify_ifidx(void *ta_state, struct table_info *ti, + uint32_t count, uint64_t *pflags); +static int ta_prepare_mod_ifidx(void *ta_buf,
uint64_t *pflags); +static int ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, + void *ta_buf, uint64_t *pflags); +static void ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags); +static void ta_flush_mod_ifidx(void *ta_buf); +static int ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent); +static int ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); +static int foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, + void *arg); +static void ta_foreach_ifidx(void *ta_state, struct table_info *ti, + ta_foreach_f *f, void *arg); + +int +compare_ifidx(const void *k, const void *v) +{ + const struct ifidx *ifidx; + uint16_t key; + + key = *((const uint16_t *)k); + ifidx = (const struct ifidx *)v; + + if (key < ifidx->kidx) + return (-1); + else if (key > ifidx->kidx) + return (1); + + return (0); +} + +/* + * Adds item @item with key @key into ascending-sorted array @base. + * Assumes @base has enough additional storage. + * + * Returns 1 on success, 0 on duplicate key. + */ +static int +badd(const void *key, void *item, void *base, size_t nmemb, + size_t size, int (*compar) (const void *, const void *)) +{ + int min, max, mid, shift, res; + caddr_t paddr; + + if (nmemb == 0) { + memcpy(base, item, size); + return (1); + } + + /* Binary search */ + min = 0; + max = nmemb - 1; + mid = 0; + while (min <= max) { + mid = (min + max) / 2; + res = compar(key, (const void *)((caddr_t)base + mid * size)); + if (res == 0) + return (0); + + if (res > 0) + min = mid + 1; + else + max = mid - 1; + } + + /* Item not found. */ + res = compar(key, (const void *)((caddr_t)base + mid * size)); + if (res > 0) + shift = mid + 1; + else + shift = mid; + + paddr = (caddr_t)base + shift * size; + if (nmemb > shift) + memmove(paddr + size, paddr, (nmemb - shift) * size); + + memcpy(paddr, item, size); + + return (1); +} + +/* + * Deletes item with key @key from ascending-sorted array @base. + * + * Returns 1 on success, 0 for non-existent key. + */ +static int +bdel(const void *key, void *base, size_t nmemb, size_t size, + int (*compar) (const void *, const void *)) +{ + caddr_t item; + size_t sz; + + item = (caddr_t)bsearch(key, base, nmemb, size, compar); + + if (item == NULL) + return (0); + + sz = (caddr_t)base + nmemb * size - item; + + if (sz > 0) + memmove(item, item + size, sz); + + return (1); +} + +static struct ifidx * +ifidx_find(struct table_info *ti, void *key) +{ + struct ifidx *ifi; + + ifi = bsearch(key, ti->state, ti->data, sizeof(struct ifidx), + compare_ifidx); + + return (ifi); +} + +static int +ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct ifidx *ifi; + + ifi = ifidx_find(ti, key); + + if (ifi != NULL) { + *val = ifi->value; + return (1); + } + + return (0); +} + +static int +ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + struct iftable_cfg *icfg; + + icfg = malloc(sizeof(struct iftable_cfg), M_IPFW, M_WAITOK | M_ZERO); + + icfg->ii = ipfw_objhash_create(DEFAULT_IFIDX_SIZE); + icfg->size = DEFAULT_IFIDX_SIZE; + icfg->main_ptr = malloc(sizeof(struct ifidx) * icfg->size, M_IPFW, + M_WAITOK | M_ZERO); + icfg->ch = ch; + + *ta_state = icfg; + ti->state = icfg->main_ptr; + ti->lookup = ta_lookup_ifidx; + + return (0); +} + +/* + * Handle tableinfo @ti pointer change (on table array resize). 
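+ *
+ * Editorial note: the runtime state of this algorithm is the plain
+ * sorted array maintained by the badd()/bdel() helpers above, so a
+ * stand-alone demonstration of their semantics is easy. A hedged
+ * userland sketch (the array contents are made up for illustration):
+ *
+ *    struct ifidx arr[4] = { { .kidx = 2 }, { .kidx = 7 } };
+ *    struct ifidx it = { .kidx = 5, .value = 42 };
+ *    uint16_t key = 5;
+ *    badd(&key, &it, arr, 2, sizeof(arr[0]), compare_ifidx);
+ *    // arr is now { 2, 5, 7 }; bdel(&key, arr, 3, sizeof(arr[0]),
+ *    // compare_ifidx) removes the entry again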
+ */ +static void +ta_change_ti_ifidx(void *ta_state, struct table_info *ti) +{ + struct iftable_cfg *icfg; + + icfg = (struct iftable_cfg *)ta_state; + icfg->ti = ti; +} + +static int +destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no, + void *arg) +{ + struct ifentry *ife; + struct ip_fw_chain *ch; + + ch = (struct ip_fw_chain *)arg; + ife = (struct ifentry *)no; + + ipfw_iface_del_notify(ch, &ife->ic); + ipfw_iface_unref(ch, &ife->ic); + free(ife, M_IPFW_TBL); + return (0); +} + + +/* + * Destroys table @ti + */ +static void +ta_destroy_ifidx(void *ta_state, struct table_info *ti) +{ + struct iftable_cfg *icfg; + struct ip_fw_chain *ch; + + icfg = (struct iftable_cfg *)ta_state; + ch = icfg->ch; + + if (icfg->main_ptr != NULL) + free(icfg->main_ptr, M_IPFW); + + IPFW_UH_WLOCK(ch); + ipfw_objhash_foreach(icfg->ii, destroy_ifidx_locked, ch); + IPFW_UH_WUNLOCK(ch); + + ipfw_objhash_destroy(icfg->ii); + + free(icfg, M_IPFW); +} + +/* + * Provide algo-specific table info + */ +static void +ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct iftable_cfg *cfg; + + cfg = (struct iftable_cfg *)ta_state; + + tinfo->taclass4 = IPFW_TACLASS_ARRAY; + tinfo->size4 = cfg->size; + tinfo->count4 = cfg->used; + tinfo->itemsize4 = sizeof(struct ifidx); +} + +/* + * Prepare state to add to the table: + * allocate an ifentry and reference the needed interface. + */ +static int +ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_ifidx *tb; + char *ifname; + struct ifentry *ife; + + tb = (struct ta_buf_ifidx *)ta_buf; + + /* Check if string is terminated */ + ifname = (char *)tei->paddr; + if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) + return (EINVAL); + + ife = malloc(sizeof(struct ifentry), M_IPFW_TBL, M_WAITOK | M_ZERO); + ife->ic.cb = if_notifier; + ife->ic.cbdata = ife; + + if (ipfw_iface_ref(ch, ifname, &ife->ic) != 0) { + free(ife, M_IPFW_TBL); + return (EINVAL); + } + + /* Use ipfw_iface 'ifname' field as stable storage */ + ife->no.name = ife->ic.iface->ifname; + + tb->ife = ife; + + return (0); +} + +static int +ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct iftable_cfg *icfg; + struct ifentry *ife, *tmp; + struct ta_buf_ifidx *tb; + struct ipfw_iface *iif; + struct ifidx *ifi; + char *ifname; + uint32_t value; + + tb = (struct ta_buf_ifidx *)ta_buf; + ifname = (char *)tei->paddr; + icfg = (struct iftable_cfg *)ta_state; + ife = tb->ife; + + ife->icfg = icfg; + ife->value = tei->value; + + tmp = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); + + if (tmp != NULL) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + + /* Exchange values in @tmp and @tei */ + value = tmp->value; + tmp->value = tei->value; + tei->value = value; + + iif = tmp->ic.iface; + if (iif->resolved != 0) { + /* We have to update runtime value, too */ + ifi = ifidx_find(ti, &iif->ifindex); + ifi->value = ife->value; + } + + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + return (0); + } + + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + + /* Link to internal list */ + ipfw_objhash_add(icfg->ii, &ife->no); + + /* Link notifier (possibly running its callback) */ + ipfw_iface_add_notify(icfg->ch, &ife->ic); + icfg->count++; + + tb->ife = NULL; + *pnum = 1; + + return (0); +} + +/* + * Prepare to delete key from table.
+ * Do basic interface name checks. + */ +static int +ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_ifidx *tb; + char *ifname; + + tb = (struct ta_buf_ifidx *)ta_buf; + + /* Check if string is terminated */ + ifname = (char *)tei->paddr; + if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) + return (EINVAL); + + return (0); +} + +/* + * Remove key from both configuration list and + * runtime array. Remove the interface notification. + */ +static int +ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct iftable_cfg *icfg; + struct ifentry *ife; + struct ta_buf_ifidx *tb; + char *ifname; + uint16_t ifindex; + int res; + + tb = (struct ta_buf_ifidx *)ta_buf; + ifname = (char *)tei->paddr; + icfg = (struct iftable_cfg *)ta_state; + ife = tb->ife; + + ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); + + if (ife == NULL) + return (ENOENT); + + if (ife->linked != 0) { + /* We have to remove item from runtime */ + ifindex = ife->ic.iface->ifindex; + + res = bdel(&ifindex, icfg->main_ptr, icfg->used, + sizeof(struct ifidx), compare_ifidx); + + KASSERT(res == 1, ("index %d does not exist", ifindex)); + icfg->used--; + ti->data = icfg->used; + ife->linked = 0; + } + + /* Unlink from local list */ + ipfw_objhash_del(icfg->ii, &ife->no); + /* Unlink notifier and deref */ + ipfw_iface_del_notify(icfg->ch, &ife->ic); + ipfw_iface_unref(icfg->ch, &ife->ic); + + icfg->count--; + tei->value = ife->value; + + tb->ife = ife; + *pnum = 1; + + return (0); +} + +/* + * Flush deleted entry. + * Drops interface reference and frees entry. + */ +static void +ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_ifidx *tb; + + tb = (struct ta_buf_ifidx *)ta_buf; + + if (tb->ife != NULL) + free(tb->ife, M_IPFW_TBL); +} + + +/* + * Handle interface announce/withdrawal for a particular table. + * Every real runtime array modification happens here. + */ +static void +if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex) +{ + struct ifentry *ife; + struct ifidx ifi; + struct iftable_cfg *icfg; + struct table_info *ti; + int res; + + ife = (struct ifentry *)cbdata; + icfg = ife->icfg; + ti = icfg->ti; + + KASSERT(ti != NULL, ("ti=NULL, check change_ti handler")); + + if (ife->linked == 0 && ifindex != 0) { + /* Interface announce */ + ifi.kidx = ifindex; + ifi.spare = 0; + ifi.value = ife->value; + res = badd(&ifindex, &ifi, icfg->main_ptr, icfg->used, + sizeof(struct ifidx), compare_ifidx); + KASSERT(res == 1, ("index %d already exists", ifindex)); + icfg->used++; + ti->data = icfg->used; + ife->linked = 1; + } else if (ife->linked != 0 && ifindex == 0) { + /* Interface withdrawal */ + ifindex = ife->ic.iface->ifindex; + + res = bdel(&ifindex, icfg->main_ptr, icfg->used, + sizeof(struct ifidx), compare_ifidx); + + KASSERT(res == 1, ("index %d does not exist", ifindex)); + icfg->used--; + ti->data = icfg->used; + ife->linked = 0; + } +} + + +/* + * Table growing callbacks. + */ + +static int +ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct iftable_cfg *cfg; + uint32_t size; + + cfg = (struct iftable_cfg *)ta_state; + + size = cfg->size; + while (size < cfg->count + count) + size *= 2; + + if (size != cfg->size) { + *pflags = size; + return (1); + } + + return (0); +} + +/* + * Allocate new, larger runtime ifidx array.
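+ *
+ * Editorial note: ta_need_modify_ifidx() above sizes the array against
+ * cfg->count (all configured records), not cfg->used (resolved ones),
+ * so every record can appear in the runtime array at once without a
+ * further resize. The doubling rule, restated as a hedged sketch:
+ *
+ *    uint32_t size = cfg->size;              // current capacity
+ *    while (size < cfg->count + count)       // count = pending adds
+ *        size *= 2;
+ *    // a resize is requested only when size grew past cfg->size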
+ */ +static int +ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + + memset(mi, 0, sizeof(struct mod_item)); + mi->size = *pflags; + mi->main_ptr = malloc(sizeof(struct ifidx) * mi->size, M_IPFW, + M_WAITOK | M_ZERO); + + return (0); +} + +/* + * Copy data from old runtime array to new one. + */ +static int +ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t *pflags) +{ + struct mod_item *mi; + struct iftable_cfg *icfg; + + mi = (struct mod_item *)ta_buf; + icfg = (struct iftable_cfg *)ta_state; + + /* Check if we still need to grow array */ + if (icfg->size >= mi->size) { + *pflags = 0; + return (0); + } + + memcpy(mi->main_ptr, icfg->main_ptr, icfg->used * sizeof(struct ifidx)); + + return (0); +} + +/* + * Switch old & new arrays. + */ +static void +ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags) +{ + struct mod_item *mi; + struct iftable_cfg *icfg; + void *old_ptr; + + mi = (struct mod_item *)ta_buf; + icfg = (struct iftable_cfg *)ta_state; + + old_ptr = icfg->main_ptr; + icfg->main_ptr = mi->main_ptr; + icfg->size = mi->size; + ti->state = icfg->main_ptr; + + mi->main_ptr = old_ptr; +} + +/* + * Free unneeded array. + */ +static void +ta_flush_mod_ifidx(void *ta_buf) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + if (mi->main_ptr != NULL) + free(mi->main_ptr, M_IPFW); +} + +static int +ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct ifentry *ife; + + ife = (struct ifentry *)e; + + tent->masklen = 8 * IF_NAMESIZE; + memcpy(&tent->k, ife->no.name, IF_NAMESIZE); + tent->v.kidx = ife->value; + + return (0); +} + +static int +ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct iftable_cfg *icfg; + struct ifentry *ife; + char *ifname; + + icfg = (struct iftable_cfg *)ta_state; + ifname = tent->k.iface; + + if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) + return (EINVAL); + + ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); + + if (ife != NULL) { + ta_dump_ifidx_tentry(ta_state, ti, ife, tent); + return (0); + } + + return (ENOENT); +} + +struct wa_ifidx { + ta_foreach_f *f; + void *arg; +}; + +static int +foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, + void *arg) +{ + struct ifentry *ife; + struct wa_ifidx *wa; + + ife = (struct ifentry *)no; + wa = (struct wa_ifidx *)arg; + + wa->f(ife, wa->arg); + return (0); +} + +static void +ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct iftable_cfg *icfg; + struct wa_ifidx wa; + + icfg = (struct iftable_cfg *)ta_state; + + wa.f = f; + wa.arg = arg; + + ipfw_objhash_foreach(icfg->ii, foreach_ifidx, &wa); +} + +struct table_algo iface_idx = { + .name = "iface:array", + .type = IPFW_TABLE_INTERFACE, + .flags = TA_FLAG_DEFAULT, + .ta_buf_size = sizeof(struct ta_buf_ifidx), + .init = ta_init_ifidx, + .destroy = ta_destroy_ifidx, + .prepare_add = ta_prepare_add_ifidx, + .prepare_del = ta_prepare_del_ifidx, + .add = ta_add_ifidx, + .del = ta_del_ifidx, + .flush_entry = ta_flush_ifidx_entry, + .foreach = ta_foreach_ifidx, + .dump_tentry = ta_dump_ifidx_tentry, + .find_tentry = ta_find_ifidx_tentry, + .dump_tinfo = ta_dump_ifidx_tinfo, + .need_modify = ta_need_modify_ifidx, + .prepare_mod = ta_prepare_mod_ifidx, + .fill_mod = ta_fill_mod_ifidx, + .modify = ta_modify_ifidx, + .flush_mod = ta_flush_mod_ifidx, +
.change_ti = ta_change_ti_ifidx, +}; + +/* + * Number array cmds. + * + * Implementation: + * + * Runtime part: + * - sorted array of "struct numarray" pointed to by ti->state. + * Array is allocated with rounding up to NUMARRAY_CHUNK. + * - current array size is stored in ti->data + * + */ + +struct numarray { + uint32_t number; + uint32_t value; +}; + +struct numarray_cfg { + void *main_ptr; + size_t size; /* Number of items allocated in array */ + size_t used; /* Number of items _active_ now */ +}; + +struct ta_buf_numarray +{ + struct numarray na; +}; + +int compare_numarray(const void *k, const void *v); +static struct numarray *numarray_find(struct table_info *ti, void *key); +static int ta_lookup_numarray(struct table_info *ti, void *key, + uint32_t keylen, uint32_t *val); +static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, + struct table_info *ti, char *data, uint8_t tflags); +static void ta_destroy_numarray(void *ta_state, struct table_info *ti); +static void ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, + ipfw_ta_tinfo *tinfo); +static int ta_prepare_add_numarray(struct ip_fw_chain *ch, + struct tentry_info *tei, void *ta_buf); +static int ta_add_numarray(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +static int ta_del_numarray(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +static void ta_flush_numarray_entry(struct ip_fw_chain *ch, + struct tentry_info *tei, void *ta_buf); +static int ta_need_modify_numarray(void *ta_state, struct table_info *ti, + uint32_t count, uint64_t *pflags); +static int ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags); +static int ta_fill_mod_numarray(void *ta_state, struct table_info *ti, + void *ta_buf, uint64_t *pflags); +static void ta_modify_numarray(void *ta_state, struct table_info *ti, + void *ta_buf, uint64_t pflags); +static void ta_flush_mod_numarray(void *ta_buf); +static int ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, + void *e, ipfw_obj_tentry *tent); +static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); +static void ta_foreach_numarray(void *ta_state, struct table_info *ti, + ta_foreach_f *f, void *arg); + +int +compare_numarray(const void *k, const void *v) +{ + const struct numarray *na; + uint32_t key; + + key = *((const uint32_t *)k); + na = (const struct numarray *)v; + + if (key < na->number) + return (-1); + else if (key > na->number) + return (1); + + return (0); +} + +static struct numarray * +numarray_find(struct table_info *ti, void *key) +{ + struct numarray *ri; + + ri = bsearch(key, ti->state, ti->data, sizeof(struct numarray), + compare_numarray); + + return (ri); +} + +static int +ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct numarray *ri; + + ri = numarray_find(ti, key); + + if (ri != NULL) { + *val = ri->value; + return (1); + } + + return (0); +} + +static int +ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + struct numarray_cfg *cfg; + + cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO); + + cfg->size = 16; + cfg->main_ptr = malloc(sizeof(struct numarray) * cfg->size, M_IPFW, + M_WAITOK | M_ZERO); + + *ta_state = cfg; + ti->state = cfg->main_ptr; + ti->lookup = ta_lookup_numarray; + + return (0); +} + +/* + * Destroys table @ti + */ +static void +ta_destroy_numarray(void *ta_state, struct table_info
*ti) +{ + struct numarray_cfg *cfg; + + cfg = (struct numarray_cfg *)ta_state; + + if (cfg->main_ptr != NULL) + free(cfg->main_ptr, M_IPFW); + + free(cfg, M_IPFW); +} + +/* + * Provide algo-specific table info + */ +static void +ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct numarray_cfg *cfg; + + cfg = (struct numarray_cfg *)ta_state; + + tinfo->taclass4 = IPFW_TACLASS_ARRAY; + tinfo->size4 = cfg->size; + tinfo->count4 = cfg->used; + tinfo->itemsize4 = sizeof(struct numarray); +} + +/* + * Prepare for addition/deletion to an array. + */ +static int +ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_numarray *tb; + + tb = (struct ta_buf_numarray *)ta_buf; + + tb->na.number = *((uint32_t *)tei->paddr); + + return (0); +} + +static int +ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct numarray_cfg *cfg; + struct ta_buf_numarray *tb; + struct numarray *ri; + int res; + uint32_t value; + + tb = (struct ta_buf_numarray *)ta_buf; + cfg = (struct numarray_cfg *)ta_state; + + /* Read current value from @tei */ + tb->na.value = tei->value; + + ri = numarray_find(ti, &tb->na.number); + + if (ri != NULL) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + + /* Exchange values between ri and @tei */ + value = ri->value; + ri->value = tei->value; + tei->value = value; + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + return (0); + } + + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + + res = badd(&tb->na.number, &tb->na, cfg->main_ptr, cfg->used, + sizeof(struct numarray), compare_numarray); + + KASSERT(res == 1, ("number %u already exists", tb->na.number)); + cfg->used++; + ti->data = cfg->used; + *pnum = 1; + + return (0); +} + +/* + * Remove the specified number from the runtime array. + */ +static int +ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct numarray_cfg *cfg; + struct ta_buf_numarray *tb; + struct numarray *ri; + int res; + + tb = (struct ta_buf_numarray *)ta_buf; + cfg = (struct numarray_cfg *)ta_state; + + ri = numarray_find(ti, &tb->na.number); + if (ri == NULL) + return (ENOENT); + + tei->value = ri->value; + + res = bdel(&tb->na.number, cfg->main_ptr, cfg->used, + sizeof(struct numarray), compare_numarray); + + KASSERT(res == 1, ("number %u does not exist", tb->na.number)); + cfg->used--; + ti->data = cfg->used; + *pnum = 1; + + return (0); +} + +static void +ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + + /* We don't have any state, do nothing */ +} + + +/* + * Table growing callbacks. + */ + +static int +ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct numarray_cfg *cfg; + size_t size; + + cfg = (struct numarray_cfg *)ta_state; + + size = cfg->size; + while (size < cfg->used + count) + size *= 2; + + if (size != cfg->size) { + *pflags = size; + return (1); + } + + return (0); +} + +/* + * Allocate new, larger runtime array.
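+ *
+ * Editorial note: like the other add callbacks in this file,
+ * ta_add_numarray() above handles TEI_FLAGS_UPDATE by swapping value
+ * indices through @tei rather than overwriting: the caller still needs
+ * the old index to drop its reference. The idiom, as a hedged sketch:
+ *
+ *    uint32_t value = ri->value;    // remember the old value index
+ *    ri->value = tei->value;        // install the new one
+ *    tei->value = value;            // hand the old one back for unref
+ *    tei->flags |= TEI_FLAGS_UPDATED;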
+ */ +static int +ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + + memset(mi, 0, sizeof(struct mod_item)); + mi->size = *pflags; + mi->main_ptr = malloc(sizeof(struct numarray) * mi->size, M_IPFW, + M_WAITOK | M_ZERO); + + return (0); +} + +/* + * Copy data from old runtime array to new one. + */ +static int +ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t *pflags) +{ + struct mod_item *mi; + struct numarray_cfg *cfg; + + mi = (struct mod_item *)ta_buf; + cfg = (struct numarray_cfg *)ta_state; + + /* Check if we still need to grow array */ + if (cfg->size >= mi->size) { + *pflags = 0; + return (0); + } + + memcpy(mi->main_ptr, cfg->main_ptr, cfg->used * sizeof(struct numarray)); + + return (0); +} + +/* + * Switch old & new arrays. + */ +static void +ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags) +{ + struct mod_item *mi; + struct numarray_cfg *cfg; + void *old_ptr; + + mi = (struct mod_item *)ta_buf; + cfg = (struct numarray_cfg *)ta_state; + + old_ptr = cfg->main_ptr; + cfg->main_ptr = mi->main_ptr; + cfg->size = mi->size; + ti->state = cfg->main_ptr; + + mi->main_ptr = old_ptr; +} + +/* + * Free unneeded array. + */ +static void +ta_flush_mod_numarray(void *ta_buf) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + if (mi->main_ptr != NULL) + free(mi->main_ptr, M_IPFW); +} + +static int +ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct numarray *na; + + na = (struct numarray *)e; + + tent->k.key = na->number; + tent->v.kidx = na->value; + + return (0); +} + +static int +ta_find_numarray_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct numarray_cfg *cfg; + struct numarray *ri; + + cfg = (struct numarray_cfg *)ta_state; + + ri = numarray_find(ti, &tent->k.key); + + if (ri != NULL) { + ta_dump_numarray_tentry(ta_state, ti, ri, tent); + return (0); + } + + return (ENOENT); +} + +static void +ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct numarray_cfg *cfg; + struct numarray *array; + int i; + + cfg = (struct numarray_cfg *)ta_state; + array = cfg->main_ptr; + + for (i = 0; i < cfg->used; i++) + f(&array[i], arg); +} + +struct table_algo number_array = { + .name = "number:array", + .type = IPFW_TABLE_NUMBER, + .ta_buf_size = sizeof(struct ta_buf_numarray), + .init = ta_init_numarray, + .destroy = ta_destroy_numarray, + .prepare_add = ta_prepare_add_numarray, + .prepare_del = ta_prepare_add_numarray, + .add = ta_add_numarray, + .del = ta_del_numarray, + .flush_entry = ta_flush_numarray_entry, + .foreach = ta_foreach_numarray, + .dump_tentry = ta_dump_numarray_tentry, + .find_tentry = ta_find_numarray_tentry, + .dump_tinfo = ta_dump_numarray_tinfo, + .need_modify = ta_need_modify_numarray, + .prepare_mod = ta_prepare_mod_numarray, + .fill_mod = ta_fill_mod_numarray, + .modify = ta_modify_numarray, + .flush_mod = ta_flush_mod_numarray, +}; + +/* + * flow:hash cmds + * + * + * ti->data: + * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] + * [ 8][ 8][ 8][ 8] + * + * inv.mask4: 32 - mask + * inv.mask6: + * 1) _slow lookup: mask + * 2) _aligned: (128 - mask) / 8 + * 3) _64: 8 + * + * + * pflags: + * [hsize4][hsize6] + * [ 16][ 16] + */ + +struct fhashentry; + +SLIST_HEAD(fhashbhead, fhashentry); + +struct fhashentry { + SLIST_ENTRY(fhashentry) next; + uint8_t af; + uint8_t proto; + uint16_t
spare0; + uint16_t dport; + uint16_t sport; + uint32_t value; + uint32_t spare1; +}; + +struct fhashentry4 { + struct fhashentry e; + struct in_addr dip; + struct in_addr sip; +}; + +struct fhashentry6 { + struct fhashentry e; + struct in6_addr dip6; + struct in6_addr sip6; +}; + +struct fhash_cfg { + struct fhashbhead *head; + size_t size; + size_t items; + struct fhashentry4 fe4; + struct fhashentry6 fe6; +}; + +struct ta_buf_fhash { + void *ent_ptr; + struct fhashentry6 fe6; +}; + +static __inline int cmp_flow_ent(struct fhashentry *a, + struct fhashentry *b, size_t sz); +static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize); +static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize); +static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size); +static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val); +static int ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, +struct table_info *ti, char *data, uint8_t tflags); +static void ta_destroy_fhash(void *ta_state, struct table_info *ti); +static void ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, + ipfw_ta_tinfo *tinfo); +static int ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, + void *e, ipfw_obj_tentry *tent); +static int tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent); +static int ta_find_fhash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); +static void ta_foreach_fhash(void *ta_state, struct table_info *ti, + ta_foreach_f *f, void *arg); +static int ta_prepare_add_fhash(struct ip_fw_chain *ch, + struct tentry_info *tei, void *ta_buf); +static int ta_add_fhash(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +static int ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +static int ta_del_fhash(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +static void ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +static int ta_need_modify_fhash(void *ta_state, struct table_info *ti, + uint32_t count, uint64_t *pflags); +static int ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags); +static int ta_fill_mod_fhash(void *ta_state, struct table_info *ti, + void *ta_buf, uint64_t *pflags); +static void ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags); +static void ta_flush_mod_fhash(void *ta_buf); + +static __inline int +cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz) +{ + uint64_t *ka, *kb; + + ka = (uint64_t *)(&a->next + 1); + kb = (uint64_t *)(&b->next + 1); + + if (*ka == *kb && (memcmp(a + 1, b + 1, sz) == 0)) + return (1); + + return (0); +} + +static __inline uint32_t +hash_flow4(struct fhashentry4 *f, int hsize) +{ + uint32_t i; + + i = (f->dip.s_addr) ^ (f->sip.s_addr) ^ (f->e.dport) ^ (f->e.sport); + + return (i % (hsize - 1)); +} + +static __inline uint32_t +hash_flow6(struct fhashentry6 *f, int hsize) +{ + uint32_t i; + + i = (f->dip6.__u6_addr.__u6_addr32[2]) ^ + (f->dip6.__u6_addr.__u6_addr32[3]) ^ + (f->sip6.__u6_addr.__u6_addr32[2]) ^ + (f->sip6.__u6_addr.__u6_addr32[3]) ^ + (f->e.dport) ^ (f->e.sport); + + return (i % (hsize - 1)); +} + +static uint32_t +hash_flow_ent(struct fhashentry *ent, uint32_t size) +{ + uint32_t hash; + + if (ent->af == AF_INET) { + hash = hash_flow4((struct fhashentry4 *)ent, size); + } else { + hash = hash_flow6((struct fhashentry6 
*)ent, size); + } + + return (hash); +} + +static int +ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct fhashbhead *head; + struct fhashentry *ent; + struct fhashentry4 *m4; + struct ipfw_flow_id *id; + uint16_t hash, hsize; + + id = (struct ipfw_flow_id *)key; + head = (struct fhashbhead *)ti->state; + hsize = ti->data; + m4 = (struct fhashentry4 *)ti->xstate; + + if (id->addr_type == 4) { + struct fhashentry4 f; + + /* Copy hash mask */ + f = *m4; + + f.dip.s_addr &= id->dst_ip; + f.sip.s_addr &= id->src_ip; + f.e.dport &= id->dst_port; + f.e.sport &= id->src_port; + f.e.proto &= id->proto; + hash = hash_flow4(&f, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) { + *val = ent->value; + return (1); + } + } + } else if (id->addr_type == 6) { + struct fhashentry6 f; + uint64_t *fp, *idp; + + /* Copy hash mask */ + f = *((struct fhashentry6 *)(m4 + 1)); + + /* Handle lack of __u6_addr.__u6_addr64 */ + fp = (uint64_t *)&f.dip6; + idp = (uint64_t *)&id->dst_ip6; + /* src IPv6 is stored after dst IPv6 */ + *fp++ &= *idp++; + *fp++ &= *idp++; + *fp++ &= *idp++; + *fp &= *idp; + f.e.dport &= id->dst_port; + f.e.sport &= id->src_port; + f.e.proto &= id->proto; + hash = hash_flow6(&f, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) { + *val = ent->value; + return (1); + } + } + } + + return (0); +} + +/* + * New table. + */ +static int +ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + int i; + struct fhash_cfg *cfg; + struct fhashentry4 *fe4; + struct fhashentry6 *fe6; + + cfg = malloc(sizeof(struct fhash_cfg), M_IPFW, M_WAITOK | M_ZERO); + + cfg->size = 512; + + cfg->head = malloc(sizeof(struct fhashbhead) * cfg->size, M_IPFW, + M_WAITOK | M_ZERO); + for (i = 0; i < cfg->size; i++) + SLIST_INIT(&cfg->head[i]); + + /* Fill in fe masks based on @tflags */ + fe4 = &cfg->fe4; + fe6 = &cfg->fe6; + if (tflags & IPFW_TFFLAG_SRCIP) { + memset(&fe4->sip, 0xFF, sizeof(fe4->sip)); + memset(&fe6->sip6, 0xFF, sizeof(fe6->sip6)); + } + if (tflags & IPFW_TFFLAG_DSTIP) { + memset(&fe4->dip, 0xFF, sizeof(fe4->dip)); + memset(&fe6->dip6, 0xFF, sizeof(fe6->dip6)); + } + if (tflags & IPFW_TFFLAG_SRCPORT) { + memset(&fe4->e.sport, 0xFF, sizeof(fe4->e.sport)); + memset(&fe6->e.sport, 0xFF, sizeof(fe6->e.sport)); + } + if (tflags & IPFW_TFFLAG_DSTPORT) { + memset(&fe4->e.dport, 0xFF, sizeof(fe4->e.dport)); + memset(&fe6->e.dport, 0xFF, sizeof(fe6->e.dport)); + } + if (tflags & IPFW_TFFLAG_PROTO) { + memset(&fe4->e.proto, 0xFF, sizeof(fe4->e.proto)); + memset(&fe6->e.proto, 0xFF, sizeof(fe6->e.proto)); + } + + fe4->e.af = AF_INET; + fe6->e.af = AF_INET6; + + *ta_state = cfg; + ti->state = cfg->head; + ti->xstate = &cfg->fe4; + ti->data = cfg->size; + ti->lookup = ta_lookup_fhash; + + return (0); +} + +static void +ta_destroy_fhash(void *ta_state, struct table_info *ti) +{ + struct fhash_cfg *cfg; + struct fhashentry *ent, *ent_next; + int i; + + cfg = (struct fhash_cfg *)ta_state; + + for (i = 0; i < cfg->size; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) + free(ent, M_IPFW_TBL); + + free(cfg->head, M_IPFW); + free(cfg, M_IPFW); +} + +/* + * Provide algo-specific table info + */ +static void +ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct fhash_cfg *cfg; + + cfg = (struct fhash_cfg *)ta_state; + + tinfo->flags = IPFW_TATFLAGS_AFITEM; + tinfo->taclass4 = 
IPFW_TACLASS_HASH; + tinfo->size4 = cfg->size; + tinfo->count4 = cfg->items; + tinfo->itemsize4 = sizeof(struct fhashentry4); + tinfo->itemsize6 = sizeof(struct fhashentry6); +} + +static int +ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct fhash_cfg *cfg; + struct fhashentry *ent; + struct fhashentry4 *fe4; +#ifdef INET6 + struct fhashentry6 *fe6; +#endif + struct tflow_entry *tfe; + + cfg = (struct fhash_cfg *)ta_state; + ent = (struct fhashentry *)e; + tfe = &tent->k.flow; + + tfe->af = ent->af; + tfe->proto = ent->proto; + tfe->dport = htons(ent->dport); + tfe->sport = htons(ent->sport); + tent->v.kidx = ent->value; + tent->subtype = ent->af; + + if (ent->af == AF_INET) { + fe4 = (struct fhashentry4 *)ent; + tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr); + tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr); + tent->masklen = 32; +#ifdef INET6 + } else { + fe6 = (struct fhashentry6 *)ent; + tfe->a.a6.sip6 = fe6->sip6; + tfe->a.a6.dip6 = fe6->dip6; + tent->masklen = 128; +#endif + } + + return (0); +} + +static int +tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent) +{ +#ifdef INET + struct fhashentry4 *fe4; +#endif +#ifdef INET6 + struct fhashentry6 *fe6; +#endif + struct tflow_entry *tfe; + + tfe = (struct tflow_entry *)tei->paddr; + + ent->af = tei->subtype; + ent->proto = tfe->proto; + ent->dport = ntohs(tfe->dport); + ent->sport = ntohs(tfe->sport); + + if (tei->subtype == AF_INET) { +#ifdef INET + fe4 = (struct fhashentry4 *)ent; + fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr); + fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr); +#endif +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + fe6 = (struct fhashentry6 *)ent; + fe6->sip6 = tfe->a.a6.sip6; + fe6->dip6 = tfe->a.a6.dip6; +#endif + } else { + /* Unknown CIDR type */ + return (EINVAL); + } + + return (0); +} + + +static int +ta_find_fhash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct fhash_cfg *cfg; + struct fhashbhead *head; + struct fhashentry *ent, *tmp; + struct fhashentry6 fe6; + struct tentry_info tei; + int error; + uint32_t hash; + size_t sz; + + cfg = (struct fhash_cfg *)ta_state; + + ent = &fe6.e; + + memset(&fe6, 0, sizeof(fe6)); + memset(&tei, 0, sizeof(tei)); + + tei.paddr = &tent->k.flow; + tei.subtype = tent->subtype; + + if ((error = tei_to_fhash_ent(&tei, ent)) != 0) + return (error); + + head = cfg->head; + hash = hash_flow_ent(ent, cfg->size); + + if (tei.subtype == AF_INET) + sz = 2 * sizeof(struct in_addr); + else + sz = 2 * sizeof(struct in6_addr); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (cmp_flow_ent(tmp, ent, sz) != 0) { + ta_dump_fhash_tentry(ta_state, ti, tmp, tent); + return (0); + } + } + + return (ENOENT); +} + +static void +ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct fhash_cfg *cfg; + struct fhashentry *ent, *ent_next; + int i; + + cfg = (struct fhash_cfg *)ta_state; + + for (i = 0; i < cfg->size; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) + f(ent, arg); +} + +static int +ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_fhash *tb; + struct fhashentry *ent; + size_t sz; + int error; + + tb = (struct ta_buf_fhash *)ta_buf; + + if (tei->subtype == AF_INET) + sz = sizeof(struct fhashentry4); + else if (tei->subtype == AF_INET6) + sz = sizeof(struct fhashentry6); + else + return (EINVAL); + + ent = malloc(sz, M_IPFW_TBL, M_WAITOK | 
M_ZERO); + + error = tei_to_fhash_ent(tei, ent); + if (error != 0) { + free(ent, M_IPFW_TBL); + return (error); + } + tb->ent_ptr = ent; + + return (0); +} + +static int +ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct fhash_cfg *cfg; + struct fhashbhead *head; + struct fhashentry *ent, *tmp; + struct ta_buf_fhash *tb; + int exists; + uint32_t hash, value; + size_t sz; + + cfg = (struct fhash_cfg *)ta_state; + tb = (struct ta_buf_fhash *)ta_buf; + ent = (struct fhashentry *)tb->ent_ptr; + exists = 0; + + /* Read current value from @tei */ + ent->value = tei->value; + + head = cfg->head; + hash = hash_flow_ent(ent, cfg->size); + + if (tei->subtype == AF_INET) + sz = 2 * sizeof(struct in_addr); + else + sz = 2 * sizeof(struct in6_addr); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (cmp_flow_ent(tmp, ent, sz) != 0) { + exists = 1; + break; + } + } + + if (exists == 1) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + /* Record already exists. Update value if we're asked to */ + /* Exchange values between tmp and @tei */ + value = tmp->value; + tmp->value = tei->value; + tei->value = value; + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + } else { + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + + SLIST_INSERT_HEAD(&head[hash], ent, next); + tb->ent_ptr = NULL; + *pnum = 1; + + /* Update counters and check if we need to grow hash */ + cfg->items++; + } + + return (0); +} + +static int +ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_fhash *tb; + + tb = (struct ta_buf_fhash *)ta_buf; + + return (tei_to_fhash_ent(tei, &tb->fe6.e)); +} + +static int +ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct fhash_cfg *cfg; + struct fhashbhead *head; + struct fhashentry *ent, *tmp; + struct ta_buf_fhash *tb; + uint32_t hash; + size_t sz; + + cfg = (struct fhash_cfg *)ta_state; + tb = (struct ta_buf_fhash *)ta_buf; + ent = &tb->fe6.e; + + head = cfg->head; + hash = hash_flow_ent(ent, cfg->size); + + if (tei->subtype == AF_INET) + sz = 2 * sizeof(struct in_addr); + else + sz = 2 * sizeof(struct in6_addr); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (cmp_flow_ent(tmp, ent, sz) == 0) + continue; + + SLIST_REMOVE(&head[hash], tmp, fhashentry, next); + tei->value = tmp->value; + *pnum = 1; + cfg->items--; + tb->ent_ptr = tmp; + return (0); + } + + return (ENOENT); +} + +static void +ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_fhash *tb; + + tb = (struct ta_buf_fhash *)ta_buf; + + if (tb->ent_ptr != NULL) + free(tb->ent_ptr, M_IPFW_TBL); +} + +/* + * Hash growing callbacks. + */ + +static int +ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct fhash_cfg *cfg; + + cfg = (struct fhash_cfg *)ta_state; + + if (cfg->items > cfg->size && cfg->size < 65536) { + *pflags = cfg->size * 2; + return (1); + } + + return (0); +} + +/* + * Allocate new, larger fhash. 
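+ *
+ * Editorial note: cmp_flow_ent() deliberately inverts memcmp()
+ * conventions, returning 1 on a match and 0 otherwise; the add, del
+ * and find paths above all rely on that. It compares the 8 bytes of
+ * af/proto/ports that directly follow the SLIST linkage, then @sz
+ * bytes of addresses stored behind struct fhashentry. A hedged
+ * restatement of its logic:
+ *
+ *    // match <=> 8-byte header equal && address block equal
+ *    return (*ka == *kb && memcmp(a + 1, b + 1, sz) == 0);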
+ */ +static int +ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags) +{ + struct mod_item *mi; + struct fhashbhead *head; + int i; + + mi = (struct mod_item *)ta_buf; + + memset(mi, 0, sizeof(struct mod_item)); + mi->size = *pflags; + head = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW, + M_WAITOK | M_ZERO); + for (i = 0; i < mi->size; i++) + SLIST_INIT(&head[i]); + + mi->main_ptr = head; + + return (0); +} + +/* + * Copy data from old runtime array to new one. + */ +static int +ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t *pflags) +{ + + /* It is not possible to do a rehash if we're not holding the WLOCK. */ + return (0); +} + +/* + * Switch old & new arrays. + */ +static void +ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags) +{ + struct mod_item *mi; + struct fhash_cfg *cfg; + struct fhashbhead *old_head, *new_head; + struct fhashentry *ent, *ent_next; + int i; + uint32_t nhash; + size_t old_size; + + mi = (struct mod_item *)ta_buf; + cfg = (struct fhash_cfg *)ta_state; + + old_size = cfg->size; + old_head = ti->state; + + new_head = (struct fhashbhead *)mi->main_ptr; + for (i = 0; i < old_size; i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_flow_ent(ent, mi->size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } + } + + ti->state = new_head; + ti->data = mi->size; + cfg->head = new_head; + cfg->size = mi->size; + + mi->main_ptr = old_head; +} + +/* + * Free unneeded array. + */ +static void +ta_flush_mod_fhash(void *ta_buf) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + if (mi->main_ptr != NULL) + free(mi->main_ptr, M_IPFW); +} + +struct table_algo flow_hash = { + .name = "flow:hash", + .type = IPFW_TABLE_FLOW, + .flags = TA_FLAG_DEFAULT, + .ta_buf_size = sizeof(struct ta_buf_fhash), + .init = ta_init_fhash, + .destroy = ta_destroy_fhash, + .prepare_add = ta_prepare_add_fhash, + .prepare_del = ta_prepare_del_fhash, + .add = ta_add_fhash, + .del = ta_del_fhash, + .flush_entry = ta_flush_fhash_entry, + .foreach = ta_foreach_fhash, + .dump_tentry = ta_dump_fhash_tentry, + .find_tentry = ta_find_fhash_tentry, + .dump_tinfo = ta_dump_fhash_tinfo, + .need_modify = ta_need_modify_fhash, + .prepare_mod = ta_prepare_mod_fhash, + .fill_mod = ta_fill_mod_fhash, + .modify = ta_modify_fhash, + .flush_mod = ta_flush_mod_fhash, +}; + +/* + * Kernel fibs bindings.
+ * + * Implementation: + * + * Runtime part: + * - fully relies on route API + * - fib number is stored in ti->data + * + */ + +static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val); +static int kfib_parse_opts(int *pfib, char *data); +static void ta_print_kfib_config(void *ta_state, struct table_info *ti, + char *buf, size_t bufsize); +static int ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, + struct table_info *ti, char *data, uint8_t tflags); +static void ta_destroy_kfib(void *ta_state, struct table_info *ti); +static void ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, + ipfw_ta_tinfo *tinfo); +static int contigmask(uint8_t *p, int len); +static int ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent); +static int ta_dump_kfib_tentry_int(struct sockaddr *paddr, + struct sockaddr *pmask, ipfw_obj_tentry *tent); +static int ta_find_kfib_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); +static void ta_foreach_kfib(void *ta_state, struct table_info *ti, + ta_foreach_f *f, void *arg); + + +static int +ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ +#ifdef INET + struct nhop4_basic nh4; + struct in_addr in; +#endif +#ifdef INET6 + struct nhop6_basic nh6; +#endif + int error; + + error = ENOENT; +#ifdef INET + if (keylen == 4) { + in.s_addr = *(in_addr_t *)key; + error = fib4_lookup_nh_basic(ti->data, + in, 0, 0, &nh4); + } +#endif +#ifdef INET6 + if (keylen == 6) + error = fib6_lookup_nh_basic(ti->data, + (struct in6_addr *)key, 0, 0, 0, &nh6); +#endif + + if (error != 0) + return (0); + + *val = 0; + + return (1); +} + +/* Parse 'fib=%d' */ +static int +kfib_parse_opts(int *pfib, char *data) +{ + char *pdel, *pend, *s; + int fibnum; + + if (data == NULL) + return (0); + if ((pdel = strchr(data, ' ')) == NULL) + return (0); + while (*pdel == ' ') + pdel++; + if (strncmp(pdel, "fib=", 4) != 0) + return (EINVAL); + if ((s = strchr(pdel, ' ')) != NULL) + *s++ = '\0'; + + pdel += 4; + /* Need \d+ */ + fibnum = strtol(pdel, &pend, 10); + if (*pend != '\0') + return (EINVAL); + + *pfib = fibnum; + + return (0); +} + +static void +ta_print_kfib_config(void *ta_state, struct table_info *ti, char *buf, + size_t bufsize) +{ + + if (ti->data != 0) + snprintf(buf, bufsize, "%s fib=%lu", "addr:kfib", ti->data); + else + snprintf(buf, bufsize, "%s", "addr:kfib"); +} + +static int +ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + int error, fibnum; + + fibnum = 0; + if ((error = kfib_parse_opts(&fibnum, data)) != 0) + return (error); + + if (fibnum >= rt_numfibs) + return (E2BIG); + + ti->data = fibnum; + ti->lookup = ta_lookup_kfib; + + return (0); +} + +/* + * Destroys table @ti + */ +static void +ta_destroy_kfib(void *ta_state, struct table_info *ti) +{ + +} + +/* + * Provide algo-specific table info + */ +static void +ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + + tinfo->flags = IPFW_TATFLAGS_AFDATA; + tinfo->taclass4 = IPFW_TACLASS_RADIX; + tinfo->count4 = 0; + tinfo->itemsize4 = sizeof(struct rtentry); + tinfo->taclass6 = IPFW_TACLASS_RADIX; + tinfo->count6 = 0; + tinfo->itemsize6 = sizeof(struct rtentry); +} + +static int +contigmask(uint8_t *p, int len) +{ + int i, n; + + for (i = 0; i < len ; i++) + if ( (p[i/8] & (1 << (7 - (i%8)))) == 0) /* first bit unset */ + break; + for (n= i + 1; n < len; n++) + if ( (p[n/8] & (1 << (7 - (n % 8)))) 
!= 0) + return (-1); /* mask not contiguous */ + return (i); +} + + +static int +ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct rtentry *rte; + + rte = (struct rtentry *)e; + + return ta_dump_kfib_tentry_int(rt_key(rte), rt_mask(rte), tent); +} + +static int +ta_dump_kfib_tentry_int(struct sockaddr *paddr, struct sockaddr *pmask, + ipfw_obj_tentry *tent) +{ +#ifdef INET + struct sockaddr_in *addr, *mask; +#endif +#ifdef INET6 + struct sockaddr_in6 *addr6, *mask6; +#endif + int len; + + len = 0; + + /* Guess IPv4/IPv6 radix by sockaddr family */ +#ifdef INET + if (paddr->sa_family == AF_INET) { + addr = (struct sockaddr_in *)paddr; + mask = (struct sockaddr_in *)pmask; + tent->k.addr.s_addr = addr->sin_addr.s_addr; + len = 32; + if (mask != NULL) + len = contigmask((uint8_t *)&mask->sin_addr, 32); + if (len == -1) + len = 0; + tent->masklen = len; + tent->subtype = AF_INET; + tent->v.kidx = 0; /* Do we need to put GW here? */ + } +#endif +#ifdef INET6 + if (paddr->sa_family == AF_INET6) { + addr6 = (struct sockaddr_in6 *)paddr; + mask6 = (struct sockaddr_in6 *)pmask; + memcpy(&tent->k, &addr6->sin6_addr, sizeof(struct in6_addr)); + len = 128; + if (mask6 != NULL) + len = contigmask((uint8_t *)&mask6->sin6_addr, 128); + if (len == -1) + len = 0; + tent->masklen = len; + tent->subtype = AF_INET6; + tent->v.kidx = 0; + } +#endif + + return (0); +} + +static int +ta_find_kfib_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct rt_addrinfo info; + struct sockaddr_in6 key6, dst6, mask6; + struct sockaddr *dst, *key, *mask; + + /* Prepare sockaddr for prefix/mask and info */ + bzero(&dst6, sizeof(dst6)); + dst6.sin6_len = sizeof(dst6); + dst = (struct sockaddr *)&dst6; + bzero(&mask6, sizeof(mask6)); + mask6.sin6_len = sizeof(mask6); + mask = (struct sockaddr *)&mask6; + + bzero(&info, sizeof(info)); + info.rti_info[RTAX_DST] = dst; + info.rti_info[RTAX_NETMASK] = mask; + + /* Prepare the lookup key */ + bzero(&key6, sizeof(key6)); + key6.sin6_family = tent->subtype; + key = (struct sockaddr *)&key6; + + if (tent->subtype == AF_INET) { + ((struct sockaddr_in *)&key6)->sin_addr = tent->k.addr; + key6.sin6_len = sizeof(struct sockaddr_in); + } else { + key6.sin6_addr = tent->k.addr6; + key6.sin6_len = sizeof(struct sockaddr_in6); + } + + if (rib_lookup_info(ti->data, key, 0, 0, &info) != 0) + return (ENOENT); + if ((info.rti_addrs & RTA_NETMASK) == 0) + mask = NULL; + + ta_dump_kfib_tentry_int(dst, mask, tent); + + return (0); +} + +static void +ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct rib_head *rh; + int error; + + rh = rt_tables_get_rnh(ti->data, AF_INET); + if (rh != NULL) { + RIB_RLOCK(rh); + error = rh->rnh_walktree(&rh->head, (walktree_f_t *)f, arg); + RIB_RUNLOCK(rh); + } + + rh = rt_tables_get_rnh(ti->data, AF_INET6); + if (rh != NULL) { + RIB_RLOCK(rh); + error = rh->rnh_walktree(&rh->head, (walktree_f_t *)f, arg); + RIB_RUNLOCK(rh); + } +} + +struct table_algo addr_kfib = { + .name = "addr:kfib", + .type = IPFW_TABLE_ADDR, + .flags = TA_FLAG_READONLY, + .ta_buf_size = 0, + .init = ta_init_kfib, + .destroy = ta_destroy_kfib, + .foreach = ta_foreach_kfib, + .dump_tentry = ta_dump_kfib_tentry, + .find_tentry = ta_find_kfib_tentry, + .dump_tinfo = ta_dump_kfib_tinfo, + .print_config = ta_print_kfib_config, +}; + +void +ipfw_table_algo_init(struct ip_fw_chain *ch) +{ + size_t sz; + + /* + * Register all algorithms presented here. 
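+ *
+ * Editorial note: each algorithm above is a statically initialized
+ * struct table_algo; ipfw_add_table_algo() copies it and stores the
+ * assigned index through the last argument, which is what the destroy
+ * path below passes to ipfw_del_table_algo(). A hedged sketch of
+ * wiring up one more algorithm (my_algo, ta_buf_myhash and the
+ * callbacks are hypothetical, not part of this file):
+ *
+ *    static struct table_algo my_algo = {
+ *        .name = "addr:myhash",
+ *        .type = IPFW_TABLE_ADDR,
+ *        .ta_buf_size = sizeof(struct ta_buf_myhash),
+ *        .init = ta_init_myhash,
+ *        .destroy = ta_destroy_myhash,
+ *    };
+ *    ipfw_add_table_algo(ch, &my_algo, sizeof(my_algo), &my_algo.idx);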
+ */ + sz = sizeof(struct table_algo); + ipfw_add_table_algo(ch, &addr_radix, sz, &addr_radix.idx); + ipfw_add_table_algo(ch, &addr_hash, sz, &addr_hash.idx); + ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx); + ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx); + ipfw_add_table_algo(ch, &flow_hash, sz, &flow_hash.idx); + ipfw_add_table_algo(ch, &addr_kfib, sz, &addr_kfib.idx); +} + +void +ipfw_table_algo_destroy(struct ip_fw_chain *ch) +{ + + ipfw_del_table_algo(ch, addr_radix.idx); + ipfw_del_table_algo(ch, addr_hash.idx); + ipfw_del_table_algo(ch, iface_idx.idx); + ipfw_del_table_algo(ch, number_array.idx); + ipfw_del_table_algo(ch, flow_hash.idx); + ipfw_del_table_algo(ch, addr_kfib.idx); +} + + diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c b/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c new file mode 100644 index 00000000..ef42e401 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c @@ -0,0 +1,810 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2014 Yandex LLC + * Copyright (c) 2014 Alexander V. Chernikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * Multi-field value support for ipfw tables. + * + * This file contains necessary functions to convert + * large multi-field values into u32 indices suitable to be fed + * to various table algorithms. Other machinery, like proper refcounting + * and internal structure resizing, is also kept here.
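+ *
+ * Editorial note: the scheme can be pictured as one shared, refcounted
+ * array of struct table_value hanging off ch->valuestate; table entries
+ * store only the 32-bit index (kidx) of their value. A hedged sketch of
+ * how a runtime lookup resolves an entry to its full value (simplified,
+ * locking omitted):
+ *
+ *    uint32_t kidx;
+ *    if (ti->lookup(ti, key, keylen, &kidx) == 1) {
+ *        struct table_value *v;
+ *        v = &((struct table_value *)ch->valuestate)[kidx];
+ *        // v->tag, v->fib, v->nh4, ... are the rule-visible fields
+ *    }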
+ */ + +#include <rtems/bsd/local/opt_ipfw.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/hash.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/rwlock.h> +#include <sys/rmlock.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/queue.h> +#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ + +#include <netinet/in.h> +#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ +#include <netinet/ip_fw.h> + +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_fw_table.h> + +static uint32_t hash_table_value(struct namedobj_instance *ni, const void *key, + uint32_t kopt); +static int cmp_table_value(struct named_object *no, const void *key, + uint32_t kopt); + +static int list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); + +static struct ipfw_sopt_handler scodes[] = { + { IP_FW_TABLE_VLIST, 0, HDIR_GET, list_table_values }, +}; + +#define CHAIN_TO_VI(chain) (CHAIN_TO_TCFG(chain)->valhash) + +struct table_val_link +{ + struct named_object no; + struct table_value *pval; /* Pointer to real table value */ +}; +#define VALDATA_START_SIZE 64 /* Allocate 64-items array by default */ + +struct vdump_args { + struct ip_fw_chain *ch; + struct sockopt_data *sd; + struct table_value *pval; + int error; +}; + + +static uint32_t +hash_table_value(struct namedobj_instance *ni, const void *key, uint32_t kopt) +{ + + return (hash32_buf(key, 56, 0)); +} + +static int +cmp_table_value(struct named_object *no, const void *key, uint32_t kopt) +{ + + return (memcmp(((struct table_val_link *)no)->pval, key, 56)); +} + +static void +mask_table_value(struct table_value *src, struct table_value *dst, + uint32_t mask) +{ +#define _MCPY(f, b) if ((mask & (b)) != 0) { dst->f = src->f; } + + memset(dst, 0, sizeof(*dst)); + _MCPY(tag, IPFW_VTYPE_TAG); + _MCPY(pipe, IPFW_VTYPE_PIPE); + _MCPY(divert, IPFW_VTYPE_DIVERT); + _MCPY(skipto, IPFW_VTYPE_SKIPTO); + _MCPY(netgraph, IPFW_VTYPE_NETGRAPH); + _MCPY(fib, IPFW_VTYPE_FIB); + _MCPY(nat, IPFW_VTYPE_NAT); + _MCPY(dscp, IPFW_VTYPE_DSCP); + _MCPY(nh4, IPFW_VTYPE_NH4); + _MCPY(nh6, IPFW_VTYPE_NH6); + _MCPY(zoneid, IPFW_VTYPE_NH6); +#undef _MCPY +} + +static void +get_value_ptrs(struct ip_fw_chain *ch, struct table_config *tc, int vshared, + struct table_value **ptv, struct namedobj_instance **pvi) +{ + struct table_value *pval; + struct namedobj_instance *vi; + + if (vshared != 0) { + pval = (struct table_value *)ch->valuestate; + vi = CHAIN_TO_VI(ch); + } else { + pval = NULL; + vi = NULL; + //pval = (struct table_value *)&tc->ti.data; + } + + if (ptv != NULL) + *ptv = pval; + if (pvi != NULL) + *pvi = vi; +} + +/* + * Update pointers to real values after @pval change. + */ +static int +update_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg) +{ + struct vdump_args *da; + struct table_val_link *ptv; + struct table_value *pval; + + da = (struct vdump_args *)arg; + ptv = (struct table_val_link *)no; + + pval = da->pval; + ptv->pval = &pval[ptv->no.kidx]; + ptv->no.name = (char *)&pval[ptv->no.kidx]; + return (0); +} + +/* + * Grows value storage shared among all tables. + * Drops/reacquires UH locks. + * Notifies other running adds on @ch shared storage resize. + * Note that the function does not guarantee that free space + * will be available after invocation, so the caller needs + * to retry the cycle itself. + * + * Returns 0 in case of no errors.
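+ *
+ * Editorial note: the function below uses the classic
+ * unlock/allocate/relock/re-check pattern: the UH lock is dropped
+ * around the M_WAITOK allocation, so another thread may grow the
+ * storage first and the new size must be validated again before
+ * committing. The core of the idiom, as a hedged sketch:
+ *
+ *    IPFW_UH_WUNLOCK(ch);
+ *    new = malloc(val_size * sizeof(*new), M_IPFW, M_WAITOK | M_ZERO);
+ *    IPFW_UH_WLOCK(ch);
+ *    if (tcfg->val_size >= val_size)    // lost the race, discard ours
+ *        goto done;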
+ */ +static int +resize_shared_value_storage(struct ip_fw_chain *ch) +{ + struct tables_config *tcfg; + struct namedobj_instance *vi; + struct table_value *pval, *valuestate, *old_valuestate; + void *new_idx; + struct vdump_args da; + int new_blocks; + int val_size, val_size_old; + + IPFW_UH_WLOCK_ASSERT(ch); + + valuestate = NULL; + new_idx = NULL; + + pval = (struct table_value *)ch->valuestate; + vi = CHAIN_TO_VI(ch); + tcfg = CHAIN_TO_TCFG(ch); + + val_size = tcfg->val_size * 2; + + if (val_size == (1 << 30)) + return (ENOSPC); + + IPFW_UH_WUNLOCK(ch); + + valuestate = malloc(sizeof(struct table_value) * val_size, M_IPFW, + M_WAITOK | M_ZERO); + ipfw_objhash_bitmap_alloc(val_size, (void *)&new_idx, + &new_blocks); + + IPFW_UH_WLOCK(ch); + + /* + * Check if we still need to resize + */ + if (tcfg->val_size >= val_size) + goto done; + + /* Update pointers and notify everyone we're changing @ch */ + pval = (struct table_value *)ch->valuestate; + rollback_toperation_state(ch, ch); + + /* Good. Let's merge */ + memcpy(valuestate, pval, sizeof(struct table_value) * tcfg->val_size); + ipfw_objhash_bitmap_merge(CHAIN_TO_VI(ch), &new_idx, &new_blocks); + + IPFW_WLOCK(ch); + /* Change pointers */ + old_valuestate = ch->valuestate; + ch->valuestate = valuestate; + valuestate = old_valuestate; + ipfw_objhash_bitmap_swap(CHAIN_TO_VI(ch), &new_idx, &new_blocks); + + val_size_old = tcfg->val_size; + tcfg->val_size = val_size; + val_size = val_size_old; + IPFW_WUNLOCK(ch); + /* Update pointers to reflect resize */ + memset(&da, 0, sizeof(da)); + da.pval = (struct table_value *)ch->valuestate; + ipfw_objhash_foreach(vi, update_tvalue, &da); + +done: + free(valuestate, M_IPFW); + ipfw_objhash_bitmap_free(new_idx, new_blocks); + + return (0); +} + +/* + * Drops reference for table value with index @kidx, stored in @pval and + * @vi. Frees value if it has no references. + */ +static void +unref_table_value(struct namedobj_instance *vi, struct table_value *pval, + uint32_t kidx) +{ + struct table_val_link *ptvl; + + KASSERT(pval[kidx].refcnt > 0, ("Refcount is 0 on kidx %d", kidx)); + if (--pval[kidx].refcnt > 0) + return; + + /* Last reference, delete item */ + ptvl = (struct table_val_link *)ipfw_objhash_lookup_kidx(vi, kidx); + KASSERT(ptvl != NULL, ("lookup on value kidx %d failed", kidx)); + ipfw_objhash_del(vi, &ptvl->no); + ipfw_objhash_free_idx(vi, kidx); + free(ptvl, M_IPFW); +} + +struct flush_args { + struct ip_fw_chain *ch; + struct table_algo *ta; + struct table_info *ti; + void *astate; + ipfw_obj_tentry tent; +}; + +static int +unref_table_value_cb(void *e, void *arg) +{ + struct flush_args *fa; + struct ip_fw_chain *ch; + struct table_algo *ta; + ipfw_obj_tentry *tent; + int error; + + fa = (struct flush_args *)arg; + + ta = fa->ta; + memset(&fa->tent, 0, sizeof(fa->tent)); + tent = &fa->tent; + error = ta->dump_tentry(fa->astate, fa->ti, e, tent); + if (error != 0) + return (error); + + ch = fa->ch; + + unref_table_value(CHAIN_TO_VI(ch), + (struct table_value *)ch->valuestate, tent->v.kidx); + + return (0); +} + +/* + * Drop references for each value used in @tc. + */ +void +ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc, + struct table_algo *ta, void *astate, struct table_info *ti) +{ + struct flush_args fa; + + IPFW_UH_WLOCK_ASSERT(ch); + + memset(&fa, 0, sizeof(fa)); + fa.ch = ch; + fa.ta = ta; + fa.astate = astate; + fa.ti = ti; + + ta->foreach(astate, ti, unref_table_value_cb, &fa); +} + +/* + * Table operation state handler. 
+ * Called when we are going to change something in @tc which + * may lead to inconsistencies in on-going table data addition. + * + * Here we rollback all already committed state (table values, currently) + * and set "modified" field to non-zero value to indicate + * that we need to restart original operation. + */ +void +rollback_table_values(struct tableop_state *ts) +{ + struct ip_fw_chain *ch; + struct table_value *pval; + struct tentry_info *ptei; + struct namedobj_instance *vi; + int i; + + ch = ts->ch; + + IPFW_UH_WLOCK_ASSERT(ch); + + /* Get current table value pointer */ + get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi); + + for (i = 0; i < ts->count; i++) { + ptei = &ts->tei[i]; + + if (ptei->value == 0) + continue; + + unref_table_value(vi, pval, ptei->value); + } +} + +/* + * Allocate new value index in either shared or per-table array. + * Function may drop/reacquire UH lock. + * + * Returns 0 on success. + */ +static int +alloc_table_vidx(struct ip_fw_chain *ch, struct tableop_state *ts, + struct namedobj_instance *vi, uint16_t *pvidx) +{ + int error, vlimit; + uint16_t vidx; + + IPFW_UH_WLOCK_ASSERT(ch); + + error = ipfw_objhash_alloc_idx(vi, &vidx); + if (error != 0) { + + /* + * We need to resize array. This involves + * lock/unlock, so we need to check "modified" + * state. + */ + ts->opstate.func(ts->tc, &ts->opstate); + error = resize_shared_value_storage(ch); + return (error); /* ts->modified should be set, we will restart */ + } + + vlimit = ts->ta->vlimit; + if (vlimit != 0 && vidx >= vlimit) { + + /* + * Algorithm is not able to store given index. + * We have to rollback state, start using + * per-table value array or return error + * if we're already using it. + * + * TODO: do not rollback state if + * atomicity is not required. + */ + if (ts->vshared != 0) { + /* shared -> per-table */ + return (ENOSPC); /* TODO: proper error */ + } + + /* per-table. Fail for now. */ + return (ENOSPC); /* TODO: proper error */ + } + + *pvidx = vidx; + return (0); +} + +/* + * Drops value reference for unused values (updates, deletes, partially + * successful adds or rollbacks). + */ +void +ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc, + struct tentry_info *tei, uint32_t count, int rollback) +{ + int i; + struct tentry_info *ptei; + struct table_value *pval; + struct namedobj_instance *vi; + + /* + * We have two slightly different ADD cases here: + * either (1) we are successful / partially successful, + * in that case we need + * * to ignore ADDED entries values + * * rollback every other values (either UPDATED since + * old value has been stored there, or some failure like + * EXISTS or LIMIT or simply "ignored" case. + * + * (2): atomic rollback of partially successful operation + * in that case we simply need to unref all entries. + * + * DELETE case is simpler: no atomic support there, so + * we simply unref all non-zero values. + */ + + /* + * Get current table value pointers. + * XXX: Properly read vshared + */ + get_value_ptrs(ch, tc, 1, &pval, &vi); + + for (i = 0; i < count; i++) { + ptei = &tei[i]; + + if (ptei->value == 0) { + + /* + * We may be deleting non-existing record. + * Skip. + */ + continue; + } + + if ((ptei->flags & TEI_FLAGS_ADDED) != 0 && rollback == 0) { + ptei->value = 0; + continue; + } + + unref_table_value(vi, pval, ptei->value); + ptei->value = 0; + } +} + +/* + * Main function used to link values of entries going to be added, + * to the index. 
Since we may perform many UH lock drops/acquires,
+ * handle changes by checking the tablestate "modified" field.
+ *
+ * Success: return 0.
+ */
+int
+ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts)
+{
+	int error, i, found;
+	struct namedobj_instance *vi;
+	struct table_config *tc;
+	struct tentry_info *tei, *ptei;
+	uint32_t count, vlimit;
+	uint16_t vidx;
+	struct table_val_link *ptv;
+	struct table_value tval, *pval;
+
+	/*
+	 * Stage 1: reference all existing values and
+	 * save their indices.
+	 */
+	IPFW_UH_WLOCK_ASSERT(ch);
+	get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi);
+
+	error = 0;
+	found = 0;
+	vlimit = ts->ta->vlimit;
+	vidx = 0;
+	tc = ts->tc;
+	tei = ts->tei;
+	count = ts->count;
+	for (i = 0; i < count; i++) {
+		ptei = &tei[i];
+		ptei->value = 0; /* Ensure value is always 0 in the beginning */
+		mask_table_value(ptei->pvalue, &tval, ts->vmask);
+		ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,
+		    (char *)&tval);
+		if (ptv == NULL)
+			continue;
+		/* Deal with vlimit later */
+		if (vlimit > 0 && vlimit <= ptv->no.kidx)
+			continue;
+
+		/* Value found. Bump refcount */
+		ptv->pval->refcnt++;
+		ptei->value = ptv->no.kidx;
+		found++;
+	}
+
+	if (ts->count == found) {
+		/* We've found all values, no need to create new ones */
+		return (0);
+	}
+
+	/*
+	 * We have added some state here; let's attach the operation
+	 * state to the list to be able to roll back if necessary.
+	 */
+	add_toperation_state(ch, ts);
+	/* Ensure table won't disappear */
+	tc_ref(tc);
+	IPFW_UH_WUNLOCK(ch);
+
+	/*
+	 * Stage 2: allocate objects for non-existing values.
+	 */
+	for (i = 0; i < count; i++) {
+		ptei = &tei[i];
+		if (ptei->value != 0)
+			continue;
+		if (ptei->ptv != NULL)
+			continue;
+		ptei->ptv = malloc(sizeof(struct table_val_link), M_IPFW,
+		    M_WAITOK | M_ZERO);
+	}
+
+	/*
+	 * Stage 3: allocate index numbers for new values
+	 * and link them to the index.
+	 */
+	IPFW_UH_WLOCK(ch);
+	tc_unref(tc);
+	del_toperation_state(ch, ts);
+	if (ts->modified != 0) {
+
+		/*
+		 * In general, we should free all state/indexes here
+		 * and return. However, we keep the allocated state instead
+		 * to ensure we achieve some progress on each restart.
+		 */
+		return (0);
+	}
+
+	KASSERT(pval == ch->valuestate, ("resize_storage() notify failure"));
+
+	/* Let's try to link values */
+	for (i = 0; i < count; i++) {
+		ptei = &tei[i];
+
+		/* Check if the record has appeared */
+		mask_table_value(ptei->pvalue, &tval, ts->vmask);
+		ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,
+		    (char *)&tval);
+		if (ptv != NULL) {
+			ptv->pval->refcnt++;
+			ptei->value = ptv->no.kidx;
+			continue;
+		}
+
+		/* May perform UH unlock/lock */
+		error = alloc_table_vidx(ch, ts, vi, &vidx);
+		if (error != 0) {
+			ts->opstate.func(ts->tc, &ts->opstate);
+			return (error);
+		}
+		/* Value storage resize has happened, return */
+		if (ts->modified != 0)
+			return (0);
+
+		/* Finally, we have allocated a valid index, let's add the entry */
+		ptei->value = vidx;
+		ptv = (struct table_val_link *)ptei->ptv;
+		ptei->ptv = NULL;
+
+		ptv->no.kidx = vidx;
+		ptv->no.name = (char *)&pval[vidx];
+		ptv->pval = &pval[vidx];
+		memcpy(ptv->pval, &tval, sizeof(struct table_value));
+		pval[vidx].refcnt = 1;
+		ipfw_objhash_add(vi, &ptv->no);
+	}
+
+	return (0);
+}
+
+/*
+ * Compatibility function used to import data from old
+ * IP_FW_TABLE_ADD / IP_FW_TABLE_XADD opcodes.
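+ *
+ * The single legacy u32 is simply fanned out to every field, so e.g.
+ * (illustration only):
+ *
+ *	struct table_value v;
+ *	ipfw_import_table_value_legacy(42, &v);
+ *	/* now v.tag == v.pipe == v.fib == ... == v.limit == 42 */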
+ */ +void +ipfw_import_table_value_legacy(uint32_t value, struct table_value *v) +{ + + memset(v, 0, sizeof(*v)); + v->tag = value; + v->pipe = value; + v->divert = value; + v->skipto = value; + v->netgraph = value; + v->fib = value; + v->nat = value; + v->nh4 = value; /* host format */ + v->dscp = value; + v->limit = value; +} + +/* + * Export data to legacy table dumps opcodes. + */ +uint32_t +ipfw_export_table_value_legacy(struct table_value *v) +{ + + /* + * TODO: provide more compatibility depending on + * vmask value. + */ + return (v->tag); +} + +/* + * Imports table value from current userland format. + * Saves value in kernel format to the same place. + */ +void +ipfw_import_table_value_v1(ipfw_table_value *iv) +{ + struct table_value v; + + memset(&v, 0, sizeof(v)); + v.tag = iv->tag; + v.pipe = iv->pipe; + v.divert = iv->divert; + v.skipto = iv->skipto; + v.netgraph = iv->netgraph; + v.fib = iv->fib; + v.nat = iv->nat; + v.dscp = iv->dscp; + v.nh4 = iv->nh4; + v.nh6 = iv->nh6; + v.limit = iv->limit; + v.zoneid = iv->zoneid; + + memcpy(iv, &v, sizeof(ipfw_table_value)); +} + +/* + * Export real table value @v to current userland format. + * Note that @v and @piv may point to the same memory. + */ +void +ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *piv) +{ + ipfw_table_value iv; + + memset(&iv, 0, sizeof(iv)); + iv.tag = v->tag; + iv.pipe = v->pipe; + iv.divert = v->divert; + iv.skipto = v->skipto; + iv.netgraph = v->netgraph; + iv.fib = v->fib; + iv.nat = v->nat; + iv.dscp = v->dscp; + iv.limit = v->limit; + iv.nh4 = v->nh4; + iv.nh6 = v->nh6; + iv.zoneid = v->zoneid; + + memcpy(piv, &iv, sizeof(iv)); +} + +/* + * Exports real value data into ipfw_table_value structure. + * Utilizes "spare1" field to store kernel index. 
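+ *
+ * E.g. (editor's sketch) the dumped record for kernel index 7 is the
+ * value blob itself with v->spare1 == 7, which lets userland correlate
+ * table entries (stored by index) with the values listed by this dump.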
+ */ +static int +dump_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg) +{ + struct vdump_args *da; + struct table_val_link *ptv; + struct table_value *v; + + da = (struct vdump_args *)arg; + ptv = (struct table_val_link *)no; + + v = (struct table_value *)ipfw_get_sopt_space(da->sd, sizeof(*v)); + /* Out of memory, returning */ + if (v == NULL) { + da->error = ENOMEM; + return (ENOMEM); + } + + memcpy(v, ptv->pval, sizeof(*v)); + v->spare1 = ptv->no.kidx; + return (0); +} + +/* + * Dumps all shared/table value data + * Data layout (v1)(current): + * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size + * Reply: [ ipfw_obj_lheader ipfw_table_value x N ] + * + * Returns 0 on success + */ +static int +list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + struct _ipfw_obj_lheader *olh; + struct namedobj_instance *vi; + struct vdump_args da; + uint32_t count, size; + + olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); + if (olh == NULL) + return (EINVAL); + if (sd->valsize < olh->size) + return (EINVAL); + + IPFW_UH_RLOCK(ch); + vi = CHAIN_TO_VI(ch); + + count = ipfw_objhash_count(vi); + size = count * sizeof(ipfw_table_value) + sizeof(ipfw_obj_lheader); + + /* Fill in header regadless of buffer size */ + olh->count = count; + olh->objsize = sizeof(ipfw_table_value); + + if (size > olh->size) { + olh->size = size; + IPFW_UH_RUNLOCK(ch); + return (ENOMEM); + } + olh->size = size; + + /* + * Do the actual value dump + */ + memset(&da, 0, sizeof(da)); + da.ch = ch; + da.sd = sd; + ipfw_objhash_foreach(vi, dump_tvalue, &da); + + IPFW_UH_RUNLOCK(ch); + + return (0); +} + +void +ipfw_table_value_init(struct ip_fw_chain *ch, int first) +{ + struct tables_config *tcfg; + + ch->valuestate = malloc(VALDATA_START_SIZE * sizeof(struct table_value), + M_IPFW, M_WAITOK | M_ZERO); + + tcfg = ch->tblcfg; + + tcfg->val_size = VALDATA_START_SIZE; + tcfg->valhash = ipfw_objhash_create(tcfg->val_size); + ipfw_objhash_set_funcs(tcfg->valhash, hash_table_value, + cmp_table_value); + + IPFW_ADD_SOPT_HANDLER(first, scodes); +} + +static int +destroy_value(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + + free(no, M_IPFW); + return (0); +} + +void +ipfw_table_value_destroy(struct ip_fw_chain *ch, int last) +{ + + IPFW_DEL_SOPT_HANDLER(last, scodes); + + free(ch->valuestate, M_IPFW); + ipfw_objhash_foreach(CHAIN_TO_VI(ch), destroy_value, ch); + ipfw_objhash_destroy(CHAIN_TO_VI(ch)); +} + diff --git a/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c b/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c new file mode 100644 index 00000000..03ca9599 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c @@ -0,0 +1,131 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/sysctl.h> + +#include <net/if.h> +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/ip_var.h> +#include <netinet/ip_fw.h> + +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/nat64/ip_fw_nat64.h> +#include <netpfil/ipfw/nat64/nat64_translate.h> + + +int nat64_debug = 0; +SYSCTL_DECL(_net_inet_ip_fw); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_debug, CTLFLAG_RW, + &nat64_debug, 0, "Debug level for NAT64 module"); + +int nat64_allow_private = 0; +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_allow_private, CTLFLAG_RW, + &nat64_allow_private, 0, + "Allow use of non-global IPv4 addresses with NAT64"); + +static int +vnet_ipfw_nat64_init(const void *arg __unused) +{ + struct ip_fw_chain *ch; + int first, error; + + ch = &V_layer3_chain; + first = IS_DEFAULT_VNET(curvnet) ? 1: 0; + error = nat64stl_init(ch, first); + if (error != 0) + return (error); + error = nat64lsn_init(ch, first); + if (error != 0) { + nat64stl_uninit(ch, first); + return (error); + } + return (0); +} + +static int +vnet_ipfw_nat64_uninit(const void *arg __unused) +{ + struct ip_fw_chain *ch; + int last; + + ch = &V_layer3_chain; + last = IS_DEFAULT_VNET(curvnet) ? 1: 0; + nat64stl_uninit(ch, last); + nat64lsn_uninit(ch, last); + return (0); +} + +static int +ipfw_nat64_modevent(module_t mod, int type, void *unused) +{ + + switch (type) { + case MOD_LOAD: + case MOD_UNLOAD: + break; + default: + return (EOPNOTSUPP); + } + return (0); +} + +static moduledata_t ipfw_nat64_mod = { + "ipfw_nat64", + ipfw_nat64_modevent, + 0 +}; + +/* Define startup order. 
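+ * (Editor's note, summarizing the constants below: the modevent handler
+ * is ordered right after ipfw at SI_ORDER_ANY - 128, the module itself
+ * one step later, and the per-VNET init/uninit hooks one step after
+ * that, all within the same SI_SUB_PROTO_IFATTACHDOMAIN stage.)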
*/ +#define IPFW_NAT64_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN +#define IPFW_NAT64_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */ +#define IPFW_NAT64_MODULE_ORDER (IPFW_NAT64_MODEVENT_ORDER + 1) +#define IPFW_NAT64_VNET_ORDER (IPFW_NAT64_MODEVENT_ORDER + 2) + +DECLARE_MODULE(ipfw_nat64, ipfw_nat64_mod, IPFW_NAT64_SI_SUB_FIREWALL, + SI_ORDER_ANY); +MODULE_DEPEND(ipfw_nat64, ipfw, 3, 3, 3); +MODULE_VERSION(ipfw_nat64, 1); + +VNET_SYSINIT(vnet_ipfw_nat64_init, IPFW_NAT64_SI_SUB_FIREWALL, + IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_init, NULL); +VNET_SYSUNINIT(vnet_ipfw_nat64_uninit, IPFW_NAT64_SI_SUB_FIREWALL, + IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_uninit, NULL); diff --git a/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.h b/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.h new file mode 100644 index 00000000..1d2bb774 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.h @@ -0,0 +1,117 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IP_FW_NAT64_H_ +#define _IP_FW_NAT64_H_ + +#define DPRINTF(mask, fmt, ...) 
\ + if (nat64_debug & (mask)) \ + printf("NAT64: %s: " fmt "\n", __func__, ## __VA_ARGS__) +#define DP_GENERIC 0x0001 +#define DP_OBJ 0x0002 +#define DP_JQUEUE 0x0004 +#define DP_STATE 0x0008 +#define DP_DROPS 0x0010 +#define DP_ALL 0xFFFF +extern int nat64_debug; + +#if 0 +#define NAT64NOINLINE __noinline +#else +#define NAT64NOINLINE +#endif + +int nat64stl_init(struct ip_fw_chain *ch, int first); +void nat64stl_uninit(struct ip_fw_chain *ch, int last); +int nat64lsn_init(struct ip_fw_chain *ch, int first); +void nat64lsn_uninit(struct ip_fw_chain *ch, int last); + +struct ip_fw_nat64_stats { + counter_u64_t opcnt64; /* 6to4 of packets translated */ + counter_u64_t opcnt46; /* 4to6 of packets translated */ + counter_u64_t ofrags; /* number of fragments generated */ + counter_u64_t ifrags; /* number of fragments received */ + counter_u64_t oerrors; /* number of output errors */ + counter_u64_t noroute4; + counter_u64_t noroute6; + counter_u64_t nomatch4; /* No addr/port match */ + counter_u64_t noproto; /* Protocol not supported */ + counter_u64_t nomem; /* mbufs allocation failed */ + counter_u64_t dropped; /* number of packets silently + * dropped due to some errors/ + * unsupported/etc. + */ + + counter_u64_t jrequests; /* number of jobs requests queued */ + counter_u64_t jcalls; /* number of jobs handler calls */ + counter_u64_t jhostsreq; /* number of hosts requests */ + counter_u64_t jportreq; + counter_u64_t jhostfails; + counter_u64_t jportfails; + counter_u64_t jmaxlen; + counter_u64_t jnomem; + counter_u64_t jreinjected; + + counter_u64_t screated; + counter_u64_t sdeleted; + counter_u64_t spgcreated; + counter_u64_t spgdeleted; +}; + +#define IPFW_NAT64_VERSION 1 +#define NAT64STATS (sizeof(struct ip_fw_nat64_stats) / sizeof(uint64_t)) +typedef struct _nat64_stats_block { + counter_u64_t stats[NAT64STATS]; +} nat64_stats_block; +#define NAT64STAT_ADD(s, f, v) \ + counter_u64_add((s)->stats[ \ + offsetof(struct ip_fw_nat64_stats, f) / sizeof(uint64_t)], (v)) +#define NAT64STAT_INC(s, f) NAT64STAT_ADD(s, f, 1) +#define NAT64STAT_FETCH(s, f) \ + counter_u64_fetch((s)->stats[ \ + offsetof(struct ip_fw_nat64_stats, f) / sizeof(uint64_t)]) + +#define L3HDR(_ip, _t) ((_t)((u_int32_t *)(_ip) + (_ip)->ip_hl)) +#define TCP(p) ((struct tcphdr *)(p)) +#define UDP(p) ((struct udphdr *)(p)) +#define ICMP(p) ((struct icmphdr *)(p)) +#define ICMP6(p) ((struct icmp6_hdr *)(p)) + +#define NAT64SKIP 0 +#define NAT64RETURN 1 +#define NAT64MFREE -1 + +/* Well-known prefix 64:ff9b::/96 */ +#define IPV6_ADDR_INT32_WKPFX htonl(0x64ff9b) +#define IN6_IS_ADDR_WKPFX(a) \ + ((a)->s6_addr32[0] == IPV6_ADDR_INT32_WKPFX && \ + (a)->s6_addr32[1] == 0 && (a)->s6_addr32[2] == 0) + +#endif + diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c b/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c new file mode 100644 index 00000000..d2507674 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c @@ -0,0 +1,1574 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rtems/bsd/local/opt_ipfw.h> + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/counter.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/rmlock.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/queue.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/if_pflog.h> +#include <net/pfil.h> +#include <net/netisr.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/ip_fw.h> +#include <netinet/ip6.h> +#include <netinet/icmp6.h> +#include <netinet/ip_icmp.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet6/in6_var.h> +#include <netinet6/ip6_var.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/nat64/ip_fw_nat64.h> +#include <netpfil/ipfw/nat64/nat64_translate.h> +#include <machine/in_cksum.h> + +static void +nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family) +{ + + logdata->dir = PF_OUT; + logdata->af = family; + ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m); +} +#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT +static NAT64NOINLINE struct sockaddr* nat64_find_route4(struct route *ro, + in_addr_t dest, struct mbuf *m); +static NAT64NOINLINE struct sockaddr* nat64_find_route6(struct route_in6 *ro, + struct in6_addr *dest, struct mbuf *m); + +static NAT64NOINLINE int +nat64_output(struct ifnet *ifp, struct mbuf *m, + struct sockaddr *dst, struct route *ro, nat64_stats_block *stats, + void *logdata) +{ + int error; + + if (logdata != NULL) + nat64_log(logdata, m, dst->sa_family); + error = (*ifp->if_output)(ifp, m, dst, ro); + if (error != 0) + NAT64STAT_INC(stats, oerrors); + return (error); +} + +static NAT64NOINLINE int +nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata) +{ + struct route_in6 ro6; + struct route ro4, *ro; + struct sockaddr *dst; + struct ifnet *ifp; + struct ip6_hdr *ip6; + struct ip *ip4; + int error; + + ip4 = mtod(m, struct ip *); + switch (ip4->ip_v) { + case IPVERSION: + ro = &ro4; + dst = nat64_find_route4(&ro4, ip4->ip_dst.s_addr, m); + if (dst == NULL) + NAT64STAT_INC(stats, noroute4); + break; + case (IPV6_VERSION >> 4): + ip6 = (struct ip6_hdr *)ip4; + ro = (struct route *)&ro6; + dst = nat64_find_route6(&ro6, &ip6->ip6_dst, m); + if (dst == NULL) + NAT64STAT_INC(stats, noroute6); + break; + default: + m_freem(m); + 
NAT64STAT_INC(stats, dropped); + DPRINTF(DP_DROPS, "dropped due to unknown IP version"); + return (EAFNOSUPPORT); + } + if (dst == NULL) { + FREE_ROUTE(ro); + m_freem(m); + return (EHOSTUNREACH); + } + if (logdata != NULL) + nat64_log(logdata, m, dst->sa_family); + ifp = ro->ro_rt->rt_ifp; + error = (*ifp->if_output)(ifp, m, dst, ro); + if (error != 0) + NAT64STAT_INC(stats, oerrors); + FREE_ROUTE(ro); + return (error); +} +#else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ +static NAT64NOINLINE int +nat64_output(struct ifnet *ifp, struct mbuf *m, + struct sockaddr *dst, struct route *ro, nat64_stats_block *stats, + void *logdata) +{ + struct ip *ip4; + int ret, af; + + ip4 = mtod(m, struct ip *); + switch (ip4->ip_v) { + case IPVERSION: + af = AF_INET; + ret = NETISR_IP; + break; + case (IPV6_VERSION >> 4): + af = AF_INET6; + ret = NETISR_IPV6; + break; + default: + m_freem(m); + NAT64STAT_INC(stats, dropped); + DPRINTF(DP_DROPS, "unknown IP version"); + return (EAFNOSUPPORT); + } + if (logdata != NULL) + nat64_log(logdata, m, af); + ret = netisr_queue(ret, m); + if (ret != 0) + NAT64STAT_INC(stats, oerrors); + return (ret); +} + +static NAT64NOINLINE int +nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata) +{ + + return (nat64_output(NULL, m, NULL, NULL, stats, logdata)); +} +#endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ + + +#if 0 +void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize); + +void +print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize) +{ + char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf)); + inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf)); + snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt); +} + + +static NAT64NOINLINE int +nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6) +{ + + /* assume the prefix is properly filled with zeros */ + bcopy(&cfg->prefix, ip6, sizeof(*ip6)); + switch (cfg->plen) { + case 32: + case 96: + ip6->s6_addr32[cfg->plen / 32] = ia; + break; + case 40: + case 48: + case 56: +#if BYTE_ORDER == BIG_ENDIAN + ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] | + (ia >> (cfg->plen % 32)); + ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32); +#elif BYTE_ORDER == LITTLE_ENDIAN + ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] | + (ia << (cfg->plen % 32)); + ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32); +#endif + break; + case 64: +#if BYTE_ORDER == BIG_ENDIAN + ip6->s6_addr32[2] = ia >> 8; + ip6->s6_addr32[3] = ia << 24; +#elif BYTE_ORDER == LITTLE_ENDIAN + ip6->s6_addr32[2] = ia << 8; + ip6->s6_addr32[3] = ia >> 24; +#endif + break; + default: + return (0); + }; + ip6->s6_addr8[8] = 0; + return (1); +} + +static NAT64NOINLINE in_addr_t +nat64_extract_ip4(struct in6_addr *ip6, int plen) +{ + in_addr_t ia; + + /* + * According to RFC 6052 p2.2: + * IPv4-embedded IPv6 addresses are composed of a variable-length + * prefix, the embedded IPv4 address, and a variable length suffix. + * The suffix bits are reserved for future extensions and SHOULD + * be set to zero. 
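+ *
+ * Worked example (editor's illustration): with the well-known /96
+ * prefix 64:ff9b::/96, embedding 192.0.2.1 (0xc0000201) fills
+ * s6_addr32[3], giving 64:ff9b::c000:201.  For a /40 prefix the same
+ * four octets straddle s6_addr32[1] and s6_addr32[2], while bits 64..71
+ * (the "u" octet, s6_addr8[8]) stay zero.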
+ */ + switch (plen) { + case 32: + if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0) + goto badip6; + break; + case 40: + if (ip6->s6_addr32[3] != 0 || + (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0) + goto badip6; + break; + case 48: + if (ip6->s6_addr32[3] != 0 || + (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0) + goto badip6; + break; + case 56: + if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0) + goto badip6; + break; + case 64: + if (ip6->s6_addr8[8] != 0 || + (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0) + goto badip6; + }; + switch (plen) { + case 32: + case 96: + ia = ip6->s6_addr32[plen / 32]; + break; + case 40: + case 48: + case 56: +#if BYTE_ORDER == BIG_ENDIAN + ia = (ip6->s6_addr32[1] << (plen % 32)) | + (ip6->s6_addr32[2] >> (24 - plen % 32)); +#elif BYTE_ORDER == LITTLE_ENDIAN + ia = (ip6->s6_addr32[1] >> (plen % 32)) | + (ip6->s6_addr32[2] << (24 - plen % 32)); +#endif + break; + case 64: +#if BYTE_ORDER == BIG_ENDIAN + ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24); +#elif BYTE_ORDER == LITTLE_ENDIAN + ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24); +#endif + break; + default: + return (0); + }; + if (nat64_check_ip4(ia) != 0 || + nat64_check_private_ip4(ia) != 0) + goto badip4; + + return (ia); +badip4: + DPRINTF(DP_GENERIC, "invalid destination address: %08x", ia); + return (0); +badip6: + DPRINTF(DP_GENERIC, "invalid IPv4-embedded IPv6 address"); + return (0); +} +#endif + +/* + * According to RFC 1624 the equation for incremental checksum update is: + * HC' = ~(~HC + ~m + m') -- [Eqn. 3] + * HC' = HC - ~m - m' -- [Eqn. 4] + * So, when we are replacing IPv4 addresses to IPv6, we + * can assume, that new bytes previously were zeros, and vise versa - + * when we replacing IPv6 addresses to IPv4, now unused bytes become + * zeros. The payload length in pseudo header has bigger size, but one + * half of it should be zero. Using the equation 4 we get: + * HC' = HC - (~m0 + m0') -- m0 is first changed word + * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word + * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... = + * = HC - sum(~m[i] + m'[i]) + * + * The function result should be used as follows: + * IPv6 to IPv4: HC' = cksum_add(HC, result) + * IPv4 to IPv6: HC' = cksum_add(HC, ~result) + */ +static NAT64NOINLINE uint16_t +nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip) +{ + uint32_t sum; + uint16_t *p; + + sum = ~ip->ip_src.s_addr >> 16; + sum += ~ip->ip_src.s_addr & 0xffff; + sum += ~ip->ip_dst.s_addr >> 16; + sum += ~ip->ip_dst.s_addr & 0xffff; + + for (p = (uint16_t *)&ip6->ip6_src; + p < (uint16_t *)(&ip6->ip6_src + 2); p++) + sum += *p; + + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + return (sum); +} + +#if __FreeBSD_version < 1100000 +#define ip_fillid(ip) (ip)->ip_id = ip_newid() +#endif +static NAT64NOINLINE void +nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag, + uint16_t plen, uint8_t proto, struct ip *ip) +{ + + /* assume addresses are already initialized */ + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; + ip->ip_len = htons(sizeof(*ip) + plen); +#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT + ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC; +#else + /* Forwarding code will decrement TTL. */ + ip->ip_ttl = ip6->ip6_hlim; +#endif + ip->ip_sum = 0; + ip->ip_p = (proto == IPPROTO_ICMPV6) ? 
IPPROTO_ICMP: proto; + ip_fillid(ip); + if (frag != NULL) { + ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3); + if (frag->ip6f_offlg & IP6F_MORE_FRAG) + ip->ip_off |= htons(IP_MF); + } else { + ip->ip_off = htons(IP_DF); + } + ip->ip_sum = in_cksum_hdr(ip); +} + +#define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag)) +static NAT64NOINLINE int +nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq, + struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off) +{ + struct ip6_frag ip6f; + struct mbuf *n; + uint16_t hlen, len, offset; + int plen; + + plen = ntohs(ip6->ip6_plen); + hlen = sizeof(struct ip6_hdr); + + /* Fragmentation isn't needed */ + if (ip_off == 0 && plen <= mtu - hlen) { + M_PREPEND(m, hlen, M_NOWAIT); + if (m == NULL) { + NAT64STAT_INC(stats, nomem); + return (ENOMEM); + } + bcopy(ip6, mtod(m, void *), hlen); + if (mbufq_enqueue(mq, m) != 0) { + m_freem(m); + NAT64STAT_INC(stats, dropped); + DPRINTF(DP_DROPS, "dropped due to mbufq overflow"); + return (ENOBUFS); + } + return (0); + } + + hlen += sizeof(struct ip6_frag); + ip6f.ip6f_reserved = 0; + ip6f.ip6f_nxt = ip6->ip6_nxt; + ip6->ip6_nxt = IPPROTO_FRAGMENT; + if (ip_off != 0) { + /* + * We have got an IPv4 fragment. + * Use offset value and ip_id from original fragment. + */ + ip6f.ip6f_ident = htonl(ntohs(ip_id)); + offset = (ntohs(ip_off) & IP_OFFMASK) << 3; + NAT64STAT_INC(stats, ifrags); + } else { + /* The packet size exceeds interface MTU */ + ip6f.ip6f_ident = htonl(ip6_randomid()); + offset = 0; /* First fragment*/ + } + while (plen > 0 && m != NULL) { + n = NULL; + len = FRAGSZ(mtu) & ~7; + if (len > plen) + len = plen; + ip6->ip6_plen = htons(len + sizeof(ip6f)); + ip6f.ip6f_offlg = ntohs(offset); + if (len < plen || (ip_off & htons(IP_MF)) != 0) + ip6f.ip6f_offlg |= IP6F_MORE_FRAG; + offset += len; + plen -= len; + if (plen > 0) { + n = m_split(m, len, M_NOWAIT); + if (n == NULL) + goto fail; + } + M_PREPEND(m, hlen, M_NOWAIT); + if (m == NULL) + goto fail; + bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr)); + bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)), + sizeof(struct ip6_frag)); + if (mbufq_enqueue(mq, m) != 0) + goto fail; + m = n; + } + NAT64STAT_ADD(stats, ofrags, mbufq_len(mq)); + return (0); +fail: + if (m != NULL) + m_freem(m); + if (n != NULL) + m_freem(n); + mbufq_drain(mq); + NAT64STAT_INC(stats, nomem); + return (ENOMEM); +} + +#if __FreeBSD_version < 1100000 +#define rt_expire rt_rmx.rmx_expire +#define rt_mtu rt_rmx.rmx_mtu +#endif +static NAT64NOINLINE struct sockaddr* +nat64_find_route6(struct route_in6 *ro, struct in6_addr *dest, struct mbuf *m) +{ + struct sockaddr_in6 *dst; + struct rtentry *rt; + + bzero(ro, sizeof(*ro)); + dst = (struct sockaddr_in6 *)&ro->ro_dst; + dst->sin6_family = AF_INET6; + dst->sin6_len = sizeof(*dst); + dst->sin6_addr = *dest; + IN6_LOOKUP_ROUTE(ro, M_GETFIB(m)); + rt = ro->ro_rt; + if (rt && (rt->rt_flags & RTF_UP) && + (rt->rt_ifp->if_flags & IFF_UP) && + (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { + if (rt->rt_flags & RTF_GATEWAY) + dst = (struct sockaddr_in6 *)rt->rt_gateway; + } else + return (NULL); + if (((rt->rt_flags & RTF_REJECT) && + (rt->rt_expire == 0 || + time_uptime < rt->rt_expire)) || + rt->rt_ifp->if_link_state == LINK_STATE_DOWN) + return (NULL); + return ((struct sockaddr *)dst); +} + +#define NAT64_ICMP6_PLEN 64 +static NAT64NOINLINE void +nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu, + nat64_stats_block *stats, void *logdata) +{ + struct 
icmp6_hdr *icmp6; + struct ip6_hdr *ip6, *oip6; + struct mbuf *n; + int len, plen; + + len = 0; + plen = nat64_getlasthdr(m, &len); + if (plen < 0) { + DPRINTF(DP_DROPS, "mbuf isn't contigious"); + goto freeit; + } + /* + * Do not send ICMPv6 in reply to ICMPv6 errors. + */ + if (plen == IPPROTO_ICMPV6) { + if (m->m_len < len + sizeof(*icmp6)) { + DPRINTF(DP_DROPS, "mbuf isn't contigious"); + goto freeit; + } + icmp6 = mtodo(m, len); + if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST || + icmp6->icmp6_type == ND_REDIRECT) { + DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to " + "ICMPv6 errors"); + goto freeit; + } + } + /* + if (icmp6_ratelimit(&ip6->ip6_src, type, code)) + goto freeit; + */ + ip6 = mtod(m, struct ip6_hdr *); + switch (type) { + case ICMP6_DST_UNREACH: + case ICMP6_PACKET_TOO_BIG: + case ICMP6_TIME_EXCEEDED: + case ICMP6_PARAM_PROB: + break; + default: + goto freeit; + } + /* Calculate length of ICMPv6 payload */ + len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN: + m->m_pkthdr.len; + + /* Create new ICMPv6 datagram */ + plen = len + sizeof(struct icmp6_hdr); + n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT, + MT_HEADER, M_PKTHDR); + if (n == NULL) { + NAT64STAT_INC(stats, nomem); + m_freem(m); + return; + } + /* + * Move pkthdr from original mbuf. We should have initialized some + * fields, because we can reinject this mbuf to netisr and it will + * go trough input path (it requires at least rcvif should be set). + * Also do M_ALIGN() to reduce chances of need to allocate new mbuf + * in the chain, when we will do M_PREPEND() or make some type of + * tunneling. + */ + m_move_pkthdr(n, m); + M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr); + + n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; + oip6 = mtod(n, struct ip6_hdr *); + oip6->ip6_src = ip6->ip6_dst; + oip6->ip6_dst = ip6->ip6_src; + oip6->ip6_nxt = IPPROTO_ICMPV6; + oip6->ip6_flow = 0; + oip6->ip6_vfc |= IPV6_VERSION; + oip6->ip6_hlim = V_ip6_defhlim; + oip6->ip6_plen = htons(plen); + + icmp6 = mtodo(n, sizeof(struct ip6_hdr)); + icmp6->icmp6_cksum = 0; + icmp6->icmp6_type = type; + icmp6->icmp6_code = code; + icmp6->icmp6_mtu = htonl(mtu); + + m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) + + sizeof(struct icmp6_hdr))); + icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6, + sizeof(struct ip6_hdr), plen); + m_freem(m); + nat64_output_one(n, stats, logdata); + return; +freeit: + NAT64STAT_INC(stats, dropped); + m_freem(m); +} + +static NAT64NOINLINE struct sockaddr* +nat64_find_route4(struct route *ro, in_addr_t dest, struct mbuf *m) +{ + struct sockaddr_in *dst; + struct rtentry *rt; + + bzero(ro, sizeof(*ro)); + dst = (struct sockaddr_in *)&ro->ro_dst; + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr.s_addr = dest; + IN_LOOKUP_ROUTE(ro, M_GETFIB(m)); + rt = ro->ro_rt; + if (rt && (rt->rt_flags & RTF_UP) && + (rt->rt_ifp->if_flags & IFF_UP) && + (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { + if (rt->rt_flags & RTF_GATEWAY) + dst = (struct sockaddr_in *)rt->rt_gateway; + } else + return (NULL); + if (((rt->rt_flags & RTF_REJECT) && + (rt->rt_expire == 0 || + time_uptime < rt->rt_expire)) || + rt->rt_ifp->if_link_state == LINK_STATE_DOWN) + return (NULL); + return ((struct sockaddr *)dst); +} + +#define NAT64_ICMP_PLEN 64 +static NAT64NOINLINE void +nat64_icmp_reflect(struct mbuf *m, uint8_t type, + uint8_t code, uint16_t mtu, nat64_stats_block *stats, void *logdata) +{ + struct icmp *icmp; + struct ip *ip, *oip; + struct mbuf *n; + int len, 
plen; + + ip = mtod(m, struct ip *); + /* Do not send ICMP error if packet is not the first fragment */ + if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) { + DPRINTF(DP_DROPS, "not first fragment"); + goto freeit; + } + /* Do not send ICMP in reply to ICMP errors */ + if (ip->ip_p == IPPROTO_ICMP) { + if (m->m_len < (ip->ip_hl << 2)) { + DPRINTF(DP_DROPS, "mbuf isn't contigious"); + goto freeit; + } + icmp = mtodo(m, ip->ip_hl << 2); + if (!ICMP_INFOTYPE(icmp->icmp_type)) { + DPRINTF(DP_DROPS, "do not send ICMP in reply to " + "ICMP errors"); + goto freeit; + } + } + switch (type) { + case ICMP_UNREACH: + case ICMP_TIMXCEED: + case ICMP_PARAMPROB: + break; + default: + goto freeit; + } + /* Calculate length of ICMP payload */ + len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8: + m->m_pkthdr.len; + + /* Create new ICMPv4 datagram */ + plen = len + sizeof(struct icmphdr) + sizeof(uint32_t); + n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT, + MT_HEADER, M_PKTHDR); + if (n == NULL) { + NAT64STAT_INC(stats, nomem); + m_freem(m); + return; + } + m_move_pkthdr(n, m); + M_ALIGN(n, sizeof(struct ip) + plen + max_hdr); + + n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen; + oip = mtod(n, struct ip *); + oip->ip_v = IPVERSION; + oip->ip_hl = sizeof(struct ip) >> 2; + oip->ip_tos = 0; + oip->ip_len = htons(n->m_pkthdr.len); + oip->ip_ttl = V_ip_defttl; + oip->ip_p = IPPROTO_ICMP; + ip_fillid(oip); + oip->ip_off = htons(IP_DF); + oip->ip_src = ip->ip_dst; + oip->ip_dst = ip->ip_src; + oip->ip_sum = 0; + oip->ip_sum = in_cksum_hdr(oip); + + icmp = mtodo(n, sizeof(struct ip)); + icmp->icmp_type = type; + icmp->icmp_code = code; + icmp->icmp_cksum = 0; + icmp->icmp_pmvoid = 0; + icmp->icmp_nextmtu = htons(mtu); + m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) + + sizeof(struct icmphdr) + sizeof(uint32_t))); + icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen, + sizeof(struct ip)); + m_freem(m); + nat64_output_one(n, stats, logdata); + return; +freeit: + NAT64STAT_INC(stats, dropped); + m_freem(m); +} + +/* Translate ICMP echo request/reply into ICMPv6 */ +static void +nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6, + uint16_t id, uint8_t type) +{ + uint16_t old; + + old = *(uint16_t *)icmp6; /* save type+code in one word */ + icmp6->icmp6_type = type; + /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */ + icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, + old, *(uint16_t *)icmp6); + if (id != 0) { + old = icmp6->icmp6_id; + icmp6->icmp6_id = id; + /* Reflect ICMP id translation in the cksum */ + icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, + old, id); + } + /* Reflect IPv6 pseudo header in the cksum */ + icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), + IPPROTO_ICMPV6, ~icmp6->icmp6_cksum); +} + +static NAT64NOINLINE struct mbuf * +nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid, + int offset, nat64_stats_block *stats) +{ + struct ip ip; + struct icmp *icmp; + struct tcphdr *tcp; + struct udphdr *udp; + struct ip6_hdr *eip6; + struct mbuf *n; + uint32_t mtu; + int len, hlen, plen; + uint8_t type, code; + + if (m->m_len < offset + ICMP_MINLEN) + m = m_pullup(m, offset + ICMP_MINLEN); + if (m == NULL) { + NAT64STAT_INC(stats, nomem); + return (m); + } + mtu = 0; + icmp = mtodo(m, offset); + /* RFC 7915 p4.2 */ + switch (icmp->icmp_type) { + case ICMP_ECHOREPLY: + type = ICMP6_ECHO_REPLY; + code = 0; + break; + case ICMP_UNREACH: + type = ICMP6_DST_UNREACH; + switch (icmp->icmp_code) { + 
case ICMP_UNREACH_NET: + case ICMP_UNREACH_HOST: + case ICMP_UNREACH_SRCFAIL: + case ICMP_UNREACH_NET_UNKNOWN: + case ICMP_UNREACH_HOST_UNKNOWN: + case ICMP_UNREACH_TOSNET: + case ICMP_UNREACH_TOSHOST: + code = ICMP6_DST_UNREACH_NOROUTE; + break; + case ICMP_UNREACH_PROTOCOL: + type = ICMP6_PARAM_PROB; + code = ICMP6_PARAMPROB_NEXTHEADER; + break; + case ICMP_UNREACH_PORT: + code = ICMP6_DST_UNREACH_NOPORT; + break; + case ICMP_UNREACH_NEEDFRAG: + type = ICMP6_PACKET_TOO_BIG; + code = 0; + /* XXX: needs an additional look */ + mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20); + break; + case ICMP_UNREACH_NET_PROHIB: + case ICMP_UNREACH_HOST_PROHIB: + case ICMP_UNREACH_FILTER_PROHIB: + case ICMP_UNREACH_PRECEDENCE_CUTOFF: + code = ICMP6_DST_UNREACH_ADMIN; + break; + default: + DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", + icmp->icmp_type, icmp->icmp_code); + goto freeit; + } + break; + case ICMP_TIMXCEED: + type = ICMP6_TIME_EXCEEDED; + code = icmp->icmp_code; + break; + case ICMP_ECHO: + type = ICMP6_ECHO_REQUEST; + code = 0; + break; + case ICMP_PARAMPROB: + type = ICMP6_PARAM_PROB; + switch (icmp->icmp_code) { + case ICMP_PARAMPROB_ERRATPTR: + case ICMP_PARAMPROB_LENGTH: + code = ICMP6_PARAMPROB_HEADER; + switch (icmp->icmp_pptr) { + case 0: /* Version/IHL */ + case 1: /* Type Of Service */ + mtu = icmp->icmp_pptr; + break; + case 2: /* Total Length */ + case 3: mtu = 4; /* Payload Length */ + break; + case 8: /* Time to Live */ + mtu = 7; /* Hop Limit */ + break; + case 9: /* Protocol */ + mtu = 6; /* Next Header */ + break; + case 12: /* Source address */ + case 13: + case 14: + case 15: + mtu = 8; + break; + case 16: /* Destination address */ + case 17: + case 18: + case 19: + mtu = 24; + break; + default: /* Silently drop */ + DPRINTF(DP_DROPS, "Unsupported ICMP type %d," + " code %d, pptr %d", icmp->icmp_type, + icmp->icmp_code, icmp->icmp_pptr); + goto freeit; + } + break; + default: + DPRINTF(DP_DROPS, "Unsupported ICMP type %d," + " code %d, pptr %d", icmp->icmp_type, + icmp->icmp_code, icmp->icmp_pptr); + goto freeit; + } + break; + default: + DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", + icmp->icmp_type, icmp->icmp_code); + goto freeit; + } + /* + * For echo request/reply we can use original payload, + * but we need adjust icmp_cksum, because ICMPv6 cksum covers + * IPv6 pseudo header and ICMPv6 types differs from ICMPv4. + */ + if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) { + nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type); + return (m); + } + /* + * For other types of ICMP messages we need to translate inner + * IPv4 header to IPv6 header. + * Assume ICMP src is the same as payload dst + * E.g. we have ( GWsrc1 , NATIP1 ) in outer header + * and ( NATIP1, Hostdst1 ) in ICMP copy header. + * In that case, we already have map for NATIP1 and GWsrc1. + * The only thing we need is to copy IPv6 map prefix to + * Hostdst1. 
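+ *
+ * I.e. (sketch): the translations of NATIP1 and GWsrc1 are already
+ * known from the outer header, so only Hostdst1 has to be formed, by
+ * writing its IPv4 address into the low 32 bits of the single /96
+ * mapping prefix (nat64_set_ip4() below).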
+ */ + hlen = offset + ICMP_MINLEN; + if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) { + DPRINTF(DP_DROPS, "Message is too short %d", + m->m_pkthdr.len); + goto freeit; + } + m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); + if (ip.ip_v != IPVERSION) { + DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v); + goto freeit; + } + hlen += ip.ip_hl << 2; /* Skip inner IP header */ + if (nat64_check_ip4(ip.ip_src.s_addr) != 0 || + nat64_check_ip4(ip.ip_dst.s_addr) != 0 || + nat64_check_private_ip4(ip.ip_src.s_addr) != 0 || + nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) { + DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x", + ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr)); + goto freeit; + } + if (m->m_pkthdr.len < hlen + ICMP_MINLEN) { + DPRINTF(DP_DROPS, "Message is too short %d", + m->m_pkthdr.len); + goto freeit; + } +#if 0 + /* + * Check that inner source matches the outer destination. + * XXX: We need some method to convert IPv4 into IPv6 address here, + * and compare IPv6 addresses. + */ + if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) { + DPRINTF(DP_GENERIC, "Inner source doesn't match destination ", + "%04x vs %04x", ip.ip_src.s_addr, + nat64_get_ip4(&ip6->ip6_dst)); + goto freeit; + } +#endif + /* + * Create new mbuf for ICMPv6 datagram. + * NOTE: len is data length just after inner IP header. + */ + len = m->m_pkthdr.len - hlen; + if (sizeof(struct ip6_hdr) + + sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN) + len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) - + sizeof(struct ip6_hdr); + plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len; + n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR); + if (n == NULL) { + NAT64STAT_INC(stats, nomem); + m_freem(m); + return (NULL); + } + m_move_pkthdr(n, m); + M_ALIGN(n, offset + plen + max_hdr); + n->m_len = n->m_pkthdr.len = offset + plen; + /* Adjust ip6_plen in outer header */ + ip6->ip6_plen = htons(plen); + /* Construct new inner IPv6 header */ + eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr)); + eip6->ip6_src = ip6->ip6_dst; + /* Use the fact that we have single /96 prefix for IPv4 map */ + eip6->ip6_dst = ip6->ip6_src; + nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr); + + eip6->ip6_flow = htonl(ip.ip_tos << 20); + eip6->ip6_vfc |= IPV6_VERSION; + eip6->ip6_hlim = ip.ip_ttl; + eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2)); + eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p; + m_copydata(m, hlen, len, (char *)(eip6 + 1)); + /* + * We need to translate source port in the inner ULP header, + * and adjust ULP checksum. + */ + switch (ip.ip_p) { + case IPPROTO_TCP: + if (len < offsetof(struct tcphdr, th_sum)) + break; + tcp = TCP(eip6 + 1); + if (icmpid != 0) { + tcp->th_sum = cksum_adjust(tcp->th_sum, + tcp->th_sport, icmpid); + tcp->th_sport = icmpid; + } + tcp->th_sum = cksum_add(tcp->th_sum, + ~nat64_cksum_convert(eip6, &ip)); + break; + case IPPROTO_UDP: + if (len < offsetof(struct udphdr, uh_sum)) + break; + udp = UDP(eip6 + 1); + if (icmpid != 0) { + udp->uh_sum = cksum_adjust(udp->uh_sum, + udp->uh_sport, icmpid); + udp->uh_sport = icmpid; + } + udp->uh_sum = cksum_add(udp->uh_sum, + ~nat64_cksum_convert(eip6, &ip)); + break; + case IPPROTO_ICMP: + /* + * Check if this is an ICMP error message for echo request + * that we sent. I.e. ULP in the data containing invoking + * packet is IPPROTO_ICMP and its type is ICMP_ECHO. 
+ */
+	icmp = (struct icmp *)(eip6 + 1);
+	if (icmp->icmp_type != ICMP_ECHO) {
+		m_freem(n);
+		goto freeit;
+	}
+	/*
+	 * For our client this original datagram should look
+	 * like an ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
+	 * Thus we need to adjust icmp_cksum and convert the type from
+	 * ICMP_ECHO to ICMP6_ECHO_REQUEST.
+	 */
+	nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
+	    ICMP6_ECHO_REQUEST);
+	}
+	m_freem(m);
+	/* Convert ICMPv4 into ICMPv6 header */
+	icmp = mtodo(n, offset);
+	ICMP6(icmp)->icmp6_type = type;
+	ICMP6(icmp)->icmp6_code = code;
+	ICMP6(icmp)->icmp6_mtu = htonl(mtu);
+	ICMP6(icmp)->icmp6_cksum = 0;
+	ICMP6(icmp)->icmp6_cksum = cksum_add(
+	    ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
+	    in_cksum_skip(n, n->m_pkthdr.len, offset));
+	return (n);
+freeit:
+	m_freem(m);
+	NAT64STAT_INC(stats, dropped);
+	return (NULL);
+}
+
+int
+nat64_getlasthdr(struct mbuf *m, int *offset)
+{
+	struct ip6_hdr *ip6;
+	struct ip6_hbh *hbh;
+	int proto, hlen;
+
+	if (offset != NULL)
+		hlen = *offset;
+	else
+		hlen = 0;
+
+	if (m->m_len < hlen + sizeof(*ip6))
+		return (-1);
+
+	ip6 = mtodo(m, hlen);
+	hlen += sizeof(*ip6);
+	proto = ip6->ip6_nxt;
+	/* Skip extension headers */
+	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
+	    proto == IPPROTO_DSTOPTS) {
+		hbh = mtodo(m, hlen);
+		/*
+		 * We expect the mbuf to have contiguous data up to
+		 * the upper-level header.
+		 */
+		if (m->m_len < hlen)
+			return (-1);
+		/*
+		 * We don't support the Jumbo Payload option,
+		 * so return an error.
+		 */
+		if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
+			return (-1);
+		proto = hbh->ip6h_nxt;
+		hlen += hbh->ip6h_len << 3;
+	}
+	if (offset != NULL)
+		*offset = hlen;
+	return (proto);
+}
+
+int
+nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
+    struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats,
+    void *logdata)
+{
+	struct route_in6 ro;
+	struct ip6_hdr ip6;
+	struct ifnet *ifp;
+	struct ip *ip;
+	struct mbufq mq;
+	struct sockaddr *dst;
+	uint32_t mtu;
+	uint16_t ip_id, ip_off;
+	uint16_t *csum;
+	int plen, hlen;
+	uint8_t proto;
+
+	ip = mtod(m, struct ip*);
+
+	if (ip->ip_ttl <= IPTTLDEC) {
+		nat64_icmp_reflect(m, ICMP_TIMXCEED,
+		    ICMP_TIMXCEED_INTRANS, 0, stats, logdata);
+		return (NAT64RETURN);
+	}
+
+	ip6.ip6_dst = *daddr;
+	ip6.ip6_src = *saddr;
+
+	hlen = ip->ip_hl << 2;
+	plen = ntohs(ip->ip_len) - hlen;
+	proto = ip->ip_p;
+
+	/* Save ip_id and ip_off, both are in network byte order */
+	ip_id = ip->ip_id;
+	ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
+
+	/* Fragment length must be a multiple of 8 octets */
+	if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
+		nat64_icmp_reflect(m, ICMP_PARAMPROB,
+		    ICMP_PARAMPROB_LENGTH, 0, stats, logdata);
+		return (NAT64RETURN);
+	}
+	/* Fragmented ICMP is unsupported */
+	if (proto == IPPROTO_ICMP && ip_off != 0) {
+		DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+
+	dst = nat64_find_route6(&ro, &ip6.ip6_dst, m);
+	if (dst == NULL) {
+		FREE_ROUTE(&ro);
+		NAT64STAT_INC(stats, noroute6);
+		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
+		    stats, logdata);
+		return (NAT64RETURN);
+	}
+	ifp = ro.ro_rt->rt_ifp;
+	if (ro.ro_rt->rt_mtu != 0)
+		mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
+	else
+		mtu = ifp->if_mtu;
+	if (mtu < plen + sizeof(ip6) && (ip->ip_off & htons(IP_DF)) != 0) {
+		FREE_ROUTE(&ro);
+		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
+		    FRAGSZ(mtu) + sizeof(struct ip), stats, logdata);
+		return (NAT64RETURN);
+	}
+
+	
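+	/*
+	 * Route and MTU are known; what follows builds the IPv6 header,
+	 * patches the transport checksums incrementally (via
+	 * nat64_cksum_convert()) and finally fragments and queues the
+	 * result.
+	 */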
ip6.ip6_flow = htonl(ip->ip_tos << 20); + ip6.ip6_vfc |= IPV6_VERSION; +#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT + ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC; +#else + /* Forwarding code will decrement HLIM. */ + ip6.ip6_hlim = ip->ip_ttl; +#endif + ip6.ip6_plen = htons(plen); + ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto; + /* Convert checksums. */ + switch (proto) { + case IPPROTO_TCP: + csum = &TCP(mtodo(m, hlen))->th_sum; + if (lport != 0) { + struct tcphdr *tcp = TCP(mtodo(m, hlen)); + *csum = cksum_adjust(*csum, tcp->th_dport, lport); + tcp->th_dport = lport; + } + *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); + break; + case IPPROTO_UDP: + csum = &UDP(mtodo(m, hlen))->uh_sum; + if (lport != 0) { + struct udphdr *udp = UDP(mtodo(m, hlen)); + *csum = cksum_adjust(*csum, udp->uh_dport, lport); + udp->uh_dport = lport; + } + *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); + break; + case IPPROTO_ICMP: + m = nat64_icmp_translate(m, &ip6, lport, hlen, stats); + if (m == NULL) { + FREE_ROUTE(&ro); + /* stats already accounted */ + return (NAT64RETURN); + } + } + + m_adj(m, hlen); + mbufq_init(&mq, 255); + nat64_fragment6(stats, &ip6, &mq, m, mtu, ip_id, ip_off); + while ((m = mbufq_dequeue(&mq)) != NULL) { + if (nat64_output(ifp, m, dst, (struct route *)&ro, stats, + logdata) != 0) + break; + NAT64STAT_INC(stats, opcnt46); + } + mbufq_drain(&mq); + FREE_ROUTE(&ro); + return (NAT64RETURN); +} + +int +nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, + nat64_stats_block *stats, void *logdata) +{ + struct ip ip; + struct icmp6_hdr *icmp6; + struct ip6_frag *ip6f; + struct ip6_hdr *ip6, *ip6i; + uint32_t mtu; + int plen, proto; + uint8_t type, code; + + if (hlen == 0) { + ip6 = mtod(m, struct ip6_hdr *); + if (nat64_check_ip6(&ip6->ip6_src) != 0 || + nat64_check_ip6(&ip6->ip6_dst) != 0) + return (NAT64SKIP); + + proto = nat64_getlasthdr(m, &hlen); + if (proto != IPPROTO_ICMPV6) { + DPRINTF(DP_DROPS, + "dropped due to mbuf isn't contigious"); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + } + + /* + * Translate ICMPv6 type and code to ICMPv4 (RFC7915). + * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6(). + */ + icmp6 = mtodo(m, hlen); + mtu = 0; + switch (icmp6->icmp6_type) { + case ICMP6_DST_UNREACH: + type = ICMP_UNREACH; + switch (icmp6->icmp6_code) { + case ICMP6_DST_UNREACH_NOROUTE: + case ICMP6_DST_UNREACH_BEYONDSCOPE: + case ICMP6_DST_UNREACH_ADDR: + code = ICMP_UNREACH_HOST; + break; + case ICMP6_DST_UNREACH_ADMIN: + code = ICMP_UNREACH_HOST_PROHIB; + break; + case ICMP6_DST_UNREACH_NOPORT: + code = ICMP_UNREACH_PORT; + break; + default: + DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," + " code %d", icmp6->icmp6_type, + icmp6->icmp6_code); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + break; + case ICMP6_PACKET_TOO_BIG: + type = ICMP_UNREACH; + code = ICMP_UNREACH_NEEDFRAG; + mtu = ntohl(icmp6->icmp6_mtu); + if (mtu < IPV6_MMTU) { + DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d," + " code %d", mtu, icmp6->icmp6_type, + icmp6->icmp6_code); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + /* + * Adjust MTU to reflect difference between + * IPv6 an IPv4 headers. 
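+ *
+ * The difference is sizeof(struct ip6_hdr) - sizeof(struct ip) =
+ * 40 - 20 = 20 bytes, so e.g. an ICMPv6-reported MTU of 1500 becomes
+ * 1480 on the IPv4 side.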
+ */
+ mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
+ break;
+ case ICMP6_TIME_EXCEED_TRANSIT:
+ type = ICMP_TIMXCEED;
+ code = ICMP_TIMXCEED_INTRANS;
+ break;
+ case ICMP6_PARAM_PROB:
+ switch (icmp6->icmp6_code) {
+ case ICMP6_PARAMPROB_HEADER:
+ type = ICMP_PARAMPROB;
+ code = ICMP_PARAMPROB_ERRATPTR;
+ mtu = ntohl(icmp6->icmp6_pptr);
+ switch (mtu) {
+ case 0: /* Version/Traffic Class */
+ case 1: /* Traffic Class/Flow Label */
+ break;
+ case 4: /* Payload Length */
+ case 5:
+ mtu = 2;
+ break;
+ case 6: /* Next Header */
+ mtu = 9;
+ break;
+ case 7: /* Hop Limit */
+ mtu = 8;
+ break;
+ default:
+ if (mtu >= 8 && mtu <= 23) {
+ mtu = 12; /* Source address */
+ break;
+ }
+ if (mtu >= 24 && mtu <= 39) {
+ mtu = 16; /* Destination address */
+ break;
+ }
+ DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+ " code %d, pptr %d", icmp6->icmp6_type,
+ icmp6->icmp6_code, mtu);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ break;
+ case ICMP6_PARAMPROB_NEXTHEADER:
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_PROTOCOL;
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+ " code %d, pptr %d", icmp6->icmp6_type,
+ icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
+ icmp6->icmp6_type, icmp6->icmp6_code);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ hlen += sizeof(struct icmp6_hdr);
+ if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
+ NAT64STAT_INC(stats, dropped);
+ DPRINTF(DP_DROPS, "Message is too short %d",
+ m->m_pkthdr.len);
+ return (NAT64MFREE);
+ }
+ /*
+ * We need at least ICMP_MINLEN bytes of the original datagram payload
+ * to generate an ICMP message. It is nice that ICMP_MINLEN is equal
+ * to sizeof(struct ip6_frag). So, if the embedded datagram had a
+ * fragment header, we will not have to do m_pullup() again.
+ *
+ * What we have here:
+ * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
+ * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
+ * We need to translate it to:
+ *
+ * Outer header: (alias_host, v4exthost)
+ * Inner header: (v4exthost, alias_host) [sport, alias_port]
+ *
+ * Assume the caller function has checked that v4mapPRefix+v4host
+ * matches the configured prefix.
+ * The only two things we should be provided with are the mapping
+ * between IPv6iHost <> alias_host and between dport and alias_port.
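 * (Editor's illustration of the pointer remapping earlier in this
 * function: an ICMPv6 Parameter Problem whose pptr points at the Hop
 * Limit byte, offset 7 of the IPv6 header, is reported to the IPv4
 * client as offset 8, the TTL byte of the IPv4 header; pointers into
 * the 16-byte IPv6 source and destination addresses collapse to
 * offsets 12 and 16, the IPv4 address fields.)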
+ */
+ if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
+ m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
+ if (m == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ return (NAT64RETURN);
+ }
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6i = mtodo(m, hlen);
+ ip6f = NULL;
+ proto = ip6i->ip6_nxt;
+ plen = ntohs(ip6i->ip6_plen);
+ hlen += sizeof(struct ip6_hdr);
+ if (proto == IPPROTO_FRAGMENT) {
+ if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
+ ICMP_MINLEN)
+ goto fail;
+ ip6f = mtodo(m, hlen);
+ proto = ip6f->ip6f_nxt;
+ plen -= sizeof(struct ip6_frag);
+ hlen += sizeof(struct ip6_frag);
+ /* Adjust MTU to reflect frag header size */
+ if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
+ mtu -= sizeof(struct ip6_frag);
+ }
+ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
+ DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
+ proto);
+ goto fail;
+ }
+ if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
+ nat64_check_ip6(&ip6i->ip6_dst) != 0) {
+ DPRINTF(DP_DROPS, "Inner addresses do not pass the check");
+ goto fail;
+ }
+ /* Check if outer dst is the same as inner src */
+ if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
+ DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
+ goto fail;
+ }
+
+ /* Now we need to make a fake IPv4 packet to generate an ICMP message */
+ ip.ip_dst.s_addr = aaddr;
+ ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src);
+ /* XXX: Make fake ulp header */
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+ ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */
+#endif
+ nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
+ m_adj(m, hlen - sizeof(struct ip));
+ bcopy(&ip, mtod(m, void *), sizeof(ip));
+ nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats, logdata);
+ return (NAT64RETURN);
+fail:
+ /*
+ * We must call m_freem() here because the mbuf pointer could
+ * have been changed by m_pullup().
+ */
+ m_freem(m);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64RETURN);
+}
+
+int
+nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata)
+{
+ struct route ro;
+ struct ip ip;
+ struct ifnet *ifp;
+ struct ip6_frag *frag;
+ struct ip6_hdr *ip6;
+ struct icmp6_hdr *icmp6;
+ struct sockaddr *dst;
+ uint16_t *csum;
+ uint32_t mtu;
+ int plen, hlen, proto;
+
+ /*
+ * XXX: we expect ipfw_chk() did m_pullup() up to the upper level
+ * protocol's headers. Also we skip some checks that ip6_input(),
+ * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+ */
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
+ nat64_check_ip6(&ip6->ip6_dst) != 0) {
+ return (NAT64SKIP);
+ }
+
+ /* Starting from this point we must not return zero */
+ ip.ip_src.s_addr = aaddr;
+ if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
+ DPRINTF(DP_GENERIC, "invalid source address: %08x",
+ ip.ip_src.s_addr);
+ /* XXX: stats? */
+ return (NAT64MFREE);
+ }
+
+ ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst);
+ if (ip.ip_dst.s_addr == 0) {
+ /* XXX: stats?
*/
+ return (NAT64MFREE);
+ }
+
+ if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
+ nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
+ ICMP6_TIME_EXCEED_TRANSIT, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ hlen = 0;
+ plen = ntohs(ip6->ip6_plen);
+ proto = nat64_getlasthdr(m, &hlen);
+ if (proto < 0) {
+ DPRINTF(DP_DROPS, "dropped due to non-contiguous mbuf");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ frag = NULL;
+ if (proto == IPPROTO_FRAGMENT) {
+ /* ipfw_chk() should have done m_pullup() up to the fragment header */
+ if (m->m_len < hlen + sizeof(*frag)) {
+ DPRINTF(DP_DROPS,
+ "dropped due to non-contiguous mbuf");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ frag = mtodo(m, hlen);
+ proto = frag->ip6f_nxt;
+ hlen += sizeof(*frag);
+ /* Fragmented ICMPv6 is unsupported */
+ if (proto == IPPROTO_ICMPV6) {
+ DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ /* Fragment length must be multiple of 8 octets */
+ if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
+ ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
+ nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
+ ICMP6_PARAMPROB_HEADER,
+ offsetof(struct ip6_hdr, ip6_plen), stats,
+ logdata);
+ return (NAT64RETURN);
+ }
+ }
+ plen -= hlen - sizeof(struct ip6_hdr);
+ if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
+ DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
+ plen, m->m_pkthdr.len, hlen);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ icmp6 = NULL; /* Make gcc happy */
+ if (proto == IPPROTO_ICMPV6) {
+ icmp6 = mtodo(m, hlen);
+ if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
+ icmp6->icmp6_type != ICMP6_ECHO_REPLY)
+ return (nat64_handle_icmp6(m, hlen, aaddr, aport,
+ stats, logdata));
+ }
+ dst = nat64_find_route4(&ro, ip.ip_dst.s_addr, m);
+ if (dst == NULL) {
+ FREE_ROUTE(&ro);
+ NAT64STAT_INC(stats, noroute4);
+ nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
+ ICMP6_DST_UNREACH_NOROUTE, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ ifp = ro.ro_rt->rt_ifp;
+ if (ro.ro_rt->rt_mtu != 0)
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
+ else
+ mtu = ifp->if_mtu;
+ if (mtu < plen + sizeof(ip)) {
+ FREE_ROUTE(&ro);
+ nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, mtu, stats,
+ logdata);
+ return (NAT64RETURN);
+ }
+ nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
+ /* Convert checksums.
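 * Editor's note, an illustration rather than part of the original
 * comment: cksum_adjust()/cksum_add() below are assumed to implement
 * the incremental internet-checksum update of RFC 1624,
 * HC' = ~(~HC + ~m + m'), so only the 16-bit words that actually
 * change (ports, pseudo-header) are folded into the existing sum. A
 * minimal user-space equivalent, with a hypothetical name, is:
 *
 *	static uint16_t
 *	cksum_fixup(uint16_t cksum, uint16_t oldw, uint16_t neww)
 *	{
 *		uint32_t sum;
 *
 *		sum = (uint32_t)(uint16_t)~cksum +
 *		    (uint16_t)~oldw + neww;
 *		sum = (sum & 0xffff) + (sum >> 16);
 *		sum = (sum & 0xffff) + (sum >> 16);
 *		return ((uint16_t)~sum);
 *	}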
*/ + switch (proto) { + case IPPROTO_TCP: + csum = &TCP(mtodo(m, hlen))->th_sum; + if (aport != 0) { + struct tcphdr *tcp = TCP(mtodo(m, hlen)); + *csum = cksum_adjust(*csum, tcp->th_sport, aport); + tcp->th_sport = aport; + } + *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); + break; + case IPPROTO_UDP: + csum = &UDP(mtodo(m, hlen))->uh_sum; + if (aport != 0) { + struct udphdr *udp = UDP(mtodo(m, hlen)); + *csum = cksum_adjust(*csum, udp->uh_sport, aport); + udp->uh_sport = aport; + } + *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); + break; + case IPPROTO_ICMPV6: + /* Checksum in ICMPv6 covers pseudo header */ + csum = &icmp6->icmp6_cksum; + *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen, + IPPROTO_ICMPV6, 0)); + /* Convert ICMPv6 types to ICMP */ + mtu = *(uint16_t *)icmp6; /* save old word for cksum_adjust */ + if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST) + icmp6->icmp6_type = ICMP_ECHO; + else /* ICMP6_ECHO_REPLY */ + icmp6->icmp6_type = ICMP_ECHOREPLY; + *csum = cksum_adjust(*csum, (uint16_t)mtu, *(uint16_t *)icmp6); + if (aport != 0) { + uint16_t old_id = icmp6->icmp6_id; + icmp6->icmp6_id = aport; + *csum = cksum_adjust(*csum, old_id, aport); + } + break; + }; + + m_adj(m, hlen - sizeof(ip)); + bcopy(&ip, mtod(m, void *), sizeof(ip)); + if (nat64_output(ifp, m, dst, &ro, stats, logdata) == 0) + NAT64STAT_INC(stats, opcnt64); + FREE_ROUTE(&ro); + return (NAT64RETURN); +} + diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.h b/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.h new file mode 100644 index 00000000..9f653954 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.h @@ -0,0 +1,116 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _IP_FW_NAT64_TRANSLATE_H_ +#define _IP_FW_NAT64_TRANSLATE_H_ + +#ifdef RTALLOC_NOLOCK +#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_fib_nolock((ro), 0, (fib)) +#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc_nolock((ro), (fib)) +#define FREE_ROUTE(ro) +#else +#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_ign_fib((ro), 0, (fib)) +#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc((ro), (fib)) +#define FREE_ROUTE(ro) RO_RTFREE((ro)) +#endif + +static inline int +nat64_check_ip6(struct in6_addr *addr) +{ + + /* XXX: We should really check /8 */ + if (addr->s6_addr16[0] == 0 || /* 0000::/8 Reserved by IETF */ + IN6_IS_ADDR_MULTICAST(addr) || IN6_IS_ADDR_LINKLOCAL(addr)) + return (1); + return (0); +} + +extern int nat64_allow_private; +static inline int +nat64_check_private_ip4(in_addr_t ia) +{ + + if (nat64_allow_private) + return (0); + /* WKPFX must not be used to represent non-global IPv4 addresses */ +// if (cfg->flags & NAT64_WKPFX) { + /* IN_PRIVATE */ + if ((ia & htonl(0xff000000)) == htonl(0x0a000000) || + (ia & htonl(0xfff00000)) == htonl(0xac100000) || + (ia & htonl(0xffff0000)) == htonl(0xc0a80000)) + return (1); + /* + * RFC 5735: + * 192.0.0.0/24 - reserved for IETF protocol assignments + * 192.88.99.0/24 - for use as 6to4 relay anycast addresses + * 198.18.0.0/15 - for use in benchmark tests + * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use + * in documentation and example code + */ + if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) || + (ia & htonl(0xffffff00)) == htonl(0xc0586300) || + (ia & htonl(0xfffffe00)) == htonl(0xc6120000) || + (ia & htonl(0xffffff00)) == htonl(0xc0000200) || + (ia & htonl(0xfffffe00)) == htonl(0xc6336400) || + (ia & htonl(0xffffff00)) == htonl(0xcb007100)) + return (1); +// } + return (0); +} + +static inline int +nat64_check_ip4(in_addr_t ia) +{ + + /* IN_LOOPBACK */ + if ((ia & htonl(0xff000000)) == htonl(0x7f000000)) + return (1); + /* IN_LINKLOCAL */ + if ((ia & htonl(0xffff0000)) == htonl(0xa9fe0000)) + return (1); + /* IN_MULTICAST & IN_EXPERIMENTAL */ + if ((ia & htonl(0xe0000000)) == htonl(0xe0000000)) + return (1); + return (0); +} + +#define nat64_get_ip4(_ip6) ((_ip6)->s6_addr32[3]) +#define nat64_set_ip4(_ip6, _ip4) (_ip6)->s6_addr32[3] = (_ip4) + +int nat64_getlasthdr(struct mbuf *m, int *offset); +int nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, + struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats, + void *logdata); +int nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, + nat64_stats_block *stats, void *logdata); +int nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, + nat64_stats_block *stats, void *logdata); + +#endif + diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c new file mode 100644 index 00000000..ce666213 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c @@ -0,0 +1,1772 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> + * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/counter.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/rmlock.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <sys/syslog.h> +#include <sys/sysctl.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/if_pflog.h> +#include <net/pfil.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/ip_fw.h> +#include <netinet/ip6.h> +#include <netinet/icmp6.h> +#include <netinet/ip_icmp.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet6/in6_var.h> +#include <netinet6/ip6_var.h> +#include <netinet6/ip_fw_nat64.h> + +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/nat64/ip_fw_nat64.h> +#include <netpfil/ipfw/nat64/nat64lsn.h> +#include <netpfil/ipfw/nat64/nat64_translate.h> +#include <netpfil/pf/pf.h> + +MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN"); + +static void nat64lsn_periodic(void *data); +#define PERIODIC_DELAY 4 +static uint8_t nat64lsn_proto_map[256]; +uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO]; + +#define NAT64_FLAG_FIN 0x01 /* FIN was seen */ +#define NAT64_FLAG_SYN 0x02 /* First syn in->out */ +#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */ +#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN) + +#define NAT64_FLAG_RDR 0x80 /* Port redirect */ +#define NAT64_LOOKUP(chain, cmd) \ + (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1) +/* + * Delayed job queue, used to create new hosts + * and new portgroups + */ +enum nat64lsn_jtype { + JTYPE_NEWHOST = 1, + JTYPE_NEWPORTGROUP, + JTYPE_DELPORTGROUP, +}; + +struct nat64lsn_job_item { + TAILQ_ENTRY(nat64lsn_job_item) next; + enum nat64lsn_jtype jtype; + struct nat64lsn_host *nh; + struct nat64lsn_portgroup *pg; + void *spare_idx; + struct in6_addr haddr; + uint8_t nat_proto; + uint8_t done; + int needs_idx; + int delcount; + unsigned int fhash; /* Flow hash */ + uint32_t aaddr; /* Last used address (net) */ + struct mbuf *m; + struct ipfw_flow_id f_id; + uint64_t delmask[NAT64LSN_PGPTRNMASK]; +}; + +static struct mtx jmtx; +#define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF) +#define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx) +#define JQUEUE_LOCK() mtx_lock(&jmtx) +#define JQUEUE_UNLOCK() mtx_unlock(&jmtx) + +static void nat64lsn_enqueue_job(struct nat64lsn_cfg 
*cfg,
+ struct nat64lsn_job_item *ji);
+static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_head *jhead, int jlen);
+
+static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, int jtype);
+static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
+ int needs_idx);
+static int nat64lsn_request_host(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
+ struct ipfw_flow_id *f_id, struct mbuf **pm);
+
+static int alloc_portgroup(struct nat64lsn_job_item *ji);
+static void destroy_portgroup(struct nat64lsn_portgroup *pg);
+static void destroy_host6(struct nat64lsn_host *nh);
+static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+
+static int attach_portgroup(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+
+
+/* XXX tmp */
+static uma_zone_t nat64lsn_host_zone;
+static uma_zone_t nat64lsn_pg_zone;
+static uma_zone_t nat64lsn_pgidx_zone;
+
+static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_host *nh);
+
+#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16))
+#define I6_first(_ph, h) (_ph)[h]
+#define I6_next(x) (x)->next
+#define I6_val(x) (&(x)->addr)
+#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b)
+#define I6_lock(a, b)
+#define I6_unlock(a, b)
+
+#define I6HASH_FIND(_cfg, _res, _a) \
+ CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a)
+#define I6HASH_INSERT(_cfg, _i) \
+ CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i)
+#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \
+ CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a)
+
+#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \
+ CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg)
+
+#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8)
+
+static unsigned
+djb_hash(const unsigned char *h, const int len)
+{
+ unsigned int result = 0;
+ int i;
+
+ for (i = 0; i < len; i++)
+ result = 33 * result ^ h[i];
+
+ return (result);
+}
+
+/*
+static size_t
+bitmask_size(size_t num, int *level)
+{
+ size_t x;
+ int c;
+
+ for (c = 0, x = num; num > 1; num /= 64, c++)
+ ;
+
+ return (x);
+}
+
+static void
+bitmask_prepare(uint64_t *pmask, size_t bufsize, int level)
+{
+ size_t x, z;
+
+ memset(pmask, 0xFF, bufsize);
+ for (x = 0, z = 1; level > 1; x += z, z *= 64, level--)
+ ;
+ pmask[x] ~= 0x01;
+}
+*/
+
+static void
+nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+ uint32_t n, uint32_t sn)
+{
+
+ memset(plog, 0, sizeof(*plog));
+ plog->length = PFLOG_REAL_HDRLEN;
+ plog->af = family;
+ plog->action = PF_NAT;
+ plog->dir = PF_IN;
+ plog->rulenr = htonl(n);
+ plog->subrulenr = htonl(sn);
+ plog->ruleset[0] = '\0';
+ strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
+ ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
+/*
+ * Inspects ICMP packets to see whether the message contains a
+ * different packet header, in which case we need to alter @addr and @port.
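 * (Editor's illustration: for an ICMP error returned from the IPv4
 * side, the flow that identifies the NAT state is the one embedded in
 * the error payload, not the outer header. inspect_icmp_mbuf() below
 * therefore takes @addr and @port from the embedded datagram: the
 * inner source address, plus the inner TCP/UDP source port or, for an
 * embedded echo request, the ICMP ID.)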
+ */ +static int +inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr, + uint16_t *port) +{ + struct ip *ip; + struct tcphdr *tcp; + struct udphdr *udp; + struct icmphdr *icmp; + int off; + uint8_t proto; + + ip = mtod(*m, struct ip *); /* Outer IP header */ + off = (ip->ip_hl << 2) + ICMP_MINLEN; + if ((*m)->m_len < off) + *m = m_pullup(*m, off); + if (*m == NULL) + return (ENOMEM); + + ip = mtod(*m, struct ip *); /* Outer IP header */ + icmp = L3HDR(ip, struct icmphdr *); + switch (icmp->icmp_type) { + case ICMP_ECHO: + case ICMP_ECHOREPLY: + /* Use icmp ID as distinguisher */ + *port = ntohs(*((uint16_t *)(icmp + 1))); + return (0); + case ICMP_UNREACH: + case ICMP_TIMXCEED: + break; + default: + return (EOPNOTSUPP); + } + /* + * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits + * of ULP header. + */ + if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN) + return (EINVAL); + if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN) + *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN); + if (*m == NULL) + return (ENOMEM); + ip = mtodo(*m, off); /* Inner IP header */ + proto = ip->ip_p; + off += ip->ip_hl << 2; /* Skip inner IP header */ + *addr = ntohl(ip->ip_src.s_addr); + if ((*m)->m_len < off + ICMP_MINLEN) + *m = m_pullup(*m, off + ICMP_MINLEN); + if (*m == NULL) + return (ENOMEM); + switch (proto) { + case IPPROTO_TCP: + tcp = mtodo(*m, off); + *nat_proto = NAT_PROTO_TCP; + *port = ntohs(tcp->th_sport); + return (0); + case IPPROTO_UDP: + udp = mtodo(*m, off); + *nat_proto = NAT_PROTO_UDP; + *port = ntohs(udp->uh_sport); + return (0); + case IPPROTO_ICMP: + /* + * We will translate only ICMP errors for our ICMP + * echo requests. + */ + icmp = mtodo(*m, off); + if (icmp->icmp_type != ICMP_ECHO) + return (EOPNOTSUPP); + *port = ntohs(*((uint16_t *)(icmp + 1))); + return (0); + }; + return (EOPNOTSUPP); +} + +static inline uint8_t +convert_tcp_flags(uint8_t flags) +{ + uint8_t result; + + result = flags & (TH_FIN|TH_SYN); + result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */ + result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */ + + return (result); +} + +static NAT64NOINLINE int +nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id, + struct mbuf **pm) +{ + struct pfloghdr loghdr, *logdata; + struct in6_addr src6; + struct nat64lsn_portgroup *pg; + struct nat64lsn_host *nh; + struct nat64lsn_state *st; + struct ip *ip; + uint32_t addr; + uint16_t state_flags, state_ts; + uint16_t port, lport; + uint8_t nat_proto; + int ret; + + addr = f_id->dst_ip; + port = f_id->dst_port; + if (addr < cfg->prefix4 || addr > cfg->pmask4) { + NAT64STAT_INC(&cfg->stats, nomatch4); + return (cfg->nomatch_verdict); + } + + /* Check if protocol is supported and get its short id */ + nat_proto = nat64lsn_proto_map[f_id->proto]; + if (nat_proto == 0) { + NAT64STAT_INC(&cfg->stats, noproto); + return (cfg->nomatch_verdict); + } + + /* We might need to handle icmp differently */ + if (nat_proto == NAT_PROTO_ICMP) { + ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port); + if (ret != 0) { + if (ret == ENOMEM) + NAT64STAT_INC(&cfg->stats, nomem); + else + NAT64STAT_INC(&cfg->stats, noproto); + return (cfg->nomatch_verdict); + } + /* XXX: Check addr for validity */ + if (addr < cfg->prefix4 || addr > cfg->pmask4) { + NAT64STAT_INC(&cfg->stats, nomatch4); + return (cfg->nomatch_verdict); + } + } + + /* Calc portgroup offset w.r.t protocol */ + pg = GET_PORTGROUP(cfg, addr, nat_proto, port); + + /* Check if this port is occupied by 
any portgroup */ + if (pg == NULL) { + NAT64STAT_INC(&cfg->stats, nomatch4); +#if 0 + DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port, + _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port)); +#endif + return (cfg->nomatch_verdict); + } + + /* TODO: Check flags to see if we need to do some static mapping */ + nh = pg->host; + + /* Prepare some fields we might need to update */ + SET_AGE(state_ts); + ip = mtod(*pm, struct ip *); + if (ip->ip_p == IPPROTO_TCP) + state_flags = convert_tcp_flags( + L3HDR(ip, struct tcphdr *)->th_flags); + else + state_flags = 0; + + /* Lock host and get port mapping */ + NAT64_LOCK(nh); + + st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)]; + if (st->timestamp != state_ts) + st->timestamp = state_ts; + if ((st->flags & state_flags) != state_flags) + st->flags |= state_flags; + lport = htons(st->u.s.lport); + + NAT64_UNLOCK(nh); + + if (cfg->flags & NAT64_LOG) { + logdata = &loghdr; + nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off); + } else + logdata = NULL; + + src6.s6_addr32[0] = cfg->prefix6.s6_addr32[0]; + src6.s6_addr32[1] = cfg->prefix6.s6_addr32[1]; + src6.s6_addr32[2] = cfg->prefix6.s6_addr32[2]; + src6.s6_addr32[3] = htonl(f_id->src_ip); + + ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport, + &cfg->stats, logdata); + + if (ret == NAT64SKIP) + return (IP_FW_PASS); + if (ret == NAT64MFREE) + m_freem(*pm); + *pm = NULL; + + return (IP_FW_DENY); +} + +void +nat64lsn_dump_state(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st, + const char *px, int off) +{ + char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN]; + + if ((nat64_debug & DP_STATE) == 0) + return; + inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s)); + inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a)); + inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d)); + + DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> " + "%s:%d AGE %d", px, pg->idx, st, off, + s, st->u.s.lport, pg->nat_proto, a, pg->aport + off, + d, st->u.s.fport, GET_AGE(st->timestamp)); +} + +/* + * Check if particular TCP state is stale and should be deleted. + * Return 1 if true, 0 otherwise. + */ +static int +nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_state *st, int age) +{ + int ttl; + + if (st->flags & NAT64_FLAG_FIN) + ttl = cfg->st_close_ttl; + else if (st->flags & NAT64_FLAG_ESTAB) + ttl = cfg->st_estab_ttl; + else if (st->flags & NAT64_FLAG_SYN) + ttl = cfg->st_syn_ttl; + else + ttl = cfg->st_syn_ttl; + + if (age > ttl) + return (1); + return (0); +} + +/* + * Check if nat state @st is stale and should be deleted. + * Return 1 if true, 0 otherwise. + */ +static NAT64NOINLINE int +nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st) +{ + int age, delete; + + age = GET_AGE(st->timestamp); + delete = 0; + + /* Skip immutable records */ + if (st->flags & NAT64_FLAG_RDR) + return (0); + + switch (pg->nat_proto) { + case NAT_PROTO_TCP: + delete = nat64lsn_periodic_check_tcp(cfg, st, age); + break; + case NAT_PROTO_UDP: + if (age > cfg->st_udp_ttl) + delete = 1; + break; + case NAT_PROTO_ICMP: + if (age > cfg->st_icmp_ttl) + delete = 1; + break; + } + + return (delete); +} + + +/* + * The following structures and functions + * are used to perform SLIST_FOREACH_SAFE() + * analog for states identified by struct st_ptr. 
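 * (Editor's illustration, mirroring nat64lsn_periodic_chkstates()
 * below; the deletion body is elided:
 *
 *	struct st_idx si, si_prev;
 *
 *	memset(&si_prev, 0, sizeof(si_prev));
 *	for (st_first(cfg, nh, &nh->phash[i], &si); si.st != NULL;
 *	    st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) {
 *		...inspect si.st; unlink it via si_prev.st->next,
 *		or via nh->phash[i] when si_prev.st is NULL...
 *	}
 *
 * st_save_cond() only advances si_prev when the current state
 * survived, which keeps the "previous" pointer valid across
 * deletions.)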
+ */ + +struct st_idx { + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + struct st_ptr sidx_next; +}; + +static struct st_idx * +st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh, + struct st_ptr *sidx, struct st_idx *si) +{ + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + + if (sidx->idx == 0) { + memset(si, 0, sizeof(*si)); + return (si); + } + + pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx); + st = &pg->states[sidx->off]; + + si->pg = pg; + si->st = st; + si->sidx_next = st->next; + + return (si); +} + +static struct st_idx * +st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh, + struct st_idx *si) +{ + struct st_ptr sidx; + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + + sidx = si->sidx_next; + if (sidx.idx == 0) { + memset(si, 0, sizeof(*si)); + si->st = NULL; + si->pg = NULL; + return (si); + } + + pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); + st = &pg->states[sidx.off]; + + si->pg = pg; + si->st = st; + si->sidx_next = st->next; + + return (si); +} + +static struct st_idx * +st_save_cond(struct st_idx *si_dst, struct st_idx *si) +{ + if (si->st != NULL) + *si_dst = *si; + + return (si_dst); +} + +unsigned int +nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh) +{ + struct st_idx si, si_prev; + int i; + unsigned int delcount; + + delcount = 0; + for (i = 0; i < nh->hsize; i++) { + memset(&si_prev, 0, sizeof(si_prev)); + for (st_first(cfg, nh, &nh->phash[i], &si); + si.st != NULL; + st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) { + if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0) + continue; + nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE", + si.st->cur.off); + /* Unlink from hash */ + if (si_prev.st != NULL) + si_prev.st->next = si.st->next; + else + nh->phash[i] = si.st->next; + /* Delete state and free its data */ + PG_MARK_FREE_IDX(si.pg, si.st->cur.off); + memset(si.st, 0, sizeof(struct nat64lsn_state)); + si.st = NULL; + delcount++; + + /* Update portgroup timestamp */ + SET_AGE(si.pg->timestamp); + } + } + NAT64STAT_ADD(&cfg->stats, sdeleted, delcount); + return (delcount); +} + +/* + * Checks if portgroup is not used and can be deleted, + * Returns 1 if stale, 0 otherwise + */ +static int +stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg) +{ + + if (!PG_IS_EMPTY(pg)) + return (0); + if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay) + return (0); + return (1); +} + +/* + * Checks if host record is not used and can be deleted, + * Returns 1 if stale, 0 otherwise + */ +static int +stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh) +{ + + if (nh->pg_used != 0) + return (0); + if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay) + return (0); + return (1); +} + +struct nat64lsn_periodic_data { + struct nat64lsn_cfg *cfg; + struct nat64lsn_job_head jhead; + int jlen; +}; + +static NAT64NOINLINE int +nat64lsn_periodic_chkhost(struct nat64lsn_host *nh, + struct nat64lsn_periodic_data *d) +{ + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_portgroup *pg; + struct nat64lsn_job_item *ji; + uint64_t delmask[NAT64LSN_PGPTRNMASK]; + int delcount, i; + + delcount = 0; + memset(delmask, 0, sizeof(delmask)); + + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d", + stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu); + if (!stale_nh(d->cfg, nh)) { + /* Non-stale host. 
Inspect internals */ + NAT64_LOCK(nh); + + /* Stage 1: Check&expire states */ + if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0) + SET_AGE(nh->timestamp); + + /* Stage 2: Check if we need to expire */ + for (i = 0; i < nh->pg_used; i++) { + pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1); + if (pg == NULL) + continue; + + /* Check if we can delete portgroup */ + if (stale_pg(d->cfg, pg) == 0) + continue; + + DPRINTF(DP_JQUEUE, "Check PG %d", i); + delmask[i / 64] |= ((uint64_t)1 << (i % 64)); + delcount++; + } + + NAT64_UNLOCK(nh); + if (delcount == 0) + return (0); + } + + DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount); + /* We have something to delete - add it to queue */ + ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP); + if (ji == NULL) + return (0); + + ji->haddr = nh->addr; + ji->delcount = delcount; + memcpy(ji->delmask, delmask, sizeof(ji->delmask)); + + TAILQ_INSERT_TAIL(&d->jhead, ji, next); + d->jlen++; + return (0); +} + +/* + * This procedure is used to perform various maintance + * on dynamic hash list. Currently it is called every second. + */ +static void +nat64lsn_periodic(void *data) +{ + struct ip_fw_chain *ch; + IPFW_RLOCK_TRACKER; + struct nat64lsn_cfg *cfg; + struct nat64lsn_periodic_data d; + struct nat64lsn_host *nh, *tmp; + + cfg = (struct nat64lsn_cfg *) data; + ch = cfg->ch; + CURVNET_SET(cfg->vp); + + memset(&d, 0, sizeof(d)); + d.cfg = cfg; + TAILQ_INIT(&d.jhead); + + IPFW_RLOCK(ch); + + /* Stage 1: foreach host, check all its portgroups */ + I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d); + + /* Enqueue everything we have requested */ + nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen); + + callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY); + + IPFW_RUNLOCK(ch); + + CURVNET_RESTORE(); +} + +static NAT64NOINLINE void +reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + + if (ji->m == NULL) + return; + + /* Request has failed or packet type is wrong */ + if (ji->f_id.addr_type != 6 || ji->done == 0) { + m_freem(ji->m); + ji->m = NULL; + NAT64STAT_INC(&cfg->stats, dropped); + DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d", + ji->jtype, ji->done); + return; + } + + /* + * XXX: Limit recursion level + */ + + NAT64STAT_INC(&cfg->stats, jreinjected); + DPRINTF(DP_JQUEUE, "Reinject mbuf"); + nat64lsn_translate6(cfg, &ji->f_id, &ji->m); +} + +static void +destroy_portgroup(struct nat64lsn_portgroup *pg) +{ + + DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg); + uma_zfree(nat64lsn_pg_zone, pg); +} + +static NAT64NOINLINE int +alloc_portgroup(struct nat64lsn_job_item *ji) +{ + struct nat64lsn_portgroup *pg; + + pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT); + if (pg == NULL) + return (1); + + if (ji->needs_idx != 0) { + ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT); + /* Failed alloc isn't always fatal, so don't check */ + } + memset(&pg->freemask, 0xFF, sizeof(pg->freemask)); + pg->nat_proto = ji->nat_proto; + ji->pg = pg; + return (0); + +} + +static void +destroy_host6(struct nat64lsn_host *nh) +{ + char a[INET6_ADDRSTRLEN]; + int i; + + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh, + nh->pg_used); + NAT64_LOCK_DESTROY(nh); + for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++) + uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i)); + uma_zfree(nat64lsn_host_zone, nh); +} + +static NAT64NOINLINE int +alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + struct nat64lsn_host *nh; + char 
a[INET6_ADDRSTRLEN]; + + nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT); + if (nh == NULL) + return (1); + PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT); + if (PORTGROUP_CHUNK(nh, 0) == NULL) { + uma_zfree(nat64lsn_host_zone, nh); + return (2); + } + if (alloc_portgroup(ji) != 0) { + NAT64STAT_INC(&cfg->stats, jportfails); + uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0)); + uma_zfree(nat64lsn_host_zone, nh); + return (3); + } + + NAT64_LOCK_INIT(nh); + nh->addr = ji->haddr; + nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */ + nh->pg_allocated = NAT64LSN_PGIDX_CHUNK; + nh->pg_used = 0; + ji->nh = nh; + + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh); + return (0); +} + +/* + * Finds free @pg index inside @nh + */ +static NAT64NOINLINE int +find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx) +{ + int i; + + for (i = 0; i < nh->pg_allocated; i++) { + if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) { + *idx = i; + return (0); + } + } + return (1); +} + +static NAT64NOINLINE int +attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_host *nh; + + I6HASH_FIND(cfg, nh, &ji->haddr); + if (nh == NULL) { + /* Add new host to list */ + nh = ji->nh; + I6HASH_INSERT(cfg, nh); + cfg->ihcount++; + ji->nh = NULL; + + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh); + /* + * Try to add portgroup. + * Note it will automatically set + * 'done' on ji if successful. + */ + if (attach_portgroup(cfg, ji) != 0) { + DPRINTF(DP_DROPS, "%s %p failed to attach PG", + a, nh); + NAT64STAT_INC(&cfg->stats, jportfails); + return (1); + } + return (0); + } + + /* + * nh isn't NULL. This probably means we had several simultaneous + * host requests. The previous one request has already attached + * this host. Requeue attached mbuf and mark job as done, but + * leave nh and pg pointers not changed, so nat64lsn_do_request() + * will release all allocated resources. + */ + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "%s %p is already attached as %p", + a, ji->nh, nh); + ji->done = 1; + return (0); +} + +static NAT64NOINLINE int +find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off, + int nat_proto, uint16_t *aport, int *ppg_idx) +{ + int j, pg_idx; + + pg_idx = addr_off * _ADDR_PG_COUNT + + (nat_proto - 1) * _ADDR_PG_PROTO_COUNT; + + for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) { + if (cfg->pg[pg_idx + j] != NULL) + continue; + + *aport = j * NAT64_CHUNK_SIZE; + *ppg_idx = pg_idx + j; + return (1); + } + + return (0); +} + +/* + * XXX: This function needs to be rewritten to + * use free bitmask for faster pg finding, + * additionally, it should take into consideration + * a) randomization and + * b) previous addresses allocated to given nat instance + * + */ +static NAT64NOINLINE int +find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji, + uint32_t *aaddr, uint16_t *aport, int *ppg_idx) +{ + int i, nat_proto; + + /* + * XXX: Use bitmask index to be able to find/check if IP address + * has some spare pg's + */ + nat_proto = ji->nat_proto; + + /* First, try to use same address */ + if (ji->aaddr != 0) { + i = ntohl(ji->aaddr) - cfg->prefix4; + if (find_pg_place_addr(cfg, i, nat_proto, aport, + ppg_idx) != 0){ + /* Found! 
*/ + *aaddr = htonl(cfg->prefix4 + i); + return (0); + } + } + + /* Next, try to use random address based on flow hash */ + i = ji->fhash % (1 << (32 - cfg->plen4)); + if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) { + /* Found! */ + *aaddr = htonl(cfg->prefix4 + i); + return (0); + } + + + /* Last one: simply find ANY available */ + for (i = 0; i < (1 << (32 - cfg->plen4)); i++) { + if (find_pg_place_addr(cfg, i, nat_proto, aport, + ppg_idx) != 0){ + /* Found! */ + *aaddr = htonl(cfg->prefix4 + i); + return (0); + } + } + + return (1); +} + +static NAT64NOINLINE int +attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_portgroup *pg; + struct nat64lsn_host *nh; + uint32_t aaddr; + uint16_t aport; + int nh_pg_idx, pg_idx; + + pg = ji->pg; + + /* + * Find source host and bind: we can't rely on + * pg->host + */ + I6HASH_FIND(cfg, nh, &ji->haddr); + if (nh == NULL) + return (1); + + /* Find spare port chunk */ + if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) { + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a); + return (2); + } + + /* Expand PG indexes if needed */ + if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) { + PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) = + ji->spare_idx; + nh->pg_allocated += NAT64LSN_PGIDX_CHUNK; + ji->spare_idx = NULL; + } + + /* Find empty index to store PG in the @nh */ + if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) { + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s", + a); + return (3); + } + + cfg->pg[pg_idx] = pg; + cfg->protochunks[pg->nat_proto]++; + NAT64STAT_INC(&cfg->stats, spgcreated); + + pg->aaddr = aaddr; + pg->aport = aport; + pg->host = nh; + pg->idx = pg_idx; + SET_AGE(pg->timestamp); + + PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg; + if (nh->pg_used == nh_pg_idx) + nh->pg_used++; + SET_AGE(nh->timestamp); + + ji->pg = NULL; + ji->done = 1; + + return (0); +} + +static NAT64NOINLINE void +consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + struct nat64lsn_host *nh, *nh_tmp; + struct nat64lsn_portgroup *pg, *pg_list[256]; + int i, pg_lidx, idx; + + /* Find source host */ + I6HASH_FIND(cfg, nh, &ji->haddr); + if (nh == NULL || nh->pg_used == 0) + return; + + memset(pg_list, 0, sizeof(pg_list)); + pg_lidx = 0; + + NAT64_LOCK(nh); + + for (i = nh->pg_used - 1; i >= 0; i--) { + if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0) + continue; + pg = PORTGROUP_BYSIDX(cfg, nh, i + 1); + + /* Check that PG isn't busy. 
*/ + if (stale_pg(cfg, pg) == 0) + continue; + + /* DO delete */ + pg_list[pg_lidx++] = pg; + PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL; + + idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto, + pg->aport); + KASSERT(cfg->pg[idx] == pg, ("Non matched pg")); + cfg->pg[idx] = NULL; + cfg->protochunks[pg->nat_proto]--; + NAT64STAT_INC(&cfg->stats, spgdeleted); + + /* Decrease pg_used */ + while (nh->pg_used > 0 && + PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL) + nh->pg_used--; + + /* Check if on-stack buffer has ended */ + if (pg_lidx == nitems(pg_list)) + break; + } + + NAT64_UNLOCK(nh); + + if (stale_nh(cfg, nh)) { + I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr); + KASSERT(nh != NULL, ("Unable to find address")); + cfg->ihcount--; + ji->nh = nh; + I6HASH_FIND(cfg, nh, &ji->haddr); + KASSERT(nh == NULL, ("Failed to delete address")); + } + + /* TODO: Delay freeing portgroups */ + while (pg_lidx > 0) { + pg_lidx--; + NAT64STAT_INC(&cfg->stats, spgdeleted); + destroy_portgroup(pg_list[pg_lidx]); + } +} + +/* + * Main request handler. + * Responsible for handling jqueue, e.g. + * creating new hosts, addind/deleting portgroups. + */ +static NAT64NOINLINE void +nat64lsn_do_request(void *data) +{ + IPFW_RLOCK_TRACKER; + struct nat64lsn_job_head jhead; + struct nat64lsn_job_item *ji; + int jcount, nhsize; + struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data; + struct ip_fw_chain *ch; + int delcount; + + CURVNET_SET(cfg->vp); + + TAILQ_INIT(&jhead); + + /* XXX: We're running unlocked here */ + + ch = cfg->ch; + delcount = 0; + IPFW_RLOCK(ch); + + /* Grab queue */ + JQUEUE_LOCK(); + TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next); + jcount = cfg->jlen; + cfg->jlen = 0; + JQUEUE_UNLOCK(); + + /* check if we need to resize hash */ + nhsize = 0; + if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) { + nhsize = cfg->ihsize; + for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2) + ; + } else if (cfg->ihcount < cfg->ihsize * 4) { + nhsize = cfg->ihsize; + for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2) + ; + } + + IPFW_RUNLOCK(ch); + + if (TAILQ_EMPTY(&jhead)) { + CURVNET_RESTORE(); + return; + } + + NAT64STAT_INC(&cfg->stats, jcalls); + DPRINTF(DP_JQUEUE, "count=%d", jcount); + + /* + * TODO: + * What we should do here is to build a hash + * to ensure we don't have lots of duplicate requests. + * Skip this for now. 
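 * (Editor's sketch of the idea above, hypothetical and not existing
 * code: each job already carries ji->fhash and ji->haddr, so a dedup
 * pass could walk the grabbed TAILQ once, record the seen
 * (haddr, nat_proto) pairs in a small open-addressed table keyed by
 * fhash, and free any later job that repeats a pair before the
 * pre-allocation loop below runs.)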
+ * + * TODO: Limit per-call number of items + */ + + /* Pre-allocate everything for entire chain */ + TAILQ_FOREACH(ji, &jhead, next) { + switch (ji->jtype) { + case JTYPE_NEWHOST: + if (alloc_host6(cfg, ji) != 0) + NAT64STAT_INC(&cfg->stats, jhostfails); + break; + case JTYPE_NEWPORTGROUP: + if (alloc_portgroup(ji) != 0) + NAT64STAT_INC(&cfg->stats, jportfails); + break; + case JTYPE_DELPORTGROUP: + delcount += ji->delcount; + break; + default: + break; + } + } + + /* + * TODO: Alloc hew hash + */ + nhsize = 0; + if (nhsize > 0) { + /* XXX: */ + } + + /* Apply all changes in batch */ + IPFW_UH_WLOCK(ch); + IPFW_WLOCK(ch); + + TAILQ_FOREACH(ji, &jhead, next) { + switch (ji->jtype) { + case JTYPE_NEWHOST: + if (ji->nh != NULL) + attach_host6(cfg, ji); + break; + case JTYPE_NEWPORTGROUP: + if (ji->pg != NULL && + attach_portgroup(cfg, ji) != 0) + NAT64STAT_INC(&cfg->stats, jportfails); + break; + case JTYPE_DELPORTGROUP: + consider_del_portgroup(cfg, ji); + break; + } + } + + if (nhsize > 0) { + /* XXX: Move everything to new hash */ + } + + IPFW_WUNLOCK(ch); + IPFW_UH_WUNLOCK(ch); + + /* Flush unused entries */ + while (!TAILQ_EMPTY(&jhead)) { + ji = TAILQ_FIRST(&jhead); + TAILQ_REMOVE(&jhead, ji, next); + if (ji->nh != NULL) + destroy_host6(ji->nh); + if (ji->pg != NULL) + destroy_portgroup(ji->pg); + if (ji->m != NULL) + reinject_mbuf(cfg, ji); + if (ji->spare_idx != NULL) + uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx); + free(ji, M_IPFW); + } + CURVNET_RESTORE(); +} + +static NAT64NOINLINE struct nat64lsn_job_item * +nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id, + int jtype) +{ + struct nat64lsn_job_item *ji; + struct in6_addr haddr; + uint8_t nat_proto; + + /* + * Do not try to lock possibly contested mutex if we're near the limit. + * Drop packet instead. 
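 * (Editor's note: the unlocked cfg->jlen >= cfg->jmaxlen test below
 * appears intentionally approximate; it may briefly over- or
 * under-count while another CPU is queueing, but it bounds the job
 * queue without taking JQUEUE_LOCK() on the fast path, and rejected
 * requests are counted in the jmaxlen statistic.)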
+ */
+ if (cfg->jlen >= cfg->jmaxlen) {
+ NAT64STAT_INC(&cfg->stats, jmaxlen);
+ return (NULL);
+ }
+
+ memset(&haddr, 0, sizeof(haddr));
+ nat_proto = 0;
+ if (f_id != NULL) {
+ haddr = f_id->src_ip6;
+ nat_proto = nat64lsn_proto_map[f_id->proto];
+
+ DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d",
+ nat_proto, f_id->proto);
+
+ if (nat_proto == 0)
+ return (NULL);
+ }
+
+ ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW,
+ M_NOWAIT | M_ZERO);
+
+ if (ji == NULL) {
+ NAT64STAT_INC(&cfg->stats, jnomem);
+ return (NULL);
+ }
+
+ ji->jtype = jtype;
+
+ if (f_id != NULL) {
+ ji->f_id = *f_id;
+ ji->haddr = haddr;
+ ji->nat_proto = nat_proto;
+ }
+
+ return (ji);
+}
+
+static NAT64NOINLINE void
+nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+
+ if (ji == NULL)
+ return;
+
+ JQUEUE_LOCK();
+ TAILQ_INSERT_TAIL(&cfg->jhead, ji, next);
+ cfg->jlen++;
+ NAT64STAT_INC(&cfg->stats, jrequests);
+
+ if (callout_pending(&cfg->jcallout) == 0)
+ callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
+ JQUEUE_UNLOCK();
+}
+
+static NAT64NOINLINE void
+nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_head *jhead, int jlen)
+{
+
+ if (TAILQ_EMPTY(jhead))
+ return;
+
+ /* Attach current queue to execution one */
+ JQUEUE_LOCK();
+ TAILQ_CONCAT(&cfg->jhead, jhead, next);
+ cfg->jlen += jlen;
+ NAT64STAT_ADD(&cfg->stats, jrequests, jlen);
+
+ if (callout_pending(&cfg->jcallout) == 0)
+ callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
+ JQUEUE_UNLOCK();
+}
+
+static unsigned int
+flow6_hash(const struct ipfw_flow_id *f_id)
+{
+ unsigned char hbuf[36];
+
+ memcpy(hbuf, &f_id->dst_ip6, 16);
+ memcpy(&hbuf[16], &f_id->src_ip6, 16);
+ memcpy(&hbuf[32], &f_id->dst_port, 2);
+ memcpy(&hbuf[34], &f_id->src_port, 2);
+
+ return (djb_hash(hbuf, sizeof(hbuf)));
+}
+
+static NAT64NOINLINE int
+nat64lsn_request_host(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm)
+{
+ struct nat64lsn_job_item *ji;
+ struct mbuf *m;
+
+ m = *pm;
+ *pm = NULL;
+
+ ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST);
+ if (ji == NULL) {
+ m_freem(m);
+ NAT64STAT_INC(&cfg->stats, dropped);
+ DPRINTF(DP_DROPS, "failed to create job");
+ } else {
+ ji->m = m;
+ /* Provide pseudo-random value based on flow */
+ ji->fhash = flow6_hash(f_id);
+ nat64lsn_enqueue_job(cfg, ji);
+ NAT64STAT_INC(&cfg->stats, jhostsreq);
+ }
+
+ return (IP_FW_PASS);
+}
+
+static NAT64NOINLINE int
+nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
+ int needs_idx)
+{
+ struct nat64lsn_job_item *ji;
+ struct mbuf *m;
+
+ m = *pm;
+ *pm = NULL;
+
+ ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP);
+ if (ji == NULL) {
+ m_freem(m);
+ NAT64STAT_INC(&cfg->stats, dropped);
+ DPRINTF(DP_DROPS, "failed to create job");
+ } else {
+ ji->m = m;
+ /* Provide pseudo-random value based on flow */
+ ji->fhash = flow6_hash(f_id);
+ ji->aaddr = aaddr;
+ ji->needs_idx = needs_idx;
+ nat64lsn_enqueue_job(cfg, ji);
+ NAT64STAT_INC(&cfg->stats, jportreq);
+ }
+
+ return (IP_FW_PASS);
+}
+
+static NAT64NOINLINE struct nat64lsn_state *
+nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh,
+ int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr)
+{
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_state *st;
+ int i, hval, off;
+
+ /* XXX: create additional bitmask for selecting proper portgroup */
+ for (i = 0; i < nh->pg_used; i++) {
+ pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
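/*
 * Editor's note: a self-contained user-space sketch (not part of the
 * patch) of how the flow hash above spreads flows across the alias
 * address pool; djb_hash() is reproduced from this file, and main()
 * exists only for illustration:
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	static unsigned
 *	djb_hash(const unsigned char *h, const int len)
 *	{
 *		unsigned int result = 0;
 *		int i;
 *
 *		for (i = 0; i < len; i++)
 *			result = 33 * result ^ h[i];
 *		return (result);
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		unsigned char hbuf[36] = { 0 };
 *		unsigned plen4 = 28;	(a /28 pool: 16 alias addrs)
 *
 *		strcpy((char *)hbuf, "example flow key");
 *		printf("addr offset %u\n",
 *		    djb_hash(hbuf, sizeof(hbuf)) % (1u << (32 - plen4)));
 *		return (0);
 *	}
 *
 * This mirrors "i = ji->fhash % (1 << (32 - cfg->plen4))" used by
 * find_portgroup_place() earlier in this file.
 */
+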
if (pg == NULL) + continue; + if (*aaddr == 0) + *aaddr = pg->aaddr; + if (pg->nat_proto != nat_proto) + continue; + + off = PG_GET_FREE_IDX(pg); + if (off != 0) { + /* We have found spare state. Use it */ + off--; + PG_MARK_BUSY_IDX(pg, off); + st = &pg->states[off]; + + /* + * Fill in new info. Assume state was zeroed. + * Timestamp and flags will be filled by caller. + */ + st->u.s = kst->u.s; + st->cur.idx = i + 1; + st->cur.off = off; + + /* Insert into host hash table */ + hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1); + st->next = nh->phash[hval]; + nh->phash[hval] = st->cur; + + nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off); + + NAT64STAT_INC(&cfg->stats, screated); + + return (st); + } + /* Saev last used alias affress */ + *aaddr = pg->aaddr; + } + + return (NULL); +} + +static NAT64NOINLINE int +nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id, + struct mbuf **pm) +{ + struct pfloghdr loghdr, *logdata; + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_host *nh; + struct st_ptr sidx; + struct nat64lsn_state *st, kst; + struct nat64lsn_portgroup *pg; + struct icmp6_hdr *icmp6; + uint32_t aaddr; + int action, hval, nat_proto, proto; + uint16_t aport, state_ts, state_flags; + + /* Check if af/protocol is supported and get it short id */ + nat_proto = nat64lsn_proto_map[f_id->proto]; + if (nat_proto == 0) { + /* + * Since we can be called from jobs handler, we need + * to free mbuf by self, do not leave this task to + * ipfw_check_packet(). + */ + NAT64STAT_INC(&cfg->stats, noproto); + m_freem(*pm); + *pm = NULL; + return (IP_FW_DENY); + } + + /* Try to find host first */ + I6HASH_FIND(cfg, nh, &f_id->src_ip6); + + if (nh == NULL) + return (nat64lsn_request_host(cfg, f_id, pm)); + + /* Fill-in on-stack state structure */ + kst.u.s.faddr = f_id->dst_ip6.s6_addr32[3]; + kst.u.s.fport = f_id->dst_port; + kst.u.s.lport = f_id->src_port; + + /* Prepare some fields we might need to update */ + hval = 0; + proto = nat64_getlasthdr(*pm, &hval); + if (proto < 0) { + NAT64STAT_INC(&cfg->stats, dropped); + DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); + m_freem(*pm); + *pm = NULL; + return (IP_FW_DENY); + } + + SET_AGE(state_ts); + if (proto == IPPROTO_TCP) + state_flags = convert_tcp_flags( + TCP(mtodo(*pm, hval))->th_flags); + else + state_flags = 0; + if (proto == IPPROTO_ICMPV6) { + /* Alter local port data */ + icmp6 = mtodo(*pm, hval); + if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST || + icmp6->icmp6_type == ICMP6_ECHO_REPLY) + kst.u.s.lport = ntohs(icmp6->icmp6_id); + } + + hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1); + pg = NULL; + st = NULL; + + /* OK, let's find state in host hash */ + NAT64_LOCK(nh); + sidx = nh->phash[hval]; + int k = 0; + while (sidx.idx != 0) { + pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); + st = &pg->states[sidx.off]; + //DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off, + //st->next.idx, st->next.off); + if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto) + break; + if (k++ > 1000) { + DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n", + sidx.idx, sidx.off, st->next.idx, st->next.off); + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d", + a, nh, curcpu); + k = 0; + } + sidx = st->next; + } + + if (sidx.idx == 0) { + aaddr = 0; + st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr); + if (st == NULL) { + /* No free states. 
Request more if we can */
+ if (nh->pg_used >= cfg->max_chunks) {
+ /* Limit reached */
+ NAT64STAT_INC(&cfg->stats, dropped);
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_DROPS, "PG limit reached"
+ " for host %s (used %u, allocated %u, "
+ "limit %u)", a,
+ nh->pg_used * NAT64_CHUNK_SIZE,
+ nh->pg_allocated * NAT64_CHUNK_SIZE,
+ cfg->max_chunks * NAT64_CHUNK_SIZE);
+ m_freem(*pm);
+ *pm = NULL;
+ NAT64_UNLOCK(nh);
+ return (IP_FW_DENY);
+ }
+ if ((nh->pg_allocated <=
+ nh->pg_used + NAT64LSN_REMAININGPG) &&
+ nh->pg_allocated < cfg->max_chunks)
+ action = 1; /* Request new indexes */
+ else
+ action = 0;
+ NAT64_UNLOCK(nh);
+ //DPRINTF("No state, unlock for %p", nh);
+ return (nat64lsn_request_portgroup(cfg, f_id,
+ pm, aaddr, action));
+ }
+
+ /* We've got new state. */
+ sidx = st->cur;
+ pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
+ }
+
+ /* Okay, state found */
+
+ /* Update necessary fields */
+ if (st->timestamp != state_ts)
+ st->timestamp = state_ts;
+ if ((st->flags & state_flags) != state_flags)
+ st->flags |= state_flags;
+
+ /* Copy needed state data */
+ aaddr = pg->aaddr;
+ aport = htons(pg->aport + sidx.off);
+
+ NAT64_UNLOCK(nh);
+
+ if (cfg->flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off);
+ } else
+ logdata = NULL;
+
+ action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->stats, logdata);
+ if (action == NAT64SKIP)
+ return (IP_FW_PASS);
+ if (action == NAT64MFREE)
+ m_freem(*pm);
+ *pm = NULL; /* mark mbuf as consumed */
+ return (IP_FW_DENY);
+}
+
+/*
+ * Main dataplane entry point.
+ */
+int
+ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done)
+{
+ ipfw_insn *icmd;
+ struct nat64lsn_cfg *cfg;
+ int ret;
+
+ IPFW_RLOCK_ASSERT(ch);
+
+ *done = 1; /* terminate the search */
+ icmd = cmd + 1;
+ if (cmd->opcode != O_EXTERNAL_ACTION ||
+ cmd->arg1 != V_nat64lsn_eid ||
+ icmd->opcode != O_EXTERNAL_INSTANCE ||
+ (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
+ return (0);
+
+ switch (args->f_id.addr_type) {
+ case 4:
+ ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
+ break;
+ case 6:
+ ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
+ break;
+ default:
+ return (0);
+ }
+ return (ret);
+}
+
+static int
+nat64lsn_ctor_host(void *mem, int size, void *arg, int flags)
+{
+ struct nat64lsn_host *nh;
+
+ nh = (struct nat64lsn_host *)mem;
+ memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr));
+ memset(nh->phash, 0, sizeof(nh->phash));
+ return (0);
+}
+
+static int
+nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags)
+{
+
+ memset(mem, 0, size);
+ return (0);
+}
+
+void
+nat64lsn_init_internal(void)
+{
+
+ memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map));
+ /* Set up supported protocol map */
+ nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP;
+ nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP;
+ nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP;
+ nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP;
+ /* Fill in reverse proto map */
+ memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map));
+ nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP;
+ nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP;
+ nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6;
+
+ JQUEUE_LOCK_INIT();
+ nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone",
+ sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, 0);
+ nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone",
+ sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
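/*
 * Editor's note on sizing, an illustration rather than part of the
 * patch: assuming 64-bit pointers, struct nat64lsn_portgroup is 32
 * bytes of metadata (host pointer, alias address/port, timestamp,
 * proto, index, freemask) followed by 64 states of 16 bytes each,
 * i.e. the "1024+32 bytes per 64 states" noted in nat64lsn.h, so each
 * item of the portgroup zone serves one 64-port chunk. The index zone
 * below hands out arrays of NAT64LSN_PGIDX_CHUNK (32) portgroup
 * pointers, 256 bytes each on LP64, which is the unit that
 * alloc_host6()/attach_portgroup() grow pg_ptr[] by.
 */
+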
nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone", + sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK, + nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); +} + +void +nat64lsn_uninit_internal(void) +{ + + JQUEUE_LOCK_DESTROY(); + uma_zdestroy(nat64lsn_host_zone); + uma_zdestroy(nat64lsn_pg_zone); + uma_zdestroy(nat64lsn_pgidx_zone); +} + +void +nat64lsn_start_instance(struct nat64lsn_cfg *cfg) +{ + + callout_reset(&cfg->periodic, hz * PERIODIC_DELAY, + nat64lsn_periodic, cfg); +} + +struct nat64lsn_cfg * +nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr) +{ + struct nat64lsn_cfg *cfg; + + cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO); + TAILQ_INIT(&cfg->jhead); + cfg->vp = curvnet; + cfg->ch = ch; + COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK); + + cfg->ihsize = NAT64LSN_HSIZE; + cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW, + M_WAITOK | M_ZERO); + + cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW, + M_WAITOK | M_ZERO); + + callout_init(&cfg->periodic, CALLOUT_MPSAFE); + callout_init(&cfg->jcallout, CALLOUT_MPSAFE); + + return (cfg); +} + +/* + * Destroy all hosts callback. + * Called on module unload when all activity already finished, so + * can work without any locks. + */ +static NAT64NOINLINE int +nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg) +{ + struct nat64lsn_portgroup *pg; + int i; + + for (i = nh->pg_used; i > 0; i--) { + pg = PORTGROUP_BYSIDX(cfg, nh, i); + if (pg == NULL) + continue; + cfg->pg[pg->idx] = NULL; + destroy_portgroup(pg); + nh->pg_used--; + } + destroy_host6(nh); + cfg->ihcount--; + return (0); +} + +void +nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg) +{ + struct nat64lsn_host *nh, *tmp; + + JQUEUE_LOCK(); + callout_drain(&cfg->jcallout); + JQUEUE_UNLOCK(); + + callout_drain(&cfg->periodic); + I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg); + DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount); + + COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS); + free(cfg->ih, M_IPFW); + free(cfg->pg, M_IPFW); + free(cfg, M_IPFW); +} + diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.h b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.h new file mode 100644 index 00000000..e6ceb1dd --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.h @@ -0,0 +1,351 @@ +/*- + * Copyright (c) 2015 Yandex LLC + * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> + * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IP_FW_NAT64LSN_H_ +#define _IP_FW_NAT64LSN_H_ + +#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */ +#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS) + +#define NAT64_MIN_PORT 1024 +#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS) + +struct st_ptr { + uint8_t idx; /* index in nh->pg_ptr array. + * NOTE: it starts from 1. + */ + uint8_t off; +}; +#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1) +#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY) +#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \ + NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS) + +struct nat64lsn_portgroup; +/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */ +struct nat64lsn_host { + struct rwlock h_lock; /* Host states lock */ + + struct in6_addr addr; + struct nat64lsn_host *next; + uint16_t timestamp; /* Last altered */ + uint16_t hsize; /* ports hash size */ + uint16_t pg_used; /* Number of portgroups used */ +#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before + * requesting of new chunk of indexes. + */ + uint16_t pg_allocated; /* Number of portgroups indexes + * allocated. + */ +#define NAT64LSN_HSIZE 64 + struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */ + /* + * PG indexes are stored in chunks with 32 elements. + * The maximum count is limited to 255 due to st_ptr->idx is uint8_t. + */ +#define NAT64LSN_PGIDX_CHUNK 32 +#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \ + NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK) + struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */ +}; + +#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED) +#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED) + +#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock) +#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock) +#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock) +#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock) +#define NAT64_LOCK(h) NAT64_WLOCK(h) +#define NAT64_UNLOCK(h) NAT64_WUNLOCK(h) +#define NAT64_LOCK_INIT(h) do { \ + rw_init(&(h)->h_lock, "NAT64 host lock"); \ + } while (0) + +#define NAT64_LOCK_DESTROY(h) do { \ + rw_destroy(&(h)->h_lock); \ + } while (0) + +/* Internal proto index */ +#define NAT_PROTO_TCP 1 +#define NAT_PROTO_UDP 2 +#define NAT_PROTO_ICMP 3 + +#define NAT_MAX_PROTO 4 +extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO]; + +VNET_DECLARE(uint16_t, nat64lsn_eid); +#define V_nat64lsn_eid VNET(nat64lsn_eid) +#define IPFW_TLV_NAT64LSN_NAME IPFW_TLV_EACTION_NAME(V_nat64lsn_eid) + +/* Timestamp macro */ +#define _CT ((int)time_uptime % 65536) +#define SET_AGE(x) (x) = _CT +#define GET_AGE(x) ((_CT >= (x)) ? 
_CT - (x) : \ + (int)65536 + _CT - (x)) + +#ifdef __LP64__ +/* ffsl() is capable of checking 64-bit ints */ +#define _FFS64 +#endif + +/* 16 bytes */ +struct nat64lsn_state { + union { + struct { + in_addr_t faddr; /* Remote IPv4 address */ + uint16_t fport; /* Remote IPv4 port */ + uint16_t lport; /* Local IPv6 port */ + }s; + uint64_t hkey; + } u; + uint8_t nat_proto; + uint8_t flags; + uint16_t timestamp; + struct st_ptr cur; /* Index of portgroup in nat64lsn_host */ + struct st_ptr next; /* Next entry index */ +}; + +/* + * 1024+32 bytes per 64 states, used to store state + * AND for outside-in state lookup + */ +struct nat64lsn_portgroup { + struct nat64lsn_host *host; /* IPv6 source host info */ + in_addr_t aaddr; /* Alias addr, network format */ + uint16_t aport; /* Base port */ + uint16_t timestamp; + uint8_t nat_proto; + uint8_t spare[3]; + uint32_t idx; +#ifdef _FFS64 + uint64_t freemask; /* Mask of free entries */ +#else + uint32_t freemask[2]; /* Mask of free entries */ +#endif + struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */ +}; +#ifdef _FFS64 +#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx)) +#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx)) +#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx))) +#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0) +#define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask)) +#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0) +#else +#define PG_MARK_BUSY_IDX(_pg, _idx) \ + (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32)) +#define PG_MARK_FREE_IDX(_pg, _idx) \ + (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32)) +#define PG_IS_FREE_IDX(_pg, _idx) \ + ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32))) +#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0) +#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg) +#define PG_IS_EMPTY(_pg) \ + ((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0)) + +static inline int +_pg_get_free_idx(const struct nat64lsn_portgroup *pg) +{ + int i; + + if ((i = ffsl(pg->freemask[0])) != 0) + return (i); + if ((i = ffsl(pg->freemask[1])) != 0) + return (i + 32); + return (0); +} + +#endif + +TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item); + +#define NAT64LSN_FLAGSMASK (NAT64_LOG) +struct nat64lsn_cfg { + struct named_object no; + //struct nat64_exthost *ex; /* Pointer to external addr array */ + struct nat64lsn_portgroup **pg; /* XXX: array of pointers */ + struct nat64lsn_host **ih; /* Host hash */ + uint32_t prefix4; /* IPv4 prefix */ + uint32_t pmask4; /* IPv4 prefix mask */ + uint32_t ihsize; /* IPv6 host hash size */ + uint8_t plen4; + uint8_t plen6; + uint8_t nomatch_verdict;/* What to return to ipfw on no-match */ + uint8_t nomatch_final; /* Exit outer loop? 
*/ + struct in6_addr prefix6; /* IPv6 prefix to embed IPv4 hosts */ + + uint32_t ihcount; /* Number of items in host hash */ + int max_chunks; /* Max chunks per client */ + int agg_prefix_len; /* Prefix length to count */ + int agg_prefix_max; /* Max hosts per agg prefix */ + uint32_t jmaxlen; /* Max jobqueue length */ + uint32_t flags; + uint16_t min_chunk; /* Min port group # to use */ + uint16_t max_chunk; /* Max port group # to use */ + uint16_t nh_delete_delay; /* Stale host delete delay */ + uint16_t pg_delete_delay; /* Stale portgroup del delay */ + uint16_t st_syn_ttl; /* TCP syn expire */ + uint16_t st_close_ttl; /* TCP fin expire */ + uint16_t st_estab_ttl; /* TCP established expire */ + uint16_t st_udp_ttl; /* UDP expire */ + uint16_t st_icmp_ttl; /* ICMP expire */ + uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */ + + struct callout periodic; + struct callout jcallout; + struct ip_fw_chain *ch; + struct vnet *vp; + struct nat64lsn_job_head jhead; + int jlen; + char name[64]; /* Nat instance name */ + nat64_stats_block stats; +}; + +struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch, + size_t numaddr); +void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg); +void nat64lsn_start_instance(struct nat64lsn_cfg *cfg); +void nat64lsn_init_internal(void); +void nat64lsn_uninit_internal(void); +int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args, + ipfw_insn *cmd, int *done); + +void +nat64lsn_dump_state(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st, + const char *px, int off); +/* + * Portgroup layout + * addr x nat_proto x port_off + * + */ + +#define _ADDR_PG_PROTO_COUNT (65536 >> NAT64_CHUNK_SIZE_BITS) +#define _ADDR_PG_COUNT (_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO) + +#define GET_ADDR_IDX(_cfg, _addr) ((_addr) - ((_cfg)->prefix4)) +#define __GET_PORTGROUP_IDX(_proto, _port) \ + ((_proto - 1) * _ADDR_PG_PROTO_COUNT + \ + ((_port) >> NAT64_CHUNK_SIZE_BITS)) + +#define _GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port) \ + GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT + \ + __GET_PORTGROUP_IDX(_proto, _port) +#define GET_PORTGROUP(_cfg, _addr, _proto, _port) \ + ((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)]) + +#define PORTGROUP_CHUNK(_nh, _idx) \ + ((_nh)->pg_ptr[(_idx)]) +#define PORTGROUP_BYSIDX(_cfg, _nh, _idx) \ + (PORTGROUP_CHUNK(_nh, (_idx - 1) / NAT64LSN_PGIDX_CHUNK) \ + [((_idx) - 1) % NAT64LSN_PGIDX_CHUNK]) + + +/* Chained hash table */ +#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \ + unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ + _PX##lock(_ph, _buck); \ + _x = _PX##first(_ph, _buck); \ + for ( ; _x != NULL; _x = _PX##next(_x)) { \ + if (_PX##cmp(_key, _PX##val(_x))) \ + break; \ + } \ + if (_x == NULL) \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \ + _PX##unlock(_ph, _buck); + +#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \ + unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \ + unsigned int _buck = _PX##hash(_PX##val(_i)) & (_hsize - 1); \ + _PX##lock(_ph, _buck); \ + _PX##next(_i) = _PX##first(_ph, _buck); \ + _PX##first(_ph, _buck) = _i; \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \ + unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ + _PX##lock(_ph, _buck); \ + _x = _PX##first(_ph, _buck); \ + _tmp = NULL; \ + for ( ; _x != NULL; _tmp 
= _x, _x = _PX##next(_x)) { \ + if (_PX##cmp(_key, _PX##val(_x))) \ + break; \ + } \ + if (_x != NULL) { \ + if (_tmp == NULL) \ + _PX##first(_ph, _buck) = _PX##next(_x); \ + else \ + _PX##next(_tmp) = _PX##next(_x); \ + } \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do { \ + for (unsigned int _i = 0; _i < _hsize; _i++) { \ + _PX##lock(_ph, _i); \ + _x = _PX##first(_ph, _i); \ + _tmp = NULL; \ + for (; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \ + if (_cb(_x, _arg) == 0) \ + continue; \ + if (_tmp == NULL) \ + _PX##first(_ph, _i) = _PX##next(_x); \ + else \ + _tmp = _PX##next(_x); \ + } \ + _PX##unlock(_ph, _i); \ + } \ +} while(0) + +#define CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do { \ + unsigned int _buck; \ + for (unsigned int _i = 0; _i < _hsize; _i++) { \ + _x = _PX##first(_ph, _i); \ + _y = _x; \ + while (_y != NULL) { \ + _buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\ + _y = _PX##next(_x); \ + _PX##next(_x) = _PX##first(_nph, _buck); \ + _PX##first(_nph, _buck) = _x; \ + } \ + } \ +} while(0) + +#endif /* _IP_FW_NAT64LSN_H_ */ + diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c new file mode 100644 index 00000000..a20a52ea --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c @@ -0,0 +1,919 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2015 Yandex LLC + * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> + * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
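The CHT_* macros above implement a generic chained hash with per-bucket locking: a consumer supplies its own operations through a token-pasted prefix (_PX##hash, _PX##first, _PX##next, _PX##val, _PX##cmp, _PX##lock, _PX##unlock). The sketch below shows the glue a hypothetical consumer would define, with locking stubbed out; it assumes the CHT_* macros above are in scope. Note that CHT_FIND unlocks the bucket only on a miss, so on a hit the caller must release it with CHT_UNLOCK_KEY (or CHT_UNLOCK_BUCK) when done.

    #include <stddef.h>

    struct ex_item {
    	struct ex_item	*next;
    	int		key;
    };

    static struct ex_item *ex_heads[64];	/* power-of-two bucket count */

    #define	ex_hash(k)	((unsigned int)(k) * 2654435761u)
    #define	ex_first(ph, b)	(ph)[(b)]
    #define	ex_next(x)	(x)->next
    #define	ex_val(x)	(x)->key
    #define	ex_cmp(k, v)	((k) == (v))
    #define	ex_lock(ph, b)	do { (void)(ph); (void)(b); } while (0)
    #define	ex_unlock(ph, b) do { (void)(ph); (void)(b); } while (0)

    static void
    ex_insert(struct ex_item *i)
    {

    	CHT_INSERT_HEAD(ex_heads, 64, ex_, i);
    }

    static struct ex_item *
    ex_lookup(int key)
    {
    	struct ex_item *x;

    	CHT_FIND(ex_heads, 64, ex_, x, key);
    	if (x != NULL)		/* hit: bucket is still held */
    		CHT_UNLOCK_KEY(ex_heads, 64, ex_, key);
    	return (x);
    }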
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/counter.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/rmlock.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/sockopt.h> +#include <sys/queue.h> + +#include <net/if.h> +#include <net/pfil.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/ip_fw.h> + +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/nat64/ip_fw_nat64.h> +#include <netpfil/ipfw/nat64/nat64lsn.h> +#include <netinet6/ip_fw_nat64.h> + +VNET_DEFINE(uint16_t, nat64lsn_eid) = 0; + +static struct nat64lsn_cfg * +nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set) +{ + struct nat64lsn_cfg *cfg; + + cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set, + IPFW_TLV_NAT64LSN_NAME, name); + + return (cfg); +} + +static void +nat64lsn_default_config(ipfw_nat64lsn_cfg *uc) +{ + + if (uc->max_ports == 0) + uc->max_ports = NAT64LSN_MAX_PORTS; + else + uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE); + if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR) + uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR; + if (uc->jmaxlen == 0) + uc->jmaxlen = NAT64LSN_JMAXLEN; + if (uc->jmaxlen > 65536) + uc->jmaxlen = 65536; + if (uc->nh_delete_delay == 0) + uc->nh_delete_delay = NAT64LSN_HOST_AGE; + if (uc->pg_delete_delay == 0) + uc->pg_delete_delay = NAT64LSN_PG_AGE; + if (uc->st_syn_ttl == 0) + uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE; + if (uc->st_close_ttl == 0) + uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE; + if (uc->st_estab_ttl == 0) + uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE; + if (uc->st_udp_ttl == 0) + uc->st_udp_ttl = NAT64LSN_UDP_AGE; + if (uc->st_icmp_ttl == 0) + uc->st_icmp_ttl = NAT64LSN_ICMP_AGE; +} + +/* + * Creates new nat64lsn instance. 
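nat64lsn_default_config() above normalizes a user-supplied configuration in place: zero-valued fields take compiled-in defaults, jmaxlen is clamped to 65536, and max_ports is rounded up to whole NAT64_CHUNK_SIZE port chunks and capped at NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR. A standalone illustration of the rounding step (NAT64_CHUNK_SIZE is 64, per nat64lsn.h; ROUNDUP mirrors the kernel's roundup()):

    #include <stdio.h>

    #define	CHUNK		64	/* NAT64_CHUNK_SIZE */
    #define	ROUNDUP(x, y)	((((x) + ((y) - 1)) / (y)) * (y))

    int
    main(void)
    {
    	unsigned int max_ports = 1000;

    	max_ports = ROUNDUP(max_ports, CHUNK);
    	printf("%u ports -> %u chunks\n", max_ports, max_ports / CHUNK);
    	/* prints: 1024 ports -> 16 chunks */
    	return (0);
    }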
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ] + * + * Returns 0 on success + */ +static int +nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_lheader *olh; + ipfw_nat64lsn_cfg *uc; + struct nat64lsn_cfg *cfg; + struct namedobj_instance *ni; + uint32_t addr4, mask4; + + if (sd->valsize != sizeof(*olh) + sizeof(*uc)) + return (EINVAL); + + olh = (ipfw_obj_lheader *)sd->kbuf; + uc = (ipfw_nat64lsn_cfg *)(olh + 1); + + if (ipfw_check_object_name_generic(uc->name) != 0) + return (EINVAL); + + if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS) + return (EINVAL); + + if (uc->plen4 > 32) + return (EINVAL); + if (uc->plen6 > 128 || ((uc->plen6 % 8) != 0)) + return (EINVAL); + + /* XXX: Check prefix4 to be global */ + addr4 = ntohl(uc->prefix4.s_addr); + mask4 = ~((1 << (32 - uc->plen4)) - 1); + if ((addr4 & mask4) != addr4) + return (EINVAL); + + /* XXX: Check prefix6 */ + if (uc->min_port == 0) + uc->min_port = NAT64_MIN_PORT; + if (uc->max_port == 0) + uc->max_port = 65535; + if (uc->min_port > uc->max_port) + return (EINVAL); + uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE); + uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE); + + nat64lsn_default_config(uc); + + ni = CHAIN_TO_SRV(ch); + IPFW_UH_RLOCK(ch); + if (nat64lsn_find(ni, uc->name, uc->set) != NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); + } + IPFW_UH_RUNLOCK(ch); + + cfg = nat64lsn_init_instance(ch, 1 << (32 - uc->plen4)); + strlcpy(cfg->name, uc->name, sizeof(cfg->name)); + cfg->no.name = cfg->name; + cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME; + cfg->no.set = uc->set; + + cfg->prefix4 = addr4; + cfg->pmask4 = addr4 | ~mask4; + /* XXX: Copy 96 bits */ + cfg->plen6 = 96; + memcpy(&cfg->prefix6, &uc->prefix6, cfg->plen6 / 8); + cfg->plen4 = uc->plen4; + cfg->flags = uc->flags & NAT64LSN_FLAGSMASK; + cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE; + cfg->agg_prefix_len = uc->agg_prefix_len; + cfg->agg_prefix_max = uc->agg_prefix_max; + + cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE; + cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE; + + cfg->jmaxlen = uc->jmaxlen; + cfg->nh_delete_delay = uc->nh_delete_delay; + cfg->pg_delete_delay = uc->pg_delete_delay; + cfg->st_syn_ttl = uc->st_syn_ttl; + cfg->st_close_ttl = uc->st_close_ttl; + cfg->st_estab_ttl = uc->st_estab_ttl; + cfg->st_udp_ttl = uc->st_udp_ttl; + cfg->st_icmp_ttl = uc->st_icmp_ttl; + + cfg->nomatch_verdict = IP_FW_DENY; + cfg->nomatch_final = 1; /* Exit outer loop by default */ + + IPFW_UH_WLOCK(ch); + + if (nat64lsn_find(ni, uc->name, uc->set) != NULL) { + IPFW_UH_WUNLOCK(ch); + nat64lsn_destroy_instance(cfg); + return (EEXIST); + } + + if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) { + IPFW_UH_WUNLOCK(ch); + nat64lsn_destroy_instance(cfg); + return (ENOSPC); + } + ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no); + + /* Okay, let's link data */ + IPFW_WLOCK(ch); + SRV_OBJECT(ch, cfg->no.kidx) = cfg; + IPFW_WUNLOCK(ch); + + nat64lsn_start_instance(cfg); + + IPFW_UH_WUNLOCK(ch); + return (0); +} + +static void +nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg) +{ + + IPFW_UH_WLOCK_ASSERT(ch); + + ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); + ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); +} + +/* + * Destroys nat64 instance. 
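nat64lsn_create() above rejects an IPv4 prefix with host bits set: it derives the netmask from plen4 and requires (addr4 & mask4) == addr4. A standalone version of that check follows; the sketch restricts plen to 1..32, since shifting a 32-bit value by 32 bits (the plen == 0 case, which the handler's plen4 > 32 test alone would not reject) is undefined behavior in C.

    #include <stdint.h>
    #include <stdio.h>

    static int
    prefix4_aligned(uint32_t addr, unsigned int plen)
    {
    	uint32_t mask;

    	if (plen < 1 || plen > 32)
    		return (0);
    	mask = ~((UINT32_C(1) << (32 - plen)) - 1);
    	return ((addr & mask) == addr);
    }

    int
    main(void)
    {
    	/* 198.51.100.0/24 is aligned, 198.51.100.1/24 is not. */
    	printf("%d %d\n", prefix4_aligned(0xC6336400, 24),
    	    prefix4_aligned(0xC6336401, 24));
    	return (0);
    }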
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +static int +nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + struct nat64lsn_cfg *cfg; + ipfw_obj_header *oh; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + + oh = (ipfw_obj_header *)op3; + + IPFW_UH_WLOCK(ch); + cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + + if (cfg->no.refcnt > 0) { + IPFW_UH_WUNLOCK(ch); + return (EBUSY); + } + + IPFW_WLOCK(ch); + SRV_OBJECT(ch, cfg->no.kidx) = NULL; + IPFW_WUNLOCK(ch); + + nat64lsn_detach_config(ch, cfg); + IPFW_UH_WUNLOCK(ch); + + nat64lsn_destroy_instance(cfg); + return (0); +} + +#define __COPY_STAT_FIELD(_cfg, _stats, _field) \ + (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field) +static void +export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, + struct ipfw_nat64lsn_stats *stats) +{ + + __COPY_STAT_FIELD(cfg, stats, opcnt64); + __COPY_STAT_FIELD(cfg, stats, opcnt46); + __COPY_STAT_FIELD(cfg, stats, ofrags); + __COPY_STAT_FIELD(cfg, stats, ifrags); + __COPY_STAT_FIELD(cfg, stats, oerrors); + __COPY_STAT_FIELD(cfg, stats, noroute4); + __COPY_STAT_FIELD(cfg, stats, noroute6); + __COPY_STAT_FIELD(cfg, stats, nomatch4); + __COPY_STAT_FIELD(cfg, stats, noproto); + __COPY_STAT_FIELD(cfg, stats, nomem); + __COPY_STAT_FIELD(cfg, stats, dropped); + + __COPY_STAT_FIELD(cfg, stats, jcalls); + __COPY_STAT_FIELD(cfg, stats, jrequests); + __COPY_STAT_FIELD(cfg, stats, jhostsreq); + __COPY_STAT_FIELD(cfg, stats, jportreq); + __COPY_STAT_FIELD(cfg, stats, jhostfails); + __COPY_STAT_FIELD(cfg, stats, jportfails); + __COPY_STAT_FIELD(cfg, stats, jmaxlen); + __COPY_STAT_FIELD(cfg, stats, jnomem); + __COPY_STAT_FIELD(cfg, stats, jreinjected); + __COPY_STAT_FIELD(cfg, stats, screated); + __COPY_STAT_FIELD(cfg, stats, sdeleted); + __COPY_STAT_FIELD(cfg, stats, spgcreated); + __COPY_STAT_FIELD(cfg, stats, spgdeleted); + + stats->hostcount = cfg->ihcount; + stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP]; + stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP]; + stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP]; +} +#undef __COPY_STAT_FIELD + +static void +nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, + ipfw_nat64lsn_cfg *uc) +{ + + uc->flags = cfg->flags & NAT64LSN_FLAGSMASK; + uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE; + uc->agg_prefix_len = cfg->agg_prefix_len; + uc->agg_prefix_max = cfg->agg_prefix_max; + + uc->jmaxlen = cfg->jmaxlen; + uc->nh_delete_delay = cfg->nh_delete_delay; + uc->pg_delete_delay = cfg->pg_delete_delay; + uc->st_syn_ttl = cfg->st_syn_ttl; + uc->st_close_ttl = cfg->st_close_ttl; + uc->st_estab_ttl = cfg->st_estab_ttl; + uc->st_udp_ttl = cfg->st_udp_ttl; + uc->st_icmp_ttl = cfg->st_icmp_ttl; + uc->prefix4.s_addr = htonl(cfg->prefix4); + uc->prefix6 = cfg->prefix6; + uc->plen4 = cfg->plen4; + uc->plen6 = cfg->plen6; + uc->set = cfg->no.set; + strlcpy(uc->name, cfg->no.name, sizeof(uc->name)); +} + +struct nat64_dump_arg { + struct ip_fw_chain *ch; + struct sockopt_data *sd; +}; + +static int +export_config_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg; + ipfw_nat64lsn_cfg *uc; + + uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd, + sizeof(*uc)); + nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc); + return (0); +} + +/* + * Lists all nat64 
lsn instances currently available in kernel. + * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ] + * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ] + * + * Returns 0 on success + */ +static int +nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_lheader *olh; + struct nat64_dump_arg da; + + /* Check minimum header size */ + if (sd->valsize < sizeof(ipfw_obj_lheader)) + return (EINVAL); + + olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); + + IPFW_UH_RLOCK(ch); + olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), + IPFW_TLV_NAT64LSN_NAME); + olh->objsize = sizeof(ipfw_nat64lsn_cfg); + olh->size = sizeof(*olh) + olh->count * olh->objsize; + + if (sd->valsize < olh->size) { + IPFW_UH_RUNLOCK(ch); + return (ENOMEM); + } + memset(&da, 0, sizeof(da)); + da.ch = ch; + da.sd = sd; + ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da, + IPFW_TLV_NAT64LSN_NAME); + IPFW_UH_RUNLOCK(ch); + + return (0); +} + +/* + * Change existing nat64lsn instance configuration. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ] + * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ] + * + * Returns 0 on success + */ +static int +nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + ipfw_obj_header *oh; + ipfw_nat64lsn_cfg *uc; + struct nat64lsn_cfg *cfg; + struct namedobj_instance *ni; + + if (sd->valsize != sizeof(*oh) + sizeof(*uc)) + return (EINVAL); + + oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, + sizeof(*oh) + sizeof(*uc)); + uc = (ipfw_nat64lsn_cfg *)(oh + 1); + + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || + oh->ntlv.set >= IPFW_MAX_SETS) + return (EINVAL); + + ni = CHAIN_TO_SRV(ch); + if (sd->sopt->sopt_dir == SOPT_GET) { + IPFW_UH_RLOCK(ch); + cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); + } + nat64lsn_export_config(ch, cfg, uc); + IPFW_UH_RUNLOCK(ch); + return (0); + } + + nat64lsn_default_config(uc); + + IPFW_UH_WLOCK(ch); + cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (EEXIST); + } + + /* + * For now allow to change only following values: + * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age, + * tcp_est_age, udp_age, icmp_age, flags, max_ports. + */ + + cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE; + cfg->jmaxlen = uc->jmaxlen; + cfg->nh_delete_delay = uc->nh_delete_delay; + cfg->pg_delete_delay = uc->pg_delete_delay; + cfg->st_syn_ttl = uc->st_syn_ttl; + cfg->st_close_ttl = uc->st_close_ttl; + cfg->st_estab_ttl = uc->st_estab_ttl; + cfg->st_udp_ttl = uc->st_udp_ttl; + cfg->st_icmp_ttl = uc->st_icmp_ttl; + cfg->flags = uc->flags & NAT64LSN_FLAGSMASK; + + IPFW_UH_WUNLOCK(ch); + + return (0); +} + +/* + * Get nat64lsn statistics. 
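nat64lsn_list() above uses the standard two-pass ipfw sizing handshake: the list header (count, objsize, size) is always filled in, and ENOMEM tells the caller its buffer was too small. Userland then retries with the size the kernel reported. A sketch of the consumer side of that loop; do_get3() stands in for the IP_FW3 sockopt transport that the ipfw(8) utility uses, its exact signature here is an assumption, and allocation-failure handling is elided.

    size_t len = sizeof(ipfw_obj_lheader);
    ipfw_obj_lheader *olh = calloc(1, len);

    for (;;) {
    	if (do_get3(IP_FW_NAT64LSN_LIST, olh, &len) == 0)
    		break;			/* olh->count configs follow olh */
    	if (errno != ENOMEM)
    		err(1, "nat64lsn list");
    	len = olh->size;		/* required size, as reported above */
    	olh = realloc(olh, len);	/* hypothetical: check for NULL */
    	memset(olh, 0, len);
    }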
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * Reply: [ ipfw_obj_header ipfw_counter_tlv ] + * + * Returns 0 on success + */ +static int +nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + struct ipfw_nat64lsn_stats stats; + struct nat64lsn_cfg *cfg; + ipfw_obj_header *oh; + ipfw_obj_ctlv *ctlv; + size_t sz; + + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); + if (sd->valsize % sizeof(uint64_t)) + return (EINVAL); + if (sd->valsize < sz) + return (ENOMEM); + oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); + if (oh == NULL) + return (EINVAL); + memset(&stats, 0, sizeof(stats)); + + IPFW_UH_RLOCK(ch); + cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (ESRCH); + } + + export_stats(ch, cfg, &stats); + IPFW_UH_RUNLOCK(ch); + + ctlv = (ipfw_obj_ctlv *)(oh + 1); + memset(ctlv, 0, sizeof(*ctlv)); + ctlv->head.type = IPFW_TLV_COUNTERS; + ctlv->head.length = sz - sizeof(ipfw_obj_header); + ctlv->count = sizeof(stats) / sizeof(uint64_t); + ctlv->objsize = sizeof(uint64_t); + ctlv->version = IPFW_NAT64_VERSION; + memcpy(ctlv + 1, &stats, sizeof(stats)); + return (0); +} + +/* + * Reset nat64lsn statistics. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +static int +nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + struct nat64lsn_cfg *cfg; + ipfw_obj_header *oh; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + oh = (ipfw_obj_header *)sd->kbuf; + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || + oh->ntlv.set >= IPFW_MAX_SETS) + return (EINVAL); + + IPFW_UH_WLOCK(ch); + cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS); + IPFW_UH_WUNLOCK(ch); + return (0); +} + +/* + * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg + * ipfw_nat64lsn_state x count, ... 
] ] + */ +static int +export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg, + ipfw_nat64lsn_stg *stg, struct sockopt_data *sd) +{ + ipfw_nat64lsn_state *ste; + struct nat64lsn_state *st; + int i, count; + + NAT64_LOCK(pg->host); + count = 0; + for (i = 0; i < 64; i++) { + if (PG_IS_BUSY_IDX(pg, i)) + count++; + } + DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count); + + if (count == 0) { + stg->count = 0; + NAT64_UNLOCK(pg->host); + return (0); + } + ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd, + count * sizeof(ipfw_nat64lsn_state)); + if (ste == NULL) { + NAT64_UNLOCK(pg->host); + return (1); + } + + stg->alias4.s_addr = pg->aaddr; + stg->proto = nat64lsn_rproto_map[pg->nat_proto]; + stg->flags = 0; + stg->host6 = pg->host->addr; + stg->count = count; + for (i = 0; i < 64; i++) { + if (PG_IS_FREE_IDX(pg, i)) + continue; + st = &pg->states[i]; + ste->daddr.s_addr = st->u.s.faddr; + ste->dport = st->u.s.fport; + ste->aport = pg->aport + i; + ste->sport = st->u.s.lport; + ste->flags = st->flags; /* XXX filter flags */ + ste->idle = GET_AGE(st->timestamp); + ste++; + } + NAT64_UNLOCK(pg->host); + + return (0); +} + +static int +get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, + uint16_t *port) +{ + + if (*port < 65536 - NAT64_CHUNK_SIZE) { + *port += NAT64_CHUNK_SIZE; + return (0); + } + *port = 0; + + if (*nat_proto < NAT_MAX_PROTO - 1) { + *nat_proto += 1; + return (0); + } + *nat_proto = 1; + + if (*addr < cfg->pmask4) { + *addr += 1; + return (0); + } + + /* End of space. */ + return (1); +} + +#define PACK_IDX(addr, proto, port) \ + ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8) +#define UNPACK_IDX(idx, addr, proto, port) \ + (addr) = (uint32_t)((idx) >> 32); \ + (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \ + (proto) = (uint8_t)(((idx) >> 8) & 0xFF) + +static struct nat64lsn_portgroup * +get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, + uint16_t *port) +{ + struct nat64lsn_portgroup *pg; + uint64_t pre_pack, post_pack; + + pg = NULL; + pre_pack = PACK_IDX(*addr, *nat_proto, *port); + for (;;) { + if (get_next_idx(cfg, addr, nat_proto, port) != 0) { + /* End of states */ + return (pg); + } + + pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port); + if (pg != NULL) + break; + } + + post_pack = PACK_IDX(*addr, *nat_proto, *port); + if (pre_pack == post_pack) + DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d", + *addr, *nat_proto, *port); + return (pg); +} + +static NAT64NOINLINE struct nat64lsn_portgroup * +get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, + uint16_t *port) +{ + struct nat64lsn_portgroup *pg; + + pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port); + if (pg == NULL) + pg = get_next_pg(cfg, addr, nat_proto, port); + + return (pg); +} + +/* + * Lists nat64lsn states. 
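PACK_IDX()/UNPACK_IDX() above encode the iterator position (alias address, internal protocol, base port) into the single 64-bit resume cookie that nat64lsn_states() hands back to userland. A standalone round-trip check of the encoding; the local PACK() copy is fully parenthesized, whereas the header macros are written for the simple statement contexts they are used in.

    #include <assert.h>
    #include <stdint.h>

    #define	PACK(addr, proto, port)					\
    	(((uint64_t)(addr) << 32) | ((uint32_t)(port) << 16) |	\
    	    ((uint64_t)(proto) << 8))

    int
    main(void)
    {
    	uint32_t addr = 0xC0000200;	/* 192.0.2.0 */
    	uint8_t proto = 1;		/* NAT_PROTO_TCP */
    	uint16_t port = 1024;
    	uint64_t idx = PACK(addr, proto, port);

    	/* The same extractions UNPACK_IDX() performs: */
    	assert((uint32_t)(idx >> 32) == addr);
    	assert((uint16_t)((idx >> 16) & 0xFFFF) == port);
    	assert((uint8_t)((idx >> 8) & 0xFF) == proto);
    	return (0);
    }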
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]] + * Reply: [ ipfw_obj_header ipfw_obj_data [ + * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ] + * + * Returns 0 on success + */ +static int +nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_header *oh; + ipfw_obj_data *od; + ipfw_nat64lsn_stg *stg; + struct nat64lsn_cfg *cfg; + struct nat64lsn_portgroup *pg, *pg_next; + uint64_t next_idx; + size_t sz; + uint32_t addr, states; + uint16_t port; + uint8_t nat_proto; + + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + + sizeof(uint64_t); + /* Check minimum header size */ + if (sd->valsize < sz) + return (EINVAL); + + oh = (ipfw_obj_header *)sd->kbuf; + od = (ipfw_obj_data *)(oh + 1); + if (od->head.type != IPFW_TLV_OBJDATA || + od->head.length != sz - sizeof(ipfw_obj_header)) + return (EINVAL); + + next_idx = *(uint64_t *)(od + 1); + /* Translate index to the request position to start from */ + UNPACK_IDX(next_idx, addr, nat_proto, port); + if (nat_proto >= NAT_MAX_PROTO) + return (EINVAL); + if (nat_proto == 0 && addr != 0) + return (EINVAL); + + IPFW_UH_RLOCK(ch); + cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (ESRCH); + } + /* Fill in starting point */ + if (addr == 0) { + addr = cfg->prefix4; + nat_proto = 1; + port = 0; + } + if (addr < cfg->prefix4 || addr > cfg->pmask4) { + IPFW_UH_RUNLOCK(ch); + DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u", + (uintmax_t)next_idx, addr, cfg->pmask4); + return (EINVAL); + } + + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + + sizeof(ipfw_nat64lsn_stg); + if (sd->valsize < sz) + return (ENOMEM); + oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz); + od = (ipfw_obj_data *)(oh + 1); + od->head.type = IPFW_TLV_OBJDATA; + od->head.length = sz - sizeof(ipfw_obj_header); + stg = (ipfw_nat64lsn_stg *)(od + 1); + + pg = get_first_pg(cfg, &addr, &nat_proto, &port); + if (pg == NULL) { + /* No states */ + stg->next_idx = 0xFF; + stg->count = 0; + IPFW_UH_RUNLOCK(ch); + return (0); + } + states = 0; + pg_next = NULL; + while (pg != NULL) { + pg_next = get_next_pg(cfg, &addr, &nat_proto, &port); + if (pg_next == NULL) + stg->next_idx = 0xFF; + else + stg->next_idx = PACK_IDX(addr, nat_proto, port); + + if (export_pg_states(cfg, pg, stg, sd) != 0) { + IPFW_UH_RUNLOCK(ch); + return (states == 0 ? 
ENOMEM: 0); + } + states += stg->count; + od->head.length += stg->count * sizeof(ipfw_nat64lsn_state); + sz += stg->count * sizeof(ipfw_nat64lsn_state); + if (pg_next != NULL) { + sz += sizeof(ipfw_nat64lsn_stg); + if (sd->valsize < sz) + break; + stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd, + sizeof(ipfw_nat64lsn_stg)); + } + pg = pg_next; + } + IPFW_UH_RUNLOCK(ch); + return (0); +} + +static struct ipfw_sopt_handler scodes[] = { + { IP_FW_NAT64LSN_CREATE, 0, HDIR_BOTH, nat64lsn_create }, + { IP_FW_NAT64LSN_DESTROY,0, HDIR_SET, nat64lsn_destroy }, + { IP_FW_NAT64LSN_CONFIG, 0, HDIR_BOTH, nat64lsn_config }, + { IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list }, + { IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats }, + { IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats }, + { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states }, +}; + +static int +nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) +{ + ipfw_insn *icmd; + + icmd = cmd - 1; + if (icmd->opcode != O_EXTERNAL_ACTION || + icmd->arg1 != V_nat64lsn_eid) + return (1); + + *puidx = cmd->arg1; + *ptype = 0; + return (0); +} + +static void +nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx) +{ + + cmd->arg1 = idx; +} + +static int +nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, + struct named_object **pno) +{ + int err; + + err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, + IPFW_TLV_NAT64LSN_NAME, pno); + return (err); +} + +static struct named_object * +nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx) +{ + struct namedobj_instance *ni; + struct named_object *no; + + IPFW_UH_WLOCK_ASSERT(ch); + ni = CHAIN_TO_SRV(ch); + no = ipfw_objhash_lookup_kidx(ni, idx); + KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx)); + + return (no); +} + +static int +nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, + enum ipfw_sets_cmd cmd) +{ + + return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME, + set, new_set, cmd)); +} + +static struct opcode_obj_rewrite opcodes[] = { + { + .opcode = O_EXTERNAL_INSTANCE, + .etlv = IPFW_TLV_EACTION /* just show it isn't table */, + .classifier = nat64lsn_classify, + .update = nat64lsn_update_arg1, + .find_byname = nat64lsn_findbyname, + .find_bykidx = nat64lsn_findbykidx, + .manage_sets = nat64lsn_manage_sets, + }, +}; + +static int +destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct nat64lsn_cfg *cfg; + struct ip_fw_chain *ch; + + ch = (struct ip_fw_chain *)arg; + cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx); + SRV_OBJECT(ch, no->kidx) = NULL; + nat64lsn_detach_config(ch, cfg); + nat64lsn_destroy_instance(cfg); + return (0); +} + +int +nat64lsn_init(struct ip_fw_chain *ch, int first) +{ + + if (first != 0) + nat64lsn_init_internal(); + V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn"); + if (V_nat64lsn_eid == 0) + return (ENXIO); + IPFW_ADD_SOPT_HANDLER(first, scodes); + IPFW_ADD_OBJ_REWRITER(first, opcodes); + return (0); +} + +void +nat64lsn_uninit(struct ip_fw_chain *ch, int last) +{ + + IPFW_DEL_OBJ_REWRITER(last, opcodes); + IPFW_DEL_SOPT_HANDLER(last, scodes); + ipfw_del_eaction(ch, V_nat64lsn_eid); + /* + * Since we already have deregistered external action, + * our named objects become unaccessible via rules, because + * all rules were truncated by ipfw_del_eaction(). + * So, we can unlink and destroy our named objects without holding + * IPFW_WLOCK(). 
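nat64lsn_classify() above and the dataplane entry ipfw_nat64lsn() are two views of the same two-slot opcode layout: a rule stores O_EXTERNAL_ACTION immediately followed by O_EXTERNAL_INSTANCE, so the dataplane steps forward (icmd = cmd + 1) while the classifier, which is handed the instance slot, steps back (icmd = cmd - 1). A simplified standalone model of that pairing; the real ipfw_insn carries more fields, but only opcode and arg1 matter for the adjacency.

    #include <assert.h>
    #include <stdint.h>

    struct insn { uint8_t opcode; uint16_t arg1; };
    enum { O_EXT_ACTION = 1, O_EXT_INSTANCE = 2 };

    int
    main(void)
    {
    	struct insn rule[2] = {
    		{ O_EXT_ACTION,   7 },	/* eaction id of "nat64lsn" */
    		{ O_EXT_INSTANCE, 3 },	/* kidx of the named instance */
    	};
    	struct insn *cmd = &rule[0];

    	/* dataplane: step forward to find the instance */
    	assert((cmd + 1)->opcode == O_EXT_INSTANCE);
    	/* classifier: handed the instance slot, step back to the action */
    	assert((&rule[1] - 1)->opcode == O_EXT_ACTION);
    	return (0);
    }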
+ */ + IPFW_UH_WLOCK(ch); + ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, + IPFW_TLV_NAT64LSN_NAME); + V_nat64lsn_eid = 0; + IPFW_UH_WUNLOCK(ch); + if (last != 0) + nat64lsn_uninit_internal(); +} + diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c new file mode 100644 index 00000000..36e6e268 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c @@ -0,0 +1,262 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_pflog.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/ip_fw_nat64.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <netpfil/ipfw/nat64/nat64stl.h>
+#include <netpfil/pf/pf.h>
+
+#define	NAT64_LOOKUP(chain, cmd)	\
+	(struct nat64stl_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
+
+static void
+nat64stl_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+    uint32_t kidx)
+{
+	static uint32_t pktid = 0;
+
+	memset(plog, 0, sizeof(*plog));
+	plog->length = PFLOG_REAL_HDRLEN;
+	plog->af = family;
+	plog->action = PF_NAT;
+	plog->dir = PF_IN;
+	plog->rulenr = htonl(kidx);
+	plog->subrulenr = htonl(++pktid);
+	plog->ruleset[0] = '\0';
+	strlcpy(plog->ifname, "NAT64STL", sizeof(plog->ifname));
+	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
+
+static int
+nat64stl_handle_ip4(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+    struct mbuf *m, uint32_t tablearg)
+{
+	struct pfloghdr loghdr, *logdata;
+	struct in6_addr saddr, daddr;
+	struct ip *ip;
+
+	ip = mtod(m, struct ip*);
+	if (nat64_check_ip4(ip->ip_src.s_addr) != 0 ||
+	    nat64_check_ip4(ip->ip_dst.s_addr) != 0 ||
+	    nat64_check_private_ip4(ip->ip_src.s_addr) != 0 ||
+	    nat64_check_private_ip4(ip->ip_dst.s_addr) != 0)
+		return (NAT64SKIP);
+
+	daddr = TARG_VAL(chain, tablearg, nh6);
+	if (nat64_check_ip6(&daddr) != 0)
+		return (NAT64MFREE);
+	saddr = cfg->prefix6;
+	nat64_set_ip4(&saddr, ip->ip_src.s_addr);
+
+	if (cfg->flags & NAT64_LOG) {
+		logdata = &loghdr;
+		nat64stl_log(logdata, m, AF_INET, cfg->no.kidx);
+	} else
+		logdata = NULL;
+	return (nat64_do_handle_ip4(m, &saddr, &daddr, 0, &cfg->stats,
+	    logdata));
+}
+
+static int
+nat64stl_handle_ip6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+    struct mbuf *m, uint32_t tablearg)
+{
+	struct pfloghdr loghdr, *logdata;
+	struct ip6_hdr *ip6;
+	uint32_t aaddr;
+
+	aaddr = htonl(TARG_VAL(chain, tablearg, nh4));
+
+	/*
+	 * NOTE: we expect that ipfw_chk() did m_pullup() up to the upper
+	 * level protocol's headers. We also skip some checks that
+	 * ip6_input(), ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+ */ + ip6 = mtod(m, struct ip6_hdr *); + /* Check ip6_dst matches configured prefix */ + if (bcmp(&ip6->ip6_dst, &cfg->prefix6, cfg->plen6 / 8) != 0) + return (NAT64SKIP); + + if (cfg->flags & NAT64_LOG) { + logdata = &loghdr; + nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx); + } else + logdata = NULL; + return (nat64_do_handle_ip6(m, aaddr, 0, &cfg->stats, logdata)); +} + +static int +nat64stl_handle_icmp6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg, + struct mbuf *m) +{ + struct pfloghdr loghdr, *logdata; + nat64_stats_block *stats; + struct ip6_hdr *ip6i; + struct icmp6_hdr *icmp6; + uint32_t tablearg; + int hlen, proto; + + hlen = 0; + stats = &cfg->stats; + proto = nat64_getlasthdr(m, &hlen); + if (proto != IPPROTO_ICMPV6) { + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + icmp6 = mtodo(m, hlen); + switch (icmp6->icmp6_type) { + case ICMP6_DST_UNREACH: + case ICMP6_PACKET_TOO_BIG: + case ICMP6_TIME_EXCEED_TRANSIT: + case ICMP6_PARAM_PROB: + break; + default: + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + hlen += sizeof(struct icmp6_hdr); + if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) + m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); + if (m == NULL) { + NAT64STAT_INC(stats, nomem); + return (NAT64RETURN); + } + /* + * Use destination address from inner IPv6 header to determine + * IPv4 mapped address. + */ + ip6i = mtodo(m, hlen); + if (ipfw_lookup_table_extended(chain, cfg->map64, + sizeof(struct in6_addr), &ip6i->ip6_dst, &tablearg) == 0) { + m_freem(m); + return (NAT64RETURN); + } + if (cfg->flags & NAT64_LOG) { + logdata = &loghdr; + nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx); + } else + logdata = NULL; + return (nat64_handle_icmp6(m, 0, + htonl(TARG_VAL(chain, tablearg, nh4)), 0, stats, logdata)); +} + +int +ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args, + ipfw_insn *cmd, int *done) +{ + ipfw_insn *icmd; + struct nat64stl_cfg *cfg; + uint32_t tablearg; + int ret; + + IPFW_RLOCK_ASSERT(chain); + + *done = 0; /* try next rule if not matched */ + icmd = cmd + 1; + if (cmd->opcode != O_EXTERNAL_ACTION || + cmd->arg1 != V_nat64stl_eid || + icmd->opcode != O_EXTERNAL_INSTANCE || + (cfg = NAT64_LOOKUP(chain, icmd)) == NULL) + return (0); + + switch (args->f_id.addr_type) { + case 4: + ret = ipfw_lookup_table(chain, cfg->map46, + htonl(args->f_id.dst_ip), &tablearg); + break; + case 6: + ret = ipfw_lookup_table_extended(chain, cfg->map64, + sizeof(struct in6_addr), &args->f_id.src_ip6, &tablearg); + break; + default: + return (0); + } + if (ret == 0) { + /* + * In case when packet is ICMPv6 message from an intermediate + * router, the source address of message will not match the + * addresses from our map64 table. 
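nat64stl_handle_ip6() above matches the packet's destination against the configured prefix by comparing the first plen6 / 8 bytes; plen6 is fixed at 96 for nat64stl, so exactly the 12 non-IPv4 bytes are compared. A standalone equivalent of that check:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static int
    prefix6_match(const uint8_t dst[16], const uint8_t pfx[16],
        unsigned int plen6)
    {
    	return (memcmp(dst, pfx, plen6 / 8) == 0);
    }

    int
    main(void)
    {
    	/* 64:ff9b::/96, the well-known prefix, with an embedded IPv4. */
    	uint8_t pfx[16] = { 0x00, 0x64, 0xff, 0x9b };
    	uint8_t dst[16] = { 0x00, 0x64, 0xff, 0x9b, [12] = 192, 0, 2, 1 };

    	printf("%d\n", prefix6_match(dst, pfx, 96));	/* prints 1 */
    	return (0);
    }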
+ */ + if (args->f_id.proto != IPPROTO_ICMPV6) + return (0); + + ret = nat64stl_handle_icmp6(chain, cfg, args->m); + } else { + if (args->f_id.addr_type == 4) + ret = nat64stl_handle_ip4(chain, cfg, args->m, + tablearg); + else + ret = nat64stl_handle_ip6(chain, cfg, args->m, + tablearg); + } + if (ret == NAT64SKIP) + return (0); + + *done = 1; /* terminate the search */ + if (ret == NAT64MFREE) + m_freem(args->m); + args->m = NULL; + return (IP_FW_DENY); +} + + diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64stl.h b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.h new file mode 100644 index 00000000..42ec20ea --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.h @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IP_FW_NAT64STL_H_ +#define _IP_FW_NAT64STL_H_ + +struct nat64stl_cfg { + struct named_object no; + + uint16_t map64; /* table with 6to4 mapping */ + uint16_t map46; /* table with 4to6 mapping */ + + struct in6_addr prefix6;/* IPv6 prefix */ + uint8_t plen6; /* prefix length */ + uint8_t flags; /* flags for internal use */ +#define NAT64STL_KIDX 0x0100 +#define NAT64STL_46T 0x0200 +#define NAT64STL_64T 0x0400 +#define NAT64STL_FLAGSMASK (NAT64_LOG) /* flags to pass to userland */ + char name[64]; + nat64_stats_block stats; +}; + +VNET_DECLARE(uint16_t, nat64stl_eid); +#define V_nat64stl_eid VNET(nat64stl_eid) +#define IPFW_TLV_NAT64STL_NAME IPFW_TLV_EACTION_NAME(V_nat64stl_eid) + +int ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args, + ipfw_insn *cmd, int *done); + +#endif + diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c b/freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c new file mode 100644 index 00000000..6ee04867 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c @@ -0,0 +1,623 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> + * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/counter.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/rmlock.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/sockopt.h> +#include <sys/queue.h> +#include <sys/syslog.h> +#include <sys/sysctl.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/pfil.h> +#include <net/route.h> +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/ip_var.h> +#include <netinet/ip_fw.h> +#include <netinet6/in6_var.h> +#include <netinet6/ip6_var.h> + +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/nat64/ip_fw_nat64.h> +#include <netpfil/ipfw/nat64/nat64stl.h> +#include <netinet6/ip_fw_nat64.h> + +VNET_DEFINE(uint16_t, nat64stl_eid) = 0; + +static struct nat64stl_cfg *nat64stl_alloc_config(const char *name, uint8_t set); +static void nat64stl_free_config(struct nat64stl_cfg *cfg); +static struct nat64stl_cfg *nat64stl_find(struct namedobj_instance *ni, + const char *name, uint8_t set); + +static struct nat64stl_cfg * +nat64stl_alloc_config(const char *name, uint8_t set) +{ + struct nat64stl_cfg *cfg; + + cfg = malloc(sizeof(struct nat64stl_cfg), M_IPFW, M_WAITOK | M_ZERO); + COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK); + cfg->no.name = cfg->name; + cfg->no.etlv = IPFW_TLV_NAT64STL_NAME; + cfg->no.set = set; + strlcpy(cfg->name, name, sizeof(cfg->name)); + return (cfg); +} + +static void +nat64stl_free_config(struct nat64stl_cfg *cfg) +{ + + COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS); + free(cfg, M_IPFW); +} + +static void +nat64stl_export_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, + ipfw_nat64stl_cfg *uc) +{ + struct named_object *no; + + uc->prefix6 = cfg->prefix6; + uc->plen6 = cfg->plen6; + uc->flags = cfg->flags & NAT64STL_FLAGSMASK; + uc->set = cfg->no.set; + strlcpy(uc->name, cfg->no.name, sizeof(uc->name)); + + no = ipfw_objhash_lookup_table_kidx(ch, cfg->map64); + ipfw_export_obj_ntlv(no, &uc->ntlv6); + no = ipfw_objhash_lookup_table_kidx(ch, cfg->map46); + ipfw_export_obj_ntlv(no, &uc->ntlv4); +} + 
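Note how nat64stl_alloc_config() above points cfg->no.name into the config's own name[] buffer. The sketch below shows why that self-reference matters: a byte-wise struct copy leaves the copy's named_object still pointing at the original's buffer, so such objects must stay in place or have the pointer re-linked after any copy. The toy_* types are simplified stand-ins for named_object and nat64stl_cfg.

    #include <assert.h>
    #include <string.h>

    struct toy_no { const char *name; };
    struct toy_cfg {
    	struct toy_no	no;
    	char		name[64];
    };

    int
    main(void)
    {
    	struct toy_cfg a, b;

    	strlcpy(a.name, "stl1", sizeof(a.name));
    	a.no.name = a.name;		/* as in nat64stl_alloc_config() */

    	b = a;				/* byte-wise copy */
    	assert(b.no.name == a.name);	/* still the original's buffer */
    	b.no.name = b.name;		/* must be re-linked after a copy */
    	return (0);
    }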
+struct nat64stl_dump_arg { + struct ip_fw_chain *ch; + struct sockopt_data *sd; +}; + +static int +export_config_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct nat64stl_dump_arg *da = (struct nat64stl_dump_arg *)arg; + ipfw_nat64stl_cfg *uc; + + uc = (ipfw_nat64stl_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc)); + nat64stl_export_config(da->ch, (struct nat64stl_cfg *)no, uc); + return (0); +} + +static struct nat64stl_cfg * +nat64stl_find(struct namedobj_instance *ni, const char *name, uint8_t set) +{ + struct nat64stl_cfg *cfg; + + cfg = (struct nat64stl_cfg *)ipfw_objhash_lookup_name_type(ni, set, + IPFW_TLV_NAT64STL_NAME, name); + + return (cfg); +} + + +static int +nat64stl_create_internal(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, + ipfw_nat64stl_cfg *i) +{ + + IPFW_UH_WLOCK_ASSERT(ch); + + if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) + return (ENOSPC); + cfg->flags |= NAT64STL_KIDX; + + if (ipfw_ref_table(ch, &i->ntlv4, &cfg->map46) != 0) + return (EINVAL); + cfg->flags |= NAT64STL_46T; + + if (ipfw_ref_table(ch, &i->ntlv6, &cfg->map64) != 0) + return (EINVAL); + cfg->flags |= NAT64STL_64T; + + ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no); + + return (0); +} + +/* + * Creates new nat64 instance. + * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ] + * + * Returns 0 on success + */ +static int +nat64stl_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_lheader *olh; + ipfw_nat64stl_cfg *uc; + struct namedobj_instance *ni; + struct nat64stl_cfg *cfg; + int error; + + if (sd->valsize != sizeof(*olh) + sizeof(*uc)) + return (EINVAL); + + olh = (ipfw_obj_lheader *)sd->kbuf; + uc = (ipfw_nat64stl_cfg *)(olh + 1); + + if (ipfw_check_object_name_generic(uc->name) != 0) + return (EINVAL); + if (!IN6_IS_ADDR_WKPFX(&uc->prefix6)) + return (EINVAL); + if (uc->plen6 != 96 || uc->set >= IPFW_MAX_SETS) + return (EINVAL); + + /* XXX: check types of tables */ + + ni = CHAIN_TO_SRV(ch); + error = 0; + + IPFW_UH_RLOCK(ch); + if (nat64stl_find(ni, uc->name, uc->set) != NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); + } + IPFW_UH_RUNLOCK(ch); + + cfg = nat64stl_alloc_config(uc->name, uc->set); + cfg->prefix6 = uc->prefix6; + cfg->plen6 = uc->plen6; + cfg->flags = uc->flags & NAT64STL_FLAGSMASK; + + IPFW_UH_WLOCK(ch); + + if (nat64stl_find(ni, uc->name, uc->set) != NULL) { + IPFW_UH_WUNLOCK(ch); + nat64stl_free_config(cfg); + return (EEXIST); + } + error = nat64stl_create_internal(ch, cfg, uc); + if (error == 0) { + /* Okay, let's link data */ + IPFW_WLOCK(ch); + SRV_OBJECT(ch, cfg->no.kidx) = cfg; + IPFW_WUNLOCK(ch); + + IPFW_UH_WUNLOCK(ch); + return (0); + } + + if (cfg->flags & NAT64STL_KIDX) + ipfw_objhash_free_idx(ni, cfg->no.kidx); + if (cfg->flags & NAT64STL_46T) + ipfw_unref_table(ch, cfg->map46); + if (cfg->flags & NAT64STL_64T) + ipfw_unref_table(ch, cfg->map64); + + IPFW_UH_WUNLOCK(ch); + nat64stl_free_config(cfg); + return (error); +} + +/* + * Change existing nat64stl instance configuration. 
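nat64stl_create_internal() above records each successfully acquired resource as a flag bit (NAT64STL_KIDX, NAT64STL_46T, NAT64STL_64T), and on failure its caller, nat64stl_create(), releases exactly the recorded ones. The generic shape of that unwind, with stub acquire/release functions standing in for the object-index and table references:

    #include <stdint.h>

    #define	GOT_A	0x01
    #define	GOT_B	0x02

    static int acquire_a(void) { return (0); }
    static int acquire_b(void) { return (-1); }	/* simulate failure */
    static void release_a(void) { }
    static void release_b(void) { }

    static int
    setup(void)
    {
    	uint32_t flags = 0;

    	if (acquire_a() != 0)
    		goto fail;
    	flags |= GOT_A;
    	if (acquire_b() != 0)
    		goto fail;
    	flags |= GOT_B;
    	return (0);
    fail:
    	/* Release only what the flag bits say was acquired. */
    	if (flags & GOT_B)
    		release_b();
    	if (flags & GOT_A)
    		release_a();
    	return (-1);
    }

    int
    main(void)
    {
    	return (setup() == 0 ? 0 : 1);
    }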
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_nat64stl_cfg ] + * Reply: [ ipfw_obj_header ipfw_nat64stl_cfg ] + * + * Returns 0 on success + */ +static int +nat64stl_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + ipfw_obj_header *oh; + ipfw_nat64stl_cfg *uc; + struct nat64stl_cfg *cfg; + struct namedobj_instance *ni; + + if (sd->valsize != sizeof(*oh) + sizeof(*uc)) + return (EINVAL); + + oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, + sizeof(*oh) + sizeof(*uc)); + uc = (ipfw_nat64stl_cfg *)(oh + 1); + + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || + oh->ntlv.set >= IPFW_MAX_SETS) + return (EINVAL); + + ni = CHAIN_TO_SRV(ch); + if (sd->sopt->sopt_dir == SOPT_GET) { + IPFW_UH_RLOCK(ch); + cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); + } + nat64stl_export_config(ch, cfg, uc); + IPFW_UH_RUNLOCK(ch); + return (0); + } + + IPFW_UH_WLOCK(ch); + cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (EEXIST); + } + + /* + * For now allow to change only following values: + * flags. + */ + + cfg->flags = uc->flags & NAT64STL_FLAGSMASK; + IPFW_UH_WUNLOCK(ch); + return (0); +} + +static void +nat64stl_detach_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg) +{ + + IPFW_UH_WLOCK_ASSERT(ch); + + ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); + ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); + ipfw_unref_table(ch, cfg->map46); + ipfw_unref_table(ch, cfg->map64); +} + +/* + * Destroys nat64 instance. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +static int +nat64stl_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_header *oh; + struct nat64stl_cfg *cfg; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + + oh = (ipfw_obj_header *)sd->kbuf; + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0) + return (EINVAL); + + IPFW_UH_WLOCK(ch); + cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + if (cfg->no.refcnt > 0) { + IPFW_UH_WUNLOCK(ch); + return (EBUSY); + } + + IPFW_WLOCK(ch); + SRV_OBJECT(ch, cfg->no.kidx) = NULL; + IPFW_WUNLOCK(ch); + + nat64stl_detach_config(ch, cfg); + IPFW_UH_WUNLOCK(ch); + + nat64stl_free_config(cfg); + return (0); +} + +/* + * Lists all nat64stl instances currently available in kernel. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ] + * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ] + * + * Returns 0 on success + */ +static int +nat64stl_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_lheader *olh; + struct nat64stl_dump_arg da; + + /* Check minimum header size */ + if (sd->valsize < sizeof(ipfw_obj_lheader)) + return (EINVAL); + + olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); + + IPFW_UH_RLOCK(ch); + olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), + IPFW_TLV_NAT64STL_NAME); + olh->objsize = sizeof(ipfw_nat64stl_cfg); + olh->size = sizeof(*olh) + olh->count * olh->objsize; + + if (sd->valsize < olh->size) { + IPFW_UH_RUNLOCK(ch); + return (ENOMEM); + } + memset(&da, 0, sizeof(da)); + da.ch = ch; + da.sd = sd; + ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, + &da, IPFW_TLV_NAT64STL_NAME); + IPFW_UH_RUNLOCK(ch); + + return (0); +} + +#define __COPY_STAT_FIELD(_cfg, _stats, _field) \ + (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field) +static void +export_stats(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, + struct ipfw_nat64stl_stats *stats) +{ + + __COPY_STAT_FIELD(cfg, stats, opcnt64); + __COPY_STAT_FIELD(cfg, stats, opcnt46); + __COPY_STAT_FIELD(cfg, stats, ofrags); + __COPY_STAT_FIELD(cfg, stats, ifrags); + __COPY_STAT_FIELD(cfg, stats, oerrors); + __COPY_STAT_FIELD(cfg, stats, noroute4); + __COPY_STAT_FIELD(cfg, stats, noroute6); + __COPY_STAT_FIELD(cfg, stats, noproto); + __COPY_STAT_FIELD(cfg, stats, nomem); + __COPY_STAT_FIELD(cfg, stats, dropped); +} + +/* + * Get nat64stl statistics. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]] + * + * Returns 0 on success + */ +static int +nat64stl_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + struct ipfw_nat64stl_stats stats; + struct nat64stl_cfg *cfg; + ipfw_obj_header *oh; + ipfw_obj_ctlv *ctlv; + size_t sz; + + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); + if (sd->valsize % sizeof(uint64_t)) + return (EINVAL); + if (sd->valsize < sz) + return (ENOMEM); + oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); + if (oh == NULL) + return (EINVAL); + memset(&stats, 0, sizeof(stats)); + + IPFW_UH_RLOCK(ch); + cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (ESRCH); + } + export_stats(ch, cfg, &stats); + IPFW_UH_RUNLOCK(ch); + + ctlv = (ipfw_obj_ctlv *)(oh + 1); + memset(ctlv, 0, sizeof(*ctlv)); + ctlv->head.type = IPFW_TLV_COUNTERS; + ctlv->head.length = sz - sizeof(ipfw_obj_header); + ctlv->count = sizeof(stats) / sizeof(uint64_t); + ctlv->objsize = sizeof(uint64_t); + ctlv->version = IPFW_NAT64_VERSION; + memcpy(ctlv + 1, &stats, sizeof(stats)); + return (0); +} + +/* + * Reset nat64stl statistics. 
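The __COPY_STAT_FIELD() macro above splices its argument in as a member name on both sides of the assignment, so each exported counter costs one readable, typo-resistant line. A standalone model of the same trick, with plain structs standing in for the per-CPU counter block:

    #include <assert.h>
    #include <stdint.h>

    struct kstats { uint64_t dropped, nomem; };	/* kernel-side counters */
    struct ustats { uint64_t dropped, nomem; };	/* exported snapshot */

    #define	COPY_FIELD(dst, src, field)	(dst)->field = (src)->field

    int
    main(void)
    {
    	struct kstats k = { .dropped = 5, .nomem = 1 };
    	struct ustats u = { 0 };

    	COPY_FIELD(&u, &k, dropped);
    	COPY_FIELD(&u, &k, nomem);
    	assert(u.dropped == 5 && u.nomem == 1);
    	return (0);
    }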
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +static int +nat64stl_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + struct nat64stl_cfg *cfg; + ipfw_obj_header *oh; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + oh = (ipfw_obj_header *)sd->kbuf; + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || + oh->ntlv.set >= IPFW_MAX_SETS) + return (EINVAL); + + IPFW_UH_WLOCK(ch); + cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS); + IPFW_UH_WUNLOCK(ch); + return (0); +} + +static struct ipfw_sopt_handler scodes[] = { + + { IP_FW_NAT64STL_CREATE, 0, HDIR_SET, nat64stl_create }, + { IP_FW_NAT64STL_DESTROY,0, HDIR_SET, nat64stl_destroy }, + { IP_FW_NAT64STL_CONFIG, 0, HDIR_BOTH, nat64stl_config }, + { IP_FW_NAT64STL_LIST, 0, HDIR_GET, nat64stl_list }, + { IP_FW_NAT64STL_STATS, 0, HDIR_GET, nat64stl_stats }, + { IP_FW_NAT64STL_RESET_STATS,0, HDIR_SET, nat64stl_reset_stats }, +}; + +static int +nat64stl_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) +{ + ipfw_insn *icmd; + + icmd = cmd - 1; + if (icmd->opcode != O_EXTERNAL_ACTION || + icmd->arg1 != V_nat64stl_eid) + return (1); + + *puidx = cmd->arg1; + *ptype = 0; + return (0); +} + +static void +nat64stl_update_arg1(ipfw_insn *cmd, uint16_t idx) +{ + + cmd->arg1 = idx; +} + +static int +nat64stl_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, + struct named_object **pno) +{ + int err; + + err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, + IPFW_TLV_NAT64STL_NAME, pno); + return (err); +} + +static struct named_object * +nat64stl_findbykidx(struct ip_fw_chain *ch, uint16_t idx) +{ + struct namedobj_instance *ni; + struct named_object *no; + + IPFW_UH_WLOCK_ASSERT(ch); + ni = CHAIN_TO_SRV(ch); + no = ipfw_objhash_lookup_kidx(ni, idx); + KASSERT(no != NULL, ("NAT with index %d not found", idx)); + + return (no); +} + +static int +nat64stl_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, + enum ipfw_sets_cmd cmd) +{ + + return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64STL_NAME, + set, new_set, cmd)); +} + +static struct opcode_obj_rewrite opcodes[] = { + { + .opcode = O_EXTERNAL_INSTANCE, + .etlv = IPFW_TLV_EACTION /* just show it isn't table */, + .classifier = nat64stl_classify, + .update = nat64stl_update_arg1, + .find_byname = nat64stl_findbyname, + .find_bykidx = nat64stl_findbykidx, + .manage_sets = nat64stl_manage_sets, + }, +}; + +static int +destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct nat64stl_cfg *cfg; + struct ip_fw_chain *ch; + + ch = (struct ip_fw_chain *)arg; + cfg = (struct nat64stl_cfg *)SRV_OBJECT(ch, no->kidx); + SRV_OBJECT(ch, no->kidx) = NULL; + nat64stl_detach_config(ch, cfg); + nat64stl_free_config(cfg); + return (0); +} + +int +nat64stl_init(struct ip_fw_chain *ch, int first) +{ + + V_nat64stl_eid = ipfw_add_eaction(ch, ipfw_nat64stl, "nat64stl"); + if (V_nat64stl_eid == 0) + return (ENXIO); + IPFW_ADD_SOPT_HANDLER(first, scodes); + IPFW_ADD_OBJ_REWRITER(first, opcodes); + return (0); +} + +void +nat64stl_uninit(struct ip_fw_chain *ch, int last) +{ + + IPFW_DEL_OBJ_REWRITER(last, opcodes); + IPFW_DEL_SOPT_HANDLER(last, scodes); + ipfw_del_eaction(ch, V_nat64stl_eid); + /* + * Since we already have deregistered external action, + * our named objects become unaccessible via rules, because + 
* all rules were truncated by ipfw_del_eaction(). + * So, we can unlink and destroy our named objects without holding + * IPFW_WLOCK(). + */ + IPFW_UH_WLOCK(ch); + ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, + IPFW_TLV_NAT64STL_NAME); + V_nat64stl_eid = 0; + IPFW_UH_WUNLOCK(ch); +} + diff --git a/freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c b/freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c new file mode 100644 index 00000000..92a2c7a3 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c @@ -0,0 +1,101 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/rwlock.h> +#include <sys/socket.h> + +#include <net/if.h> +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/ip_var.h> +#include <netinet/ip_fw.h> + +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/nptv6/nptv6.h> + +static int +vnet_ipfw_nptv6_init(const void *arg __unused) +{ + + return (nptv6_init(&V_layer3_chain, IS_DEFAULT_VNET(curvnet))); +} + +static int +vnet_ipfw_nptv6_uninit(const void *arg __unused) +{ + + nptv6_uninit(&V_layer3_chain, IS_DEFAULT_VNET(curvnet)); + return (0); +} + +static int +ipfw_nptv6_modevent(module_t mod, int type, void *unused) +{ + + switch (type) { + case MOD_LOAD: + case MOD_UNLOAD: + break; + default: + return (EOPNOTSUPP); + } + return (0); +} + +static moduledata_t ipfw_nptv6_mod = { + "ipfw_nptv6", + ipfw_nptv6_modevent, + 0 +}; + +/* Define startup order. 
*/ +#define IPFW_NPTV6_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN +#define IPFW_NPTV6_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */ +#define IPFW_NPTV6_MODULE_ORDER (IPFW_NPTV6_MODEVENT_ORDER + 1) +#define IPFW_NPTV6_VNET_ORDER (IPFW_NPTV6_MODEVENT_ORDER + 2) + +DECLARE_MODULE(ipfw_nptv6, ipfw_nptv6_mod, IPFW_NPTV6_SI_SUB_FIREWALL, + IPFW_NPTV6_MODULE_ORDER); +MODULE_DEPEND(ipfw_nptv6, ipfw, 3, 3, 3); +MODULE_VERSION(ipfw_nptv6, 1); + +VNET_SYSINIT(vnet_ipfw_nptv6_init, IPFW_NPTV6_SI_SUB_FIREWALL, + IPFW_NPTV6_VNET_ORDER, vnet_ipfw_nptv6_init, NULL); +VNET_SYSUNINIT(vnet_ipfw_nptv6_uninit, IPFW_NPTV6_SI_SUB_FIREWALL, + IPFW_NPTV6_VNET_ORDER, vnet_ipfw_nptv6_uninit, NULL); diff --git a/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c new file mode 100644 index 00000000..4256d028 --- /dev/null +++ b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c @@ -0,0 +1,894 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/counter.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/rmlock.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <sys/syslog.h> +#include <sys/sysctl.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/netisr.h> +#include <net/pfil.h> +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/ip_var.h> +#include <netinet/ip_fw.h> +#include <netinet/ip6.h> +#include <netinet/icmp6.h> +#include <netinet6/in6_var.h> +#include <netinet6/ip6_var.h> + +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/nptv6/nptv6.h> + +static VNET_DEFINE(uint16_t, nptv6_eid) = 0; +#define V_nptv6_eid VNET(nptv6_eid) +#define IPFW_TLV_NPTV6_NAME IPFW_TLV_EACTION_NAME(V_nptv6_eid) + +static struct nptv6_cfg *nptv6_alloc_config(const char *name, uint8_t set); +static void nptv6_free_config(struct nptv6_cfg *cfg); +static struct nptv6_cfg *nptv6_find(struct namedobj_instance *ni, + const char *name, uint8_t set); +static int nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp, + int offset); +static int nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp, + int offset); + +#define NPTV6_LOOKUP(chain, cmd) \ + (struct nptv6_cfg *)SRV_OBJECT((chain), (cmd)->arg1) + +#ifndef IN6_MASK_ADDR +#define IN6_MASK_ADDR(a, m) do { \ + (a)->s6_addr32[0] &= (m)->s6_addr32[0]; \ + (a)->s6_addr32[1] &= (m)->s6_addr32[1]; \ + (a)->s6_addr32[2] &= (m)->s6_addr32[2]; \ + (a)->s6_addr32[3] &= (m)->s6_addr32[3]; \ +} while (0) +#endif +#ifndef IN6_ARE_MASKED_ADDR_EQUAL +#define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ + (((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \ + (((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \ + (((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \ + (((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 ) +#endif + +#if 0 +#define NPTV6_DEBUG(fmt, ...) do { \ + printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \ +} while (0) +#define NPTV6_IPDEBUG(fmt, ...) do { \ + char _s[INET6_ADDRSTRLEN], _d[INET6_ADDRSTRLEN]; \ + printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \ +} while (0) +#else +#define NPTV6_DEBUG(fmt, ...) +#define NPTV6_IPDEBUG(fmt, ...) +#endif + +static int +nptv6_getlasthdr(struct nptv6_cfg *cfg, struct mbuf *m, int *offset) +{ + struct ip6_hdr *ip6; + struct ip6_hbh *hbh; + int proto, hlen; + + hlen = (offset == NULL) ? 
0: *offset;
+	if (m->m_len < hlen)
+		return (-1);
+	ip6 = mtodo(m, hlen);
+	hlen += sizeof(*ip6);
+	proto = ip6->ip6_nxt;
+	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
+	    proto == IPPROTO_DSTOPTS) {
+		hbh = mtodo(m, hlen);
+		if (m->m_len < hlen)
+			return (-1);
+		proto = hbh->ip6h_nxt;
+		/*
+		 * ip6h_len counts 8-octet units beyond the first eight
+		 * octets of the extension header (RFC 2460).
+		 */
+		hlen += (hbh->ip6h_len + 1) << 3;
+	}
+	if (offset != NULL)
+		*offset = hlen;
+	return (proto);
+}
+
+static int
+nptv6_translate_icmpv6(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
+{
+	struct icmp6_hdr *icmp6;
+	struct ip6_hdr *ip6;
+	struct mbuf *m;
+
+	m = *mp;
+	if (offset > m->m_len)
+		return (-1);
+	icmp6 = mtodo(m, offset);
+	NPTV6_DEBUG("ICMPv6 type %d", icmp6->icmp6_type);
+	switch (icmp6->icmp6_type) {
+	case ICMP6_DST_UNREACH:
+	case ICMP6_PACKET_TOO_BIG:
+	case ICMP6_TIME_EXCEEDED:
+	case ICMP6_PARAM_PROB:
+		break;
+	case ICMP6_ECHO_REQUEST:
+	case ICMP6_ECHO_REPLY:
+		/* nothing to translate */
+		return (0);
+	default:
+		/*
+		 * XXX: We could add some checks so as not to translate NDP
+		 * and MLD messages. Currently the user must explicitly allow
+		 * these message types; otherwise packets will be dropped.
+		 */
+		return (-1);
+	}
+	offset += sizeof(*icmp6);
+	if (offset + sizeof(*ip6) > m->m_pkthdr.len)
+		return (-1);
+	if (offset + sizeof(*ip6) > m->m_len)
+		*mp = m = m_pullup(m, offset + sizeof(*ip6));
+	if (m == NULL)
+		return (-1);
+	ip6 = mtodo(m, offset);
+	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
+	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
+	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
+	    ip6->ip6_nxt);
+	if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src,
+	    &cfg->external, &cfg->mask))
+		return (nptv6_rewrite_external(cfg, mp, offset));
+	else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
+	    &cfg->internal, &cfg->mask))
+		return (nptv6_rewrite_internal(cfg, mp, offset));
+	/*
+	 * The addresses in the inner IPv6 header match neither of
+	 * our prefixes.
+	 */
+	return (-1);
+}
+
+static int
+nptv6_search_index(struct nptv6_cfg *cfg, struct in6_addr *a)
+{
+	int idx;
+
+	if (cfg->flags & NPTV6_48PLEN)
+		return (3);
+
+	/* Search for a suitable word index for the adjustment */
+	for (idx = 4; idx < 8; idx++)
+		if (a->s6_addr16[idx] != 0xffff)
+			break;
+	/*
+	 * RFC 6296 p3.7: If an NPTv6 Translator discovers a datagram with
+	 * an IID of all-zeros while performing address mapping, that
+	 * datagram MUST be dropped, and an ICMPv6 Parameter Problem error
+	 * SHOULD be generated.
+	 */
+	if (idx == 8 ||
+	    (a->s6_addr32[2] == 0 && a->s6_addr32[3] == 0))
+		return (-1);
+	return (idx);
+}
+
+static void
+nptv6_copy_addr(struct in6_addr *src, struct in6_addr *dst,
+    struct in6_addr *mask)
+{
+	int i;
+
+	for (i = 0; i < 8 && mask->s6_addr8[i] != 0; i++) {
+		dst->s6_addr8[i] &= ~mask->s6_addr8[i];
+		dst->s6_addr8[i] |= src->s6_addr8[i] & mask->s6_addr8[i];
+	}
+}
+
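+/*
+ * The rewrite functions below implement the RFC 6296 checksum-neutral
+ * mapping: one 16-bit word of the address absorbs the difference
+ * between the one's-complement sums of the two prefixes, so transport
+ * checksums covering the IPv6 pseudo-header remain valid without being
+ * rewritten.  A minimal userland sketch of the arithmetic, with
+ * example prefixes fd01:203:405::/48 -> 2001:db8:1::/48; cksum_add()
+ * is a local stand-in with the kernel helper's one's-complement
+ * semantics, the 0xffff test maps one's-complement -0 to +0 exactly as
+ * the rewrite code does, and the reverse direction simply adds the
+ * complement, ~adjustment:
+ *
+ *	#include <stdint.h>
+ *	#include <stdio.h>
+ *
+ *	static uint16_t
+ *	cksum_add(uint16_t sum, uint16_t a)
+ *	{
+ *		uint32_t t = sum + a;
+ *
+ *		return ((t & 0xffff) + (t >> 16));
+ *	}
+ *
+ *	int
+ *	main(void)
+ *	{
+ *		uint16_t in[3] = { 0xfd01, 0x0203, 0x0405 };
+ *		uint16_t ex[3] = { 0x2001, 0x0db8, 0x0001 };
+ *		uint16_t i = 0, e = 0, adj, word = 0x1234;
+ *		int n;
+ *
+ *		for (n = 0; n < 3; n++) {
+ *			i = cksum_add(i, in[n]);
+ *			e = cksum_add(e, ex[n]);
+ *		}
+ *		adj = cksum_add(~e, i);
+ *		word = cksum_add(word, adj);
+ *		if (word == 0xffff)
+ *			word = 0;
+ *		printf("adjustment %#x, word 0x1234 -> %#x\n", adj, word);
+ *		return (0);
+ *	}
+ */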
+static int
+nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
+{
+	struct in6_addr *addr;
+	struct ip6_hdr *ip6;
+	int idx, proto;
+	uint16_t adj;
+
+	ip6 = mtodo(*mp, offset);
+	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
+	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
+	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
+	    ip6->ip6_nxt);
+	if (offset == 0)
+		addr = &ip6->ip6_src;
+	else {
+		/*
+		 * When we are rewriting the inner IPv6 header, we need to
+		 * rewrite the destination address back to the external
+		 * prefix. The datagram in the ICMPv6 payload should look
+		 * like it was sent from the external prefix.
+		 */
+		addr = &ip6->ip6_dst;
+	}
+	idx = nptv6_search_index(cfg, addr);
+	if (idx < 0) {
+		/*
+		 * Do not send an ICMPv6 error when offset isn't zero:
+		 * in that case we are rewriting an inner IPv6 header
+		 * inside an ICMPv6 error message.
+		 */
+		if (offset == 0) {
+			icmp6_error2(*mp, ICMP6_DST_UNREACH,
+			    ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif);
+			*mp = NULL;
+		}
+		return (IP_FW_DENY);
+	}
+	adj = addr->s6_addr16[idx];
+	nptv6_copy_addr(&cfg->external, addr, &cfg->mask);
+	adj = cksum_add(adj, cfg->adjustment);
+	if (adj == 0xffff)
+		adj = 0;
+	addr->s6_addr16[idx] = adj;
+	if (offset == 0) {
+		/*
+		 * We may need to translate addresses in the inner IPv6
+		 * header for ICMPv6 error messages.
+		 */
+		proto = nptv6_getlasthdr(cfg, *mp, &offset);
+		if (proto < 0 || (proto == IPPROTO_ICMPV6 &&
+		    nptv6_translate_icmpv6(cfg, mp, offset) != 0))
+			return (IP_FW_DENY);
+		NPTV6STAT_INC(cfg, in2ex);
+	}
+	return (0);
+}
+
+static int
+nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
+{
+	struct in6_addr *addr;
+	struct ip6_hdr *ip6;
+	int idx, proto;
+	uint16_t adj;
+
+	ip6 = mtodo(*mp, offset);
+	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
+	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
+	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
+	    ip6->ip6_nxt);
+	if (offset == 0)
+		addr = &ip6->ip6_dst;
+	else {
+		/*
+		 * When we are rewriting the inner IPv6 header, we need to
+		 * rewrite the source address back to the internal prefix.
+		 * The datagram in the ICMPv6 payload should look like it
+		 * was sent from the internal prefix.
+		 */
+		addr = &ip6->ip6_src;
+	}
+	idx = nptv6_search_index(cfg, addr);
+	if (idx < 0) {
+		/*
+		 * Do not send an ICMPv6 error when offset isn't zero:
+		 * in that case we are rewriting an inner IPv6 header
+		 * inside an ICMPv6 error message.
+		 */
+		if (offset == 0) {
+			icmp6_error2(*mp, ICMP6_DST_UNREACH,
+			    ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif);
+			*mp = NULL;
+		}
+		return (IP_FW_DENY);
+	}
+	adj = addr->s6_addr16[idx];
+	nptv6_copy_addr(&cfg->internal, addr, &cfg->mask);
+	adj = cksum_add(adj, ~cfg->adjustment);
+	if (adj == 0xffff)
+		adj = 0;
+	addr->s6_addr16[idx] = adj;
+	if (offset == 0) {
+		/*
+		 * We may need to translate addresses in the inner IPv6
+		 * header for ICMPv6 error messages.
+		 */
+		proto = nptv6_getlasthdr(cfg, *mp, &offset);
+		if (proto < 0 || (proto == IPPROTO_ICMPV6 &&
+		    nptv6_translate_icmpv6(cfg, mp, offset) != 0))
+			return (IP_FW_DENY);
+		NPTV6STAT_INC(cfg, ex2in);
+	}
+	return (0);
+}
+
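+/*
+ * A sketch of how an nptv6 instance is referenced from a rule: the
+ * compiled rule carries an O_EXTERNAL_ACTION opcode whose arg1 is the
+ * nptv6 external action id, immediately followed by an
+ * O_EXTERNAL_INSTANCE opcode whose arg1 is the kernel index (kidx) of
+ * one configured instance:
+ *
+ *	[ O_EXTERNAL_ACTION   arg1 = V_nptv6_eid ]
+ *	[ O_EXTERNAL_INSTANCE arg1 = kidx        ]
+ *
+ * NPTV6_LOOKUP() resolves kidx through the chain's SRV_OBJECT() array,
+ * which is why the handler below re-checks both opcodes before using
+ * the configuration pointer.
+ */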
+/*
+ * ipfw external action handler.
+ */
+static int
+ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args,
+    ipfw_insn *cmd, int *done)
+{
+	struct ip6_hdr *ip6;
+	struct nptv6_cfg *cfg;
+	ipfw_insn *icmd;
+	int ret;
+
+	*done = 0; /* try next rule if not matched */
+	icmd = cmd + 1;
+	if (cmd->opcode != O_EXTERNAL_ACTION ||
+	    cmd->arg1 != V_nptv6_eid ||
+	    icmd->opcode != O_EXTERNAL_INSTANCE ||
+	    (cfg = NPTV6_LOOKUP(chain, icmd)) == NULL)
+		return (0);
+	/*
+	 * We need to act as a router, so do nothing when forwarding
+	 * is disabled.
+	 */
+	if (V_ip6_forwarding == 0 || args->f_id.addr_type != 6)
+		return (0);
+	/*
+	 * NOTE: we expect that ipfw_chk() did m_pullup() up to the upper
+	 * level protocol's headers. Also we skip some checks that
+	 * ip6_input(), ip6_forward(), ip6_fastfwd() and ipfw_chk() have
+	 * already done.
+	 */
+	ret = IP_FW_DENY;
+	ip6 = mtod(args->m, struct ip6_hdr *);
+	NPTV6_IPDEBUG("eid %u, oid %u, %s -> %s %d",
+	    cmd->arg1, icmd->arg1,
+	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
+	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
+	    ip6->ip6_nxt);
+	if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src,
+	    &cfg->internal, &cfg->mask)) {
+		/*
+		 * XXX: Do not translate packets when both src and dst
+		 * are from the internal prefix.
+		 */
+		if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
+		    &cfg->internal, &cfg->mask))
+			return (0);
+		ret = nptv6_rewrite_internal(cfg, &args->m, 0);
+	} else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
+	    &cfg->external, &cfg->mask))
+		ret = nptv6_rewrite_external(cfg, &args->m, 0);
+	else
+		return (0);
+	/*
+	 * If the address was not rewritten, free the mbuf.
+	 */
+	if (ret != 0) {
+		if (args->m != NULL) {
+			m_freem(args->m);
+			args->m = NULL; /* mark mbuf as consumed */
+		}
+		NPTV6STAT_INC(cfg, dropped);
+	}
+	/* Terminate the search if one_pass is set */
+	*done = V_fw_one_pass;
+	/* Update args->f_id when one_pass is off */
+	if (*done == 0 && ret == 0) {
+		ip6 = mtod(args->m, struct ip6_hdr *);
+		args->f_id.src_ip6 = ip6->ip6_src;
+		args->f_id.dst_ip6 = ip6->ip6_dst;
+	}
+	return (ret);
+}
+
+static struct nptv6_cfg *
+nptv6_alloc_config(const char *name, uint8_t set)
+{
+	struct nptv6_cfg *cfg;
+
+	cfg = malloc(sizeof(struct nptv6_cfg), M_IPFW, M_WAITOK | M_ZERO);
+	COUNTER_ARRAY_ALLOC(cfg->stats, NPTV6STATS, M_WAITOK);
+	cfg->no.name = cfg->name;
+	cfg->no.etlv = IPFW_TLV_NPTV6_NAME;
+	cfg->no.set = set;
+	strlcpy(cfg->name, name, sizeof(cfg->name));
+	return (cfg);
+}
+
+static void
+nptv6_free_config(struct nptv6_cfg *cfg)
+{
+
+	COUNTER_ARRAY_FREE(cfg->stats, NPTV6STATS);
+	free(cfg, M_IPFW);
+}
+
+static void
+nptv6_export_config(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,
+    ipfw_nptv6_cfg *uc)
+{
+
+	uc->internal = cfg->internal;
+	uc->external = cfg->external;
+	uc->plen = cfg->plen;
+	uc->flags = cfg->flags & NPTV6_FLAGSMASK;
+	uc->set = cfg->no.set;
+	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
+}
+
+struct nptv6_dump_arg {
+	struct ip_fw_chain *ch;
+	struct sockopt_data *sd;
+};
+
+static int
+export_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct nptv6_dump_arg *da = (struct nptv6_dump_arg *)arg;
+	ipfw_nptv6_cfg *uc;
+
+	uc = (ipfw_nptv6_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc));
+	nptv6_export_config(da->ch, (struct nptv6_cfg *)no, uc);
+	return (0);
+}
+
+static struct nptv6_cfg *
+nptv6_find(struct namedobj_instance *ni, const char *name, uint8_t set)
+{
+	struct nptv6_cfg *cfg;
+
+	cfg = (struct nptv6_cfg *)ipfw_objhash_lookup_name_type(ni, set,
+	    IPFW_TLV_NPTV6_NAME, name);
+
+	return (cfg);
+}
+
+static void
+nptv6_calculate_adjustment(struct nptv6_cfg *cfg)
+{
+	uint16_t i, e;
+	uint16_t *p;
+
+	/* Calculate checksum of internal prefix */
+	for (i = 0, p = (uint16_t *)&cfg->internal;
+	    p < (uint16_t *)(&cfg->internal + 1); p++)
+		i = cksum_add(i, *p);
+
+	/* Calculate checksum of external prefix */
+	for (e = 0, p = (uint16_t *)&cfg->external;
+	    p < (uint16_t *)(&cfg->external + 1); p++)
+		e = cksum_add(e, *p);
+
+	/* Adjustment value for Int->Ext direction */
+	cfg->adjustment = cksum_add(~e, i);
+}
+
+/*
+ * Creates new NPTv6 instance.
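+ * The prefixes supplied by userland must be neither multicast,
+ * unspecified nor link-local, must share the single prefix length
+ * plen (8..64), and must differ once masked to plen bits.  A pair of
+ * /48s (or shorter), e.g. the hypothetical fd01:203:405::/48 and
+ * 2001:db8:1::/48, additionally sets NPTV6_48PLEN so that the
+ * checksum adjustment always lands in address word 3, the subnet id.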
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ipfw_nptv6_cfg ] + * + * Returns 0 on success + */ +static int +nptv6_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + struct in6_addr mask; + ipfw_obj_lheader *olh; + ipfw_nptv6_cfg *uc; + struct namedobj_instance *ni; + struct nptv6_cfg *cfg; + + if (sd->valsize != sizeof(*olh) + sizeof(*uc)) + return (EINVAL); + + olh = (ipfw_obj_lheader *)sd->kbuf; + uc = (ipfw_nptv6_cfg *)(olh + 1); + if (ipfw_check_object_name_generic(uc->name) != 0) + return (EINVAL); + if (uc->plen < 8 || uc->plen > 64 || uc->set >= IPFW_MAX_SETS) + return (EINVAL); + if (IN6_IS_ADDR_MULTICAST(&uc->internal) || + IN6_IS_ADDR_MULTICAST(&uc->external) || + IN6_IS_ADDR_UNSPECIFIED(&uc->internal) || + IN6_IS_ADDR_UNSPECIFIED(&uc->external) || + IN6_IS_ADDR_LINKLOCAL(&uc->internal) || + IN6_IS_ADDR_LINKLOCAL(&uc->external)) + return (EINVAL); + in6_prefixlen2mask(&mask, uc->plen); + if (IN6_ARE_MASKED_ADDR_EQUAL(&uc->internal, &uc->external, &mask)) + return (EINVAL); + + ni = CHAIN_TO_SRV(ch); + IPFW_UH_RLOCK(ch); + if (nptv6_find(ni, uc->name, uc->set) != NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); + } + IPFW_UH_RUNLOCK(ch); + + cfg = nptv6_alloc_config(uc->name, uc->set); + cfg->plen = uc->plen; + if (cfg->plen <= 48) + cfg->flags |= NPTV6_48PLEN; + cfg->internal = uc->internal; + cfg->external = uc->external; + cfg->mask = mask; + IN6_MASK_ADDR(&cfg->internal, &mask); + IN6_MASK_ADDR(&cfg->external, &mask); + nptv6_calculate_adjustment(cfg); + + IPFW_UH_WLOCK(ch); + if (ipfw_objhash_alloc_idx(ni, &cfg->no.kidx) != 0) { + IPFW_UH_WUNLOCK(ch); + nptv6_free_config(cfg); + return (ENOSPC); + } + ipfw_objhash_add(ni, &cfg->no); + IPFW_WLOCK(ch); + SRV_OBJECT(ch, cfg->no.kidx) = cfg; + IPFW_WUNLOCK(ch); + IPFW_UH_WUNLOCK(ch); + return (0); +} + +/* + * Destroys NPTv6 instance. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +static int +nptv6_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_header *oh; + struct nptv6_cfg *cfg; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + + oh = (ipfw_obj_header *)sd->kbuf; + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0) + return (EINVAL); + + IPFW_UH_WLOCK(ch); + cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + if (cfg->no.refcnt > 0) { + IPFW_UH_WUNLOCK(ch); + return (EBUSY); + } + + IPFW_WLOCK(ch); + SRV_OBJECT(ch, cfg->no.kidx) = NULL; + IPFW_WUNLOCK(ch); + + ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); + ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); + IPFW_UH_WUNLOCK(ch); + + nptv6_free_config(cfg); + return (0); +} + +/* + * Get or change nptv6 instance config. + * Request: [ ipfw_obj_header [ ipfw_nptv6_cfg ] ] + */ +static int +nptv6_config(struct ip_fw_chain *chain, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + + return (EOPNOTSUPP); +} + +/* + * Lists all NPTv6 instances currently available in kernel. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ] + * Reply: [ ipfw_obj_lheader ipfw_nptv6_cfg x N ] + * + * Returns 0 on success + */ +static int +nptv6_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_lheader *olh; + struct nptv6_dump_arg da; + + /* Check minimum header size */ + if (sd->valsize < sizeof(ipfw_obj_lheader)) + return (EINVAL); + + olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); + + IPFW_UH_RLOCK(ch); + olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), + IPFW_TLV_NPTV6_NAME); + olh->objsize = sizeof(ipfw_nptv6_cfg); + olh->size = sizeof(*olh) + olh->count * olh->objsize; + + if (sd->valsize < olh->size) { + IPFW_UH_RUNLOCK(ch); + return (ENOMEM); + } + memset(&da, 0, sizeof(da)); + da.ch = ch; + da.sd = sd; + ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, + &da, IPFW_TLV_NPTV6_NAME); + IPFW_UH_RUNLOCK(ch); + + return (0); +} + +#define __COPY_STAT_FIELD(_cfg, _stats, _field) \ + (_stats)->_field = NPTV6STAT_FETCH(_cfg, _field) +static void +export_stats(struct ip_fw_chain *ch, struct nptv6_cfg *cfg, + struct ipfw_nptv6_stats *stats) +{ + + __COPY_STAT_FIELD(cfg, stats, in2ex); + __COPY_STAT_FIELD(cfg, stats, ex2in); + __COPY_STAT_FIELD(cfg, stats, dropped); +} + +/* + * Get NPTv6 statistics. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]] + * + * Returns 0 on success + */ +static int +nptv6_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + struct ipfw_nptv6_stats stats; + struct nptv6_cfg *cfg; + ipfw_obj_header *oh; + ipfw_obj_ctlv *ctlv; + size_t sz; + + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); + if (sd->valsize % sizeof(uint64_t)) + return (EINVAL); + if (sd->valsize < sz) + return (ENOMEM); + oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); + if (oh == NULL) + return (EINVAL); + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || + oh->ntlv.set >= IPFW_MAX_SETS) + return (EINVAL); + memset(&stats, 0, sizeof(stats)); + + IPFW_UH_RLOCK(ch); + cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (ESRCH); + } + export_stats(ch, cfg, &stats); + IPFW_UH_RUNLOCK(ch); + + ctlv = (ipfw_obj_ctlv *)(oh + 1); + memset(ctlv, 0, sizeof(*ctlv)); + ctlv->head.type = IPFW_TLV_COUNTERS; + ctlv->head.length = sz - sizeof(ipfw_obj_header); + ctlv->count = sizeof(stats) / sizeof(uint64_t); + ctlv->objsize = sizeof(uint64_t); + ctlv->version = 1; + memcpy(ctlv + 1, &stats, sizeof(stats)); + return (0); +} + +/* + * Reset NPTv6 statistics. 
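+ * The statistics are counter(9) per-CPU counters: NPTV6STAT_FETCH
+ * sums the per-CPU slots while traffic keeps flowing, and the reset
+ * below zeroes all slots of every counter with COUNTER_ARRAY_ZERO()
+ * under the UH write lock.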
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +static int +nptv6_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + struct nptv6_cfg *cfg; + ipfw_obj_header *oh; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + oh = (ipfw_obj_header *)sd->kbuf; + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || + oh->ntlv.set >= IPFW_MAX_SETS) + return (EINVAL); + + IPFW_UH_WLOCK(ch); + cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + COUNTER_ARRAY_ZERO(cfg->stats, NPTV6STATS); + IPFW_UH_WUNLOCK(ch); + return (0); +} + +static struct ipfw_sopt_handler scodes[] = { + { IP_FW_NPTV6_CREATE, 0, HDIR_SET, nptv6_create }, + { IP_FW_NPTV6_DESTROY,0, HDIR_SET, nptv6_destroy }, + { IP_FW_NPTV6_CONFIG, 0, HDIR_BOTH, nptv6_config }, + { IP_FW_NPTV6_LIST, 0, HDIR_GET, nptv6_list }, + { IP_FW_NPTV6_STATS, 0, HDIR_GET, nptv6_stats }, + { IP_FW_NPTV6_RESET_STATS,0, HDIR_SET, nptv6_reset_stats }, +}; + +static int +nptv6_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) +{ + ipfw_insn *icmd; + + icmd = cmd - 1; + NPTV6_DEBUG("opcode %d, arg1 %d, opcode0 %d, arg1 %d", + cmd->opcode, cmd->arg1, icmd->opcode, icmd->arg1); + if (icmd->opcode != O_EXTERNAL_ACTION || + icmd->arg1 != V_nptv6_eid) + return (1); + + *puidx = cmd->arg1; + *ptype = 0; + return (0); +} + +static void +nptv6_update_arg1(ipfw_insn *cmd, uint16_t idx) +{ + + cmd->arg1 = idx; + NPTV6_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1); +} + +static int +nptv6_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, + struct named_object **pno) +{ + int err; + + err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, + IPFW_TLV_NPTV6_NAME, pno); + NPTV6_DEBUG("uidx %u, type %u, err %d", ti->uidx, ti->type, err); + return (err); +} + +static struct named_object * +nptv6_findbykidx(struct ip_fw_chain *ch, uint16_t idx) +{ + struct namedobj_instance *ni; + struct named_object *no; + + IPFW_UH_WLOCK_ASSERT(ch); + ni = CHAIN_TO_SRV(ch); + no = ipfw_objhash_lookup_kidx(ni, idx); + KASSERT(no != NULL, ("NPT with index %d not found", idx)); + + NPTV6_DEBUG("kidx %u -> %s", idx, no->name); + return (no); +} + +static int +nptv6_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, + enum ipfw_sets_cmd cmd) +{ + + return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NPTV6_NAME, + set, new_set, cmd)); +} + +static struct opcode_obj_rewrite opcodes[] = { + { + .opcode = O_EXTERNAL_INSTANCE, + .etlv = IPFW_TLV_EACTION /* just show it isn't table */, + .classifier = nptv6_classify, + .update = nptv6_update_arg1, + .find_byname = nptv6_findbyname, + .find_bykidx = nptv6_findbykidx, + .manage_sets = nptv6_manage_sets, + }, +}; + +static int +destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct nptv6_cfg *cfg; + struct ip_fw_chain *ch; + + ch = (struct ip_fw_chain *)arg; + IPFW_UH_WLOCK_ASSERT(ch); + + cfg = (struct nptv6_cfg *)SRV_OBJECT(ch, no->kidx); + SRV_OBJECT(ch, no->kidx) = NULL; + ipfw_objhash_del(ni, &cfg->no); + ipfw_objhash_free_idx(ni, cfg->no.kidx); + nptv6_free_config(cfg); + return (0); +} + +int +nptv6_init(struct ip_fw_chain *ch, int first) +{ + + V_nptv6_eid = ipfw_add_eaction(ch, ipfw_nptv6, "nptv6"); + if (V_nptv6_eid == 0) + return (ENXIO); + IPFW_ADD_SOPT_HANDLER(first, scodes); + IPFW_ADD_OBJ_REWRITER(first, opcodes); + return (0); +} + +void +nptv6_uninit(struct ip_fw_chain *ch, int last) +{ + + 
IPFW_DEL_OBJ_REWRITER(last, opcodes);
+	IPFW_DEL_SOPT_HANDLER(last, scodes);
+	ipfw_del_eaction(ch, V_nptv6_eid);
+	/*
+	 * Since we have already deregistered the external action, our
+	 * named objects have become inaccessible via rules, because all
+	 * rules were truncated by ipfw_del_eaction().
+	 * So we can unlink and destroy our named objects without holding
+	 * IPFW_WLOCK().
+	 */
+	IPFW_UH_WLOCK(ch);
+	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
+	    IPFW_TLV_NPTV6_NAME);
+	V_nptv6_eid = 0;
+	IPFW_UH_WUNLOCK(ch);
+}
+
diff --git a/freebsd/sys/netpfil/ipfw/nptv6/nptv6.h b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.h
new file mode 100644
index 00000000..95b04bfe
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.h
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * + * $FreeBSD$ + */ + +#ifndef _IP_FW_NPTV6_H_ +#define _IP_FW_NPTV6_H_ + +#include <netinet6/ip_fw_nptv6.h> + +#ifdef _KERNEL +#define NPTV6STATS (sizeof(struct ipfw_nptv6_stats) / sizeof(uint64_t)) +#define NPTV6STAT_ADD(c, f, v) \ + counter_u64_add((c)->stats[ \ + offsetof(struct ipfw_nptv6_stats, f) / sizeof(uint64_t)], (v)) +#define NPTV6STAT_INC(c, f) NPTV6STAT_ADD(c, f, 1) +#define NPTV6STAT_FETCH(c, f) \ + counter_u64_fetch((c)->stats[ \ + offsetof(struct ipfw_nptv6_stats, f) / sizeof(uint64_t)]) + +struct nptv6_cfg { + struct named_object no; + + struct in6_addr internal; /* Internal IPv6 prefix */ + struct in6_addr external; /* External IPv6 prefix */ + struct in6_addr mask; /* IPv6 prefix mask */ + uint16_t adjustment; /* Checksum adjustment value */ + uint8_t plen; /* Prefix length */ + uint8_t flags; /* Flags for internal use */ +#define NPTV6_48PLEN 0x0001 + char name[64]; /* Instance name */ + counter_u64_t stats[NPTV6STATS]; /* Statistics counters */ +}; +#define NPTV6_FLAGSMASK 0 + +int nptv6_init(struct ip_fw_chain *ch, int first); +void nptv6_uninit(struct ip_fw_chain *ch, int last); +#endif /* _KERNEL */ + +#endif /* _IP_FW_NPTV6_H_ */ + diff --git a/freebsd/sys/netpfil/pf/if_pflog.c b/freebsd/sys/netpfil/pf/if_pflog.c new file mode 100644 index 00000000..3a364abc --- /dev/null +++ b/freebsd/sys/netpfil/pf/if_pflog.c @@ -0,0 +1,320 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * The authors of this code are John Ioannidis (ji@tla.org), + * Angelos D. Keromytis (kermit@csd.uch.gr) and + * Niels Provos (provos@physnet.uni-hamburg.de). + * + * This code was written by John Ioannidis for BSD/OS in Athens, Greece, + * in November 1995. + * + * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, + * by Angelos D. Keromytis. + * + * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis + * and Niels Provos. + * + * Copyright (C) 1995, 1996, 1997, 1998 by John Ioannidis, Angelos D. Keromytis + * and Niels Provos. + * Copyright (c) 2001, Angelos D. Keromytis, Niels Provos. + * + * Permission to use, copy, and modify this software with or without fee + * is hereby granted, provided that this entire notice is included in + * all copies of any software which is or includes a copy or + * modification of this software. + * You may use this code under the GNU public license if you so wish. Please + * contribute changes back to the authors under this freer than GPL license + * so that we may further the use of strong encryption without limitations to + * all. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE + * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR + * PURPOSE. 
+ * + * $OpenBSD: if_pflog.c,v 1.26 2007/10/18 21:58:18 mpf Exp $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#include <rtems/bsd/local/opt_bpf.h> +#include <rtems/bsd/local/opt_pf.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/kernel.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/proc.h> +#include <sys/socket.h> +#include <sys/sockio.h> + +#include <net/bpf.h> +#include <net/if.h> +#include <net/if_var.h> +#include <net/if_clone.h> +#include <net/if_pflog.h> +#include <net/if_types.h> +#include <net/vnet.h> +#include <net/pfvar.h> + +#if defined(INET) || defined(INET6) +#include <netinet/in.h> +#endif +#ifdef INET +#include <netinet/in_var.h> +#include <netinet/ip.h> +#endif + +#ifdef INET6 +#include <netinet6/in6_var.h> +#include <netinet6/nd6.h> +#endif /* INET6 */ + +#ifdef INET +#include <machine/in_cksum.h> +#endif /* INET */ + +#define PFLOGMTU (32768 + MHLEN + MLEN) + +#ifdef PFLOGDEBUG +#define DPRINTF(x) do { if (pflogdebug) printf x ; } while (0) +#else +#define DPRINTF(x) +#endif + +static int pflogoutput(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); +static void pflogattach(int); +static int pflogioctl(struct ifnet *, u_long, caddr_t); +static void pflogstart(struct ifnet *); +static int pflog_clone_create(struct if_clone *, int, caddr_t); +static void pflog_clone_destroy(struct ifnet *); + +static const char pflogname[] = "pflog"; + +static VNET_DEFINE(struct if_clone *, pflog_cloner); +#define V_pflog_cloner VNET(pflog_cloner) + +VNET_DEFINE(struct ifnet *, pflogifs[PFLOGIFS_MAX]); /* for fast access */ +#define V_pflogifs VNET(pflogifs) + +static void +pflogattach(int npflog __unused) +{ + int i; + for (i = 0; i < PFLOGIFS_MAX; i++) + V_pflogifs[i] = NULL; + V_pflog_cloner = if_clone_simple(pflogname, pflog_clone_create, + pflog_clone_destroy, 1); +} + +static int +pflog_clone_create(struct if_clone *ifc, int unit, caddr_t param) +{ + struct ifnet *ifp; + + if (unit >= PFLOGIFS_MAX) + return (EINVAL); + + ifp = if_alloc(IFT_PFLOG); + if (ifp == NULL) { + return (ENOSPC); + } + if_initname(ifp, pflogname, unit); + ifp->if_mtu = PFLOGMTU; + ifp->if_ioctl = pflogioctl; + ifp->if_output = pflogoutput; + ifp->if_start = pflogstart; + ifp->if_snd.ifq_maxlen = ifqmaxlen; + ifp->if_hdrlen = PFLOG_HDRLEN; + if_attach(ifp); + + bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN); + + V_pflogifs[unit] = ifp; + + return (0); +} + +static void +pflog_clone_destroy(struct ifnet *ifp) +{ + int i; + + for (i = 0; i < PFLOGIFS_MAX; i++) + if (V_pflogifs[i] == ifp) + V_pflogifs[i] = NULL; + + bpfdetach(ifp); + if_detach(ifp); + if_free(ifp); +} + +/* + * Start output on the pflog interface. 
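+ * Nothing is ever really transmitted here: pflogstart() below merely
+ * drains and frees whatever was queued, since pflog frames reach
+ * userland only through bpf(4) listeners, e.g. a typical
+ * "tcpdump -n -e -ttt -i pflog0".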
+ */ +static void +pflogstart(struct ifnet *ifp) +{ + struct mbuf *m; + + for (;;) { + IF_LOCK(&ifp->if_snd); + _IF_DEQUEUE(&ifp->if_snd, m); + IF_UNLOCK(&ifp->if_snd); + + if (m == NULL) + return; + else + m_freem(m); + } +} + +static int +pflogoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *rt) +{ + m_freem(m); + return (0); +} + +/* ARGSUSED */ +static int +pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + switch (cmd) { + case SIOCSIFFLAGS: + if (ifp->if_flags & IFF_UP) + ifp->if_drv_flags |= IFF_DRV_RUNNING; + else + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + break; + default: + return (ENOTTY); + } + + return (0); +} + +static int +pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, + u_int8_t reason, struct pf_rule *rm, struct pf_rule *am, + struct pf_ruleset *ruleset, struct pf_pdesc *pd, int lookupsafe) +{ + struct ifnet *ifn; + struct pfloghdr hdr; + + if (kif == NULL || m == NULL || rm == NULL || pd == NULL) + return ( 1); + + if ((ifn = V_pflogifs[rm->logif]) == NULL || !ifn->if_bpf) + return (0); + + bzero(&hdr, sizeof(hdr)); + hdr.length = PFLOG_REAL_HDRLEN; + hdr.af = af; + hdr.action = rm->action; + hdr.reason = reason; + memcpy(hdr.ifname, kif->pfik_name, sizeof(hdr.ifname)); + + if (am == NULL) { + hdr.rulenr = htonl(rm->nr); + hdr.subrulenr = 1; + } else { + hdr.rulenr = htonl(am->nr); + hdr.subrulenr = htonl(rm->nr); + if (ruleset != NULL && ruleset->anchor != NULL) + strlcpy(hdr.ruleset, ruleset->anchor->name, + sizeof(hdr.ruleset)); + } + /* + * XXXGL: we avoid pf_socket_lookup() when we are holding + * state lock, since this leads to unsafe LOR. + * These conditions are very very rare, however. + */ + if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done && lookupsafe) + pd->lookup.done = pf_socket_lookup(dir, pd, m); + if (pd->lookup.done > 0) + hdr.uid = pd->lookup.uid; + else + hdr.uid = UID_MAX; + hdr.pid = NO_PID; + hdr.rule_uid = rm->cuid; + hdr.rule_pid = rm->cpid; + hdr.dir = dir; + +#ifdef INET + if (af == AF_INET && dir == PF_OUT) { + struct ip *ip; + + ip = mtod(m, struct ip *); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + } +#endif /* INET */ + + if_inc_counter(ifn, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifn, IFCOUNTER_OBYTES, m->m_pkthdr.len); + BPF_MTAP2(ifn, &hdr, PFLOG_HDRLEN, m); + + return (0); +} + +static void +vnet_pflog_init(const void *unused __unused) +{ + + pflogattach(1); +} +VNET_SYSINIT(vnet_pflog_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, + vnet_pflog_init, NULL); + +static void +vnet_pflog_uninit(const void *unused __unused) +{ + + if_clone_detach(V_pflog_cloner); +} +/* + * Detach after pf is gone; otherwise we might touch pflog memory + * from within pf after freeing pflog. + */ +VNET_SYSUNINIT(vnet_pflog_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND, + vnet_pflog_uninit, NULL); + +static int +pflog_modevent(module_t mod, int type, void *data) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + PF_RULES_WLOCK(); + pflog_packet_ptr = pflog_packet; + PF_RULES_WUNLOCK(); + break; + case MOD_UNLOAD: + PF_RULES_WLOCK(); + pflog_packet_ptr = NULL; + PF_RULES_WUNLOCK(); + break; + default: + error = EOPNOTSUPP; + break; + } + + return error; +} + +static moduledata_t pflog_mod = { pflogname, pflog_modevent, 0 }; + +#define PFLOG_MODVER 1 + +/* Do not run before pf is initialized as we depend on its locks. 
*/ +DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); +MODULE_VERSION(pflog, PFLOG_MODVER); +MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER); diff --git a/freebsd/sys/netpfil/pf/if_pfsync.c b/freebsd/sys/netpfil/pf/if_pfsync.c new file mode 100644 index 00000000..d6a0dfc0 --- /dev/null +++ b/freebsd/sys/netpfil/pf/if_pfsync.c @@ -0,0 +1,2421 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2002 Michael Shalayeff + * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* + * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ + * + * Revisions picked from OpenBSD after revision 1.110 import: + * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() + * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates + * 1.120, 1.175 - use monotonic time_uptime + * 1.122 - reduce number of updates for non-TCP sessions + * 1.125, 1.127 - rewrite merge or stale processing + * 1.128 - cleanups + * 1.146 - bzero() mbuf before sparsely filling it with data + * 1.170 - SIOCSIFMTU checks + * 1.126, 1.142 - deferred packets processing + * 1.173 - correct expire time processing + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#include <rtems/bsd/local/opt_pf.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/bus.h> +#include <sys/endian.h> +#include <sys/interrupt.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/priv.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> + +#include <net/bpf.h> +#include <net/if.h> +#include <net/if_var.h> +#include <net/if_clone.h> +#include <net/if_types.h> +#include <net/vnet.h> +#include <net/pfvar.h> +#include <net/if_pfsync.h> + +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/ip_carp.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> + +#define PFSYNC_MINPKT ( \ + sizeof(struct ip) + \ + sizeof(struct pfsync_header) + \ + sizeof(struct pfsync_subheader) ) + +struct pfsync_pkt { + struct ip *ip; + struct in_addr src; + u_int8_t flags; +}; + +static int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, + struct pfsync_state_peer *); +static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); + +static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { + pfsync_in_clr, /* PFSYNC_ACT_CLR */ + pfsync_in_ins, /* PFSYNC_ACT_INS */ + pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ + pfsync_in_upd, /* PFSYNC_ACT_UPD */ + pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ + pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ + pfsync_in_del, /* PFSYNC_ACT_DEL */ + pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ + pfsync_in_error, /* PFSYNC_ACT_INS_F */ + pfsync_in_error, /* PFSYNC_ACT_DEL_F */ + pfsync_in_bus, /* PFSYNC_ACT_BUS */ + pfsync_in_tdb, /* PFSYNC_ACT_TDB */ + pfsync_in_eof /* PFSYNC_ACT_EOF */ +}; + +struct pfsync_q { + void (*write)(struct pf_state *, void *); + size_t len; + u_int8_t 
action; +}; + +/* we have one of these for every PFSYNC_S_ */ +static void pfsync_out_state(struct pf_state *, void *); +static void pfsync_out_iack(struct pf_state *, void *); +static void pfsync_out_upd_c(struct pf_state *, void *); +static void pfsync_out_del(struct pf_state *, void *); + +static struct pfsync_q pfsync_qs[] = { + { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, + { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, + { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, + { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, + { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } +}; + +static void pfsync_q_ins(struct pf_state *, int); +static void pfsync_q_del(struct pf_state *); + +static void pfsync_update_state(struct pf_state *); + +struct pfsync_upd_req_item { + TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; + struct pfsync_upd_req ur_msg; +}; + +struct pfsync_deferral { + struct pfsync_softc *pd_sc; + TAILQ_ENTRY(pfsync_deferral) pd_entry; + u_int pd_refs; + struct callout pd_tmo; + + struct pf_state *pd_st; + struct mbuf *pd_m; +}; + +struct pfsync_softc { + /* Configuration */ + struct ifnet *sc_ifp; + struct ifnet *sc_sync_if; + struct ip_moptions sc_imo; + struct in_addr sc_sync_peer; + uint32_t sc_flags; +#define PFSYNCF_OK 0x00000001 +#define PFSYNCF_DEFER 0x00000002 +#define PFSYNCF_PUSH 0x00000004 + uint8_t sc_maxupdates; + struct ip sc_template; + struct callout sc_tmo; + struct mtx sc_mtx; + + /* Queued data */ + size_t sc_len; + TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; + TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; + TAILQ_HEAD(, pfsync_deferral) sc_deferrals; + u_int sc_deferred; + void *sc_plus; + size_t sc_pluslen; + + /* Bulk update info */ + struct mtx sc_bulk_mtx; + uint32_t sc_ureq_sent; + int sc_bulk_tries; + uint32_t sc_ureq_received; + int sc_bulk_hashid; + uint64_t sc_bulk_stateid; + uint32_t sc_bulk_creatorid; + struct callout sc_bulk_tmo; + struct callout sc_bulkfail_tmo; +}; + +#define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) +#define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) +#define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) + +#define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) +#define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) +#define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) + +static const char pfsyncname[] = "pfsync"; +static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); +static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; +#define V_pfsyncif VNET(pfsyncif) +static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL; +#define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) +static VNET_DEFINE(struct pfsyncstats, pfsyncstats); +#define V_pfsyncstats VNET(pfsyncstats) +static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW; +#define V_pfsync_carp_adj VNET(pfsync_carp_adj) + +static void pfsync_timeout(void *); +static void pfsync_push(struct pfsync_softc *); +static void pfsyncintr(void *); +static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, + void *); +static void pfsync_multicast_cleanup(struct pfsync_softc *); +static void pfsync_pointers_init(void); +static void pfsync_pointers_uninit(void); +static int pfsync_init(void); +static void pfsync_uninit(void); + +SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); +SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(pfsyncstats), pfsyncstats, + "PFSYNC statistics (struct 
pfsyncstats, net/if_pfsync.h)"); +SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, + &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); + +static int pfsync_clone_create(struct if_clone *, int, caddr_t); +static void pfsync_clone_destroy(struct ifnet *); +static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, + struct pf_state_peer *); +static int pfsyncoutput(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); +static int pfsyncioctl(struct ifnet *, u_long, caddr_t); + +static int pfsync_defer(struct pf_state *, struct mbuf *); +static void pfsync_undefer(struct pfsync_deferral *, int); +static void pfsync_undefer_state(struct pf_state *, int); +static void pfsync_defer_tmo(void *); + +static void pfsync_request_update(u_int32_t, u_int64_t); +static void pfsync_update_state_req(struct pf_state *); + +static void pfsync_drop(struct pfsync_softc *); +static void pfsync_sendout(int); +static void pfsync_send_plus(void *, size_t); + +static void pfsync_bulk_start(void); +static void pfsync_bulk_status(u_int8_t); +static void pfsync_bulk_update(void *); +static void pfsync_bulk_fail(void *); + +#ifdef IPSEC +static void pfsync_update_net_tdb(struct pfsync_tdb *); +#endif + +#define PFSYNC_MAX_BULKTRIES 12 + +VNET_DEFINE(struct if_clone *, pfsync_cloner); +#define V_pfsync_cloner VNET(pfsync_cloner) + +static int +pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) +{ + struct pfsync_softc *sc; + struct ifnet *ifp; + int q; + + if (unit != 0) + return (EINVAL); + + sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); + sc->sc_flags |= PFSYNCF_OK; + + for (q = 0; q < PFSYNC_S_COUNT; q++) + TAILQ_INIT(&sc->sc_qs[q]); + + TAILQ_INIT(&sc->sc_upd_req_list); + TAILQ_INIT(&sc->sc_deferrals); + + sc->sc_len = PFSYNC_MINPKT; + sc->sc_maxupdates = 128; + + ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); + if (ifp == NULL) { + free(sc, M_PFSYNC); + return (ENOSPC); + } + if_initname(ifp, pfsyncname, unit); + ifp->if_softc = sc; + ifp->if_ioctl = pfsyncioctl; + ifp->if_output = pfsyncoutput; + ifp->if_type = IFT_PFSYNC; + ifp->if_snd.ifq_maxlen = ifqmaxlen; + ifp->if_hdrlen = sizeof(struct pfsync_header); + ifp->if_mtu = ETHERMTU; + mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); + mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); + callout_init(&sc->sc_tmo, 1); + callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); + callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); + + if_attach(ifp); + + bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); + + V_pfsyncif = sc; + + return (0); +} + +static void +pfsync_clone_destroy(struct ifnet *ifp) +{ + struct pfsync_softc *sc = ifp->if_softc; + + /* + * At this stage, everything should have already been + * cleared by pfsync_uninit(), and we have only to + * drain callouts. 
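+ * For every still-pending deferral, a callout_stop() that returns
+ * nonzero means the timeout had not fired yet, so we still own the
+ * state reference and the mbuf and must free them here; otherwise
+ * pfsync_defer_tmo() is already running and callout_drain() waits for
+ * it to finish before the descriptor is freed.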
+ */ + while (sc->sc_deferred > 0) { + struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); + + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + if (callout_stop(&pd->pd_tmo) > 0) { + pf_release_state(pd->pd_st); + m_freem(pd->pd_m); + free(pd, M_PFSYNC); + } else { + pd->pd_refs++; + callout_drain(&pd->pd_tmo); + free(pd, M_PFSYNC); + } + } + + callout_drain(&sc->sc_tmo); + callout_drain(&sc->sc_bulkfail_tmo); + callout_drain(&sc->sc_bulk_tmo); + + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); + bpfdetach(ifp); + if_detach(ifp); + + pfsync_drop(sc); + + if_free(ifp); + if (sc->sc_imo.imo_membership) + pfsync_multicast_cleanup(sc); + mtx_destroy(&sc->sc_mtx); + mtx_destroy(&sc->sc_bulk_mtx); + free(sc, M_PFSYNC); + + V_pfsyncif = NULL; +} + +static int +pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, + struct pf_state_peer *d) +{ + if (s->scrub.scrub_flag && d->scrub == NULL) { + d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); + if (d->scrub == NULL) + return (ENOMEM); + } + + return (0); +} + + +static int +pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) +{ + struct pfsync_softc *sc = V_pfsyncif; +#ifndef __NO_STRICT_ALIGNMENT + struct pfsync_state_key key[2]; +#endif + struct pfsync_state_key *kw, *ks; + struct pf_state *st = NULL; + struct pf_state_key *skw = NULL, *sks = NULL; + struct pf_rule *r = NULL; + struct pfi_kif *kif; + int error; + + PF_RULES_RASSERT(); + + if (sp->creatorid == 0) { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: invalid creator id: %08x\n", __func__, + ntohl(sp->creatorid)); + return (EINVAL); + } + + if ((kif = pfi_kif_find(sp->ifname)) == NULL) { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: unknown interface: %s\n", __func__, + sp->ifname); + if (flags & PFSYNC_SI_IOCTL) + return (EINVAL); + return (0); /* skip this state */ + } + + /* + * If the ruleset checksums match or the state is coming from the ioctl, + * it's safe to associate the state with the rule of that number. + */ + if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && + (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < + pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) + r = pf_main_ruleset.rules[ + PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; + else + r = &V_pf_default_rule; + + if ((r->max_states && + counter_u64_fetch(r->states_cur) >= r->max_states)) + goto cleanup; + + /* + * XXXGL: consider M_WAITOK in ioctl path after. + */ + if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL) + goto cleanup; + + if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) + goto cleanup; + +#ifndef __NO_STRICT_ALIGNMENT + bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); + kw = &key[PF_SK_WIRE]; + ks = &key[PF_SK_STACK]; +#else + kw = &sp->key[PF_SK_WIRE]; + ks = &sp->key[PF_SK_STACK]; +#endif + + if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || + PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || + kw->port[0] != ks->port[0] || + kw->port[1] != ks->port[1]) { + sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sks == NULL) + goto cleanup; + } else + sks = skw; + + /* allocate memory for scrub info */ + if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || + pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) + goto cleanup; + + /* Copy to state key(s). 
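+	 * PF_SK_WIRE is the key as the addresses appear on the wire,
+	 * PF_SK_STACK the key after any NAT rewriting; when the two
+	 * compared equal above, a single pf_state_key serves both
+	 * roles (sks == skw).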
*/ + skw->addr[0] = kw->addr[0]; + skw->addr[1] = kw->addr[1]; + skw->port[0] = kw->port[0]; + skw->port[1] = kw->port[1]; + skw->proto = sp->proto; + skw->af = sp->af; + if (sks != skw) { + sks->addr[0] = ks->addr[0]; + sks->addr[1] = ks->addr[1]; + sks->port[0] = ks->port[0]; + sks->port[1] = ks->port[1]; + sks->proto = sp->proto; + sks->af = sp->af; + } + + /* copy to state */ + bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); + st->creation = time_uptime - ntohl(sp->creation); + st->expire = time_uptime; + if (sp->expire) { + uint32_t timeout; + + timeout = r->timeout[sp->timeout]; + if (!timeout) + timeout = V_pf_default_rule.timeout[sp->timeout]; + + /* sp->expire may have been adaptively scaled by export. */ + st->expire -= timeout - ntohl(sp->expire); + } + + st->direction = sp->direction; + st->log = sp->log; + st->timeout = sp->timeout; + st->state_flags = sp->state_flags; + + st->id = sp->id; + st->creatorid = sp->creatorid; + pf_state_peer_ntoh(&sp->src, &st->src); + pf_state_peer_ntoh(&sp->dst, &st->dst); + + st->rule.ptr = r; + st->nat_rule.ptr = NULL; + st->anchor.ptr = NULL; + st->rt_kif = NULL; + + st->pfsync_time = time_uptime; + st->sync_state = PFSYNC_S_NONE; + + if (!(flags & PFSYNC_SI_IOCTL)) + st->state_flags |= PFSTATE_NOSYNC; + + if ((error = pf_state_insert(kif, skw, sks, st)) != 0) + goto cleanup_state; + + /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ + counter_u64_add(r->states_cur, 1); + counter_u64_add(r->states_tot, 1); + + if (!(flags & PFSYNC_SI_IOCTL)) { + st->state_flags &= ~PFSTATE_NOSYNC; + if (st->state_flags & PFSTATE_ACK) { + pfsync_q_ins(st, PFSYNC_S_IACK); + pfsync_push(sc); + } + } + st->state_flags &= ~PFSTATE_ACK; + PF_STATE_UNLOCK(st); + + return (0); + +cleanup: + error = ENOMEM; + if (skw == sks) + sks = NULL; + if (skw != NULL) + uma_zfree(V_pf_state_key_z, skw); + if (sks != NULL) + uma_zfree(V_pf_state_key_z, sks); + +cleanup_state: /* pf_state_insert() frees the state keys. */ + if (st) { + if (st->dst.scrub) + uma_zfree(V_pf_state_scrub_z, st->dst.scrub); + if (st->src.scrub) + uma_zfree(V_pf_state_scrub_z, st->src.scrub); + uma_zfree(V_pf_state_z, st); + } + return (error); +} + +static int +pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) +{ + struct pfsync_softc *sc = V_pfsyncif; + struct pfsync_pkt pkt; + struct mbuf *m = *mp; + struct ip *ip = mtod(m, struct ip *); + struct pfsync_header *ph; + struct pfsync_subheader subh; + + int offset, len; + int rv; + uint16_t count; + + *mp = NULL; + V_pfsyncstats.pfsyncs_ipackets++; + + /* Verify that we have a sync interface configured. */ + if (!sc || !sc->sc_sync_if || !V_pf_status.running || + (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + goto done; + + /* verify that the packet came in on the right interface */ + if (sc->sc_sync_if != m->m_pkthdr.rcvif) { + V_pfsyncstats.pfsyncs_badif++; + goto done; + } + + if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + /* verify that the IP TTL is 255. 
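+	 * pfsync always transmits with a TTL of PFSYNC_DFLTTL (255), so a
+	 * packet arriving with a smaller TTL passed through a router and
+	 * cannot originate from a directly attached peer.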
+	 */
+	if (ip->ip_ttl != PFSYNC_DFLTTL) {
+		V_pfsyncstats.pfsyncs_badttl++;
+		goto done;
+	}
+
+	offset = ip->ip_hl << 2;
+	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
+		V_pfsyncstats.pfsyncs_hdrops++;
+		goto done;
+	}
+
+	if (offset + sizeof(*ph) > m->m_len) {
+		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
+			V_pfsyncstats.pfsyncs_hdrops++;
+			return (IPPROTO_DONE);
+		}
+		ip = mtod(m, struct ip *);
+	}
+	ph = (struct pfsync_header *)((char *)ip + offset);
+
+	/* verify the version */
+	if (ph->version != PFSYNC_VERSION) {
+		V_pfsyncstats.pfsyncs_badver++;
+		goto done;
+	}
+
+	len = ntohs(ph->len) + offset;
+	if (m->m_pkthdr.len < len) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		goto done;
+	}
+
+	/* Cheaper to grab this now than having to mess with mbufs later */
+	pkt.ip = ip;
+	pkt.src = ip->ip_src;
+	pkt.flags = 0;
+
+	/*
+	 * Trusting pf_chksum during packet processing, as well as looking
+	 * up entries in the interface name tree, requires holding
+	 * PF_RULES_RLOCK().
+	 */
+	PF_RULES_RLOCK();
+	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
+		pkt.flags |= PFSYNC_SI_CKSUM;
+
+	offset += sizeof(*ph);
+	while (offset <= len - sizeof(subh)) {
+		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
+		offset += sizeof(subh);
+
+		if (subh.action >= PFSYNC_ACT_MAX) {
+			V_pfsyncstats.pfsyncs_badact++;
+			PF_RULES_RUNLOCK();
+			goto done;
+		}
+
+		count = ntohs(subh.count);
+		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
+		rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count);
+		if (rv == -1) {
+			PF_RULES_RUNLOCK();
+			return (IPPROTO_DONE);
+		}
+
+		offset += rv;
+	}
+	PF_RULES_RUNLOCK();
+
+done:
+	m_freem(m);
+	return (IPPROTO_DONE);
+}
+
+static int
+pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_clr *clr;
+	struct mbuf *mp;
+	int len = sizeof(*clr) * count;
+	int i, offp;
+	u_int32_t creatorid;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	clr = (struct pfsync_clr *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		creatorid = clr[i].creatorid;
+
+		if (clr[i].ifname[0] != '\0' &&
+		    pfi_kif_find(clr[i].ifname) == NULL)
+			continue;
+
+		for (int i = 0; i <= pf_hashmask; i++) {
+			struct pf_idhash *ih = &V_pf_idhash[i];
+			struct pf_state *s;
+relock:
+			PF_HASHROW_LOCK(ih);
+			LIST_FOREACH(s, &ih->states, entry) {
+				if (s->creatorid == creatorid) {
+					s->state_flags |= PFSTATE_NOSYNC;
+					pf_unlink_state(s, PF_ENTER_LOCKED);
+					goto relock;
+				}
+			}
+			PF_HASHROW_UNLOCK(ih);
+		}
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct mbuf *mp;
+	struct pfsync_state *sa, *sp;
+	int len = sizeof(*sp) * count;
+	int i, offp;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	sa = (struct pfsync_state *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		sp = &sa[i];
+
+		/* Check for invalid values. */
+		if (sp->timeout >= PFTM_MAX ||
+		    sp->src.state > PF_TCPS_PROXY_DST ||
+		    sp->dst.state > PF_TCPS_PROXY_DST ||
+		    sp->direction > PF_OUT ||
+		    (sp->af != AF_INET && sp->af != AF_INET6)) {
+			if (V_pf_status.debug >= PF_DEBUG_MISC)
+				printf("%s: invalid value\n", __func__);
+			V_pfsyncstats.pfsyncs_badval++;
+			continue;
+		}
+
+		if (pfsync_state_import(sp, pkt->flags) == ENOMEM)
+			/* Drop out, but process the rest of the actions.
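+			 * Only ENOMEM aborts this loop; any other import
+			 * error affects a single state, so the remaining
+			 * entries are still examined.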
+			 */
+			break;
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_ins_ack *ia, *iaa;
+	struct pf_state *st;
+
+	struct mbuf *mp;
+	int len = count * sizeof(*ia);
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		ia = &iaa[i];
+
+		st = pf_find_state_byid(ia->id, ia->creatorid);
+		if (st == NULL)
+			continue;
+
+		if (st->state_flags & PFSTATE_ACK) {
+			PFSYNC_LOCK(V_pfsyncif);
+			pfsync_undefer_state(st, 0);
+			PFSYNC_UNLOCK(V_pfsyncif);
+		}
+		PF_STATE_UNLOCK(st);
+	}
+	/*
+	 * XXX this is not yet implemented, but we know the size of the
+	 * message so we can skip it.
+	 */
+
+	return (count * sizeof(struct pfsync_ins_ack));
+}
+
+static int
+pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
+    struct pfsync_state_peer *dst)
+{
+	int sync = 0;
+
+	PF_STATE_LOCK_ASSERT(st);
+
+	/*
+	 * The state should never go backwards except
+	 * for syn-proxy states.  Neither should the
+	 * sequence window slide backwards.
+	 */
+	if ((st->src.state > src->state &&
+	    (st->src.state < PF_TCPS_PROXY_SRC ||
+	    src->state >= PF_TCPS_PROXY_SRC)) ||
+
+	    (st->src.state == src->state &&
+	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
+		sync++;
+	else
+		pf_state_peer_ntoh(src, &st->src);
+
+	if ((st->dst.state > dst->state) ||
+
+	    (st->dst.state >= TCPS_SYN_SENT &&
+	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
+		sync++;
+	else
+		pf_state_peer_ntoh(dst, &st->dst);
+
+	return (sync);
+}
+
+static int
+pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_state *sa, *sp;
+	struct pf_state *st;
+	int sync;
+
+	struct mbuf *mp;
+	int len = count * sizeof(*sp);
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	sa = (struct pfsync_state *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		sp = &sa[i];
+
+		/* check for invalid values */
+		if (sp->timeout >= PFTM_MAX ||
+		    sp->src.state > PF_TCPS_PROXY_DST ||
+		    sp->dst.state > PF_TCPS_PROXY_DST) {
+			if (V_pf_status.debug >= PF_DEBUG_MISC) {
+				printf("pfsync_input: PFSYNC_ACT_UPD: "
+				    "invalid value\n");
+			}
+			V_pfsyncstats.pfsyncs_badval++;
+			continue;
+		}
+
+		st = pf_find_state_byid(sp->id, sp->creatorid);
+		if (st == NULL) {
+			/* insert the update */
+			if (pfsync_state_import(sp, 0))
+				V_pfsyncstats.pfsyncs_badstate++;
+			continue;
+		}
+
+		if (st->state_flags & PFSTATE_ACK) {
+			PFSYNC_LOCK(sc);
+			pfsync_undefer_state(st, 1);
+			PFSYNC_UNLOCK(sc);
+		}
+
+		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
+			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
+		else {
+			sync = 0;
+
+			/*
+			 * Non-TCP protocol state machines always go
+			 * forward.
+			 */
+			if (st->src.state > sp->src.state)
+				sync++;
+			else
+				pf_state_peer_ntoh(&sp->src, &st->src);
+			if (st->dst.state > sp->dst.state)
+				sync++;
+			else
+				pf_state_peer_ntoh(&sp->dst, &st->dst);
+		}
+		if (sync < 2) {
+			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
+			pf_state_peer_ntoh(&sp->dst, &st->dst);
+			st->expire = time_uptime;
+			st->timeout = sp->timeout;
+		}
+		st->pfsync_time = time_uptime;
+
+		if (sync) {
+			V_pfsyncstats.pfsyncs_stale++;
+
+			pfsync_update_state(st);
+			PF_STATE_UNLOCK(st);
+			PFSYNC_LOCK(sc);
+			pfsync_push(sc);
+			PFSYNC_UNLOCK(sc);
+			continue;
+		}
+		PF_STATE_UNLOCK(st);
+	}
+
+	return (len);
+}
+
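+/*
+ * PFSYNC_ACT_UPD_C is the compressed form of an update: it carries only
+ * the state id, creator id, peer data and timeout instead of a full
+ * struct pfsync_state.  The handler below therefore mirrors
+ * pfsync_in_upd(), except that an unknown id makes us ask the peer for
+ * the full state via pfsync_request_update().
+ */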
+static int
+pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_upd_c *ua, *up;
+	struct pf_state *st;
+	int len = count * sizeof(*up);
+	int sync;
+	struct mbuf *mp;
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		up = &ua[i];
+
+		/* check for invalid values */
+		if (up->timeout >= PFTM_MAX ||
+		    up->src.state > PF_TCPS_PROXY_DST ||
+		    up->dst.state > PF_TCPS_PROXY_DST) {
+			if (V_pf_status.debug >= PF_DEBUG_MISC) {
+				printf("pfsync_input: "
+				    "PFSYNC_ACT_UPD_C: "
+				    "invalid value\n");
+			}
+			V_pfsyncstats.pfsyncs_badval++;
+			continue;
+		}
+
+		st = pf_find_state_byid(up->id, up->creatorid);
+		if (st == NULL) {
+			/* We don't have this state. Ask for it. */
+			PFSYNC_LOCK(sc);
+			pfsync_request_update(up->creatorid, up->id);
+			PFSYNC_UNLOCK(sc);
+			continue;
+		}
+
+		if (st->state_flags & PFSTATE_ACK) {
+			PFSYNC_LOCK(sc);
+			pfsync_undefer_state(st, 1);
+			PFSYNC_UNLOCK(sc);
+		}
+
+		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
+			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
+		else {
+			sync = 0;
+
+			/*
+			 * Non-TCP protocol state machines always go
+			 * forward.
+			 */
+			if (st->src.state > up->src.state)
+				sync++;
+			else
+				pf_state_peer_ntoh(&up->src, &st->src);
+			if (st->dst.state > up->dst.state)
+				sync++;
+			else
+				pf_state_peer_ntoh(&up->dst, &st->dst);
+		}
+		if (sync < 2) {
+			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
+			pf_state_peer_ntoh(&up->dst, &st->dst);
+			st->expire = time_uptime;
+			st->timeout = up->timeout;
+		}
+		st->pfsync_time = time_uptime;
+
+		if (sync) {
+			V_pfsyncstats.pfsyncs_stale++;
+
+			pfsync_update_state(st);
+			PF_STATE_UNLOCK(st);
+			PFSYNC_LOCK(sc);
+			pfsync_push(sc);
+			PFSYNC_UNLOCK(sc);
+			continue;
+		}
+		PF_STATE_UNLOCK(st);
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_upd_req *ur, *ura;
+	struct mbuf *mp;
+	int len = count * sizeof(*ur);
+	int i, offp;
+
+	struct pf_state *st;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	ura = (struct pfsync_upd_req *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		ur = &ura[i];
+
+		if (ur->id == 0 && ur->creatorid == 0)
+			pfsync_bulk_start();
+		else {
+			st = pf_find_state_byid(ur->id, ur->creatorid);
+			if (st == NULL) {
+				V_pfsyncstats.pfsyncs_badstate++;
+				continue;
+			}
+			if (st->state_flags & PFSTATE_NOSYNC) {
+				PF_STATE_UNLOCK(st);
+				continue;
+			}
+
+			pfsync_update_state_req(st);
+			PF_STATE_UNLOCK(st);
+		}
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct mbuf *mp;
+	struct pfsync_state *sa, *sp;
+	struct pf_state *st;
+	int len = count * sizeof(*sp);
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	sa = (struct pfsync_state *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		sp = &sa[i];
+
+		st = pf_find_state_byid(sp->id, sp->creatorid);
+		if (st == NULL) {
+			V_pfsyncstats.pfsyncs_badstate++;
+			continue;
+		}
+		st->state_flags |= PFSTATE_NOSYNC;
+		pf_unlink_state(st, PF_ENTER_LOCKED);
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct mbuf *mp;
+	struct
pfsync_del_c *sa, *sp; + struct pf_state *st; + int len = count * sizeof(*sp); + int offp, i; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + sa = (struct pfsync_del_c *)(mp->m_data + offp); + + for (i = 0; i < count; i++) { + sp = &sa[i]; + + st = pf_find_state_byid(sp->id, sp->creatorid); + if (st == NULL) { + V_pfsyncstats.pfsyncs_badstate++; + continue; + } + + st->state_flags |= PFSTATE_NOSYNC; + pf_unlink_state(st, PF_ENTER_LOCKED); + } + + return (len); +} + +static int +pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_softc *sc = V_pfsyncif; + struct pfsync_bus *bus; + struct mbuf *mp; + int len = count * sizeof(*bus); + int offp; + + PFSYNC_BLOCK(sc); + + /* If we're not waiting for a bulk update, who cares. */ + if (sc->sc_ureq_sent == 0) { + PFSYNC_BUNLOCK(sc); + return (len); + } + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + PFSYNC_BUNLOCK(sc); + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + bus = (struct pfsync_bus *)(mp->m_data + offp); + + switch (bus->status) { + case PFSYNC_BUS_START: + callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + + V_pf_limits[PF_LIMIT_STATES].limit / + ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / + sizeof(struct pfsync_state)), + pfsync_bulk_fail, sc); + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received bulk update start\n"); + break; + + case PFSYNC_BUS_END: + if (time_uptime - ntohl(bus->endtime) >= + sc->sc_ureq_sent) { + /* that's it, we're happy */ + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; + callout_stop(&sc->sc_bulkfail_tmo); + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + (*carp_demote_adj_p)(-V_pfsync_carp_adj, + "pfsync bulk done"); + sc->sc_flags |= PFSYNCF_OK; + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received valid " + "bulk update end\n"); + } else { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received invalid " + "bulk update end: bad timestamp\n"); + } + break; + } + PFSYNC_BUNLOCK(sc); + + return (len); +} + +static int +pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + int len = count * sizeof(struct pfsync_tdb); + +#if defined(IPSEC) + struct pfsync_tdb *tp; + struct mbuf *mp; + int offp; + int i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + tp = (struct pfsync_tdb *)(mp->m_data + offp); + + for (i = 0; i < count; i++) + pfsync_update_net_tdb(&tp[i]); +#endif + + return (len); +} + +#if defined(IPSEC) +/* Update an in-kernel tdb. Silently fail if no tdb is found. */ +static void +pfsync_update_net_tdb(struct pfsync_tdb *pt) +{ + struct tdb *tdb; + int s; + + /* check for invalid values */ + if (ntohl(pt->spi) <= SPI_RESERVED_MAX || + (pt->dst.sa.sa_family != AF_INET && + pt->dst.sa.sa_family != AF_INET6)) + goto bad; + + tdb = gettdb(pt->spi, &pt->dst, pt->sproto); + if (tdb) { + pt->rpl = ntohl(pt->rpl); + pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); + + /* Neither replay nor byte counter should ever decrease. 
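+		 * A decrease would mean the update is stale or replayed,
+		 * so it is rejected instead of applied.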
*/ + if (pt->rpl < tdb->tdb_rpl || + pt->cur_bytes < tdb->tdb_cur_bytes) { + goto bad; + } + + tdb->tdb_rpl = pt->rpl; + tdb->tdb_cur_bytes = pt->cur_bytes; + } + return; + +bad: + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " + "invalid value\n"); + V_pfsyncstats.pfsyncs_badstate++; + return; +} +#endif + + +static int +pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + /* check if we are at the right place in the packet */ + if (offset != m->m_pkthdr.len) + V_pfsyncstats.pfsyncs_badlen++; + + /* we're done. free and let the caller return */ + m_freem(m); + return (-1); +} + +static int +pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + V_pfsyncstats.pfsyncs_badact++; + + m_freem(m); + return (-1); +} + +static int +pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *rt) +{ + m_freem(m); + return (0); +} + +/* ARGSUSED */ +static int +pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct pfsync_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + struct pfsyncreq pfsyncr; + int error; + + switch (cmd) { + case SIOCSIFFLAGS: + PFSYNC_LOCK(sc); + if (ifp->if_flags & IFF_UP) { + ifp->if_drv_flags |= IFF_DRV_RUNNING; + PFSYNC_UNLOCK(sc); + pfsync_pointers_init(); + } else { + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + PFSYNC_UNLOCK(sc); + pfsync_pointers_uninit(); + } + break; + case SIOCSIFMTU: + if (!sc->sc_sync_if || + ifr->ifr_mtu <= PFSYNC_MINPKT || + ifr->ifr_mtu > sc->sc_sync_if->if_mtu) + return (EINVAL); + if (ifr->ifr_mtu < ifp->if_mtu) { + PFSYNC_LOCK(sc); + if (sc->sc_len > PFSYNC_MINPKT) + pfsync_sendout(1); + PFSYNC_UNLOCK(sc); + } + ifp->if_mtu = ifr->ifr_mtu; + break; + case SIOCGETPFSYNC: + bzero(&pfsyncr, sizeof(pfsyncr)); + PFSYNC_LOCK(sc); + if (sc->sc_sync_if) { + strlcpy(pfsyncr.pfsyncr_syncdev, + sc->sc_sync_if->if_xname, IFNAMSIZ); + } + pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; + pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; + pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == + (sc->sc_flags & PFSYNCF_DEFER)); + PFSYNC_UNLOCK(sc); + return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); + + case SIOCSETPFSYNC: + { + struct ip_moptions *imo = &sc->sc_imo; + struct ifnet *sifp; + struct ip *ip; + void *mship = NULL; + + if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) + return (error); + if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) + return (error); + + if (pfsyncr.pfsyncr_maxupdates > 255) + return (EINVAL); + + if (pfsyncr.pfsyncr_syncdev[0] == 0) + sifp = NULL; + else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) + return (EINVAL); + + if (sifp != NULL && ( + pfsyncr.pfsyncr_syncpeer.s_addr == 0 || + pfsyncr.pfsyncr_syncpeer.s_addr == + htonl(INADDR_PFSYNC_GROUP))) + mship = malloc((sizeof(struct in_multi *) * + IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); + + PFSYNC_LOCK(sc); + if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) + sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); + else + sc->sc_sync_peer.s_addr = + pfsyncr.pfsyncr_syncpeer.s_addr; + + sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; + if (pfsyncr.pfsyncr_defer) { + sc->sc_flags |= PFSYNCF_DEFER; + pfsync_defer_ptr = pfsync_defer; + } else { + sc->sc_flags &= ~PFSYNCF_DEFER; + pfsync_defer_ptr = NULL; + } + + if (sifp == NULL) { + if (sc->sc_sync_if) + if_rele(sc->sc_sync_if); + sc->sc_sync_if = NULL; + if (imo->imo_membership) + pfsync_multicast_cleanup(sc); + PFSYNC_UNLOCK(sc); + 
break; + } + + if (sc->sc_len > PFSYNC_MINPKT && + (sifp->if_mtu < sc->sc_ifp->if_mtu || + (sc->sc_sync_if != NULL && + sifp->if_mtu < sc->sc_sync_if->if_mtu) || + sifp->if_mtu < MCLBYTES - sizeof(struct ip))) + pfsync_sendout(1); + + if (imo->imo_membership) + pfsync_multicast_cleanup(sc); + + if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { + error = pfsync_multicast_setup(sc, sifp, mship); + if (error) { + if_rele(sifp); + free(mship, M_PFSYNC); + return (error); + } + } + if (sc->sc_sync_if) + if_rele(sc->sc_sync_if); + sc->sc_sync_if = sifp; + + ip = &sc->sc_template; + bzero(ip, sizeof(*ip)); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(sc->sc_template) >> 2; + ip->ip_tos = IPTOS_LOWDELAY; + /* len and id are set later. */ + ip->ip_off = htons(IP_DF); + ip->ip_ttl = PFSYNC_DFLTTL; + ip->ip_p = IPPROTO_PFSYNC; + ip->ip_src.s_addr = INADDR_ANY; + ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; + + /* Request a full state table update. */ + if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + (*carp_demote_adj_p)(V_pfsync_carp_adj, + "pfsync bulk start"); + sc->sc_flags &= ~PFSYNCF_OK; + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: requesting bulk update\n"); + pfsync_request_update(0, 0); + PFSYNC_UNLOCK(sc); + PFSYNC_BLOCK(sc); + sc->sc_ureq_sent = time_uptime; + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, + sc); + PFSYNC_BUNLOCK(sc); + + break; + } + default: + return (ENOTTY); + } + + return (0); +} + +static void +pfsync_out_state(struct pf_state *st, void *buf) +{ + struct pfsync_state *sp = buf; + + pfsync_state_export(sp, st); +} + +static void +pfsync_out_iack(struct pf_state *st, void *buf) +{ + struct pfsync_ins_ack *iack = buf; + + iack->id = st->id; + iack->creatorid = st->creatorid; +} + +static void +pfsync_out_upd_c(struct pf_state *st, void *buf) +{ + struct pfsync_upd_c *up = buf; + + bzero(up, sizeof(*up)); + up->id = st->id; + pf_state_peer_hton(&st->src, &up->src); + pf_state_peer_hton(&st->dst, &up->dst); + up->creatorid = st->creatorid; + up->timeout = st->timeout; +} + +static void +pfsync_out_del(struct pf_state *st, void *buf) +{ + struct pfsync_del_c *dp = buf; + + dp->id = st->id; + dp->creatorid = st->creatorid; + st->state_flags |= PFSTATE_NOSYNC; +} + +static void +pfsync_drop(struct pfsync_softc *sc) +{ + struct pf_state *st, *next; + struct pfsync_upd_req_item *ur; + int q; + + for (q = 0; q < PFSYNC_S_COUNT; q++) { + if (TAILQ_EMPTY(&sc->sc_qs[q])) + continue; + + TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { + KASSERT(st->sync_state == q, + ("%s: st->sync_state == q", + __func__)); + st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); + } + TAILQ_INIT(&sc->sc_qs[q]); + } + + while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { + TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); + free(ur, M_PFSYNC); + } + + sc->sc_plus = NULL; + sc->sc_len = PFSYNC_MINPKT; +} + +static void +pfsync_sendout(int schedswi) +{ + struct pfsync_softc *sc = V_pfsyncif; + struct ifnet *ifp = sc->sc_ifp; + struct mbuf *m; + struct ip *ip; + struct pfsync_header *ph; + struct pfsync_subheader *subh; + struct pf_state *st; + struct pfsync_upd_req_item *ur; + int offset; + int q, count = 0; + + KASSERT(sc != NULL, ("%s: null sc", __func__)); + KASSERT(sc->sc_len > PFSYNC_MINPKT, + ("%s: sc_len %zu", __func__, sc->sc_len)); + PFSYNC_LOCK_ASSERT(sc); + + if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { + pfsync_drop(sc); + return; + } + + m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR); + if (m == 
NULL) { + if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); + V_pfsyncstats.pfsyncs_onomem++; + return; + } + m->m_data += max_linkhdr; + m->m_len = m->m_pkthdr.len = sc->sc_len; + + /* build the ip header */ + ip = (struct ip *)m->m_data; + bcopy(&sc->sc_template, ip, sizeof(*ip)); + offset = sizeof(*ip); + + ip->ip_len = htons(m->m_pkthdr.len); + ip_fillid(ip); + + /* build the pfsync header */ + ph = (struct pfsync_header *)(m->m_data + offset); + bzero(ph, sizeof(*ph)); + offset += sizeof(*ph); + + ph->version = PFSYNC_VERSION; + ph->len = htons(sc->sc_len - sizeof(*ip)); + bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); + + /* walk the queues */ + for (q = 0; q < PFSYNC_S_COUNT; q++) { + if (TAILQ_EMPTY(&sc->sc_qs[q])) + continue; + + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); + + count = 0; + TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { + KASSERT(st->sync_state == q, + ("%s: st->sync_state == q", + __func__)); + /* + * XXXGL: some of write methods do unlocked reads + * of state data :( + */ + pfsync_qs[q].write(st, m->m_data + offset); + offset += pfsync_qs[q].len; + st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); + count++; + } + TAILQ_INIT(&sc->sc_qs[q]); + + bzero(subh, sizeof(*subh)); + subh->action = pfsync_qs[q].action; + subh->count = htons(count); + V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; + } + + if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); + + count = 0; + while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { + TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); + + bcopy(&ur->ur_msg, m->m_data + offset, + sizeof(ur->ur_msg)); + offset += sizeof(ur->ur_msg); + free(ur, M_PFSYNC); + count++; + } + + bzero(subh, sizeof(*subh)); + subh->action = PFSYNC_ACT_UPD_REQ; + subh->count = htons(count); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; + } + + /* has someone built a custom region for us to add? 
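+	 * sc_plus is staged by pfsync_send_plus() and already contains a
+	 * preformatted subheader and payload (used for the clear and bulk
+	 * status messages), so it is copied in verbatim.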
*/ + if (sc->sc_plus != NULL) { + bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); + offset += sc->sc_pluslen; + + sc->sc_plus = NULL; + } + + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); + + bzero(subh, sizeof(*subh)); + subh->action = PFSYNC_ACT_EOF; + subh->count = htons(1); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; + + /* we're done, let's put it on the wire */ + if (ifp->if_bpf) { + m->m_data += sizeof(*ip); + m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); + BPF_MTAP(ifp, m); + m->m_data -= sizeof(*ip); + m->m_len = m->m_pkthdr.len = sc->sc_len; + } + + if (sc->sc_sync_if == NULL) { + sc->sc_len = PFSYNC_MINPKT; + m_freem(m); + return; + } + + if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); + sc->sc_len = PFSYNC_MINPKT; + + if (!_IF_QFULL(&sc->sc_ifp->if_snd)) + _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); + else { + m_freem(m); + if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); + } + if (schedswi) + swi_sched(V_pfsync_swi_cookie, 0); +} + +static void +pfsync_insert_state(struct pf_state *st) +{ + struct pfsync_softc *sc = V_pfsyncif; + + if (st->state_flags & PFSTATE_NOSYNC) + return; + + if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || + st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { + st->state_flags |= PFSTATE_NOSYNC; + return; + } + + KASSERT(st->sync_state == PFSYNC_S_NONE, + ("%s: st->sync_state %u", __func__, st->sync_state)); + + PFSYNC_LOCK(sc); + if (sc->sc_len == PFSYNC_MINPKT) + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); + + pfsync_q_ins(st, PFSYNC_S_INS); + PFSYNC_UNLOCK(sc); + + st->sync_updates = 0; +} + +static int +pfsync_defer(struct pf_state *st, struct mbuf *m) +{ + struct pfsync_softc *sc = V_pfsyncif; + struct pfsync_deferral *pd; + + if (m->m_flags & (M_BCAST|M_MCAST)) + return (0); + + PFSYNC_LOCK(sc); + + if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || + !(sc->sc_flags & PFSYNCF_DEFER)) { + PFSYNC_UNLOCK(sc); + return (0); + } + + if (sc->sc_deferred >= 128) + pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); + + pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); + if (pd == NULL) + return (0); + sc->sc_deferred++; + + m->m_flags |= M_SKIP_FIREWALL; + st->state_flags |= PFSTATE_ACK; + + pd->pd_sc = sc; + pd->pd_refs = 0; + pd->pd_st = st; + pf_ref_state(st); + pd->pd_m = m; + + TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); + callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); + callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); + + pfsync_push(sc); + + return (1); +} + +static void +pfsync_undefer(struct pfsync_deferral *pd, int drop) +{ + struct pfsync_softc *sc = pd->pd_sc; + struct mbuf *m = pd->pd_m; + struct pf_state *st = pd->pd_st; + + PFSYNC_LOCK_ASSERT(sc); + + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ + free(pd, M_PFSYNC); + pf_release_state(st); + + if (drop) + m_freem(m); + else { + _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); + pfsync_push(sc); + } +} + +static void +pfsync_defer_tmo(void *arg) +{ + struct pfsync_deferral *pd = arg; + struct pfsync_softc *sc = pd->pd_sc; + struct mbuf *m = pd->pd_m; + struct pf_state *st = pd->pd_st; + + PFSYNC_LOCK_ASSERT(sc); + + CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); + + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! 
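+	 * (the state's flags are written here without the state lock held)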
+	 */
+	if (pd->pd_refs == 0)
+		free(pd, M_PFSYNC);
+	PFSYNC_UNLOCK(sc);
+
+	ip_output(m, NULL, NULL, 0, NULL, NULL);
+
+	pf_release_state(st);
+
+	CURVNET_RESTORE();
+}
+
+static void
+pfsync_undefer_state(struct pf_state *st, int drop)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_deferral *pd;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
+		if (pd->pd_st == st) {
+			if (callout_stop(&pd->pd_tmo) > 0)
+				pfsync_undefer(pd, drop);
+			return;
+		}
+	}
+
+	panic("%s: unable to find deferred state", __func__);
+}
+
+static void
+pfsync_update_state(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	int sync = 0;
+
+	PF_STATE_LOCK_ASSERT(st);
+	PFSYNC_LOCK(sc);
+
+	if (st->state_flags & PFSTATE_ACK)
+		pfsync_undefer_state(st, 0);
+	if (st->state_flags & PFSTATE_NOSYNC) {
+		if (st->sync_state != PFSYNC_S_NONE)
+			pfsync_q_del(st);
+		PFSYNC_UNLOCK(sc);
+		return;
+	}
+
+	if (sc->sc_len == PFSYNC_MINPKT)
+		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+
+	switch (st->sync_state) {
+	case PFSYNC_S_UPD_C:
+	case PFSYNC_S_UPD:
+	case PFSYNC_S_INS:
+		/* we're already handling it */
+
+		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
+			st->sync_updates++;
+			if (st->sync_updates >= sc->sc_maxupdates)
+				sync = 1;
+		}
+		break;
+
+	case PFSYNC_S_IACK:
+		pfsync_q_del(st);
+	case PFSYNC_S_NONE:
+		pfsync_q_ins(st, PFSYNC_S_UPD_C);
+		st->sync_updates = 0;
+		break;
+
+	default:
+		panic("%s: unexpected sync state %d", __func__, st->sync_state);
+	}
+
+	if (sync || (time_uptime - st->pfsync_time) < 2)
+		pfsync_push(sc);
+
+	PFSYNC_UNLOCK(sc);
+}
+
+static void
+pfsync_request_update(u_int32_t creatorid, u_int64_t id)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_upd_req_item *item;
+	size_t nlen = sizeof(struct pfsync_upd_req);
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	/*
+	 * This code does some work to avoid generating multiple update
+	 * requests for the same state.  It searches the current subheader
+	 * queue, but does not look into the queue of already-packed
+	 * datagrams.
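+	 * A duplicate request that already went into a packed datagram
+	 * is merely redundant: the peer just sends the state twice.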
+ */ + TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) + if (item->ur_msg.id == id && + item->ur_msg.creatorid == creatorid) + return; + + item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); + if (item == NULL) + return; /* XXX stats */ + + item->ur_msg.id = id; + item->ur_msg.creatorid = creatorid; + + if (TAILQ_EMPTY(&sc->sc_upd_req_list)) + nlen += sizeof(struct pfsync_subheader); + + if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { + pfsync_sendout(1); + + nlen = sizeof(struct pfsync_subheader) + + sizeof(struct pfsync_upd_req); + } + + TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); + sc->sc_len += nlen; +} + +static void +pfsync_update_state_req(struct pf_state *st) +{ + struct pfsync_softc *sc = V_pfsyncif; + + PF_STATE_LOCK_ASSERT(st); + PFSYNC_LOCK(sc); + + if (st->state_flags & PFSTATE_NOSYNC) { + if (st->sync_state != PFSYNC_S_NONE) + pfsync_q_del(st); + PFSYNC_UNLOCK(sc); + return; + } + + switch (st->sync_state) { + case PFSYNC_S_UPD_C: + case PFSYNC_S_IACK: + pfsync_q_del(st); + case PFSYNC_S_NONE: + pfsync_q_ins(st, PFSYNC_S_UPD); + pfsync_push(sc); + break; + + case PFSYNC_S_INS: + case PFSYNC_S_UPD: + case PFSYNC_S_DEL: + /* we're already handling it */ + break; + + default: + panic("%s: unexpected sync state %d", __func__, st->sync_state); + } + + PFSYNC_UNLOCK(sc); +} + +static void +pfsync_delete_state(struct pf_state *st) +{ + struct pfsync_softc *sc = V_pfsyncif; + + PFSYNC_LOCK(sc); + if (st->state_flags & PFSTATE_ACK) + pfsync_undefer_state(st, 1); + if (st->state_flags & PFSTATE_NOSYNC) { + if (st->sync_state != PFSYNC_S_NONE) + pfsync_q_del(st); + PFSYNC_UNLOCK(sc); + return; + } + + if (sc->sc_len == PFSYNC_MINPKT) + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); + + switch (st->sync_state) { + case PFSYNC_S_INS: + /* We never got to tell the world so just forget about it. 
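+		 * The insert message was still queued locally, so no peer
+		 * has seen this state and no delete needs to be sent.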
+		 */
+		pfsync_q_del(st);
+		break;
+
+	case PFSYNC_S_UPD_C:
+	case PFSYNC_S_UPD:
+	case PFSYNC_S_IACK:
+		pfsync_q_del(st);
+		/* FALLTHROUGH to putting it on the del list */
+
+	case PFSYNC_S_NONE:
+		pfsync_q_ins(st, PFSYNC_S_DEL);
+		break;
+
+	default:
+		panic("%s: unexpected sync state %d", __func__, st->sync_state);
+	}
+	PFSYNC_UNLOCK(sc);
+}
+
+static void
+pfsync_clear_states(u_int32_t creatorid, const char *ifname)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct {
+		struct pfsync_subheader subh;
+		struct pfsync_clr clr;
+	} __packed r;
+
+	bzero(&r, sizeof(r));
+
+	r.subh.action = PFSYNC_ACT_CLR;
+	r.subh.count = htons(1);
+	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;
+
+	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
+	r.clr.creatorid = creatorid;
+
+	PFSYNC_LOCK(sc);
+	pfsync_send_plus(&r, sizeof(r));
+	PFSYNC_UNLOCK(sc);
+}
+
+static void
+pfsync_q_ins(struct pf_state *st, int q)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	size_t nlen = pfsync_qs[q].len;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	KASSERT(st->sync_state == PFSYNC_S_NONE,
+		("%s: st->sync_state %u", __func__, st->sync_state));
+	KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
+	    sc->sc_len));
+
+	if (TAILQ_EMPTY(&sc->sc_qs[q]))
+		nlen += sizeof(struct pfsync_subheader);
+
+	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
+		pfsync_sendout(1);
+
+		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
+	}
+
+	sc->sc_len += nlen;
+	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
+	st->sync_state = q;
+	pf_ref_state(st);
+}
+
+static void
+pfsync_q_del(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	int q = st->sync_state;
+
+	PFSYNC_LOCK_ASSERT(sc);
+	KASSERT(st->sync_state != PFSYNC_S_NONE,
+		("%s: st->sync_state != PFSYNC_S_NONE", __func__));
+
+	sc->sc_len -= pfsync_qs[q].len;
+	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
+	st->sync_state = PFSYNC_S_NONE;
+	pf_release_state(st);
+
+	if (TAILQ_EMPTY(&sc->sc_qs[q]))
+		sc->sc_len -= sizeof(struct pfsync_subheader);
+}
+
+static void
+pfsync_bulk_start(void)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+
+	if (V_pf_status.debug >= PF_DEBUG_MISC)
+		printf("pfsync: received bulk update request\n");
+
+	PFSYNC_BLOCK(sc);
+
+	sc->sc_ureq_received = time_uptime;
+	sc->sc_bulk_hashid = 0;
+	sc->sc_bulk_stateid = 0;
+	pfsync_bulk_status(PFSYNC_BUS_START);
+	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
+	PFSYNC_BUNLOCK(sc);
+}
+
+static void
+pfsync_bulk_update(void *arg)
+{
+	struct pfsync_softc *sc = arg;
+	struct pf_state *s;
+	int i, sent = 0;
+
+	PFSYNC_BLOCK_ASSERT(sc);
+	CURVNET_SET(sc->sc_ifp->if_vnet);
+
+	/*
+	 * Start with the last state from the previous invocation.
+	 * It may have gone away, in which case we start from the
+	 * hash slot.
+	 */
+	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);
+
+	if (s != NULL)
+		i = PF_IDHASH(s);
+	else
+		i = sc->sc_bulk_hashid;
+
+	for (; i <= pf_hashmask; i++) {
+		struct pf_idhash *ih = &V_pf_idhash[i];
+
+		if (s != NULL)
+			PF_HASHROW_ASSERT(ih);
+		else {
+			PF_HASHROW_LOCK(ih);
+			s = LIST_FIRST(&ih->states);
+		}
+
+		for (; s; s = LIST_NEXT(s, entry)) {
+
+			if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
+			    sizeof(struct pfsync_state)) {
+				/* We've filled a packet.
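+				 * Remember where the walk stopped (hash
+				 * bucket plus state and creator id) and
+				 * reschedule ourselves to resume there.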
*/ + sc->sc_bulk_hashid = i; + sc->sc_bulk_stateid = s->id; + sc->sc_bulk_creatorid = s->creatorid; + PF_HASHROW_UNLOCK(ih); + callout_reset(&sc->sc_bulk_tmo, 1, + pfsync_bulk_update, sc); + goto full; + } + + if (s->sync_state == PFSYNC_S_NONE && + s->timeout < PFTM_MAX && + s->pfsync_time <= sc->sc_ureq_received) { + pfsync_update_state_req(s); + sent++; + } + } + PF_HASHROW_UNLOCK(ih); + } + + /* We're done. */ + pfsync_bulk_status(PFSYNC_BUS_END); + +full: + CURVNET_RESTORE(); +} + +static void +pfsync_bulk_status(u_int8_t status) +{ + struct { + struct pfsync_subheader subh; + struct pfsync_bus bus; + } __packed r; + + struct pfsync_softc *sc = V_pfsyncif; + + bzero(&r, sizeof(r)); + + r.subh.action = PFSYNC_ACT_BUS; + r.subh.count = htons(1); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; + + r.bus.creatorid = V_pf_status.hostid; + r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); + r.bus.status = status; + + PFSYNC_LOCK(sc); + pfsync_send_plus(&r, sizeof(r)); + PFSYNC_UNLOCK(sc); +} + +static void +pfsync_bulk_fail(void *arg) +{ + struct pfsync_softc *sc = arg; + + CURVNET_SET(sc->sc_ifp->if_vnet); + + PFSYNC_BLOCK_ASSERT(sc); + + if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { + /* Try again */ + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, + pfsync_bulk_fail, V_pfsyncif); + PFSYNC_LOCK(sc); + pfsync_request_update(0, 0); + PFSYNC_UNLOCK(sc); + } else { + /* Pretend like the transfer was ok. */ + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; + PFSYNC_LOCK(sc); + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + (*carp_demote_adj_p)(-V_pfsync_carp_adj, + "pfsync bulk fail"); + sc->sc_flags |= PFSYNCF_OK; + PFSYNC_UNLOCK(sc); + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: failed to receive bulk update\n"); + } + + CURVNET_RESTORE(); +} + +static void +pfsync_send_plus(void *plus, size_t pluslen) +{ + struct pfsync_softc *sc = V_pfsyncif; + + PFSYNC_LOCK_ASSERT(sc); + + if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) + pfsync_sendout(1); + + sc->sc_plus = plus; + sc->sc_len += (sc->sc_pluslen = pluslen); + + pfsync_sendout(1); +} + +static void +pfsync_timeout(void *arg) +{ + struct pfsync_softc *sc = arg; + + CURVNET_SET(sc->sc_ifp->if_vnet); + PFSYNC_LOCK(sc); + pfsync_push(sc); + PFSYNC_UNLOCK(sc); + CURVNET_RESTORE(); +} + +static void +pfsync_push(struct pfsync_softc *sc) +{ + + PFSYNC_LOCK_ASSERT(sc); + + sc->sc_flags |= PFSYNCF_PUSH; + swi_sched(V_pfsync_swi_cookie, 0); +} + +static void +pfsyncintr(void *arg) +{ + struct pfsync_softc *sc = arg; + struct mbuf *m, *n; + + CURVNET_SET(sc->sc_ifp->if_vnet); + + PFSYNC_LOCK(sc); + if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { + pfsync_sendout(0); + sc->sc_flags &= ~PFSYNCF_PUSH; + } + _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); + PFSYNC_UNLOCK(sc); + + for (; m != NULL; m = n) { + + n = m->m_nextpkt; + m->m_nextpkt = NULL; + + /* + * We distinguish between a deferral packet and our + * own pfsync packet based on M_SKIP_FIREWALL + * flag. This is XXX. 
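+		 * Deferred data packets were tagged M_SKIP_FIREWALL in
+		 * pfsync_defer() so pf(4) will not inspect them again;
+		 * anything else is one of our own pfsync packets and is
+		 * sent with the configured multicast options.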
+ */ + if (m->m_flags & M_SKIP_FIREWALL) + ip_output(m, NULL, NULL, 0, NULL, NULL); + else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, + NULL) == 0) + V_pfsyncstats.pfsyncs_opackets++; + else + V_pfsyncstats.pfsyncs_oerrors++; + } + CURVNET_RESTORE(); +} + +static int +pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) +{ + struct ip_moptions *imo = &sc->sc_imo; + int error; + + if (!(ifp->if_flags & IFF_MULTICAST)) + return (EADDRNOTAVAIL); + + imo->imo_membership = (struct in_multi **)mship; + imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; + imo->imo_multicast_vif = -1; + + if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, + &imo->imo_membership[0])) != 0) { + imo->imo_membership = NULL; + return (error); + } + imo->imo_num_memberships++; + imo->imo_multicast_ifp = ifp; + imo->imo_multicast_ttl = PFSYNC_DFLTTL; + imo->imo_multicast_loop = 0; + + return (0); +} + +static void +pfsync_multicast_cleanup(struct pfsync_softc *sc) +{ + struct ip_moptions *imo = &sc->sc_imo; + + in_leavegroup(imo->imo_membership[0], NULL); + free(imo->imo_membership, M_PFSYNC); + imo->imo_membership = NULL; + imo->imo_multicast_ifp = NULL; +} + +#ifdef INET +extern struct domain inetdomain; +static struct protosw in_pfsync_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inetdomain, + .pr_protocol = IPPROTO_PFSYNC, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = pfsync_input, + .pr_output = rip_output, + .pr_ctloutput = rip_ctloutput, + .pr_usrreqs = &rip_usrreqs +}; +#endif + +static void +pfsync_pointers_init() +{ + + PF_RULES_WLOCK(); + pfsync_state_import_ptr = pfsync_state_import; + pfsync_insert_state_ptr = pfsync_insert_state; + pfsync_update_state_ptr = pfsync_update_state; + pfsync_delete_state_ptr = pfsync_delete_state; + pfsync_clear_states_ptr = pfsync_clear_states; + pfsync_defer_ptr = pfsync_defer; + PF_RULES_WUNLOCK(); +} + +static void +pfsync_pointers_uninit() +{ + + PF_RULES_WLOCK(); + pfsync_state_import_ptr = NULL; + pfsync_insert_state_ptr = NULL; + pfsync_update_state_ptr = NULL; + pfsync_delete_state_ptr = NULL; + pfsync_clear_states_ptr = NULL; + pfsync_defer_ptr = NULL; + PF_RULES_WUNLOCK(); +} + +static void +vnet_pfsync_init(const void *unused __unused) +{ + int error; + + V_pfsync_cloner = if_clone_simple(pfsyncname, + pfsync_clone_create, pfsync_clone_destroy, 1); + error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif, + SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); + if (error) { + if_clone_detach(V_pfsync_cloner); + log(LOG_INFO, "swi_add() failed in %s\n", __func__); + } +} +VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, + vnet_pfsync_init, NULL); + +static void +vnet_pfsync_uninit(const void *unused __unused) +{ + + if_clone_detach(V_pfsync_cloner); + swi_remove(V_pfsync_swi_cookie); +} +/* + * Detach after pf is gone; otherwise we might touch pfsync memory + * from within pf after freeing pfsync. 
+ */ +VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND, + vnet_pfsync_uninit, NULL); + +static int +pfsync_init() +{ +#ifdef INET + int error; + + error = pf_proto_register(PF_INET, &in_pfsync_protosw); + if (error) + return (error); + error = ipproto_register(IPPROTO_PFSYNC); + if (error) { + pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); + return (error); + } +#endif + pfsync_pointers_init(); + + return (0); +} + +static void +pfsync_uninit() +{ + + pfsync_pointers_uninit(); + +#ifdef INET + ipproto_unregister(IPPROTO_PFSYNC); + pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); +#endif +} + +static int +pfsync_modevent(module_t mod, int type, void *data) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + error = pfsync_init(); + break; + case MOD_QUIESCE: + /* + * Module should not be unloaded due to race conditions. + */ + error = EBUSY; + break; + case MOD_UNLOAD: + pfsync_uninit(); + break; + default: + error = EINVAL; + break; + } + + return (error); +} + +static moduledata_t pfsync_mod = { + pfsyncname, + pfsync_modevent, + 0 +}; + +#define PFSYNC_MODVER 1 + +/* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ +DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); +MODULE_VERSION(pfsync, PFSYNC_MODVER); +MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); diff --git a/freebsd/sys/netpfil/pf/in4_cksum.c b/freebsd/sys/netpfil/pf/in4_cksum.c new file mode 100644 index 00000000..19cc8ac4 --- /dev/null +++ b/freebsd/sys/netpfil/pf/in4_cksum.c @@ -0,0 +1,122 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/* $FreeBSD$ */ +/* $OpenBSD: in4_cksum.c,v 1.7 2003/06/02 23:28:13 millert Exp $ */ +/* $KAME: in4_cksum.c,v 1.10 2001/11/30 10:06:15 itojun Exp $ */ +/* $NetBSD: in_cksum.c,v 1.13 1996/10/13 02:03:03 christos Exp $ */ + +/* + * Copyright (C) 1999 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + */ + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> + +#include <machine/in_cksum.h> + +#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) +#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; (void)ADDCARRY(sum);} + +int in4_cksum(struct mbuf *, u_int8_t, int, int); + +int +in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len) +{ + union { + struct ipovly ipov; + u_int16_t w[10]; + } u; + union { + u_int16_t s[2]; + u_int32_t l; + } l_util; + + u_int16_t *w; + int psum; + int sum = 0; + + if (nxt != 0) { + /* pseudo header */ + if (off < sizeof(struct ipovly)) + panic("in4_cksum: offset too short"); + if (m->m_len < sizeof(struct ip)) + panic("in4_cksum: bad mbuf chain"); + bzero(&u.ipov, sizeof(u.ipov)); + u.ipov.ih_len = htons(len); + u.ipov.ih_pr = nxt; + u.ipov.ih_src = mtod(m, struct ip *)->ip_src; + u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst; + w = u.w; + /* assumes sizeof(ipov) == 20 */ + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; + sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; + } + + psum = in_cksum_skip(m, len + off, off); + psum = ~psum & 0xffff; + sum += psum; + REDUCE; + return (~sum & 0xffff); +} diff --git a/freebsd/sys/netpfil/pf/pf.c b/freebsd/sys/netpfil/pf/pf.c new file mode 100644 index 00000000..7ac181b5 --- /dev/null +++ b/freebsd/sys/netpfil/pf/pf.c @@ -0,0 +1,6657 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002 - 2008 Henning Brauer + * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + * $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#include <rtems/bsd/local/opt_bpf.h> +#include <rtems/bsd/local/opt_pf.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/bus.h> +#include <sys/endian.h> +#include <sys/hash.h> +#include <sys/interrupt.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/limits.h> +#include <sys/mbuf.h> +#include <sys/md5.h> +#include <sys/random.h> +#include <sys/refcount.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/taskqueue.h> +#include <sys/ucred.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/if_types.h> +#include <net/if_vlan_var.h> +#include <net/route.h> +#include <net/radix_mpath.h> +#include <net/vnet.h> + +#include <net/pfvar.h> +#include <net/if_pflog.h> +#include <net/if_pfsync.h> + +#include <netinet/in_pcb.h> +#include <netinet/in_var.h> +#include <netinet/in_fib.h> +#include <netinet/ip.h> +#include <netinet/ip_fw.h> +#include <netinet/ip_icmp.h> +#include <netinet/icmp_var.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> +#include <netinet/udp.h> +#include <netinet/udp_var.h> + +#include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */ + +#ifdef INET6 +#include <netinet/ip6.h> +#include <netinet/icmp6.h> +#include <netinet6/nd6.h> +#include <netinet6/ip6_var.h> +#include <netinet6/in6_pcb.h> +#include <netinet6/in6_fib.h> +#include <netinet6/scope6_var.h> +#endif /* INET6 */ + +#include <machine/in_cksum.h> +#include <security/mac/mac_framework.h> + +#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x + +/* + * Global variables + */ + +/* state tables */ +VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]); +VNET_DEFINE(struct pf_palist, pf_pabuf); +VNET_DEFINE(struct 
pf_altqqueue *, pf_altqs_active); +VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive); +VNET_DEFINE(struct pf_kstatus, pf_status); + +VNET_DEFINE(u_int32_t, ticket_altqs_active); +VNET_DEFINE(u_int32_t, ticket_altqs_inactive); +VNET_DEFINE(int, altqs_inactive_open); +VNET_DEFINE(u_int32_t, ticket_pabuf); + +VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx); +#define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx) +VNET_DEFINE(u_char, pf_tcp_secret[16]); +#define V_pf_tcp_secret VNET(pf_tcp_secret) +VNET_DEFINE(int, pf_tcp_secret_init); +#define V_pf_tcp_secret_init VNET(pf_tcp_secret_init) +VNET_DEFINE(int, pf_tcp_iss_off); +#define V_pf_tcp_iss_off VNET(pf_tcp_iss_off) + +/* + * Queue for pf_intr() sends. + */ +static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations"); +struct pf_send_entry { + STAILQ_ENTRY(pf_send_entry) pfse_next; + struct mbuf *pfse_m; + enum { + PFSE_IP, + PFSE_IP6, + PFSE_ICMP, + PFSE_ICMP6, + } pfse_type; + struct { + int type; + int code; + int mtu; + } icmpopts; +}; + +STAILQ_HEAD(pf_send_head, pf_send_entry); +static VNET_DEFINE(struct pf_send_head, pf_sendqueue); +#define V_pf_sendqueue VNET(pf_sendqueue) + +static struct mtx pf_sendqueue_mtx; +MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF); +#define PF_SENDQ_LOCK() mtx_lock(&pf_sendqueue_mtx) +#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx) + +/* + * Queue for pf_overload_task() tasks. + */ +struct pf_overload_entry { + SLIST_ENTRY(pf_overload_entry) next; + struct pf_addr addr; + sa_family_t af; + uint8_t dir; + struct pf_rule *rule; +}; + +SLIST_HEAD(pf_overload_head, pf_overload_entry); +static VNET_DEFINE(struct pf_overload_head, pf_overloadqueue); +#define V_pf_overloadqueue VNET(pf_overloadqueue) +static VNET_DEFINE(struct task, pf_overloadtask); +#define V_pf_overloadtask VNET(pf_overloadtask) + +static struct mtx pf_overloadqueue_mtx; +MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx, + "pf overload/flush queue", MTX_DEF); +#define PF_OVERLOADQ_LOCK() mtx_lock(&pf_overloadqueue_mtx) +#define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx) + +VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules); +struct mtx pf_unlnkdrules_mtx; +MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules", + MTX_DEF); + +static VNET_DEFINE(uma_zone_t, pf_sources_z); +#define V_pf_sources_z VNET(pf_sources_z) +uma_zone_t pf_mtag_z; +VNET_DEFINE(uma_zone_t, pf_state_z); +VNET_DEFINE(uma_zone_t, pf_state_key_z); + +VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]); +#define PFID_CPUBITS 8 +#define PFID_CPUSHIFT (sizeof(uint64_t) * NBBY - PFID_CPUBITS) +#define PFID_CPUMASK ((uint64_t)((1 << PFID_CPUBITS) - 1) << PFID_CPUSHIFT) +#define PFID_MAXID (~PFID_CPUMASK) +CTASSERT((1 << PFID_CPUBITS) >= MAXCPU); + +static void pf_src_tree_remove_state(struct pf_state *); +static void pf_init_threshold(struct pf_threshold *, u_int32_t, + u_int32_t); +static void pf_add_threshold(struct pf_threshold *); +static int pf_check_threshold(struct pf_threshold *); + +static void pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *, + u_int16_t *, u_int16_t *, struct pf_addr *, + u_int16_t, u_int8_t, sa_family_t); +static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, + struct tcphdr *, struct pf_state_peer *); +static void pf_change_icmp(struct pf_addr *, u_int16_t *, + struct pf_addr *, struct pf_addr *, u_int16_t, + u_int16_t *, u_int16_t *, u_int16_t *, + u_int16_t *, u_int8_t, sa_family_t); +static void pf_send_tcp(struct mbuf *, + const struct pf_rule *, 
sa_family_t, + const struct pf_addr *, const struct pf_addr *, + u_int16_t, u_int16_t, u_int32_t, u_int32_t, + u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, + u_int16_t, struct ifnet *); +static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, + sa_family_t, struct pf_rule *); +static void pf_detach_state(struct pf_state *); +static int pf_state_key_attach(struct pf_state_key *, + struct pf_state_key *, struct pf_state *); +static void pf_state_key_detach(struct pf_state *, int); +static int pf_state_key_ctor(void *, int, void *, int); +static u_int32_t pf_tcp_iss(struct pf_pdesc *); +static int pf_test_rule(struct pf_rule **, struct pf_state **, + int, struct pfi_kif *, struct mbuf *, int, + struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **, struct inpcb *); +static int pf_create_state(struct pf_rule *, struct pf_rule *, + struct pf_rule *, struct pf_pdesc *, + struct pf_src_node *, struct pf_state_key *, + struct pf_state_key *, struct mbuf *, int, + u_int16_t, u_int16_t, int *, struct pfi_kif *, + struct pf_state **, int, u_int16_t, u_int16_t, + int); +static int pf_test_fragment(struct pf_rule **, int, + struct pfi_kif *, struct mbuf *, void *, + struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +static int pf_tcp_track_full(struct pf_state_peer *, + struct pf_state_peer *, struct pf_state **, + struct pfi_kif *, struct mbuf *, int, + struct pf_pdesc *, u_short *, int *); +static int pf_tcp_track_sloppy(struct pf_state_peer *, + struct pf_state_peer *, struct pf_state **, + struct pf_pdesc *, u_short *); +static int pf_test_state_tcp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, u_short *); +static int pf_test_state_udp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *); +static int pf_test_state_icmp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, u_short *); +static int pf_test_state_other(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, struct pf_pdesc *); +static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, + sa_family_t); +static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, + sa_family_t); +static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, + int, u_int16_t); +static int pf_check_proto_cksum(struct mbuf *, int, int, + u_int8_t, sa_family_t); +static void pf_print_state_parts(struct pf_state *, + struct pf_state_key *, struct pf_state_key *); +static int pf_addr_wrap_neq(struct pf_addr_wrap *, + struct pf_addr_wrap *); +static struct pf_state *pf_find_state(struct pfi_kif *, + struct pf_state_key_cmp *, u_int); +static int pf_src_connlimit(struct pf_state **); +static void pf_overload_task(void *v, int pending); +static int pf_insert_src_node(struct pf_src_node **, + struct pf_rule *, struct pf_addr *, sa_family_t); +static u_int pf_purge_expired_states(u_int, int); +static void pf_purge_unlinked_rules(void); +static int pf_mtag_uminit(void *, int, int); +static void pf_mtag_free(struct m_tag *); +#ifdef INET +static void pf_route(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +#endif /* INET */ +#ifdef INET6 +static void pf_change_a6(struct pf_addr *, u_int16_t *, + struct pf_addr *, u_int8_t); +static void pf_route6(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +#endif /* INET6 */ + +int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); + +extern int pf_end_threads; + 
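+/*
+ * Memory limits for states, source nodes, fragments and table entries;
+ * these are the knobs behind pf.conf's "set limit".
+ */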
+VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); + +#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ + (pd)->pf_mtag->flags & PF_PACKET_LOOPED) + +#define STATE_LOOKUP(i, k, d, s, pd) \ + do { \ + (s) = pf_find_state((i), (k), (d)); \ + if ((s) == NULL) \ + return (PF_DROP); \ + if (PACKET_LOOPED(pd)) \ + return (PF_PASS); \ + if ((d) == PF_OUT && \ + (((s)->rule.ptr->rt == PF_ROUTETO && \ + (s)->rule.ptr->direction == PF_OUT) || \ + ((s)->rule.ptr->rt == PF_REPLYTO && \ + (s)->rule.ptr->direction == PF_IN)) && \ + (s)->rt_kif != NULL && \ + (s)->rt_kif != (i)) \ + return (PF_PASS); \ + } while (0) + +#define BOUND_IFACE(r, k) \ + ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all + +#define STATE_INC_COUNTERS(s) \ + do { \ + counter_u64_add(s->rule.ptr->states_cur, 1); \ + counter_u64_add(s->rule.ptr->states_tot, 1); \ + if (s->anchor.ptr != NULL) { \ + counter_u64_add(s->anchor.ptr->states_cur, 1); \ + counter_u64_add(s->anchor.ptr->states_tot, 1); \ + } \ + if (s->nat_rule.ptr != NULL) { \ + counter_u64_add(s->nat_rule.ptr->states_cur, 1);\ + counter_u64_add(s->nat_rule.ptr->states_tot, 1);\ + } \ + } while (0) + +#define STATE_DEC_COUNTERS(s) \ + do { \ + if (s->nat_rule.ptr != NULL) \ + counter_u64_add(s->nat_rule.ptr->states_cur, -1);\ + if (s->anchor.ptr != NULL) \ + counter_u64_add(s->anchor.ptr->states_cur, -1); \ + counter_u64_add(s->rule.ptr->states_cur, -1); \ + } while (0) + +static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); +VNET_DEFINE(struct pf_keyhash *, pf_keyhash); +VNET_DEFINE(struct pf_idhash *, pf_idhash); +VNET_DEFINE(struct pf_srchash *, pf_srchash); + +SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)"); + +u_long pf_hashmask; +u_long pf_srchashmask; +static u_long pf_hashsize; +static u_long pf_srchashsize; + +SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN, + &pf_hashsize, 0, "Size of pf(4) states hashtable"); +SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, + &pf_srchashsize, 0, "Size of pf(4) source nodes hashtable"); + +VNET_DEFINE(void *, pf_swi_cookie); + +VNET_DEFINE(uint32_t, pf_hashseed); +#define V_pf_hashseed VNET(pf_hashseed) + +int +pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af) +{ + + switch (af) { +#ifdef INET + case AF_INET: + if (a->addr32[0] > b->addr32[0]) + return (1); + if (a->addr32[0] < b->addr32[0]) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (a->addr32[3] > b->addr32[3]) + return (1); + if (a->addr32[3] < b->addr32[3]) + return (-1); + if (a->addr32[2] > b->addr32[2]) + return (1); + if (a->addr32[2] < b->addr32[2]) + return (-1); + if (a->addr32[1] > b->addr32[1]) + return (1); + if (a->addr32[1] < b->addr32[1]) + return (-1); + if (a->addr32[0] > b->addr32[0]) + return (1); + if (a->addr32[0] < b->addr32[0]) + return (-1); + break; +#endif /* INET6 */ + default: + panic("%s: unknown address family %u", __func__, af); + } + return (0); +} + +static __inline uint32_t +pf_hashkey(struct pf_state_key *sk) +{ + uint32_t h; + + h = murmur3_32_hash32((uint32_t *)sk, + sizeof(struct pf_state_key_cmp)/sizeof(uint32_t), + V_pf_hashseed); + + return (h & pf_hashmask); +} + +static __inline uint32_t +pf_hashsrc(struct pf_addr *addr, sa_family_t af) +{ + uint32_t h; + + switch (af) { + case AF_INET: + h = murmur3_32_hash32((uint32_t *)&addr->v4, + sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed); + break; + case AF_INET6: + h = murmur3_32_hash32((uint32_t *)&addr->v6, + sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed); + 
break; + default: + panic("%s: unknown address family %u", __func__, af); + } + + return (h & pf_srchashmask); +} + +#ifdef ALTQ +static int +pf_state_hash(struct pf_state *s) +{ + u_int32_t hv = (intptr_t)s / sizeof(*s); + + hv ^= crc32(&s->src, sizeof(s->src)); + hv ^= crc32(&s->dst, sizeof(s->dst)); + if (hv == 0) + hv = 1; + return (hv); +} +#endif + +#ifdef INET6 +void +pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + dst->addr32[0] = src->addr32[0]; + break; +#endif /* INET */ + case AF_INET6: + dst->addr32[0] = src->addr32[0]; + dst->addr32[1] = src->addr32[1]; + dst->addr32[2] = src->addr32[2]; + dst->addr32[3] = src->addr32[3]; + break; + } +} +#endif /* INET6 */ + +static void +pf_init_threshold(struct pf_threshold *threshold, + u_int32_t limit, u_int32_t seconds) +{ + threshold->limit = limit * PF_THRESHOLD_MULT; + threshold->seconds = seconds; + threshold->count = 0; + threshold->last = time_uptime; +} + +static void +pf_add_threshold(struct pf_threshold *threshold) +{ + u_int32_t t = time_uptime, diff = t - threshold->last; + + if (diff >= threshold->seconds) + threshold->count = 0; + else + threshold->count -= threshold->count * diff / + threshold->seconds; + threshold->count += PF_THRESHOLD_MULT; + threshold->last = t; +} + +static int +pf_check_threshold(struct pf_threshold *threshold) +{ + return (threshold->count > threshold->limit); +} + +static int +pf_src_connlimit(struct pf_state **state) +{ + struct pf_overload_entry *pfoe; + int bad = 0; + + PF_STATE_LOCK_ASSERT(*state); + + (*state)->src_node->conn++; + (*state)->src.tcp_est = 1; + pf_add_threshold(&(*state)->src_node->conn_rate); + + if ((*state)->rule.ptr->max_src_conn && + (*state)->rule.ptr->max_src_conn < + (*state)->src_node->conn) { + counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1); + bad++; + } + + if ((*state)->rule.ptr->max_src_conn_rate.limit && + pf_check_threshold(&(*state)->src_node->conn_rate)) { + counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1); + bad++; + } + + if (!bad) + return (0); + + /* Kill this state. */ + (*state)->timeout = PFTM_PURGE; + (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; + + if ((*state)->rule.ptr->overload_tbl == NULL) + return (1); + + /* Schedule overloading and flushing task. 
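+ * The entry is queued under pf_overloadqueue_mtx and handed to + * taskqueue_swi; pf_overload_task() below then inserts the offending + * address into the rule's overload table and, if the rule has the + * flush flag set, kills the matching states.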
*/ + pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT); + if (pfoe == NULL) + return (1); /* too bad :( */ + + bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr)); + pfoe->af = (*state)->key[PF_SK_WIRE]->af; + pfoe->rule = (*state)->rule.ptr; + pfoe->dir = (*state)->direction; + PF_OVERLOADQ_LOCK(); + SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next); + PF_OVERLOADQ_UNLOCK(); + taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask); + + return (1); +} + +static void +pf_overload_task(void *v, int pending) +{ + struct pf_overload_head queue; + struct pfr_addr p; + struct pf_overload_entry *pfoe, *pfoe1; + uint32_t killed = 0; + + CURVNET_SET((struct vnet *)v); + + PF_OVERLOADQ_LOCK(); + queue = V_pf_overloadqueue; + SLIST_INIT(&V_pf_overloadqueue); + PF_OVERLOADQ_UNLOCK(); + + bzero(&p, sizeof(p)); + SLIST_FOREACH(pfoe, &queue, next) { + counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1); + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("%s: blocking address ", __func__); + pf_print_host(&pfoe->addr, 0, pfoe->af); + printf("\n"); + } + + p.pfra_af = pfoe->af; + switch (pfoe->af) { +#ifdef INET + case AF_INET: + p.pfra_net = 32; + p.pfra_ip4addr = pfoe->addr.v4; + break; +#endif +#ifdef INET6 + case AF_INET6: + p.pfra_net = 128; + p.pfra_ip6addr = pfoe->addr.v6; + break; +#endif + } + + PF_RULES_WLOCK(); + pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second); + PF_RULES_WUNLOCK(); + } + + /* + * Remove those entries that don't need flushing. + */ + SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) + if (pfoe->rule->flush == 0) { + SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next); + free(pfoe, M_PFTEMP); + } else + counter_u64_add( + V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1); + + /* If nothing to flush, return. */ + if (SLIST_EMPTY(&queue)) { + CURVNET_RESTORE(); + return; + } + + for (int i = 0; i <= pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + struct pf_state_key *sk; + struct pf_state *s; + + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + sk = s->key[PF_SK_WIRE]; + SLIST_FOREACH(pfoe, &queue, next) + if (sk->af == pfoe->af && + ((pfoe->rule->flush & PF_FLUSH_GLOBAL) || + pfoe->rule == s->rule.ptr) && + ((pfoe->dir == PF_OUT && + PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) || + (pfoe->dir == PF_IN && + PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) { + s->timeout = PFTM_PURGE; + s->src.state = s->dst.state = TCPS_CLOSED; + killed++; + } + } + PF_HASHROW_UNLOCK(ih); + } + SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) + free(pfoe, M_PFTEMP); + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: %u states killed\n", __func__, killed); + + CURVNET_RESTORE(); +} + +/* + * Can return locked on failure, so that we can consistently + * allocate and insert a new one. 
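+ * "Locked" refers to the source node hash row: when returnlocked is + * set and the lookup misses, the row is left locked, so that + * pf_insert_src_node() can allocate and link the new node without an + * intervening race.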
+ */ +struct pf_src_node * +pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af, + int returnlocked) +{ + struct pf_srchash *sh; + struct pf_src_node *n; + + counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1); + + sh = &V_pf_srchash[pf_hashsrc(src, af)]; + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) + if (n->rule.ptr == rule && n->af == af && + ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || + (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) + break; + if (n != NULL) { + n->states++; + PF_HASHROW_UNLOCK(sh); + } else if (returnlocked == 0) + PF_HASHROW_UNLOCK(sh); + + return (n); +} + +static int +pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, + struct pf_addr *src, sa_family_t af) +{ + + KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK || + rule->rpool.opts & PF_POOL_STICKYADDR), + ("%s for non-tracking rule %p", __func__, rule)); + + if (*sn == NULL) + *sn = pf_find_src_node(src, rule, af, 1); + + if (*sn == NULL) { + struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)]; + + PF_HASHROW_ASSERT(sh); + + if (!rule->max_src_nodes || + counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes) + (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); + else + counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], + 1); + if ((*sn) == NULL) { + PF_HASHROW_UNLOCK(sh); + return (-1); + } + + pf_init_threshold(&(*sn)->conn_rate, + rule->max_src_conn_rate.limit, + rule->max_src_conn_rate.seconds); + + (*sn)->af = af; + (*sn)->rule.ptr = rule; + PF_ACPY(&(*sn)->addr, src, af); + LIST_INSERT_HEAD(&sh->nodes, *sn, entry); + (*sn)->creation = time_uptime; + (*sn)->ruletype = rule->action; + (*sn)->states = 1; + if ((*sn)->rule.ptr != NULL) + counter_u64_add((*sn)->rule.ptr->src_nodes, 1); + PF_HASHROW_UNLOCK(sh); + counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1); + } else { + if (rule->max_src_states && + (*sn)->states >= rule->max_src_states) { + counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES], + 1); + return (-1); + } + } + return (0); +} + +void +pf_unlink_src_node(struct pf_src_node *src) +{ + + PF_HASHROW_ASSERT(&V_pf_srchash[pf_hashsrc(&src->addr, src->af)]); + LIST_REMOVE(src, entry); + if (src->rule.ptr) + counter_u64_add(src->rule.ptr->src_nodes, -1); +} + +u_int +pf_free_src_nodes(struct pf_src_node_list *head) +{ + struct pf_src_node *sn, *tmp; + u_int count = 0; + + LIST_FOREACH_SAFE(sn, head, entry, tmp) { + uma_zfree(V_pf_sources_z, sn); + count++; + } + + counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count); + + return (count); +} + +void +pf_mtag_initialize() +{ + + pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) + + sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL, + UMA_ALIGN_PTR, 0); +} + +/* Per-vnet data storage structures initialization. */ +void +pf_initialize() +{ + struct pf_keyhash *kh; + struct pf_idhash *ih; + struct pf_srchash *sh; + u_int i; + + if (pf_hashsize == 0 || !powerof2(pf_hashsize)) + pf_hashsize = PF_HASHSIZ; + if (pf_srchashsize == 0 || !powerof2(pf_srchashsize)) + pf_srchashsize = PF_HASHSIZ / 4; + + V_pf_hashseed = arc4random(); + + /* States and state keys storage. 
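+ * Both table sizes were forced to a power of two above, so the masks + * derived here (pf_hashmask, pf_srchashmask) reduce a 32-bit hash to + * a slot index with a single AND.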
*/ + V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z; + uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT); + uma_zone_set_warning(V_pf_state_z, "PF states limit reached"); + + V_pf_state_key_z = uma_zcreate("pf state keys", + sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + V_pf_keyhash = malloc(pf_hashsize * sizeof(struct pf_keyhash), + M_PFHASH, M_WAITOK | M_ZERO); + V_pf_idhash = malloc(pf_hashsize * sizeof(struct pf_idhash), + M_PFHASH, M_WAITOK | M_ZERO); + pf_hashmask = pf_hashsize - 1; + for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask; + i++, kh++, ih++) { + mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK); + mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF); + } + + /* Source nodes. */ + V_pf_sources_z = uma_zcreate("pf source nodes", + sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + 0); + V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z; + uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT); + uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached"); + V_pf_srchash = malloc(pf_srchashsize * sizeof(struct pf_srchash), + M_PFHASH, M_WAITOK|M_ZERO); + pf_srchashmask = pf_srchashsize - 1; + for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) + mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF); + + /* ALTQ */ + TAILQ_INIT(&V_pf_altqs[0]); + TAILQ_INIT(&V_pf_altqs[1]); + TAILQ_INIT(&V_pf_pabuf); + V_pf_altqs_active = &V_pf_altqs[0]; + V_pf_altqs_inactive = &V_pf_altqs[1]; + + /* Send & overload+flush queues. */ + STAILQ_INIT(&V_pf_sendqueue); + SLIST_INIT(&V_pf_overloadqueue); + TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet); + + /* Unlinked, but possibly still referenced, rules. 
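+ * Rules removed from the active ruleset are parked on this queue + * until pf_purge_unlinked_rules() can prove that no state or source + * node still references them.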
*/ + TAILQ_INIT(&V_pf_unlinked_rules); +} + +void +pf_mtag_cleanup() +{ + + uma_zdestroy(pf_mtag_z); +} + +void +pf_cleanup() +{ + struct pf_keyhash *kh; + struct pf_idhash *ih; + struct pf_srchash *sh; + struct pf_send_entry *pfse, *next; + u_int i; + + for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask; + i++, kh++, ih++) { + KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty", + __func__)); + KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty", + __func__)); + mtx_destroy(&kh->lock); + mtx_destroy(&ih->lock); + } + free(V_pf_keyhash, M_PFHASH); + free(V_pf_idhash, M_PFHASH); + + for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { + KASSERT(LIST_EMPTY(&sh->nodes), + ("%s: source node hash not empty", __func__)); + mtx_destroy(&sh->lock); + } + free(V_pf_srchash, M_PFHASH); + + STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) { + m_freem(pfse->pfse_m); + free(pfse, M_PFTEMP); + } + + uma_zdestroy(V_pf_sources_z); + uma_zdestroy(V_pf_state_z); + uma_zdestroy(V_pf_state_key_z); +} + +static int +pf_mtag_uminit(void *mem, int size, int how) +{ + struct m_tag *t; + + t = (struct m_tag *)mem; + t->m_tag_cookie = MTAG_ABI_COMPAT; + t->m_tag_id = PACKET_TAG_PF; + t->m_tag_len = sizeof(struct pf_mtag); + t->m_tag_free = pf_mtag_free; + + return (0); +} + +static void +pf_mtag_free(struct m_tag *t) +{ + + uma_zfree(pf_mtag_z, t); +} + +struct pf_mtag * +pf_get_mtag(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL) + return ((struct pf_mtag *)(mtag + 1)); + + mtag = uma_zalloc(pf_mtag_z, M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_mtag)); + m_tag_prepend(m, mtag); + + return ((struct pf_mtag *)(mtag + 1)); +} + +static int +pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks, + struct pf_state *s) +{ + struct pf_keyhash *khs, *khw, *kh; + struct pf_state_key *sk, *cur; + struct pf_state *si, *olds = NULL; + int idx; + + KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); + KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__)); + KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__)); + + /* + * We need to lock hash slots of both keys. To avoid deadlock + * we always lock the slot with lower address first. Unlock order + * isn't important. + * + * We also need to lock ID hash slot before dropping key + * locks. On success we return with ID hash slot locked. + */ + + if (skw == sks) { + khs = khw = &V_pf_keyhash[pf_hashkey(skw)]; + PF_HASHROW_LOCK(khs); + } else { + khs = &V_pf_keyhash[pf_hashkey(sks)]; + khw = &V_pf_keyhash[pf_hashkey(skw)]; + if (khs == khw) { + PF_HASHROW_LOCK(khs); + } else if (khs < khw) { + PF_HASHROW_LOCK(khs); + PF_HASHROW_LOCK(khw); + } else { + PF_HASHROW_LOCK(khw); + PF_HASHROW_LOCK(khs); + } + } + +#define KEYS_UNLOCK() do { \ + if (khs != khw) { \ + PF_HASHROW_UNLOCK(khs); \ + PF_HASHROW_UNLOCK(khw); \ + } else \ + PF_HASHROW_UNLOCK(khs); \ +} while (0) + + /* + * First run: start with wire key. + */ + sk = skw; + kh = khw; + idx = PF_SK_WIRE; + +keyattach: + LIST_FOREACH(cur, &kh->keys, entry) + if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0) + break; + + if (cur != NULL) { + /* Key exists. Check for same kif, if none, add to key. 
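+ * A state on the same kif and direction is normally a collision and + * fails with EEXIST; the one exception, handled below, is an old + * state past FIN_WAIT_2, which is forced to TCPS_CLOSED and scheduled + * for immediate purge so that the new state can take its place.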
*/ + TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) { + struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)]; + + PF_HASHROW_LOCK(ih); + if (si->kif == s->kif && + si->direction == s->direction) { + if (sk->proto == IPPROTO_TCP && + si->src.state >= TCPS_FIN_WAIT_2 && + si->dst.state >= TCPS_FIN_WAIT_2) { + /* + * New state matches an old >FIN_WAIT_2 + * state. We can't drop key hash locks, + * thus we can't unlink it properly. + * + * As a workaround we drop it into + * TCPS_CLOSED state, schedule purge + * ASAP and push it into the very end + * of the slot TAILQ, so that it won't + * conflict with our new state. + */ + si->src.state = si->dst.state = + TCPS_CLOSED; + si->timeout = PFTM_PURGE; + olds = si; + } else { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: %s key attach " + "failed on %s: ", + (idx == PF_SK_WIRE) ? + "wire" : "stack", + s->kif->pfik_name); + pf_print_state_parts(s, + (idx == PF_SK_WIRE) ? + sk : NULL, + (idx == PF_SK_STACK) ? + sk : NULL); + printf(", existing: "); + pf_print_state_parts(si, + (idx == PF_SK_WIRE) ? + sk : NULL, + (idx == PF_SK_STACK) ? + sk : NULL); + printf("\n"); + } + PF_HASHROW_UNLOCK(ih); + KEYS_UNLOCK(); + uma_zfree(V_pf_state_key_z, sk); + if (idx == PF_SK_STACK) + pf_detach_state(s); + return (EEXIST); /* collision! */ + } + } + PF_HASHROW_UNLOCK(ih); + } + uma_zfree(V_pf_state_key_z, sk); + s->key[idx] = cur; + } else { + LIST_INSERT_HEAD(&kh->keys, sk, entry); + s->key[idx] = sk; + } + +stateattach: + /* List is sorted, if-bound states before floating. */ + if (s->kif == V_pfi_all) + TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]); + else + TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]); + + if (olds) { + TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]); + TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds, + key_list[idx]); + olds = NULL; + } + + /* + * Attach done. Now decide whether (and how) we should + * attach a second key. + */ + if (sks == skw) { + s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; + idx = PF_SK_STACK; + sks = NULL; + goto stateattach; + } else if (sks != NULL) { + /* + * Continue attaching with stack key. + */ + sk = sks; + kh = khs; + idx = PF_SK_STACK; + sks = NULL; + goto keyattach; + } + + PF_STATE_LOCK(s); + KEYS_UNLOCK(); + + KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL, + ("%s failure", __func__)); + + return (0); +#undef KEYS_UNLOCK +} + +static void +pf_detach_state(struct pf_state *s) +{ + struct pf_state_key *sks = s->key[PF_SK_STACK]; + struct pf_keyhash *kh; + + if (sks != NULL) { + kh = &V_pf_keyhash[pf_hashkey(sks)]; + PF_HASHROW_LOCK(kh); + if (s->key[PF_SK_STACK] != NULL) + pf_state_key_detach(s, PF_SK_STACK); + /* + * If both point to same key, then we are done. 
+ */ + if (sks == s->key[PF_SK_WIRE]) { + pf_state_key_detach(s, PF_SK_WIRE); + PF_HASHROW_UNLOCK(kh); + return; + } + PF_HASHROW_UNLOCK(kh); + } + + if (s->key[PF_SK_WIRE] != NULL) { + kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])]; + PF_HASHROW_LOCK(kh); + if (s->key[PF_SK_WIRE] != NULL) + pf_state_key_detach(s, PF_SK_WIRE); + PF_HASHROW_UNLOCK(kh); + } +} + +static void +pf_state_key_detach(struct pf_state *s, int idx) +{ + struct pf_state_key *sk = s->key[idx]; +#ifdef INVARIANTS + struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)]; + + PF_HASHROW_ASSERT(kh); +#endif + TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]); + s->key[idx] = NULL; + + if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) { + LIST_REMOVE(sk, entry); + uma_zfree(V_pf_state_key_z, sk); + } +} + +static int +pf_state_key_ctor(void *mem, int size, void *arg, int flags) +{ + struct pf_state_key *sk = mem; + + bzero(sk, sizeof(struct pf_state_key_cmp)); + TAILQ_INIT(&sk->states[PF_SK_WIRE]); + TAILQ_INIT(&sk->states[PF_SK_STACK]); + + return (0); +} + +struct pf_state_key * +pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr, + struct pf_addr *daddr, u_int16_t sport, u_int16_t dport) +{ + struct pf_state_key *sk; + + sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sk == NULL) + return (NULL); + + PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af); + PF_ACPY(&sk->addr[pd->didx], daddr, pd->af); + sk->port[pd->sidx] = sport; + sk->port[pd->didx] = dport; + sk->proto = pd->proto; + sk->af = pd->af; + + return (sk); +} + +struct pf_state_key * +pf_state_key_clone(struct pf_state_key *orig) +{ + struct pf_state_key *sk; + + sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sk == NULL) + return (NULL); + + bcopy(orig, sk, sizeof(struct pf_state_key_cmp)); + + return (sk); +} + +int +pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, + struct pf_state_key *sks, struct pf_state *s) +{ + struct pf_idhash *ih; + struct pf_state *cur; + int error; + + KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]), + ("%s: sks not pristine", __func__)); + KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]), + ("%s: skw not pristine", __func__)); + KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); + + s->kif = kif; + + if (s->id == 0 && s->creatorid == 0) { + /* XXX: should be atomic, but probability of collision low */ + if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID) + V_pf_stateid[curcpu] = 1; + s->id |= (uint64_t )curcpu << PFID_CPUSHIFT; + s->id = htobe64(s->id); + s->creatorid = V_pf_status.hostid; + } + + /* Returns with ID locked on success. */ + if ((error = pf_state_key_attach(skw, sks, s)) != 0) + return (error); + + ih = &V_pf_idhash[PF_IDHASH(s)]; + PF_HASHROW_ASSERT(ih); + LIST_FOREACH(cur, &ih->states, entry) + if (cur->id == s->id && cur->creatorid == s->creatorid) + break; + + if (cur != NULL) { + PF_HASHROW_UNLOCK(ih); + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state ID collision: " + "id: %016llx creatorid: %08x\n", + (unsigned long long)be64toh(s->id), + ntohl(s->creatorid)); + } + pf_detach_state(s); + return (EEXIST); + } + LIST_INSERT_HEAD(&ih->states, s, entry); + /* One for keys, one for ID hash. */ + refcount_init(&s->refs, 2); + + counter_u64_add(V_pf_status.fcounters[FCNT_STATE_INSERT], 1); + if (pfsync_insert_state_ptr != NULL) + pfsync_insert_state_ptr(s); + + /* Returns locked. */ + return (0); +} + +/* + * Find state by ID: returns with locked row on success. 
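+ * The row is selected by reducing the 64-bit ID modulo the table + * size; on a miss the row is unlocked again and NULL is returned.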
+ */ +struct pf_state * +pf_find_state_byid(uint64_t id, uint32_t creatorid) +{ + struct pf_idhash *ih; + struct pf_state *s; + + counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); + + ih = &V_pf_idhash[(be64toh(id) % (pf_hashmask + 1))]; + + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) + if (s->id == id && s->creatorid == creatorid) + break; + + if (s == NULL) + PF_HASHROW_UNLOCK(ih); + + return (s); +} + +/* + * Find state by key. + * Returns with ID hash slot locked on success. + */ +static struct pf_state * +pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) +{ + struct pf_keyhash *kh; + struct pf_state_key *sk; + struct pf_state *s; + int idx; + + counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); + + kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(sk, &kh->keys, entry) + if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) + break; + if (sk == NULL) { + PF_HASHROW_UNLOCK(kh); + return (NULL); + } + + idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK); + + /* List is sorted, if-bound states before floating ones. */ + TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) + if (s->kif == V_pfi_all || s->kif == kif) { + PF_STATE_LOCK(s); + PF_HASHROW_UNLOCK(kh); + if (s->timeout >= PFTM_MAX) { + /* + * State is either being processed by + * pf_unlink_state() in another thread, or + * is scheduled for immediate expiry. + */ + PF_STATE_UNLOCK(s); + return (NULL); + } + return (s); + } + PF_HASHROW_UNLOCK(kh); + + return (NULL); +} + +struct pf_state * +pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) +{ + struct pf_keyhash *kh; + struct pf_state_key *sk; + struct pf_state *s, *ret = NULL; + int idx, inout = 0; + + counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); + + kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(sk, &kh->keys, entry) + if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) + break; + if (sk == NULL) { + PF_HASHROW_UNLOCK(kh); + return (NULL); + } + switch (dir) { + case PF_IN: + idx = PF_SK_WIRE; + break; + case PF_OUT: + idx = PF_SK_STACK; + break; + case PF_INOUT: + idx = PF_SK_WIRE; + inout = 1; + break; + default: + panic("%s: dir %u", __func__, dir); + } +second_run: + TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) { + if (more == NULL) { + PF_HASHROW_UNLOCK(kh); + return (s); + } + + if (ret) + (*more)++; + else + ret = s; + } + if (inout == 1) { + inout = 0; + idx = PF_SK_STACK; + goto second_run; + } + PF_HASHROW_UNLOCK(kh); + + return (ret); +} + +/* END state table stuff */ + +static void +pf_send(struct pf_send_entry *pfse) +{ + + PF_SENDQ_LOCK(); + STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next); + PF_SENDQ_UNLOCK(); + swi_sched(V_pf_swi_cookie, 0); +} + +void +pf_intr(void *v) +{ + struct pf_send_head queue; + struct pf_send_entry *pfse, *next; + + CURVNET_SET((struct vnet *)v); + + PF_SENDQ_LOCK(); + queue = V_pf_sendqueue; + STAILQ_INIT(&V_pf_sendqueue); + PF_SENDQ_UNLOCK(); + + STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) { + switch (pfse->pfse_type) { +#ifdef INET + case PFSE_IP: + ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL); + break; + case PFSE_ICMP: + icmp_error(pfse->pfse_m, pfse->icmpopts.type, + pfse->icmpopts.code, 0, pfse->icmpopts.mtu); + break; +#endif /* INET */ +#ifdef INET6 + case PFSE_IP6: + ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL, + NULL); + break; + case PFSE_ICMP6: + icmp6_error(pfse->pfse_m, 
pfse->icmpopts.type, + pfse->icmpopts.code, pfse->icmpopts.mtu); + break; +#endif /* INET6 */ + default: + panic("%s: unknown type", __func__); + } + free(pfse, M_PFTEMP); + } + CURVNET_RESTORE(); +} + +void +pf_purge_thread(void *unused __unused) +{ + VNET_ITERATOR_DECL(vnet_iter); + u_int idx = 0; + + for (;;) { + PF_RULES_RLOCK(); + rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10); + PF_RULES_RUNLOCK(); + + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + + if (pf_end_threads) { + pf_end_threads++; + wakeup(pf_purge_thread); + kproc_exit(0); + } + + /* Process 1/interval fraction of the state table every run. */ + idx = pf_purge_expired_states(idx, pf_hashmask / + (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10)); + + /* Purge other expired types every PFTM_INTERVAL seconds. */ + if (idx == 0) { + /* + * Order is important: + * - states and src nodes reference rules + * - states and rules reference kifs + */ + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(); + pf_purge_unlinked_rules(); + pfi_kif_purge(); + } + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); + } + /* not reached */ +} + +void +pf_unload_vnet_purge(void) +{ + + /* + * To clean up all kifs and rules we need + * two runs: the first one clears the reference + * flags, so that pf_purge_expired_states() no + * longer raises them, and the second run then + * frees. + */ + pf_purge_unlinked_rules(); + pfi_kif_purge(); + + /* + * Now purge everything. + */ + pf_purge_expired_states(0, pf_hashmask); + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(); + + /* + * Now all kifs & rules should be unreferenced, + * thus should be successfully freed. + */ + pf_purge_unlinked_rules(); + pfi_kif_purge(); +} + + +u_int32_t +pf_state_expires(const struct pf_state *state) +{ + u_int32_t timeout; + u_int32_t start; + u_int32_t end; + u_int32_t states; + + /* handle all PFTM_* > PFTM_MAX here */ + if (state->timeout == PFTM_PURGE) + return (time_uptime); + KASSERT(state->timeout != PFTM_UNLINKED, + ("pf_state_expires: timeout == PFTM_UNLINKED")); + KASSERT((state->timeout < PFTM_MAX), + ("pf_state_expires: timeout > PFTM_MAX")); + timeout = state->rule.ptr->timeout[state->timeout]; + if (!timeout) + timeout = V_pf_default_rule.timeout[state->timeout]; + start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; + if (start) { + end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; + states = counter_u64_fetch(state->rule.ptr->states_cur); + } else { + start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START]; + end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END]; + states = V_pf_status.states; + } + if (end && states > start && start < end) { + if (states < end) + return (state->expire + timeout * (end - states) / + (end - start)); + else + return (time_uptime); + } + return (state->expire + timeout); +} + +void +pf_purge_expired_src_nodes() +{ + struct pf_src_node_list freelist; + struct pf_srchash *sh; + struct pf_src_node *cur, *next; + int i; + + LIST_INIT(&freelist); + for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next) + if (cur->states == 0 && cur->expire <= time_uptime) { + pf_unlink_src_node(cur); + LIST_INSERT_HEAD(&freelist, cur, entry); + } else if (cur->rule.ptr != NULL) + cur->rule.ptr->rule_flag |= PFRULE_REFS; + PF_HASHROW_UNLOCK(sh); + } + + pf_free_src_nodes(&freelist); + + V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z); +} + +static void +pf_src_tree_remove_state(struct pf_state *s) +{ + struct 
pf_src_node *sn; + struct pf_srchash *sh; + uint32_t timeout; + + timeout = s->rule.ptr->timeout[PFTM_SRC_NODE] ? + s->rule.ptr->timeout[PFTM_SRC_NODE] : + V_pf_default_rule.timeout[PFTM_SRC_NODE]; + + if (s->src_node != NULL) { + sn = s->src_node; + sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)]; + PF_HASHROW_LOCK(sh); + if (s->src.tcp_est) + --sn->conn; + if (--sn->states == 0) + sn->expire = time_uptime + timeout; + PF_HASHROW_UNLOCK(sh); + } + if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { + sn = s->nat_src_node; + sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)]; + PF_HASHROW_LOCK(sh); + if (--sn->states == 0) + sn->expire = time_uptime + timeout; + PF_HASHROW_UNLOCK(sh); + } + s->src_node = s->nat_src_node = NULL; +} + +/* + * Unlink and potentially free a state. Function may be + * called with ID hash row locked, but always returns + * unlocked, since it needs to go through key hash locking. + */ +int +pf_unlink_state(struct pf_state *s, u_int flags) +{ + struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)]; + + if ((flags & PF_ENTER_LOCKED) == 0) + PF_HASHROW_LOCK(ih); + else + PF_HASHROW_ASSERT(ih); + + if (s->timeout == PFTM_UNLINKED) { + /* + * State is being processed + * by pf_unlink_state() in + * another thread. + */ + PF_HASHROW_UNLOCK(ih); + return (0); /* XXXGL: undefined actually */ + } + + if (s->src.state == PF_TCPS_PROXY_DST) { + /* XXX wire key the right one? */ + pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af, + &s->key[PF_SK_WIRE]->addr[1], + &s->key[PF_SK_WIRE]->addr[0], + s->key[PF_SK_WIRE]->port[1], + s->key[PF_SK_WIRE]->port[0], + s->src.seqhi, s->src.seqlo + 1, + TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL); + } + + LIST_REMOVE(s, entry); + pf_src_tree_remove_state(s); + + if (pfsync_delete_state_ptr != NULL) + pfsync_delete_state_ptr(s); + + STATE_DEC_COUNTERS(s); + + s->timeout = PFTM_UNLINKED; + + PF_HASHROW_UNLOCK(ih); + + pf_detach_state(s); + refcount_release(&s->refs); + + return (pf_release_state(s)); +} + +void +pf_free_state(struct pf_state *cur) +{ + + KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur)); + KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__, + cur->timeout)); + + pf_normalize_tcp_cleanup(cur); + uma_zfree(V_pf_state_z, cur); + counter_u64_add(V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1); +} + +/* + * Called only from pf_purge_thread(), thus serialized. + */ +static u_int +pf_purge_expired_states(u_int i, int maxcheck) +{ + struct pf_idhash *ih; + struct pf_state *s; + + V_pf_status.states = uma_zone_get_cur(V_pf_state_z); + + /* + * Go through hash and unlink states that expire now. + */ + while (maxcheck > 0) { + + ih = &V_pf_idhash[i]; +relock: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + if (pf_state_expires(s) <= time_uptime) { + V_pf_status.states -= + pf_unlink_state(s, PF_ENTER_LOCKED); + goto relock; + } + s->rule.ptr->rule_flag |= PFRULE_REFS; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->rule_flag |= PFRULE_REFS; + if (s->anchor.ptr != NULL) + s->anchor.ptr->rule_flag |= PFRULE_REFS; + s->kif->pfik_flags |= PFI_IFLAG_REFS; + if (s->rt_kif) + s->rt_kif->pfik_flags |= PFI_IFLAG_REFS; + } + PF_HASHROW_UNLOCK(ih); + + /* Return when we hit end of hash. 
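+ * Returning 0 signals a completed sweep; pf_purge_thread() uses that + * as the cue to run the less frequent fragment, source node, rule and + * kif purges before starting over at row 0.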
*/ + if (++i > pf_hashmask) { + V_pf_status.states = uma_zone_get_cur(V_pf_state_z); + return (0); + } + + maxcheck--; + } + + V_pf_status.states = uma_zone_get_cur(V_pf_state_z); + + return (i); +} + +static void +pf_purge_unlinked_rules() +{ + struct pf_rulequeue tmpq; + struct pf_rule *r, *r1; + + /* + * If we have an overloading task pending, then we'd + * better skip purging this time. There is a tiny + * probability that the overloading task references + * an already unlinked rule. + */ + PF_OVERLOADQ_LOCK(); + if (!SLIST_EMPTY(&V_pf_overloadqueue)) { + PF_OVERLOADQ_UNLOCK(); + return; + } + PF_OVERLOADQ_UNLOCK(); + + /* + * Do naive mark-and-sweep garbage collecting of old rules. + * Reference flag is raised by pf_purge_expired_states() + * and pf_purge_expired_src_nodes(). + * + * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK, + * use a temporary queue. + */ + TAILQ_INIT(&tmpq); + PF_UNLNKDRULES_LOCK(); + TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) { + if (!(r->rule_flag & PFRULE_REFS)) { + TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries); + TAILQ_INSERT_TAIL(&tmpq, r, entries); + } else + r->rule_flag &= ~PFRULE_REFS; + } + PF_UNLNKDRULES_UNLOCK(); + + if (!TAILQ_EMPTY(&tmpq)) { + PF_RULES_WLOCK(); + TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) { + TAILQ_REMOVE(&tmpq, r, entries); + pf_free_rule(r); + } + PF_RULES_WUNLOCK(); + } +} + +void +pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: { + u_int32_t a = ntohl(addr->addr32[0]); + printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, + (a>>8)&255, a&255); + if (p) { + p = ntohs(p); + printf(":%u", p); + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + u_int16_t b; + u_int8_t i, curstart, curend, maxstart, maxend; + curstart = curend = maxstart = maxend = 255; + for (i = 0; i < 8; i++) { + if (!addr->addr16[i]) { + if (curstart == 255) + curstart = i; + curend = i; + } else { + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; + } + curstart = curend = 255; + } + } + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; + } + for (i = 0; i < 8; i++) { + if (i >= maxstart && i <= maxend) { + if (i == 0) + printf(":"); + if (i == maxend) + printf(":"); + } else { + b = ntohs(addr->addr16[i]); + printf("%x", b); + if (i < 7) + printf(":"); + } + } + if (p) { + p = ntohs(p); + printf("[%u]", p); + } + break; + } +#endif /* INET6 */ + } +} + +void +pf_print_state(struct pf_state *s) +{ + pf_print_state_parts(s, NULL, NULL); +} + +static void +pf_print_state_parts(struct pf_state *s, + struct pf_state_key *skwp, struct pf_state_key *sksp) +{ + struct pf_state_key *skw, *sks; + u_int8_t proto, dir; + + /* Do our best to fill these, but they're skipped if NULL */ + skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); + sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); + proto = skw ? skw->proto : (sks ? sks->proto : 0); + dir = s ? 
s->direction : 0; + + switch (proto) { + case IPPROTO_IPV4: + printf("IPv4"); + break; + case IPPROTO_IPV6: + printf("IPv6"); + break; + case IPPROTO_TCP: + printf("TCP"); + break; + case IPPROTO_UDP: + printf("UDP"); + break; + case IPPROTO_ICMP: + printf("ICMP"); + break; + case IPPROTO_ICMPV6: + printf("ICMPv6"); + break; + default: + printf("%u", proto); + break; + } + switch (dir) { + case PF_IN: + printf(" in"); + break; + case PF_OUT: + printf(" out"); + break; + } + if (skw) { + printf(" wire: "); + pf_print_host(&skw->addr[0], skw->port[0], skw->af); + printf(" "); + pf_print_host(&skw->addr[1], skw->port[1], skw->af); + } + if (sks) { + printf(" stack: "); + if (sks != skw) { + pf_print_host(&sks->addr[0], sks->port[0], sks->af); + printf(" "); + pf_print_host(&sks->addr[1], sks->port[1], sks->af); + } else + printf("-"); + } + if (s) { + if (proto == IPPROTO_TCP) { + printf(" [lo=%u high=%u win=%u modulator=%u", + s->src.seqlo, s->src.seqhi, + s->src.max_win, s->src.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", + s->src.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" [lo=%u high=%u win=%u modulator=%u", + s->dst.seqlo, s->dst.seqhi, + s->dst.max_win, s->dst.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", + s->dst.wscale & PF_WSCALE_MASK); + printf("]"); + } + printf(" %u:%u", s->src.state, s->dst.state); + } +} + +void +pf_print_flags(u_int8_t f) +{ + if (f) + printf(" "); + if (f & TH_FIN) + printf("F"); + if (f & TH_SYN) + printf("S"); + if (f & TH_RST) + printf("R"); + if (f & TH_PUSH) + printf("P"); + if (f & TH_ACK) + printf("A"); + if (f & TH_URG) + printf("U"); + if (f & TH_ECE) + printf("E"); + if (f & TH_CWR) + printf("W"); +} + +#define PF_SET_SKIP_STEPS(i) \ + do { \ + while (head[i] != cur) { \ + head[i]->skip[i].ptr = cur; \ + head[i] = TAILQ_NEXT(head[i], entries); \ + } \ + } while (0) + +void +pf_calc_skip_steps(struct pf_rulequeue *rules) +{ + struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; + int i; + + cur = TAILQ_FIRST(rules); + prev = cur; + for (i = 0; i < PF_SKIP_COUNT; ++i) + head[i] = cur; + while (cur != NULL) { + + if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) + PF_SET_SKIP_STEPS(PF_SKIP_IFP); + if (cur->direction != prev->direction) + PF_SET_SKIP_STEPS(PF_SKIP_DIR); + if (cur->af != prev->af) + PF_SET_SKIP_STEPS(PF_SKIP_AF); + if (cur->proto != prev->proto) + PF_SET_SKIP_STEPS(PF_SKIP_PROTO); + if (cur->src.neg != prev->src.neg || + pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); + if (cur->src.port[0] != prev->src.port[0] || + cur->src.port[1] != prev->src.port[1] || + cur->src.port_op != prev->src.port_op) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); + if (cur->dst.neg != prev->dst.neg || + pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) + PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); + if (cur->dst.port[0] != prev->dst.port[0] || + cur->dst.port[1] != prev->dst.port[1] || + cur->dst.port_op != prev->dst.port_op) + PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); + + prev = cur; + cur = TAILQ_NEXT(cur, entries); + } + for (i = 0; i < PF_SKIP_COUNT; ++i) + PF_SET_SKIP_STEPS(i); +} + +static int +pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) +{ + if (aw1->type != aw2->type) + return (1); + switch (aw1->type) { + case PF_ADDR_ADDRMASK: + case PF_ADDR_RANGE: + if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) + return (1); + if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) + return (1); + return (0); + case PF_ADDR_DYNIFTL: + return 
(aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); + case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: + return (0); + case PF_ADDR_TABLE: + return (aw1->p.tbl != aw2->p.tbl); + default: + printf("invalid address type: %d\n", aw1->type); + return (1); + } +} + +/** + * Checksum updates are a little complicated because the checksum in the TCP/UDP + * header isn't always a full checksum. In some cases (i.e. output) it's a + * pseudo-header checksum, which is a partial checksum over src/dst IP + * addresses, protocol number and length. + * + * That means we have the following cases: + * * Input or forwarding: we don't have TSO, the checksum fields are full + * checksums, we need to update the checksum whenever we change anything. + * * Output (i.e. the checksum is a pseudo-header checksum): + * x The field being updated is src/dst address or affects the length of + * the packet. We need to update the pseudo-header checksum (note that this + * checksum is not ones' complement). + * x Some other field is being modified (e.g. src/dst port numbers): We + * don't have to update anything. + **/ +u_int16_t +pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) +{ + u_int32_t l; + + if (udp && !cksum) + return (0x0000); + l = cksum + old - new; + l = (l >> 16) + (l & 65535); + l = l & 65535; + if (udp && !l) + return (0xFFFF); + return (l); +} + +u_int16_t +pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old, + u_int16_t new, u_int8_t udp) +{ + if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) + return (cksum); + + return (pf_cksum_fixup(cksum, old, new, udp)); +} + +static void +pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic, + u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u, + sa_family_t af) +{ + struct pf_addr ao; + u_int16_t po = *p; + + PF_ACPY(&ao, a, af); + PF_ACPY(a, an, af); + + if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) + *pc = ~*pc; + + *p = pn; + + switch (af) { +#ifdef INET + case AF_INET: + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + ao.addr16[0], an->addr16[0], 0), + ao.addr16[1], an->addr16[1], 0); + *p = pn; + + *pc = pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u); + + *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u); + + *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u); + break; +#endif /* INET6 */ + } + + if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | + CSUM_DELAY_DATA_IPV6)) { + *pc = ~*pc; + if (! *pc) + *pc = 0xffff; + } +} + +/* Changes a u_int32_t. 
Uses a void * so there are no align restrictions */ +void +pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) +{ + u_int32_t ao; + + memcpy(&ao, a, sizeof(ao)); + memcpy(a, &an, sizeof(u_int32_t)); + *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), + ao % 65536, an % 65536, u); +} + +void +pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp) +{ + u_int32_t ao; + + memcpy(&ao, a, sizeof(ao)); + memcpy(a, &an, sizeof(u_int32_t)); + + *c = pf_proto_cksum_fixup(m, + pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp), + ao % 65536, an % 65536, udp); +} + +#ifdef INET6 +static void +pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) +{ + struct pf_addr ao; + + PF_ACPY(&ao, a, AF_INET6); + PF_ACPY(a, an, AF_INET6); + + *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*c, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u); +} +#endif /* INET6 */ + +static void +pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, + struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, + u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) +{ + struct pf_addr oia, ooa; + + PF_ACPY(&oia, ia, af); + if (oa) + PF_ACPY(&ooa, oa, af); + + /* Change inner protocol port, fix inner protocol checksum. */ + if (ip != NULL) { + u_int16_t oip = *ip; + u_int32_t opc; + + if (pc != NULL) + opc = *pc; + *ip = np; + if (pc != NULL) + *pc = pf_cksum_fixup(*pc, oip, *ip, u); + *ic = pf_cksum_fixup(*ic, oip, *ip, 0); + if (pc != NULL) + *ic = pf_cksum_fixup(*ic, opc, *pc, 0); + } + /* Change inner ip address, fix inner ip and icmp checksums. */ + PF_ACPY(ia, na, af); + switch (af) { +#ifdef INET + case AF_INET: { + u_int32_t oh2c = *h2c; + + *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, + oia.addr16[0], ia->addr16[0], 0), + oia.addr16[1], ia->addr16[1], 0); + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + oia.addr16[0], ia->addr16[0], 0), + oia.addr16[1], ia->addr16[1], 0); + *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + oia.addr16[0], ia->addr16[0], u), + oia.addr16[1], ia->addr16[1], u), + oia.addr16[2], ia->addr16[2], u), + oia.addr16[3], ia->addr16[3], u), + oia.addr16[4], ia->addr16[4], u), + oia.addr16[5], ia->addr16[5], u), + oia.addr16[6], ia->addr16[6], u), + oia.addr16[7], ia->addr16[7], u); + break; +#endif /* INET6 */ + } + /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. 
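+ * For IPv4 the outer address is covered only by the IP header + * checksum (*hc), while for IPv6 it enters the ICMPv6 pseudo-header, + * so there the ICMPv6 checksum (*ic) is the one that needs fixing.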
*/ + if (oa) { + PF_ACPY(oa, na, af); + switch (af) { +#ifdef INET + case AF_INET: + *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, + ooa.addr16[0], oa->addr16[0], 0), + ooa.addr16[1], oa->addr16[1], 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + ooa.addr16[0], oa->addr16[0], u), + ooa.addr16[1], oa->addr16[1], u), + ooa.addr16[2], oa->addr16[2], u), + ooa.addr16[3], oa->addr16[3], u), + ooa.addr16[4], oa->addr16[4], u), + ooa.addr16[5], oa->addr16[5], u), + ooa.addr16[6], oa->addr16[6], u), + ooa.addr16[7], oa->addr16[7], u); + break; +#endif /* INET6 */ + } + } +} + + +/* + * Need to modulate the sequence numbers in the TCP SACK option + * (credits to Krzysztof Pfaff for report and patch) + */ +static int +pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *dst) +{ + int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; + u_int8_t opts[TCP_MAXOLEN], *opt = opts; + int copyback = 0, i, olen; + struct sackblk sack; + +#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) + if (hlen < TCPOLEN_SACKLEN || + !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) + return 0; + + while (hlen >= TCPOLEN_SACKLEN) { + olen = opt[1]; + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_SACK: + if (olen > hlen) + olen = hlen; + if (olen >= TCPOLEN_SACKLEN) { + for (i = 2; i + TCPOLEN_SACK <= olen; + i += TCPOLEN_SACK) { + memcpy(&sack, &opt[i], sizeof(sack)); + pf_change_proto_a(m, &sack.start, &th->th_sum, + htonl(ntohl(sack.start) - dst->seqdiff), 0); + pf_change_proto_a(m, &sack.end, &th->th_sum, + htonl(ntohl(sack.end) - dst->seqdiff), 0); + memcpy(&opt[i], &sack, sizeof(sack)); + } + copyback = 1; + } + /* FALLTHROUGH */ + default: + if (olen < 2) + olen = 2; + hlen -= olen; + opt += olen; + } + } + + if (copyback) + m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts); + return (copyback); +} + +static void +pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af, + const struct pf_addr *saddr, const struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, + u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, + u_int16_t rtag, struct ifnet *ifp) +{ + struct pf_send_entry *pfse; + struct mbuf *m; + int len, tlen; +#ifdef INET + struct ip *h = NULL; +#endif /* INET */ +#ifdef INET6 + struct ip6_hdr *h6 = NULL; +#endif /* INET6 */ + struct tcphdr *th; + char *opt; + struct pf_mtag *pf_mtag; + + len = 0; + th = NULL; + + /* maximum segment size tcp option */ + tlen = sizeof(struct tcphdr); + if (mss) + tlen += 4; + + switch (af) { +#ifdef INET + case AF_INET: + len = sizeof(struct ip) + tlen; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + len = sizeof(struct ip6_hdr) + tlen; + break; +#endif /* INET6 */ + default: + panic("%s: unsupported af %d", __func__, af); + } + + /* Allocate outgoing queue entry, mbuf and mbuf tag. 
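+ * Everything is allocated M_NOWAIT because we may be deep in the + * packet processing path; on any failure the reply is silently + * skipped.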
*/ + pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); + if (pfse == NULL) + return; + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + free(pfse, M_PFTEMP); + return; + } +#ifdef MAC + mac_netinet_firewall_send(m); +#endif + if ((pf_mtag = pf_get_mtag(m)) == NULL) { + free(pfse, M_PFTEMP); + m_freem(m); + return; + } + if (tag) + m->m_flags |= M_SKIP_FIREWALL; + pf_mtag->tag = rtag; + + if (r != NULL && r->rtableid >= 0) + M_SETFIB(m, r->rtableid); + +#ifdef ALTQ + if (r != NULL && r->qid) { + pf_mtag->qid = r->qid; + + /* add hints for ecn */ + pf_mtag->hdr = mtod(m, struct ip *); + } +#endif /* ALTQ */ + m->m_data += max_linkhdr; + m->m_pkthdr.len = m->m_len = len; + m->m_pkthdr.rcvif = NULL; + bzero(m->m_data, len); + switch (af) { +#ifdef INET + case AF_INET: + h = mtod(m, struct ip *); + + /* IP header fields included in the TCP checksum */ + h->ip_p = IPPROTO_TCP; + h->ip_len = htons(tlen); + h->ip_src.s_addr = saddr->v4.s_addr; + h->ip_dst.s_addr = daddr->v4.s_addr; + + th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + h6 = mtod(m, struct ip6_hdr *); + + /* IP header fields included in the TCP checksum */ + h6->ip6_nxt = IPPROTO_TCP; + h6->ip6_plen = htons(tlen); + memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); + memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); + + th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); + break; +#endif /* INET6 */ + } + + /* TCP header */ + th->th_sport = sport; + th->th_dport = dport; + th->th_seq = htonl(seq); + th->th_ack = htonl(ack); + th->th_off = tlen >> 2; + th->th_flags = flags; + th->th_win = htons(win); + + if (mss) { + opt = (char *)(th + 1); + opt[0] = TCPOPT_MAXSEG; + opt[1] = 4; + HTONS(mss); + bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); + } + + switch (af) { +#ifdef INET + case AF_INET: + /* TCP checksum */ + th->th_sum = in_cksum(m, len); + + /* Finish the IP header */ + h->ip_v = 4; + h->ip_hl = sizeof(*h) >> 2; + h->ip_tos = IPTOS_LOWDELAY; + h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0); + h->ip_len = htons(len); + h->ip_ttl = ttl ? ttl : V_ip_defttl; + h->ip_sum = 0; + + pfse->pfse_type = PFSE_IP; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + /* TCP checksum */ + th->th_sum = in6_cksum(m, IPPROTO_TCP, + sizeof(struct ip6_hdr), tlen); + + h6->ip6_vfc |= IPV6_VERSION; + h6->ip6_hlim = IPV6_DEFHLIM; + + pfse->pfse_type = PFSE_IP6; + break; +#endif /* INET6 */ + } + pfse->pfse_m = m; + pf_send(pfse); +} + +static int +pf_ieee8021q_setpcp(struct mbuf *m, u_int8_t prio) +{ + struct m_tag *mtag; + + KASSERT(prio <= PF_PRIO_MAX, + ("%s with invalid pcp", __func__)); + + mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL); + if (mtag == NULL) { + mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_OUT, + sizeof(uint8_t), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + m_tag_prepend(m, mtag); + } + + *(uint8_t *)(mtag + 1) = prio; + return (0); +} + +static int +pf_match_ieee8021q_pcp(u_int8_t prio, struct mbuf *m) +{ + struct m_tag *mtag; + u_int8_t mpcp; + + mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); + if (mtag == NULL) + return (0); + + if (prio == PF_PRIO_ZERO) + prio = 0; + + mpcp = *(uint8_t *)(mtag + 1); + + return (mpcp == prio); +} + +static void +pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, + struct pf_rule *r) +{ + struct pf_send_entry *pfse; + struct mbuf *m0; + struct pf_mtag *pf_mtag; + + /* Allocate outgoing queue entry, mbuf and mbuf tag. 
*/ + pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); + if (pfse == NULL) + return; + + if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) { + free(pfse, M_PFTEMP); + return; + } + + if ((pf_mtag = pf_get_mtag(m0)) == NULL) { + free(pfse, M_PFTEMP); + return; + } + /* XXX: revisit */ + m0->m_flags |= M_SKIP_FIREWALL; + + if (r->rtableid >= 0) + M_SETFIB(m0, r->rtableid); + +#ifdef ALTQ + if (r->qid) { + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->hdr = mtod(m0, struct ip *); + } +#endif /* ALTQ */ + + switch (af) { +#ifdef INET + case AF_INET: + pfse->pfse_type = PFSE_ICMP; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pfse->pfse_type = PFSE_ICMP6; + break; +#endif /* INET6 */ + } + pfse->pfse_m = m0; + pfse->icmpopts.type = type; + pfse->icmpopts.code = code; + pf_send(pfse); +} + +/* + * Return 1 if the addresses a and b match (with mask m), otherwise return 0. + * If n is 0, they match if they are equal. If n is != 0, they match if they + * are different. + */ +int +pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, + struct pf_addr *b, sa_family_t af) +{ + int match = 0; + + switch (af) { +#ifdef INET + case AF_INET: + if ((a->addr32[0] & m->addr32[0]) == + (b->addr32[0] & m->addr32[0])) + match++; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (((a->addr32[0] & m->addr32[0]) == + (b->addr32[0] & m->addr32[0])) && + ((a->addr32[1] & m->addr32[1]) == + (b->addr32[1] & m->addr32[1])) && + ((a->addr32[2] & m->addr32[2]) == + (b->addr32[2] & m->addr32[2])) && + ((a->addr32[3] & m->addr32[3]) == + (b->addr32[3] & m->addr32[3]))) + match++; + break; +#endif /* INET6 */ + } + if (match) { + if (n) + return (0); + else + return (1); + } else { + if (n) + return (1); + else + return (0); + } +} + +/* + * Return 1 if b <= a <= e, otherwise return 0. 
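+ * For IPv6 the two loops below walk the four 32-bit words from most + * to least significant, which amounts to an unsigned 128-bit + * comparison of a against each bound.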
+ */ +int +pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, + struct pf_addr *a, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || + (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) + return (0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + int i; + + /* check a >= b */ + for (i = 0; i < 4; ++i) + if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) + break; + else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) + return (0); + /* check a <= e */ + for (i = 0; i < 4; ++i) + if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) + break; + else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) + return (0); + break; + } +#endif /* INET6 */ + } + return (1); +} + +static int +pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) +{ + switch (op) { + case PF_OP_IRG: + return ((p > a1) && (p < a2)); + case PF_OP_XRG: + return ((p < a1) || (p > a2)); + case PF_OP_RRG: + return ((p >= a1) && (p <= a2)); + case PF_OP_EQ: + return (p == a1); + case PF_OP_NE: + return (p != a1); + case PF_OP_LT: + return (p < a1); + case PF_OP_LE: + return (p <= a1); + case PF_OP_GT: + return (p > a1); + case PF_OP_GE: + return (p >= a1); + } + return (0); /* never reached */ +} + +int +pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) +{ + NTOHS(a1); + NTOHS(a2); + NTOHS(p); + return (pf_match(op, a1, a2, p)); +} + +static int +pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) +{ + if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + return (0); + return (pf_match(op, a1, a2, u)); +} + +static int +pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) +{ + if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + return (0); + return (pf_match(op, a1, a2, g)); +} + +int +pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag) +{ + if (*tag == -1) + *tag = mtag; + + return ((!r->match_tag_not && r->match_tag == *tag) || + (r->match_tag_not && r->match_tag != *tag)); +} + +int +pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag) +{ + + KASSERT(tag > 0, ("%s: tag %d", __func__, tag)); + + if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(m)) == NULL)) + return (ENOMEM); + + pd->pf_mtag->tag = tag; + + return (0); +} + +#define PF_ANCHOR_STACKSIZE 32 +struct pf_anchor_stackframe { + struct pf_ruleset *rs; + struct pf_rule *r; /* XXX: + match bit */ + struct pf_anchor *child; +}; + +/* + * XXX: We rely on malloc(9) returning pointer aligned addresses. 
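+ * That alignment keeps the low bits of the rule pointer zero, so + * PF_ANCHORSTACK_MATCH can be stored in bit 0 and PF_ANCHOR_RULE() + * recovers the real pointer by masking it off.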
+ */ +#define PF_ANCHORSTACK_MATCH 0x00000001 +#define PF_ANCHORSTACK_MASK (PF_ANCHORSTACK_MATCH) + +#define PF_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH) +#define PF_ANCHOR_RULE(f) (struct pf_rule *) \ + ((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK) +#define PF_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \ + ((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \ +} while (0) + +void +pf_step_into_anchor(struct pf_anchor_stackframe *stack, int *depth, + struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a, + int *match) +{ + struct pf_anchor_stackframe *f; + + PF_RULES_RASSERT(); + + if (match) + *match = 0; + if (*depth >= PF_ANCHOR_STACKSIZE) { + printf("%s: anchor stack overflow on %s\n", + __func__, (*r)->anchor->name); + *r = TAILQ_NEXT(*r, entries); + return; + } else if (*depth == 0 && a != NULL) + *a = *r; + f = stack + (*depth)++; + f->rs = *rs; + f->r = *r; + if ((*r)->anchor_wildcard) { + struct pf_anchor_node *parent = &(*r)->anchor->children; + + if ((f->child = RB_MIN(pf_anchor_node, parent)) == NULL) { + *r = NULL; + return; + } + *rs = &f->child->ruleset; + } else { + f->child = NULL; + *rs = &(*r)->anchor->ruleset; + } + *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); +} + +int +pf_step_out_of_anchor(struct pf_anchor_stackframe *stack, int *depth, + struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a, + int *match) +{ + struct pf_anchor_stackframe *f; + struct pf_rule *fr; + int quick = 0; + + PF_RULES_RASSERT(); + + do { + if (*depth <= 0) + break; + f = stack + *depth - 1; + fr = PF_ANCHOR_RULE(f); + if (f->child != NULL) { + struct pf_anchor_node *parent; + + /* + * This block traverses through + * a wildcard anchor. + */ + parent = &fr->anchor->children; + if (match != NULL && *match) { + /* + * If any of "*" matched, then + * "foo/ *" matched, mark frame + * appropriately. 
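+ * The result is latched into the frame's rule pointer tag because + * *match is cleared again before the next child ruleset is walked.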
+ */ + PF_ANCHOR_SET_MATCH(f); + *match = 0; + } + f->child = RB_NEXT(pf_anchor_node, parent, f->child); + if (f->child != NULL) { + *rs = &f->child->ruleset; + *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); + if (*r == NULL) + continue; + else + break; + } + } + (*depth)--; + if (*depth == 0 && a != NULL) + *a = NULL; + *rs = f->rs; + if (PF_ANCHOR_MATCH(f) || (match != NULL && *match)) + quick = fr->quick; + *r = TAILQ_NEXT(fr, entries); + } while (*r == NULL); + + return (quick); +} + +#ifdef INET6 +void +pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, + struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | + ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); + break; +#endif /* INET */ + case AF_INET6: + naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | + ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); + naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | + ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); + naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | + ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); + naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | + ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); + break; + } +} + +void +pf_addr_inc(struct pf_addr *addr, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); + break; +#endif /* INET */ + case AF_INET6: + if (addr->addr32[3] == 0xffffffff) { + addr->addr32[3] = 0; + if (addr->addr32[2] == 0xffffffff) { + addr->addr32[2] = 0; + if (addr->addr32[1] == 0xffffffff) { + addr->addr32[1] = 0; + addr->addr32[0] = + htonl(ntohl(addr->addr32[0]) + 1); + } else + addr->addr32[1] = + htonl(ntohl(addr->addr32[1]) + 1); + } else + addr->addr32[2] = + htonl(ntohl(addr->addr32[2]) + 1); + } else + addr->addr32[3] = + htonl(ntohl(addr->addr32[3]) + 1); + break; + } +} +#endif /* INET6 */ + +int +pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) +{ + struct pf_addr *saddr, *daddr; + u_int16_t sport, dport; + struct inpcbinfo *pi; + struct inpcb *inp; + + pd->lookup.uid = UID_MAX; + pd->lookup.gid = GID_MAX; + + switch (pd->proto) { + case IPPROTO_TCP: + if (pd->hdr.tcp == NULL) + return (-1); + sport = pd->hdr.tcp->th_sport; + dport = pd->hdr.tcp->th_dport; + pi = &V_tcbinfo; + break; + case IPPROTO_UDP: + if (pd->hdr.udp == NULL) + return (-1); + sport = pd->hdr.udp->uh_sport; + dport = pd->hdr.udp->uh_dport; + pi = &V_udbinfo; + break; + default: + return (-1); + } + if (direction == PF_IN) { + saddr = pd->src; + daddr = pd->dst; + } else { + u_int16_t p; + + p = sport; + sport = dport; + dport = p; + saddr = pd->dst; + daddr = pd->src; + } + switch (pd->af) { +#ifdef INET + case AF_INET: + inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, + dport, INPLOOKUP_RLOCKPCB, NULL, m); + if (inp == NULL) { + inp = in_pcblookup_mbuf(pi, saddr->v4, sport, + daddr->v4, dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, NULL, m); + if (inp == NULL) + return (-1); + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, + dport, INPLOOKUP_RLOCKPCB, NULL, m); + if (inp == NULL) { + inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, + &daddr->v6, dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, NULL, m); + if (inp == NULL) + return (-1); + } + break; +#endif /* INET6 */ + + default: + return (-1); + } + 
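+ /* + * Both lookups above try an exact four-tuple PCB match first and fall + * back to a wildcard (listening socket) match; on success the inpcb is + * returned read-locked, as asserted below. + */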
INP_RLOCK_ASSERT(inp); +#ifndef __rtems__ + pd->lookup.uid = inp->inp_cred->cr_uid; + pd->lookup.gid = inp->inp_cred->cr_groups[0]; +#else /* __rtems__ */ + pd->lookup.uid = BSD_DEFAULT_UID; + pd->lookup.gid = BSD_DEFAULT_GID; +#endif /* __rtems__ */ + INP_RUNLOCK(inp); + + return (1); +} + +static u_int8_t +pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) +{ + int hlen; + u_int8_t hdr[60]; + u_int8_t *opt, optlen; + u_int8_t wscale = 0; + + hlen = th_off << 2; /* hlen <= sizeof(hdr) */ + if (hlen <= sizeof(struct tcphdr)) + return (0); + if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) + return (0); + opt = hdr + sizeof(struct tcphdr); + hlen -= sizeof(struct tcphdr); + while (hlen >= 3) { + switch (*opt) { + case TCPOPT_EOL: + case TCPOPT_NOP: + ++opt; + --hlen; + break; + case TCPOPT_WINDOW: + wscale = opt[2]; + if (wscale > TCP_MAX_WINSHIFT) + wscale = TCP_MAX_WINSHIFT; + wscale |= PF_WSCALE_FLAG; + /* FALLTHROUGH */ + default: + optlen = opt[1]; + if (optlen < 2) + optlen = 2; + hlen -= optlen; + opt += optlen; + break; + } + } + return (wscale); +} + +static u_int16_t +pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) +{ + int hlen; + u_int8_t hdr[60]; + u_int8_t *opt, optlen; + u_int16_t mss = V_tcp_mssdflt; + + hlen = th_off << 2; /* hlen <= sizeof(hdr) */ + if (hlen <= sizeof(struct tcphdr)) + return (0); + if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) + return (0); + opt = hdr + sizeof(struct tcphdr); + hlen -= sizeof(struct tcphdr); + while (hlen >= TCPOLEN_MAXSEG) { + switch (*opt) { + case TCPOPT_EOL: + case TCPOPT_NOP: + ++opt; + --hlen; + break; + case TCPOPT_MAXSEG: + bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); + NTOHS(mss); + /* FALLTHROUGH */ + default: + optlen = opt[1]; + if (optlen < 2) + optlen = 2; + hlen -= optlen; + opt += optlen; + break; + } + } + return (mss); +} + +static u_int16_t +pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) +{ +#ifdef INET + struct nhop4_basic nh4; +#endif /* INET */ +#ifdef INET6 + struct nhop6_basic nh6; + struct in6_addr dst6; + uint32_t scopeid; +#endif /* INET6 */ + int hlen = 0; + uint16_t mss = 0; + + switch (af) { +#ifdef INET + case AF_INET: + hlen = sizeof(struct ip); + if (fib4_lookup_nh_basic(rtableid, addr->v4, 0, 0, &nh4) == 0) + mss = nh4.nh_mtu - hlen - sizeof(struct tcphdr); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + hlen = sizeof(struct ip6_hdr); + in6_splitscope(&addr->v6, &dst6, &scopeid); + if (fib6_lookup_nh_basic(rtableid, &dst6, scopeid, 0,0,&nh6)==0) + mss = nh6.nh_mtu - hlen - sizeof(struct tcphdr); + break; +#endif /* INET6 */ + } + + mss = max(V_tcp_mssdflt, mss); + mss = min(mss, offer); + mss = max(mss, 64); /* sanity - at least max opt space */ + return (mss); +} + +static u_int32_t +pf_tcp_iss(struct pf_pdesc *pd) +{ + MD5_CTX ctx; + u_int32_t digest[4]; + + if (V_pf_tcp_secret_init == 0) { + read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); + MD5Init(&V_pf_tcp_secret_ctx); + MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret, + sizeof(V_pf_tcp_secret)); + V_pf_tcp_secret_init = 1; + } + + ctx = V_pf_tcp_secret_ctx; + + MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); + MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); + if (pd->af == AF_INET6) { + MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); + MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); + } else { + MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); + 
MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); + } + MD5Final((u_char *)digest, &ctx); + V_pf_tcp_iss_off += 4096; +#define ISN_RANDOM_INCREMENT (4096 - 1) + return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) + + V_pf_tcp_iss_off); +#undef ISN_RANDOM_INCREMENT +} + +static int +pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, + struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, + struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp) +{ + struct pf_rule *nr = NULL; + struct pf_addr * const saddr = pd->src; + struct pf_addr * const daddr = pd->dst; + sa_family_t af = pd->af; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_src_node *nsn = NULL; + struct tcphdr *th = pd->hdr.tcp; + struct pf_state_key *sk = NULL, *nk = NULL; + u_short reason; + int rewrite = 0, hdrlen = 0; + int tag = -1, rtableid = -1; + int asd = 0; + int match = 0; + int state_icmp = 0; + u_int16_t sport = 0, dport = 0; + u_int16_t bproto_sum = 0, bip_sum = 0; + u_int8_t icmptype = 0, icmpcode = 0; + struct pf_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; + + PF_RULES_RASSERT(); + + if (inp != NULL) { + INP_LOCK_ASSERT(inp); +#ifndef __rtems__ + pd->lookup.uid = inp->inp_cred->cr_uid; + pd->lookup.gid = inp->inp_cred->cr_groups[0]; +#else /* __rtems__ */ + pd->lookup.uid = BSD_DEFAULT_UID; + pd->lookup.gid = BSD_DEFAULT_GID; +#endif /* __rtems__ */ + pd->lookup.done = 1; + } + + switch (pd->proto) { + case IPPROTO_TCP: + sport = th->th_sport; + dport = th->th_dport; + hdrlen = sizeof(*th); + break; + case IPPROTO_UDP: + sport = pd->hdr.udp->uh_sport; + dport = pd->hdr.udp->uh_dport; + hdrlen = sizeof(*pd->hdr.udp); + break; +#ifdef INET + case IPPROTO_ICMP: + if (pd->af != AF_INET) + break; + sport = dport = pd->hdr.icmp->icmp_id; + hdrlen = sizeof(*pd->hdr.icmp); + icmptype = pd->hdr.icmp->icmp_type; + icmpcode = pd->hdr.icmp->icmp_code; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + if (af != AF_INET6) + break; + sport = dport = pd->hdr.icmp6->icmp6_id; + hdrlen = sizeof(*pd->hdr.icmp6); + icmptype = pd->hdr.icmp6->icmp6_type; + icmpcode = pd->hdr.icmp6->icmp6_code; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ + default: + sport = dport = hdrlen = 0; + break; + } + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + /* check packet for BINAT/NAT/RDR */ + if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk, + &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) { + KASSERT(sk != NULL, ("%s: null sk", __func__)); + KASSERT(nk != NULL, ("%s: null nk", __func__)); + + if (pd->ip_sum) + bip_sum = *pd->ip_sum; + + switch (pd->proto) { + case IPPROTO_TCP: + bproto_sum = th->th_sum; + pd->proto_sum = &th->th_sum; + + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || + nk->port[pd->sidx] != sport) { + pf_change_ap(m, saddr, &th->th_sport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, af); + pd->sport = &th->th_sport; + sport = th->th_sport; + } + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || + nk->port[pd->didx] != dport) { + pf_change_ap(m, daddr, &th->th_dport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->didx], + 
nk->port[pd->didx], 0, af); + dport = th->th_dport; + pd->dport = &th->th_dport; + } + rewrite++; + break; + case IPPROTO_UDP: + bproto_sum = pd->hdr.udp->uh_sum; + pd->proto_sum = &pd->hdr.udp->uh_sum; + + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || + nk->port[pd->sidx] != sport) { + pf_change_ap(m, saddr, &pd->hdr.udp->uh_sport, + pd->ip_sum, &pd->hdr.udp->uh_sum, + &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, af); + sport = pd->hdr.udp->uh_sport; + pd->sport = &pd->hdr.udp->uh_sport; + } + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || + nk->port[pd->didx] != dport) { + pf_change_ap(m, daddr, &pd->hdr.udp->uh_dport, + pd->ip_sum, &pd->hdr.udp->uh_sum, + &nk->addr[pd->didx], + nk->port[pd->didx], 1, af); + dport = pd->hdr.udp->uh_dport; + pd->dport = &pd->hdr.udp->uh_dport; + } + rewrite++; + break; +#ifdef INET + case IPPROTO_ICMP: + nk->port[0] = nk->port[1]; + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) + pf_change_a(&daddr->v4.s_addr, pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + + if (nk->port[1] != pd->hdr.icmp->icmp_id) { + pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, sport, + nk->port[1], 0); + pd->hdr.icmp->icmp_id = nk->port[1]; + pd->sport = &pd->hdr.icmp->icmp_id; + } + m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + nk->port[0] = nk->port[1]; + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6)) + pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->sidx], 0); + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) + pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->didx], 0); + rewrite++; + break; +#endif /* INET6 */ + default: + switch (af) { +#ifdef INET + case AF_INET: + if (PF_ANEQ(saddr, + &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(daddr, + &nk->addr[pd->didx], AF_INET)) + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (PF_ANEQ(saddr, + &nk->addr[pd->sidx], AF_INET6)) + PF_ACPY(saddr, &nk->addr[pd->sidx], af); + + if (PF_ANEQ(daddr, + &nk->addr[pd->didx], AF_INET6)) + PF_ACPY(daddr, &nk->addr[pd->didx], af); + break; +#endif /* INET6 */ + } + break; + } + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif, M_GETFIB(m))) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + /* tcp/udp only. port_op always 0 in other cases */ + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL, M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + /* tcp/udp only. port_op always 0 in other cases */ + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + /* icmp only. 
type always 0 in other cases */ + else if (r->type && r->type != icmptype + 1) + r = TAILQ_NEXT(r, entries); + /* icmp only. code always 0 in other cases */ + else if (r->code && r->code != icmpcode + 1) + r = TAILQ_NEXT(r, entries); + else if (r->tos && !(r->tos == pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->rule_flag & PFRULE_FRAGMENT) + r = TAILQ_NEXT(r, entries); + else if (pd->proto == IPPROTO_TCP && + (r->flagset & th->th_flags) != r->flags) + r = TAILQ_NEXT(r, entries); + /* tcp/udp only. uid.op always 0 in other cases */ + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd, m), 1)) && + !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], + pd->lookup.uid)) + r = TAILQ_NEXT(r, entries); + /* tcp/udp only. gid.op always 0 in other cases */ + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd, m), 1)) && + !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], + pd->lookup.gid)) + r = TAILQ_NEXT(r, entries); + else if (r->prio && + !pf_match_ieee8021q_pcp(r->prio, m)) + r = TAILQ_NEXT(r, entries); + else if (r->prob && + r->prob <= arc4random()) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY && + (pd->proto != IPPROTO_TCP || !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), + r->os_fingerprint))) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; + if (r->anchor == NULL) { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + pf_step_into_anchor(anchor_stack, &asd, + &ruleset, PF_RULESET_FILTER, &r, &a, + &match); + } + if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd, + &ruleset, PF_RULESET_FILTER, &r, &a, &match)) + break; + } + r = *rm; + a = *am; + ruleset = *rsm; + + REASON_SET(&reason, PFRES_MATCH); + + if (r->log || (nr != NULL && nr->log)) { + if (rewrite) + m_copyback(m, off, hdrlen, pd->hdr.any); + PFLOG_PACKET(kif, m, af, direction, reason, r->log ? 
r : nr, a, + ruleset, pd, 1); + } + + if ((r->action == PF_DROP) && + ((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURNICMP) || + (r->rule_flag & PFRULE_RETURN))) { + /* undo NAT changes, if they have taken place */ + if (nr != NULL) { + PF_ACPY(saddr, &sk->addr[pd->sidx], af); + PF_ACPY(daddr, &sk->addr[pd->didx], af); + if (pd->sport) + *pd->sport = sk->port[pd->sidx]; + if (pd->dport) + *pd->dport = sk->port[pd->didx]; + if (pd->proto_sum) + *pd->proto_sum = bproto_sum; + if (pd->ip_sum) + *pd->ip_sum = bip_sum; + m_copyback(m, off, hdrlen, pd->hdr.any); + } + if (pd->proto == IPPROTO_TCP && + ((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURN)) && + !(th->th_flags & TH_RST)) { + u_int32_t ack = ntohl(th->th_seq) + pd->p_len; + int len = 0; +#ifdef INET + struct ip *h4; +#endif +#ifdef INET6 + struct ip6_hdr *h6; +#endif + + switch (af) { +#ifdef INET + case AF_INET: + h4 = mtod(m, struct ip *); + len = ntohs(h4->ip_len) - off; + break; +#endif +#ifdef INET6 + case AF_INET6: + h6 = mtod(m, struct ip6_hdr *); + len = ntohs(h6->ip6_plen) - (off - sizeof(*h6)); + break; +#endif + } + + if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af)) + REASON_SET(&reason, PFRES_PROTCKSUM); + else { + if (th->th_flags & TH_SYN) + ack++; + if (th->th_flags & TH_FIN) + ack++; + pf_send_tcp(m, r, af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, + r->return_ttl, 1, 0, kif->pfik_ifp); + } + } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && + r->return_icmp) + pf_send_icmp(m, r->return_icmp >> 8, + r->return_icmp & 255, af, r); + else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 && + r->return_icmp6) + pf_send_icmp(m, r->return_icmp6 >> 8, + r->return_icmp6 & 255, af, r); + } + + if (r->action == PF_DROP) + goto cleanup; + + if (tag > 0 && pf_tag_packet(m, pd, tag)) { + REASON_SET(&reason, PFRES_MEMORY); + goto cleanup; + } + if (rtableid >= 0) + M_SETFIB(m, rtableid); + + if (!state_icmp && (r->keep_state || nr != NULL || + (pd->flags & PFDESC_TCP_NORM))) { + int action; + action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off, + sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum, + hdrlen); + if (action != PF_PASS) + return (action); + } else { + if (sk != NULL) + uma_zfree(V_pf_state_key_z, sk); + if (nk != NULL) + uma_zfree(V_pf_state_key_z, nk); + } + + /* copy back packet headers if we performed NAT operations */ + if (rewrite) + m_copyback(m, off, hdrlen, pd->hdr.any); + + if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) && + direction == PF_OUT && + pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m)) + /* + * We want the state created, but we don't + * want to send this in case a partner + * firewall has to know about it to allow + * replies through it. 
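+ * (pfsync holds the deferred packet until the state update has gone + * out to the peer or a short timeout fires, then releases it.)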
+ */ + return (PF_DEFER); + + return (PF_PASS); + +cleanup: + if (sk != NULL) + uma_zfree(V_pf_state_key_z, sk); + if (nk != NULL) + uma_zfree(V_pf_state_key_z, nk); + return (PF_DROP); +} + +static int +pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, + struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk, + struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport, + u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm, + int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen) +{ + struct pf_state *s = NULL; + struct pf_src_node *sn = NULL; + struct tcphdr *th = pd->hdr.tcp; + u_int16_t mss = V_tcp_mssdflt; + u_short reason; + + /* check maximums */ + if (r->max_states && + (counter_u64_fetch(r->states_cur) >= r->max_states)) { + counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1); + REASON_SET(&reason, PFRES_MAXSTATES); + return (PF_DROP); + } + /* src node for filter rule */ + if ((r->rule_flag & PFRULE_SRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto csfailed; + } + /* src node for translation rule */ + if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto csfailed; + } + s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO); + if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto csfailed; + } + s->rule.ptr = r; + s->nat_rule.ptr = nr; + s->anchor.ptr = a; + STATE_INC_COUNTERS(s); + if (r->allow_opts) + s->state_flags |= PFSTATE_ALLOWOPTS; + if (r->rule_flag & PFRULE_STATESLOPPY) + s->state_flags |= PFSTATE_SLOPPY; + s->log = r->log & PF_LOG_ALL; + s->sync_state = PFSYNC_S_NONE; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; + switch (pd->proto) { + case IPPROTO_TCP: + s->src.seqlo = ntohl(th->th_seq); + s->src.seqhi = s->src.seqlo + pd->p_len + 1; + if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && + r->keep_state == PF_STATE_MODULATE) { + /* Generate sequence number modulator */ + if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == + 0) + s->src.seqdiff = 1; + pf_change_proto_a(m, &th->th_seq, &th->th_sum, + htonl(s->src.seqlo + s->src.seqdiff), 0); + *rewrite = 1; + } else + s->src.seqdiff = 0; + if (th->th_flags & TH_SYN) { + s->src.seqhi++; + s->src.wscale = pf_get_wscale(m, off, + th->th_off, pd->af); + } + s->src.max_win = MAX(ntohs(th->th_win), 1); + if (s->src.wscale & PF_WSCALE_MASK) { + /* Remove scale factor from initial window */ + int win = s->src.max_win; + win += 1 << (s->src.wscale & PF_WSCALE_MASK); + s->src.max_win = (win - 1) >> + (s->src.wscale & PF_WSCALE_MASK); + } + if (th->th_flags & TH_FIN) + s->src.seqhi++; + s->dst.seqhi = 1; + s->dst.max_win = 1; + s->src.state = TCPS_SYN_SENT; + s->dst.state = TCPS_CLOSED; + s->timeout = PFTM_TCP_FIRST_PACKET; + break; + case IPPROTO_UDP: + s->src.state = PFUDPS_SINGLE; + s->dst.state = PFUDPS_NO_TRAFFIC; + s->timeout = PFTM_UDP_FIRST_PACKET; + break; + case IPPROTO_ICMP: +#ifdef INET6 + case IPPROTO_ICMPV6: +#endif + s->timeout = PFTM_ICMP_FIRST_PACKET; + break; + default: + s->src.state = PFOTHERS_SINGLE; + s->dst.state = PFOTHERS_NO_TRAFFIC; + s->timeout = PFTM_OTHER_FIRST_PACKET; + } + + if (r->rt && r->rt != PF_FASTROUTE) { + if (pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, &sn)) { + REASON_SET(&reason, PFRES_MAPFAILED); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + uma_zfree(V_pf_state_z, s); + 
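+ /* csfailed below also releases sk/nk and the source node refs taken above */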
goto csfailed; + } + s->rt_kif = r->rpool.cur->kif; + } + + s->creation = time_uptime; + s->expire = time_uptime; + + if (sn != NULL) + s->src_node = sn; + if (nsn != NULL) { + /* XXX We only modify one side for now. */ + PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); + s->nat_src_node = nsn; + } + if (pd->proto == IPPROTO_TCP) { + if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, + off, pd, th, &s->src, &s->dst)) { + REASON_SET(&reason, PFRES_MEMORY); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + uma_zfree(V_pf_state_z, s); + return (PF_DROP); + } + if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && + pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, + &s->src, &s->dst, rewrite)) { + /* This really shouldn't happen!!! */ + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_normalize_tcp_stateful failed on first pkt")); + pf_normalize_tcp_cleanup(s); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + uma_zfree(V_pf_state_z, s); + return (PF_DROP); + } + } + s->direction = pd->dir; + + /* + * sk/nk may already have been set up by pf_get_translation(). + */ + if (nr == NULL) { + KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p", + __func__, nr, sk, nk)); + sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport); + if (sk == NULL) + goto csfailed; + nk = sk; + } else + KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p", + __func__, nr, sk, nk)); + + /* Swap sk/nk for PF_OUT. */ + if (pf_state_insert(BOUND_IFACE(r, kif), + (pd->dir == PF_IN) ? sk : nk, + (pd->dir == PF_IN) ? nk : sk, s)) { + if (pd->proto == IPPROTO_TCP) + pf_normalize_tcp_cleanup(s); + REASON_SET(&reason, PFRES_STATEINS); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + uma_zfree(V_pf_state_z, s); + return (PF_DROP); + } else + *sm = s; + + if (tag > 0) + s->tag = tag; + if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == + TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { + s->src.state = PF_TCPS_PROXY_SRC; + /* undo NAT changes, if they have taken place */ + if (nr != NULL) { + struct pf_state_key *skt = s->key[PF_SK_WIRE]; + if (pd->dir == PF_OUT) + skt = s->key[PF_SK_STACK]; + PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af); + PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af); + if (pd->sport) + *pd->sport = skt->port[pd->sidx]; + if (pd->dport) + *pd->dport = skt->port[pd->didx]; + if (pd->proto_sum) + *pd->proto_sum = bproto_sum; + if (pd->ip_sum) + *pd->ip_sum = bip_sum; + m_copyback(m, off, hdrlen, pd->hdr.any); + } + s->src.seqhi = htonl(arc4random()); + /* Find mss option */ + int rtid = M_GETFIB(m); + mss = pf_get_mss(m, off, th->th_off, pd->af); + mss = pf_calc_mss(pd->src, pd->af, rtid, mss); + mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); + s->src.mss = mss; + pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport, + th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL); + REASON_SET(&reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } + + return (PF_PASS); + +csfailed: + if (sk != NULL) + uma_zfree(V_pf_state_key_z, sk); + if (nk != NULL) + uma_zfree(V_pf_state_key_z, nk); + + if (sn != NULL) { + struct pf_srchash *sh; + + sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)]; + PF_HASHROW_LOCK(sh); + if (--sn->states == 0 && sn->expire == 0) { + pf_unlink_src_node(sn); + uma_zfree(V_pf_sources_z, sn); + counter_u64_add( + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); + } + PF_HASHROW_UNLOCK(sh); + } + + if (nsn != sn && nsn != NULL) { + struct pf_srchash *sh; + + sh = 
&V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)]; + PF_HASHROW_LOCK(sh); + if (--nsn->states == 0 && nsn->expire == 0) { + pf_unlink_src_node(nsn); + uma_zfree(V_pf_sources_z, nsn); + counter_u64_add( + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); + } + PF_HASHROW_UNLOCK(sh); + } + + return (PF_DROP); +} + +static int +pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, + struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am, + struct pf_ruleset **rsm) +{ + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + sa_family_t af = pd->af; + u_short reason; + int tag = -1; + int asd = 0; + int match = 0; + struct pf_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; + + PF_RULES_RASSERT(); + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif, M_GETFIB(m))) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL, M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->tos && !(r->tos == pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY) + r = TAILQ_NEXT(r, entries); + else if (pd->proto == IPPROTO_UDP && + (r->src.port_op || r->dst.port_op)) + r = TAILQ_NEXT(r, entries); + else if (pd->proto == IPPROTO_TCP && + (r->src.port_op || r->dst.port_op || r->flagset)) + r = TAILQ_NEXT(r, entries); + else if ((pd->proto == IPPROTO_ICMP || + pd->proto == IPPROTO_ICMPV6) && + (r->type || r->code)) + r = TAILQ_NEXT(r, entries); + else if (r->prio && + !pf_match_ieee8021q_pcp(r->prio, m)) + r = TAILQ_NEXT(r, entries); + else if (r->prob && r->prob <= + (arc4random() % (UINT_MAX - 1) + 1)) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? 
pd->pf_mtag->tag : 0)) + r = TAILQ_NEXT(r, entries); + else { + if (r->anchor == NULL) { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + pf_step_into_anchor(anchor_stack, &asd, + &ruleset, PF_RULESET_FILTER, &r, &a, + &match); + } + if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd, + &ruleset, PF_RULESET_FILTER, &r, &a, &match)) + break; + } + r = *rm; + a = *am; + ruleset = *rsm; + + REASON_SET(&reason, PFRES_MATCH); + + if (r->log) + PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd, + 1); + + if (r->action != PF_PASS) + return (PF_DROP); + + if (tag > 0 && pf_tag_packet(m, pd, tag)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + + return (PF_PASS); +} + +static int +pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, + struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, + struct pf_pdesc *pd, u_short *reason, int *copyback) +{ + struct tcphdr *th = pd->hdr.tcp; + u_int16_t win = ntohs(th->th_win); + u_int32_t ack, end, seq, orig_seq; + u_int8_t sws, dws; + int ackskew; + + if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { + sws = src->wscale & PF_WSCALE_MASK; + dws = dst->wscale & PF_WSCALE_MASK; + } else + sws = dws = 0; + + /* + * Sequence tracking algorithm from Guido van Rooij's paper: + * http://www.madison-gurkha.com/publications/tcp_filtering/ + * tcp_filtering.ps + */ + + orig_seq = seq = ntohl(th->th_seq); + if (src->seqlo == 0) { + /* First packet from this end. Set its state */ + + if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && + src->scrub == NULL) { + if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + } + + /* Deferred generation of sequence number modulator */ + if (dst->seqdiff && !src->seqdiff) { + /* use random iss for the TCP server */ + while ((src->seqdiff = arc4random() - seq) == 0) + ; + ack = ntohl(th->th_ack) - dst->seqdiff; + pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0); + *copyback = 1; + } else { + ack = ntohl(th->th_ack); + } + + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) { + end++; + if (dst->wscale & PF_WSCALE_FLAG) { + src->wscale = pf_get_wscale(m, off, th->th_off, + pd->af); + if (src->wscale & PF_WSCALE_FLAG) { + /* Remove scale factor from initial + * window */ + sws = src->wscale & PF_WSCALE_MASK; + win = ((u_int32_t)win + (1 << sws) - 1) + >> sws; + dws = dst->wscale & PF_WSCALE_MASK; + } else { + /* fixup other window */ + dst->max_win <<= dst->wscale & + PF_WSCALE_MASK; + /* in case of a retrans SYN|ACK */ + dst->wscale = 0; + } + } + } + if (th->th_flags & TH_FIN) + end++; + + src->seqlo = seq; + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + + /* + * May need to slide the window (seqhi may have been set by + * the crappy stack check or if we picked up the connection + * after establishment) + */ + if (src->seqhi == 1 || + SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) + src->seqhi = end + MAX(1, dst->max_win << dws); + if (win > src->max_win) + src->max_win = win; + + } else { + ack = ntohl(th->th_ack) - dst->seqdiff; + if (src->seqdiff) { + /* Modulate sequence numbers */ + pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0); + *copyback = 1; + } + end = seq + pd->p_len; + if (th->th_flags & 
TH_SYN) + end++; + if (th->th_flags & TH_FIN) + end++; + } + + if ((th->th_flags & TH_ACK) == 0) { + /* Let it pass through the ack skew check */ + ack = dst->seqlo; + } else if ((ack == 0 && + (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || + /* broken tcp stacks do not set ack */ + (dst->state < TCPS_SYN_SENT)) { + /* + * Many stacks (ours included) will set the ACK number in an + * FIN|ACK if the SYN times out -- no sequence to ACK. + */ + ack = dst->seqlo; + } + + if (seq == end) { + /* Ease sequencing restrictions on no data packets */ + seq = src->seqlo; + end = seq; + } + + ackskew = dst->seqlo - ack; + + + /* + * Need to demodulate the sequence numbers in any TCP SACK options + * (Selective ACK). We could optionally validate the SACK values + * against the current ACK window, either forwards or backwards, but + * I'm not confident that SACK has been implemented properly + * everywhere. It wouldn't surprise me if several stacks accidentally + * SACK too far backwards of previously ACKed data. There really aren't + * any security implications of bad SACKing unless the target stack + * doesn't validate the option length correctly. Someone trying to + * spoof into a TCP connection won't bother blindly sending SACK + * options anyway. + */ + if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { + if (pf_modulate_sack(m, off, pd, th, dst)) + *copyback = 1; + } + + +#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ + if (SEQ_GEQ(src->seqhi, end) && + /* Last octet inside other's window space */ + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && + /* Retrans: not more than one window back */ + (ackskew >= -MAXACKWINDOW) && + /* Acking not more than one reassembled fragment backwards */ + (ackskew <= (MAXACKWINDOW << sws)) && + /* Acking not more than one window forward */ + ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || + (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || + (pd->flags & PFDESC_IP_REAS) == 0)) { + /* Require an exact/+1 sequence match on resets when possible */ + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, copyback)) + return (PF_DROP); + } + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + + /* update states */ + if (th->th_flags & TH_SYN) + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_ACK) { + if (dst->state == TCPS_SYN_SENT) { + dst->state = TCPS_ESTABLISHED; + if (src->state == TCPS_ESTABLISHED && + (*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (dst->state == TCPS_CLOSING) + dst->state = TCPS_FIN_WAIT_2; + } + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* update expire time */ + (*state)->expire = time_uptime; + if (src->state >= TCPS_FIN_WAIT_2 && + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_CLOSED; + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_FIN_WAIT; + else if (src->state < TCPS_ESTABLISHED || + dst->state < TCPS_ESTABLISHED) + (*state)->timeout = 
PFTM_TCP_OPENING; + else if (src->state >= TCPS_CLOSING || + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_CLOSING; + else + (*state)->timeout = PFTM_TCP_ESTABLISHED; + + /* Fall through to PASS packet */ + + } else if ((dst->state < TCPS_SYN_SENT || + dst->state >= TCPS_FIN_WAIT_2 || + src->state >= TCPS_FIN_WAIT_2) && + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && + /* Within a window forward of the originating packet */ + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { + /* Within a window backward of the originating packet */ + + /* + * This currently handles three situations: + * 1) Stupid stacks will shotgun SYNs before their peer + * replies. + * 2) When PF catches an already established stream (the + * firewall rebooted, the state table was flushed, routes + * changed...) + * 3) Packets get funky immediately after the connection + * closes (this should catch Solaris spurious ACK|FINs + * that web servers like to spew after a close) + * + * This must be a little more careful than the above code + * since packet floods will also be caught here. We don't + * update the TTL here to mitigate the damage of a packet + * flood and so the same code can handle awkward establishment + * and a loosened connection close. + * In the establishment case, a correct peer response will + * validate the connection, go through the normal state code + * and keep updating the state TTL. + */ + + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: loose state match: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, + pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], + (unsigned long long)(*state)->packets[1], + pd->dir == PF_IN ? "in" : "out", + pd->dir == (*state)->direction ? "fwd" : "rev"); + } + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, copyback)) + return (PF_DROP); + } + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + /* + * Cannot set dst->seqhi here since this could be a shotgunned + * SYN and not an already established connection. + */ + + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* Fall through to PASS packet */ + + } else { + if ((*state)->dst.state == TCPS_SYN_SENT && + (*state)->src.state == TCPS_SYN_SENT) { + /* Send RST for state mismatches during handshake */ + if (!(th->th_flags & TH_RST)) + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, + pd->dst, pd->src, th->th_dport, + th->th_sport, ntohl(th->th_ack), 0, + TH_RST, 0, 0, + (*state)->rule.ptr->return_ttl, 1, 0, + kif->pfik_ifp); + src->seqlo = 0; + src->seqhi = 1; + src->max_win = 1; + } else if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD state: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu dir=%s,%s\n", + seq, orig_seq, ack, pd->p_len, ackskew, + (unsigned long long)(*state)->packets[0], + (unsigned long long)(*state)->packets[1], + pd->dir == PF_IN ? "in" : "out", + pd->dir == (*state)->direction ? 
"fwd" : "rev"); + printf("pf: State failure on: %c %c %c %c | %c %c\n", + SEQ_GEQ(src->seqhi, end) ? ' ' : '1', + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? + ' ': '2', + (ackskew >= -MAXACKWINDOW) ? ' ' : '3', + (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6'); + } + REASON_SET(reason, PFRES_BADSTATE); + return (PF_DROP); + } + + return (PF_PASS); +} + +static int +pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, + struct pf_state **state, struct pf_pdesc *pd, u_short *reason) +{ + struct tcphdr *th = pd->hdr.tcp; + + if (th->th_flags & TH_SYN) + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_ACK) { + if (dst->state == TCPS_SYN_SENT) { + dst->state = TCPS_ESTABLISHED; + if (src->state == TCPS_ESTABLISHED && + (*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (dst->state == TCPS_CLOSING) { + dst->state = TCPS_FIN_WAIT_2; + } else if (src->state == TCPS_SYN_SENT && + dst->state < TCPS_SYN_SENT) { + /* + * Handle a special sloppy case where we only see one + * half of the connection. If there is a ACK after + * the initial SYN without ever seeing a packet from + * the destination, set the connection to established. + */ + dst->state = src->state = TCPS_ESTABLISHED; + if ((*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (src->state == TCPS_CLOSING && + dst->state == TCPS_ESTABLISHED && + dst->seqlo == 0) { + /* + * Handle the closing of half connections where we + * don't see the full bidirectional FIN/ACK+ACK + * handshake. 
+ */ + dst->state = TCPS_CLOSING; + } + } + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* update expire time */ + (*state)->expire = time_uptime; + if (src->state >= TCPS_FIN_WAIT_2 && + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_CLOSED; + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_FIN_WAIT; + else if (src->state < TCPS_ESTABLISHED || + dst->state < TCPS_ESTABLISHED) + (*state)->timeout = PFTM_TCP_OPENING; + else if (src->state >= TCPS_CLOSING || + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_CLOSING; + else + (*state)->timeout = PFTM_TCP_ESTABLISHED; + + return (PF_PASS); +} + +static int +pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, int off, void *h, struct pf_pdesc *pd, + u_short *reason) +{ + struct pf_state_key_cmp key; + struct tcphdr *th = pd->hdr.tcp; + int copyback = 0; + struct pf_state_peer *src, *dst; + struct pf_state_key *sk; + + bzero(&key, sizeof(key)); + key.af = pd->af; + key.proto = IPPROTO_TCP; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = th->th_sport; + key.port[1] = th->th_dport; + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = th->th_sport; + key.port[0] = th->th_dport; + } + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + if (direction == (*state)->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + sk = (*state)->key[pd->didx]; + + if ((*state)->src.state == PF_TCPS_PROXY_SRC) { + if (direction != (*state)->direction) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } + if (th->th_flags & TH_SYN) { + if (ntohl(th->th_seq) != (*state)->src.seqlo) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + (*state)->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL); + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } else if (!(th->th_flags & TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } else if ((*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } else + (*state)->src.state = PF_TCPS_PROXY_DST; + } + if ((*state)->src.state == PF_TCPS_PROXY_DST) { + if (direction == (*state)->direction) { + if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } + (*state)->src.max_win = MAX(ntohs(th->th_win), 1); + if ((*state)->dst.seqhi == 1) + (*state)->dst.seqhi = htonl(arc4random()); + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, + &sk->addr[pd->sidx], &sk->addr[pd->didx], + sk->port[pd->sidx], sk->port[pd->didx], + (*state)->dst.seqhi, 0, TH_SYN, 0, + (*state)->src.mss, 0, 0, (*state)->tag, NULL); + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } else if (((th->th_flags & (TH_SYN|TH_ACK)) != + (TH_SYN|TH_ACK)) || + (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { + 
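+ /* not the SYN|ACK the proxied handshake expects from the server */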
REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } else { + (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); + (*state)->dst.seqlo = ntohl(th->th_seq); + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ntohl(th->th_seq) + 1, + TH_ACK, (*state)->src.max_win, 0, 0, 0, + (*state)->tag, NULL); + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, + &sk->addr[pd->sidx], &sk->addr[pd->didx], + sk->port[pd->sidx], sk->port[pd->didx], + (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, + TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL); + (*state)->src.seqdiff = (*state)->dst.seqhi - + (*state)->src.seqlo; + (*state)->dst.seqdiff = (*state)->src.seqhi - + (*state)->dst.seqlo; + (*state)->src.seqhi = (*state)->src.seqlo + + (*state)->dst.max_win; + (*state)->dst.seqhi = (*state)->dst.seqlo + + (*state)->src.max_win; + (*state)->src.wscale = (*state)->dst.wscale = 0; + (*state)->src.state = (*state)->dst.state = + TCPS_ESTABLISHED; + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } + } + + if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && + dst->state >= TCPS_FIN_WAIT_2 && + src->state >= TCPS_FIN_WAIT_2) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state reuse "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf("\n"); + } + /* XXX make sure it's the same direction ?? */ + (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; + pf_unlink_state(*state, PF_ENTER_LOCKED); + *state = NULL; + return (PF_DROP); + } + + if ((*state)->state_flags & PFSTATE_SLOPPY) { + if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP) + return (PF_DROP); + } else { + if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason, + ©back) == PF_DROP) + return (PF_DROP); + } + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || + nk->port[pd->sidx] != th->th_sport) + pf_change_ap(m, pd->src, &th->th_sport, + pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || + nk->port[pd->didx] != th->th_dport) + pf_change_ap(m, pd->dst, &th->th_dport, + pd->ip_sum, &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, pd->af); + copyback = 1; + } + + /* Copyback sequence modulation or stateful scrub changes if needed */ + if (copyback) + m_copyback(m, off, sizeof(*th), (caddr_t)th); + + return (PF_PASS); +} + +static int +pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, int off, void *h, struct pf_pdesc *pd) +{ + struct pf_state_peer *src, *dst; + struct pf_state_key_cmp key; + struct udphdr *uh = pd->hdr.udp; + + bzero(&key, sizeof(key)); + key.af = pd->af; + key.proto = IPPROTO_UDP; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = uh->uh_sport; + key.port[1] = uh->uh_dport; + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = uh->uh_sport; + key.port[0] = uh->uh_dport; + } + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + if (direction == (*state)->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + /* update 
states */ + if (src->state < PFUDPS_SINGLE) + src->state = PFUDPS_SINGLE; + if (dst->state == PFUDPS_SINGLE) + dst->state = PFUDPS_MULTIPLE; + + /* update expire time */ + (*state)->expire = time_uptime; + if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) + (*state)->timeout = PFTM_UDP_MULTIPLE; + else + (*state)->timeout = PFTM_UDP_SINGLE; + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || + nk->port[pd->sidx] != uh->uh_sport) + pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || + nk->port[pd->didx] != uh->uh_dport) + pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 1, pd->af); + m_copyback(m, off, sizeof(*uh), (caddr_t)uh); + } + + return (PF_PASS); +} + +static int +pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) +{ + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + u_int16_t icmpid = 0, *icmpsum; + u_int8_t icmptype; + int state_icmp = 0; + struct pf_state_key_cmp key; + + bzero(&key, sizeof(key)); + switch (pd->proto) { +#ifdef INET + case IPPROTO_ICMP: + icmptype = pd->hdr.icmp->icmp_type; + icmpid = pd->hdr.icmp->icmp_id; + icmpsum = &pd->hdr.icmp->icmp_cksum; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + icmptype = pd->hdr.icmp6->icmp6_type; + icmpid = pd->hdr.icmp6->icmp6_id; + icmpsum = &pd->hdr.icmp6->icmp6_cksum; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ + } + + if (!state_icmp) { + + /* + * ICMP query/reply message not related to a TCP/UDP packet. + * Search for an ICMP state. 
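+ * The ICMP id is stored in both port slots of the state key, so a + * query and its reply hash to the same state entry.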
+ */ + key.af = pd->af; + key.proto = pd->proto; + key.port[0] = key.port[1] = icmpid; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + } + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + (*state)->expire = time_uptime; + (*state)->timeout = PFTM_ICMP_ERROR_REPLY; + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + switch (pd->af) { +#ifdef INET + case AF_INET: + if (PF_ANEQ(pd->src, + &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], + AF_INET)) + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + + if (nk->port[0] != + pd->hdr.icmp->icmp_id) { + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, + nk->port[pd->sidx], 0); + pd->hdr.icmp->icmp_id = + nk->port[pd->sidx]; + } + + m_copyback(m, off, ICMP_MINLEN, + (caddr_t )pd->hdr.icmp); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (PF_ANEQ(pd->src, + &nk->addr[pd->sidx], AF_INET6)) + pf_change_a6(saddr, + &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->sidx], 0); + + if (PF_ANEQ(pd->dst, + &nk->addr[pd->didx], AF_INET6)) + pf_change_a6(daddr, + &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->didx], 0); + + m_copyback(m, off, sizeof(struct icmp6_hdr), + (caddr_t )pd->hdr.icmp6); + break; +#endif /* INET6 */ + } + } + return (PF_PASS); + + } else { + /* + * ICMP error message in response to a TCP/UDP packet. + * Extract the inner TCP/UDP header and search for that state. + */ + + struct pf_pdesc pd2; + bzero(&pd2, sizeof pd2); +#ifdef INET + struct ip h2; +#endif /* INET */ +#ifdef INET6 + struct ip6_hdr h2_6; + int terminal = 0; +#endif /* INET6 */ + int ipoff2 = 0; + int off2 = 0; + + pd2.af = pd->af; + /* Payload packet is from the opposite direction. */ + pd2.sidx = (direction == PF_IN) ? 1 : 0; + pd2.didx = (direction == PF_IN) ? 
0 : 1; + switch (pd->af) { +#ifdef INET + case AF_INET: + /* offset of h2 in mbuf chain */ + ipoff2 = off + ICMP_MINLEN; + + if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), + NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(ip)\n")); + return (PF_DROP); + } + /* + * ICMP error messages don't refer to non-first + * fragments + */ + if (h2.ip_off & htons(IP_OFFMASK)) { + REASON_SET(reason, PFRES_FRAG); + return (PF_DROP); + } + + /* offset of protocol header that follows h2 */ + off2 = ipoff2 + (h2.ip_hl << 2); + + pd2.proto = h2.ip_p; + pd2.src = (struct pf_addr *)&h2.ip_src; + pd2.dst = (struct pf_addr *)&h2.ip_dst; + pd2.ip_sum = &h2.ip_sum; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + ipoff2 = off + sizeof(struct icmp6_hdr); + + if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), + NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(ip6)\n")); + return (PF_DROP); + } + pd2.proto = h2_6.ip6_nxt; + pd2.src = (struct pf_addr *)&h2_6.ip6_src; + pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; + pd2.ip_sum = NULL; + off2 = ipoff2 + sizeof(h2_6); + do { + switch (pd2.proto) { + case IPPROTO_FRAGMENT: + /* + * ICMPv6 error messages for + * non-first fragments + */ + REASON_SET(reason, PFRES_FRAG); + return (PF_DROP); + case IPPROTO_AH: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct ip6_ext opt6; + + if (!pf_pull_hdr(m, off2, &opt6, + sizeof(opt6), NULL, reason, + pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMPv6 short opt\n")); + return (PF_DROP); + } + if (pd2.proto == IPPROTO_AH) + off2 += (opt6.ip6e_len + 2) * 4; + else + off2 += (opt6.ip6e_len + 1) * 8; + pd2.proto = opt6.ip6e_nxt; + /* goto the next header */ + break; + } + default: + terminal++; + break; + } + } while (!terminal); + break; +#endif /* INET6 */ + } + + switch (pd2.proto) { + case IPPROTO_TCP: { + struct tcphdr th; + u_int32_t seq; + struct pf_state_peer *src, *dst; + u_int8_t dws; + int copyback = 0; + + /* + * Only the first 8 bytes of the TCP header can be + * expected. Don't access any TCP header fields after + * th_seq, an ackskew test is not possible. 
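+ * (RFC 792 only requires an ICMP error to quote the IP header plus + * the first 8 octets of the offending datagram.)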
+ */ + if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, + pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(tcp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_TCP; + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[pd2.sidx] = th.th_sport; + key.port[pd2.didx] = th.th_dport; + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + if (direction == (*state)->direction) { + src = &(*state)->dst; + dst = &(*state)->src; + } else { + src = &(*state)->src; + dst = &(*state)->dst; + } + + if (src->wscale && dst->wscale) + dws = dst->wscale & PF_WSCALE_MASK; + else + dws = 0; + + /* Demodulate sequence number */ + seq = ntohl(th.th_seq) - src->seqdiff; + if (src->seqdiff) { + pf_change_a(&th.th_seq, icmpsum, + htonl(seq), 0); + copyback = 1; + } + + if (!((*state)->state_flags & PFSTATE_SLOPPY) && + (!SEQ_GEQ(src->seqhi, seq) || + !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD ICMP %d:%d ", + icmptype, pd->hdr.icmp->icmp_code); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); + printf(" state: "); + pf_print_state(*state); + printf(" seq=%u\n", seq); + } + REASON_SET(reason, PFRES_BADSTATE); + return (PF_DROP); + } else { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: OK ICMP %d:%d ", + icmptype, pd->hdr.icmp->icmp_code); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); + printf(" state: "); + pf_print_state(*state); + printf(" seq=%u\n", seq); + } + } + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != th.th_sport) + pf_change_icmp(pd2.src, &th.th_sport, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != th.th_dport) + pf_change_icmp(pd2.dst, &th.th_dport, + saddr, &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + copyback = 1; + } + + if (copyback) { + switch (pd2.af) { +#ifdef INET + case AF_INET: + m_copyback(m, off, ICMP_MINLEN, + (caddr_t )pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), + (caddr_t )&h2); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + m_copyback(m, off, + sizeof(struct icmp6_hdr), + (caddr_t )pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), + (caddr_t )&h2_6); + break; +#endif /* INET6 */ + } + m_copyback(m, off2, 8, (caddr_t)&th); + } + + return (PF_PASS); + break; + } + case IPPROTO_UDP: { + struct udphdr uh; + + if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), + NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(udp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_UDP; + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[pd2.sidx] = uh.uh_sport; + key.port[pd2.didx] = uh.uh_dport; + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], 
pd2.af) ||
+ nk->port[pd2.sidx] != uh.uh_sport)
+ pf_change_icmp(pd2.src, &uh.uh_sport,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], &uh.uh_sum,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 1, pd2.af);
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != uh.uh_dport)
+ pf_change_icmp(pd2.dst, &uh.uh_dport,
+ saddr, &nk->addr[pd2.didx],
+ nk->port[pd2.didx], &uh.uh_sum,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 1, pd2.af);
+
+ switch (pd2.af) {
+#ifdef INET
+ case AF_INET:
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t )pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ m_copyback(m, off,
+ sizeof(struct icmp6_hdr),
+ (caddr_t )pd->hdr.icmp6);
+ m_copyback(m, ipoff2, sizeof(h2_6),
+ (caddr_t )&h2_6);
+ break;
+#endif /* INET6 */
+ }
+ m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
+ }
+ return (PF_PASS);
+ break;
+ }
+#ifdef INET
+ case IPPROTO_ICMP: {
+ struct icmp iih;
+
+ if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
+ NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(icmp)\n"));
+ return (PF_DROP);
+ }
+
+ key.af = pd2.af;
+ key.proto = IPPROTO_ICMP;
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[0] = key.port[1] = iih.icmp_id;
+
+ STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af) ||
+ nk->port[pd2.sidx] != iih.icmp_id)
+ pf_change_icmp(pd2.src, &iih.icmp_id,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET);
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != iih.icmp_id)
+ pf_change_icmp(pd2.dst, &iih.icmp_id,
+ saddr, &nk->addr[pd2.didx],
+ nk->port[pd2.didx], NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET);
+
+ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
+ m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
+ }
+ return (PF_PASS);
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case IPPROTO_ICMPV6: {
+ struct icmp6_hdr iih;
+
+ if (!pf_pull_hdr(m, off2, &iih,
+ sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(icmp6)\n"));
+ return (PF_DROP);
+ }
+
+ key.af = pd2.af;
+ key.proto = IPPROTO_ICMPV6;
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[0] = key.port[1] = iih.icmp6_id;
+
+ STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af) ||
+ nk->port[pd2.sidx] != iih.icmp6_id)
+ pf_change_icmp(pd2.src, &iih.icmp6_id,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET6);
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != iih.icmp6_id)
+ pf_change_icmp(pd2.dst, &iih.icmp6_id,
+ saddr, &nk->addr[pd2.didx],
+ nk->port[pd2.didx], NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET6);
+
+ m_copyback(m, off, sizeof(struct icmp6_hdr),
+ (caddr_t)pd->hdr.icmp6);
+
m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); + m_copyback(m, off2, sizeof(struct icmp6_hdr), + (caddr_t)&iih); + } + return (PF_PASS); + break; + } +#endif /* INET6 */ + default: { + key.af = pd2.af; + key.proto = pd2.proto; + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = 0; + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af)) + pf_change_icmp(pd2.src, NULL, daddr, + &nk->addr[pd2.sidx], 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af)) + pf_change_icmp(pd2.dst, NULL, saddr, + &nk->addr[pd2.didx], 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + + switch (pd2.af) { +#ifdef INET + case AF_INET: + m_copyback(m, off, ICMP_MINLEN, + (caddr_t)pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + m_copyback(m, off, + sizeof(struct icmp6_hdr), + (caddr_t )pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), + (caddr_t )&h2_6); + break; +#endif /* INET6 */ + } + } + return (PF_PASS); + break; + } + } + } +} + +static int +pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, struct pf_pdesc *pd) +{ + struct pf_state_peer *src, *dst; + struct pf_state_key_cmp key; + + bzero(&key, sizeof(key)); + key.af = pd->af; + key.proto = pd->proto; + if (direction == PF_IN) { + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = key.port[1] = 0; + } else { + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = key.port[0] = 0; + } + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + if (direction == (*state)->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + /* update states */ + if (src->state < PFOTHERS_SINGLE) + src->state = PFOTHERS_SINGLE; + if (dst->state == PFOTHERS_SINGLE) + dst->state = PFOTHERS_MULTIPLE; + + /* update expire time */ + (*state)->expire = time_uptime; + if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) + (*state)->timeout = PFTM_OTHER_MULTIPLE; + else + (*state)->timeout = PFTM_OTHER_SINGLE; + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + KASSERT(nk, ("%s: nk is null", __func__)); + KASSERT(pd, ("%s: pd is null", __func__)); + KASSERT(pd->src, ("%s: pd->src is null", __func__)); + KASSERT(pd->dst, ("%s: pd->dst is null", __func__)); + switch (pd->af) { +#ifdef INET + case AF_INET: + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, + 0); + + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, + 0); + + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) + PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) + PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); +#endif /* INET6 */ + } + 
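/* XXX: note the AF_INET6 case above tests PF_ANEQ() with AF_INET,
+ * which compares only the first 32 bits of an IPv6 address; this
+ * looks like a copy-and-paste slip from the IPv4 branch. */
+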
} + return (PF_PASS); +} + +/* + * ipoff and off are measured from the start of the mbuf chain. + * h must be at "ipoff" on the mbuf chain. + */ +void * +pf_pull_hdr(struct mbuf *m, int off, void *p, int len, + u_short *actionp, u_short *reasonp, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: { + struct ip *h = mtod(m, struct ip *); + u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + + if (fragoff) { + if (fragoff >= len) + ACTION_SET(actionp, PF_PASS); + else { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_FRAG); + } + return (NULL); + } + if (m->m_pkthdr.len < off + len || + ntohs(h->ip_len) < off + len) { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_SHORT); + return (NULL); + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + + if (m->m_pkthdr.len < off + len || + (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < + (unsigned)(off + len)) { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_SHORT); + return (NULL); + } + break; + } +#endif /* INET6 */ + } + m_copydata(m, off, len, p); + return (p); +} + +#ifdef RADIX_MPATH +static int +pf_routable_oldmpath(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, + int rtableid) +{ + struct radix_node_head *rnh; + struct sockaddr_in *dst; + int ret = 1; + int check_mpath; +#ifdef INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro; +#else + struct route ro; +#endif + struct radix_node *rn; + struct rtentry *rt; + struct ifnet *ifp; + + check_mpath = 0; + /* XXX: stick to table 0 for now */ + rnh = rt_tables_get_rnh(0, af); + if (rnh != NULL && rn_mpath_capable(rnh)) + check_mpath = 1; + bzero(&ro, sizeof(ro)); + switch (af) { + case AF_INET: + dst = satosin(&ro.ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + break; +#ifdef INET6 + case AF_INET6: + /* + * Skip check for addresses with embedded interface scope, + * as they would always match anyway. 
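+ * (KAME-derived stacks embed the scope zone id in the address
+ * itself for link-local addresses, so such an address already
+ * names its interface.)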
+ */ + if (IN6_IS_SCOPE_EMBED(&addr->v6)) + goto out; + dst6 = (struct sockaddr_in6 *)&ro.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = addr->v6; + break; +#endif /* INET6 */ + default: + return (0); + } + + /* Skip checks for ipsec interfaces */ + if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) + goto out; + + switch (af) { +#ifdef INET6 + case AF_INET6: + in6_rtalloc_ign(&ro, 0, rtableid); + break; +#endif +#ifdef INET + case AF_INET: + in_rtalloc_ign((struct route *)&ro, 0, rtableid); + break; +#endif + } + + if (ro.ro_rt != NULL) { + /* No interface given, this is a no-route check */ + if (kif == NULL) + goto out; + + if (kif->pfik_ifp == NULL) { + ret = 0; + goto out; + } + + /* Perform uRPF check if passed input interface */ + ret = 0; + rn = (struct radix_node *)ro.ro_rt; + do { + rt = (struct rtentry *)rn; + ifp = rt->rt_ifp; + + if (kif->pfik_ifp == ifp) + ret = 1; + rn = rn_mpath_next(rn); + } while (check_mpath == 1 && rn != NULL && ret == 0); + } else + ret = 0; +out: + if (ro.ro_rt != NULL) + RTFREE(ro.ro_rt); + return (ret); +} +#endif + +int +pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, + int rtableid) +{ +#ifdef INET + struct nhop4_basic nh4; +#endif +#ifdef INET6 + struct nhop6_basic nh6; +#endif + struct ifnet *ifp; +#ifdef RADIX_MPATH + struct radix_node_head *rnh; + + /* XXX: stick to table 0 for now */ + rnh = rt_tables_get_rnh(0, af); + if (rnh != NULL && rn_mpath_capable(rnh)) + return (pf_routable_oldmpath(addr, af, kif, rtableid)); +#endif + /* + * Skip check for addresses with embedded interface scope, + * as they would always match anyway. + */ + if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6)) + return (1); + + if (af != AF_INET && af != AF_INET6) + return (0); + + /* Skip checks for ipsec interfaces */ + if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) + return (1); + + ifp = NULL; + + switch (af) { +#ifdef INET6 + case AF_INET6: + if (fib6_lookup_nh_basic(rtableid, &addr->v6, 0, 0, 0, &nh6)!=0) + return (0); + ifp = nh6.nh_ifp; + break; +#endif +#ifdef INET + case AF_INET: + if (fib4_lookup_nh_basic(rtableid, addr->v4, 0, 0, &nh4) != 0) + return (0); + ifp = nh4.nh_ifp; + break; +#endif + } + + /* No interface given, this is a no-route check */ + if (kif == NULL) + return (1); + + if (kif->pfik_ifp == NULL) + return (0); + + /* Perform uRPF check if passed input interface */ + if (kif->pfik_ifp == ifp) + return (1); + return (0); +} + +#ifdef INET +static void +pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, + struct pf_state *s, struct pf_pdesc *pd) +{ + struct mbuf *m0, *m1; + struct sockaddr_in dst; + struct ip *ip; + struct ifnet *ifp = NULL; + struct pf_addr naddr; + struct pf_src_node *sn = NULL; + int error = 0; + uint16_t ip_len, ip_off; + + KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); + KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", + __func__)); + + if ((pd->pf_mtag == NULL && + ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || + pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad_locked; + } + + if (r->rt == PF_DUPTO) { + if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + m0 = *m; + } + + ip = mtod(m0, struct ip *); + + bzero(&dst, sizeof(dst)); + dst.sin_family = AF_INET; + dst.sin_len = sizeof(dst); + dst.sin_addr = ip->ip_dst; + + if 
(r->rt == PF_FASTROUTE) {
+ struct nhop4_basic nh4;
+
+ if (s)
+ PF_STATE_UNLOCK(s);
+
+ if (fib4_lookup_nh_basic(M_GETFIB(m0), ip->ip_dst, 0,
+ m0->m_pkthdr.flowid, &nh4) != 0) {
+ KMOD_IPSTAT_INC(ips_noroute);
+ error = EHOSTUNREACH;
+ goto bad;
+ }
+
+ ifp = nh4.nh_ifp;
+ dst.sin_addr = nh4.nh_addr;
+ } else {
+ if (TAILQ_EMPTY(&r->rpool.list)) {
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
+ goto bad_locked;
+ }
+ if (s == NULL) {
+ pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
+ &naddr, NULL, &sn);
+ if (!PF_AZERO(&naddr, AF_INET))
+ dst.sin_addr.s_addr = naddr.v4.s_addr;
+ ifp = r->rpool.cur->kif ?
+ r->rpool.cur->kif->pfik_ifp : NULL;
+ } else {
+ if (!PF_AZERO(&s->rt_addr, AF_INET))
+ dst.sin_addr.s_addr =
+ s->rt_addr.v4.s_addr;
+ ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
+ PF_STATE_UNLOCK(s);
+ }
+ }
+ if (ifp == NULL)
+ goto bad;
+
+ if (oifp != ifp) {
+ if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
+ goto bad;
+ else if (m0 == NULL)
+ goto done;
+ if (m0->m_len < sizeof(struct ip)) {
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
+ goto bad;
+ }
+ ip = mtod(m0, struct ip *);
+ }
+
+ if (ifp->if_flags & IFF_LOOPBACK)
+ m0->m_flags |= M_SKIP_FIREWALL;
+
+ ip_len = ntohs(ip->ip_len);
+ ip_off = ntohs(ip->ip_off);
+
+ /* Copied from FreeBSD 10.0-CURRENT ip_output. */
+ m0->m_pkthdr.csum_flags |= CSUM_IP;
+ if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
+ in_delayed_cksum(m0);
+ m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+#ifdef SCTP
+ if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
+ sctp_delayed_cksum(m0, (uint32_t)(ip->ip_hl << 2));
+ m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
+ }
+#endif
+
+ /*
+ * If small enough for interface, or the interface will take
+ * care of the fragmentation for us, we can just send directly.
+ */
+ if (ip_len <= ifp->if_mtu ||
+ (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
+ ip->ip_sum = 0;
+ if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
+ ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
+ m0->m_pkthdr.csum_flags &= ~CSUM_IP;
+ }
+ m_clrprotoflags(m0); /* Avoid confusing lower layers. */
+ error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
+ goto done;
+ }
+
+ /* Balk when DF bit is set or the interface doesn't support TSO. 
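Fragmenting is not an option in either case, so hand back EMSGSIZE and, unless this is a dup-to copy, send an ICMP need-frag error carrying the interface MTU so path MTU discovery can recover.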
*/ + if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { + error = EMSGSIZE; + KMOD_IPSTAT_INC(ips_cantfrag); + if (r->rt != PF_DUPTO) { + icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, + ifp->if_mtu); + goto done; + } else + goto bad; + } + + error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist); + if (error) + goto bad; + + for (; m0; m0 = m1) { + m1 = m0->m_nextpkt; + m0->m_nextpkt = NULL; + if (error == 0) { + m_clrprotoflags(m0); + error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); + } else + m_freem(m0); + } + + if (error == 0) + KMOD_IPSTAT_INC(ips_fragmented); + +done: + if (r->rt != PF_DUPTO) + *m = NULL; + return; + +bad_locked: + if (s) + PF_STATE_UNLOCK(s); +bad: + m_freem(m0); + goto done; +} +#endif /* INET */ + +#ifdef INET6 +static void +pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, + struct pf_state *s, struct pf_pdesc *pd) +{ + struct mbuf *m0; + struct sockaddr_in6 dst; + struct ip6_hdr *ip6; + struct ifnet *ifp = NULL; + struct pf_addr naddr; + struct pf_src_node *sn = NULL; + + KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); + KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", + __func__)); + + if ((pd->pf_mtag == NULL && + ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || + pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad_locked; + } + + if (r->rt == PF_DUPTO) { + if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + m0 = *m; + } + + ip6 = mtod(m0, struct ip6_hdr *); + + bzero(&dst, sizeof(dst)); + dst.sin6_family = AF_INET6; + dst.sin6_len = sizeof(dst); + dst.sin6_addr = ip6->ip6_dst; + + /* Cheat. XXX why only in the v6 case??? */ + if (r->rt == PF_FASTROUTE) { + if (s) + PF_STATE_UNLOCK(s); + m0->m_flags |= M_SKIP_FIREWALL; + ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); + *m = NULL; + return; + } + + if (TAILQ_EMPTY(&r->rpool.list)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); + goto bad_locked; + } + if (s == NULL) { + pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, + &naddr, NULL, &sn); + if (!PF_AZERO(&naddr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst.sin6_addr, + &naddr, AF_INET6); + ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; + } else { + if (!PF_AZERO(&s->rt_addr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst.sin6_addr, + &s->rt_addr, AF_INET6); + ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; + } + + if (s) + PF_STATE_UNLOCK(s); + + if (ifp == NULL) + goto bad; + + if (oifp != ifp) { + if (pf_test6(PF_FWD, ifp, &m0, NULL) != PF_PASS) + goto bad; + else if (m0 == NULL) + goto done; + if (m0->m_len < sizeof(struct ip6_hdr)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", + __func__)); + goto bad; + } + ip6 = mtod(m0, struct ip6_hdr *); + } + + if (ifp->if_flags & IFF_LOOPBACK) + m0->m_flags |= M_SKIP_FIREWALL; + + if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & + ~ifp->if_hwassist) { + uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6); + in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr)); + m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; + } + + /* + * If the packet is too large for the outgoing interface, + * send back an icmp6 error. 
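+ * (IPv6 routers never fragment packets in transit; the
+ * ICMP6_PACKET_TOO_BIG sent below carries the link MTU so the
+ * sender's path MTU discovery can adjust.)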
+ */ + if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr)) + dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) + nd6_output_ifp(ifp, ifp, m0, &dst, NULL); + else { + in6_ifstat_inc(ifp, ifs6_in_toobig); + if (r->rt != PF_DUPTO) + icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); + else + goto bad; + } + +done: + if (r->rt != PF_DUPTO) + *m = NULL; + return; + +bad_locked: + if (s) + PF_STATE_UNLOCK(s); +bad: + m_freem(m0); + goto done; +} +#endif /* INET6 */ + +/* + * FreeBSD supports cksum offloads for the following drivers. + * em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4), + * ti(4), txp(4), xl(4) + * + * CSUM_DATA_VALID | CSUM_PSEUDO_HDR : + * network driver performed cksum including pseudo header, need to verify + * csum_data + * CSUM_DATA_VALID : + * network driver performed cksum, needs to additional pseudo header + * cksum computation with partial csum_data(i.e. lack of H/W support for + * pseudo header, for instance hme(4), sk(4) and possibly gem(4)) + * + * After validating the cksum of packet, set both flag CSUM_DATA_VALID and + * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper + * TCP/UDP layer. + * Also, set csum_data to 0xffff to force cksum validation. + */ +static int +pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af) +{ + u_int16_t sum = 0; + int hw_assist = 0; + struct ip *ip; + + if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) + return (1); + if (m->m_pkthdr.len < off + len) + return (1); + + switch (p) { + case IPPROTO_TCP: + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { + sum = m->m_pkthdr.csum_data; + } else { + ip = mtod(m, struct ip *); + sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htonl((u_short)len + + m->m_pkthdr.csum_data + IPPROTO_TCP)); + } + sum ^= 0xffff; + ++hw_assist; + } + break; + case IPPROTO_UDP: + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { + sum = m->m_pkthdr.csum_data; + } else { + ip = mtod(m, struct ip *); + sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htonl((u_short)len + + m->m_pkthdr.csum_data + IPPROTO_UDP)); + } + sum ^= 0xffff; + ++hw_assist; + } + break; + case IPPROTO_ICMP: +#ifdef INET6 + case IPPROTO_ICMPV6: +#endif /* INET6 */ + break; + default: + return (1); + } + + if (!hw_assist) { + switch (af) { + case AF_INET: + if (p == IPPROTO_ICMP) { + if (m->m_len < off) + return (1); + m->m_data += off; + m->m_len -= off; + sum = in_cksum(m, len); + m->m_data -= off; + m->m_len += off; + } else { + if (m->m_len < sizeof(struct ip)) + return (1); + sum = in4_cksum(m, p, off, len); + } + break; +#ifdef INET6 + case AF_INET6: + if (m->m_len < sizeof(struct ip6_hdr)) + return (1); + sum = in6_cksum(m, p, off, len); + break; +#endif /* INET6 */ + default: + return (1); + } + } + if (sum) { + switch (p) { + case IPPROTO_TCP: + { + KMOD_TCPSTAT_INC(tcps_rcvbadsum); + break; + } + case IPPROTO_UDP: + { + KMOD_UDPSTAT_INC(udps_badsum); + break; + } +#ifdef INET + case IPPROTO_ICMP: + { + KMOD_ICMPSTAT_INC(icps_checksum); + break; + } +#endif +#ifdef INET6 + case IPPROTO_ICMPV6: + { + KMOD_ICMP6STAT_INC(icp6s_checksum); + break; + } +#endif /* INET6 */ + } + return (1); + } else { + if (p == IPPROTO_TCP || p == IPPROTO_UDP) { + m->m_pkthdr.csum_flags |= + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + m->m_pkthdr.csum_data = 0xffff; + } + } + return (0); +} + + +#ifdef INET +int +pf_test(int dir, struct ifnet *ifp, 
struct mbuf **m0, struct inpcb *inp) +{ + struct pfi_kif *kif; + u_short action, reason = 0, log = 0; + struct mbuf *m = *m0; + struct ip *h = NULL; + struct m_tag *ipfwtag; + struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; + struct pf_state *s = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_pdesc pd; + int off, dirndx, pqid = 0; + + M_ASSERTPKTHDR(m); + + if (!V_pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof(pd)); + + kif = (struct pfi_kif *)ifp->if_pf_kif; + + if (kif == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); + return (PF_DROP); + } + if (kif->pfik_flags & PFI_IFLAG_SKIP) + return (PF_PASS); + + if (m->m_flags & M_SKIP_FIREWALL) + return (PF_PASS); + + pd.pf_mtag = pf_find_mtag(m); + + PF_RULES_RLOCK(); + + if (ip_divert_ptr != NULL && + ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { + struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); + if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + goto done; + } + pd.pf_mtag->flags |= PF_PACKET_LOOPED; + m_tag_delete(m, ipfwtag); + } + if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { + m->m_flags |= M_FASTFWD_OURS; + pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; + } + } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { + /* We do IP header normalization and packet reassembly here */ + action = PF_DROP; + goto done; + } + m = *m0; /* pf_normalize messes with m0 */ + h = mtod(m, struct ip *); + + off = h->ip_hl << 2; + if (off < (int)sizeof(struct ip)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + pd.src = (struct pf_addr *)&h->ip_src; + pd.dst = (struct pf_addr *)&h->ip_dst; + pd.sport = pd.dport = NULL; + pd.ip_sum = &h->ip_sum; + pd.proto_sum = NULL; + pd.proto = h->ip_p; + pd.dir = dir; + pd.sidx = (dir == PF_IN) ? 0 : 1; + pd.didx = (dir == PF_IN) ? 
1 : 0; + pd.af = AF_INET; + pd.tos = h->ip_tos; + pd.tot_len = ntohs(h->ip_len); + + /* handle fragments that didn't get reassembled by normalization */ + if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { + action = pf_test_fragment(&r, dir, kif, m, h, + &pd, &a, &ruleset); + goto done; + } + + switch (h->ip_p) { + + case IPPROTO_TCP: { + struct tcphdr th; + + pd.hdr.tcp = &th; + if (!pf_pull_hdr(m, off, &th, sizeof(th), + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + pd.p_len = pd.tot_len - off - (th.th_off << 2); + if ((th.th_flags & TH_ACK) && pd.p_len == 0) + pqid = 1; + action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); + if (action == PF_DROP) + goto done; + action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, + &reason); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + + case IPPROTO_UDP: { + struct udphdr uh; + + pd.hdr.udp = &uh; + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + if (uh.uh_dport == 0 || + ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || + ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + goto done; + } + action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + + case IPPROTO_ICMP: { + struct icmp ih; + + pd.hdr.icmp = &ih; + if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, + &reason); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + +#ifdef INET6 + case IPPROTO_ICMPV6: { + action = PF_DROP; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping IPv4 packet with ICMPv6 payload\n")); + goto done; + } +#endif + + default: + action = pf_test_state_other(&s, dir, kif, m, &pd); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + +done: + PF_RULES_RUNLOCK(); + if (action == PF_PASS && h->ip_hl > 5 && + !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = r->log; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet with ip options\n")); + } + + if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (r->rtableid >= 0) + M_SETFIB(m, r->rtableid); + + if (r->scrub_flags & PFSTATE_SETPRIO) { + if (pd.tos & IPTOS_LOWDELAY) + pqid = 1; + if (pf_ieee8021q_setpcp(m, r->set_prio[pqid])) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate 802.1q mtag\n")); + } + } + +#ifdef ALTQ + if (action == 
PF_PASS && r->qid) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } else { + if (s != NULL) + pd.pf_mtag->qid_hash = pf_state_hash(s); + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* Add hints for ecn. */ + pd.pf_mtag->hdr = h; + } + + } +#endif /* ALTQ */ + + /* + * connections redirected to loopback should not match sockets + * bound specifically to loopback due to security implications, + * see tcp_input() and in_pcblookup_listen(). + */ + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || + pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && + (s->nat_rule.ptr->action == PF_RDR || + s->nat_rule.ptr->action == PF_BINAT) && + (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) + m->m_flags |= M_SKIP_FIREWALL; + + if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && + !PACKET_LOOPED(&pd)) { + + ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, + sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); + if (ipfwtag != NULL) { + ((struct ipfw_rule_ref *)(ipfwtag+1))->info = + ntohs(r->divert.port); + ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; + + if (s) + PF_STATE_UNLOCK(s); + + m_tag_prepend(m, ipfwtag); + if (m->m_flags & M_FASTFWD_OURS) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate tag\n")); + } else { + pd.pf_mtag->flags |= + PF_FASTFWD_OURS_PRESENT; + m->m_flags &= ~M_FASTFWD_OURS; + } + } + ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); + *m0 = NULL; + + return (action); + } else { + /* XXX: ipfw has the same behaviour! */ + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate divert tag\n")); + } + } + + if (log) { + struct pf_rule *lr; + + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd, + (s == NULL)); + } + + kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; + kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; + + if (action == PF_PASS || r->action == PF_DROP) { + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; + if (a != NULL) { + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; + } + if (s != NULL) { + if (s->nat_rule.ptr != NULL) { + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; + } + if (s->src_node != NULL) { + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; + } + if (s->nat_src_node != NULL) { + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; + } + dirndx = (dir == s->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; + } + tr = r; + nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; + if (nr != NULL && r == &V_pf_default_rule) + tr = nr; + if (tr->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL) ? pd.src : + &s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_OUT)], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->src.neg); + if (tr->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL) ? 
pd.dst : + &s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_IN)], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->dst.neg); + } + + switch (action) { + case PF_SYNPROXY_DROP: + m_freem(*m0); + case PF_DEFER: + *m0 = NULL; + action = PF_PASS; + break; + case PF_DROP: + m_freem(*m0); + *m0 = NULL; + break; + default: + /* pf_route() returns unlocked. */ + if (r->rt) { + pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); + return (action); + } + break; + } + if (s) + PF_STATE_UNLOCK(s); + + return (action); +} +#endif /* INET */ + +#ifdef INET6 +int +pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) +{ + struct pfi_kif *kif; + u_short action, reason = 0, log = 0; + struct mbuf *m = *m0, *n = NULL; + struct m_tag *mtag; + struct ip6_hdr *h = NULL; + struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; + struct pf_state *s = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_pdesc pd; + int off, terminal = 0, dirndx, rh_cnt = 0, pqid = 0; + int fwdir = dir; + + M_ASSERTPKTHDR(m); + + /* Detect packet forwarding. + * If the input interface is different from the output interface we're + * forwarding. + * We do need to be careful about bridges. If the + * net.link.bridge.pfil_bridge sysctl is set we can be filtering on a + * bridge, so if the input interface is a bridge member and the output + * interface is its bridge or a member of the same bridge we're not + * actually forwarding but bridging. + */ + if (dir == PF_OUT && m->m_pkthdr.rcvif && ifp != m->m_pkthdr.rcvif && + (m->m_pkthdr.rcvif->if_bridge == NULL || + (m->m_pkthdr.rcvif->if_bridge != ifp->if_softc && + m->m_pkthdr.rcvif->if_bridge != ifp->if_bridge))) + fwdir = PF_FWD; + + if (!V_pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof(pd)); + pd.pf_mtag = pf_find_mtag(m); + + if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED) + return (PF_PASS); + + kif = (struct pfi_kif *)ifp->if_pf_kif; + if (kif == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); + return (PF_DROP); + } + if (kif->pfik_flags & PFI_IFLAG_SKIP) + return (PF_PASS); + + if (m->m_flags & M_SKIP_FIREWALL) + return (PF_PASS); + + PF_RULES_RLOCK(); + + /* We do IP header normalization and packet reassembly here */ + if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { + action = PF_DROP; + goto done; + } + m = *m0; /* pf_normalize messes with m0 */ + h = mtod(m, struct ip6_hdr *); + +#if 1 + /* + * we do not support jumbogram yet. if we keep going, zero ip6_plen + * will do something bad, so drop the packet for now. + */ + if (htons(h->ip6_plen) == 0) { + action = PF_DROP; + REASON_SET(&reason, PFRES_NORM); /*XXX*/ + goto done; + } +#endif + + pd.src = (struct pf_addr *)&h->ip6_src; + pd.dst = (struct pf_addr *)&h->ip6_dst; + pd.sport = pd.dport = NULL; + pd.ip_sum = NULL; + pd.proto_sum = NULL; + pd.dir = dir; + pd.sidx = (dir == PF_IN) ? 0 : 1; + pd.didx = (dir == PF_IN) ? 
1 : 0; + pd.af = AF_INET6; + pd.tos = 0; + pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); + + off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); + pd.proto = h->ip6_nxt; + do { + switch (pd.proto) { + case IPPROTO_FRAGMENT: + action = pf_test_fragment(&r, dir, kif, m, h, + &pd, &a, &ruleset); + if (action == PF_DROP) + REASON_SET(&reason, PFRES_FRAG); + goto done; + case IPPROTO_ROUTING: { + struct ip6_rthdr rthdr; + + if (rh_cnt++) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 more than one rthdr\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + goto done; + } + if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, + &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 rthdr0\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + goto done; + } + /* FALLTHROUGH */ + } + case IPPROTO_AH: + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct ip6_ext opt6; + + if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), + NULL, &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short opt\n")); + action = PF_DROP; + log = 1; + goto done; + } + if (pd.proto == IPPROTO_AH) + off += (opt6.ip6e_len + 2) * 4; + else + off += (opt6.ip6e_len + 1) * 8; + pd.proto = opt6.ip6e_nxt; + /* goto the next header */ + break; + } + default: + terminal++; + break; + } + } while (!terminal); + + /* if there's no routing header, use unmodified mbuf for checksumming */ + if (!n) + n = m; + + switch (pd.proto) { + + case IPPROTO_TCP: { + struct tcphdr th; + + pd.hdr.tcp = &th; + if (!pf_pull_hdr(m, off, &th, sizeof(th), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + pd.p_len = pd.tot_len - off - (th.th_off << 2); + action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); + if (action == PF_DROP) + goto done; + action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, + &reason); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + + case IPPROTO_UDP: { + struct udphdr uh; + + pd.hdr.udp = &uh; + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + if (uh.uh_dport == 0 || + ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || + ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + goto done; + } + action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + + case IPPROTO_ICMP: { + action = PF_DROP; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping IPv6 packet with ICMPv4 payload\n")); + goto done; + } + + case IPPROTO_ICMPV6: { + struct icmp6_hdr ih; + + pd.hdr.icmp6 = &ih; + if (!pf_pull_hdr(m, off, &ih, sizeof(ih), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + action = pf_test_state_icmp(&s, dir, kif, + m, off, h, &pd, &reason); + if (action == PF_PASS) { + if (pfsync_update_state_ptr 
!= NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + + default: + action = pf_test_state_other(&s, dir, kif, m, &pd); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + +done: + PF_RULES_RUNLOCK(); + if (n != m) { + m_freem(n); + n = NULL; + } + + /* handle dangerous IPv6 extension headers. */ + if (action == PF_PASS && rh_cnt && + !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = r->log; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet with dangerous v6 headers\n")); + } + + if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (r->rtableid >= 0) + M_SETFIB(m, r->rtableid); + + if (r->scrub_flags & PFSTATE_SETPRIO) { + if (pd.tos & IPTOS_LOWDELAY) + pqid = 1; + if (pf_ieee8021q_setpcp(m, r->set_prio[pqid])) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate 802.1q mtag\n")); + } + } + +#ifdef ALTQ + if (action == PF_PASS && r->qid) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } else { + if (s != NULL) + pd.pf_mtag->qid_hash = pf_state_hash(s); + if (pd.tos & IPTOS_LOWDELAY) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* Add hints for ecn. */ + pd.pf_mtag->hdr = h; + } + } +#endif /* ALTQ */ + + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || + pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && + (s->nat_rule.ptr->action == PF_RDR || + s->nat_rule.ptr->action == PF_BINAT) && + IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) + m->m_flags |= M_SKIP_FIREWALL; + + /* XXX: Anybody working on it?! */ + if (r->divert.port) + printf("pf: divert(9) is not supported for IPv6\n"); + + if (log) { + struct pf_rule *lr; + + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset, + &pd, (s == NULL)); + } + + kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; + kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; + + if (action == PF_PASS || r->action == PF_DROP) { + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; + if (a != NULL) { + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; + } + if (s != NULL) { + if (s->nat_rule.ptr != NULL) { + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; + } + if (s->src_node != NULL) { + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; + } + if (s->nat_src_node != NULL) { + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; + } + dirndx = (dir == s->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; + } + tr = r; + nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; + if (nr != NULL && r == &V_pf_default_rule) + tr = nr; + if (tr->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL) ? 
pd.src : + &s->key[(s->direction == PF_IN)]->addr[0], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->src.neg); + if (tr->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL) ? pd.dst : + &s->key[(s->direction == PF_IN)]->addr[1], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->dst.neg); + } + + switch (action) { + case PF_SYNPROXY_DROP: + m_freem(*m0); + case PF_DEFER: + *m0 = NULL; + action = PF_PASS; + break; + case PF_DROP: + m_freem(*m0); + *m0 = NULL; + break; + default: + /* pf_route6() returns unlocked. */ + if (r->rt) { + pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); + return (action); + } + break; + } + + if (s) + PF_STATE_UNLOCK(s); + + /* If reassembled packet passed, create new fragments. */ + if (action == PF_PASS && *m0 && fwdir == PF_FWD && + (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL) + action = pf_refragment6(ifp, m0, mtag); + + return (action); +} +#endif /* INET6 */ diff --git a/freebsd/sys/netpfil/pf/pf.h b/freebsd/sys/netpfil/pf/pf.h new file mode 100644 index 00000000..ac0e0fb9 --- /dev/null +++ b/freebsd/sys/netpfil/pf/pf.h @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2001 Daniel Hartmeier + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $ + * $FreeBSD$ + */ + +#ifndef _NET_PF_H_ +#define _NET_PF_H_ + +#define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) +#define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) + +#define PF_MD5_DIGEST_LENGTH 16 +#ifdef MD5_DIGEST_LENGTH +#if PF_MD5_DIGEST_LENGTH != MD5_DIGEST_LENGTH +#error +#endif +#endif + +enum { PF_INOUT, PF_IN, PF_OUT, PF_FWD }; +enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, + PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER }; +enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, + PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; +enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT, + PF_OP_LE, PF_OP_GT, PF_OP_GE, PF_OP_XRG, PF_OP_RRG }; +enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY }; +enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, + PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, + PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; +enum { PF_GET_NONE, PF_GET_CLR_CNTR }; +enum { PF_SK_WIRE, PF_SK_STACK, PF_SK_BOTH }; + +/* + * Note about PFTM_*: real indices into pf_rule.timeout[] come before + * PFTM_MAX, special cases afterwards. See pf_state_expires(). + */ +enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, + PFTM_TCP_CLOSING, PFTM_TCP_FIN_WAIT, PFTM_TCP_CLOSED, + PFTM_UDP_FIRST_PACKET, PFTM_UDP_SINGLE, PFTM_UDP_MULTIPLE, + PFTM_ICMP_FIRST_PACKET, PFTM_ICMP_ERROR_REPLY, + PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE, + PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL, + PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE, + PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED }; + +/* PFTM default values */ +#define PFTM_TCP_FIRST_PACKET_VAL 120 /* First TCP packet */ +#define PFTM_TCP_OPENING_VAL 30 /* No response yet */ +#define PFTM_TCP_ESTABLISHED_VAL 24*60*60/* Established */ +#define PFTM_TCP_CLOSING_VAL 15 * 60 /* Half closed */ +#define PFTM_TCP_FIN_WAIT_VAL 45 /* Got both FINs */ +#define PFTM_TCP_CLOSED_VAL 90 /* Got a RST */ +#define PFTM_UDP_FIRST_PACKET_VAL 60 /* First UDP packet */ +#define PFTM_UDP_SINGLE_VAL 30 /* Unidirectional */ +#define PFTM_UDP_MULTIPLE_VAL 60 /* Bidirectional */ +#define PFTM_ICMP_FIRST_PACKET_VAL 20 /* First ICMP packet */ +#define PFTM_ICMP_ERROR_REPLY_VAL 10 /* Got error response */ +#define PFTM_OTHER_FIRST_PACKET_VAL 60 /* First packet */ +#define PFTM_OTHER_SINGLE_VAL 30 /* Unidirectional */ +#define PFTM_OTHER_MULTIPLE_VAL 60 /* Bidirectional */ +#define PFTM_FRAG_VAL 30 /* Fragment expire */ +#define PFTM_INTERVAL_VAL 10 /* Expire interval */ +#define PFTM_SRC_NODE_VAL 0 /* Source tracking */ +#define PFTM_TS_DIFF_VAL 30 /* Allowed TS diff */ + +enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; +enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, + PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; +#define PF_POOL_IDMASK 0x0f +enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, + PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; +enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, + PF_ADDR_TABLE, PF_ADDR_URPFFAILED, + PF_ADDR_RANGE }; +#define PF_POOL_TYPEMASK 0x0f +#define PF_POOL_STICKYADDR 0x20 +#define PF_WSCALE_FLAG 0x80 +#define PF_WSCALE_MASK 0x0f + +#define PF_LOG 0x01 +#define PF_LOG_ALL 0x02 +#define PF_LOG_SOCKET_LOOKUP 0x04 + +/* Reasons code for passing/dropping a packet */ +#define PFRES_MATCH 0 /* Explicit match of a rule */ +#define PFRES_BADOFF 1 /* Bad offset for pull_hdr */ +#define PFRES_FRAG 2 /* Dropping following fragment */ +#define 
PFRES_SHORT 3 /* Dropping short packet */ +#define PFRES_NORM 4 /* Dropping by normalizer */ +#define PFRES_MEMORY 5 /* Dropped due to lacking mem */ +#define PFRES_TS 6 /* Bad TCP Timestamp (RFC1323) */ +#define PFRES_CONGEST 7 /* Congestion (of ipintrq) */ +#define PFRES_IPOPTIONS 8 /* IP option */ +#define PFRES_PROTCKSUM 9 /* Protocol checksum invalid */ +#define PFRES_BADSTATE 10 /* State mismatch */ +#define PFRES_STATEINS 11 /* State insertion failure */ +#define PFRES_MAXSTATES 12 /* State limit */ +#define PFRES_SRCLIMIT 13 /* Source node/conn limit */ +#define PFRES_SYNPROXY 14 /* SYN proxy */ +#define PFRES_MAPFAILED 15 /* pf_map_addr() failed */ +#define PFRES_MAX 16 /* total+1 */ + +#define PFRES_NAMES { \ + "match", \ + "bad-offset", \ + "fragment", \ + "short", \ + "normalize", \ + "memory", \ + "bad-timestamp", \ + "congestion", \ + "ip-option", \ + "proto-cksum", \ + "state-mismatch", \ + "state-insert", \ + "state-limit", \ + "src-limit", \ + "synproxy", \ + "map-failed", \ + NULL \ +} + +/* Counters for other things we want to keep track of */ +#define LCNT_STATES 0 /* states */ +#define LCNT_SRCSTATES 1 /* max-src-states */ +#define LCNT_SRCNODES 2 /* max-src-nodes */ +#define LCNT_SRCCONN 3 /* max-src-conn */ +#define LCNT_SRCCONNRATE 4 /* max-src-conn-rate */ +#define LCNT_OVERLOAD_TABLE 5 /* entry added to overload table */ +#define LCNT_OVERLOAD_FLUSH 6 /* state entries flushed */ +#define LCNT_MAX 7 /* total+1 */ + +#define LCNT_NAMES { \ + "max states per rule", \ + "max-src-states", \ + "max-src-nodes", \ + "max-src-conn", \ + "max-src-conn-rate", \ + "overload table insertion", \ + "overload flush states", \ + NULL \ +} + +/* state operation counters */ +#define FCNT_STATE_SEARCH 0 +#define FCNT_STATE_INSERT 1 +#define FCNT_STATE_REMOVALS 2 +#define FCNT_MAX 3 + +/* src_node operation counters */ +#define SCNT_SRC_NODE_SEARCH 0 +#define SCNT_SRC_NODE_INSERT 1 +#define SCNT_SRC_NODE_REMOVALS 2 +#define SCNT_MAX 3 + +#define PF_TABLE_NAME_SIZE 32 +#define PF_QNAME_SIZE 64 + +struct pf_status { + uint64_t counters[PFRES_MAX]; + uint64_t lcounters[LCNT_MAX]; + uint64_t fcounters[FCNT_MAX]; + uint64_t scounters[SCNT_MAX]; + uint64_t pcounters[2][2][3]; + uint64_t bcounters[2][2]; + uint32_t running; + uint32_t states; + uint32_t src_nodes; + uint32_t since; + uint32_t debug; + uint32_t hostid; + char ifname[IFNAMSIZ]; + uint8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; +}; + +#endif /* _NET_PF_H_ */ diff --git a/freebsd/sys/netpfil/pf/pf_altq.h b/freebsd/sys/netpfil/pf/pf_altq.h new file mode 100644 index 00000000..3efd4ff7 --- /dev/null +++ b/freebsd/sys/netpfil/pf/pf_altq.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2001 Daniel Hartmeier + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $ + * $FreeBSD$ + */ + +#ifndef _NET_PF_ALTQ_H_ +#define _NET_PF_ALTQ_H_ + +struct cbq_opts { + u_int minburst; + u_int maxburst; + u_int pktsize; + u_int maxpktsize; + u_int ns_per_byte; + u_int maxidle; + int minidle; + u_int offtime; + int flags; +}; + +struct codel_opts { + u_int target; + u_int interval; + int ecn; +}; + +struct priq_opts { + int flags; +}; + +struct hfsc_opts { + /* real-time service curve */ + u_int rtsc_m1; /* slope of the 1st segment in bps */ + u_int rtsc_d; /* the x-projection of m1 in msec */ + u_int rtsc_m2; /* slope of the 2nd segment in bps */ + /* link-sharing service curve */ + u_int lssc_m1; + u_int lssc_d; + u_int lssc_m2; + /* upper-limit service curve */ + u_int ulsc_m1; + u_int ulsc_d; + u_int ulsc_m2; + int flags; +}; + +/* + * XXX this needs some work + */ +struct fairq_opts { + u_int nbuckets; + u_int hogs_m1; + int flags; + + /* link sharing service curve */ + u_int lssc_m1; + u_int lssc_d; + u_int lssc_m2; +}; + +struct pf_altq { + char ifname[IFNAMSIZ]; + + void *altq_disc; /* discipline-specific state */ + TAILQ_ENTRY(pf_altq) entries; + + /* scheduler spec */ + uint8_t scheduler; /* scheduler type */ + uint16_t tbrsize; /* tokenbucket regulator size */ + uint32_t ifbandwidth; /* interface bandwidth */ + + /* queue spec */ + char qname[PF_QNAME_SIZE]; /* queue name */ + char parent[PF_QNAME_SIZE]; /* parent name */ + uint32_t parent_qid; /* parent queue id */ + uint32_t bandwidth; /* queue bandwidth */ + uint8_t priority; /* priority */ + uint8_t local_flags; /* dynamic interface */ +#define PFALTQ_FLAG_IF_REMOVED 0x01 + + uint16_t qlimit; /* queue size limit */ + uint16_t flags; /* misc flags */ + union { + struct cbq_opts cbq_opts; + struct codel_opts codel_opts; + struct priq_opts priq_opts; + struct hfsc_opts hfsc_opts; + struct fairq_opts fairq_opts; + } pq_u; + + uint32_t qid; /* return value */ +}; + +#endif /* _NET_PF_ALTQ_H_ */ diff --git a/freebsd/sys/netpfil/pf/pf_if.c b/freebsd/sys/netpfil/pf/pf_if.c new file mode 100644 index 00000000..d1c54b22 --- /dev/null +++ b/freebsd/sys/netpfil/pf/pf_if.c @@ -0,0 +1,924 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2003 Cedric Berger + * Copyright (c) 2005 Henning Brauer <henning@openbsd.org> + * Copyright (c) 2005 Ryan McBride <mcbride@openbsd.org> + * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $OpenBSD: pf_if.c,v 1.54 2008/06/14 16:55:28 mk Exp $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/kernel.h> +#include <sys/eventhandler.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/mbuf.h> +#include <sys/rwlock.h> +#include <sys/socket.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/vnet.h> +#include <net/pfvar.h> +#include <net/route.h> + +VNET_DEFINE(struct pfi_kif *, pfi_all); +static VNET_DEFINE(long, pfi_update); +#define V_pfi_update VNET(pfi_update) +#define PFI_BUFFER_MAX 0x10000 + +VNET_DECLARE(int, pf_vnet_active); +#define V_pf_vnet_active VNET(pf_vnet_active) + +static VNET_DEFINE(struct pfr_addr *, pfi_buffer); +static VNET_DEFINE(int, pfi_buffer_cnt); +static VNET_DEFINE(int, pfi_buffer_max); +#define V_pfi_buffer VNET(pfi_buffer) +#define V_pfi_buffer_cnt VNET(pfi_buffer_cnt) +#define V_pfi_buffer_max VNET(pfi_buffer_max) + +eventhandler_tag pfi_attach_cookie; +eventhandler_tag pfi_detach_cookie; +eventhandler_tag pfi_attach_group_cookie; +eventhandler_tag pfi_change_group_cookie; +eventhandler_tag pfi_detach_group_cookie; +eventhandler_tag pfi_ifaddr_event_cookie; + +static void pfi_attach_ifnet(struct ifnet *); +static void pfi_attach_ifgroup(struct ifg_group *); + +static void pfi_kif_update(struct pfi_kif *); +static void pfi_dynaddr_update(struct pfi_dynaddr *dyn); +static void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int, + int); +static void pfi_instance_add(struct ifnet *, int, int); +static void pfi_address_add(struct sockaddr *, int, int); +static int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); +static int pfi_skip_if(const char *, struct pfi_kif *); +static int pfi_unmask(void *); +static void pfi_attach_ifnet_event(void * __unused, struct ifnet *); +static void pfi_detach_ifnet_event(void * __unused, struct ifnet *); +static void pfi_attach_group_event(void *, struct ifg_group *); +static void pfi_change_group_event(void *, char *); +static void pfi_detach_group_event(void *, struct ifg_group *); +static void pfi_ifaddr_event(void * __unused, struct ifnet *); + +RB_HEAD(pfi_ifhead, pfi_kif); +static RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); +static RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); +static VNET_DEFINE(struct pfi_ifhead, pfi_ifs); +#define V_pfi_ifs VNET(pfi_ifs) + +#define PFI_BUFFER_MAX 0x10000 
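+/*
+ * Background: the VNET_DEFINE()/VNET() idiom used above gives every
+ * virtual network stack (VIMAGE jail) a private instance of these
+ * globals; on kernels built without VIMAGE the macros collapse into
+ * plain file-scope variables.  A minimal sketch of the idiom, with a
+ * hypothetical name:
+ *
+ *	static VNET_DEFINE(int, example_cnt);
+ *	#define	V_example_cnt	VNET(example_cnt)
+ *
+ * after which V_example_cnt reads and writes the instance belonging
+ * to the current vnet (curvnet).
+ */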
+MALLOC_DEFINE(PFI_MTYPE, "pf_ifnet", "pf(4) interface database");
+
+LIST_HEAD(pfi_list, pfi_kif);
+static VNET_DEFINE(struct pfi_list, pfi_unlinked_kifs);
+#define	V_pfi_unlinked_kifs	VNET(pfi_unlinked_kifs)
+static struct mtx pfi_unlnkdkifs_mtx;
+MTX_SYSINIT(pfi_unlnkdkifs_mtx, &pfi_unlnkdkifs_mtx, "pf unlinked interfaces",
+    MTX_DEF);
+
+void
+pfi_initialize_vnet(void)
+{
+	struct ifg_group *ifg;
+	struct ifnet *ifp;
+	struct pfi_kif *kif;
+
+	V_pfi_buffer_max = 64;
+	V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer),
+	    PFI_MTYPE, M_WAITOK);
+
+	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
+	PF_RULES_WLOCK();
+	V_pfi_all = pfi_kif_attach(kif, IFG_ALL);
+	PF_RULES_WUNLOCK();
+
+	IFNET_RLOCK();
+	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
+		pfi_attach_ifgroup(ifg);
+	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
+		pfi_attach_ifnet(ifp);
+	IFNET_RUNLOCK();
+}
+
+void
+pfi_initialize(void)
+{
+
+	pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_arrival_event,
+	    pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
+	pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event,
+	    pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
+	pfi_attach_group_cookie = EVENTHANDLER_REGISTER(group_attach_event,
+	    pfi_attach_group_event, curvnet, EVENTHANDLER_PRI_ANY);
+	pfi_change_group_cookie = EVENTHANDLER_REGISTER(group_change_event,
+	    pfi_change_group_event, curvnet, EVENTHANDLER_PRI_ANY);
+	pfi_detach_group_cookie = EVENTHANDLER_REGISTER(group_detach_event,
+	    pfi_detach_group_event, curvnet, EVENTHANDLER_PRI_ANY);
+	pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event,
+	    pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);
+}
+
+void
+pfi_cleanup_vnet(void)
+{
+	struct pfi_kif *kif;
+
+	PF_RULES_WASSERT();
+
+	V_pfi_all = NULL;
+	while ((kif = RB_MIN(pfi_ifhead, &V_pfi_ifs))) {
+		RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif);
+		if (kif->pfik_group)
+			kif->pfik_group->ifg_pf_kif = NULL;
+		if (kif->pfik_ifp)
+			kif->pfik_ifp->if_pf_kif = NULL;
+		free(kif, PFI_MTYPE);
+	}
+
+	mtx_lock(&pfi_unlnkdkifs_mtx);
+	while ((kif = LIST_FIRST(&V_pfi_unlinked_kifs))) {
+		LIST_REMOVE(kif, pfik_list);
+		free(kif, PFI_MTYPE);
+	}
+	mtx_unlock(&pfi_unlnkdkifs_mtx);
+
+	free(V_pfi_buffer, PFI_MTYPE);
+}
+
+void
+pfi_cleanup(void)
+{
+
+	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie);
+	EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie);
+	EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie);
+	EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie);
+	EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie);
+	EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie);
+}
+
+struct pfi_kif *
+pfi_kif_find(const char *kif_name)
+{
+	struct pfi_kif_cmp s;
+
+	PF_RULES_ASSERT();
+
+	bzero(&s, sizeof(s));
+	strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name));
+
+	return (RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s));
+}
+
+struct pfi_kif *
+pfi_kif_attach(struct pfi_kif *kif, const char *kif_name)
+{
+	struct pfi_kif *kif1;
+
+	PF_RULES_WASSERT();
+	KASSERT(kif != NULL, ("%s: null kif", __func__));
+
+	kif1 = pfi_kif_find(kif_name);
+	if (kif1 != NULL) {
+		free(kif, PFI_MTYPE);
+		return (kif1);
+	}
+
+	bzero(kif, sizeof(*kif));
+	strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name));
+	/*
+	 * It seems that the value of time_second is in an uninitialized
+	 * state when pf sets the interface statistics clear time during
+	 * the boot phase if pf was statically linked into the kernel.
Instead of setting the bogus + * time value have pfi_get_ifaces handle this case. In + * pfi_get_ifaces it uses time_second if it sees the time is 0. + */ + kif->pfik_tzero = time_second > 1 ? time_second : 0; + TAILQ_INIT(&kif->pfik_dynaddrs); + + RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif); + + return (kif); +} + +void +pfi_kif_ref(struct pfi_kif *kif) +{ + + PF_RULES_WASSERT(); + kif->pfik_rulerefs++; +} + +void +pfi_kif_unref(struct pfi_kif *kif) +{ + + PF_RULES_WASSERT(); + KASSERT(kif->pfik_rulerefs > 0, ("%s: %p has zero refs", __func__, kif)); + + kif->pfik_rulerefs--; + + if (kif->pfik_rulerefs > 0) + return; + + /* kif referencing an existing ifnet or group should exist. */ + if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all) + return; + + RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif); + + kif->pfik_flags |= PFI_IFLAG_REFS; + + mtx_lock(&pfi_unlnkdkifs_mtx); + LIST_INSERT_HEAD(&V_pfi_unlinked_kifs, kif, pfik_list); + mtx_unlock(&pfi_unlnkdkifs_mtx); +} + +void +pfi_kif_purge(void) +{ + struct pfi_kif *kif, *kif1; + + /* + * Do naive mark-and-sweep garbage collecting of old kifs. + * Reference flag is raised by pf_purge_expired_states(). + */ + mtx_lock(&pfi_unlnkdkifs_mtx); + LIST_FOREACH_SAFE(kif, &V_pfi_unlinked_kifs, pfik_list, kif1) { + if (!(kif->pfik_flags & PFI_IFLAG_REFS)) { + LIST_REMOVE(kif, pfik_list); + free(kif, PFI_MTYPE); + } else + kif->pfik_flags &= ~PFI_IFLAG_REFS; + } + mtx_unlock(&pfi_unlnkdkifs_mtx); +} + +int +pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) +{ + struct ifg_list *p; + + if (rule_kif == NULL || rule_kif == packet_kif) + return (1); + + if (rule_kif->pfik_group != NULL) + /* XXXGL: locking? */ + TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) + if (p->ifgl_group == rule_kif->pfik_group) + return (1); + + return (0); +} + +static void +pfi_attach_ifnet(struct ifnet *ifp) +{ + struct pfi_kif *kif; + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + + PF_RULES_WLOCK(); + V_pfi_update++; + kif = pfi_kif_attach(kif, ifp->if_xname); + + kif->pfik_ifp = ifp; + ifp->if_pf_kif = kif; + + pfi_kif_update(kif); + PF_RULES_WUNLOCK(); +} + +static void +pfi_attach_ifgroup(struct ifg_group *ifg) +{ + struct pfi_kif *kif; + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + + PF_RULES_WLOCK(); + V_pfi_update++; + kif = pfi_kif_attach(kif, ifg->ifg_group); + + kif->pfik_group = ifg; + ifg->ifg_pf_kif = kif; + PF_RULES_WUNLOCK(); +} + +int +pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + switch (dyn->pfid_acnt4) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr4, + &dyn->pfid_mask4, a, AF_INET)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + switch (dyn->pfid_acnt6) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr6, + &dyn->pfid_mask6, a, AF_INET6)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); + } + break; +#endif /* INET6 */ + default: + return (0); + } +} + +int +pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) +{ + struct pfi_dynaddr *dyn; + char tblname[PF_TABLE_NAME_SIZE]; + struct pf_ruleset *ruleset = NULL; + struct pfi_kif *kif; + int rv = 0; + + PF_RULES_WASSERT(); + KASSERT(aw->type == PF_ADDR_DYNIFTL, ("%s: type %u", + __func__, aw->type)); + KASSERT(aw->p.dyn == NULL, ("%s: dyn is %p", __func__, aw->p.dyn)); + + if ((dyn = malloc(sizeof(*dyn), PFI_MTYPE, M_NOWAIT 
| M_ZERO)) == NULL) + return (ENOMEM); + + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) { + free(dyn, PFI_MTYPE); + return (ENOMEM); + } + + if (!strcmp(aw->v.ifname, "self")) + dyn->pfid_kif = pfi_kif_attach(kif, IFG_ALL); + else + dyn->pfid_kif = pfi_kif_attach(kif, aw->v.ifname); + pfi_kif_ref(dyn->pfid_kif); + + dyn->pfid_net = pfi_unmask(&aw->v.a.mask); + if (af == AF_INET && dyn->pfid_net == 32) + dyn->pfid_net = 128; + strlcpy(tblname, aw->v.ifname, sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_NETWORK) + strlcat(tblname, ":network", sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_BROADCAST) + strlcat(tblname, ":broadcast", sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_PEER) + strlcat(tblname, ":peer", sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_NOALIAS) + strlcat(tblname, ":0", sizeof(tblname)); + if (dyn->pfid_net != 128) + snprintf(tblname + strlen(tblname), + sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); + if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { + rv = ENOMEM; + goto _bad; + } + + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + rv = ENOMEM; + goto _bad; + } + + dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE; + dyn->pfid_iflags = aw->iflags; + dyn->pfid_af = af; + + TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); + aw->p.dyn = dyn; + pfi_kif_update(dyn->pfid_kif); + + return (0); + +_bad: + if (dyn->pfid_kt != NULL) + pfr_detach_table(dyn->pfid_kt); + if (ruleset != NULL) + pf_remove_if_empty_ruleset(ruleset); + if (dyn->pfid_kif != NULL) + pfi_kif_unref(dyn->pfid_kif); + free(dyn, PFI_MTYPE); + + return (rv); +} + +static void +pfi_kif_update(struct pfi_kif *kif) +{ + struct ifg_list *ifgl; + struct pfi_dynaddr *p; + + PF_RULES_WASSERT(); + + /* update all dynaddr */ + TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) + pfi_dynaddr_update(p); + + /* again for all groups kif is member of */ + if (kif->pfik_ifp != NULL) { + IF_ADDR_RLOCK(kif->pfik_ifp); + TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) + pfi_kif_update((struct pfi_kif *) + ifgl->ifgl_group->ifg_pf_kif); + IF_ADDR_RUNLOCK(kif->pfik_ifp); + } +} + +static void +pfi_dynaddr_update(struct pfi_dynaddr *dyn) +{ + struct pfi_kif *kif; + struct pfr_ktable *kt; + + PF_RULES_WASSERT(); + KASSERT(dyn && dyn->pfid_kif && dyn->pfid_kt, + ("%s: bad argument", __func__)); + + kif = dyn->pfid_kif; + kt = dyn->pfid_kt; + + if (kt->pfrkt_larg != V_pfi_update) { + /* this table needs to be brought up-to-date */ + pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); + kt->pfrkt_larg = V_pfi_update; + } + pfr_dynaddr_update(kt, dyn); +} + +static void +pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) +{ + int e, size2 = 0; + struct ifg_member *ifgm; + + V_pfi_buffer_cnt = 0; + + if (kif->pfik_ifp != NULL) + pfi_instance_add(kif->pfik_ifp, net, flags); + else if (kif->pfik_group != NULL) { + IFNET_RLOCK_NOSLEEP(); + TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) + pfi_instance_add(ifgm->ifgm_ifp, net, flags); + IFNET_RUNLOCK_NOSLEEP(); + } + + if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2, + NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) + printf("%s: cannot set %d new addresses into table %s: %d\n", + __func__, V_pfi_buffer_cnt, kt->pfrkt_name, e); +} + +static void +pfi_instance_add(struct ifnet *ifp, int net, int flags) +{ + struct ifaddr *ia; + int got4 = 0, got6 = 0; + int net2, af; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ia, &ifp->if_addrhead, 
ifa_list) { + if (ia->ifa_addr == NULL) + continue; + af = ia->ifa_addr->sa_family; + if (af != AF_INET && af != AF_INET6) + continue; + /* + * XXX: For point-to-point interfaces, (ifname:0) and IPv4, + * jump over addresses without a proper route to work + * around a problem with ppp not fully removing the + * address used during IPCP. + */ + if ((ifp->if_flags & IFF_POINTOPOINT) && + !(ia->ifa_flags & IFA_ROUTE) && + (flags & PFI_AFLAG_NOALIAS) && (af == AF_INET)) + continue; + if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) + continue; + if ((flags & PFI_AFLAG_BROADCAST) && + !(ifp->if_flags & IFF_BROADCAST)) + continue; + if ((flags & PFI_AFLAG_PEER) && + !(ifp->if_flags & IFF_POINTOPOINT)) + continue; + if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 && + IN6_IS_ADDR_LINKLOCAL( + &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr)) + continue; + if (flags & PFI_AFLAG_NOALIAS) { + if (af == AF_INET && got4) + continue; + if (af == AF_INET6 && got6) + continue; + } + if (af == AF_INET) + got4 = 1; + else if (af == AF_INET6) + got6 = 1; + net2 = net; + if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) { + if (af == AF_INET) + net2 = pfi_unmask(&((struct sockaddr_in *) + ia->ifa_netmask)->sin_addr); + else if (af == AF_INET6) + net2 = pfi_unmask(&((struct sockaddr_in6 *) + ia->ifa_netmask)->sin6_addr); + } + if (af == AF_INET && net2 > 32) + net2 = 32; + if (flags & PFI_AFLAG_BROADCAST) + pfi_address_add(ia->ifa_broadaddr, af, net2); + else if (flags & PFI_AFLAG_PEER) + pfi_address_add(ia->ifa_dstaddr, af, net2); + else + pfi_address_add(ia->ifa_addr, af, net2); + } + IF_ADDR_RUNLOCK(ifp); +} + +static void +pfi_address_add(struct sockaddr *sa, int af, int net) +{ + struct pfr_addr *p; + int i; + + if (V_pfi_buffer_cnt >= V_pfi_buffer_max) { + int new_max = V_pfi_buffer_max * 2; + + if (new_max > PFI_BUFFER_MAX) { + printf("%s: address buffer full (%d/%d)\n", __func__, + V_pfi_buffer_cnt, PFI_BUFFER_MAX); + return; + } + p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE, + M_NOWAIT); + if (p == NULL) { + printf("%s: no memory to grow buffer (%d/%d)\n", + __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX); + return; + } + memcpy(p, V_pfi_buffer, V_pfi_buffer_max * sizeof(*V_pfi_buffer)); + /* no need to zero buffer */ + free(V_pfi_buffer, PFI_MTYPE); + V_pfi_buffer = p; + V_pfi_buffer_max = new_max; + } + if (af == AF_INET && net > 32) + net = 128; + p = V_pfi_buffer + V_pfi_buffer_cnt++; + bzero(p, sizeof(*p)); + p->pfra_af = af; + p->pfra_net = net; + if (af == AF_INET) + p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr; + else if (af == AF_INET6) { + p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr; + if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr)) + p->pfra_ip6addr.s6_addr16[1] = 0; + } + /* mask network address bits */ + if (net < 128) + ((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8)); + for (i = (p->pfra_net+7)/8; i < sizeof(p->pfra_u); i++) + ((caddr_t)p)[i] = 0; +} + +void +pfi_dynaddr_remove(struct pfi_dynaddr *dyn) +{ + + KASSERT(dyn->pfid_kif != NULL, ("%s: null pfid_kif", __func__)); + KASSERT(dyn->pfid_kt != NULL, ("%s: null pfid_kt", __func__)); + + TAILQ_REMOVE(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); + pfi_kif_unref(dyn->pfid_kif); + pfr_detach_table(dyn->pfid_kt); + free(dyn, PFI_MTYPE); +} + +void +pfi_dynaddr_copyout(struct pf_addr_wrap *aw) +{ + + KASSERT(aw->type == PF_ADDR_DYNIFTL, + ("%s: type %u", __func__, aw->type)); + + if (aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL) + return; + aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + 
aw->p.dyn->pfid_acnt6; +} + +static int +pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) +{ + return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); +} + +void +pfi_update_status(const char *name, struct pf_status *pfs) +{ + struct pfi_kif *p; + struct pfi_kif_cmp key; + struct ifg_member p_member, *ifgm; + TAILQ_HEAD(, ifg_member) ifg_members; + int i, j, k; + + strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); + p = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key); + if (p == NULL) + return; + + if (p->pfik_group != NULL) { + bcopy(&p->pfik_group->ifg_members, &ifg_members, + sizeof(ifg_members)); + } else { + /* build a temporary list for p only */ + bzero(&p_member, sizeof(p_member)); + p_member.ifgm_ifp = p->pfik_ifp; + TAILQ_INIT(&ifg_members); + TAILQ_INSERT_TAIL(&ifg_members, &p_member, ifgm_next); + } + if (pfs) { + bzero(pfs->pcounters, sizeof(pfs->pcounters)); + bzero(pfs->bcounters, sizeof(pfs->bcounters)); + } + TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) { + if (ifgm->ifgm_ifp == NULL || ifgm->ifgm_ifp->if_pf_kif == NULL) + continue; + p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif; + + /* just clear statistics */ + if (pfs == NULL) { + bzero(p->pfik_packets, sizeof(p->pfik_packets)); + bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); + p->pfik_tzero = time_second; + continue; + } + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) { + pfs->pcounters[i][j][k] += + p->pfik_packets[i][j][k]; + pfs->bcounters[i][j] += + p->pfik_bytes[i][j][k]; + } + } +} + +void +pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) +{ + struct pfi_kif *p, *nextp; + int n = 0; + + for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) { + nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); + if (pfi_skip_if(name, p)) + continue; + if (*size <= n++) + break; + if (!p->pfik_tzero) + p->pfik_tzero = time_second; + bcopy(p, buf++, sizeof(*buf)); + nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); + } + *size = n; +} + +static int +pfi_skip_if(const char *filter, struct pfi_kif *p) +{ + int n; + + if (filter == NULL || !*filter) + return (0); + if (!strcmp(p->pfik_name, filter)) + return (0); /* exact match */ + n = strlen(filter); + if (n < 1 || n >= IFNAMSIZ) + return (1); /* sanity check */ + if (filter[n-1] >= '0' && filter[n-1] <= '9') + return (1); /* only do exact match in that case */ + if (strncmp(p->pfik_name, filter, n)) + return (1); /* prefix doesn't match */ + return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9'); +} + +int +pfi_set_flags(const char *name, int flags) +{ + struct pfi_kif *p; + + RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags |= flags; + } + return (0); +} + +int +pfi_clear_flags(const char *name, int flags) +{ + struct pfi_kif *p; + + RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags &= ~flags; + } + return (0); +} + +/* from pf_print_state.c */ +static int +pfi_unmask(void *addr) +{ + struct pf_addr *m = addr; + int i = 31, j = 0, b = 0; + u_int32_t tmp; + + while (j < 4 && m->addr32[j] == 0xffffffff) { + b += 32; + j++; + } + if (j < 4) { + tmp = ntohl(m->addr32[j]); + for (i = 31; tmp & (1 << i); --i) + b++; + } + return (b); +} + +static void +pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) +{ + + CURVNET_SET(ifp->if_vnet); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. 
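+	 * V_pf_vnet_active is set only after pf for this vnet is fully
+	 * initialized and is cleared before teardown starts, so bail out
+	 * here while it is zero.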
*/ + CURVNET_RESTORE(); + return; + } + pfi_attach_ifnet(ifp); +#ifdef ALTQ + PF_RULES_WLOCK(); + pf_altq_ifnet_event(ifp, 0); + PF_RULES_WUNLOCK(); +#endif + CURVNET_RESTORE(); +} + +static void +pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) +{ + struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif; + + if (kif == NULL) + return; + + CURVNET_SET(ifp->if_vnet); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. */ + CURVNET_RESTORE(); + return; + } + PF_RULES_WLOCK(); + V_pfi_update++; + pfi_kif_update(kif); + + kif->pfik_ifp = NULL; + ifp->if_pf_kif = NULL; +#ifdef ALTQ + pf_altq_ifnet_event(ifp, 1); +#endif + PF_RULES_WUNLOCK(); + CURVNET_RESTORE(); +} + +static void +pfi_attach_group_event(void *arg , struct ifg_group *ifg) +{ + + CURVNET_SET((struct vnet *)arg); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. */ + CURVNET_RESTORE(); + return; + } + pfi_attach_ifgroup(ifg); + CURVNET_RESTORE(); +} + +static void +pfi_change_group_event(void *arg, char *gname) +{ + struct pfi_kif *kif; + + CURVNET_SET((struct vnet *)arg); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. */ + CURVNET_RESTORE(); + return; + } + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + PF_RULES_WLOCK(); + V_pfi_update++; + kif = pfi_kif_attach(kif, gname); + pfi_kif_update(kif); + PF_RULES_WUNLOCK(); + CURVNET_RESTORE(); +} + +static void +pfi_detach_group_event(void *arg, struct ifg_group *ifg) +{ + struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif; + + if (kif == NULL) + return; + + CURVNET_SET((struct vnet *)arg); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. */ + CURVNET_RESTORE(); + return; + } + PF_RULES_WLOCK(); + V_pfi_update++; + + kif->pfik_group = NULL; + ifg->ifg_pf_kif = NULL; + PF_RULES_WUNLOCK(); + CURVNET_RESTORE(); +} + +static void +pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp) +{ + if (ifp->if_pf_kif == NULL) + return; + + CURVNET_SET(ifp->if_vnet); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. */ + CURVNET_RESTORE(); + return; + } + PF_RULES_WLOCK(); + if (ifp && ifp->if_pf_kif) { + V_pfi_update++; + pfi_kif_update(ifp->if_pf_kif); + } + PF_RULES_WUNLOCK(); + CURVNET_RESTORE(); +} diff --git a/freebsd/sys/netpfil/pf/pf_ioctl.c b/freebsd/sys/netpfil/pf/pf_ioctl.c new file mode 100644 index 00000000..9c1523ca --- /dev/null +++ b/freebsd/sys/netpfil/pf/pf_ioctl.c @@ -0,0 +1,3872 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + * $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#include <rtems/bsd/local/opt_bpf.h> +#include <rtems/bsd/local/opt_pf.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/endian.h> +#include <sys/fcntl.h> +#include <sys/filio.h> +#include <sys/interrupt.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/proc.h> +#include <sys/rwlock.h> +#include <sys/smp.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/md5.h> +#include <sys/ucred.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/vnet.h> +#include <net/route.h> +#include <net/pfil.h> +#include <net/pfvar.h> +#include <net/if_pfsync.h> +#include <net/if_pflog.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet6/ip6_var.h> +#include <netinet/ip_icmp.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#endif /* INET6 */ + +#ifdef ALTQ +#include <net/altq/altq.h> +#endif + +static struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, + u_int8_t, u_int8_t, u_int8_t); + +static void pf_mv_pool(struct pf_palist *, struct pf_palist *); +static void pf_empty_pool(struct pf_palist *); +static int pfioctl(struct cdev *, u_long, caddr_t, int, + struct thread *); +#ifdef ALTQ +static int pf_begin_altq(u_int32_t *); +static int pf_rollback_altq(u_int32_t); +static int pf_commit_altq(u_int32_t); +static int pf_enable_altq(struct pf_altq *); +static int pf_disable_altq(struct pf_altq *); +static u_int32_t pf_qname2qid(char *); +static void pf_qid_unref(u_int32_t); +#endif /* ALTQ */ +static int pf_begin_rules(u_int32_t *, int, const char *); +static int pf_rollback_rules(u_int32_t, int, char *); +static int pf_setup_pfsync_matching(struct pf_ruleset *); +static void pf_hash_rule(MD5_CTX *, struct pf_rule *); +static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); +static int pf_commit_rules(u_int32_t, int, char *); +static int pf_addr_setup(struct pf_ruleset *, + struct pf_addr_wrap *, sa_family_t); +static void pf_addr_copyout(struct pf_addr_wrap *); + +VNET_DEFINE(struct pf_rule, pf_default_rule); + +#ifdef ALTQ +static VNET_DEFINE(int, pf_altq_running); +#define V_pf_altq_running VNET(pf_altq_running) +#endif + +#define TAGID_MAX 50000 +struct pf_tagname { + TAILQ_ENTRY(pf_tagname) entries; + char name[PF_TAG_NAME_SIZE]; + uint16_t tag; + int ref; +}; + +TAILQ_HEAD(pf_tags, pf_tagname); +#define V_pf_tags VNET(pf_tags) +VNET_DEFINE(struct pf_tags, pf_tags); +#define V_pf_qids 
VNET(pf_qids)
+VNET_DEFINE(struct pf_tags, pf_qids);
+static MALLOC_DEFINE(M_PFTAG, "pf_tag", "pf(4) tag names");
+static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
+static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
+
+#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
+#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
+#endif
+
+static u_int16_t	 tagname2tag(struct pf_tags *, char *);
+static u_int16_t	 pf_tagname2tag(char *);
+static void		 tag_unref(struct pf_tags *, u_int16_t);
+
+#define	DPFPRINTF(n, x)	if (V_pf_status.debug >= (n)) printf x
+
+struct cdev *pf_dev;
+
+/*
+ * XXX - These are new and need to be checked when moving to a new version
+ */
+static void		 pf_clear_states(void);
+static int		 pf_clear_tables(void);
+static void		 pf_clear_srcnodes(struct pf_src_node *);
+static void		 pf_kill_srcnodes(struct pfioc_src_node_kill *);
+static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);
+
+/*
+ * Wrapper functions for pfil(9) hooks
+ */
+#ifdef INET
+static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp,
+    int dir, struct inpcb *inp);
+static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp,
+    int dir, struct inpcb *inp);
+#endif
+#ifdef INET6
+static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp,
+    int dir, struct inpcb *inp);
+static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp,
+    int dir, struct inpcb *inp);
+#endif
+
+static int		 hook_pf(void);
+static int		 dehook_pf(void);
+static int		 shutdown_pf(void);
+static int		 pf_load(void);
+static int		 pf_unload(void);
+
+static struct cdevsw pf_cdevsw = {
+	.d_ioctl =	pfioctl,
+	.d_name =	PF_NAME,
+	.d_version =	D_VERSION,
+};
+
+static volatile VNET_DEFINE(int, pf_pfil_hooked);
+#define V_pf_pfil_hooked	VNET(pf_pfil_hooked)
+
+/*
+ * We need a flag that is neither hooked nor running to know when
+ * the VNET is "valid".  We primarily need this to control (global)
+ * external events, e.g., eventhandlers.
+ */ +VNET_DEFINE(int, pf_vnet_active); +#define V_pf_vnet_active VNET(pf_vnet_active) + +int pf_end_threads; + +struct rwlock pf_rules_lock; +struct sx pf_ioctl_lock; + +/* pfsync */ +pfsync_state_import_t *pfsync_state_import_ptr = NULL; +pfsync_insert_state_t *pfsync_insert_state_ptr = NULL; +pfsync_update_state_t *pfsync_update_state_ptr = NULL; +pfsync_delete_state_t *pfsync_delete_state_ptr = NULL; +pfsync_clear_states_t *pfsync_clear_states_ptr = NULL; +pfsync_defer_t *pfsync_defer_ptr = NULL; +/* pflog */ +pflog_packet_t *pflog_packet_ptr = NULL; + +static void +pfattach_vnet(void) +{ + u_int32_t *my_timeout = V_pf_default_rule.timeout; + + pf_initialize(); + pfr_initialize(); + pfi_initialize_vnet(); + pf_normalize_init(); + + V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; + V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; + + RB_INIT(&V_pf_anchors); + pf_init_ruleset(&pf_main_ruleset); + + /* default rule should never be garbage collected */ + V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next; +#ifdef PF_DEFAULT_TO_DROP + V_pf_default_rule.action = PF_DROP; +#else + V_pf_default_rule.action = PF_PASS; +#endif + V_pf_default_rule.nr = -1; + V_pf_default_rule.rtableid = -1; + + V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK); + V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK); + V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK); + + /* initialize default timeouts */ + my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; + my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; + my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; + my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; + my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; + my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; + my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; + my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; + my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; + my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; + my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; + my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; + my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; + my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; + my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL; + my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; + my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; + my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; + my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; + my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; + + bzero(&V_pf_status, sizeof(V_pf_status)); + V_pf_status.debug = PF_DEBUG_URGENT; + + V_pf_pfil_hooked = 0; + + /* XXX do our best to avoid a conflict */ + V_pf_status.hostid = arc4random(); + + for (int i = 0; i < PFRES_MAX; i++) + V_pf_status.counters[i] = counter_u64_alloc(M_WAITOK); + for (int i = 0; i < LCNT_MAX; i++) + V_pf_status.lcounters[i] = counter_u64_alloc(M_WAITOK); + for (int i = 0; i < FCNT_MAX; i++) + V_pf_status.fcounters[i] = counter_u64_alloc(M_WAITOK); + for (int i = 0; i < SCNT_MAX; i++) + V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK); + + if (swi_add(NULL, "pf send", pf_intr, curvnet, SWI_NET, + INTR_MPSAFE, &V_pf_swi_cookie) != 0) + /* XXXGL: leaked all above. 
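+	 * swi_add() failing here leaves everything allocated earlier in
+	 * this function (counters, default-rule state, hash tables) in
+	 * place.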
*/ + return; +} + + +static struct pf_pool * +pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, + u_int32_t rule_number, u_int8_t r_last, u_int8_t active, + u_int8_t check_ticket) +{ + struct pf_ruleset *ruleset; + struct pf_rule *rule; + int rs_num; + + ruleset = pf_find_ruleset(anchor); + if (ruleset == NULL) + return (NULL); + rs_num = pf_get_ruleset_number(rule_action); + if (rs_num >= PF_RULESET_MAX) + return (NULL); + if (active) { + if (check_ticket && ticket != + ruleset->rules[rs_num].active.ticket) + return (NULL); + if (r_last) + rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, + pf_rulequeue); + else + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + } else { + if (check_ticket && ticket != + ruleset->rules[rs_num].inactive.ticket) + return (NULL); + if (r_last) + rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, + pf_rulequeue); + else + rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr); + } + if (!r_last) { + while ((rule != NULL) && (rule->nr != rule_number)) + rule = TAILQ_NEXT(rule, entries); + } + if (rule == NULL) + return (NULL); + + return (&rule->rpool); +} + +static void +pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) +{ + struct pf_pooladdr *mv_pool_pa; + + while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) { + TAILQ_REMOVE(poola, mv_pool_pa, entries); + TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries); + } +} + +static void +pf_empty_pool(struct pf_palist *poola) +{ + struct pf_pooladdr *pa; + + while ((pa = TAILQ_FIRST(poola)) != NULL) { + switch (pa->addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(pa->addr.p.dyn); + break; + case PF_ADDR_TABLE: + /* XXX: this could be unfinished pooladdr on pabuf */ + if (pa->addr.p.tbl != NULL) + pfr_detach_table(pa->addr.p.tbl); + break; + } + if (pa->kif) + pfi_kif_unref(pa->kif); + TAILQ_REMOVE(poola, pa, entries); + free(pa, M_PFRULE); + } +} + +static void +pf_unlink_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) +{ + + PF_RULES_WASSERT(); + + TAILQ_REMOVE(rulequeue, rule, entries); + + PF_UNLNKDRULES_LOCK(); + rule->rule_flag |= PFRULE_REFS; + TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries); + PF_UNLNKDRULES_UNLOCK(); +} + +void +pf_free_rule(struct pf_rule *rule) +{ + + PF_RULES_WASSERT(); + + if (rule->tag) + tag_unref(&V_pf_tags, rule->tag); + if (rule->match_tag) + tag_unref(&V_pf_tags, rule->match_tag); +#ifdef ALTQ + if (rule->pqid != rule->qid) + pf_qid_unref(rule->pqid); + pf_qid_unref(rule->qid); +#endif + switch (rule->src.addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(rule->src.addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(rule->src.addr.p.tbl); + break; + } + switch (rule->dst.addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(rule->dst.addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(rule->dst.addr.p.tbl); + break; + } + if (rule->overload_tbl) + pfr_detach_table(rule->overload_tbl); + if (rule->kif) + pfi_kif_unref(rule->kif); + pf_anchor_remove(rule); + pf_empty_pool(&rule->rpool.list); + counter_u64_free(rule->states_cur); + counter_u64_free(rule->states_tot); + counter_u64_free(rule->src_nodes); + free(rule, M_PFRULE); +} + +static u_int16_t +tagname2tag(struct pf_tags *head, char *tagname) +{ + struct pf_tagname *tag, *p = NULL; + u_int16_t new_tagid = 1; + + PF_RULES_WASSERT(); + + TAILQ_FOREACH(tag, head, entries) + if (strcmp(tagname, tag->name) == 0) { + tag->ref++; + return (tag->tag); + } + + /* + * to avoid fragmentation, we do a linear search from the beginning + * and 
take the first free slot we find. if there is none or the list + * is empty, append a new entry at the end. + */ + + /* new entry */ + if (!TAILQ_EMPTY(head)) + for (p = TAILQ_FIRST(head); p != NULL && + p->tag == new_tagid; p = TAILQ_NEXT(p, entries)) + new_tagid = p->tag + 1; + + if (new_tagid > TAGID_MAX) + return (0); + + /* allocate and fill new struct pf_tagname */ + tag = malloc(sizeof(*tag), M_PFTAG, M_NOWAIT|M_ZERO); + if (tag == NULL) + return (0); + strlcpy(tag->name, tagname, sizeof(tag->name)); + tag->tag = new_tagid; + tag->ref++; + + if (p != NULL) /* insert new entry before p */ + TAILQ_INSERT_BEFORE(p, tag, entries); + else /* either list empty or no free slot in between */ + TAILQ_INSERT_TAIL(head, tag, entries); + + return (tag->tag); +} + +static void +tag_unref(struct pf_tags *head, u_int16_t tag) +{ + struct pf_tagname *p, *next; + + PF_RULES_WASSERT(); + + for (p = TAILQ_FIRST(head); p != NULL; p = next) { + next = TAILQ_NEXT(p, entries); + if (tag == p->tag) { + if (--p->ref == 0) { + TAILQ_REMOVE(head, p, entries); + free(p, M_PFTAG); + } + break; + } + } +} + +static u_int16_t +pf_tagname2tag(char *tagname) +{ + return (tagname2tag(&V_pf_tags, tagname)); +} + +#ifdef ALTQ +static u_int32_t +pf_qname2qid(char *qname) +{ + return ((u_int32_t)tagname2tag(&V_pf_qids, qname)); +} + +static void +pf_qid_unref(u_int32_t qid) +{ + tag_unref(&V_pf_qids, (u_int16_t)qid); +} + +static int +pf_begin_altq(u_int32_t *ticket) +{ + struct pf_altq *altq; + int error = 0; + + PF_RULES_WASSERT(); + + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* detach and destroy the discipline */ + error = altq_remove(altq); + } else + pf_qid_unref(altq->qid); + free(altq, M_PFALTQ); + } + if (error) + return (error); + *ticket = ++V_ticket_altqs_inactive; + V_altqs_inactive_open = 1; + return (0); +} + +static int +pf_rollback_altq(u_int32_t ticket) +{ + struct pf_altq *altq; + int error = 0; + + PF_RULES_WASSERT(); + + if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) + return (0); + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* detach and destroy the discipline */ + error = altq_remove(altq); + } else + pf_qid_unref(altq->qid); + free(altq, M_PFALTQ); + } + V_altqs_inactive_open = 0; + return (error); +} + +static int +pf_commit_altq(u_int32_t ticket) +{ + struct pf_altqqueue *old_altqs; + struct pf_altq *altq; + int err, error = 0; + + PF_RULES_WASSERT(); + + if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) + return (EBUSY); + + /* swap altqs, keep the old. 
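+	 * The old queues stay on V_pf_altqs_inactive after the swap; they
+	 * are detached and freed once the new set has been attached below.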
*/ + old_altqs = V_pf_altqs_active; + V_pf_altqs_active = V_pf_altqs_inactive; + V_pf_altqs_inactive = old_altqs; + V_ticket_altqs_active = V_ticket_altqs_inactive; + + /* Attach new disciplines */ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* attach the discipline */ + error = altq_pfattach(altq); + if (error == 0 && V_pf_altq_running) + error = pf_enable_altq(altq); + if (error != 0) + return (error); + } + } + + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* detach and destroy the discipline */ + if (V_pf_altq_running) + error = pf_disable_altq(altq); + err = altq_pfdetach(altq); + if (err != 0 && error == 0) + error = err; + err = altq_remove(altq); + if (err != 0 && error == 0) + error = err; + } else + pf_qid_unref(altq->qid); + free(altq, M_PFALTQ); + } + + V_altqs_inactive_open = 0; + return (error); +} + +static int +pf_enable_altq(struct pf_altq *altq) +{ + struct ifnet *ifp; + struct tb_profile tb; + int error = 0; + + if ((ifp = ifunit(altq->ifname)) == NULL) + return (EINVAL); + + if (ifp->if_snd.altq_type != ALTQT_NONE) + error = altq_enable(&ifp->if_snd); + + /* set tokenbucket regulator */ + if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { + tb.rate = altq->ifbandwidth; + tb.depth = altq->tbrsize; + error = tbr_set(&ifp->if_snd, &tb); + } + + return (error); +} + +static int +pf_disable_altq(struct pf_altq *altq) +{ + struct ifnet *ifp; + struct tb_profile tb; + int error; + + if ((ifp = ifunit(altq->ifname)) == NULL) + return (EINVAL); + + /* + * when the discipline is no longer referenced, it was overridden + * by a new one. if so, just return. 
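+	 * (detected by altq->altq_disc no longer matching the ifnet's
+	 * current send queue discipline)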
+ */ + if (altq->altq_disc != ifp->if_snd.altq_disc) + return (0); + + error = altq_disable(&ifp->if_snd); + + if (error == 0) { + /* clear tokenbucket regulator */ + tb.rate = 0; + error = tbr_set(&ifp->if_snd, &tb); + } + + return (error); +} + +void +pf_altq_ifnet_event(struct ifnet *ifp, int remove) +{ + struct ifnet *ifp1; + struct pf_altq *a1, *a2, *a3; + u_int32_t ticket; + int error = 0; + + /* Interrupt userland queue modifications */ + if (V_altqs_inactive_open) + pf_rollback_altq(V_ticket_altqs_inactive); + + /* Start new altq ruleset */ + if (pf_begin_altq(&ticket)) + return; + + /* Copy the current active set */ + TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { + a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); + if (a2 == NULL) { + error = ENOMEM; + break; + } + bcopy(a1, a2, sizeof(struct pf_altq)); + + if (a2->qname[0] != 0) { + if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { + error = EBUSY; + free(a2, M_PFALTQ); + break; + } + a2->altq_disc = NULL; + TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) { + if (strncmp(a3->ifname, a2->ifname, + IFNAMSIZ) == 0 && a3->qname[0] == 0) { + a2->altq_disc = a3->altq_disc; + break; + } + } + } + /* Deactivate the interface in question */ + a2->local_flags &= ~PFALTQ_FLAG_IF_REMOVED; + if ((ifp1 = ifunit(a2->ifname)) == NULL || + (remove && ifp1 == ifp)) { + a2->local_flags |= PFALTQ_FLAG_IF_REMOVED; + } else { + error = altq_add(a2); + + if (ticket != V_ticket_altqs_inactive) + error = EBUSY; + + if (error) { + free(a2, M_PFALTQ); + break; + } + } + + TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); + } + + if (error != 0) + pf_rollback_altq(ticket); + else + pf_commit_altq(ticket); +} +#endif /* ALTQ */ + +static int +pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) +{ + struct pf_ruleset *rs; + struct pf_rule *rule; + + PF_RULES_WASSERT(); + + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) + return (EINVAL); + rs = pf_find_or_create_ruleset(anchor); + if (rs == NULL) + return (EINVAL); + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { + pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } + *ticket = ++rs->rules[rs_num].inactive.ticket; + rs->rules[rs_num].inactive.open = 1; + return (0); +} + +static int +pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) +{ + struct pf_ruleset *rs; + struct pf_rule *rule; + + PF_RULES_WASSERT(); + + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) + return (EINVAL); + rs = pf_find_ruleset(anchor); + if (rs == NULL || !rs->rules[rs_num].inactive.open || + rs->rules[rs_num].inactive.ticket != ticket) + return (0); + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { + pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } + rs->rules[rs_num].inactive.open = 0; + return (0); +} + +#define PF_MD5_UPD(st, elm) \ + MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm)) + +#define PF_MD5_UPD_STR(st, elm) \ + MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm)) + +#define PF_MD5_UPD_HTONL(st, elm, stor) do { \ + (stor) = htonl((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\ +} while (0) + +#define PF_MD5_UPD_HTONS(st, elm, stor) do { \ + (stor) = htons((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ +} while (0) + +static void +pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) +{ + PF_MD5_UPD(pfr, addr.type); + switch (pfr->addr.type) { + case PF_ADDR_DYNIFTL: + PF_MD5_UPD(pfr, 
addr.v.ifname); + PF_MD5_UPD(pfr, addr.iflags); + break; + case PF_ADDR_TABLE: + PF_MD5_UPD(pfr, addr.v.tblname); + break; + case PF_ADDR_ADDRMASK: + /* XXX ignore af? */ + PF_MD5_UPD(pfr, addr.v.a.addr.addr32); + PF_MD5_UPD(pfr, addr.v.a.mask.addr32); + break; + } + + PF_MD5_UPD(pfr, port[0]); + PF_MD5_UPD(pfr, port[1]); + PF_MD5_UPD(pfr, neg); + PF_MD5_UPD(pfr, port_op); +} + +static void +pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) +{ + u_int16_t x; + u_int32_t y; + + pf_hash_rule_addr(ctx, &rule->src); + pf_hash_rule_addr(ctx, &rule->dst); + PF_MD5_UPD_STR(rule, label); + PF_MD5_UPD_STR(rule, ifname); + PF_MD5_UPD_STR(rule, match_tagname); + PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */ + PF_MD5_UPD_HTONL(rule, os_fingerprint, y); + PF_MD5_UPD_HTONL(rule, prob, y); + PF_MD5_UPD_HTONL(rule, uid.uid[0], y); + PF_MD5_UPD_HTONL(rule, uid.uid[1], y); + PF_MD5_UPD(rule, uid.op); + PF_MD5_UPD_HTONL(rule, gid.gid[0], y); + PF_MD5_UPD_HTONL(rule, gid.gid[1], y); + PF_MD5_UPD(rule, gid.op); + PF_MD5_UPD_HTONL(rule, rule_flag, y); + PF_MD5_UPD(rule, action); + PF_MD5_UPD(rule, direction); + PF_MD5_UPD(rule, af); + PF_MD5_UPD(rule, quick); + PF_MD5_UPD(rule, ifnot); + PF_MD5_UPD(rule, match_tag_not); + PF_MD5_UPD(rule, natpass); + PF_MD5_UPD(rule, keep_state); + PF_MD5_UPD(rule, proto); + PF_MD5_UPD(rule, type); + PF_MD5_UPD(rule, code); + PF_MD5_UPD(rule, flags); + PF_MD5_UPD(rule, flagset); + PF_MD5_UPD(rule, allow_opts); + PF_MD5_UPD(rule, rt); + PF_MD5_UPD(rule, tos); +} + +static int +pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) +{ + struct pf_ruleset *rs; + struct pf_rule *rule, **old_array; + struct pf_rulequeue *old_rules; + int error; + u_int32_t old_rcount; + + PF_RULES_WASSERT(); + + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) + return (EINVAL); + rs = pf_find_ruleset(anchor); + if (rs == NULL || !rs->rules[rs_num].inactive.open || + ticket != rs->rules[rs_num].inactive.ticket) + return (EBUSY); + + /* Calculate checksum for the main ruleset */ + if (rs == &pf_main_ruleset) { + error = pf_setup_pfsync_matching(rs); + if (error != 0) + return (error); + } + + /* Swap rules, keep the old. */ + old_rules = rs->rules[rs_num].active.ptr; + old_rcount = rs->rules[rs_num].active.rcount; + old_array = rs->rules[rs_num].active.ptr_array; + + rs->rules[rs_num].active.ptr = + rs->rules[rs_num].inactive.ptr; + rs->rules[rs_num].active.ptr_array = + rs->rules[rs_num].inactive.ptr_array; + rs->rules[rs_num].active.rcount = + rs->rules[rs_num].inactive.rcount; + rs->rules[rs_num].inactive.ptr = old_rules; + rs->rules[rs_num].inactive.ptr_array = old_array; + rs->rules[rs_num].inactive.rcount = old_rcount; + + rs->rules[rs_num].active.ticket = + rs->rules[rs_num].inactive.ticket; + pf_calc_skip_steps(rs->rules[rs_num].active.ptr); + + + /* Purge the old rule list. */ + while ((rule = TAILQ_FIRST(old_rules)) != NULL) + pf_unlink_rule(old_rules, rule); + if (rs->rules[rs_num].inactive.ptr_array) + free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); + rs->rules[rs_num].inactive.ptr_array = NULL; + rs->rules[rs_num].inactive.rcount = 0; + rs->rules[rs_num].inactive.open = 0; + pf_remove_if_empty_ruleset(rs); + + return (0); +} + +static int +pf_setup_pfsync_matching(struct pf_ruleset *rs) +{ + MD5_CTX ctx; + struct pf_rule *rule; + int rs_cnt; + u_int8_t digest[PF_MD5_DIGEST_LENGTH]; + + MD5Init(&ctx); + for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) { + /* XXX PF_RULESET_SCRUB as well? 
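+	 * (scrub rulesets are skipped below, so they do not contribute to
+	 * the checksum pfsync uses to compare rulesets)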
*/ + if (rs_cnt == PF_RULESET_SCRUB) + continue; + + if (rs->rules[rs_cnt].inactive.ptr_array) + free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); + rs->rules[rs_cnt].inactive.ptr_array = NULL; + + if (rs->rules[rs_cnt].inactive.rcount) { + rs->rules[rs_cnt].inactive.ptr_array = + malloc(sizeof(caddr_t) * + rs->rules[rs_cnt].inactive.rcount, + M_TEMP, M_NOWAIT); + + if (!rs->rules[rs_cnt].inactive.ptr_array) + return (ENOMEM); + } + + TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, + entries) { + pf_hash_rule(&ctx, rule); + (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; + } + } + + MD5Final(digest, &ctx); + memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum)); + return (0); +} + +static int +pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr, + sa_family_t af) +{ + int error = 0; + + switch (addr->type) { + case PF_ADDR_TABLE: + addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname); + if (addr->p.tbl == NULL) + error = ENOMEM; + break; + case PF_ADDR_DYNIFTL: + error = pfi_dynaddr_setup(addr, af); + break; + } + + return (error); +} + +static void +pf_addr_copyout(struct pf_addr_wrap *addr) +{ + + switch (addr->type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_copyout(addr); + break; + case PF_ADDR_TABLE: + pf_tbladdr_copyout(addr); + break; + } +} + +static int +pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) +{ + int error = 0; + + /* XXX keep in sync with switch() below */ + if (securelevel_gt(td->td_ucred, 2)) + switch (cmd) { + case DIOCGETRULES: + case DIOCGETRULE: + case DIOCGETADDRS: + case DIOCGETADDR: + case DIOCGETSTATE: + case DIOCSETSTATUSIF: + case DIOCGETSTATUS: + case DIOCCLRSTATUS: + case DIOCNATLOOK: + case DIOCSETDEBUG: + case DIOCGETSTATES: + case DIOCGETTIMEOUT: + case DIOCCLRRULECTRS: + case DIOCGETLIMIT: + case DIOCGETALTQS: + case DIOCGETALTQ: + case DIOCGETQSTATS: + case DIOCGETRULESETS: + case DIOCGETRULESET: + case DIOCRGETTABLES: + case DIOCRGETTSTATS: + case DIOCRCLRTSTATS: + case DIOCRCLRADDRS: + case DIOCRADDADDRS: + case DIOCRDELADDRS: + case DIOCRSETADDRS: + case DIOCRGETADDRS: + case DIOCRGETASTATS: + case DIOCRCLRASTATS: + case DIOCRTSTADDRS: + case DIOCOSFPGET: + case DIOCGETSRCNODES: + case DIOCCLRSRCNODES: + case DIOCIGETIFACES: + case DIOCGIFSPEED: + case DIOCSETIFFLAG: + case DIOCCLRIFFLAG: + break; + case DIOCRCLRTABLES: + case DIOCRADDTABLES: + case DIOCRDELTABLES: + case DIOCRSETTFLAGS: + if (((struct pfioc_table *)addr)->pfrio_flags & + PFR_FLAG_DUMMY) + break; /* dummy operation ok */ + return (EPERM); + default: + return (EPERM); + } + + if (!(flags & FWRITE)) + switch (cmd) { + case DIOCGETRULES: + case DIOCGETADDRS: + case DIOCGETADDR: + case DIOCGETSTATE: + case DIOCGETSTATUS: + case DIOCGETSTATES: + case DIOCGETTIMEOUT: + case DIOCGETLIMIT: + case DIOCGETALTQS: + case DIOCGETALTQ: + case DIOCGETQSTATS: + case DIOCGETRULESETS: + case DIOCGETRULESET: + case DIOCNATLOOK: + case DIOCRGETTABLES: + case DIOCRGETTSTATS: + case DIOCRGETADDRS: + case DIOCRGETASTATS: + case DIOCRTSTADDRS: + case DIOCOSFPGET: + case DIOCGETSRCNODES: + case DIOCIGETIFACES: + case DIOCGIFSPEED: + break; + case DIOCRCLRTABLES: + case DIOCRADDTABLES: + case DIOCRDELTABLES: + case DIOCRCLRTSTATS: + case DIOCRCLRADDRS: + case DIOCRADDADDRS: + case DIOCRDELADDRS: + case DIOCRSETADDRS: + case DIOCRSETTFLAGS: + if (((struct pfioc_table *)addr)->pfrio_flags & + PFR_FLAG_DUMMY) { + flags |= FWRITE; /* need write lock for dummy */ + break; /* dummy operation ok */ + } + return (EACCES); + case 
DIOCGETRULE: + if (((struct pfioc_rule *)addr)->action == + PF_GET_CLR_CNTR) + return (EACCES); + break; + default: + return (EACCES); + } + + CURVNET_SET(TD_TO_VNET(td)); + + switch (cmd) { + case DIOCSTART: + sx_xlock(&pf_ioctl_lock); + if (V_pf_status.running) + error = EEXIST; + else { + int cpu; + + error = hook_pf(); + if (error) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: pfil registration failed\n")); + break; + } + V_pf_status.running = 1; + V_pf_status.since = time_second; + + CPU_FOREACH(cpu) + V_pf_stateid[cpu] = time_second; + + DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); + } + break; + + case DIOCSTOP: + sx_xlock(&pf_ioctl_lock); + if (!V_pf_status.running) + error = ENOENT; + else { + V_pf_status.running = 0; + error = dehook_pf(); + if (error) { + V_pf_status.running = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: pfil unregistration failed\n")); + } + V_pf_status.since = time_second; + DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); + } + break; + + case DIOCADDRULE: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *rule, *tail; + struct pf_pooladdr *pa; + struct pfi_kif *kif = NULL; + int rs_num; + + if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { + error = EINVAL; + break; + } +#ifndef INET + if (pr->rule.af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pr->rule.af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + + rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK); + bcopy(&pr->rule, rule, sizeof(struct pf_rule)); + if (rule->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + rule->states_cur = counter_u64_alloc(M_WAITOK); + rule->states_tot = counter_u64_alloc(M_WAITOK); + rule->src_nodes = counter_u64_alloc(M_WAITOK); +#ifndef __rtems__ + rule->cuid = td->td_ucred->cr_ruid; + rule->cpid = td->td_proc ? 
td->td_proc->p_pid : 0; +#else /* __rtems__ */ + rule->cuid = BSD_DEFAULT_UID; + rule->cpid = BSD_DEFAULT_PID; +#endif /* __rtems__ */ + TAILQ_INIT(&rule->rpool.list); + +#define ERROUT(x) { error = (x); goto DIOCADDRULE_error; } + + PF_RULES_WLOCK(); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) + ERROUT(EINVAL); + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) + ERROUT(EINVAL); + if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) { + DPFPRINTF(PF_DEBUG_MISC, + ("ticket: %d != [%d]%d\n", pr->ticket, rs_num, + ruleset->rules[rs_num].inactive.ticket)); + ERROUT(EBUSY); + } + if (pr->pool_ticket != V_ticket_pabuf) { + DPFPRINTF(PF_DEBUG_MISC, + ("pool_ticket: %d != %d\n", pr->pool_ticket, + V_ticket_pabuf)); + ERROUT(EBUSY); + } + + tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, + pf_rulequeue); + if (tail) + rule->nr = tail->nr + 1; + else + rule->nr = 0; + if (rule->ifname[0]) { + rule->kif = pfi_kif_attach(kif, rule->ifname); + pfi_kif_ref(rule->kif); + } else + rule->kif = NULL; + + if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) + error = EBUSY; + +#ifdef ALTQ + /* set queue IDs */ + if (rule->qname[0] != 0) { + if ((rule->qid = pf_qname2qid(rule->qname)) == 0) + error = EBUSY; + else if (rule->pqname[0] != 0) { + if ((rule->pqid = + pf_qname2qid(rule->pqname)) == 0) + error = EBUSY; + } else + rule->pqid = rule->qid; + } +#endif + if (rule->tagname[0]) + if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) + error = EBUSY; + if (rule->match_tagname[0]) + if ((rule->match_tag = + pf_tagname2tag(rule->match_tagname)) == 0) + error = EBUSY; + if (rule->rt && !rule->direction) + error = EINVAL; + if (!rule->log) + rule->logif = 0; + if (rule->logif >= PFLOGIFS_MAX) + error = EINVAL; + if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) + error = ENOMEM; + if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) + error = ENOMEM; + if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) + error = EINVAL; + if (rule->scrub_flags & PFSTATE_SETPRIO && + (rule->set_prio[0] > PF_PRIO_MAX || + rule->set_prio[1] > PF_PRIO_MAX)) + error = EINVAL; + TAILQ_FOREACH(pa, &V_pf_pabuf, entries) + if (pa->addr.type == PF_ADDR_TABLE) { + pa->addr.p.tbl = pfr_attach_table(ruleset, + pa->addr.v.tblname); + if (pa->addr.p.tbl == NULL) + error = ENOMEM; + } + + rule->overload_tbl = NULL; + if (rule->overload_tblname[0]) { + if ((rule->overload_tbl = pfr_attach_table(ruleset, + rule->overload_tblname)) == NULL) + error = EINVAL; + else + rule->overload_tbl->pfrkt_flags |= + PFR_TFLAG_ACTIVE; + } + + pf_mv_pool(&V_pf_pabuf, &rule->rpool.list); + if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || + (rule->action == PF_BINAT)) && rule->anchor == NULL) || + (rule->rt > PF_FASTROUTE)) && + (TAILQ_FIRST(&rule->rpool.list) == NULL)) + error = EINVAL; + + if (error) { + pf_free_rule(rule); + PF_RULES_WUNLOCK(); + break; + } + + rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; + TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, + rule, entries); + ruleset->rules[rs_num].inactive.rcount++; + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCADDRULE_error: + PF_RULES_WUNLOCK(); + counter_u64_free(rule->states_cur); + counter_u64_free(rule->states_tot); + counter_u64_free(rule->src_nodes); + free(rule, M_PFRULE); + if (kif) + free(kif, PFI_MTYPE); + break; + } + + case DIOCGETRULES: { + struct 
pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *tail; + int rs_num; + + PF_RULES_WLOCK(); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) { + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, + pf_rulequeue); + if (tail) + pr->nr = tail->nr + 1; + else + pr->nr = 0; + pr->ticket = ruleset->rules[rs_num].active.ticket; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCGETRULE: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *rule; + int rs_num, i; + + PF_RULES_WLOCK(); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) { + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + if (pr->ticket != ruleset->rules[rs_num].active.ticket) { + PF_RULES_WUNLOCK(); + error = EBUSY; + break; + } + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + while ((rule != NULL) && (rule->nr != pr->nr)) + rule = TAILQ_NEXT(rule, entries); + if (rule == NULL) { + PF_RULES_WUNLOCK(); + error = EBUSY; + break; + } + bcopy(rule, &pr->rule, sizeof(struct pf_rule)); + pr->rule.u_states_cur = counter_u64_fetch(rule->states_cur); + pr->rule.u_states_tot = counter_u64_fetch(rule->states_tot); + pr->rule.u_src_nodes = counter_u64_fetch(rule->src_nodes); + if (pf_anchor_copyout(ruleset, rule, pr)) { + PF_RULES_WUNLOCK(); + error = EBUSY; + break; + } + pf_addr_copyout(&pr->rule.src.addr); + pf_addr_copyout(&pr->rule.dst.addr); + for (i = 0; i < PF_SKIP_COUNT; ++i) + if (rule->skip[i].ptr == NULL) + pr->rule.skip[i].nr = -1; + else + pr->rule.skip[i].nr = + rule->skip[i].ptr->nr; + + if (pr->action == PF_GET_CLR_CNTR) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + counter_u64_zero(rule->states_tot); + } + PF_RULES_WUNLOCK(); + break; + } + + case DIOCCHANGERULE: { + struct pfioc_rule *pcr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *oldrule = NULL, *newrule = NULL; + struct pfi_kif *kif = NULL; + struct pf_pooladdr *pa; + u_int32_t nr = 0; + int rs_num; + + if (pcr->action < PF_CHANGE_ADD_HEAD || + pcr->action > PF_CHANGE_GET_TICKET) { + error = EINVAL; + break; + } + if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { + error = EINVAL; + break; + } + + if (pcr->action != PF_CHANGE_REMOVE) { +#ifndef INET + if (pcr->rule.af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pcr->rule.af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + newrule = malloc(sizeof(*newrule), M_PFRULE, M_WAITOK); + bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); + if (newrule->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + newrule->states_cur = counter_u64_alloc(M_WAITOK); + newrule->states_tot = counter_u64_alloc(M_WAITOK); + newrule->src_nodes = counter_u64_alloc(M_WAITOK); +#ifndef __rtems__ + newrule->cuid = td->td_ucred->cr_ruid; + newrule->cpid = td->td_proc ? 
td->td_proc->p_pid : 0; +#else /* __rtems__ */ + newrule->cuid = BSD_DEFAULT_UID; + newrule->cpid = BSD_DEFAULT_PID; +#endif /* __rtems__ */ + TAILQ_INIT(&newrule->rpool.list); + } + +#define ERROUT(x) { error = (x); goto DIOCCHANGERULE_error; } + + PF_RULES_WLOCK(); + if (!(pcr->action == PF_CHANGE_REMOVE || + pcr->action == PF_CHANGE_GET_TICKET) && + pcr->pool_ticket != V_ticket_pabuf) + ERROUT(EBUSY); + + ruleset = pf_find_ruleset(pcr->anchor); + if (ruleset == NULL) + ERROUT(EINVAL); + + rs_num = pf_get_ruleset_number(pcr->rule.action); + if (rs_num >= PF_RULESET_MAX) + ERROUT(EINVAL); + + if (pcr->action == PF_CHANGE_GET_TICKET) { + pcr->ticket = ++ruleset->rules[rs_num].active.ticket; + ERROUT(0); + } else if (pcr->ticket != + ruleset->rules[rs_num].active.ticket) + ERROUT(EINVAL); + + if (pcr->action != PF_CHANGE_REMOVE) { + if (newrule->ifname[0]) { + newrule->kif = pfi_kif_attach(kif, + newrule->ifname); + pfi_kif_ref(newrule->kif); + } else + newrule->kif = NULL; + + if (newrule->rtableid > 0 && + newrule->rtableid >= rt_numfibs) + error = EBUSY; + +#ifdef ALTQ + /* set queue IDs */ + if (newrule->qname[0] != 0) { + if ((newrule->qid = + pf_qname2qid(newrule->qname)) == 0) + error = EBUSY; + else if (newrule->pqname[0] != 0) { + if ((newrule->pqid = + pf_qname2qid(newrule->pqname)) == 0) + error = EBUSY; + } else + newrule->pqid = newrule->qid; + } +#endif /* ALTQ */ + if (newrule->tagname[0]) + if ((newrule->tag = + pf_tagname2tag(newrule->tagname)) == 0) + error = EBUSY; + if (newrule->match_tagname[0]) + if ((newrule->match_tag = pf_tagname2tag( + newrule->match_tagname)) == 0) + error = EBUSY; + if (newrule->rt && !newrule->direction) + error = EINVAL; + if (!newrule->log) + newrule->logif = 0; + if (newrule->logif >= PFLOGIFS_MAX) + error = EINVAL; + if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af)) + error = ENOMEM; + if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af)) + error = ENOMEM; + if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) + error = EINVAL; + TAILQ_FOREACH(pa, &V_pf_pabuf, entries) + if (pa->addr.type == PF_ADDR_TABLE) { + pa->addr.p.tbl = + pfr_attach_table(ruleset, + pa->addr.v.tblname); + if (pa->addr.p.tbl == NULL) + error = ENOMEM; + } + + newrule->overload_tbl = NULL; + if (newrule->overload_tblname[0]) { + if ((newrule->overload_tbl = pfr_attach_table( + ruleset, newrule->overload_tblname)) == + NULL) + error = EINVAL; + else + newrule->overload_tbl->pfrkt_flags |= + PFR_TFLAG_ACTIVE; + } + + pf_mv_pool(&V_pf_pabuf, &newrule->rpool.list); + if (((((newrule->action == PF_NAT) || + (newrule->action == PF_RDR) || + (newrule->action == PF_BINAT) || + (newrule->rt > PF_FASTROUTE)) && + !newrule->anchor)) && + (TAILQ_FIRST(&newrule->rpool.list) == NULL)) + error = EINVAL; + + if (error) { + pf_free_rule(newrule); + PF_RULES_WUNLOCK(); + break; + } + + newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); + newrule->evaluations = 0; + newrule->packets[0] = newrule->packets[1] = 0; + newrule->bytes[0] = newrule->bytes[1] = 0; + } + pf_empty_pool(&V_pf_pabuf); + + if (pcr->action == PF_CHANGE_ADD_HEAD) + oldrule = TAILQ_FIRST( + ruleset->rules[rs_num].active.ptr); + else if (pcr->action == PF_CHANGE_ADD_TAIL) + oldrule = TAILQ_LAST( + ruleset->rules[rs_num].active.ptr, pf_rulequeue); + else { + oldrule = TAILQ_FIRST( + ruleset->rules[rs_num].active.ptr); + while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) + oldrule = TAILQ_NEXT(oldrule, entries); + if (oldrule == NULL) { + if (newrule != NULL) + pf_free_rule(newrule); + 
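/* no rule with the requested number exists; undo the allocation and fail */
+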
PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + } + + if (pcr->action == PF_CHANGE_REMOVE) { + pf_unlink_rule(ruleset->rules[rs_num].active.ptr, + oldrule); + ruleset->rules[rs_num].active.rcount--; + } else { + if (oldrule == NULL) + TAILQ_INSERT_TAIL( + ruleset->rules[rs_num].active.ptr, + newrule, entries); + else if (pcr->action == PF_CHANGE_ADD_HEAD || + pcr->action == PF_CHANGE_ADD_BEFORE) + TAILQ_INSERT_BEFORE(oldrule, newrule, entries); + else + TAILQ_INSERT_AFTER( + ruleset->rules[rs_num].active.ptr, + oldrule, newrule, entries); + ruleset->rules[rs_num].active.rcount++; + } + + nr = 0; + TAILQ_FOREACH(oldrule, + ruleset->rules[rs_num].active.ptr, entries) + oldrule->nr = nr++; + + ruleset->rules[rs_num].active.ticket++; + + pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); + pf_remove_if_empty_ruleset(ruleset); + + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCCHANGERULE_error: + PF_RULES_WUNLOCK(); + if (newrule != NULL) { + counter_u64_free(newrule->states_cur); + counter_u64_free(newrule->states_tot); + counter_u64_free(newrule->src_nodes); + free(newrule, M_PFRULE); + } + if (kif != NULL) + free(kif, PFI_MTYPE); + break; + } + + case DIOCCLRSTATES: { + struct pf_state *s; + struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; + u_int i, killed = 0; + + for (i = 0; i <= pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + +relock_DIOCCLRSTATES: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) + if (!psk->psk_ifname[0] || + !strcmp(psk->psk_ifname, + s->kif->pfik_name)) { + /* + * Don't send out individual + * delete messages. + */ + s->state_flags |= PFSTATE_NOSYNC; + pf_unlink_state(s, PF_ENTER_LOCKED); + killed++; + goto relock_DIOCCLRSTATES; + } + PF_HASHROW_UNLOCK(ih); + } + psk->psk_killed = killed; + if (pfsync_clear_states_ptr != NULL) + pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname); + break; + } + + case DIOCKILLSTATES: { + struct pf_state *s; + struct pf_state_key *sk; + struct pf_addr *srcaddr, *dstaddr; + u_int16_t srcport, dstport; + struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; + u_int i, killed = 0; + + if (psk->psk_pfcmp.id) { + if (psk->psk_pfcmp.creatorid == 0) + psk->psk_pfcmp.creatorid = V_pf_status.hostid; + if ((s = pf_find_state_byid(psk->psk_pfcmp.id, + psk->psk_pfcmp.creatorid))) { + pf_unlink_state(s, PF_ENTER_LOCKED); + psk->psk_killed = 1; + } + break; + } + + for (i = 0; i <= pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + +relock_DIOCKILLSTATES: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + sk = s->key[PF_SK_WIRE]; + if (s->direction == PF_OUT) { + srcaddr = &sk->addr[1]; + dstaddr = &sk->addr[0]; + srcport = sk->port[1]; + dstport = sk->port[0]; + } else { + srcaddr = &sk->addr[0]; + dstaddr = &sk->addr[1]; + srcport = sk->port[0]; + dstport = sk->port[1]; + } + + if ((!psk->psk_af || sk->af == psk->psk_af) + && (!psk->psk_proto || psk->psk_proto == + sk->proto) && + PF_MATCHA(psk->psk_src.neg, + &psk->psk_src.addr.v.a.addr, + &psk->psk_src.addr.v.a.mask, + srcaddr, sk->af) && + PF_MATCHA(psk->psk_dst.neg, + &psk->psk_dst.addr.v.a.addr, + &psk->psk_dst.addr.v.a.mask, + dstaddr, sk->af) && + (psk->psk_src.port_op == 0 || + pf_match_port(psk->psk_src.port_op, + psk->psk_src.port[0], psk->psk_src.port[1], + srcport)) && + (psk->psk_dst.port_op == 0 || + pf_match_port(psk->psk_dst.port_op, + psk->psk_dst.port[0], psk->psk_dst.port[1], + dstport)) && + (!psk->psk_label[0] || + (s->rule.ptr->label[0] && + !strcmp(psk->psk_label, + 
s->rule.ptr->label))) && + (!psk->psk_ifname[0] || + !strcmp(psk->psk_ifname, + s->kif->pfik_name))) { + pf_unlink_state(s, PF_ENTER_LOCKED); + killed++; + goto relock_DIOCKILLSTATES; + } + } + PF_HASHROW_UNLOCK(ih); + } + psk->psk_killed = killed; + break; + } + + case DIOCADDSTATE: { + struct pfioc_state *ps = (struct pfioc_state *)addr; + struct pfsync_state *sp = &ps->state; + + if (sp->timeout >= PFTM_MAX) { + error = EINVAL; + break; + } + if (pfsync_state_import_ptr != NULL) { + PF_RULES_RLOCK(); + error = pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL); + PF_RULES_RUNLOCK(); + } else + error = EOPNOTSUPP; + break; + } + + case DIOCGETSTATE: { + struct pfioc_state *ps = (struct pfioc_state *)addr; + struct pf_state *s; + + s = pf_find_state_byid(ps->state.id, ps->state.creatorid); + if (s == NULL) { + error = ENOENT; + break; + } + + pfsync_state_export(&ps->state, s); + PF_STATE_UNLOCK(s); + break; + } + + case DIOCGETSTATES: { + struct pfioc_states *ps = (struct pfioc_states *)addr; + struct pf_state *s; + struct pfsync_state *pstore, *p; + int i, nr; + + if (ps->ps_len == 0) { + nr = uma_zone_get_cur(V_pf_state_z); + ps->ps_len = sizeof(struct pfsync_state) * nr; + break; + } + + p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK); + nr = 0; + + for (i = 0; i <= pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + + if (s->timeout == PFTM_UNLINKED) + continue; + + if ((nr+1) * sizeof(*p) > ps->ps_len) { + PF_HASHROW_UNLOCK(ih); + goto DIOCGETSTATES_full; + } + pfsync_state_export(p, s); + p++; + nr++; + } + PF_HASHROW_UNLOCK(ih); + } +DIOCGETSTATES_full: + error = copyout(pstore, ps->ps_states, + sizeof(struct pfsync_state) * nr); + if (error) { + free(pstore, M_TEMP); + break; + } + ps->ps_len = sizeof(struct pfsync_state) * nr; + free(pstore, M_TEMP); + + break; + } + + case DIOCGETSTATUS: { + struct pf_status *s = (struct pf_status *)addr; + + PF_RULES_RLOCK(); + s->running = V_pf_status.running; + s->since = V_pf_status.since; + s->debug = V_pf_status.debug; + s->hostid = V_pf_status.hostid; + s->states = V_pf_status.states; + s->src_nodes = V_pf_status.src_nodes; + + for (int i = 0; i < PFRES_MAX; i++) + s->counters[i] = + counter_u64_fetch(V_pf_status.counters[i]); + for (int i = 0; i < LCNT_MAX; i++) + s->lcounters[i] = + counter_u64_fetch(V_pf_status.lcounters[i]); + for (int i = 0; i < FCNT_MAX; i++) + s->fcounters[i] = + counter_u64_fetch(V_pf_status.fcounters[i]); + for (int i = 0; i < SCNT_MAX; i++) + s->scounters[i] = + counter_u64_fetch(V_pf_status.scounters[i]); + + bcopy(V_pf_status.ifname, s->ifname, IFNAMSIZ); + bcopy(V_pf_status.pf_chksum, s->pf_chksum, + PF_MD5_DIGEST_LENGTH); + + pfi_update_status(s->ifname, s); + PF_RULES_RUNLOCK(); + break; + } + + case DIOCSETSTATUSIF: { + struct pfioc_if *pi = (struct pfioc_if *)addr; + + if (pi->ifname[0] == 0) { + bzero(V_pf_status.ifname, IFNAMSIZ); + break; + } + PF_RULES_WLOCK(); + strlcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCCLRSTATUS: { + PF_RULES_WLOCK(); + for (int i = 0; i < PFRES_MAX; i++) + counter_u64_zero(V_pf_status.counters[i]); + for (int i = 0; i < FCNT_MAX; i++) + counter_u64_zero(V_pf_status.fcounters[i]); + for (int i = 0; i < SCNT_MAX; i++) + counter_u64_zero(V_pf_status.scounters[i]); + V_pf_status.since = time_second; + if (*V_pf_status.ifname) + pfi_update_status(V_pf_status.ifname, NULL); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCNATLOOK: { + struct pfioc_natlook *pnl 
= (struct pfioc_natlook *)addr; + struct pf_state_key *sk; + struct pf_state *state; + struct pf_state_key_cmp key; + int m = 0, direction = pnl->direction; + int sidx, didx; + + /* NATLOOK src and dst are reversed, so reverse sidx/didx */ + sidx = (direction == PF_IN) ? 1 : 0; + didx = (direction == PF_IN) ? 0 : 1; + + if (!pnl->proto || + PF_AZERO(&pnl->saddr, pnl->af) || + PF_AZERO(&pnl->daddr, pnl->af) || + ((pnl->proto == IPPROTO_TCP || + pnl->proto == IPPROTO_UDP) && + (!pnl->dport || !pnl->sport))) + error = EINVAL; + else { + bzero(&key, sizeof(key)); + key.af = pnl->af; + key.proto = pnl->proto; + PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af); + key.port[sidx] = pnl->sport; + PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); + key.port[didx] = pnl->dport; + + state = pf_find_state_all(&key, direction, &m); + + if (m > 1) + error = E2BIG; /* more than one state */ + else if (state != NULL) { + /* XXXGL: not locked read */ + sk = state->key[sidx]; + PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); + pnl->rsport = sk->port[sidx]; + PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af); + pnl->rdport = sk->port[didx]; + } else + error = ENOENT; + } + break; + } + + case DIOCSETTIMEOUT: { + struct pfioc_tm *pt = (struct pfioc_tm *)addr; + int old; + + if (pt->timeout < 0 || pt->timeout >= PFTM_MAX || + pt->seconds < 0) { + error = EINVAL; + break; + } + PF_RULES_WLOCK(); + old = V_pf_default_rule.timeout[pt->timeout]; + if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) + pt->seconds = 1; + V_pf_default_rule.timeout[pt->timeout] = pt->seconds; + if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) + wakeup(pf_purge_thread); + pt->seconds = old; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCGETTIMEOUT: { + struct pfioc_tm *pt = (struct pfioc_tm *)addr; + + if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) { + error = EINVAL; + break; + } + PF_RULES_RLOCK(); + pt->seconds = V_pf_default_rule.timeout[pt->timeout]; + PF_RULES_RUNLOCK(); + break; + } + + case DIOCGETLIMIT: { + struct pfioc_limit *pl = (struct pfioc_limit *)addr; + + if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { + error = EINVAL; + break; + } + PF_RULES_RLOCK(); + pl->limit = V_pf_limits[pl->index].limit; + PF_RULES_RUNLOCK(); + break; + } + + case DIOCSETLIMIT: { + struct pfioc_limit *pl = (struct pfioc_limit *)addr; + int old_limit; + + PF_RULES_WLOCK(); + if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || + V_pf_limits[pl->index].zone == NULL) { + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit); + old_limit = V_pf_limits[pl->index].limit; + V_pf_limits[pl->index].limit = pl->limit; + pl->limit = old_limit; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCSETDEBUG: { + u_int32_t *level = (u_int32_t *)addr; + + PF_RULES_WLOCK(); + V_pf_status.debug = *level; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCCLRRULECTRS: { + /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */ + struct pf_ruleset *ruleset = &pf_main_ruleset; + struct pf_rule *rule; + + PF_RULES_WLOCK(); + TAILQ_FOREACH(rule, + ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } + PF_RULES_WUNLOCK(); + break; + } + + case DIOCGIFSPEED: { + struct pf_ifspeed *psp = (struct pf_ifspeed *)addr; + struct pf_ifspeed ps; + struct ifnet *ifp; + + if (psp->ifname[0] != 0) { + /* Can we completely trust user-land? 
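No: the name is copied into a kernel-side buffer and resolved with ifunit() before use.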
*/ + strlcpy(ps.ifname, psp->ifname, IFNAMSIZ); + ifp = ifunit(ps.ifname); + if (ifp != NULL) + psp->baudrate = ifp->if_baudrate; + else + error = EINVAL; + } else + error = EINVAL; + break; + } + +#ifdef ALTQ + case DIOCSTARTALTQ: { + struct pf_altq *altq; + + PF_RULES_WLOCK(); + /* enable all altq interfaces on active list */ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { + if (altq->qname[0] == 0 && (altq->local_flags & + PFALTQ_FLAG_IF_REMOVED) == 0) { + error = pf_enable_altq(altq); + if (error != 0) + break; + } + } + if (error == 0) + V_pf_altq_running = 1; + PF_RULES_WUNLOCK(); + DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); + break; + } + + case DIOCSTOPALTQ: { + struct pf_altq *altq; + + PF_RULES_WLOCK(); + /* disable all altq interfaces on active list */ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { + if (altq->qname[0] == 0 && (altq->local_flags & + PFALTQ_FLAG_IF_REMOVED) == 0) { + error = pf_disable_altq(altq); + if (error != 0) + break; + } + } + if (error == 0) + V_pf_altq_running = 0; + PF_RULES_WUNLOCK(); + DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); + break; + } + + case DIOCADDALTQ: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq, *a; + struct ifnet *ifp; + + altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK); + bcopy(&pa->altq, altq, sizeof(struct pf_altq)); + altq->local_flags = 0; + + PF_RULES_WLOCK(); + if (pa->ticket != V_ticket_altqs_inactive) { + PF_RULES_WUNLOCK(); + free(altq, M_PFALTQ); + error = EBUSY; + break; + } + + /* + * if this is for a queue, find the discipline and + * copy the necessary fields + */ + if (altq->qname[0] != 0) { + if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { + PF_RULES_WUNLOCK(); + error = EBUSY; + free(altq, M_PFALTQ); + break; + } + altq->altq_disc = NULL; + TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) { + if (strncmp(a->ifname, altq->ifname, + IFNAMSIZ) == 0 && a->qname[0] == 0) { + altq->altq_disc = a->altq_disc; + break; + } + } + } + + if ((ifp = ifunit(altq->ifname)) == NULL) + altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; + else + error = altq_add(altq); + + if (error) { + PF_RULES_WUNLOCK(); + free(altq, M_PFALTQ); + break; + } + + TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); + bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCGETALTQS: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq; + + PF_RULES_RLOCK(); + pa->nr = 0; + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) + pa->nr++; + pa->ticket = V_ticket_altqs_active; + PF_RULES_RUNLOCK(); + break; + } + + case DIOCGETALTQ: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq; + u_int32_t nr; + + PF_RULES_RLOCK(); + if (pa->ticket != V_ticket_altqs_active) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + nr = 0; + altq = TAILQ_FIRST(V_pf_altqs_active); + while ((altq != NULL) && (nr < pa->nr)) { + altq = TAILQ_NEXT(altq, entries); + nr++; + } + if (altq == NULL) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + PF_RULES_RUNLOCK(); + break; + } + + case DIOCCHANGEALTQ: + /* CHANGEALTQ not supported yet! 
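The request is recognized but always fails with ENODEV.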
*/ + error = ENODEV; + break; + + case DIOCGETQSTATS: { + struct pfioc_qstats *pq = (struct pfioc_qstats *)addr; + struct pf_altq *altq; + u_int32_t nr; + int nbytes; + + PF_RULES_RLOCK(); + if (pq->ticket != V_ticket_altqs_active) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + nbytes = pq->nbytes; + nr = 0; + altq = TAILQ_FIRST(V_pf_altqs_active); + while ((altq != NULL) && (nr < pq->nr)) { + altq = TAILQ_NEXT(altq, entries); + nr++; + } + if (altq == NULL) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) { + PF_RULES_RUNLOCK(); + error = ENXIO; + break; + } + PF_RULES_RUNLOCK(); + error = altq_getqstats(altq, pq->buf, &nbytes); + if (error == 0) { + pq->scheduler = altq->scheduler; + pq->nbytes = nbytes; + } + break; + } +#endif /* ALTQ */ + + case DIOCBEGINADDRS: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + + PF_RULES_WLOCK(); + pf_empty_pool(&V_pf_pabuf); + pp->ticket = ++V_ticket_pabuf; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCADDADDR: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pooladdr *pa; + struct pfi_kif *kif = NULL; + +#ifndef INET + if (pp->af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pp->af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (pp->addr.addr.type != PF_ADDR_ADDRMASK && + pp->addr.addr.type != PF_ADDR_DYNIFTL && + pp->addr.addr.type != PF_ADDR_TABLE) { + error = EINVAL; + break; + } + pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK); + bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); + if (pa->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + PF_RULES_WLOCK(); + if (pp->ticket != V_ticket_pabuf) { + PF_RULES_WUNLOCK(); + if (pa->ifname[0]) + free(kif, PFI_MTYPE); + free(pa, M_PFRULE); + error = EBUSY; + break; + } + if (pa->ifname[0]) { + pa->kif = pfi_kif_attach(kif, pa->ifname); + pfi_kif_ref(pa->kif); + } else + pa->kif = NULL; + if (pa->addr.type == PF_ADDR_DYNIFTL && ((error = + pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) { + if (pa->ifname[0]) + pfi_kif_unref(pa->kif); + PF_RULES_WUNLOCK(); + free(pa, M_PFRULE); + break; + } + TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCGETADDRS: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; + struct pf_pooladdr *pa; + + PF_RULES_RLOCK(); + pp->nr = 0; + pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, + pp->r_num, 0, 1, 0); + if (pool == NULL) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + TAILQ_FOREACH(pa, &pool->list, entries) + pp->nr++; + PF_RULES_RUNLOCK(); + break; + } + + case DIOCGETADDR: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; + struct pf_pooladdr *pa; + u_int32_t nr = 0; + + PF_RULES_RLOCK(); + pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, + pp->r_num, 0, 1, 1); + if (pool == NULL) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + pa = TAILQ_FIRST(&pool->list); + while ((pa != NULL) && (nr < pp->nr)) { + pa = TAILQ_NEXT(pa, entries); + nr++; + } + if (pa == NULL) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); + pf_addr_copyout(&pp->addr.addr); + PF_RULES_RUNLOCK(); + break; + } + + case DIOCCHANGEADDR: { + struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; + struct pf_pooladdr *oldpa = NULL, *newpa = NULL; + struct pf_ruleset *ruleset; + 
struct pfi_kif *kif = NULL; + + if (pca->action < PF_CHANGE_ADD_HEAD || + pca->action > PF_CHANGE_REMOVE) { + error = EINVAL; + break; + } + if (pca->addr.addr.type != PF_ADDR_ADDRMASK && + pca->addr.addr.type != PF_ADDR_DYNIFTL && + pca->addr.addr.type != PF_ADDR_TABLE) { + error = EINVAL; + break; + } + + if (pca->action != PF_CHANGE_REMOVE) { +#ifndef INET + if (pca->af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pca->af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK); + bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); + if (newpa->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + newpa->kif = NULL; + } + +#define ERROUT(x) { error = (x); goto DIOCCHANGEADDR_error; } + PF_RULES_WLOCK(); + ruleset = pf_find_ruleset(pca->anchor); + if (ruleset == NULL) + ERROUT(EBUSY); + + pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, + pca->r_num, pca->r_last, 1, 1); + if (pool == NULL) + ERROUT(EBUSY); + + if (pca->action != PF_CHANGE_REMOVE) { + if (newpa->ifname[0]) { + newpa->kif = pfi_kif_attach(kif, newpa->ifname); + pfi_kif_ref(newpa->kif); + kif = NULL; + } + + switch (newpa->addr.type) { + case PF_ADDR_DYNIFTL: + error = pfi_dynaddr_setup(&newpa->addr, + pca->af); + break; + case PF_ADDR_TABLE: + newpa->addr.p.tbl = pfr_attach_table(ruleset, + newpa->addr.v.tblname); + if (newpa->addr.p.tbl == NULL) + error = ENOMEM; + break; + } + if (error) + goto DIOCCHANGEADDR_error; + } + + switch (pca->action) { + case PF_CHANGE_ADD_HEAD: + oldpa = TAILQ_FIRST(&pool->list); + break; + case PF_CHANGE_ADD_TAIL: + oldpa = TAILQ_LAST(&pool->list, pf_palist); + break; + default: + oldpa = TAILQ_FIRST(&pool->list); + for (int i = 0; oldpa && i < pca->nr; i++) + oldpa = TAILQ_NEXT(oldpa, entries); + + if (oldpa == NULL) + ERROUT(EINVAL); + } + + if (pca->action == PF_CHANGE_REMOVE) { + TAILQ_REMOVE(&pool->list, oldpa, entries); + switch (oldpa->addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(oldpa->addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(oldpa->addr.p.tbl); + break; + } + if (oldpa->kif) + pfi_kif_unref(oldpa->kif); + free(oldpa, M_PFRULE); + } else { + if (oldpa == NULL) + TAILQ_INSERT_TAIL(&pool->list, newpa, entries); + else if (pca->action == PF_CHANGE_ADD_HEAD || + pca->action == PF_CHANGE_ADD_BEFORE) + TAILQ_INSERT_BEFORE(oldpa, newpa, entries); + else + TAILQ_INSERT_AFTER(&pool->list, oldpa, + newpa, entries); + } + + pool->cur = TAILQ_FIRST(&pool->list); + PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCCHANGEADDR_error: + if (newpa->kif) + pfi_kif_unref(newpa->kif); + PF_RULES_WUNLOCK(); + if (newpa != NULL) + free(newpa, M_PFRULE); + if (kif != NULL) + free(kif, PFI_MTYPE); + break; + } + + case DIOCGETRULESETS: { + struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor; + + PF_RULES_RLOCK(); + pr->path[sizeof(pr->path) - 1] = 0; + if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { + PF_RULES_RUNLOCK(); + error = ENOENT; + break; + } + pr->nr = 0; + if (ruleset->anchor == NULL) { + /* XXX kludge for pf_main_ruleset */ + RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) + if (anchor->parent == NULL) + pr->nr++; + } else { + RB_FOREACH(anchor, pf_anchor_node, + &ruleset->anchor->children) + pr->nr++; + } + PF_RULES_RUNLOCK(); + break; + } + + case DIOCGETRULESET: { + struct pfioc_ruleset *pr 
= (struct pfioc_ruleset *)addr; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor; + u_int32_t nr = 0; + + PF_RULES_RLOCK(); + pr->path[sizeof(pr->path) - 1] = 0; + if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { + PF_RULES_RUNLOCK(); + error = ENOENT; + break; + } + pr->name[0] = 0; + if (ruleset->anchor == NULL) { + /* XXX kludge for pf_main_ruleset */ + RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) + if (anchor->parent == NULL && nr++ == pr->nr) { + strlcpy(pr->name, anchor->name, + sizeof(pr->name)); + break; + } + } else { + RB_FOREACH(anchor, pf_anchor_node, + &ruleset->anchor->children) + if (nr++ == pr->nr) { + strlcpy(pr->name, anchor->name, + sizeof(pr->name)); + break; + } + } + if (!pr->name[0]) + error = EBUSY; + PF_RULES_RUNLOCK(); + break; + } + + case DIOCRCLRTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, + io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCRADDTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_add_tables(pfrts, io->pfrio_size, + &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); + break; + } + + case DIOCRDELTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_del_tables(pfrts, io->pfrio_size, + &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); + break; + } + + case DIOCRGETTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_tables(&io->pfrio_table, pfrts, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfrts, io->pfrio_buffer, totlen); + free(pfrts, M_TEMP); + break; + } + + case DIOCRGETTSTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_tstats *pfrtstats; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_tstats)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_tstats); + pfrtstats = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_WLOCK(); + error = pfr_get_tstats(&io->pfrio_table, pfrtstats, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0) + error = copyout(pfrtstats, io->pfrio_buffer, totlen); + free(pfrtstats, M_TEMP); + break; + } + + case DIOCRCLRTSTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t 
totlen; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_tstats(pfrts, io->pfrio_size, + &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); + break; + } + + case DIOCRSETTFLAGS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_set_tflags(pfrts, io->pfrio_size, + io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, + &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); + break; + } + + case DIOCRCLRADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, + io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCRADDADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_add_addrs(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRDELADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_del_addrs(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRSETADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen, count; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + count = max(io->pfrio_size, io->pfrio_size2); + totlen = count * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_set_addrs(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, + &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | + PFR_FLAG_USERIOCTL, 0); + PF_RULES_WUNLOCK(); 
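+ /* with PFR_FLAG_FEEDBACK, per-address results are copied back to the caller */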
+ if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRGETADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_addrs(&io->pfrio_table, pfras, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRGETASTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_astats *pfrastats; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_astats)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_astats); + pfrastats = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_astats(&io->pfrio_table, pfrastats, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfrastats, io->pfrio_buffer, totlen); + free(pfrastats, M_TEMP); + break; + } + + case DIOCRCLRASTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_astats(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRTSTADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_RLOCK(); + error = pfr_tst_addrs(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRINADEFINE: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_ina_define(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, + io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfras, M_TEMP); + break; + } + + case DIOCOSFPADD: { + struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + PF_RULES_WLOCK(); + error = pf_osfp_add(io); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCOSFPGET: { + struct 
pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + PF_RULES_RLOCK(); + error = pf_osfp_get(io); + PF_RULES_RUNLOCK(); + break; + } + + case DIOCXBEGIN: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioes, *ioe; + size_t totlen; + int i; + + if (io->esize != sizeof(*ioe)) { + error = ENODEV; + break; + } + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); + if (error) { + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { + switch (ioe->rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if (ioe->anchor[0]) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EINVAL; + goto fail; + } + if ((error = pf_begin_altq(&ioe->ticket))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; + } + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_begin(&table, + &ioe->ticket, NULL, 0))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; + } + break; + } + default: + if ((error = pf_begin_rules(&ioe->ticket, + ioe->rs_num, ioe->anchor))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; + } + break; + } + } + PF_RULES_WUNLOCK(); + error = copyout(ioes, io->array, totlen); + free(ioes, M_TEMP); + break; + } + + case DIOCXROLLBACK: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe, *ioes; + size_t totlen; + int i; + + if (io->esize != sizeof(*ioe)) { + error = ENODEV; + break; + } + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); + if (error) { + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { + switch (ioe->rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if (ioe->anchor[0]) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EINVAL; + goto fail; + } + if ((error = pf_rollback_altq(ioe->ticket))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_rollback(&table, + ioe->ticket, NULL, 0))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; + } + default: + if ((error = pf_rollback_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; + } + } + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + break; + } + + case DIOCXCOMMIT: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe, *ioes; + struct pf_ruleset *rs; + size_t totlen; + int i; + + if (io->esize != sizeof(*ioe)) { + error = ENODEV; + break; + } + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); + if (error) { + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + /* First makes sure everything will succeed. 
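Every ticket and ruleset is validated before anything is committed, so the commit loop below cannot fail halfway through and leave a partial commit behind.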
*/ + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { + switch (ioe->rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if (ioe->anchor[0]) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EINVAL; + goto fail; + } + if (!V_altqs_inactive_open || ioe->ticket != + V_ticket_altqs_inactive) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EBUSY; + goto fail; + } + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + rs = pf_find_ruleset(ioe->anchor); + if (rs == NULL || !rs->topen || ioe->ticket != + rs->tticket) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EBUSY; + goto fail; + } + break; + default: + if (ioe->rs_num < 0 || ioe->rs_num >= + PF_RULESET_MAX) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EINVAL; + goto fail; + } + rs = pf_find_ruleset(ioe->anchor); + if (rs == NULL || + !rs->rules[ioe->rs_num].inactive.open || + rs->rules[ioe->rs_num].inactive.ticket != + ioe->ticket) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EBUSY; + goto fail; + } + break; + } + } + /* Now do the commit - no errors should happen here. */ + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { + switch (ioe->rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if ((error = pf_commit_altq(ioe->ticket))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_commit(&table, + ioe->ticket, NULL, NULL, 0))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; + } + default: + if ((error = pf_commit_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; + } + } + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + break; + } + + case DIOCGETSRCNODES: { + struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; + struct pf_srchash *sh; + struct pf_src_node *n, *p, *pstore; + uint32_t i, nr = 0; + + if (psn->psn_len == 0) { + for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) + nr++; + PF_HASHROW_UNLOCK(sh); + } + psn->psn_len = sizeof(struct pf_src_node) * nr; + break; + } + + p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK); + for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) { + int secs = time_uptime, diff; + + if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) + break; + + bcopy(n, p, sizeof(struct pf_src_node)); + if (n->rule.ptr != NULL) + p->rule.nr = n->rule.ptr->nr; + p->creation = secs - p->creation; + if (p->expire > secs) + p->expire -= secs; + else + p->expire = 0; + + /* Adjust the connection rate estimate. 
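The count is aged linearly over the rate window and reset to zero once the window has fully elapsed.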
*/ + diff = secs - n->conn_rate.last; + if (diff >= n->conn_rate.seconds) + p->conn_rate.count = 0; + else + p->conn_rate.count -= + n->conn_rate.count * diff / + n->conn_rate.seconds; + p++; + nr++; + } + PF_HASHROW_UNLOCK(sh); + } + error = copyout(pstore, psn->psn_src_nodes, + sizeof(struct pf_src_node) * nr); + if (error) { + free(pstore, M_TEMP); + break; + } + psn->psn_len = sizeof(struct pf_src_node) * nr; + free(pstore, M_TEMP); + break; + } + + case DIOCCLRSRCNODES: { + + pf_clear_srcnodes(NULL); + pf_purge_expired_src_nodes(); + break; + } + + case DIOCKILLSRCNODES: + pf_kill_srcnodes((struct pfioc_src_node_kill *)addr); + break; + + case DIOCSETHOSTID: { + u_int32_t *hostid = (u_int32_t *)addr; + + PF_RULES_WLOCK(); + if (*hostid == 0) + V_pf_status.hostid = arc4random(); + else + V_pf_status.hostid = *hostid; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCOSFPFLUSH: + PF_RULES_WLOCK(); + pf_osfp_flush(); + PF_RULES_WUNLOCK(); + break; + + case DIOCIGETIFACES: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + struct pfi_kif *ifstore; + size_t bufsiz; + + if (io->pfiio_esize != sizeof(struct pfi_kif)) { + error = ENODEV; + break; + } + + bufsiz = io->pfiio_size * sizeof(struct pfi_kif); + ifstore = malloc(bufsiz, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size); + PF_RULES_RUNLOCK(); + error = copyout(ifstore, io->pfiio_buffer, bufsiz); + free(ifstore, M_TEMP); + break; + } + + case DIOCSETIFFLAG: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + + PF_RULES_WLOCK(); + error = pfi_set_flags(io->pfiio_name, io->pfiio_flags); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCCLRIFFLAG: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + + PF_RULES_WLOCK(); + error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); + PF_RULES_WUNLOCK(); + break; + } + + default: + error = ENODEV; + break; + } +fail: + if (sx_xlocked(&pf_ioctl_lock)) + sx_xunlock(&pf_ioctl_lock); + CURVNET_RESTORE(); + + return (error); +} + +void +pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) +{ + bzero(sp, sizeof(struct pfsync_state)); + + /* copy from state key */ + sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; + sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; + sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; + sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; + sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; + sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; + sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; + sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; + sp->proto = st->key[PF_SK_WIRE]->proto; + sp->af = st->key[PF_SK_WIRE]->af; + + /* copy from state */ + strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); + bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); + sp->creation = htonl(time_uptime - st->creation); + sp->expire = pf_state_expires(st); + if (sp->expire <= time_uptime) + sp->expire = htonl(0); + else + sp->expire = htonl(sp->expire - time_uptime); + + sp->direction = st->direction; + sp->log = st->log; + sp->timeout = st->timeout; + sp->state_flags = st->state_flags; + if (st->src_node) + sp->sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->nat_src_node) + sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + + sp->id = st->id; + sp->creatorid = st->creatorid; + pf_state_peer_hton(&st->src, &sp->src); + pf_state_peer_hton(&st->dst, &sp->dst); + + if (st->rule.ptr == NULL) + sp->rule = htonl(-1); + else + 
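/* the matched rule's number, in network byte order */
+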
sp->rule = htonl(st->rule.ptr->nr);
+ if (st->anchor.ptr == NULL)
+ sp->anchor = htonl(-1);
+ else
+ sp->anchor = htonl(st->anchor.ptr->nr);
+ if (st->nat_rule.ptr == NULL)
+ sp->nat_rule = htonl(-1);
+ else
+ sp->nat_rule = htonl(st->nat_rule.ptr->nr);
+
+ pf_state_counter_hton(st->packets[0], sp->packets[0]);
+ pf_state_counter_hton(st->packets[1], sp->packets[1]);
+ pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
+ pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
+
+}
+
+static void
+pf_tbladdr_copyout(struct pf_addr_wrap *aw)
+{
+ struct pfr_ktable *kt;
+
+ KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type));
+
+ kt = aw->p.tbl;
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
+ kt = kt->pfrkt_root;
+ aw->p.tbl = NULL;
+ aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
+ kt->pfrkt_cnt : -1;
+}
+
+/*
+ * XXX - Check for version mismatch!!!
+ */
+static void
+pf_clear_states(void)
+{
+ struct pf_state *s;
+ u_int i;
+
+ for (i = 0; i <= pf_hashmask; i++) {
+ struct pf_idhash *ih = &V_pf_idhash[i];
+relock:
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry) {
+ s->timeout = PFTM_PURGE;
+ /* Don't send out individual delete messages. */
+ s->state_flags |= PFSTATE_NOSYNC;
+ pf_unlink_state(s, PF_ENTER_LOCKED);
+ goto relock;
+ }
+ PF_HASHROW_UNLOCK(ih);
+ }
+}
+
+static int
+pf_clear_tables(void)
+{
+ struct pfioc_table io;
+ int error;
+
+ bzero(&io, sizeof(io));
+
+ error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel,
+ io.pfrio_flags);
+
+ return (error);
+}
+
+static void
+pf_clear_srcnodes(struct pf_src_node *n)
+{
+ struct pf_state *s;
+ int i;
+
+ for (i = 0; i <= pf_hashmask; i++) {
+ struct pf_idhash *ih = &V_pf_idhash[i];
+
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry) {
+ if (n == NULL || n == s->src_node)
+ s->src_node = NULL;
+ if (n == NULL || n == s->nat_src_node)
+ s->nat_src_node = NULL;
+ }
+ PF_HASHROW_UNLOCK(ih);
+ }
+
+ if (n == NULL) {
+ struct pf_srchash *sh;
+
+ for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
+ i++, sh++) {
+ PF_HASHROW_LOCK(sh);
+ LIST_FOREACH(n, &sh->nodes, entry) {
+ n->expire = 1;
+ n->states = 0;
+ }
+ PF_HASHROW_UNLOCK(sh);
+ }
+ } else {
+ /* XXX: hash slot should already be locked here. */
+ n->expire = 1;
+ n->states = 0;
+ }
+}
+
+static void
+pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
+{
+ struct pf_src_node_list kill;
+
+ LIST_INIT(&kill);
+ for (int i = 0; i <= pf_srchashmask; i++) {
+ struct pf_srchash *sh = &V_pf_srchash[i];
+ struct pf_src_node *sn, *tmp;
+
+ PF_HASHROW_LOCK(sh);
+ LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
+ if (PF_MATCHA(psnk->psnk_src.neg,
+ &psnk->psnk_src.addr.v.a.addr,
+ &psnk->psnk_src.addr.v.a.mask,
+ &sn->addr, sn->af) &&
+ PF_MATCHA(psnk->psnk_dst.neg,
+ &psnk->psnk_dst.addr.v.a.addr,
+ &psnk->psnk_dst.addr.v.a.mask,
+ &sn->raddr, sn->af)) {
+ pf_unlink_src_node(sn);
+ LIST_INSERT_HEAD(&kill, sn, entry);
+ sn->expire = 1;
+ }
+ PF_HASHROW_UNLOCK(sh);
+ }
+
+ for (int i = 0; i <= pf_hashmask; i++) {
+ struct pf_idhash *ih = &V_pf_idhash[i];
+ struct pf_state *s;
+
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry) {
+ if (s->src_node && s->src_node->expire == 1)
+ s->src_node = NULL;
+ if (s->nat_src_node && s->nat_src_node->expire == 1)
+ s->nat_src_node = NULL;
+ }
+ PF_HASHROW_UNLOCK(ih);
+ }
+
+ psnk->psnk_killed = pf_free_src_nodes(&kill);
+}
+
+/*
+ * XXX - Check for version mismatch!!!
+ */
+
+/*
+ * Duplicate pfctl -Fa operation to get rid of as much as we can.
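+ * Each ruleset type is flushed by opening and immediately committing an
+ * empty transaction; tables, states and source nodes are cleared afterwards,
+ * and the per-VNET counters are freed last.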
+ */
+static int
+shutdown_pf(void)
+{
+ int error = 0;
+ u_int32_t t[5];
+ char nn = '\0';
+
+ do {
+ if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
+ != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n"));
+ break;
+ }
+ if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
+ != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
+ break; /* XXX: rollback? */
+ }
+ if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
+ != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
+ break; /* XXX: rollback? */
+ }
+ if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
+ != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
+ break; /* XXX: rollback? */
+ }
+ if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
+ != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
+ break; /* XXX: rollback? */
+ }
+
+ /* XXX: these should always succeed here */
+ pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn);
+ pf_commit_rules(t[1], PF_RULESET_FILTER, &nn);
+ pf_commit_rules(t[2], PF_RULESET_NAT, &nn);
+ pf_commit_rules(t[3], PF_RULESET_BINAT, &nn);
+ pf_commit_rules(t[4], PF_RULESET_RDR, &nn);
+
+ if ((error = pf_clear_tables()) != 0)
+ break;
+
+#ifdef ALTQ
+ if ((error = pf_begin_altq(&t[0])) != 0) {
+ DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
+ break;
+ }
+ pf_commit_altq(t[0]);
+#endif
+
+ pf_clear_states();
+
+ pf_clear_srcnodes(NULL);
+
+ /* status does not use malloced mem so no need to cleanup */
+ /* fingerprints and interfaces have their own cleanup code */
+
+ /* Free counters last as we updated them during shutdown. */
+ counter_u64_free(V_pf_default_rule.states_cur);
+ counter_u64_free(V_pf_default_rule.states_tot);
+ counter_u64_free(V_pf_default_rule.src_nodes);
+
+ for (int i = 0; i < PFRES_MAX; i++)
+ counter_u64_free(V_pf_status.counters[i]);
+ for (int i = 0; i < LCNT_MAX; i++)
+ counter_u64_free(V_pf_status.lcounters[i]);
+ for (int i = 0; i < FCNT_MAX; i++)
+ counter_u64_free(V_pf_status.fcounters[i]);
+ for (int i = 0; i < SCNT_MAX; i++)
+ counter_u64_free(V_pf_status.scounters[i]);
+ } while (0);
+
+ return (error);
+}
+
+#ifdef INET
+static int
+pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+ struct inpcb *inp)
+{
+ int chk;
+
+ chk = pf_test(PF_IN, ifp, m, inp);
+ if (chk && *m) {
+ m_freem(*m);
+ *m = NULL;
+ }
+
+ if (chk != PF_PASS)
+ return (EACCES);
+ return (0);
+}
+
+static int
+pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+ struct inpcb *inp)
+{
+ int chk;
+
+ chk = pf_test(PF_OUT, ifp, m, inp);
+ if (chk && *m) {
+ m_freem(*m);
+ *m = NULL;
+ }
+
+ if (chk != PF_PASS)
+ return (EACCES);
+ return (0);
+}
+#endif
+
+#ifdef INET6
+static int
+pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+ struct inpcb *inp)
+{
+ int chk;
+
+ /*
+ * In case of loopback traffic IPv6 uses the real interface in
+ * order to support scoped addresses. In order to support stateful
+ * filtering we have changed this to lo0 as it is the case in IPv4.
+ */
+ CURVNET_SET(ifp->if_vnet);
+ chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ?
V_loif : ifp, m, inp); + CURVNET_RESTORE(); + if (chk && *m) { + m_freem(*m); + *m = NULL; + } + if (chk != PF_PASS) + return (EACCES); + return (0); +} + +static int +pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, + struct inpcb *inp) +{ + int chk; + + CURVNET_SET(ifp->if_vnet); + chk = pf_test6(PF_OUT, ifp, m, inp); + CURVNET_RESTORE(); + if (chk && *m) { + m_freem(*m); + *m = NULL; + } + if (chk != PF_PASS) + return (EACCES); + return (0); +} +#endif /* INET6 */ + +static int +hook_pf(void) +{ +#ifdef INET + struct pfil_head *pfh_inet; +#endif +#ifdef INET6 + struct pfil_head *pfh_inet6; +#endif + + if (V_pf_pfil_hooked) + return (0); + +#ifdef INET + pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); + if (pfh_inet == NULL) + return (ESRCH); /* XXX */ + pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); + pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); +#endif +#ifdef INET6 + pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); + if (pfh_inet6 == NULL) { +#ifdef INET + pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, + pfh_inet); + pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, + pfh_inet); +#endif + return (ESRCH); /* XXX */ + } + pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); + pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); +#endif + + V_pf_pfil_hooked = 1; + return (0); +} + +static int +dehook_pf(void) +{ +#ifdef INET + struct pfil_head *pfh_inet; +#endif +#ifdef INET6 + struct pfil_head *pfh_inet6; +#endif + + if (V_pf_pfil_hooked == 0) + return (0); + +#ifdef INET + pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); + if (pfh_inet == NULL) + return (ESRCH); /* XXX */ + pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, + pfh_inet); + pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, + pfh_inet); +#endif +#ifdef INET6 + pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); + if (pfh_inet6 == NULL) + return (ESRCH); /* XXX */ + pfil_remove_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, + pfh_inet6); + pfil_remove_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, + pfh_inet6); +#endif + + V_pf_pfil_hooked = 0; + return (0); +} + +static void +pf_load_vnet(void) +{ + VNET_ITERATOR_DECL(vnet_iter); + + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + V_pf_pfil_hooked = 0; + TAILQ_INIT(&V_pf_tags); + TAILQ_INIT(&V_pf_qids); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); + + pfattach_vnet(); + V_pf_vnet_active = 1; +} + +static int +pf_load(void) +{ + int error; + + rw_init(&pf_rules_lock, "pf rulesets"); + sx_init(&pf_ioctl_lock, "pf ioctl"); + + pf_mtag_initialize(); + + pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME); + if (pf_dev == NULL) + return (ENOMEM); + + pf_end_threads = 0; + error = kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pf purge"); + if (error != 0) + return (error); + + pfi_initialize(); + + return (0); +} + +static void +pf_unload_vnet(void) +{ + int error; + + V_pf_vnet_active = 0; + V_pf_status.running = 0; + swi_remove(V_pf_swi_cookie); + error = dehook_pf(); + if (error) { + /* + * Should not happen! + * XXX Due to error code ESRCH, kldunload will show + * a message like 'No such process'. 
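+ * dehook_pf() can only fail when a pfil head cannot be looked up, so this
+ * path should be unreachable in practice.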
+ */
+ printf("%s: pfil unregistration failed\n", __FUNCTION__);
+ return;
+ }
+
+ pf_unload_vnet_purge();
+
+ PF_RULES_WLOCK();
+ shutdown_pf();
+ PF_RULES_WUNLOCK();
+
+ pf_normalize_cleanup();
+ PF_RULES_WLOCK();
+ pfi_cleanup_vnet();
+ PF_RULES_WUNLOCK();
+ pfr_cleanup();
+ pf_osfp_flush();
+ pf_cleanup();
+ if (IS_DEFAULT_VNET(curvnet))
+ pf_mtag_cleanup();
+}
+
+static int
+pf_unload(void)
+{
+ int error = 0;
+
+ pf_end_threads = 1;
+ while (pf_end_threads < 2) {
+ wakeup_one(pf_purge_thread);
+ rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0);
+ }
+
+ if (pf_dev != NULL)
+ destroy_dev(pf_dev);
+
+ pfi_cleanup();
+
+ rw_destroy(&pf_rules_lock);
+ sx_destroy(&pf_ioctl_lock);
+
+ return (error);
+}
+
+static void
+vnet_pf_init(void *unused __unused)
+{
+
+ pf_load_vnet();
+}
+VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, 
+ vnet_pf_init, NULL);
+
+static void
+vnet_pf_uninit(const void *unused __unused)
+{
+
+ pf_unload_vnet();
+}
+VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
+ vnet_pf_uninit, NULL);
+
+
+static int
+pf_modevent(module_t mod, int type, void *data)
+{
+ int error = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = pf_load();
+ break;
+ case MOD_QUIESCE:
+ /*
+ * Module should not be unloaded due to race conditions.
+ */
+ error = EBUSY;
+ break;
+ case MOD_UNLOAD:
+ error = pf_unload();
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+
+static moduledata_t pf_mod = {
+ "pf",
+ pf_modevent,
+ 0
+};
+
+DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND);
+MODULE_VERSION(pf, PF_MODVER);
diff --git a/freebsd/sys/netpfil/pf/pf_lb.c b/freebsd/sys/netpfil/pf/pf_lb.c
new file mode 100644
index 00000000..033c3879
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf_lb.c
@@ -0,0 +1,681 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2002 - 2008 Henning Brauer
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Effort sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F30602-01-2-0537.
+ * + * $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/local/opt_pf.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> + +#include <rtems/bsd/sys/param.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/mbuf.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/sysctl.h> + +#include <net/if.h> +#include <net/vnet.h> +#include <net/pfvar.h> +#include <net/if_pflog.h> + +#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x + +static void pf_hash(struct pf_addr *, struct pf_addr *, + struct pf_poolhashkey *, sa_family_t); +static struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, + struct pf_addr *, u_int16_t, struct pf_addr *, + uint16_t, int, struct pf_anchor_stackframe *); +static int pf_get_sport(sa_family_t, uint8_t, struct pf_rule *, + struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *, + uint16_t *, uint16_t, uint16_t, struct pf_src_node **); + +#define mix(a,b,c) \ + do { \ + a -= b; a -= c; a ^= (c >> 13); \ + b -= c; b -= a; b ^= (a << 8); \ + c -= a; c -= b; c ^= (b >> 13); \ + a -= b; a -= c; a ^= (c >> 12); \ + b -= c; b -= a; b ^= (a << 16); \ + c -= a; c -= b; c ^= (b >> 5); \ + a -= b; a -= c; a ^= (c >> 3); \ + b -= c; b -= a; b ^= (a << 10); \ + c -= a; c -= b; c ^= (b >> 15); \ + } while (0) + +/* + * hash function based on bridge_hash in if_bridge.c + */ +static void +pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, + struct pf_poolhashkey *key, sa_family_t af) +{ + u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; + + switch (af) { +#ifdef INET + case AF_INET: + a += inaddr->addr32[0]; + b += key->key32[1]; + mix(a, b, c); + hash->addr32[0] = c + key->key32[2]; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + a += inaddr->addr32[0]; + b += inaddr->addr32[2]; + mix(a, b, c); + hash->addr32[0] = c; + a += inaddr->addr32[1]; + b += inaddr->addr32[3]; + c += key->key32[1]; + mix(a, b, c); + hash->addr32[1] = c; + a += inaddr->addr32[2]; + b += inaddr->addr32[1]; + c += key->key32[2]; + mix(a, b, c); + hash->addr32[2] = c; + a += inaddr->addr32[3]; + b += inaddr->addr32[0]; + c += key->key32[3]; + mix(a, b, c); + hash->addr32[3] = c; + break; +#endif /* INET6 */ + } +} + +static struct pf_rule * +pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, + int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, + struct pf_addr *daddr, uint16_t dport, int rs_num, + struct pf_anchor_stackframe *anchor_stack) +{ + struct pf_rule *r, *rm = NULL; + struct pf_ruleset *ruleset = NULL; + int tag = -1; + int rtableid = -1; + int asd = 0; + + r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); + while (r && rm == NULL) { + struct pf_rule_addr *src = NULL, *dst = NULL; + struct pf_addr_wrap *xdst = NULL; + + if (r->action == PF_BINAT && direction == PF_IN) { + src = &r->dst; + if (r->rpool.cur != NULL) + xdst = &r->rpool.cur->addr; + } else { + src = &r->src; + dst = &r->dst; + } + + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != pd->af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, + src->neg, kif, M_GETFIB(m))) + r = r->skip[src == 
&r->src ? PF_SKIP_SRC_ADDR : + PF_SKIP_DST_ADDR].ptr; + else if (src->port_op && !pf_match_port(src->port_op, + src->port[0], src->port[1], sport)) + r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : + PF_SKIP_DST_PORT].ptr; + else if (dst != NULL && + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL, + M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, + 0, NULL, M_GETFIB(m))) + r = TAILQ_NEXT(r, entries); + else if (dst != NULL && dst->port_op && + !pf_match_port(dst->port_op, dst->port[0], + dst->port[1], dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != + IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, + off, pd->hdr.tcp), r->os_fingerprint))) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; + if (r->anchor == NULL) { + rm = r; + } else + pf_step_into_anchor(anchor_stack, &asd, + &ruleset, rs_num, &r, NULL, NULL); + } + if (r == NULL) + pf_step_out_of_anchor(anchor_stack, &asd, &ruleset, + rs_num, &r, NULL, NULL); + } + + if (tag > 0 && pf_tag_packet(m, pd, tag)) + return (NULL); + if (rtableid >= 0) + M_SETFIB(m, rtableid); + + if (rm != NULL && (rm->action == PF_NONAT || + rm->action == PF_NORDR || rm->action == PF_NOBINAT)) + return (NULL); + return (rm); +} + +static int +pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, + struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr, + uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low, + uint16_t high, struct pf_src_node **sn) +{ + struct pf_state_key_cmp key; + struct pf_addr init_addr; + + bzero(&init_addr, sizeof(init_addr)); + if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + return (1); + + if (proto == IPPROTO_ICMP) { + low = 1; + high = 65535; + } + + bzero(&key, sizeof(key)); + key.af = af; + key.proto = proto; + key.port[0] = dport; + PF_ACPY(&key.addr[0], daddr, key.af); + + do { + PF_ACPY(&key.addr[1], naddr, key.af); + + /* + * port search; start random, step; + * similar 2 portloop in in_pcbbind + */ + if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || + proto == IPPROTO_ICMP) || (low == 0 && high == 0)) { + /* + * XXX bug: icmp states don't use the id on both sides. 
+ * (traceroute -I through nat) + */ + key.port[1] = sport; + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { + *nport = sport; + return (0); + } + } else if (low == high) { + key.port[1] = htons(low); + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { + *nport = htons(low); + return (0); + } + } else { + uint16_t tmp, cut; + + if (low > high) { + tmp = low; + low = high; + high = tmp; + } + /* low < high */ + cut = arc4random() % (1 + high - low) + low; + /* low <= cut <= high */ + for (tmp = cut; tmp <= high; ++(tmp)) { + key.port[1] = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == + NULL) { + *nport = htons(tmp); + return (0); + } + } + for (tmp = cut - 1; tmp >= low; --(tmp)) { + key.port[1] = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == + NULL) { + *nport = htons(tmp); + return (0); + } + } + } + + switch (r->rpool.opts & PF_POOL_TYPEMASK) { + case PF_POOL_RANDOM: + case PF_POOL_ROUNDROBIN: + if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + return (1); + break; + case PF_POOL_NONE: + case PF_POOL_SRCHASH: + case PF_POOL_BITMASK: + default: + return (1); + } + } while (! PF_AEQ(&init_addr, naddr, af) ); + return (1); /* none available */ +} + +int +pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, + struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) +{ + struct pf_pool *rpool = &r->rpool; + struct pf_addr *raddr = NULL, *rmask = NULL; + + /* Try to find a src_node if none was given and this + is a sticky-address rule. */ + if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && + (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) + *sn = pf_find_src_node(saddr, r, af, 0); + + /* If a src_node was found or explicitly given and it has a non-zero + route address, use this address. A zeroed address is found if the + src node was created just a moment ago in pf_create_state and it + needs to be filled in with routing decision calculated here. */ + if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { + PF_ACPY(naddr, &(*sn)->raddr, af); + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf_map_addr: src tracking maps "); + pf_print_host(saddr, 0, af); + printf(" to "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + return (0); + } + + /* Find the route using chosen algorithm. Store the found route + in src_node if it was given or found. 
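+	   As an illustration of the PF_POOL_BITMASK case below:
+	   PF_POOLMASK() keeps the network bits of the pool address and
+	   the host bits of the source, so a hypothetical pool of
+	   10.0.0.0/24 maps source a.b.c.d to 10.0.0.d.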
*/ + if (rpool->cur->addr.type == PF_ADDR_NOROUTE) + return (1); + if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + switch (af) { +#ifdef INET + case AF_INET: + if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && + (rpool->opts & PF_POOL_TYPEMASK) != + PF_POOL_ROUNDROBIN) + return (1); + raddr = &rpool->cur->addr.p.dyn->pfid_addr4; + rmask = &rpool->cur->addr.p.dyn->pfid_mask4; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && + (rpool->opts & PF_POOL_TYPEMASK) != + PF_POOL_ROUNDROBIN) + return (1); + raddr = &rpool->cur->addr.p.dyn->pfid_addr6; + rmask = &rpool->cur->addr.p.dyn->pfid_mask6; + break; +#endif /* INET6 */ + } + } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) + return (1); /* unsupported */ + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + } + + switch (rpool->opts & PF_POOL_TYPEMASK) { + case PF_POOL_NONE: + PF_ACPY(naddr, raddr, af); + break; + case PF_POOL_BITMASK: + PF_POOLMASK(naddr, raddr, rmask, saddr, af); + break; + case PF_POOL_RANDOM: + if (init_addr != NULL && PF_AZERO(init_addr, af)) { + switch (af) { +#ifdef INET + case AF_INET: + rpool->counter.addr32[0] = htonl(arc4random()); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (rmask->addr32[3] != 0xffffffff) + rpool->counter.addr32[3] = + htonl(arc4random()); + else + break; + if (rmask->addr32[2] != 0xffffffff) + rpool->counter.addr32[2] = + htonl(arc4random()); + else + break; + if (rmask->addr32[1] != 0xffffffff) + rpool->counter.addr32[1] = + htonl(arc4random()); + else + break; + if (rmask->addr32[0] != 0xffffffff) + rpool->counter.addr32[0] = + htonl(arc4random()); + break; +#endif /* INET6 */ + } + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + PF_ACPY(init_addr, naddr, af); + + } else { + PF_AINC(&rpool->counter, af); + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + } + break; + case PF_POOL_SRCHASH: + { + unsigned char hash[16]; + + pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); + PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); + break; + } + case PF_POOL_ROUNDROBIN: + { + struct pf_pooladdr *acur = rpool->cur; + + /* + * XXXGL: in the round-robin case we need to store + * the round-robin machine state in the rule, thus + * forwarding thread needs to modify rule. + * + * This is done w/o locking, because performance is assumed + * more important than round-robin precision. + * + * In the simpliest case we just update the "rpool->cur" + * pointer. However, if pool contains tables or dynamic + * addresses, then "tblidx" is also used to store machine + * state. Since "tblidx" is int, concurrent access to it can't + * lead to inconsistence, only to lost of precision. + * + * Things get worse, if table contains not hosts, but + * prefixes. In this case counter also stores machine state, + * and for IPv6 address, counter can't be updated atomically. + * Probably, using round-robin on a table containing IPv6 + * prefixes (or even IPv4) would cause a panic. 
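+	 * (Concretely, the shared round-robin state is rpool->cur,
+	 * rpool->tblidx and rpool->counter, all updated below
+	 * without a lock.)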
+ */ + + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if (!pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, af)) + goto get_addr; + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, af)) + goto get_addr; + } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) + goto get_addr; + + try_next: + if (TAILQ_NEXT(rpool->cur, entries) == NULL) + rpool->cur = TAILQ_FIRST(&rpool->list); + else + rpool->cur = TAILQ_NEXT(rpool->cur, entries); + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, af)) { + /* table contains no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, af)) { + /* table contains no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + PF_ACPY(&rpool->counter, raddr, af); + } + + get_addr: + PF_ACPY(naddr, &rpool->counter, af); + if (init_addr != NULL && PF_AZERO(init_addr, af)) + PF_ACPY(init_addr, naddr, af); + PF_AINC(&rpool->counter, af); + break; + } + } + if (*sn != NULL) + PF_ACPY(&(*sn)->raddr, naddr, af); + + if (V_pf_status.debug >= PF_DEBUG_MISC && + (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + printf("pf_map_addr: selected address "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + + return (0); +} + +struct pf_rule * +pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, + struct pfi_kif *kif, struct pf_src_node **sn, + struct pf_state_key **skp, struct pf_state_key **nkp, + struct pf_addr *saddr, struct pf_addr *daddr, + uint16_t sport, uint16_t dport, struct pf_anchor_stackframe *anchor_stack) +{ + struct pf_rule *r = NULL; + struct pf_addr *naddr; + uint16_t *nport; + + PF_RULES_RASSERT(); + KASSERT(*skp == NULL, ("*skp not NULL")); + KASSERT(*nkp == NULL, ("*nkp not NULL")); + + if (direction == PF_OUT) { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sport, daddr, dport, PF_RULESET_BINAT, anchor_stack); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sport, daddr, dport, PF_RULESET_NAT, + anchor_stack); + } else { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sport, daddr, dport, PF_RULESET_RDR, anchor_stack); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sport, daddr, dport, PF_RULESET_BINAT, + anchor_stack); + } + + if (r == NULL) + return (NULL); + + switch (r->action) { + case PF_NONAT: + case PF_NOBINAT: + case PF_NORDR: + return (NULL); + } + + *skp = pf_state_key_setup(pd, saddr, daddr, sport, dport); + if (*skp == NULL) + return (NULL); + *nkp = pf_state_key_clone(*skp); + if (*nkp == NULL) { + uma_zfree(V_pf_state_key_z, skp); + *skp = NULL; + return (NULL); + } + + /* XXX We only modify one side for now. 
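+	 * Every translation case below writes address/port slot [1]
+	 * of the new state key.  For the PF_RDR port range math, a
+	 * hypothetical "rdr ... port 2000:2009 -> ... port 5000:5004"
+	 * maps dport 2003 to 5000 + (3 % 5) = 5003.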
*/ + naddr = &(*nkp)->addr[1]; + nport = &(*nkp)->port[1]; + + switch (r->action) { + case PF_NAT: + if (pf_get_sport(pd->af, pd->proto, r, saddr, sport, daddr, + dport, naddr, nport, r->rpool.proxy_port[0], + r->rpool.proxy_port[1], sn)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: NAT proxy port allocation (%u-%u) failed\n", + r->rpool.proxy_port[0], r->rpool.proxy_port[1])); + goto notrans; + } + break; + case PF_BINAT: + switch (direction) { + case PF_OUT: + if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ + switch (pd->af) { +#ifdef INET + case AF_INET: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt4 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr4, + &r->rpool.cur->addr.p.dyn-> + pfid_mask4, saddr, AF_INET); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt6 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr6, + &r->rpool.cur->addr.p.dyn-> + pfid_mask6, saddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, + &r->rpool.cur->addr.v.a.addr, + &r->rpool.cur->addr.v.a.mask, saddr, + pd->af); + break; + case PF_IN: + if (r->src.addr.type == PF_ADDR_DYNIFTL) { + switch (pd->af) { +#ifdef INET + case AF_INET: + if (r->src.addr.p.dyn-> pfid_acnt4 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->src.addr.p.dyn->pfid_addr4, + &r->src.addr.p.dyn->pfid_mask4, + daddr, AF_INET); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (r->src.addr.p.dyn->pfid_acnt6 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->src.addr.p.dyn->pfid_addr6, + &r->src.addr.p.dyn->pfid_mask6, + daddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, &r->src.addr.v.a.addr, + &r->src.addr.v.a.mask, daddr, pd->af); + break; + } + break; + case PF_RDR: { + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) + goto notrans; + if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) + PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask, + daddr, pd->af); + + if (r->rpool.proxy_port[1]) { + uint32_t tmp_nport; + + tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) % + (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] + + 1)) + r->rpool.proxy_port[0]; + + /* Wrap around if necessary. */ + if (tmp_nport > 65535) + tmp_nport -= 65535; + *nport = htons((uint16_t)tmp_nport); + } else if (r->rpool.proxy_port[0]) + *nport = htons(r->rpool.proxy_port[0]); + break; + } + default: + panic("%s: unknown action %u", __func__, r->action); + } + + /* Return success only if translation really happened. */ + if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) + return (r); + +notrans: + uma_zfree(V_pf_state_key_z, *nkp); + uma_zfree(V_pf_state_key_z, *skp); + *skp = *nkp = NULL; + *sn = NULL; + + return (NULL); +} diff --git a/freebsd/sys/netpfil/pf/pf_mtag.h b/freebsd/sys/netpfil/pf/pf_mtag.h new file mode 100644 index 00000000..fd8554ae --- /dev/null +++ b/freebsd/sys/netpfil/pf/pf_mtag.h @@ -0,0 +1,64 @@ +/* $FreeBSD$ */ +/* + * Copyright (c) 2001 Daniel Hartmeier + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _NET_PF_MTAG_H_ +#define _NET_PF_MTAG_H_ + +#ifdef _KERNEL + +#define PF_TAG_GENERATED 0x01 +#define PF_TAG_FRAGCACHE 0x02 +#define PF_TAG_TRANSLATE_LOCALHOST 0x04 +#define PF_PACKET_LOOPED 0x08 +#define PF_FASTFWD_OURS_PRESENT 0x10 +#define PF_REASSEMBLED 0x20 + +struct pf_mtag { + void *hdr; /* saved hdr pos in mbuf, for ECN */ + u_int32_t qid; /* queue id */ + u_int32_t qid_hash; /* queue hashid used by WFQ like algos */ + u_int16_t tag; /* tag id */ + u_int8_t flags; + u_int8_t routed; +}; + +static __inline struct pf_mtag * +pf_find_mtag(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) + return (NULL); + + return ((struct pf_mtag *)(mtag + 1)); +} +#endif /* _KERNEL */ +#endif /* _NET_PF_MTAG_H_ */ diff --git a/freebsd/sys/netpfil/pf/pf_norm.c b/freebsd/sys/netpfil/pf/pf_norm.c new file mode 100644 index 00000000..86d2c8eb --- /dev/null +++ b/freebsd/sys/netpfil/pf/pf_norm.c @@ -0,0 +1,1843 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright 2001 Niels Provos <provos@citi.umich.edu> + * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#include <rtems/bsd/local/opt_pf.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/socket.h> + +#include <net/if.h> +#include <net/vnet.h> +#include <net/pfvar.h> +#include <net/if_pflog.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet6/ip6_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#endif /* INET6 */ + +struct pf_frent { + TAILQ_ENTRY(pf_frent) fr_next; + struct mbuf *fe_m; + uint16_t fe_hdrlen; /* ipv4 header length with ip options + ipv6, extension, fragment header */ + uint16_t fe_extoff; /* last extension header offset or 0 */ + uint16_t fe_len; /* fragment length */ + uint16_t fe_off; /* fragment offset */ + uint16_t fe_mff; /* more fragment flag */ +}; + +struct pf_fragment_cmp { + struct pf_addr frc_src; + struct pf_addr frc_dst; + uint32_t frc_id; + sa_family_t frc_af; + uint8_t frc_proto; +}; + +struct pf_fragment { + struct pf_fragment_cmp fr_key; +#define fr_src fr_key.frc_src +#define fr_dst fr_key.frc_dst +#define fr_id fr_key.frc_id +#define fr_af fr_key.frc_af +#define fr_proto fr_key.frc_proto + + RB_ENTRY(pf_fragment) fr_entry; + TAILQ_ENTRY(pf_fragment) frag_next; + uint32_t fr_timeout; + uint16_t fr_maxlen; /* maximum length of single fragment */ + TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; +}; + +struct pf_fragment_tag { + uint16_t ft_hdrlen; /* header length of reassembled pkt */ + uint16_t ft_extoff; /* last extension header offset or 0 */ + uint16_t ft_maxlen; /* maximum fragment payload length */ + uint32_t ft_id; /* fragment id */ +}; + +static struct mtx pf_frag_mtx; +MTX_SYSINIT(pf_frag_mtx, &pf_frag_mtx, "pf fragments", MTX_DEF); +#define PF_FRAG_LOCK() mtx_lock(&pf_frag_mtx) +#define PF_FRAG_UNLOCK() mtx_unlock(&pf_frag_mtx) +#define PF_FRAG_ASSERT() mtx_assert(&pf_frag_mtx, MA_OWNED) + +VNET_DEFINE(uma_zone_t, pf_state_scrub_z); /* XXX: shared with pfsync */ + +static VNET_DEFINE(uma_zone_t, pf_frent_z); +#define V_pf_frent_z VNET(pf_frent_z) +static VNET_DEFINE(uma_zone_t, pf_frag_z); +#define V_pf_frag_z VNET(pf_frag_z) + +TAILQ_HEAD(pf_fragqueue, pf_fragment); +TAILQ_HEAD(pf_cachequeue, pf_fragment); +static VNET_DEFINE(struct pf_fragqueue, pf_fragqueue); +#define V_pf_fragqueue VNET(pf_fragqueue) +RB_HEAD(pf_frag_tree, pf_fragment); +static VNET_DEFINE(struct pf_frag_tree, pf_frag_tree); +#define V_pf_frag_tree VNET(pf_frag_tree) +static int pf_frag_compare(struct pf_fragment *, + struct pf_fragment *); +static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); +static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); + +static void pf_flush_fragments(void); +static void pf_free_fragment(struct pf_fragment *); +static void pf_remove_fragment(struct pf_fragment *); +static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, + struct tcphdr *, int, sa_family_t); +static struct pf_frent *pf_create_fragment(u_short *); +static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key, + struct pf_frag_tree *tree); +static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, + struct pf_frent *, 
u_short *); +static int pf_isfull_fragment(struct pf_fragment *); +static struct mbuf *pf_join_fragment(struct pf_fragment *); +#ifdef INET +static void pf_scrub_ip(struct mbuf **, uint32_t, uint8_t, uint8_t); +static int pf_reassemble(struct mbuf **, struct ip *, int, u_short *); +#endif /* INET */ +#ifdef INET6 +static int pf_reassemble6(struct mbuf **, struct ip6_hdr *, + struct ip6_frag *, uint16_t, uint16_t, u_short *); +static void pf_scrub_ip6(struct mbuf **, uint8_t); +#endif /* INET6 */ + +#define DPFPRINTF(x) do { \ + if (V_pf_status.debug >= PF_DEBUG_MISC) { \ + printf("%s: ", __func__); \ + printf x ; \ + } \ +} while(0) + +#ifdef INET +static void +pf_ip2key(struct ip *ip, int dir, struct pf_fragment_cmp *key) +{ + + key->frc_src.v4 = ip->ip_src; + key->frc_dst.v4 = ip->ip_dst; + key->frc_af = AF_INET; + key->frc_proto = ip->ip_p; + key->frc_id = ip->ip_id; +} +#endif /* INET */ + +void +pf_normalize_init(void) +{ + + V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_state_scrub_z = uma_zcreate("pf state scrubs", + sizeof(struct pf_state_scrub), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + + V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z; + V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; + uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT); + uma_zone_set_warning(V_pf_frent_z, "PF frag entries limit reached"); + + TAILQ_INIT(&V_pf_fragqueue); +} + +void +pf_normalize_cleanup(void) +{ + + uma_zdestroy(V_pf_state_scrub_z); + uma_zdestroy(V_pf_frent_z); + uma_zdestroy(V_pf_frag_z); +} + +static int +pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) +{ + int diff; + + if ((diff = a->fr_id - b->fr_id) != 0) + return (diff); + if ((diff = a->fr_proto - b->fr_proto) != 0) + return (diff); + if ((diff = a->fr_af - b->fr_af) != 0) + return (diff); + if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0) + return (diff); + if ((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0) + return (diff); + return (0); +} + +void +pf_purge_expired_fragments(void) +{ + struct pf_fragment *frag; + u_int32_t expire = time_uptime - + V_pf_default_rule.timeout[PFTM_FRAG]; + + PF_FRAG_LOCK(); + while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) { + if (frag->fr_timeout > expire) + break; + + DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + pf_free_fragment(frag); + } + + PF_FRAG_UNLOCK(); +} + +/* + * Try to flush old fragments to make space for new ones + */ +static void +pf_flush_fragments(void) +{ + struct pf_fragment *frag; + int goal; + + PF_FRAG_ASSERT(); + + goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10; + DPFPRINTF(("trying to free %d frag entriess\n", goal)); + while (goal < uma_zone_get_cur(V_pf_frent_z)) { + frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue); + if (frag) + pf_free_fragment(frag); + else + break; + } +} + +/* Frees the fragments and all associated entries */ +static void +pf_free_fragment(struct pf_fragment *frag) +{ + struct pf_frent *frent; + + PF_FRAG_ASSERT(); + + /* Free all fragments */ + for (frent = TAILQ_FIRST(&frag->fr_queue); frent; + frent = TAILQ_FIRST(&frag->fr_queue)) { + TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + + m_freem(frent->fe_m); + uma_zfree(V_pf_frent_z, frent); + } + + pf_remove_fragment(frag); +} + +static struct pf_fragment * +pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree) 
+{ + struct pf_fragment *frag; + + PF_FRAG_ASSERT(); + + frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key); + if (frag != NULL) { + /* XXX Are we sure we want to update the timeout? */ + frag->fr_timeout = time_uptime; + TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); + } + + return (frag); +} + +/* Removes a fragment from the fragment queue and frees the fragment */ +static void +pf_remove_fragment(struct pf_fragment *frag) +{ + + PF_FRAG_ASSERT(); + + RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag); + TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); + uma_zfree(V_pf_frag_z, frag); +} + +static struct pf_frent * +pf_create_fragment(u_short *reason) +{ + struct pf_frent *frent; + + PF_FRAG_ASSERT(); + + frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); + if (frent == NULL) { + pf_flush_fragments(); + frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); + if (frent == NULL) { + REASON_SET(reason, PFRES_MEMORY); + return (NULL); + } + } + + return (frent); +} + +static struct pf_fragment * +pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, + u_short *reason) +{ + struct pf_frent *after, *next, *prev; + struct pf_fragment *frag; + uint16_t total; + + PF_FRAG_ASSERT(); + + /* No empty fragments. */ + if (frent->fe_len == 0) { + DPFPRINTF(("bad fragment: len 0")); + goto bad_fragment; + } + + /* All fragments are 8 byte aligned. */ + if (frent->fe_mff && (frent->fe_len & 0x7)) { + DPFPRINTF(("bad fragment: mff and len %d", frent->fe_len)); + goto bad_fragment; + } + + /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */ + if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { + DPFPRINTF(("bad fragment: max packet %d", + frent->fe_off + frent->fe_len)); + goto bad_fragment; + } + + DPFPRINTF((key->frc_af == AF_INET ? + "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d", + key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len)); + + /* Fully buffer all of the fragments in this fragment queue. */ + frag = pf_find_fragment(key, &V_pf_frag_tree); + + /* Create a new reassembly queue for this packet. */ + if (frag == NULL) { + frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); + if (frag == NULL) { + pf_flush_fragments(); + frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); + if (frag == NULL) { + REASON_SET(reason, PFRES_MEMORY); + goto drop_fragment; + } + } + + *(struct pf_fragment_cmp *)frag = *key; + frag->fr_timeout = time_uptime; + frag->fr_maxlen = frent->fe_len; + TAILQ_INIT(&frag->fr_queue); + + RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); + + /* We do not have a previous fragment. */ + TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); + + return (frag); + } + + KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue")); + + /* Remember maximum fragment len for refragmentation. */ + if (frent->fe_len > frag->fr_maxlen) + frag->fr_maxlen = frent->fe_len; + + /* Maximum data we have seen already. */ + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; + + /* Non terminal fragments must have more fragments flag. */ + if (frent->fe_off + frent->fe_len < total && !frent->fe_mff) + goto bad_fragment; + + /* Check if we saw the last fragment already. 
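+	 * If so, no new fragment may extend past the recorded total
+	 * length, and one ending exactly there must not carry the
+	 * more-fragments flag.  If not, a terminal fragment must not
+	 * end exactly at the current maximum, since the fragment
+	 * already queued there promises more data.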
*/ + if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) { + if (frent->fe_off + frent->fe_len > total || + (frent->fe_off + frent->fe_len == total && frent->fe_mff)) + goto bad_fragment; + } else { + if (frent->fe_off + frent->fe_len == total && !frent->fe_mff) + goto bad_fragment; + } + + /* Find a fragment after the current one. */ + prev = NULL; + TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { + if (after->fe_off > frent->fe_off) + break; + prev = after; + } + + KASSERT(prev != NULL || after != NULL, + ("prev != NULL || after != NULL")); + + if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { + uint16_t precut; + + precut = prev->fe_off + prev->fe_len - frent->fe_off; + if (precut >= frent->fe_len) + goto bad_fragment; + DPFPRINTF(("overlap -%d", precut)); + m_adj(frent->fe_m, precut); + frent->fe_off += precut; + frent->fe_len -= precut; + } + + for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off; + after = next) { + uint16_t aftercut; + + aftercut = frent->fe_off + frent->fe_len - after->fe_off; + DPFPRINTF(("adjust overlap %d", aftercut)); + if (aftercut < after->fe_len) { + m_adj(after->fe_m, aftercut); + after->fe_off += aftercut; + after->fe_len -= aftercut; + break; + } + + /* This fragment is completely overlapped, lose it. */ + next = TAILQ_NEXT(after, fr_next); + m_freem(after->fe_m); + TAILQ_REMOVE(&frag->fr_queue, after, fr_next); + uma_zfree(V_pf_frent_z, after); + } + + if (prev == NULL) + TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); + else + TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); + + return (frag); + +bad_fragment: + REASON_SET(reason, PFRES_FRAG); +drop_fragment: + uma_zfree(V_pf_frent_z, frent); + return (NULL); +} + +static int +pf_isfull_fragment(struct pf_fragment *frag) +{ + struct pf_frent *frent, *next; + uint16_t off, total; + + /* Check if we are completely reassembled */ + if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) + return (0); + + /* Maximum data we have seen already */ + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; + + /* Check if we have all the data */ + off = 0; + for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) { + next = TAILQ_NEXT(frent, fr_next); + + off += frent->fe_len; + if (off < total && (next == NULL || next->fe_off != off)) { + DPFPRINTF(("missing fragment at %d, next %d, total %d", + off, next == NULL ? -1 : next->fe_off, total)); + return (0); + } + } + DPFPRINTF(("%d < %d?", off, total)); + if (off < total) + return (0); + KASSERT(off == total, ("off == total")); + + return (1); +} + +static struct mbuf * +pf_join_fragment(struct pf_fragment *frag) +{ + struct mbuf *m, *m2; + struct pf_frent *frent, *next; + + frent = TAILQ_FIRST(&frag->fr_queue); + next = TAILQ_NEXT(frent, fr_next); + + m = frent->fe_m; + m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len); + uma_zfree(V_pf_frent_z, frent); + for (frent = next; frent != NULL; frent = next) { + next = TAILQ_NEXT(frent, fr_next); + + m2 = frent->fe_m; + /* Strip off ip header. */ + m_adj(m2, frent->fe_hdrlen); + /* Strip off any trailing bytes. */ + m_adj(m2, frent->fe_len - m2->m_pkthdr.len); + + uma_zfree(V_pf_frent_z, frent); + m_cat(m, m2); + } + + /* Remove from fragment queue. 
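+	 * and free the pf_fragment itself; the reassembled mbuf
+	 * chain in 'm' is all that survives.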
*/ + pf_remove_fragment(frag); + + return (m); +} + +#ifdef INET +static int +pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason) +{ + struct mbuf *m = *m0; + struct pf_frent *frent; + struct pf_fragment *frag; + struct pf_fragment_cmp key; + uint16_t total, hdrlen; + + /* Get an entry for the fragment queue */ + if ((frent = pf_create_fragment(reason)) == NULL) + return (PF_DROP); + + frent->fe_m = m; + frent->fe_hdrlen = ip->ip_hl << 2; + frent->fe_extoff = 0; + frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); + frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; + frent->fe_mff = ntohs(ip->ip_off) & IP_MF; + + pf_ip2key(ip, dir, &key); + + if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) + return (PF_DROP); + + /* The mbuf is part of the fragment entry, no direct free or access */ + m = *m0 = NULL; + + if (!pf_isfull_fragment(frag)) + return (PF_PASS); /* drop because *m0 is NULL, no error */ + + /* We have all the data */ + frent = TAILQ_FIRST(&frag->fr_queue); + KASSERT(frent != NULL, ("frent != NULL")); + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; + hdrlen = frent->fe_hdrlen; + + m = *m0 = pf_join_fragment(frag); + frag = NULL; + + if (m->m_flags & M_PKTHDR) { + int plen = 0; + for (m = *m0; m; m = m->m_next) + plen += m->m_len; + m = *m0; + m->m_pkthdr.len = plen; + } + + ip = mtod(m, struct ip *); + ip->ip_len = htons(hdrlen + total); + ip->ip_off &= ~(IP_MF|IP_OFFMASK); + + if (hdrlen + total > IP_MAXPACKET) { + DPFPRINTF(("drop: too big: %d", total)); + ip->ip_len = 0; + REASON_SET(reason, PFRES_SHORT); + /* PF_DROP requires a valid mbuf *m0 in pf_test() */ + return (PF_DROP); + } + + DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); + return (PF_PASS); +} +#endif /* INET */ + +#ifdef INET6 +static int +pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr, + uint16_t hdrlen, uint16_t extoff, u_short *reason) +{ + struct mbuf *m = *m0; + struct pf_frent *frent; + struct pf_fragment *frag; + struct pf_fragment_cmp key; + struct m_tag *mtag; + struct pf_fragment_tag *ftag; + int off; + uint32_t frag_id; + uint16_t total, maxlen; + uint8_t proto; + + PF_FRAG_LOCK(); + + /* Get an entry for the fragment queue. */ + if ((frent = pf_create_fragment(reason)) == NULL) { + PF_FRAG_UNLOCK(); + return (PF_DROP); + } + + frent->fe_m = m; + frent->fe_hdrlen = hdrlen; + frent->fe_extoff = extoff; + frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen; + frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); + frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG; + + key.frc_src.v6 = ip6->ip6_src; + key.frc_dst.v6 = ip6->ip6_dst; + key.frc_af = AF_INET6; + /* Only the first fragment's protocol is relevant. */ + key.frc_proto = 0; + key.frc_id = fraghdr->ip6f_ident; + + if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) { + PF_FRAG_UNLOCK(); + return (PF_DROP); + } + + /* The mbuf is part of the fragment entry, no direct free or access. */ + m = *m0 = NULL; + + if (!pf_isfull_fragment(frag)) { + PF_FRAG_UNLOCK(); + return (PF_PASS); /* Drop because *m0 is NULL, no error. */ + } + + /* We have all the data. 
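+	 * Save the per-queue values first, since pf_join_fragment()
+	 * frees the pf_fragment they are stored in.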
*/ + extoff = frent->fe_extoff; + maxlen = frag->fr_maxlen; + frag_id = frag->fr_id; + frent = TAILQ_FIRST(&frag->fr_queue); + KASSERT(frent != NULL, ("frent != NULL")); + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; + hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag); + + m = *m0 = pf_join_fragment(frag); + frag = NULL; + + PF_FRAG_UNLOCK(); + + /* Take protocol from first fragment header. */ + m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off); + KASSERT(m, ("%s: short mbuf chain", __func__)); + proto = *(mtod(m, caddr_t) + off); + m = *m0; + + /* Delete frag6 header */ + if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0) + goto fail; + + if (m->m_flags & M_PKTHDR) { + int plen = 0; + for (m = *m0; m; m = m->m_next) + plen += m->m_len; + m = *m0; + m->m_pkthdr.len = plen; + } + + if ((mtag = m_tag_get(PF_REASSEMBLED, sizeof(struct pf_fragment_tag), + M_NOWAIT)) == NULL) + goto fail; + ftag = (struct pf_fragment_tag *)(mtag + 1); + ftag->ft_hdrlen = hdrlen; + ftag->ft_extoff = extoff; + ftag->ft_maxlen = maxlen; + ftag->ft_id = frag_id; + m_tag_prepend(m, mtag); + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total); + if (extoff) { + /* Write protocol into next field of last extension header. */ + m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt), + &off); + KASSERT(m, ("%s: short mbuf chain", __func__)); + *(mtod(m, char *) + off) = proto; + m = *m0; + } else + ip6->ip6_nxt = proto; + + if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { + DPFPRINTF(("drop: too big: %d", total)); + ip6->ip6_plen = 0; + REASON_SET(reason, PFRES_SHORT); + /* PF_DROP requires a valid mbuf *m0 in pf_test6(). */ + return (PF_DROP); + } + + DPFPRINTF(("complete: %p(%d)", m, ntohs(ip6->ip6_plen))); + return (PF_PASS); + +fail: + REASON_SET(reason, PFRES_MEMORY); + /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. */ + return (PF_DROP); +} +#endif /* INET6 */ + +#ifdef INET6 +int +pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag) +{ + struct mbuf *m = *m0, *t; + struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); + struct pf_pdesc pd; + uint32_t frag_id; + uint16_t hdrlen, extoff, maxlen; + uint8_t proto; + int error, action; + + hdrlen = ftag->ft_hdrlen; + extoff = ftag->ft_extoff; + maxlen = ftag->ft_maxlen; + frag_id = ftag->ft_id; + m_tag_delete(m, mtag); + mtag = NULL; + ftag = NULL; + + if (extoff) { + int off; + + /* Use protocol from next field of last extension header */ + m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt), + &off); + KASSERT((m != NULL), ("pf_refragment6: short mbuf chain")); + proto = *(mtod(m, caddr_t) + off); + *(mtod(m, char *) + off) = IPPROTO_FRAGMENT; + m = *m0; + } else { + struct ip6_hdr *hdr; + + hdr = mtod(m, struct ip6_hdr *); + proto = hdr->ip6_nxt; + hdr->ip6_nxt = IPPROTO_FRAGMENT; + } + + /* + * Maxlen may be less than 8 if there was only a single + * fragment. As it was fragmented before, add a fragment + * header also for a single fragment. If total or maxlen + * is less than 8, ip6_fragment() will return EMSGSIZE and + * we drop the packet. + */ + error = ip6_fragment(ifp, m, hdrlen, proto, maxlen, frag_id); + m = (*m0)->m_nextpkt; + (*m0)->m_nextpkt = NULL; + if (error == 0) { + /* The first mbuf contains the unfragmented packet. */ + m_freem(*m0); + *m0 = NULL; + action = PF_PASS; + } else { + /* Drop expects an mbuf to free. 
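+		 * *m0 is therefore left intact for the caller, while
+		 * any fragments already chained off it are freed in
+		 * the loop below.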
*/ + DPFPRINTF(("refragment error %d", error)); + action = PF_DROP; + } + for (t = m; m; m = t) { + t = m->m_nextpkt; + m->m_nextpkt = NULL; + m->m_flags |= M_SKIP_FIREWALL; + memset(&pd, 0, sizeof(pd)); + pd.pf_mtag = pf_find_mtag(m); + if (error == 0) + ip6_forward(m, 0); + else + m_freem(m); + } + + return (action); +} +#endif /* INET6 */ + +#ifdef INET +int +pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, + struct pf_pdesc *pd) +{ + struct mbuf *m = *m0; + struct pf_rule *r; + struct ip *h = mtod(m, struct ip *); + int mff = (ntohs(h->ip_off) & IP_MF); + int hlen = h->ip_hl << 2; + u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + u_int16_t max; + int ip_len; + int ip_off; + int tag = -1; + int verdict; + + PF_RULES_RASSERT(); + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != AF_INET) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != h->ip_p) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, + (struct pf_addr *)&h->ip_src.s_addr, AF_INET, + r->src.neg, kif, M_GETFIB(m))) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, + r->dst.neg, NULL, M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) + r = TAILQ_NEXT(r, entries); + else + break; + } + + if (r == NULL || r->action == PF_NOSCRUB) + return (PF_PASS); + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } + + /* Check for illegal packets */ + if (hlen < (int)sizeof(struct ip)) { + REASON_SET(reason, PFRES_NORM); + goto drop; + } + + if (hlen > ntohs(h->ip_len)) { + REASON_SET(reason, PFRES_NORM); + goto drop; + } + + /* Clear IP_DF if the rule uses the no-df option */ + if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= htons(~IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } + + /* We will need other tests here */ + if (!fragoff && !mff) + goto no_fragment; + + /* We're dealing with a fragment now. Don't allow fragments + * with IP_DF to enter the cache. If the flag was cleared by + * no-df above, fine. Otherwise drop it. 
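+	 * (A packet that is already a fragment yet carries IP_DF
+	 * is self-contradictory, so it is treated as bogus.)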
+ */ + if (h->ip_off & htons(IP_DF)) { + DPFPRINTF(("IP_DF\n")); + goto bad; + } + + ip_len = ntohs(h->ip_len) - hlen; + ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + + /* All fragments are 8 byte aligned */ + if (mff && (ip_len & 0x7)) { + DPFPRINTF(("mff and %d\n", ip_len)); + goto bad; + } + + /* Respect maximum length */ + if (fragoff + ip_len > IP_MAXPACKET) { + DPFPRINTF(("max packet %d\n", fragoff + ip_len)); + goto bad; + } + max = fragoff + ip_len; + + /* Fully buffer all of the fragments + * Might return a completely reassembled mbuf, or NULL */ + PF_FRAG_LOCK(); + DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); + verdict = pf_reassemble(m0, h, dir, reason); + PF_FRAG_UNLOCK(); + + if (verdict != PF_PASS) + return (PF_DROP); + + m = *m0; + if (m == NULL) + return (PF_DROP); + + h = mtod(m, struct ip *); + + no_fragment: + /* At this point, only IP_DF is allowed in ip_off */ + if (h->ip_off & ~htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= htons(IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } + + pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos); + + return (PF_PASS); + + bad: + DPFPRINTF(("dropping bad fragment\n")); + REASON_SET(reason, PFRES_FRAG); + drop: + if (r != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd, + 1); + + return (PF_DROP); +} +#endif + +#ifdef INET6 +int +pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, + u_short *reason, struct pf_pdesc *pd) +{ + struct mbuf *m = *m0; + struct pf_rule *r; + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + int extoff; + int off; + struct ip6_ext ext; + struct ip6_opt opt; + struct ip6_opt_jumbo jumbo; + struct ip6_frag frag; + u_int32_t jumbolen = 0, plen; + int optend; + int ooff; + u_int8_t proto; + int terminal; + + PF_RULES_RASSERT(); + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != AF_INET6) + r = r->skip[PF_SKIP_AF].ptr; +#if 0 /* header chain! 
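+	 * Matching r->proto against ip6_nxt here would miss any
+	 * protocol behind extension headers; the header chain is
+	 * walked further down instead.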
*/ + else if (r->proto && r->proto != h->ip6_nxt) + r = r->skip[PF_SKIP_PROTO].ptr; +#endif + else if (PF_MISMATCHAW(&r->src.addr, + (struct pf_addr *)&h->ip6_src, AF_INET6, + r->src.neg, kif, M_GETFIB(m))) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)&h->ip6_dst, AF_INET6, + r->dst.neg, NULL, M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else + break; + } + + if (r == NULL || r->action == PF_NOSCRUB) + return (PF_PASS); + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } + + /* Check for illegal packets */ + if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) + goto drop; + + extoff = 0; + off = sizeof(struct ip6_hdr); + proto = h->ip6_nxt; + terminal = 0; + do { + switch (proto) { + case IPPROTO_FRAGMENT: + goto fragment; + break; + case IPPROTO_AH: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, + NULL, AF_INET6)) + goto shortpkt; + extoff = off; + if (proto == IPPROTO_AH) + off += (ext.ip6e_len + 2) * 4; + else + off += (ext.ip6e_len + 1) * 8; + proto = ext.ip6e_nxt; + break; + case IPPROTO_HOPOPTS: + if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, + NULL, AF_INET6)) + goto shortpkt; + extoff = off; + optend = off + (ext.ip6e_len + 1) * 8; + ooff = off + sizeof(ext); + do { + if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, + sizeof(opt.ip6o_type), NULL, NULL, + AF_INET6)) + goto shortpkt; + if (opt.ip6o_type == IP6OPT_PAD1) { + ooff++; + continue; + } + if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt), + NULL, NULL, AF_INET6)) + goto shortpkt; + if (ooff + sizeof(opt) + opt.ip6o_len > optend) + goto drop; + switch (opt.ip6o_type) { + case IP6OPT_JUMBO: + if (h->ip6_plen != 0) + goto drop; + if (!pf_pull_hdr(m, ooff, &jumbo, + sizeof(jumbo), NULL, NULL, + AF_INET6)) + goto shortpkt; + memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, + sizeof(jumbolen)); + jumbolen = ntohl(jumbolen); + if (jumbolen <= IPV6_MAXPACKET) + goto drop; + if (sizeof(struct ip6_hdr) + jumbolen != + m->m_pkthdr.len) + goto drop; + break; + default: + break; + } + ooff += sizeof(opt) + opt.ip6o_len; + } while (ooff < optend); + + off = optend; + proto = ext.ip6e_nxt; + break; + default: + terminal = 1; + break; + } + } while (!terminal); + + /* jumbo payload option must be present, or plen > 0 */ + if (ntohs(h->ip6_plen) == 0) + plen = jumbolen; + else + plen = ntohs(h->ip6_plen); + if (plen == 0) + goto drop; + if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) + goto shortpkt; + + pf_scrub_ip6(&m, r->min_ttl); + + return (PF_PASS); + + fragment: + /* Jumbo payload packets cannot be fragmented. */ + plen = ntohs(h->ip6_plen); + if (plen == 0 || jumbolen) + goto drop; + if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) + goto shortpkt; + + if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) + goto shortpkt; + + /* Offset now points to data portion. */ + off += sizeof(frag); + + /* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf. 
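+	 * That is: on PF_PASS, *m0 is either NULL (the fragment was
+	 * consumed into the reassembly queue) or points to the fully
+	 * reassembled packet.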
*/ + if (pf_reassemble6(m0, h, &frag, off, extoff, reason) != PF_PASS) + return (PF_DROP); + m = *m0; + if (m == NULL) + return (PF_DROP); + + pd->flags |= PFDESC_IP_REAS; + return (PF_PASS); + + shortpkt: + REASON_SET(reason, PFRES_SHORT); + if (r != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); + return (PF_DROP); + + drop: + REASON_SET(reason, PFRES_NORM); + if (r != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); + return (PF_DROP); +} +#endif /* INET6 */ + +int +pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, + int off, void *h, struct pf_pdesc *pd) +{ + struct pf_rule *r, *rm = NULL; + struct tcphdr *th = pd->hdr.tcp; + int rewrite = 0; + u_short reason; + u_int8_t flags; + sa_family_t af = pd->af; + + PF_RULES_RASSERT(); + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif, M_GETFIB(m))) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], th->th_sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL, M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], th->th_dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), + r->os_fingerprint)) + r = TAILQ_NEXT(r, entries); + else { + rm = r; + break; + } + } + + if (rm == NULL || rm->action == PF_NOSCRUB) + return (PF_PASS); + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } + + if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) + pd->flags |= PFDESC_TCP_NORM; + + flags = th->th_flags; + if (flags & TH_SYN) { + /* Illegal packet */ + if (flags & TH_RST) + goto tcp_drop; + + if (flags & TH_FIN) + goto tcp_drop; + } else { + /* Illegal packet */ + if (!(flags & (TH_ACK|TH_RST))) + goto tcp_drop; + } + + if (!(flags & TH_ACK)) { + /* These flags are only valid if ACK is set */ + if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) + goto tcp_drop; + } + + /* Check for illegal header length */ + if (th->th_off < (sizeof(struct tcphdr) >> 2)) + goto tcp_drop; + + /* If flags changed, or reserved data set, then adjust */ + if (flags != th->th_flags || th->th_x2 != 0) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)(&th->th_ack + 1); + th->th_flags = flags; + th->th_x2 = 0; + nv = *(u_int16_t *)(&th->th_ack + 1); + + th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, ov, nv, 0); + rewrite = 1; + } + + /* Remove urgent pointer, if TH_URG is not set */ + if (!(flags & TH_URG) && th->th_urp) { + th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, th->th_urp, + 0, 0); + th->th_urp = 0; + rewrite = 1; + } + + /* Process options */ + if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af)) + rewrite = 1; + + /* copy back packet headers if we sanitized */ + if (rewrite) + m_copyback(m, off, sizeof(*th), (caddr_t)th); + + return (PF_PASS); + + tcp_drop: + 
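/* All illegal flag combinations and header lengths end up here. */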
REASON_SET(&reason, PFRES_NORM); + if (rm != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET, dir, reason, r, NULL, NULL, pd, + 1); + return (PF_DROP); +} + +int +pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) +{ + u_int32_t tsval, tsecr; + u_int8_t hdr[60]; + u_int8_t *opt; + + KASSERT((src->scrub == NULL), + ("pf_normalize_tcp_init: src->scrub != NULL")); + + src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT); + if (src->scrub == NULL) + return (1); + + switch (pd->af) { +#ifdef INET + case AF_INET: { + struct ip *h = mtod(m, struct ip *); + src->scrub->pfss_ttl = h->ip_ttl; + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + src->scrub->pfss_ttl = h->ip6_hlim; + break; + } +#endif /* INET6 */ + } + + + /* + * All normalizations below are only begun if we see the start of + * the connections. They must all set an enabled bit in pfss_flags + */ + if ((th->th_flags & TH_SYN) == 0) + return (0); + + + if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && + pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { + /* Diddle with TCP options */ + int hlen; + opt = hdr + sizeof(struct tcphdr); + hlen = (th->th_off << 2) - sizeof(struct tcphdr); + while (hlen >= TCPOLEN_TIMESTAMP) { + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_TIMESTAMP: + if (opt[1] >= TCPOLEN_TIMESTAMP) { + src->scrub->pfss_flags |= + PFSS_TIMESTAMP; + src->scrub->pfss_ts_mod = + htonl(arc4random()); + + /* note PFSS_PAWS not set yet */ + memcpy(&tsval, &opt[2], + sizeof(u_int32_t)); + memcpy(&tsecr, &opt[6], + sizeof(u_int32_t)); + src->scrub->pfss_tsval0 = ntohl(tsval); + src->scrub->pfss_tsval = ntohl(tsval); + src->scrub->pfss_tsecr = ntohl(tsecr); + getmicrouptime(&src->scrub->pfss_last); + } + /* FALLTHROUGH */ + default: + hlen -= MAX(opt[1], 2); + opt += MAX(opt[1], 2); + break; + } + } + } + + return (0); +} + +void +pf_normalize_tcp_cleanup(struct pf_state *state) +{ + if (state->src.scrub) + uma_zfree(V_pf_state_scrub_z, state->src.scrub); + if (state->dst.scrub) + uma_zfree(V_pf_state_scrub_z, state->dst.scrub); + + /* Someday... flush the TCP segment reassembly descriptors. */ +} + +int +pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, + u_short *reason, struct tcphdr *th, struct pf_state *state, + struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) +{ + struct timeval uptime; + u_int32_t tsval, tsecr; + u_int tsval_from_last; + u_int8_t hdr[60]; + u_int8_t *opt; + int copyback = 0; + int got_ts = 0; + + KASSERT((src->scrub || dst->scrub), + ("%s: src->scrub && dst->scrub!", __func__)); + + /* + * Enforce the minimum TTL seen for this connection. Negate a common + * technique to evade an intrusion detection system and confuse + * firewall state code. 
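+	 * (e.g., a segment whose TTL expires between the firewall
+	 * and the destination updates the firewall's state but is
+	 * never seen by the endpoint.)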
+ */ + switch (pd->af) { +#ifdef INET + case AF_INET: { + if (src->scrub) { + struct ip *h = mtod(m, struct ip *); + if (h->ip_ttl > src->scrub->pfss_ttl) + src->scrub->pfss_ttl = h->ip_ttl; + h->ip_ttl = src->scrub->pfss_ttl; + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + if (src->scrub) { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + if (h->ip6_hlim > src->scrub->pfss_ttl) + src->scrub->pfss_ttl = h->ip6_hlim; + h->ip6_hlim = src->scrub->pfss_ttl; + } + break; + } +#endif /* INET6 */ + } + + if (th->th_off > (sizeof(struct tcphdr) >> 2) && + ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || + (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && + pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { + /* Diddle with TCP options */ + int hlen; + opt = hdr + sizeof(struct tcphdr); + hlen = (th->th_off << 2) - sizeof(struct tcphdr); + while (hlen >= TCPOLEN_TIMESTAMP) { + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_TIMESTAMP: + /* Modulate the timestamps. Can be used for + * NAT detection, OS uptime determination or + * reboot detection. + */ + + if (got_ts) { + /* Huh? Multiple timestamps!? */ + if (V_pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("multiple TS??")); + pf_print_state(state); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + if (opt[1] >= TCPOLEN_TIMESTAMP) { + memcpy(&tsval, &opt[2], + sizeof(u_int32_t)); + if (tsval && src->scrub && + (src->scrub->pfss_flags & + PFSS_TIMESTAMP)) { + tsval = ntohl(tsval); + pf_change_proto_a(m, &opt[2], + &th->th_sum, + htonl(tsval + + src->scrub->pfss_ts_mod), + 0); + copyback = 1; + } + + /* Modulate TS reply iff valid (!0) */ + memcpy(&tsecr, &opt[6], + sizeof(u_int32_t)); + if (tsecr && dst->scrub && + (dst->scrub->pfss_flags & + PFSS_TIMESTAMP)) { + tsecr = ntohl(tsecr) + - dst->scrub->pfss_ts_mod; + pf_change_proto_a(m, &opt[6], + &th->th_sum, htonl(tsecr), + 0); + copyback = 1; + } + got_ts = 1; + } + /* FALLTHROUGH */ + default: + hlen -= MAX(opt[1], 2); + opt += MAX(opt[1], 2); + break; + } + } + if (copyback) { + /* Copyback the options, caller copys back header */ + *writeback = 1; + m_copyback(m, off + sizeof(struct tcphdr), + (th->th_off << 2) - sizeof(struct tcphdr), hdr + + sizeof(struct tcphdr)); + } + } + + + /* + * Must invalidate PAWS checks on connections idle for too long. + * The fastest allowed timestamp clock is 1ms. That turns out to + * be about 24 days before it wraps. 
+ * XXX Right now our lowerbound
+ * TS echo check only works for the first 12 days of a connection
+ * when the TS has exhausted half its 32bit space.
+ */
+#define TS_MAX_IDLE (24*24*60*60)
+#define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */
+
+ getmicrouptime(&uptime);
+ if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
+ (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
+ time_uptime - state->creation > TS_MAX_CONN)) {
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ DPFPRINTF(("src idled out of PAWS\n"));
+ pf_print_state(state);
+ printf("\n");
+ }
+ src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
+ | PFSS_PAWS_IDLED;
+ }
+ if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
+ uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ DPFPRINTF(("dst idled out of PAWS\n"));
+ pf_print_state(state);
+ printf("\n");
+ }
+ dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
+ | PFSS_PAWS_IDLED;
+ }
+
+ if (got_ts && src->scrub && dst->scrub &&
+ (src->scrub->pfss_flags & PFSS_PAWS) &&
+ (dst->scrub->pfss_flags & PFSS_PAWS)) {
+ /* Validate that the timestamps are "in-window".
+ * RFC1323 describes TCP Timestamp options that allow
+ * measurement of RTT (round trip time) and PAWS
+ * (protection against wrapped sequence numbers). PAWS
+ * gives us a set of rules for rejecting packets on
+ * long fat pipes (packets that were somehow delayed
+ * in transit longer than the time it took to send the
+ * full TCP sequence space of 4Gb). We can use these
+ * rules and infer a few others that will let us treat
+ * the 32bit timestamp and the 32bit echoed timestamp
+ * as sequence numbers to prevent a blind attacker from
+ * inserting packets into a connection.
+ *
+ * RFC1323 tells us:
+ * - The timestamp on this packet must be greater than
+ * or equal to the last value echoed by the other
+ * endpoint. The RFC says those will be discarded
+ * since it is a dup that has already been acked.
+ * This gives us a lowerbound on the timestamp.
+ * timestamp >= other last echoed timestamp
+ * - The timestamp will be less than or equal to
+ * the last timestamp plus the time between the
+ * last packet and now. The RFC defines the max
+ * clock rate as 1ms. We will allow clocks to be
+ * up to 10% fast and will allow a total difference
+ * of 30 seconds due to a route change. And this
+ * gives us an upperbound on the timestamp.
+ * timestamp <= last timestamp + max ticks
+ * We have to be careful here. Windows will send an
+ * initial timestamp of zero and then initialize it
+ * to a random value after the 3whs; presumably to
+ * avoid a DoS by having to call an expensive RNG
+ * during a SYN flood. Proof that MS has at least one
+ * good security geek.
+ *
+ * - The TCP timestamp option must also echo the other
+ * endpoint's timestamp. The timestamp echoed is the
+ * one carried on the earliest unacknowledged segment
+ * on the left edge of the sequence window. The RFC
+ * states that the host will reject any echoed
+ * timestamps that were larger than any ever sent.
+ * This gives us an upperbound on the TS echo.
+ * tsecr <= largest_tsval
+ * - The lowerbound on the TS echo is a little more
+ * tricky to determine. The other endpoint's echoed
+ * values will not decrease. But there may be
+ * network conditions that re-order packets and
+ * cause our view of them to decrease. For now the
+ * only lowerbound we can safely determine is that
+ * the TS echo will never be less than the original
+ * TS. XXX There is probably a better lowerbound.
+ * Remove TS_MAX_CONN with better lowerbound check.
+ * tsecr >= other original TS
+ *
+ * It is also important to note that the fastest
+ * timestamp clock of 1ms will wrap its 32bit space in
+ * 24 days. So we just disable TS checking after 24
+ * days of idle time. We actually must use a 12d
+ * connection limit until we can come up with a better
+ * lowerbound to the TS echo check.
+ */
+ struct timeval delta_ts;
+ int ts_fudge;
+
+
+ /*
+ * PFTM_TS_DIFF is how many seconds of leeway to allow
+ * a host's timestamp. This can happen if the previous
+ * packet got delayed in transit for much longer than
+ * this packet.
+ */
+ if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
+ ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];
+
+ /* Calculate max ticks since the last timestamp */
+#define TS_MAXFREQ 1100 /* RFC max TS freq of 1 kHz + 10% skew */
+#define TS_MICROSECS 1000000 /* microseconds per second */
+ delta_ts = uptime;
+ timevalsub(&delta_ts, &src->scrub->pfss_last);
+ tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
+ tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
+
+ if ((src->state >= TCPS_ESTABLISHED &&
+ dst->state >= TCPS_ESTABLISHED) &&
+ (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
+ SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
+ (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
+ SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
+ /* Bad RFC1323 implementation or an insertion attack.
+ *
+ * - Solaris 2.6 and 2.7 are known to send another ACK
+ * after the FIN,FIN|ACK,ACK closing that carries
+ * an old timestamp.
+ */
+
+ DPFPRINTF(("Timestamp failed %c%c%c%c\n",
+ SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
+ SEQ_GT(tsval, src->scrub->pfss_tsval +
+ tsval_from_last) ? '1' : ' ',
+ SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
+ SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
+ DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
+ "idle: %jus %lums\n",
+ tsval, tsecr, tsval_from_last,
+ (uintmax_t)delta_ts.tv_sec,
+ delta_ts.tv_usec / 1000));
+ DPFPRINTF((" src->tsval: %u tsecr: %u\n",
+ src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
+ DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u"
+ "\n", dst->scrub->pfss_tsval,
+ dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ pf_print_state(state);
+ pf_print_flags(th->th_flags);
+ printf("\n");
+ }
+ REASON_SET(reason, PFRES_TS);
+ return (PF_DROP);
+ }
+
+ /* XXX I'd really like to require tsecr but it's optional */
+
+ } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
+ ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
+ || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
+ src->scrub && dst->scrub &&
+ (src->scrub->pfss_flags & PFSS_PAWS) &&
+ (dst->scrub->pfss_flags & PFSS_PAWS)) {
+ /* Didn't send a timestamp. Timestamps aren't really useful
+ * when:
+ * - connection opening or closing (often not even sent).
+ * But we must not let an attacker put a FIN on a
+ * data packet to sneak it through our ESTABLISHED check.
+ * - on a TCP reset. RFC suggests not even looking at TS.
+ * - on an empty ACK. The TS will not be echoed so it will
+ * probably not help keep the RTT calculation in sync and
+ * there isn't as much danger when the sequence numbers
+ * got wrapped. 
So some stacks don't include TS on empty + * ACKs :-( + * + * To minimize the disruption to mostly RFC1323 conformant + * stacks, we will only require timestamps on data packets. + * + * And what do ya know, we cannot require timestamps on data + * packets. There appear to be devices that do legitimate + * TCP connection hijacking. There are HTTP devices that allow + * a 3whs (with timestamps) and then buffer the HTTP request. + * If the intermediate device has the HTTP response cache, it + * will spoof the response but not bother timestamping its + * packets. So we can look for the presence of a timestamp in + * the first data packet and if there, require it in all future + * packets. + */ + + if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { + /* + * Hey! Someone tried to sneak a packet in. Or the + * stack changed its RFC1323 behavior?!?! + */ + if (V_pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("Did not receive expected RFC1323 " + "timestamp\n")); + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + } + + + /* + * We will note if a host sends his data packets with or without + * timestamps. And require all data packets to contain a timestamp + * if the first does. PAWS implicitly requires that all data packets be + * timestamped. But I think there are middle-man devices that hijack + * TCP streams immediately after the 3whs and don't timestamp their + * packets (seen in a WWW accelerator or cache). + */ + if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & + (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { + if (got_ts) + src->scrub->pfss_flags |= PFSS_DATA_TS; + else { + src->scrub->pfss_flags |= PFSS_DATA_NOTS; + if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub && + (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { + /* Don't warn if other host rejected RFC1323 */ + DPFPRINTF(("Broken RFC1323 stack did not " + "timestamp data packet. Disabled PAWS " + "security.\n")); + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + } + } + + + /* + * Update PAWS values + */ + if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & + (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { + getmicrouptime(&src->scrub->pfss_last); + if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || + (src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_tsval = tsval; + + if (tsecr) { + if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || + (src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_tsecr = tsecr; + + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && + (SEQ_LT(tsval, src->scrub->pfss_tsval0) || + src->scrub->pfss_tsval0 == 0)) { + /* tsval0 MUST be the lowest timestamp */ + src->scrub->pfss_tsval0 = tsval; + } + + /* Only fully initialized after a TS gets echoed */ + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_flags |= PFSS_PAWS; + } + } + + /* I have a dream.... TCP segment reassembly.... 
*/ + return (0); +} + +static int +pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, + int off, sa_family_t af) +{ + u_int16_t *mss; + int thoff; + int opt, cnt, optlen = 0; + int rewrite = 0; + u_char opts[TCP_MAXOLEN]; + u_char *optp = opts; + + thoff = th->th_off << 2; + cnt = thoff - sizeof(struct tcphdr); + + if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt, + NULL, NULL, af)) + return (rewrite); + + for (; cnt > 0; cnt -= optlen, optp += optlen) { + opt = optp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + if (cnt < 2) + break; + optlen = optp[1]; + if (optlen < 2 || optlen > cnt) + break; + } + switch (opt) { + case TCPOPT_MAXSEG: + mss = (u_int16_t *)(optp + 2); + if ((ntohs(*mss)) > r->max_mss) { + th->th_sum = pf_proto_cksum_fixup(m, + th->th_sum, *mss, htons(r->max_mss), 0); + *mss = htons(r->max_mss); + rewrite = 1; + } + break; + default: + break; + } + } + + if (rewrite) + m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts); + + return (rewrite); +} + +#ifdef INET +static void +pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) +{ + struct mbuf *m = *m0; + struct ip *h = mtod(m, struct ip *); + + /* Clear IP_DF if no-df was requested */ + if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= htons(~IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (min_ttl && h->ip_ttl < min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + + h->ip_ttl = min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } + + /* Enforce tos */ + if (flags & PFRULE_SET_TOS) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)h; + h->ip_tos = tos; + nv = *(u_int16_t *)h; + + h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0); + } + + /* random-id, but not for fragments */ + if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) { + uint16_t ip_id = h->ip_id; + + ip_fillid(h); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); + } +} +#endif /* INET */ + +#ifdef INET6 +static void +pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl) +{ + struct mbuf *m = *m0; + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (min_ttl && h->ip6_hlim < min_ttl) + h->ip6_hlim = min_ttl; +} +#endif diff --git a/freebsd/sys/netpfil/pf/pf_osfp.c b/freebsd/sys/netpfil/pf/pf_osfp.c new file mode 100644 index 00000000..33bef4c8 --- /dev/null +++ b/freebsd/sys/netpfil/pf/pf_osfp.c @@ -0,0 +1,530 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2003 Mike Frantzen <frantzen@w4g.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * $OpenBSD: pf_osfp.c,v 1.14 2008/06/12 18:17:01 henning Exp $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/mbuf.h> +#include <sys/rwlock.h> +#include <sys/socket.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#include <net/if.h> +#include <net/vnet.h> +#include <net/pfvar.h> + +#include <netinet/ip6.h> + +static MALLOC_DEFINE(M_PFOSFP, "pf_osfp", "pf(4) operating system fingerprints"); +#define DPFPRINTF(format, x...) \ + if (V_pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) + +SLIST_HEAD(pf_osfp_list, pf_os_fingerprint); +static VNET_DEFINE(struct pf_osfp_list, pf_osfp_list) = + SLIST_HEAD_INITIALIZER(); +#define V_pf_osfp_list VNET(pf_osfp_list) + +static struct pf_osfp_enlist *pf_osfp_fingerprint_hdr(const struct ip *, + const struct ip6_hdr *, + const struct tcphdr *); +static struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, + struct pf_os_fingerprint *, u_int8_t); +static struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *, + struct pf_os_fingerprint *); +static void pf_osfp_insert(struct pf_osfp_list *, + struct pf_os_fingerprint *); +#ifdef PFDEBUG +static struct pf_os_fingerprint *pf_osfp_validate(void); +#endif + +/* + * Passively fingerprint the OS of the host (IPv4 TCP SYN packets only) + * Returns the list of possible OSes. + */ +struct pf_osfp_enlist * +pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, + const struct tcphdr *tcp) +{ + struct ip *ip; + struct ip6_hdr *ip6; + char hdr[60]; + + if ((pd->af != PF_INET && pd->af != PF_INET6) || + pd->proto != IPPROTO_TCP || (tcp->th_off << 2) < sizeof(*tcp)) + return (NULL); + + if (pd->af == PF_INET) { + ip = mtod(m, struct ip *); + ip6 = (struct ip6_hdr *)NULL; + } else { + ip = (struct ip *)NULL; + ip6 = mtod(m, struct ip6_hdr *); + } + if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, + pd->af)) return (NULL); + + return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); +} + +static struct pf_osfp_enlist * +pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const struct tcphdr *tcp) +{ + struct pf_os_fingerprint fp, *fpresult; + int cnt, optlen = 0; + const u_int8_t *optp; + char srcname[128]; + + if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) + return (NULL); + if (ip) { + if ((ip->ip_off & htons(IP_OFFMASK)) != 0) + return (NULL); + } + + memset(&fp, 0, sizeof(fp)); + + if (ip) { + fp.fp_psize = ntohs(ip->ip_len); + fp.fp_ttl = ip->ip_ttl; + if (ip->ip_off & htons(IP_DF)) + fp.fp_flags |= PF_OSFP_DF; + strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname)); + } +#ifdef INET6 + else if (ip6) { + /* jumbo payload? 
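+ * (Editor's note on this XXX, an assumption about its intent: an
+ * RFC 2675 jumbogram carries ip6_plen == 0 with the real length in a
+ * hop-by-hop option, so the fp_psize computed on the next line would
+ * be wrong for such packets.)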
*/ + fp.fp_psize = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); + fp.fp_ttl = ip6->ip6_hlim; + fp.fp_flags |= PF_OSFP_DF; + fp.fp_flags |= PF_OSFP_INET6; + strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src), + sizeof(srcname)); + } +#endif + else + return (NULL); + fp.fp_wsize = ntohs(tcp->th_win); + + + cnt = (tcp->th_off << 2) - sizeof(*tcp); + optp = (const u_int8_t *)((const char *)tcp + sizeof(*tcp)); + for (; cnt > 0; cnt -= optlen, optp += optlen) { + if (*optp == TCPOPT_EOL) + break; + + fp.fp_optcnt++; + if (*optp == TCPOPT_NOP) { + fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | + PF_OSFP_TCPOPT_NOP; + optlen = 1; + } else { + if (cnt < 2) + return (NULL); + optlen = optp[1]; + if (optlen > cnt || optlen < 2) + return (NULL); + switch (*optp) { + case TCPOPT_MAXSEG: + if (optlen >= TCPOLEN_MAXSEG) + memcpy(&fp.fp_mss, &optp[2], + sizeof(fp.fp_mss)); + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_MSS; + NTOHS(fp.fp_mss); + break; + case TCPOPT_WINDOW: + if (optlen >= TCPOLEN_WINDOW) + memcpy(&fp.fp_wscale, &optp[2], + sizeof(fp.fp_wscale)); + NTOHS(fp.fp_wscale); + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | + PF_OSFP_TCPOPT_WSCALE; + break; + case TCPOPT_SACK_PERMITTED: + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_SACK; + break; + case TCPOPT_TIMESTAMP: + if (optlen >= TCPOLEN_TIMESTAMP) { + u_int32_t ts; + memcpy(&ts, &optp[2], sizeof(ts)); + if (ts == 0) + fp.fp_flags |= PF_OSFP_TS0; + + } + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_TS; + break; + default: + return (NULL); + } + } + optlen = MAX(optlen, 1); /* paranoia */ + } + + DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " + "(TS=%s,M=%s%d,W=%s%d)\n", + srcname, ntohs(tcp->th_sport), + fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0, + fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt, + (fp.fp_flags & PF_OSFP_TS0) ? "0" : "", + (fp.fp_flags & PF_OSFP_MSS_MOD) ? "%" : + (fp.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", + fp.fp_mss, + (fp.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : + (fp.fp_flags & PF_OSFP_WSCALE_DC) ? 
"*" : "", + fp.fp_wscale); + + if ((fpresult = pf_osfp_find(&V_pf_osfp_list, &fp, + PF_OSFP_MAXTTL_OFFSET))) + return (&fpresult->fp_oses); + return (NULL); +} + +/* Match a fingerprint ID against a list of OSes */ +int +pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) +{ + struct pf_osfp_entry *entry; + int os_class, os_version, os_subtype; + int en_class, en_version, en_subtype; + + if (os == PF_OSFP_ANY) + return (1); + if (list == NULL) { + DPFPRINTF("osfp no match against %x\n", os); + return (os == PF_OSFP_UNKNOWN); + } + PF_OSFP_UNPACK(os, os_class, os_version, os_subtype); + SLIST_FOREACH(entry, list, fp_entry) { + PF_OSFP_UNPACK(entry->fp_os, en_class, en_version, en_subtype); + if ((os_class == PF_OSFP_ANY || en_class == os_class) && + (os_version == PF_OSFP_ANY || en_version == os_version) && + (os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)) { + DPFPRINTF("osfp matched %s %s %s %x==%x\n", + entry->fp_class_nm, entry->fp_version_nm, + entry->fp_subtype_nm, os, entry->fp_os); + return (1); + } + } + DPFPRINTF("fingerprint 0x%x didn't match\n", os); + return (0); +} + +/* Flush the fingerprint list */ +void +pf_osfp_flush(void) +{ + struct pf_os_fingerprint *fp; + struct pf_osfp_entry *entry; + + while ((fp = SLIST_FIRST(&V_pf_osfp_list))) { + SLIST_REMOVE_HEAD(&V_pf_osfp_list, fp_next); + while ((entry = SLIST_FIRST(&fp->fp_oses))) { + SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry); + free(entry, M_PFOSFP); + } + free(fp, M_PFOSFP); + } +} + + +/* Add a fingerprint */ +int +pf_osfp_add(struct pf_osfp_ioctl *fpioc) +{ + struct pf_os_fingerprint *fp, fpadd; + struct pf_osfp_entry *entry; + + PF_RULES_WASSERT(); + + memset(&fpadd, 0, sizeof(fpadd)); + fpadd.fp_tcpopts = fpioc->fp_tcpopts; + fpadd.fp_wsize = fpioc->fp_wsize; + fpadd.fp_psize = fpioc->fp_psize; + fpadd.fp_mss = fpioc->fp_mss; + fpadd.fp_flags = fpioc->fp_flags; + fpadd.fp_optcnt = fpioc->fp_optcnt; + fpadd.fp_wscale = fpioc->fp_wscale; + fpadd.fp_ttl = fpioc->fp_ttl; + +#if 0 /* XXX RYAN wants to fix logging */ + DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " + "(TS=%s,M=%s%d,W=%s%d) %x\n", + fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, + fpioc->fp_os.fp_subtype_nm, + (fpadd.fp_flags & PF_OSFP_WSIZE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_WSIZE_MSS) ? "S" : + (fpadd.fp_flags & PF_OSFP_WSIZE_MTU) ? "T" : + (fpadd.fp_flags & PF_OSFP_WSIZE_DC) ? "*" : "", + fpadd.fp_wsize, + fpadd.fp_ttl, + (fpadd.fp_flags & PF_OSFP_DF) ? 1 : 0, + (fpadd.fp_flags & PF_OSFP_PSIZE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_PSIZE_DC) ? "*" : "", + fpadd.fp_psize, + (long long int)fpadd.fp_tcpopts, fpadd.fp_optcnt, + (fpadd.fp_flags & PF_OSFP_TS0) ? "0" : "", + (fpadd.fp_flags & PF_OSFP_MSS_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", + fpadd.fp_mss, + (fpadd.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? 
"*" : "", + fpadd.fp_wscale, + fpioc->fp_os.fp_os); +#endif + + if ((fp = pf_osfp_find_exact(&V_pf_osfp_list, &fpadd))) { + SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { + if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) + return (EEXIST); + } + if ((entry = malloc(sizeof(*entry), M_PFOSFP, M_NOWAIT)) + == NULL) + return (ENOMEM); + } else { + if ((fp = malloc(sizeof(*fp), M_PFOSFP, M_ZERO | M_NOWAIT)) + == NULL) + return (ENOMEM); + fp->fp_tcpopts = fpioc->fp_tcpopts; + fp->fp_wsize = fpioc->fp_wsize; + fp->fp_psize = fpioc->fp_psize; + fp->fp_mss = fpioc->fp_mss; + fp->fp_flags = fpioc->fp_flags; + fp->fp_optcnt = fpioc->fp_optcnt; + fp->fp_wscale = fpioc->fp_wscale; + fp->fp_ttl = fpioc->fp_ttl; + SLIST_INIT(&fp->fp_oses); + if ((entry = malloc(sizeof(*entry), M_PFOSFP, M_NOWAIT)) + == NULL) { + free(fp, M_PFOSFP); + return (ENOMEM); + } + pf_osfp_insert(&V_pf_osfp_list, fp); + } + memcpy(entry, &fpioc->fp_os, sizeof(*entry)); + + /* Make sure the strings are NUL terminated */ + entry->fp_class_nm[sizeof(entry->fp_class_nm)-1] = '\0'; + entry->fp_version_nm[sizeof(entry->fp_version_nm)-1] = '\0'; + entry->fp_subtype_nm[sizeof(entry->fp_subtype_nm)-1] = '\0'; + + SLIST_INSERT_HEAD(&fp->fp_oses, entry, fp_entry); + +#ifdef PFDEBUG + if ((fp = pf_osfp_validate())) + printf("Invalid fingerprint list\n"); +#endif /* PFDEBUG */ + return (0); +} + + +/* Find a fingerprint in the list */ +static struct pf_os_fingerprint * +pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, + u_int8_t ttldiff) +{ + struct pf_os_fingerprint *f; + +#define MATCH_INT(_MOD, _DC, _field) \ + if ((f->fp_flags & _DC) == 0) { \ + if ((f->fp_flags & _MOD) == 0) { \ + if (f->_field != find->_field) \ + continue; \ + } else { \ + if (f->_field == 0 || find->_field % f->_field) \ + continue; \ + } \ + } + + SLIST_FOREACH(f, list, fp_next) { + if (f->fp_tcpopts != find->fp_tcpopts || + f->fp_optcnt != find->fp_optcnt || + f->fp_ttl < find->fp_ttl || + f->fp_ttl - find->fp_ttl > ttldiff || + (f->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0)) != + (find->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0))) + continue; + + MATCH_INT(PF_OSFP_PSIZE_MOD, PF_OSFP_PSIZE_DC, fp_psize) + MATCH_INT(PF_OSFP_MSS_MOD, PF_OSFP_MSS_DC, fp_mss) + MATCH_INT(PF_OSFP_WSCALE_MOD, PF_OSFP_WSCALE_DC, fp_wscale) + if ((f->fp_flags & PF_OSFP_WSIZE_DC) == 0) { + if (f->fp_flags & PF_OSFP_WSIZE_MSS) { + if (find->fp_mss == 0) + continue; + +/* + * Some "smart" NAT devices and DSL routers will tweak the MSS size and + * will set it to whatever is suitable for the link type. 
+ */ +#define SMART_MSS 1460 + if ((find->fp_wsize % find->fp_mss || + find->fp_wsize / find->fp_mss != + f->fp_wsize) && + (find->fp_wsize % SMART_MSS || + find->fp_wsize / SMART_MSS != + f->fp_wsize)) + continue; + } else if (f->fp_flags & PF_OSFP_WSIZE_MTU) { + if (find->fp_mss == 0) + continue; + +#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr)) +#define SMART_MTU (SMART_MSS + MTUOFF) + if ((find->fp_wsize % (find->fp_mss + MTUOFF) || + find->fp_wsize / (find->fp_mss + MTUOFF) != + f->fp_wsize) && + (find->fp_wsize % SMART_MTU || + find->fp_wsize / SMART_MTU != + f->fp_wsize)) + continue; + } else if (f->fp_flags & PF_OSFP_WSIZE_MOD) { + if (f->fp_wsize == 0 || find->fp_wsize % + f->fp_wsize) + continue; + } else { + if (f->fp_wsize != find->fp_wsize) + continue; + } + } + return (f); + } + + return (NULL); +} + +/* Find an exact fingerprint in the list */ +static struct pf_os_fingerprint * +pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find) +{ + struct pf_os_fingerprint *f; + + SLIST_FOREACH(f, list, fp_next) { + if (f->fp_tcpopts == find->fp_tcpopts && + f->fp_wsize == find->fp_wsize && + f->fp_psize == find->fp_psize && + f->fp_mss == find->fp_mss && + f->fp_flags == find->fp_flags && + f->fp_optcnt == find->fp_optcnt && + f->fp_wscale == find->fp_wscale && + f->fp_ttl == find->fp_ttl) + return (f); + } + + return (NULL); +} + +/* Insert a fingerprint into the list */ +static void +pf_osfp_insert(struct pf_osfp_list *list, struct pf_os_fingerprint *ins) +{ + struct pf_os_fingerprint *f, *prev = NULL; + + /* XXX need to go semi tree based. can key on tcp options */ + + SLIST_FOREACH(f, list, fp_next) + prev = f; + if (prev) + SLIST_INSERT_AFTER(prev, ins, fp_next); + else + SLIST_INSERT_HEAD(list, ins, fp_next); +} + +/* Fill a fingerprint by its number (from an ioctl) */ +int +pf_osfp_get(struct pf_osfp_ioctl *fpioc) +{ + struct pf_os_fingerprint *fp; + struct pf_osfp_entry *entry; + int num = fpioc->fp_getnum; + int i = 0; + + + memset(fpioc, 0, sizeof(*fpioc)); + SLIST_FOREACH(fp, &V_pf_osfp_list, fp_next) { + SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { + if (i++ == num) { + fpioc->fp_mss = fp->fp_mss; + fpioc->fp_wsize = fp->fp_wsize; + fpioc->fp_flags = fp->fp_flags; + fpioc->fp_psize = fp->fp_psize; + fpioc->fp_ttl = fp->fp_ttl; + fpioc->fp_wscale = fp->fp_wscale; + fpioc->fp_getnum = num; + memcpy(&fpioc->fp_os, entry, + sizeof(fpioc->fp_os)); + return (0); + } + } + } + + return (EBUSY); +} + + +#ifdef PFDEBUG +/* Validate that each signature is reachable */ +static struct pf_os_fingerprint * +pf_osfp_validate(void) +{ + struct pf_os_fingerprint *f, *f2, find; + + SLIST_FOREACH(f, &V_pf_osfp_list, fp_next) { + memcpy(&find, f, sizeof(find)); + + /* We do a few MSS/th_win percolations to make things unique */ + if (find.fp_mss == 0) + find.fp_mss = 128; + if (f->fp_flags & PF_OSFP_WSIZE_MSS) + find.fp_wsize *= find.fp_mss; + else if (f->fp_flags & PF_OSFP_WSIZE_MTU) + find.fp_wsize *= (find.fp_mss + 40); + else if (f->fp_flags & PF_OSFP_WSIZE_MOD) + find.fp_wsize *= 2; + if (f != (f2 = pf_osfp_find(&V_pf_osfp_list, &find, 0))) { + if (f2) + printf("Found \"%s %s %s\" instead of " + "\"%s %s %s\"\n", + SLIST_FIRST(&f2->fp_oses)->fp_class_nm, + SLIST_FIRST(&f2->fp_oses)->fp_version_nm, + SLIST_FIRST(&f2->fp_oses)->fp_subtype_nm, + SLIST_FIRST(&f->fp_oses)->fp_class_nm, + SLIST_FIRST(&f->fp_oses)->fp_version_nm, + SLIST_FIRST(&f->fp_oses)->fp_subtype_nm); + else + printf("Couldn't find \"%s %s %s\"\n", + 
SLIST_FIRST(&f->fp_oses)->fp_class_nm, + SLIST_FIRST(&f->fp_oses)->fp_version_nm, + SLIST_FIRST(&f->fp_oses)->fp_subtype_nm); + return (f); + } + } + return (NULL); +} +#endif /* PFDEBUG */ diff --git a/freebsd/sys/netpfil/pf/pf_ruleset.c b/freebsd/sys/netpfil/pf/pf_ruleset.c new file mode 100644 index 00000000..e16643aa --- /dev/null +++ b/freebsd/sys/netpfil/pf/pf_ruleset.c @@ -0,0 +1,426 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + * $OpenBSD: pf_ruleset.c,v 1.2 2008/12/18 15:31:37 dhill Exp $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/socket.h> +#ifdef _KERNEL +# include <sys/systm.h> +# include <sys/refcount.h> +#endif /* _KERNEL */ +#include <sys/mbuf.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#include <net/if.h> +#include <net/vnet.h> +#include <net/pfvar.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#endif /* INET6 */ + + +#ifdef _KERNEL +#define DPFPRINTF(format, x...) \ + if (V_pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO) +#define rs_free(x) free(x, M_TEMP) + +#else +/* Userland equivalents so we can lend code to pfctl et al. */ + +#include <arpa/inet.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#define rs_malloc(x) calloc(1, x) +#define rs_free(x) free(x) + +#ifdef PFDEBUG +#include <sys/stdarg.h> +#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +#else +#define DPFPRINTF(format, x...) ((void)0) +#endif /* PFDEBUG */ +#endif /* _KERNEL */ + +#ifdef _KERNEL +VNET_DEFINE(struct pf_anchor_global, pf_anchors); +VNET_DEFINE(struct pf_anchor, pf_main_anchor); +#else /* ! 
_KERNEL */ +struct pf_anchor_global pf_anchors; +struct pf_anchor pf_main_anchor; +#undef V_pf_anchors +#define V_pf_anchors pf_anchors +#undef pf_main_ruleset +#define pf_main_ruleset pf_main_anchor.ruleset +#endif /* _KERNEL */ + +static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); + +static struct pf_anchor *pf_find_anchor(const char *); + +RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); + +static __inline int +pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b) +{ + int c = strcmp(a->path, b->path); + + return (c ? (c < 0 ? -1 : 1) : 0); +} + +int +pf_get_ruleset_number(u_int8_t action) +{ + switch (action) { + case PF_SCRUB: + case PF_NOSCRUB: + return (PF_RULESET_SCRUB); + break; + case PF_PASS: + case PF_DROP: + return (PF_RULESET_FILTER); + break; + case PF_NAT: + case PF_NONAT: + return (PF_RULESET_NAT); + break; + case PF_BINAT: + case PF_NOBINAT: + return (PF_RULESET_BINAT); + break; + case PF_RDR: + case PF_NORDR: + return (PF_RULESET_RDR); + break; + default: + return (PF_RULESET_MAX); + break; + } +} + +void +pf_init_ruleset(struct pf_ruleset *ruleset) +{ + int i; + + memset(ruleset, 0, sizeof(struct pf_ruleset)); + for (i = 0; i < PF_RULESET_MAX; i++) { + TAILQ_INIT(&ruleset->rules[i].queues[0]); + TAILQ_INIT(&ruleset->rules[i].queues[1]); + ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; + ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; + } +} + +static struct pf_anchor * +pf_find_anchor(const char *path) +{ + struct pf_anchor *key, *found; + + key = (struct pf_anchor *)rs_malloc(sizeof(*key)); + if (key == NULL) + return (NULL); + strlcpy(key->path, path, sizeof(key->path)); + found = RB_FIND(pf_anchor_global, &V_pf_anchors, key); + rs_free(key); + return (found); +} + +struct pf_ruleset * +pf_find_ruleset(const char *path) +{ + struct pf_anchor *anchor; + + while (*path == '/') + path++; + if (!*path) + return (&pf_main_ruleset); + anchor = pf_find_anchor(path); + if (anchor == NULL) + return (NULL); + else + return (&anchor->ruleset); +} + +struct pf_ruleset * +pf_find_or_create_ruleset(const char *path) +{ + char *p, *q, *r; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor = NULL, *dup, *parent = NULL; + + if (path[0] == 0) + return (&pf_main_ruleset); + while (*path == '/') + path++; + ruleset = pf_find_ruleset(path); + if (ruleset != NULL) + return (ruleset); + p = (char *)rs_malloc(MAXPATHLEN); + if (p == NULL) + return (NULL); + strlcpy(p, path, MAXPATHLEN); + while (parent == NULL && (q = strrchr(p, '/')) != NULL) { + *q = 0; + if ((ruleset = pf_find_ruleset(p)) != NULL) { + parent = ruleset->anchor; + break; + } + } + if (q == NULL) + q = p; + else + q++; + strlcpy(p, path, MAXPATHLEN); + if (!*q) { + rs_free(p); + return (NULL); + } + while ((r = strchr(q, '/')) != NULL || *q) { + if (r != NULL) + *r = 0; + if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || + (parent != NULL && strlen(parent->path) >= + MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) { + rs_free(p); + return (NULL); + } + anchor = (struct pf_anchor *)rs_malloc(sizeof(*anchor)); + if (anchor == NULL) { + rs_free(p); + return (NULL); + } + RB_INIT(&anchor->children); + strlcpy(anchor->name, q, sizeof(anchor->name)); + if (parent != NULL) { + strlcpy(anchor->path, parent->path, + sizeof(anchor->path)); + strlcat(anchor->path, "/", sizeof(anchor->path)); + } + strlcat(anchor->path, anchor->name, sizeof(anchor->path)); + if ((dup = 
RB_INSERT(pf_anchor_global, &V_pf_anchors, anchor)) != + NULL) { + printf("pf_find_or_create_ruleset: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", + anchor->path, anchor->name, dup->path, dup->name); + rs_free(anchor); + rs_free(p); + return (NULL); + } + if (parent != NULL) { + anchor->parent = parent; + if ((dup = RB_INSERT(pf_anchor_node, &parent->children, + anchor)) != NULL) { + printf("pf_find_or_create_ruleset: " + "RB_INSERT2 '%s' '%s' collides with " + "'%s' '%s'\n", anchor->path, anchor->name, + dup->path, dup->name); + RB_REMOVE(pf_anchor_global, &V_pf_anchors, + anchor); + rs_free(anchor); + rs_free(p); + return (NULL); + } + } + pf_init_ruleset(&anchor->ruleset); + anchor->ruleset.anchor = anchor; + parent = anchor; + if (r != NULL) + q = r + 1; + else + *q = 0; + } + rs_free(p); + return (&anchor->ruleset); +} + +void +pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) +{ + struct pf_anchor *parent; + int i; + + while (ruleset != NULL) { + if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + !RB_EMPTY(&ruleset->anchor->children) || + ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || + ruleset->topen) + return; + for (i = 0; i < PF_RULESET_MAX; ++i) + if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || + !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || + ruleset->rules[i].inactive.open) + return; + RB_REMOVE(pf_anchor_global, &V_pf_anchors, ruleset->anchor); + if ((parent = ruleset->anchor->parent) != NULL) + RB_REMOVE(pf_anchor_node, &parent->children, + ruleset->anchor); + rs_free(ruleset->anchor); + if (parent == NULL) + return; + ruleset = &parent->ruleset; + } +} + +int +pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, + const char *name) +{ + char *p, *path; + struct pf_ruleset *ruleset; + + r->anchor = NULL; + r->anchor_relative = 0; + r->anchor_wildcard = 0; + if (!name[0]) + return (0); + path = (char *)rs_malloc(MAXPATHLEN); + if (path == NULL) + return (1); + if (name[0] == '/') + strlcpy(path, name + 1, MAXPATHLEN); + else { + /* relative path */ + r->anchor_relative = 1; + if (s->anchor == NULL || !s->anchor->path[0]) + path[0] = 0; + else + strlcpy(path, s->anchor->path, MAXPATHLEN); + while (name[0] == '.' && name[1] == '.' && name[2] == '/') { + if (!path[0]) { + printf("pf_anchor_setup: .. 
beyond root\n"); + rs_free(path); + return (1); + } + if ((p = strrchr(path, '/')) != NULL) + *p = 0; + else + path[0] = 0; + r->anchor_relative++; + name += 3; + } + if (path[0]) + strlcat(path, "/", MAXPATHLEN); + strlcat(path, name, MAXPATHLEN); + } + if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) { + r->anchor_wildcard = 1; + *p = 0; + } + ruleset = pf_find_or_create_ruleset(path); + rs_free(path); + if (ruleset == NULL || ruleset->anchor == NULL) { + printf("pf_anchor_setup: ruleset\n"); + return (1); + } + r->anchor = ruleset->anchor; + r->anchor->refcnt++; + return (0); +} + +int +pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, + struct pfioc_rule *pr) +{ + pr->anchor_call[0] = 0; + if (r->anchor == NULL) + return (0); + if (!r->anchor_relative) { + strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call)); + strlcat(pr->anchor_call, r->anchor->path, + sizeof(pr->anchor_call)); + } else { + char *a, *p; + int i; + + a = (char *)rs_malloc(MAXPATHLEN); + if (a == NULL) + return (1); + if (rs->anchor == NULL) + a[0] = 0; + else + strlcpy(a, rs->anchor->path, MAXPATHLEN); + for (i = 1; i < r->anchor_relative; ++i) { + if ((p = strrchr(a, '/')) == NULL) + p = a; + *p = 0; + strlcat(pr->anchor_call, "../", + sizeof(pr->anchor_call)); + } + if (strncmp(a, r->anchor->path, strlen(a))) { + printf("pf_anchor_copyout: '%s' '%s'\n", a, + r->anchor->path); + rs_free(a); + return (1); + } + if (strlen(r->anchor->path) > strlen(a)) + strlcat(pr->anchor_call, r->anchor->path + (a[0] ? + strlen(a) + 1 : 0), sizeof(pr->anchor_call)); + rs_free(a); + } + if (r->anchor_wildcard) + strlcat(pr->anchor_call, pr->anchor_call[0] ? "/*" : "*", + sizeof(pr->anchor_call)); + return (0); +} + +void +pf_anchor_remove(struct pf_rule *r) +{ + if (r->anchor == NULL) + return; + if (r->anchor->refcnt <= 0) { + printf("pf_anchor_remove: broken refcount\n"); + r->anchor = NULL; + return; + } + if (!--r->anchor->refcnt) + pf_remove_if_empty_ruleset(&r->anchor->ruleset); + r->anchor = NULL; +} diff --git a/freebsd/sys/netpfil/pf/pf_table.c b/freebsd/sys/netpfil/pf/pf_table.c new file mode 100644 index 00000000..26b6f4e9 --- /dev/null +++ b/freebsd/sys/netpfil/pf/pf_table.c @@ -0,0 +1,2195 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2002 Cedric Berger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $OpenBSD: pf_table.c,v 1.79 2008/10/08 06:24:50 mcbride Exp $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <vm/uma.h> + +#include <net/if.h> +#include <net/vnet.h> +#include <net/pfvar.h> + +#define ACCEPT_FLAGS(flags, oklist) \ + do { \ + if ((flags & ~(oklist)) & \ + PFR_FLAG_ALLMASK) \ + return (EINVAL); \ + } while (0) + +#define FILLIN_SIN(sin, addr) \ + do { \ + (sin).sin_len = sizeof(sin); \ + (sin).sin_family = AF_INET; \ + (sin).sin_addr = (addr); \ + } while (0) + +#define FILLIN_SIN6(sin6, addr) \ + do { \ + (sin6).sin6_len = sizeof(sin6); \ + (sin6).sin6_family = AF_INET6; \ + (sin6).sin6_addr = (addr); \ + } while (0) + +#define SWAP(type, a1, a2) \ + do { \ + type tmp = a1; \ + a1 = a2; \ + a2 = tmp; \ + } while (0) + +#define SUNION2PF(su, af) (((af)==AF_INET) ? \ + (struct pf_addr *)&(su)->sin.sin_addr : \ + (struct pf_addr *)&(su)->sin6.sin6_addr) + +#define AF_BITS(af) (((af)==AF_INET)?32:128) +#define ADDR_NETWORK(ad) ((ad)->pfra_net < AF_BITS((ad)->pfra_af)) +#define KENTRY_NETWORK(ke) ((ke)->pfrke_net < AF_BITS((ke)->pfrke_af)) +#define KENTRY_RNF_ROOT(ke) \ + ((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0) + +#define NO_ADDRESSES (-1) +#define ENQUEUE_UNMARKED_ONLY (1) +#define INVERT_NEG_FLAG (1) + +struct pfr_walktree { + enum pfrw_op { + PFRW_MARK, + PFRW_SWEEP, + PFRW_ENQUEUE, + PFRW_GET_ADDRS, + PFRW_GET_ASTATS, + PFRW_POOL_GET, + PFRW_DYNADDR_UPDATE + } pfrw_op; + union { + struct pfr_addr *pfrw1_addr; + struct pfr_astats *pfrw1_astats; + struct pfr_kentryworkq *pfrw1_workq; + struct pfr_kentry *pfrw1_kentry; + struct pfi_dynaddr *pfrw1_dyn; + } pfrw_1; + int pfrw_free; +}; +#define pfrw_addr pfrw_1.pfrw1_addr +#define pfrw_astats pfrw_1.pfrw1_astats +#define pfrw_workq pfrw_1.pfrw1_workq +#define pfrw_kentry pfrw_1.pfrw1_kentry +#define pfrw_dyn pfrw_1.pfrw1_dyn +#define pfrw_cnt pfrw_free + +#define senderr(e) do { rv = (e); goto _bad; } while (0) + +static MALLOC_DEFINE(M_PFTABLE, "pf_table", "pf(4) tables structures"); +static VNET_DEFINE(uma_zone_t, pfr_kentry_z); +#define V_pfr_kentry_z VNET(pfr_kentry_z) +static VNET_DEFINE(uma_zone_t, pfr_kcounters_z); +#define V_pfr_kcounters_z VNET(pfr_kcounters_z) + +static struct pf_addr pfr_ffaddr = { + .addr32 = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff } +}; + +static void pfr_copyout_addr(struct pfr_addr *, + struct pfr_kentry *ke); +static int pfr_validate_addr(struct pfr_addr *); +static void pfr_enqueue_addrs(struct pfr_ktable *, + struct pfr_kentryworkq *, int *, int); +static void pfr_mark_addrs(struct pfr_ktable *); +static struct pfr_kentry + *pfr_lookup_addr(struct pfr_ktable *, + struct pfr_addr *, int); 
+static struct pfr_kentry *pfr_create_kentry(struct pfr_addr *); +static void pfr_destroy_kentries(struct pfr_kentryworkq *); +static void pfr_destroy_kentry(struct pfr_kentry *); +static void pfr_insert_kentries(struct pfr_ktable *, + struct pfr_kentryworkq *, long); +static void pfr_remove_kentries(struct pfr_ktable *, + struct pfr_kentryworkq *); +static void pfr_clstats_kentries(struct pfr_kentryworkq *, long, + int); +static void pfr_reset_feedback(struct pfr_addr *, int); +static void pfr_prepare_network(union sockaddr_union *, int, int); +static int pfr_route_kentry(struct pfr_ktable *, + struct pfr_kentry *); +static int pfr_unroute_kentry(struct pfr_ktable *, + struct pfr_kentry *); +static int pfr_walktree(struct radix_node *, void *); +static int pfr_validate_table(struct pfr_table *, int, int); +static int pfr_fix_anchor(char *); +static void pfr_commit_ktable(struct pfr_ktable *, long); +static void pfr_insert_ktables(struct pfr_ktableworkq *); +static void pfr_insert_ktable(struct pfr_ktable *); +static void pfr_setflags_ktables(struct pfr_ktableworkq *); +static void pfr_setflags_ktable(struct pfr_ktable *, int); +static void pfr_clstats_ktables(struct pfr_ktableworkq *, long, + int); +static void pfr_clstats_ktable(struct pfr_ktable *, long, int); +static struct pfr_ktable + *pfr_create_ktable(struct pfr_table *, long, int); +static void pfr_destroy_ktables(struct pfr_ktableworkq *, int); +static void pfr_destroy_ktable(struct pfr_ktable *, int); +static int pfr_ktable_compare(struct pfr_ktable *, + struct pfr_ktable *); +static struct pfr_ktable + *pfr_lookup_table(struct pfr_table *); +static void pfr_clean_node_mask(struct pfr_ktable *, + struct pfr_kentryworkq *); +static int pfr_table_count(struct pfr_table *, int); +static int pfr_skip_table(struct pfr_table *, + struct pfr_ktable *, int); +static struct pfr_kentry + *pfr_kentry_byidx(struct pfr_ktable *, int, int); + +static RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); +static RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); + +struct pfr_ktablehead pfr_ktables; +struct pfr_table pfr_nulltable; +int pfr_ktable_cnt; + +void +pfr_initialize(void) +{ + + V_pfr_kentry_z = uma_zcreate("pf table entries", + sizeof(struct pfr_kentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + 0); + V_pfr_kcounters_z = uma_zcreate("pf table counters", + sizeof(struct pfr_kcounters), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + V_pf_limits[PF_LIMIT_TABLE_ENTRIES].zone = V_pfr_kentry_z; + V_pf_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT; +} + +void +pfr_cleanup(void) +{ + + uma_zdestroy(V_pfr_kentry_z); + uma_zdestroy(V_pfr_kcounters_z); +} + +int +pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + pfr_enqueue_addrs(kt, &workq, ndel, 0); + + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_remove_kentries(kt, &workq); + KASSERT(kt->pfrkt_cnt == 0, ("%s: non-null pfrkt_cnt", __func__)); + } + return (0); +} + +int +pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nadd, int flags) +{ + struct pfr_ktable *kt, *tmpkt; + struct pfr_kentryworkq workq; + struct pfr_kentry *p, *q; + struct 
pfr_addr *ad;
+ int i, rv, xadd = 0;
+ long tzero = time_second;
+
+ PF_RULES_WASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
+ if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_flags & PFR_TFLAG_CONST)
+ return (EPERM);
+ tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0);
+ if (tmpkt == NULL)
+ return (ENOMEM);
+ SLIST_INIT(&workq);
+ for (i = 0, ad = addr; i < size; i++, ad++) {
+ if (pfr_validate_addr(ad))
+ senderr(EINVAL);
+ p = pfr_lookup_addr(kt, ad, 1);
+ q = pfr_lookup_addr(tmpkt, ad, 1);
+ if (flags & PFR_FLAG_FEEDBACK) {
+ if (q != NULL)
+ ad->pfra_fback = PFR_FB_DUPLICATE;
+ else if (p == NULL)
+ ad->pfra_fback = PFR_FB_ADDED;
+ else if (p->pfrke_not != ad->pfra_not)
+ ad->pfra_fback = PFR_FB_CONFLICT;
+ else
+ ad->pfra_fback = PFR_FB_NONE;
+ }
+ if (p == NULL && q == NULL) {
+ p = pfr_create_kentry(ad);
+ if (p == NULL)
+ senderr(ENOMEM);
+ if (pfr_route_kentry(tmpkt, p)) {
+ pfr_destroy_kentry(p);
+ ad->pfra_fback = PFR_FB_NONE;
+ } else {
+ SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
+ xadd++;
+ }
+ }
+ }
+ pfr_clean_node_mask(tmpkt, &workq);
+ if (!(flags & PFR_FLAG_DUMMY))
+ pfr_insert_kentries(kt, &workq, tzero);
+ else
+ pfr_destroy_kentries(&workq);
+ if (nadd != NULL)
+ *nadd = xadd;
+ pfr_destroy_ktable(tmpkt, 0);
+ return (0);
+_bad:
+ pfr_clean_node_mask(tmpkt, &workq);
+ pfr_destroy_kentries(&workq);
+ if (flags & PFR_FLAG_FEEDBACK)
+ pfr_reset_feedback(addr, size);
+ pfr_destroy_ktable(tmpkt, 0);
+ return (rv);
+}
+
+int
+pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *ndel, int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_kentryworkq workq;
+ struct pfr_kentry *p;
+ struct pfr_addr *ad;
+ int i, rv, xdel = 0, log = 1;
+
+ PF_RULES_WASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
+ if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_flags & PFR_TFLAG_CONST)
+ return (EPERM);
+ /*
+ * There are two algorithms to choose from here.
+ * With:
+ * n: number of addresses to delete
+ * N: number of addresses in the table
+ *
+ * One is O(N) and is better for large 'n'
+ * One is O(n*LOG(N)) and is better for small 'n'
+ *
+ * The following code tries to decide which one is best. 
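+ *
+ * (Editor's worked example, illustrative numbers only: the loop below
+ * sets log to one plus the bit length of N, so N = 65536 gives
+ * log = 18 and a threshold of N/log = 3640. A request deleting more
+ * than 3640 addresses takes the O(N) full-table mark; a smaller one
+ * does per-address O(LOG(N)) lookups instead.)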
+ */ + for (i = kt->pfrkt_cnt; i > 0; i >>= 1) + log++; + if (size > kt->pfrkt_cnt/log) { + /* full table scan */ + pfr_mark_addrs(kt); + } else { + /* iterate over addresses to delete */ + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + return (EINVAL); + p = pfr_lookup_addr(kt, ad, 1); + if (p != NULL) + p->pfrke_mark = 0; + } + } + SLIST_INIT(&workq); + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + if (p == NULL) + ad->pfra_fback = PFR_FB_NONE; + else if (p->pfrke_not != ad->pfra_not) + ad->pfra_fback = PFR_FB_CONFLICT; + else if (p->pfrke_mark) + ad->pfra_fback = PFR_FB_DUPLICATE; + else + ad->pfra_fback = PFR_FB_DELETED; + } + if (p != NULL && p->pfrke_not == ad->pfra_not && + !p->pfrke_mark) { + p->pfrke_mark = 1; + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xdel++; + } + } + if (!(flags & PFR_FLAG_DUMMY)) + pfr_remove_kentries(kt, &workq); + if (ndel != NULL) + *ndel = xdel; + return (0); +_bad: + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + return (rv); +} + +int +pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *size2, int *nadd, int *ndel, int *nchange, int flags, + u_int32_t ignore_pfrt_flags) +{ + struct pfr_ktable *kt, *tmpkt; + struct pfr_kentryworkq addq, delq, changeq; + struct pfr_kentry *p, *q; + struct pfr_addr ad; + int i, rv, xadd = 0, xdel = 0, xchange = 0; + long tzero = time_second; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & + PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + if (tmpkt == NULL) + return (ENOMEM); + pfr_mark_addrs(kt); + SLIST_INIT(&addq); + SLIST_INIT(&delq); + SLIST_INIT(&changeq); + for (i = 0; i < size; i++) { + /* + * XXXGL: undertand pf_if usage of this function + * and make ad a moving pointer + */ + bcopy(addr + i, &ad, sizeof(ad)); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + ad.pfra_fback = PFR_FB_NONE; + p = pfr_lookup_addr(kt, &ad, 1); + if (p != NULL) { + if (p->pfrke_mark) { + ad.pfra_fback = PFR_FB_DUPLICATE; + goto _skip; + } + p->pfrke_mark = 1; + if (p->pfrke_not != ad.pfra_not) { + SLIST_INSERT_HEAD(&changeq, p, pfrke_workq); + ad.pfra_fback = PFR_FB_CHANGED; + xchange++; + } + } else { + q = pfr_lookup_addr(tmpkt, &ad, 1); + if (q != NULL) { + ad.pfra_fback = PFR_FB_DUPLICATE; + goto _skip; + } + p = pfr_create_kentry(&ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(tmpkt, p)) { + pfr_destroy_kentry(p); + ad.pfra_fback = PFR_FB_NONE; + } else { + SLIST_INSERT_HEAD(&addq, p, pfrke_workq); + ad.pfra_fback = PFR_FB_ADDED; + xadd++; + } + } +_skip: + if (flags & PFR_FLAG_FEEDBACK) + bcopy(&ad, addr + i, sizeof(ad)); + } + pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); + if ((flags & PFR_FLAG_FEEDBACK) && *size2) { + if (*size2 < size+xdel) { + *size2 = size+xdel; + senderr(0); + } + i = 0; + SLIST_FOREACH(p, &delq, pfrke_workq) { + pfr_copyout_addr(&ad, p); + ad.pfra_fback = PFR_FB_DELETED; + bcopy(&ad, addr + size + i, sizeof(ad)); + i++; + } + } + pfr_clean_node_mask(tmpkt, &addq); + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_insert_kentries(kt, &addq, tzero); + pfr_remove_kentries(kt, &delq); + 
pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); + } else + pfr_destroy_kentries(&addq); + if (nadd != NULL) + *nadd = xadd; + if (ndel != NULL) + *ndel = xdel; + if (nchange != NULL) + *nchange = xchange; + if ((flags & PFR_FLAG_FEEDBACK) && size2) + *size2 = size+xdel; + pfr_destroy_ktable(tmpkt, 0); + return (0); +_bad: + pfr_clean_node_mask(tmpkt, &addq); + pfr_destroy_kentries(&addq); + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + pfr_destroy_ktable(tmpkt, 0); + return (rv); +} + +int +pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nmatch, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentry *p; + struct pfr_addr *ad; + int i, xmatch = 0; + + PF_RULES_RASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + return (EINVAL); + if (ADDR_NETWORK(ad)) + return (EINVAL); + p = pfr_lookup_addr(kt, ad, 0); + if (flags & PFR_FLAG_REPLACE) + pfr_copyout_addr(ad, p); + ad->pfra_fback = (p == NULL) ? PFR_FB_NONE : + (p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH); + if (p != NULL && !p->pfrke_not) + xmatch++; + } + if (nmatch != NULL) + *nmatch = xmatch; + return (0); +} + +int +pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, + int flags) +{ + struct pfr_ktable *kt; + struct pfr_walktree w; + int rv; + + PF_RULES_RASSERT(); + + ACCEPT_FLAGS(flags, 0); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_cnt > *size) { + *size = kt->pfrkt_cnt; + return (0); + } + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_GET_ADDRS; + w.pfrw_addr = addr; + w.pfrw_free = kt->pfrkt_cnt; + rv = kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w); + if (!rv) + rv = kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh, + pfr_walktree, &w); + if (rv) + return (rv); + + KASSERT(w.pfrw_free == 0, ("%s: corruption detected (%d)", __func__, + w.pfrw_free)); + + *size = kt->pfrkt_cnt; + return (0); +} + +int +pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, + int flags) +{ + struct pfr_ktable *kt; + struct pfr_walktree w; + struct pfr_kentryworkq workq; + int rv; + long tzero = time_second; + + PF_RULES_RASSERT(); + + /* XXX PFR_FLAG_CLSTATS disabled */ + ACCEPT_FLAGS(flags, 0); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_cnt > *size) { + *size = kt->pfrkt_cnt; + return (0); + } + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_GET_ASTATS; + w.pfrw_astats = addr; + w.pfrw_free = kt->pfrkt_cnt; + rv = kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w); + if (!rv) + rv = kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh, + pfr_walktree, &w); + if (!rv && (flags & PFR_FLAG_CLSTATS)) { + pfr_enqueue_addrs(kt, &workq, NULL, 0); + pfr_clstats_kentries(&workq, tzero, 0); + } + if (rv) + return (rv); + + if (w.pfrw_free) { + printf("pfr_get_astats: corruption detected (%d).\n", + w.pfrw_free); + return (ENOTTY); + } + *size = kt->pfrkt_cnt; + return (0); +} + +int +pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nzero, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq 
workq; + struct pfr_kentry *p; + struct pfr_addr *ad; + int i, rv, xzero = 0; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + SLIST_INIT(&workq); + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + ad->pfra_fback = (p != NULL) ? + PFR_FB_CLEARED : PFR_FB_NONE; + } + if (p != NULL) { + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xzero++; + } + } + + if (!(flags & PFR_FLAG_DUMMY)) + pfr_clstats_kentries(&workq, 0, 0); + if (nzero != NULL) + *nzero = xzero; + return (0); +_bad: + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + return (rv); +} + +static int +pfr_validate_addr(struct pfr_addr *ad) +{ + int i; + + switch (ad->pfra_af) { +#ifdef INET + case AF_INET: + if (ad->pfra_net > 32) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (ad->pfra_net > 128) + return (-1); + break; +#endif /* INET6 */ + default: + return (-1); + } + if (ad->pfra_net < 128 && + (((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8)))) + return (-1); + for (i = (ad->pfra_net+7)/8; i < sizeof(ad->pfra_u); i++) + if (((caddr_t)ad)[i]) + return (-1); + if (ad->pfra_not && ad->pfra_not != 1) + return (-1); + if (ad->pfra_fback) + return (-1); + return (0); +} + +static void +pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, + int *naddr, int sweep) +{ + struct pfr_walktree w; + + SLIST_INIT(workq); + bzero(&w, sizeof(w)); + w.pfrw_op = sweep ? PFRW_SWEEP : PFRW_ENQUEUE; + w.pfrw_workq = workq; + if (kt->pfrkt_ip4 != NULL) + if (kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, + pfr_walktree, &w)) + printf("pfr_enqueue_addrs: IPv4 walktree failed.\n"); + if (kt->pfrkt_ip6 != NULL) + if (kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh, + pfr_walktree, &w)) + printf("pfr_enqueue_addrs: IPv6 walktree failed.\n"); + if (naddr != NULL) + *naddr = w.pfrw_cnt; +} + +static void +pfr_mark_addrs(struct pfr_ktable *kt) +{ + struct pfr_walktree w; + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_MARK; + if (kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w)) + printf("pfr_mark_addrs: IPv4 walktree failed.\n"); + if (kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh, pfr_walktree, &w)) + printf("pfr_mark_addrs: IPv6 walktree failed.\n"); +} + + +static struct pfr_kentry * +pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) +{ + union sockaddr_union sa, mask; + struct radix_head *head = NULL; + struct pfr_kentry *ke; + + PF_RULES_ASSERT(); + + bzero(&sa, sizeof(sa)); + if (ad->pfra_af == AF_INET) { + FILLIN_SIN(sa.sin, ad->pfra_ip4addr); + head = &kt->pfrkt_ip4->rh; + } else if ( ad->pfra_af == AF_INET6 ) { + FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr); + head = &kt->pfrkt_ip6->rh; + } + if (ADDR_NETWORK(ad)) { + pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net); + ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + } else { + ke = (struct pfr_kentry *)rn_match(&sa, head); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + if (exact && ke && KENTRY_NETWORK(ke)) + ke = NULL; + } + return (ke); +} + +static struct pfr_kentry * +pfr_create_kentry(struct pfr_addr *ad) +{ + struct pfr_kentry *ke; + + ke = uma_zalloc(V_pfr_kentry_z, M_NOWAIT | M_ZERO); + if (ke == NULL) + return 
(NULL); + + if (ad->pfra_af == AF_INET) + FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr); + else if (ad->pfra_af == AF_INET6) + FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr); + ke->pfrke_af = ad->pfra_af; + ke->pfrke_net = ad->pfra_net; + ke->pfrke_not = ad->pfra_not; + return (ke); +} + +static void +pfr_destroy_kentries(struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p, *q; + + for (p = SLIST_FIRST(workq); p != NULL; p = q) { + q = SLIST_NEXT(p, pfrke_workq); + pfr_destroy_kentry(p); + } +} + +static void +pfr_destroy_kentry(struct pfr_kentry *ke) +{ + if (ke->pfrke_counters) + uma_zfree(V_pfr_kcounters_z, ke->pfrke_counters); + uma_zfree(V_pfr_kentry_z, ke); +} + +static void +pfr_insert_kentries(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq, long tzero) +{ + struct pfr_kentry *p; + int rv, n = 0; + + SLIST_FOREACH(p, workq, pfrke_workq) { + rv = pfr_route_kentry(kt, p); + if (rv) { + printf("pfr_insert_kentries: cannot route entry " + "(code=%d).\n", rv); + break; + } + p->pfrke_tzero = tzero; + n++; + } + kt->pfrkt_cnt += n; +} + +int +pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero) +{ + struct pfr_kentry *p; + int rv; + + p = pfr_lookup_addr(kt, ad, 1); + if (p != NULL) + return (0); + p = pfr_create_kentry(ad); + if (p == NULL) + return (ENOMEM); + + rv = pfr_route_kentry(kt, p); + if (rv) + return (rv); + + p->pfrke_tzero = tzero; + kt->pfrkt_cnt++; + + return (0); +} + +static void +pfr_remove_kentries(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p; + int n = 0; + + SLIST_FOREACH(p, workq, pfrke_workq) { + pfr_unroute_kentry(kt, p); + n++; + } + kt->pfrkt_cnt -= n; + pfr_destroy_kentries(workq); +} + +static void +pfr_clean_node_mask(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p; + + SLIST_FOREACH(p, workq, pfrke_workq) + pfr_unroute_kentry(kt, p); +} + +static void +pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange) +{ + struct pfr_kentry *p; + + SLIST_FOREACH(p, workq, pfrke_workq) { + if (negchange) + p->pfrke_not = !p->pfrke_not; + if (p->pfrke_counters) { + uma_zfree(V_pfr_kcounters_z, p->pfrke_counters); + p->pfrke_counters = NULL; + } + p->pfrke_tzero = tzero; + } +} + +static void +pfr_reset_feedback(struct pfr_addr *addr, int size) +{ + struct pfr_addr *ad; + int i; + + for (i = 0, ad = addr; i < size; i++, ad++) + ad->pfra_fback = PFR_FB_NONE; +} + +static void +pfr_prepare_network(union sockaddr_union *sa, int af, int net) +{ + int i; + + bzero(sa, sizeof(*sa)); + if (af == AF_INET) { + sa->sin.sin_len = sizeof(sa->sin); + sa->sin.sin_family = AF_INET; + sa->sin.sin_addr.s_addr = net ? htonl(-1 << (32-net)) : 0; + } else if (af == AF_INET6) { + sa->sin6.sin6_len = sizeof(sa->sin6); + sa->sin6.sin6_family = AF_INET6; + for (i = 0; i < 4; i++) { + if (net <= 32) { + sa->sin6.sin6_addr.s6_addr32[i] = + net ? 
htonl(-1 << (32-net)) : 0; + break; + } + sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF; + net -= 32; + } + } +} + +static int +pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) +{ + union sockaddr_union mask; + struct radix_node *rn; + struct radix_head *head = NULL; + + PF_RULES_WASSERT(); + + bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); + if (ke->pfrke_af == AF_INET) + head = &kt->pfrkt_ip4->rh; + else if (ke->pfrke_af == AF_INET6) + head = &kt->pfrkt_ip6->rh; + + if (KENTRY_NETWORK(ke)) { + pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); + rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node); + } else + rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node); + + return (rn == NULL ? -1 : 0); +} + +static int +pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) +{ + union sockaddr_union mask; + struct radix_node *rn; + struct radix_head *head = NULL; + + if (ke->pfrke_af == AF_INET) + head = &kt->pfrkt_ip4->rh; + else if (ke->pfrke_af == AF_INET6) + head = &kt->pfrkt_ip6->rh; + + if (KENTRY_NETWORK(ke)) { + pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); + rn = rn_delete(&ke->pfrke_sa, &mask, head); + } else + rn = rn_delete(&ke->pfrke_sa, NULL, head); + + if (rn == NULL) { + printf("pfr_unroute_kentry: delete failed.\n"); + return (-1); + } + return (0); +} + +static void +pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) +{ + bzero(ad, sizeof(*ad)); + if (ke == NULL) + return; + ad->pfra_af = ke->pfrke_af; + ad->pfra_net = ke->pfrke_net; + ad->pfra_not = ke->pfrke_not; + if (ad->pfra_af == AF_INET) + ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr; + else if (ad->pfra_af == AF_INET6) + ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr; +} + +static int +pfr_walktree(struct radix_node *rn, void *arg) +{ + struct pfr_kentry *ke = (struct pfr_kentry *)rn; + struct pfr_walktree *w = arg; + + switch (w->pfrw_op) { + case PFRW_MARK: + ke->pfrke_mark = 0; + break; + case PFRW_SWEEP: + if (ke->pfrke_mark) + break; + /* FALLTHROUGH */ + case PFRW_ENQUEUE: + SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq); + w->pfrw_cnt++; + break; + case PFRW_GET_ADDRS: + if (w->pfrw_free-- > 0) { + pfr_copyout_addr(w->pfrw_addr, ke); + w->pfrw_addr++; + } + break; + case PFRW_GET_ASTATS: + if (w->pfrw_free-- > 0) { + struct pfr_astats as; + + pfr_copyout_addr(&as.pfras_a, ke); + + if (ke->pfrke_counters) { + bcopy(ke->pfrke_counters->pfrkc_packets, + as.pfras_packets, sizeof(as.pfras_packets)); + bcopy(ke->pfrke_counters->pfrkc_bytes, + as.pfras_bytes, sizeof(as.pfras_bytes)); + } else { + bzero(as.pfras_packets, sizeof(as.pfras_packets)); + bzero(as.pfras_bytes, sizeof(as.pfras_bytes)); + as.pfras_a.pfra_fback = PFR_FB_NOCOUNT; + } + as.pfras_tzero = ke->pfrke_tzero; + + bcopy(&as, w->pfrw_astats, sizeof(as)); + w->pfrw_astats++; + } + break; + case PFRW_POOL_GET: + if (ke->pfrke_not) + break; /* negative entries are ignored */ + if (!w->pfrw_cnt--) { + w->pfrw_kentry = ke; + return (1); /* finish search */ + } + break; + case PFRW_DYNADDR_UPDATE: + { + union sockaddr_union pfr_mask; + + if (ke->pfrke_af == AF_INET) { + if (w->pfrw_dyn->pfid_acnt4++ > 0) + break; + pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net); + w->pfrw_dyn->pfid_addr4 = *SUNION2PF(&ke->pfrke_sa, + AF_INET); + w->pfrw_dyn->pfid_mask4 = *SUNION2PF(&pfr_mask, + AF_INET); + } else if (ke->pfrke_af == AF_INET6){ + if (w->pfrw_dyn->pfid_acnt6++ > 0) + break; + pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); + w->pfrw_dyn->pfid_addr6 = *SUNION2PF(&ke->pfrke_sa, + 
AF_INET6); + w->pfrw_dyn->pfid_mask6 = *SUNION2PF(&pfr_mask, + AF_INET6); + } + break; + } + } + return (0); +} + +int +pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + int xdel = 0; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); + if (pfr_table_count(filter, flags) < 0) + return (ENOENT); + + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (!strcmp(p->pfrkt_anchor, PF_RESERVED_ANCHOR)) + continue; + if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) + pfr_setflags_ktables(&workq); + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) +{ + struct pfr_ktableworkq addq, changeq; + struct pfr_ktable *p, *q, *r, key; + int i, rv, xadd = 0; + long tzero = time_second; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + SLIST_INIT(&addq); + SLIST_INIT(&changeq); + for (i = 0; i < size; i++) { + bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)); + if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK, + flags & PFR_FLAG_USERIOCTL)) + senderr(EINVAL); + key.pfrkt_flags |= PFR_TFLAG_ACTIVE; + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p == NULL) { + p = pfr_create_ktable(&key.pfrkt_t, tzero, 1); + if (p == NULL) + senderr(ENOMEM); + SLIST_FOREACH(q, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(p, q)) + goto _skip; + } + SLIST_INSERT_HEAD(&addq, p, pfrkt_workq); + xadd++; + if (!key.pfrkt_anchor[0]) + goto _skip; + + /* find or create root table */ + bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor)); + r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (r != NULL) { + p->pfrkt_root = r; + goto _skip; + } + SLIST_FOREACH(q, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(&key, q)) { + p->pfrkt_root = q; + goto _skip; + } + } + key.pfrkt_flags = 0; + r = pfr_create_ktable(&key.pfrkt_t, 0, 1); + if (r == NULL) + senderr(ENOMEM); + SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); + p->pfrkt_root = r; + } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + SLIST_FOREACH(q, &changeq, pfrkt_workq) + if (!pfr_ktable_compare(&key, q)) + goto _skip; + p->pfrkt_nflags = (p->pfrkt_flags & + ~PFR_TFLAG_USRMASK) | key.pfrkt_flags; + SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq); + xadd++; + } +_skip: + ; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_insert_ktables(&addq); + pfr_setflags_ktables(&changeq); + } else + pfr_destroy_ktables(&addq, 0); + if (nadd != NULL) + *nadd = xadd; + return (0); +_bad: + pfr_destroy_ktables(&addq, 0); + return (rv); +} + +int +pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, *q, key; + int i, xdel = 0; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)); + if (pfr_validate_table(&key.pfrkt_t, 0, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + SLIST_FOREACH(q, &workq, pfrkt_workq) + if (!pfr_ktable_compare(p, q)) + goto _skip; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } +_skip: + ; + } + 
+ if (!(flags & PFR_FLAG_DUMMY)) + pfr_setflags_ktables(&workq); + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, + int flags) +{ + struct pfr_ktable *p; + int n, nn; + + PF_RULES_RASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); + n = nn = pfr_table_count(filter, flags); + if (n < 0) + return (ENOENT); + if (n > *size) { + *size = n; + return (0); + } + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (n-- <= 0) + continue; + bcopy(&p->pfrkt_t, tbl++, sizeof(*tbl)); + } + + KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n)); + + *size = nn; + return (0); +} + +int +pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, + int flags) +{ + struct pfr_ktable *p; + struct pfr_ktableworkq workq; + int n, nn; + long tzero = time_second; + + /* XXX PFR_FLAG_CLSTATS disabled */ + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); + n = nn = pfr_table_count(filter, flags); + if (n < 0) + return (ENOENT); + if (n > *size) { + *size = n; + return (0); + } + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (n-- <= 0) + continue; + bcopy(&p->pfrkt_ts, tbl++, sizeof(*tbl)); + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + } + if (flags & PFR_FLAG_CLSTATS) + pfr_clstats_ktables(&workq, tzero, + flags & PFR_FLAG_ADDRSTOO); + + KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n)); + + *size = nn; + return (0); +} + +int +pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, key; + int i, xzero = 0; + long tzero = time_second; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t)); + if (pfr_validate_table(&key.pfrkt_t, 0, 0)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL) { + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xzero++; + } + } + if (!(flags & PFR_FLAG_DUMMY)) + pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); + if (nzero != NULL) + *nzero = xzero; + return (0); +} + +int +pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, + int *nchange, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, *q, key; + int i, xchange = 0, xdel = 0; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + if ((setflag & ~PFR_TFLAG_USRMASK) || + (clrflag & ~PFR_TFLAG_USRMASK) || + (setflag & clrflag)) + return (EINVAL); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t)); + if (pfr_validate_table(&key.pfrkt_t, 0, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + p->pfrkt_nflags = (p->pfrkt_flags | setflag) & + ~clrflag; + if (p->pfrkt_nflags == p->pfrkt_flags) + goto _skip; + SLIST_FOREACH(q, &workq, pfrkt_workq) + if (!pfr_ktable_compare(p, q)) + goto _skip; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) && + (clrflag & PFR_TFLAG_PERSIST) && + !(p->pfrkt_flags & PFR_TFLAG_REFERENCED)) + xdel++; + else + xchange++; + } +_skip: + ; + } + if (!(flags & PFR_FLAG_DUMMY)) + 
pfr_setflags_ktables(&workq); + if (nchange != NULL) + *nchange = xchange; + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + struct pf_ruleset *rs; + int xdel = 0; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + rs = pf_find_or_create_ruleset(trs->pfrt_anchor); + if (rs == NULL) + return (ENOMEM); + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + if (ticket != NULL) + *ticket = ++rs->tticket; + rs->topen = 1; + } else + pf_remove_if_empty_ruleset(rs); + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nadd, int *naddr, u_int32_t ticket, int flags) +{ + struct pfr_ktableworkq tableq; + struct pfr_kentryworkq addrq; + struct pfr_ktable *kt, *rt, *shadow, key; + struct pfr_kentry *p; + struct pfr_addr *ad; + struct pf_ruleset *rs; + int i, rv, xadd = 0, xaddr = 0; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO); + if (size && !(flags & PFR_FLAG_ADDRSTOO)) + return (EINVAL); + if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + rs = pf_find_ruleset(tbl->pfrt_anchor); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (EBUSY); + tbl->pfrt_flags |= PFR_TFLAG_INACTIVE; + SLIST_INIT(&tableq); + kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); + if (kt == NULL) { + kt = pfr_create_ktable(tbl, 0, 1); + if (kt == NULL) + return (ENOMEM); + SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq); + xadd++; + if (!tbl->pfrt_anchor[0]) + goto _skip; + + /* find or create root table */ + bzero(&key, sizeof(key)); + strlcpy(key.pfrkt_name, tbl->pfrt_name, sizeof(key.pfrkt_name)); + rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (rt != NULL) { + kt->pfrkt_root = rt; + goto _skip; + } + rt = pfr_create_ktable(&key.pfrkt_t, 0, 1); + if (rt == NULL) { + pfr_destroy_ktables(&tableq, 0); + return (ENOMEM); + } + SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq); + kt->pfrkt_root = rt; + } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE)) + xadd++; +_skip: + shadow = pfr_create_ktable(tbl, 0, 0); + if (shadow == NULL) { + pfr_destroy_ktables(&tableq, 0); + return (ENOMEM); + } + SLIST_INIT(&addrq); + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + senderr(EINVAL); + if (pfr_lookup_addr(shadow, ad, 1) != NULL) + continue; + p = pfr_create_kentry(ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(shadow, p)) { + pfr_destroy_kentry(p); + continue; + } + SLIST_INSERT_HEAD(&addrq, p, pfrke_workq); + xaddr++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (kt->pfrkt_shadow != NULL) + pfr_destroy_ktable(kt->pfrkt_shadow, 1); + kt->pfrkt_flags |= PFR_TFLAG_INACTIVE; + pfr_insert_ktables(&tableq); + shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ? 
+ xaddr : NO_ADDRESSES; + kt->pfrkt_shadow = shadow; + } else { + pfr_clean_node_mask(shadow, &addrq); + pfr_destroy_ktable(shadow, 0); + pfr_destroy_ktables(&tableq, 0); + pfr_destroy_kentries(&addrq); + } + if (nadd != NULL) + *nadd = xadd; + if (naddr != NULL) + *naddr = xaddr; + return (0); +_bad: + pfr_destroy_ktable(shadow, 0); + pfr_destroy_ktables(&tableq, 0); + pfr_destroy_kentries(&addrq); + return (rv); +} + +int +pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + struct pf_ruleset *rs; + int xdel = 0; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + rs = pf_find_ruleset(trs->pfrt_anchor); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (0); + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + rs->topen = 0; + pf_remove_if_empty_ruleset(rs); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, + int *nchange, int flags) +{ + struct pfr_ktable *p, *q; + struct pfr_ktableworkq workq; + struct pf_ruleset *rs; + int xadd = 0, xchange = 0; + long tzero = time_second; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + rs = pf_find_ruleset(trs->pfrt_anchor); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (EBUSY); + + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + if (p->pfrkt_flags & PFR_TFLAG_ACTIVE) + xchange++; + else + xadd++; + } + + if (!(flags & PFR_FLAG_DUMMY)) { + for (p = SLIST_FIRST(&workq); p != NULL; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); + pfr_commit_ktable(p, tzero); + } + rs->topen = 0; + pf_remove_if_empty_ruleset(rs); + } + if (nadd != NULL) + *nadd = xadd; + if (nchange != NULL) + *nchange = xchange; + + return (0); +} + +static void +pfr_commit_ktable(struct pfr_ktable *kt, long tzero) +{ + struct pfr_ktable *shadow = kt->pfrkt_shadow; + int nflags; + + PF_RULES_WASSERT(); + + if (shadow->pfrkt_cnt == NO_ADDRESSES) { + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + pfr_clstats_ktable(kt, tzero, 1); + } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) { + /* kt might contain addresses */ + struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq; + struct pfr_kentry *p, *q, *next; + struct pfr_addr ad; + + pfr_enqueue_addrs(shadow, &addrq, NULL, 0); + pfr_mark_addrs(kt); + SLIST_INIT(&addq); + SLIST_INIT(&changeq); + SLIST_INIT(&delq); + SLIST_INIT(&garbageq); + pfr_clean_node_mask(shadow, &addrq); + for (p = SLIST_FIRST(&addrq); p != NULL; p = next) { + next = SLIST_NEXT(p, pfrke_workq); /* XXX */ + pfr_copyout_addr(&ad, p); + q = pfr_lookup_addr(kt, &ad, 1); + if (q != NULL) { + if (q->pfrke_not != p->pfrke_not) + SLIST_INSERT_HEAD(&changeq, q, + pfrke_workq); + q->pfrke_mark = 1; + SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq); + } else { + p->pfrke_tzero = tzero; + SLIST_INSERT_HEAD(&addq, p, pfrke_workq); + } + } + pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY); + pfr_insert_kentries(kt, &addq, tzero); + pfr_remove_kentries(kt, &delq); + pfr_clstats_kentries(&changeq, tzero, 
INVERT_NEG_FLAG); + pfr_destroy_kentries(&garbageq); + } else { + /* kt cannot contain addresses */ + SWAP(struct radix_node_head *, kt->pfrkt_ip4, + shadow->pfrkt_ip4); + SWAP(struct radix_node_head *, kt->pfrkt_ip6, + shadow->pfrkt_ip6); + SWAP(int, kt->pfrkt_cnt, shadow->pfrkt_cnt); + pfr_clstats_ktable(kt, tzero, 1); + } + nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) | + (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE) + & ~PFR_TFLAG_INACTIVE; + pfr_destroy_ktable(shadow, 0); + kt->pfrkt_shadow = NULL; + pfr_setflags_ktable(kt, nflags); +} + +static int +pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved) +{ + int i; + + if (!tbl->pfrt_name[0]) + return (-1); + if (no_reserved && !strcmp(tbl->pfrt_anchor, PF_RESERVED_ANCHOR)) + return (-1); + if (tbl->pfrt_name[PF_TABLE_NAME_SIZE-1]) + return (-1); + for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++) + if (tbl->pfrt_name[i]) + return (-1); + if (pfr_fix_anchor(tbl->pfrt_anchor)) + return (-1); + if (tbl->pfrt_flags & ~allowedflags) + return (-1); + return (0); +} + +/* + * Rewrite anchors referenced by tables to remove slashes + * and check for validity. + */ +static int +pfr_fix_anchor(char *anchor) +{ + size_t siz = MAXPATHLEN; + int i; + + if (anchor[0] == '/') { + char *path; + int off; + + path = anchor; + off = 1; + while (*++path == '/') + off++; + bcopy(path, anchor, siz - off); + memset(anchor + siz - off, 0, off); + } + if (anchor[siz - 1]) + return (-1); + for (i = strlen(anchor); i < siz; i++) + if (anchor[i]) + return (-1); + return (0); +} + +static int +pfr_table_count(struct pfr_table *filter, int flags) +{ + struct pf_ruleset *rs; + + PF_RULES_ASSERT(); + + if (flags & PFR_FLAG_ALLRSETS) + return (pfr_ktable_cnt); + if (filter->pfrt_anchor[0]) { + rs = pf_find_ruleset(filter->pfrt_anchor); + return ((rs != NULL) ? 
rs->tables : -1); + } + return (pf_main_ruleset.tables); +} + +static int +pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) +{ + if (flags & PFR_FLAG_ALLRSETS) + return (0); + if (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor)) + return (1); + return (0); +} + +static void +pfr_insert_ktables(struct pfr_ktableworkq *workq) +{ + struct pfr_ktable *p; + + SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_insert_ktable(p); +} + +static void +pfr_insert_ktable(struct pfr_ktable *kt) +{ + + PF_RULES_WASSERT(); + + RB_INSERT(pfr_ktablehead, &pfr_ktables, kt); + pfr_ktable_cnt++; + if (kt->pfrkt_root != NULL) + if (!kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++) + pfr_setflags_ktable(kt->pfrkt_root, + kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR); +} + +static void +pfr_setflags_ktables(struct pfr_ktableworkq *workq) +{ + struct pfr_ktable *p, *q; + + for (p = SLIST_FIRST(workq); p; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); + pfr_setflags_ktable(p, p->pfrkt_nflags); + } +} + +static void +pfr_setflags_ktable(struct pfr_ktable *kt, int newf) +{ + struct pfr_kentryworkq addrq; + + PF_RULES_WASSERT(); + + if (!(newf & PFR_TFLAG_REFERENCED) && + !(newf & PFR_TFLAG_PERSIST)) + newf &= ~PFR_TFLAG_ACTIVE; + if (!(newf & PFR_TFLAG_ACTIVE)) + newf &= ~PFR_TFLAG_USRMASK; + if (!(newf & PFR_TFLAG_SETMASK)) { + RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt); + if (kt->pfrkt_root != NULL) + if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]) + pfr_setflags_ktable(kt->pfrkt_root, + kt->pfrkt_root->pfrkt_flags & + ~PFR_TFLAG_REFDANCHOR); + pfr_destroy_ktable(kt, 1); + pfr_ktable_cnt--; + return; + } + if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_remove_kentries(kt, &addrq); + } + if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) { + pfr_destroy_ktable(kt->pfrkt_shadow, 1); + kt->pfrkt_shadow = NULL; + } + kt->pfrkt_flags = newf; +} + +static void +pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse) +{ + struct pfr_ktable *p; + + SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_clstats_ktable(p, tzero, recurse); +} + +static void +pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse) +{ + struct pfr_kentryworkq addrq; + + if (recurse) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_clstats_kentries(&addrq, tzero, 0); + } + bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets)); + bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes)); + kt->pfrkt_match = kt->pfrkt_nomatch = 0; + kt->pfrkt_tzero = tzero; +} + +static struct pfr_ktable * +pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) +{ + struct pfr_ktable *kt; + struct pf_ruleset *rs; + + PF_RULES_WASSERT(); + + kt = malloc(sizeof(*kt), M_PFTABLE, M_NOWAIT|M_ZERO); + if (kt == NULL) + return (NULL); + kt->pfrkt_t = *tbl; + + if (attachruleset) { + rs = pf_find_or_create_ruleset(tbl->pfrt_anchor); + if (!rs) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + kt->pfrkt_rs = rs; + rs->tables++; + } + + if (!rn_inithead((void **)&kt->pfrkt_ip4, + offsetof(struct sockaddr_in, sin_addr) * 8) || + !rn_inithead((void **)&kt->pfrkt_ip6, + offsetof(struct sockaddr_in6, sin6_addr) * 8)) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + kt->pfrkt_tzero = tzero; + + return (kt); +} + +static void +pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) +{ + struct pfr_ktable *p, *q; + + for (p = SLIST_FIRST(workq); p; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); + pfr_destroy_ktable(p, flushaddr); + } +} + +static void 
+pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) +{ + struct pfr_kentryworkq addrq; + + if (flushaddr) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_clean_node_mask(kt, &addrq); + pfr_destroy_kentries(&addrq); + } + if (kt->pfrkt_ip4 != NULL) + rn_detachhead((void **)&kt->pfrkt_ip4); + if (kt->pfrkt_ip6 != NULL) + rn_detachhead((void **)&kt->pfrkt_ip6); + if (kt->pfrkt_shadow != NULL) + pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr); + if (kt->pfrkt_rs != NULL) { + kt->pfrkt_rs->tables--; + pf_remove_if_empty_ruleset(kt->pfrkt_rs); + } + free(kt, M_PFTABLE); +} + +static int +pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) +{ + int d; + + if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE))) + return (d); + return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor)); +} + +static struct pfr_ktable * +pfr_lookup_table(struct pfr_table *tbl) +{ + /* struct pfr_ktable starts like a struct pfr_table */ + return (RB_FIND(pfr_ktablehead, &pfr_ktables, + (struct pfr_ktable *)tbl)); +} + +int +pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +{ + struct pfr_kentry *ke = NULL; + int match; + + PF_RULES_RASSERT(); + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (0); + + switch (af) { +#ifdef INET + case AF_INET: + { + struct sockaddr_in sin; + + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&sin, &kt->pfrkt_ip4->rh); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 sin6; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&sin6, &kt->pfrkt_ip6->rh); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } +#endif /* INET6 */ + } + match = (ke && !ke->pfrke_not); + if (match) + kt->pfrkt_match++; + else + kt->pfrkt_nomatch++; + return (match); +} + +void +pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, + u_int64_t len, int dir_out, int op_pass, int notrule) +{ + struct pfr_kentry *ke = NULL; + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return; + + switch (af) { +#ifdef INET + case AF_INET: + { + struct sockaddr_in sin; + + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&sin, &kt->pfrkt_ip4->rh); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 sin6; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&sin6, &kt->pfrkt_ip6->rh); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } +#endif /* INET6 */ + default: + panic("%s: unknown address family %u", __func__, af); + } + if ((ke == NULL || ke->pfrke_not) != notrule) { + if (op_pass != PFR_OP_PASS) + printf("pfr_update_stats: assertion failed.\n"); + op_pass = PFR_OP_XPASS; + } + kt->pfrkt_packets[dir_out][op_pass]++; + kt->pfrkt_bytes[dir_out][op_pass] += len; + if (ke != NULL && op_pass != PFR_OP_XPASS && +
(kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) { + if (ke->pfrke_counters == NULL) + ke->pfrke_counters = uma_zalloc(V_pfr_kcounters_z, + M_NOWAIT | M_ZERO); + if (ke->pfrke_counters != NULL) { + ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++; + ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len; + } + } +} + +struct pfr_ktable * +pfr_attach_table(struct pf_ruleset *rs, char *name) +{ + struct pfr_ktable *kt, *rt; + struct pfr_table tbl; + struct pf_anchor *ac = rs->anchor; + + PF_RULES_WASSERT(); + + bzero(&tbl, sizeof(tbl)); + strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); + if (ac != NULL) + strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor)); + kt = pfr_lookup_table(&tbl); + if (kt == NULL) { + kt = pfr_create_ktable(&tbl, time_second, 1); + if (kt == NULL) + return (NULL); + if (ac != NULL) { + bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); + rt = pfr_lookup_table(&tbl); + if (rt == NULL) { + rt = pfr_create_ktable(&tbl, 0, 1); + if (rt == NULL) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + pfr_insert_ktable(rt); + } + kt->pfrkt_root = rt; + } + pfr_insert_ktable(kt); + } + if (!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++) + pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED); + return (kt); +} + +void +pfr_detach_table(struct pfr_ktable *kt) +{ + + PF_RULES_WASSERT(); + KASSERT(kt->pfrkt_refcnt[PFR_REFCNT_RULE] > 0, ("%s: refcount %d\n", + __func__, kt->pfrkt_refcnt[PFR_REFCNT_RULE])); + + if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE]) + pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED); +} + +int +pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, + sa_family_t af) +{ + struct pf_addr *addr, *cur, *mask; + union sockaddr_union uaddr, umask; + struct pfr_kentry *ke, *ke2 = NULL; + int idx = -1, use_counter = 0; + + switch (af) { + case AF_INET: + uaddr.sin.sin_len = sizeof(struct sockaddr_in); + uaddr.sin.sin_family = AF_INET; + break; + case AF_INET6: + uaddr.sin6.sin6_len = sizeof(struct sockaddr_in6); + uaddr.sin6.sin6_family = AF_INET6; + break; + } + addr = SUNION2PF(&uaddr, af); + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (-1); + + if (pidx != NULL) + idx = *pidx; + if (counter != NULL && idx >= 0) + use_counter = 1; + if (idx < 0) + idx = 0; + +_next_block: + ke = pfr_kentry_byidx(kt, idx, af); + if (ke == NULL) { + kt->pfrkt_nomatch++; + return (1); + } + pfr_prepare_network(&umask, af, ke->pfrke_net); + cur = SUNION2PF(&ke->pfrke_sa, af); + mask = SUNION2PF(&umask, af); + + if (use_counter) { + /* is supplied address within block? 
*/ + if (!PF_MATCHA(0, cur, mask, counter, af)) { + /* no, go to next block in table */ + idx++; + use_counter = 0; + goto _next_block; + } + PF_ACPY(addr, counter, af); + } else { + /* use first address of block */ + PF_ACPY(addr, cur, af); + } + + if (!KENTRY_NETWORK(ke)) { + /* this is a single IP address - no possible nested block */ + PF_ACPY(counter, addr, af); + *pidx = idx; + kt->pfrkt_match++; + return (0); + } + for (;;) { + /* we don't want to use a nested block */ + switch (af) { + case AF_INET: + ke2 = (struct pfr_kentry *)rn_match(&uaddr, + &kt->pfrkt_ip4->rh); + break; + case AF_INET6: + ke2 = (struct pfr_kentry *)rn_match(&uaddr, + &kt->pfrkt_ip6->rh); + break; + } + /* no need to check KENTRY_RNF_ROOT() here */ + if (ke2 == ke) { + /* lookup returned the same block - perfect */ + PF_ACPY(counter, addr, af); + *pidx = idx; + kt->pfrkt_match++; + return (0); + } + + /* we need to increase the counter past the nested block */ + pfr_prepare_network(&umask, AF_INET, ke2->pfrke_net); + PF_POOLMASK(addr, addr, SUNION2PF(&umask, af), &pfr_ffaddr, af); + PF_AINC(addr, af); + if (!PF_MATCHA(0, cur, mask, addr, af)) { + /* ok, we reached the end of our main block */ + /* go to next block in table */ + idx++; + use_counter = 0; + goto _next_block; + } + } +} + +static struct pfr_kentry * +pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) +{ + struct pfr_walktree w; + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_POOL_GET; + w.pfrw_cnt = idx; + + switch (af) { +#ifdef INET + case AF_INET: + kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w); + return (w.pfrw_kentry); +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh, pfr_walktree, &w); + return (w.pfrw_kentry); +#endif /* INET6 */ + default: + return (NULL); + } +} + +void +pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn) +{ + struct pfr_walktree w; + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_DYNADDR_UPDATE; + w.pfrw_dyn = dyn; + + dyn->pfid_acnt4 = 0; + dyn->pfid_acnt6 = 0; + if (!dyn->pfid_af || dyn->pfid_af == AF_INET) + kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w); + if (!dyn->pfid_af || dyn->pfid_af == AF_INET6) + kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh, pfr_walktree, &w); +}
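
Editor's note: the sketches below are illustrative additions and not part of the commit. pfr_validate_addr() in the diff above rejects an address whose host bits extend past its prefix length. This is a minimal, self-contained userspace sketch of that check over a plain byte-array address; the helper name check_host_bits() is hypothetical and not part of pf.

#include <stdint.h>
#include <stdio.h>

/* Return 0 iff no bit is set beyond the first 'net' prefix bits. */
static int
check_host_bits(const uint8_t *addr, int alen, int net)
{
        int i;

        if (net % 8 && (addr[net / 8] & (0xFF >> (net % 8))))
                return (-1);    /* host bits in the partial byte */
        for (i = (net + 7) / 8; i < alen; i++)
                if (addr[i])
                        return (-1);    /* host bits in a full byte */
        return (0);
}

int
main(void)
{
        uint8_t good[4] = { 192, 0, 2, 0 };     /* 192.0.2.0/24 */
        uint8_t bad[4] = { 192, 0, 2, 1 };      /* 192.0.2.1/24 */

        /* Prints "0 -1": a clean /24 network passes, a host address fails. */
        printf("%d %d\n", check_host_bits(good, 4, 24),
            check_host_bits(bad, 4, 24));
        return (0);
}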
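Editor's note: pfr_prepare_network() converts a prefix length into a radix netmask, filling 32-bit words one at a time for IPv6. Below is a standalone IPv4-only sketch of the same computation; it uses the unsigned form ~0u << n where the kernel source writes htonl(-1 << (32-net)), which yields the same bit pattern. make_v4_mask() is a hypothetical name used for illustration.

#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
make_v4_mask(int net)   /* net in [0, 32] */
{

        return (net ? htonl(~0u << (32 - net)) : 0);
}

int
main(void)
{
        struct in_addr m;

        m.s_addr = make_v4_mask(24);
        printf("/24 -> %s\n", inet_ntoa(m));    /* 255.255.255.0 */
        m.s_addr = make_v4_mask(1);
        printf("/1  -> %s\n", inet_ntoa(m));    /* 128.0.0.0 */
        return (0);
}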
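Editor's note: every radix traversal in this file funnels through one callback, pfr_walktree(), parameterized by an operation code and scratch fields in struct pfr_walktree; a nonzero return aborts the walk (PFRW_POOL_GET uses this to stop at the selected entry). The toy program below illustrates that walker pattern over a plain array; the names walk_op, walk_ctx, visit, and walk are all hypothetical.

#include <stdio.h>

enum walk_op { W_COUNT, W_SUM };

struct walk_ctx {
        enum walk_op    op;
        int             acc;    /* scratch, like pfrw_cnt/pfrw_free */
};

static int
visit(int v, void *arg)
{
        struct walk_ctx *w = arg;

        switch (w->op) {
        case W_COUNT:
                w->acc++;
                break;
        case W_SUM:
                w->acc += v;
                break;
        }
        return (0);     /* nonzero would abort the walk */
}

static int
walk(const int *a, int n, int (*f)(int, void *), void *arg)
{
        int i, rv;

        for (i = 0; i < n; i++)
                if ((rv = f(a[i], arg)) != 0)
                        return (rv);
        return (0);
}

int
main(void)
{
        int a[] = { 1, 2, 3 };
        struct walk_ctx w = { W_SUM, 0 };

        walk(a, 3, visit, &w);
        printf("%d\n", w.acc);  /* prints 6 */
        return (0);
}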
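Editor's note: the slash handling in pfr_fix_anchor() collapses any run of leading '/' characters and zero-fills the tail of the fixed-size anchor buffer. A userspace sketch follows, assuming a caller-supplied buffer size in place of MAXPATHLEN; strip_slashes() is a hypothetical name, and memmove() stands in for the kernel's bcopy() since source and destination overlap.

#include <stdio.h>
#include <string.h>

static void
strip_slashes(char *anchor, size_t siz)
{
        if (anchor[0] == '/') {
                char *path = anchor;
                size_t off = 1;

                while (*++path == '/')
                        off++;
                memmove(anchor, path, siz - off);
                memset(anchor + siz - off, 0, off);
        }
}

int
main(void)
{
        char anchor[32] = "///ftp/allowed";

        strip_slashes(anchor, sizeof(anchor));
        printf("%s\n", anchor);         /* prints "ftp/allowed" */
        return (0);
}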
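Editor's note: in pfr_pool_get() above, once the candidate address falls inside a nested (more specific) block, the code jumps to the first address after that block: PF_POOLMASK() ORs the host bits of the nested mask into the address and PF_AINC() increments the result. This is a host-byte-order IPv4 sketch of that arithmetic; skip_nested_v4() is a hypothetical helper. If the incremented address also leaves the enclosing block, the loop in the diff advances idx to the next table entry instead.

#include <stdint.h>
#include <stdio.h>

/* First address after the 'nested_net' block containing addr_h. */
static uint32_t
skip_nested_v4(uint32_t addr_h, int nested_net)
{
        uint32_t mask_h = nested_net ? ~0u << (32 - nested_net) : 0;

        /* addr | ~mask is the block's last address; add one to leave it. */
        return ((addr_h | ~mask_h) + 1);
}

int
main(void)
{
        uint32_t a = (10u << 24) | (1 << 8) | 5;        /* 10.0.1.5 */
        uint32_t n = skip_nested_v4(a, 24);             /* past 10.0.1.0/24 */

        /* Prints "10.0.2.0". */
        printf("%u.%u.%u.%u\n", n >> 24, (n >> 16) & 0xff,
            (n >> 8) & 0xff, n & 0xff);
        return (0);
}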