Update to FreeBSD 8.4

author: Sebastian Huber <sebastian.huber@embedded-brains.de> 2013-11-04 11:33:00 +0100
committer: Sebastian Huber <sebastian.huber@embedded-brains.de> 2013-11-04 15:28:21 +0100
commit: af5333e0a02b2295304d4e029b15ee15a4fe2b3a (patch)
tree: c5c43680d374f58b487eeeaf18fb7ec6b84ba074 /freebsd/sys/net
parent: BUS_SPACE(9): Use simple memory model for ARM (diff)
download: rtems-libbsd-af5333e0a02b2295304d4e029b15ee15a4fe2b3a.tar.bz2
46 files changed, 2446 insertions, 1042 deletions
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c
index d9223313..179d5f0a 100644
--- a/freebsd/sys/net/bpf.c
+++ b/freebsd/sys/net/bpf.c
@@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$");
 
 #include <rtems/bsd/sys/types.h>
 #include <rtems/bsd/sys/param.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
@@ -68,6 +70,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/socket.h>
 
 #include <net/if.h>
+#define	BPF_INTERNAL
 #include <net/bpf.h>
 #include <net/bpf_buffer.h>
 #ifdef BPF_JITTER
@@ -141,6 +144,7 @@ static int		bpf_bpfd_cnt;
 
 static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
 static void	bpf_detachd(struct bpf_d *);
+static void	bpf_detachd_locked(struct bpf_d *);
 static void	bpf_freed(struct bpf_d *);
 static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
 		    struct sockaddr *, int *, struct bpf_insn *);
@@ -152,7 +156,7 @@ static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
 		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
 		    struct timeval *);
 static void	reset_d(struct bpf_d *);
-static int	 bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
+static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
 static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
 static int	bpf_setdlt(struct bpf_d *, u_int);
 static void	filt_bpfdetach(struct knote *);
@@ -170,6 +174,12 @@ SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
 SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
     bpf_stats_sysctl, "bpf statistics portal");
 
+static VNET_DEFINE(int, bpf_optimize_writers) = 0;
+#define	V_bpf_optimize_writers VNET(bpf_optimize_writers)
+SYSCTL_VNET_INT(_net_bpf, OID_AUTO, optimize_writers,
+    CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0,
+    "Do not send packets until BPF program is set");
+
 static	d_open_t	bpfopen;
 static	d_read_t	bpfread;
 static	d_write_t	bpfwrite;
@@ -191,6 +201,37 @@ static struct cdevsw bpf_cdevsw = {
 static struct filterops bpfread_filtops =
 	{ 1, NULL, filt_bpfdetach, filt_bpfread };
 
+eventhandler_tag	bpf_ifdetach_cookie = NULL;
+
+/*
+ * LOCKING MODEL USED BY BPF:
+ * Locks:
+ * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
+ * some global counters and every bpf_if reference.
+ * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
+ * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
+ *   used by bpf_mtap code.
+ *
+ * Lock order:
+ *
+ * Global lock, interface lock, descriptor lock
+ *
+ * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
+ * working model. In many places (like bpf_detachd) we start with BPF descriptor
+ * (and we need to at least rlock it to get reliable interface pointer). This
+ * gives us potential LOR. As a result, we use global lock to protect from bpf_if
+ * change in every such place.
+ *
+ * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
+ * 3) descriptor main wlock.
+ * Reading bd_bif can be protected by any of these locks, typically global lock.
+ *
+ * Changing read/write BPF filter is protected by the same three locks,
+ * the same applies for reading.
+ *
+ * Sleeping in global lock is not allowed due to bpfdetach() using it.
+ */
+
 /*
  * Wrapper functions for various buffering methods.  If the set of buffer
  * modes expands, we will probably want to introduce a switch data structure
@@ -284,7 +325,6 @@ bpf_canfreebuf(struct bpf_d *d)
 static int
 bpf_canwritebuf(struct bpf_d *d)
 {
-
 	BPFD_LOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
@@ -563,17 +603,92 @@ bad:
 static void
 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 {
+	int op_w;
+
+	BPF_LOCK_ASSERT();
+
+	/*
+	 * Save sysctl value to protect from sysctl change
+	 * between reads
+	 */
+	op_w = V_bpf_optimize_writers;
+
+	if (d->bd_bif != NULL)
+		bpf_detachd_locked(d);
 	/*
-	 * Point d at bp, and add d to the interface's list of listeners.
-	 * Finally, point the driver's bpf cookie at the interface so
-	 * it will divert packets to bpf.
+	 * Point d at bp, and add d to the interface's list.
+	 * Since there are many applications using BPF for
+	 * sending raw packets only (dhcpd, cdpd are good examples)
+	 * we can delay adding d to the list of active listeners until
+	 * some filter is configured.
 	 */
-	BPFIF_LOCK(bp);
+
+	BPFIF_WLOCK(bp);
+	BPFD_LOCK(d);
+
 	d->bd_bif = bp;
-	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+	if (op_w != 0) {
+		/* Add to writers-only list */
+		LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
+		/*
+		 * We decrement bd_writer on every filter set operation.
+		 * First BIOCSETF is done by pcap_open_live() to set up
+		 * snap length. After that the application usually sets its own filter.
+		 */
+		d->bd_writer = 2;
+	} else
+		LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+	BPFD_UNLOCK(d);
+	BPFIF_WUNLOCK(bp);
 
 	bpf_bpfd_cnt++;
-	BPFIF_UNLOCK(bp);
+
+	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
+	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
+
+	if (op_w == 0)
+		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
+}
+
+/*
+ * Add d to the list of active bp filters.
+ * Requires bpf_attachd() to be called first.
+ */
+static void
+bpf_upgraded(struct bpf_d *d)
+{
+	struct bpf_if *bp;
+
+	BPF_LOCK_ASSERT();
+
+	bp = d->bd_bif;
+
+	/*
+	 * Filter can be set several times without specifying interface.
+	 * Mark d as reader and exit.
+	 */
+	if (bp == NULL) {
+		BPFD_LOCK(d);
+		d->bd_writer = 0;
+		BPFD_UNLOCK(d);
+		return;
+	}
+
+	BPFIF_WLOCK(bp);
+	BPFD_LOCK(d);
+
+	/* Remove from writers-only list */
+	LIST_REMOVE(d, bd_next);
+	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+	/* Mark d as reader */
+	d->bd_writer = 0;
+
+	BPFD_UNLOCK(d);
+	BPFIF_WUNLOCK(bp);
+
+	CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
 
 	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
 }
@@ -584,26 +699,47 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 static void
 bpf_detachd(struct bpf_d *d)
 {
+	BPF_LOCK();
+	bpf_detachd_locked(d);
+	BPF_UNLOCK();
+}
+
+static void
+bpf_detachd_locked(struct bpf_d *d)
+{
 	int error;
 	struct bpf_if *bp;
 	struct ifnet *ifp;
 
-	bp = d->bd_bif;
-	BPFIF_LOCK(bp);
+	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
+
+	BPF_LOCK_ASSERT();
+
+	/* Check if descriptor is attached */
+	if ((bp = d->bd_bif) == NULL)
+		return;
+
+	BPFIF_WLOCK(bp);
 	BPFD_LOCK(d);
-	ifp = d->bd_bif->bif_ifp;
+
+	/* Save bd_writer value */
+	error = d->bd_writer;
 
 	/*
 	 * Remove d from the interface's descriptor list.
 	 */
 	LIST_REMOVE(d, bd_next);
 
-	bpf_bpfd_cnt--;
+	ifp = bp->bif_ifp;
 	d->bd_bif = NULL;
 	BPFD_UNLOCK(d);
-	BPFIF_UNLOCK(bp);
+	BPFIF_WUNLOCK(bp);
+
+	bpf_bpfd_cnt--;
 
-	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
+	/* Call event handler iff d is attached */
+	if (error == 0)
+		EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
 
 	/*
 	 * Check if this descriptor had requested promiscuous mode.
@@ -642,14 +778,11 @@ bpf_dtor(void *data)
 	d->bd_state = BPF_IDLE;
 	BPFD_UNLOCK(d);
 	funsetown(&d->bd_sigio);
-	mtx_lock(&bpf_mtx);
-	if (d->bd_bif)
-		bpf_detachd(d);
-	mtx_unlock(&bpf_mtx);
-	selwakeuppri(&d->bd_sel, PRINET);
+	bpf_detachd(d);
 #ifdef MAC
 	mac_bpfdesc_destroy(d);
 #endif /* MAC */
+	seldrain(&d->bd_sel);
 	knlist_destroy(&d->bd_sel.si_note);
 	callout_drain(&d->bd_callout);
 	bpf_freed(d);
@@ -665,7 +798,7 @@ static	int
 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	struct bpf_d *d;
-	int error;
+	int error, size;
 
 	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
 	error = devfs_set_cdevpriv(d, bpf_dtor);
@@ -683,14 +816,18 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 	d->bd_bufmode = BPF_BUFMODE_BUFFER;
 	d->bd_sig = SIGIO;
 	d->bd_direction = BPF_D_INOUT;
-	d->bd_pid = td->td_proc->p_pid;
+	BPF_PID_REFRESH(d, td);
 #ifdef MAC
 	mac_bpfdesc_init(d);
 	mac_bpfdesc_create(td->td_ucred, d);
 #endif
-	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
-	callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
-	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
+	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
+	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
+	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
+
+	/* Allocate default buffers */
+	size = d->bd_bufsize;
+	bpf_buffer_ioctl_sblen(d, &size);
 
 	return (0);
 }
@@ -720,7 +857,7 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag)
 	non_block = ((ioflag & O_NONBLOCK) != 0);
 
 	BPFD_LOCK(d);
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPF_PID_REFRESH_CUR(d);
 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
 		BPFD_UNLOCK(d);
 		return (EOPNOTSUPP);
@@ -766,7 +903,7 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag)
 			BPFD_UNLOCK(d);
 			return (EWOULDBLOCK);
 		}
-		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
+		error = msleep(d, &d->bd_lock, PRINET|PCATCH,
 		     "bpf", d->bd_rtout);
 		if (error == EINTR || error == ERESTART) {
 			BPFD_UNLOCK(d);
@@ -883,8 +1020,9 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 	if (error != 0)
 		return (error);
 
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPF_PID_REFRESH_CUR(d);
 	d->bd_wcount++;
+	/* XXX: locking required */
 	if (d->bd_bif == NULL) {
 		d->bd_wdcount++;
 		return (ENXIO);
@@ -905,6 +1043,7 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 	bzero(&dst, sizeof(dst));
 	m = NULL;
 	hlen = 0;
+	/* XXX: bpf_movein() can sleep */
 	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
 	    &m, &dst, &hlen, d->bd_wfilter);
 	if (error) {
@@ -964,7 +1103,7 @@ static void
 reset_d(struct bpf_d *d)
 {
 
-	mtx_assert(&d->bd_mtx, MA_OWNED);
+	BPFD_LOCK_ASSERT(d);
 
 	if ((d->bd_hbuf != NULL) &&
 	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
@@ -1030,7 +1169,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Refresh PID associated with this descriptor.
 	 */
 	BPFD_LOCK(d);
-	d->bd_pid = td->td_proc->p_pid;
+	BPF_PID_REFRESH(d, td);
 	if (d->bd_state == BPF_WAITING)
 		callout_stop(&d->bd_callout);
 	d->bd_state = BPF_IDLE;
@@ -1081,7 +1220,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	case BIOCGDLTLIST32:
 	case BIOCGRTIMEOUT32:
 	case BIOCSRTIMEOUT32:
+		BPFD_LOCK(d);
 		d->bd_compat32 = 1;
+		BPFD_UNLOCK(d);
 	}
 #endif
 
@@ -1126,7 +1267,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Get buffer len [for read()].
 	 */
 	case BIOCGBLEN:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_bufsize;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
@@ -1181,10 +1324,12 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Get current data link type.
 	 */
 	case BIOCGDLT:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			*(u_int *)addr = d->bd_bif->bif_dlt;
+		BPF_UNLOCK();
 		break;
 
 	/*
@@ -1199,6 +1344,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 			list32 = (struct bpf_dltlist32 *)addr;
 			dltlist.bfl_len = list32->bfl_len;
 			dltlist.bfl_list = PTRIN(list32->bfl_list);
+			BPF_LOCK();
 			if (d->bd_bif == NULL)
 				error = EINVAL;
 			else {
@@ -1206,31 +1352,37 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 				if (error == 0)
 					list32->bfl_len = dltlist.bfl_len;
 			}
+			BPF_UNLOCK();
 			break;
 		}
 #endif
 
 	case BIOCGDLTLIST:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Set data link type.
 	 */
 	case BIOCSDLT:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			error = bpf_setdlt(d, *(u_int *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Get interface name.
 	 */
 	case BIOCGETIF:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else {
@@ -1240,13 +1392,16 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 			strlcpy(ifr->ifr_name, ifp->if_xname,
 			    sizeof(ifr->ifr_name));
 		}
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Set interface.
 	 */
 	case BIOCSETIF:
+		BPF_LOCK();
 		error = bpf_setif(d, (struct ifreq *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
@@ -1329,7 +1484,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Set immediate mode.
 	 */
 	case BIOCIMMEDIATE:
+		BPFD_LOCK(d);
 		d->bd_immediate = *(u_int *)addr;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCVERSION:
@@ -1345,21 +1502,27 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Get "header already complete" flag
 	 */
 	case BIOCGHDRCMPLT:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_hdrcmplt;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Set "header already complete" flag
 	 */
 	case BIOCSHDRCMPLT:
+		BPFD_LOCK(d);
 		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Get packet direction flag
 	 */
 	case BIOCGDIRECTION:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_direction;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
@@ -1374,7 +1537,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 			case BPF_D_IN:
 			case BPF_D_INOUT:
 			case BPF_D_OUT:
+				BPFD_LOCK(d);
 				d->bd_direction = direction;
+				BPFD_UNLOCK(d);
 				break;
 			default:
 				error = EINVAL;
@@ -1383,26 +1548,38 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 		break;
 
 	case BIOCFEEDBACK:
+		BPFD_LOCK(d);
 		d->bd_feedback = *(u_int *)addr;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCLOCK:
+		BPFD_LOCK(d);
 		d->bd_locked = 1;
+		BPFD_UNLOCK(d);
 		break;
 
 	case FIONBIO:		/* Non-blocking I/O */
 		break;
 
 	case FIOASYNC:		/* Send signal on receive packets */
+		BPFD_LOCK(d);
 		d->bd_async = *(int *)addr;
+		BPFD_UNLOCK(d);
 		break;
 
 	case FIOSETOWN:
+		/*
+		 * XXX: Add some sort of locking here?
+		 * fsetown() can sleep.
+		 */
 		error = fsetown(*(int *)addr, &d->bd_sigio);
 		break;
 
 	case FIOGETOWN:
+		BPFD_LOCK(d);
 		*(int *)addr = fgetown(&d->bd_sigio);
+		BPFD_UNLOCK(d);
 		break;
 
 	/* This is deprecated, FIOSETOWN should be used instead. */
@@ -1423,16 +1600,23 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 
 			if (sig >= NSIG)
 				error = EINVAL;
-			else
+			else {
+				BPFD_LOCK(d);
 				d->bd_sig = sig;
+				BPFD_UNLOCK(d);
+			}
 			break;
 		}
 	case BIOCGRSIG:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_sig;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCGETBUFMODE:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_bufmode;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCSETBUFMODE:
@@ -1487,95 +1671,130 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 /*
  * Set d's packet filter program to fp.  If this file already has a filter,
  * free it and replace it.  Returns EINVAL for bogus requests.
+ *
+ * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
+ * since reading d->bd_bif can't be protected by d or interface lock due to
+ * lock order.
+ *
+ * Additionally, we have to acquire interface write lock because bpf_mtap() uses
+ * interface read lock to read all filters.
+ *
  */
 static int
 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 {
+#ifdef COMPAT_FREEBSD32
+	struct bpf_program fp_swab;
+	struct bpf_program32 *fp32;
+#endif
 	struct bpf_insn *fcode, *old;
-	u_int wfilter, flen, size;
 #ifdef BPF_JITTER
-	bpf_jit_filter *ofunc;
+	bpf_jit_filter *jfunc, *ofunc;
 #endif
-#ifdef COMPAT_FREEBSD32
-	struct bpf_program32 *fp32;
-	struct bpf_program fp_swab;
+	size_t size;
+	u_int flen;
+	int need_upgrade;
 
-	if (cmd == BIOCSETWF32 || cmd == BIOCSETF32 || cmd == BIOCSETFNR32) {
+#ifdef COMPAT_FREEBSD32
+	switch (cmd) {
+	case BIOCSETF32:
+	case BIOCSETWF32:
+	case BIOCSETFNR32:
 		fp32 = (struct bpf_program32 *)fp;
 		fp_swab.bf_len = fp32->bf_len;
 		fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
 		fp = &fp_swab;
-		if (cmd == BIOCSETWF32)
+		switch (cmd) {
+		case BIOCSETF32:
+			cmd = BIOCSETF;
+			break;
+		case BIOCSETWF32:
 			cmd = BIOCSETWF;
+			break;
+		}
+		break;
 	}
 #endif
-	if (cmd == BIOCSETWF) {
-		old = d->bd_wfilter;
-		wfilter = 1;
-#ifdef BPF_JITTER
-		ofunc = NULL;
-#endif
-	} else {
-		wfilter = 0;
-		old = d->bd_rfilter;
+
+	fcode = NULL;
 #ifdef BPF_JITTER
-		ofunc = d->bd_bfilter;
+	jfunc = ofunc = NULL;
 #endif
-	}
-	if (fp->bf_insns == NULL) {
-		if (fp->bf_len != 0)
+	need_upgrade = 0;
+
+	/*
+	 * Check new filter validness before acquiring any locks.
+	 * Allocate memory for new filter, if needed.
+	 */
+	flen = fp->bf_len;
+	if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
+		return (EINVAL);
+	size = flen * sizeof(*fp->bf_insns);
+	if (size > 0) {
+		/* We're setting up new filter.  Copy and check actual data. */
+		fcode = malloc(size, M_BPF, M_WAITOK);
+		if (copyin(fp->bf_insns, fcode, size) != 0 ||
+		    !bpf_validate(fcode, flen)) {
+			free(fcode, M_BPF);
 			return (EINVAL);
-		BPFD_LOCK(d);
-		if (wfilter)
-			d->bd_wfilter = NULL;
-		else {
-			d->bd_rfilter = NULL;
-#ifdef BPF_JITTER
-			d->bd_bfilter = NULL;
-#endif
-			if (cmd == BIOCSETF)
-				reset_d(d);
 		}
-		BPFD_UNLOCK(d);
-		if (old != NULL)
-			free((caddr_t)old, M_BPF);
 #ifdef BPF_JITTER
-		if (ofunc != NULL)
-			bpf_destroy_jit_filter(ofunc);
+		/* Filter is copied inside fcode and is perfectly valid. */
+		jfunc = bpf_jitter(fcode, flen);
 #endif
-		return (0);
 	}
-	flen = fp->bf_len;
-	if (flen > bpf_maxinsns)
-		return (EINVAL);
 
-	size = flen * sizeof(*fp->bf_insns);
-	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
-	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
-	    bpf_validate(fcode, (int)flen)) {
-		BPFD_LOCK(d);
-		if (wfilter)
-			d->bd_wfilter = fcode;
-		else {
-			d->bd_rfilter = fcode;
+	BPF_LOCK();
+
+	/*
+	 * Set up new filter.
+	 * Protect filter change by interface lock.
+	 * Additionally, we are protected by global lock here.
+	 */
+	if (d->bd_bif != NULL)
+		BPFIF_WLOCK(d->bd_bif);
+	BPFD_LOCK(d);
+	if (cmd == BIOCSETWF) {
+		old = d->bd_wfilter;
+		d->bd_wfilter = fcode;
+	} else {
+		old = d->bd_rfilter;
+		d->bd_rfilter = fcode;
 #ifdef BPF_JITTER
-			d->bd_bfilter = bpf_jitter(fcode, flen);
+		ofunc = d->bd_bfilter;
+		d->bd_bfilter = jfunc;
 #endif
-			if (cmd == BIOCSETF)
-				reset_d(d);
+		if (cmd == BIOCSETF)
+			reset_d(d);
+
+		if (fcode != NULL) {
+			/*
+			 * Do not require upgrade by first BIOCSETF
+			 * (used to set snaplen) by pcap_open_live().
+			 */
+			if (d->bd_writer != 0 && --d->bd_writer == 0)
+				need_upgrade = 1;
+			CTR4(KTR_NET, "%s: filter function set by pid %d, "
+			    "bd_writer counter %d, need_upgrade %d",
+			    __func__, d->bd_pid, d->bd_writer, need_upgrade);
 		}
-		BPFD_UNLOCK(d);
-		if (old != NULL)
-			free((caddr_t)old, M_BPF);
+	}
+	BPFD_UNLOCK(d);
+	if (d->bd_bif != NULL)
+		BPFIF_WUNLOCK(d->bd_bif);
+	if (old != NULL)
+		free(old, M_BPF);
 #ifdef BPF_JITTER
-		if (ofunc != NULL)
-			bpf_destroy_jit_filter(ofunc);
+	if (ofunc != NULL)
+		bpf_destroy_jit_filter(ofunc);
 #endif
 
-		return (0);
-	}
-	free((caddr_t)fcode, M_BPF);
-	return (EINVAL);
+	/* Move d to active readers list. */
+	if (need_upgrade)
+		bpf_upgraded(d);
+
+	BPF_UNLOCK();
+	return (0);
 }
 
 /*
@@ -1589,28 +1808,30 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 	struct bpf_if *bp;
 	struct ifnet *theywant;
 
+	BPF_LOCK_ASSERT();
+
 	theywant = ifunit(ifr->ifr_name);
 	if (theywant == NULL || theywant->if_bpf == NULL)
 		return (ENXIO);
 
 	bp = theywant->if_bpf;
 
+	/* Check if interface is not being detached from BPF */
+	BPFIF_RLOCK(bp);
+	if (bp->flags & BPFIF_FLAG_DYING) {
+		BPFIF_RUNLOCK(bp);
+		return (ENXIO);
+	}
+	BPFIF_RUNLOCK(bp);
+
 	/*
 	 * Behavior here depends on the buffering model.  If we're using
 	 * kernel memory buffers, then we can allocate them here.  If we're
 	 * using zero-copy, then the user process must have registered
 	 * buffers by the time we get here.  If not, return an error.
-	 *
-	 * XXXRW: There are locking issues here with multi-threaded use: what
-	 * if two threads try to set the interface at once?
 	 */
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
-		if (d->bd_sbuf == NULL)
-			bpf_buffer_alloc(d);
-		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
-		break;
-
 	case BPF_BUFMODE_ZBUF:
 		if (d->bd_sbuf == NULL)
 			return (EINVAL);
@@ -1619,15 +1840,8 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 	default:
 		panic("bpf_setif: bufmode %d", d->bd_bufmode);
 	}
-	if (bp != d->bd_bif) {
-		if (d->bd_bif)
-			/*
-			 * Detach if attached to something else.
-			 */
-			bpf_detachd(d);
-
+	if (bp != d->bd_bif)
 		bpf_attachd(d, bp);
-	}
 	BPFD_LOCK(d);
 	reset_d(d);
 	BPFD_UNLOCK(d);
@@ -1655,7 +1869,7 @@ bpfpoll(struct cdev *dev, int events, struct thread *td)
 	 */
 	revents = events & (POLLOUT | POLLWRNORM);
 	BPFD_LOCK(d);
-	d->bd_pid = td->td_proc->p_pid;
+	BPF_PID_REFRESH(d, td);
 	if (events & (POLLIN | POLLRDNORM)) {
 		if (bpf_ready(d))
 			revents |= events & (POLLIN | POLLRDNORM);
@@ -1690,7 +1904,7 @@ bpfkqfilter(struct cdev *dev, struct knote *kn)
 	 * Refresh PID associated with this descriptor.
 	 */
 	BPFD_LOCK(d);
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPF_PID_REFRESH_CUR(d);
 	kn->kn_fop = &bpfread_filtops;
 	kn->kn_hook = d;
 	knlist_add(&d->bd_sel.si_note, kn, 1);
@@ -1746,9 +1960,19 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 	struct timeval tv;
 
 	gottime = 0;
-	BPFIF_LOCK(bp);
+
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-		BPFD_LOCK(d);
+		/*
+		 * We are not using any locks for d here because:
+		 * 1) any filter change is protected by interface
+		 * write lock
+		 * 2) destroying/detaching d is protected by interface
+		 * write lock, too
+		 */
+
+		/* XXX: Do not protect counter for the sake of performance. */
 		++d->bd_rcount;
 		/*
 		 * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
@@ -1764,6 +1988,11 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 #endif
 		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
 		if (slen != 0) {
+			/*
+			 * Filter matches. Let's acquire write lock.
+			 */
+			BPFD_LOCK(d);
+
 			d->bd_fcount++;
 			if (!gottime) {
 				microtime(&tv);
@@ -1774,10 +2003,10 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 #endif
 				catchpacket(d, pkt, pktlen, slen,
 				    bpf_append_bytes, &tv);
+			BPFD_UNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 #define	BPF_CHECK_DIRECTION(d, r, i)				\
@@ -1786,6 +2015,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 
 /*
  * Incoming linkage from device drivers, when packet is in an mbuf chain.
+ * Locking model is explained in bpf_tap().
  */
 void
 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
@@ -1808,11 +2038,11 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 
 	pktlen = m_length(m, NULL);
 
-	BPFIF_LOCK(bp);
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
 			continue;
-		BPFD_LOCK(d);
 		++d->bd_rcount;
 #ifdef BPF_JITTER
 		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
@@ -1823,6 +2053,8 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 #endif
 		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
 		if (slen != 0) {
+			BPFD_LOCK(d);
+
 			d->bd_fcount++;
 			if (!gottime) {
 				microtime(&tv);
@@ -1833,10 +2065,10 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 #endif
 				catchpacket(d, (u_char *)m, pktlen, slen,
 				    bpf_append_mbuf, &tv);
+			BPFD_UNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 /*
@@ -1871,14 +2103,17 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
 	mb.m_len = dlen;
 	pktlen += dlen;
 
-	BPFIF_LOCK(bp);
+
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
 			continue;
-		BPFD_LOCK(d);
 		++d->bd_rcount;
 		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
 		if (slen != 0) {
+			BPFD_LOCK(d);
+
 			d->bd_fcount++;
 			if (!gottime) {
 				microtime(&tv);
@@ -1889,10 +2124,10 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
 #endif
 				catchpacket(d, (u_char *)&mb, pktlen, slen,
 				    bpf_append_mbuf, &tv);
+			BPFD_UNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 #undef	BPF_CHECK_DIRECTION
@@ -2042,7 +2277,7 @@ bpf_freed(struct bpf_d *d)
 	}
 	if (d->bd_wfilter != NULL)
 		free((caddr_t)d->bd_wfilter, M_BPF);
-	mtx_destroy(&d->bd_mtx);
+	mtx_destroy(&d->bd_lock);
 }
 
 /*
@@ -2072,15 +2307,16 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
 		panic("bpfattach");
 
 	LIST_INIT(&bp->bif_dlist);
+	LIST_INIT(&bp->bif_wlist);
 	bp->bif_ifp = ifp;
 	bp->bif_dlt = dlt;
-	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
+	rw_init(&bp->bif_lock, "bpf interface lock");
 	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
 	*driverp = bp;
 
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 
 	/*
 	 * Compute the length of the bpf header.  This is not necessarily
@@ -2095,42 +2331,95 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
 }
 
 /*
- * Detach bpf from an interface.  This involves detaching each descriptor
- * associated with the interface, and leaving bd_bif NULL.  Notify each
- * descriptor as it's detached so that any sleepers wake up and get
- * ENXIO.
+ * Detach bpf from an interface. This involves detaching each descriptor
+ * associated with the interface. Notify each descriptor as it's detached
+ * so that any sleepers wake up and get ENXIO.
  */
 void
 bpfdetach(struct ifnet *ifp)
 {
 	struct bpf_if	*bp;
 	struct bpf_d	*d;
+#ifdef INVARIANTS
+	int ndetached;
 
-	/* Locate BPF interface information */
-	mtx_lock(&bpf_mtx);
-	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		if (ifp == bp->bif_ifp)
-			break;
-	}
+	ndetached = 0;
+#endif
+
+	BPF_LOCK();
+	/* Find all bpf_if struct's which reference ifp and detach them. */
+	do {
+		LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+			if (ifp == bp->bif_ifp)
+				break;
+		}
+		if (bp != NULL)
+			LIST_REMOVE(bp, bif_next);
+
+		if (bp != NULL) {
+#ifdef INVARIANTS
+			ndetached++;
+#endif
+			while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
+				bpf_detachd_locked(d);
+				BPFD_LOCK(d);
+				bpf_wakeup(d);
+				BPFD_UNLOCK(d);
+			}
+			/* Free writer-only descriptors */
+			while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
+				bpf_detachd_locked(d);
+				BPFD_LOCK(d);
+				bpf_wakeup(d);
+				BPFD_UNLOCK(d);
+			}
+
+			/*
+			 * Delay freeing bp till interface is detached
+			 * and all routes through this interface are removed.
+			 * Mark bp as detached to restrict new consumers.
+			 */
+			BPFIF_WLOCK(bp);
+			bp->flags |= BPFIF_FLAG_DYING;
+			BPFIF_WUNLOCK(bp);
+		}
+	} while (bp != NULL);
+	BPF_UNLOCK();
 
-	/* Interface wasn't attached */
-	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
-		mtx_unlock(&bpf_mtx);
+#ifdef INVARIANTS
+	if (ndetached == 0)
 		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
+#endif
+}
+
+/*
+ * Interface departure handler.
+ * Note departure event does not guarantee interface is going down.
+ */
+static void
+bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+	struct bpf_if *bp;
+
+	BPF_LOCK();
+	if ((bp = ifp->if_bpf) == NULL) {
+		BPF_UNLOCK();
 		return;
 	}
 
-	LIST_REMOVE(bp, bif_next);
-	mtx_unlock(&bpf_mtx);
-
-	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
-		bpf_detachd(d);
-		BPFD_LOCK(d);
-		bpf_wakeup(d);
-		BPFD_UNLOCK(d);
+	/* Check if bpfdetach() was called previously */
+	if ((bp->flags & BPFIF_FLAG_DYING) == 0) {
+		BPF_UNLOCK();
+		return;
 	}
 
-	mtx_destroy(&bp->bif_mtx);
+	CTR3(KTR_NET, "%s: freing BPF instance %p for interface %p",
+	    __func__, bp, ifp);
+
+	ifp->if_bpf = NULL;
+	BPF_UNLOCK();
+
+	rw_destroy(&bp->bif_lock);
 	free(bp, M_BPF);
 }
 
@@ -2144,24 +2433,22 @@ bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 	struct ifnet *ifp;
 	struct bpf_if *bp;
 
+	BPF_LOCK_ASSERT();
+
 	ifp = d->bd_bif->bif_ifp;
 	n = 0;
 	error = 0;
-	mtx_lock(&bpf_mtx);
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp != ifp)
 			continue;
 		if (bfl->bfl_list != NULL) {
-			if (n >= bfl->bfl_len) {
-				mtx_unlock(&bpf_mtx);
+			if (n >= bfl->bfl_len)
 				return (ENOMEM);
-			}
 			error = copyout(&bp->bif_dlt,
 			    bfl->bfl_list + n, sizeof(u_int));
 		}
 		n++;
 	}
-	mtx_unlock(&bpf_mtx);
 	bfl->bfl_len = n;
 	return (error);
 }
@@ -2176,18 +2463,19 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
 	struct ifnet *ifp;
 	struct bpf_if *bp;
 
+	BPF_LOCK_ASSERT();
+
 	if (d->bd_bif->bif_dlt == dlt)
 		return (0);
 	ifp = d->bd_bif->bif_ifp;
-	mtx_lock(&bpf_mtx);
+
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
 			break;
 	}
-	mtx_unlock(&bpf_mtx);
+
 	if (bp != NULL) {
 		opromisc = d->bd_promisc;
-		bpf_detachd(d);
 		bpf_attachd(d, bp);
 		BPFD_LOCK(d);
 		reset_d(d);
@@ -2216,6 +2504,11 @@ bpf_drvinit(void *unused)
 	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
 	/* For compatibility */
 	make_dev_alias(dev, "bpf0");
+
+	/* Register interface departure handler */
+	bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
+		    ifnet_departure_event, bpf_ifdetach, NULL,
+		    EVENTHANDLER_PRI_ANY);
 }
 
 /*
@@ -2229,9 +2522,9 @@ bpf_zero_counters(void)
 	struct bpf_if *bp;
 	struct bpf_d *bd;
 
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		BPFIF_LOCK(bp);
+		BPFIF_RLOCK(bp);
 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
 			BPFD_LOCK(bd);
 			bd->bd_rcount = 0;
@@ -2242,11 +2535,14 @@ bpf_zero_counters(void)
 			bd->bd_zcopy = 0;
 			BPFD_UNLOCK(bd);
 		}
-		BPFIF_UNLOCK(bp);
+		BPFIF_RUNLOCK(bp);
 	}
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 }
 
+/*
+ * Fill filter statistics
+ */
 static void
 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 {
@@ -2254,6 +2550,7 @@ bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 	bzero(d, sizeof(*d));
 	BPFD_LOCK_ASSERT(bd);
 	d->bd_structsize = sizeof(*d);
+	/* XXX: reading should be protected by global lock */
 	d->bd_immediate = bd->bd_immediate;
 	d->bd_promisc = bd->bd_promisc;
 	d->bd_hdrcmplt = bd->bd_hdrcmplt;
@@ -2278,6 +2575,9 @@ bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 	d->bd_bufmode = bd->bd_bufmode;
 }
 
+/*
+ * Handle `netstat -B' stats request
+ */
 static int
 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
 {
@@ -2315,24 +2615,31 @@ bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
 	if (bpf_bpfd_cnt == 0)
 		return (SYSCTL_OUT(req, 0, 0));
 	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
-		mtx_unlock(&bpf_mtx);
+		BPF_UNLOCK();
 		free(xbdbuf, M_BPF);
 		return (ENOMEM);
 	}
 	index = 0;
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		BPFIF_LOCK(bp);
+		BPFIF_RLOCK(bp);
+		/* Send writers-only first */
+		LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
+			xbd = &xbdbuf[index++];
+			BPFD_LOCK(bd);
+			bpfstats_fill_xbpf(xbd, bd);
+			BPFD_UNLOCK(bd);
+		}
 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
 			xbd = &xbdbuf[index++];
 			BPFD_LOCK(bd);
 			bpfstats_fill_xbpf(xbd, bd);
 			BPFD_UNLOCK(bd);
 		}
-		BPFIF_UNLOCK(bp);
+		BPFIF_RUNLOCK(bp);
 	}
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
 	free(xbdbuf, M_BPF);
 	return (error);
diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h
index 726483a5..004815ad 100644
--- a/freebsd/sys/net/bpf.h
+++ b/freebsd/sys/net/bpf.h
@@ -917,14 +917,21 @@ SYSCTL_DECL(_net_bpf);
 
 /*
  * Descriptor associated with each attached hardware interface.
+ * FIXME: this structure is exposed to external callers to speed up the
+ * bpf_peers_present() call. However, all fields not needed by that
+ * function are hidden behind the BPF_INTERNAL define.
  */
 struct bpf_if {
 	LIST_ENTRY(bpf_if)	bif_next;	/* list of all interfaces */
 	LIST_HEAD(, bpf_d)	bif_dlist;	/* descriptor list */
+#ifdef BPF_INTERNAL
 	u_int bif_dlt;				/* link layer type */
 	u_int bif_hdrlen;		/* length of header (with padding) */
 	struct ifnet *bif_ifp;		/* corresponding interface */
-	struct mtx	bif_mtx;	/* mutex for interface */
+	struct rwlock bif_lock;		/* interface lock */
+	LIST_HEAD(, bpf_d)	bif_wlist;	/* writer-only list */
+	int flags;			/* Interface flags */
+#endif
 };
 
 void	 bpf_bufheld(struct bpf_d *d);
diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c
index 7ebfb0a8..382497f6 100644
--- a/freebsd/sys/net/bpf_buffer.c
+++ b/freebsd/sys/net/bpf_buffer.c
@@ -4,7 +4,7 @@
  * Copyright (c) 2007 Seccuris Inc.
  * All rights reserved.
  *
- * This sofware was developed by Robert N. M. Watson under contract to
+ * This software was developed by Robert N. M. Watson under contract to
  * Seccuris Inc.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -95,21 +95,6 @@ static int bpf_maxbufsize = BPF_MAXBUFSIZE;
 SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
     &bpf_maxbufsize, 0, "Default capture buffer in bytes");
 
-void
-bpf_buffer_alloc(struct bpf_d *d)
-{
-
-	KASSERT(d->bd_fbuf == NULL, ("bpf_buffer_alloc: bd_fbuf != NULL"));
-	KASSERT(d->bd_sbuf == NULL, ("bpf_buffer_alloc: bd_sbuf != NULL"));
-	KASSERT(d->bd_hbuf == NULL, ("bpf_buffer_alloc: bd_hbuf != NULL"));
-
-	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
-	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
-	d->bd_hbuf = NULL;
-	d->bd_slen = 0;
-	d->bd_hlen = 0;
-}
-
 /*
  * Simple data copy to the current kernel buffer.
  */
@@ -185,18 +170,42 @@ int
 bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i)
 {
 	u_int size;
+	caddr_t fbuf, sbuf;
 
-	BPFD_LOCK(d);
-	if (d->bd_bif != NULL) {
-		BPFD_UNLOCK(d);
-		return (EINVAL);
-	}
 	size = *i;
 	if (size > bpf_maxbufsize)
 		*i = size = bpf_maxbufsize;
 	else if (size < BPF_MINBUFSIZE)
 		*i = size = BPF_MINBUFSIZE;
+
+	/* Allocate buffers immediately */
+	fbuf = (caddr_t)malloc(size, M_BPF, M_WAITOK);
+	sbuf = (caddr_t)malloc(size, M_BPF, M_WAITOK);
+
+	BPFD_LOCK(d);
+	if (d->bd_bif != NULL) {
+		/* Interface already attached, unable to change buffers */
+		BPFD_UNLOCK(d);
+		free(fbuf, M_BPF);
+		free(sbuf, M_BPF);
+		return (EINVAL);
+	}
+
+	/* Free old buffers if set */
+	if (d->bd_fbuf != NULL)
+		free(d->bd_fbuf, M_BPF);
+	if (d->bd_sbuf != NULL)
+		free(d->bd_sbuf, M_BPF);
+
+	/* Fill in new data */
 	d->bd_bufsize = size;
+	d->bd_fbuf = fbuf;
+	d->bd_sbuf = sbuf;
+
+	d->bd_hbuf = NULL;
+	d->bd_slen = 0;
+	d->bd_hlen = 0;
+
 	BPFD_UNLOCK(d);
 	return (0);
 }
diff --git a/freebsd/sys/net/bpf_buffer.h b/freebsd/sys/net/bpf_buffer.h
index 82d0310b..c1dc1f3a 100644
--- a/freebsd/sys/net/bpf_buffer.h
+++ b/freebsd/sys/net/bpf_buffer.h
@@ -2,7 +2,7 @@
  * Copyright (c) 2007 Seccuris Inc.
  * All rights reserved.
  *
- * This sofware was developed by Robert N. M. Watson under contract to
+ * This software was developed by Robert N. M. Watson under contract to
  * Seccuris Inc.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -36,7 +36,6 @@
 #error "no user-serviceable parts inside"
 #endif
 
-void	bpf_buffer_alloc(struct bpf_d *d);
 void	bpf_buffer_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
 	    void *src, u_int len);
 void	bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
diff --git a/freebsd/sys/net/bpf_zerocopy.h b/freebsd/sys/net/bpf_zerocopy.h
index c541a15d..a5709b86 100644
--- a/freebsd/sys/net/bpf_zerocopy.h
+++ b/freebsd/sys/net/bpf_zerocopy.h
@@ -2,7 +2,7 @@
  * Copyright (c) 2007 Seccuris Inc.
  * All rights reserved.
  *
- * This sofware was developed by Robert N. M. Watson under contract to
+ * This software was developed by Robert N. M. Watson under contract to
  * Seccuris Inc.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/net/bpfdesc.h b/freebsd/sys/net/bpfdesc.h
index 03cb20dd..c3265ce1 100644
--- a/freebsd/sys/net/bpfdesc.h
+++ b/freebsd/sys/net/bpfdesc.h
@@ -79,6 +79,7 @@ struct bpf_d {
 	u_char		bd_promisc;	/* true if listening promiscuously */
 	u_char		bd_state;	/* idle, waiting, or timed out */
 	u_char		bd_immediate;	/* true to return on packet arrival */
+	u_char		bd_writer;	/* non-zero if d is writer-only */
 	int		bd_hdrcmplt;	/* false to fill in src lladdr automatically */
 	int		bd_direction;	/* select packet direction */
 	int		bd_feedback;	/* true to feed back sent packets */
@@ -86,7 +87,7 @@ struct bpf_d {
 	int		bd_sig;		/* signal to send upon packet reception */
 	struct sigio *	bd_sigio;	/* information for async I/O */
 	struct selinfo	bd_sel;		/* bsd select info */
-	struct mtx	bd_mtx;		/* mutex for this descriptor */
+	struct mtx	bd_lock;	/* per-descriptor lock */
 	struct callout	bd_callout;	/* for BPF timeouts with select */
 	struct label	*bd_label;	/* MAC label for descriptor */
 	u_int64_t	bd_fcount;	/* number of packets which matched filter */
@@ -105,10 +106,16 @@ struct bpf_d {
 #define BPF_WAITING	1		/* waiting for read timeout in select */
 #define BPF_TIMED_OUT	2		/* read timeout has expired in select */
 
-#define BPFD_LOCK(bd)		mtx_lock(&(bd)->bd_mtx)
-#define BPFD_UNLOCK(bd)		mtx_unlock(&(bd)->bd_mtx)
-#define BPFD_LOCK_ASSERT(bd)	mtx_assert(&(bd)->bd_mtx, MA_OWNED)
+#define BPFD_LOCK(bd)		mtx_lock(&(bd)->bd_lock)
+#define BPFD_UNLOCK(bd)		mtx_unlock(&(bd)->bd_lock)
+#define BPFD_LOCK_ASSERT(bd)	mtx_assert(&(bd)->bd_lock, MA_OWNED)
 
+#define BPF_PID_REFRESH(bd, td)	(bd)->bd_pid = (td)->td_proc->p_pid
+#define BPF_PID_REFRESH_CUR(bd)	(bd)->bd_pid = curthread->td_proc->p_pid
+
+#define BPF_LOCK()		mtx_lock(&bpf_mtx)
+#define BPF_UNLOCK()		mtx_unlock(&bpf_mtx)
+#define BPF_LOCK_ASSERT()	mtx_assert(&bpf_mtx, MA_OWNED)
 /*
  * External representation of the bpf descriptor
  */
@@ -143,7 +150,11 @@ struct xbpf_d {
 	u_int64_t	bd_spare[4];
 };
 
-#define BPFIF_LOCK(bif)		mtx_lock(&(bif)->bif_mtx)
-#define BPFIF_UNLOCK(bif)	mtx_unlock(&(bif)->bif_mtx)
+#define BPFIF_RLOCK(bif)	rw_rlock(&(bif)->bif_lock)
+#define BPFIF_RUNLOCK(bif)	rw_runlock(&(bif)->bif_lock)
+#define BPFIF_WLOCK(bif)	rw_wlock(&(bif)->bif_lock)
+#define BPFIF_WUNLOCK(bif)	rw_wunlock(&(bif)->bif_lock)
+
+#define BPFIF_FLAG_DYING	1	/* Reject new bpf consumers */
 
 #endif
diff --git a/freebsd/sys/net/bridgestp.c b/freebsd/sys/net/bridgestp.c
index cc7f4e6f..1b2ef7cf 100644
--- a/freebsd/sys/net/bridgestp.c
+++ b/freebsd/sys/net/bridgestp.c
@@ -129,14 +129,14 @@ static int	bstp_rerooted(struct bstp_state *, struct bstp_port *);
 static uint32_t	bstp_calc_path_cost(struct bstp_port *);
 static void	bstp_notify_state(void *, int);
 static void	bstp_notify_rtage(void *, int);
-static void	bstp_ifupdstatus(struct bstp_state *, struct bstp_port *);
+static void	bstp_ifupdstatus(void *, int);
 static void	bstp_enable_port(struct bstp_state *, struct bstp_port *);
 static void	bstp_disable_port(struct bstp_state *, struct bstp_port *);
 static void	bstp_tick(void *);
 static void	bstp_timer_start(struct bstp_timer *, uint16_t);
 static void	bstp_timer_stop(struct bstp_timer *);
 static void	bstp_timer_latch(struct bstp_timer *);
-static int	bstp_timer_expired(struct bstp_timer *);
+static int	bstp_timer_dectest(struct bstp_timer *);
 static void	bstp_hello_timer_expiry(struct bstp_state *,
 		    struct bstp_port *);
 static void	bstp_message_age_expiry(struct bstp_state *,
@@ -448,7 +448,7 @@ bstp_pdu_flags(struct bstp_port *bp)
 	return (flags);
 }
 
-struct mbuf *
+void
 bstp_input(struct bstp_port *bp, struct ifnet *ifp, struct mbuf *m)
 {
 	struct bstp_state *bs = bp->bp_bs;
@@ -458,7 +458,7 @@ bstp_input(struct bstp_port *bp, struct ifnet *ifp, struct mbuf *m)
 
 	if (bp->bp_active == 0) {
 		m_freem(m);
-		return (NULL);
+		return;
 	}
 
 	BSTP_LOCK(bs);
@@ -523,7 +523,6 @@ out:
 	BSTP_UNLOCK(bs);
 	if (m)
 		m_freem(m);
-	return (NULL);
 }
 
 static void
@@ -1680,7 +1679,7 @@ bstp_set_autoptp(struct bstp_port *bp, int set)
 	if (set) {
 		bp->bp_flags |= BSTP_PORT_AUTOPTP;
 		if (bp->bp_role != BSTP_ROLE_DISABLED)
-			bstp_ifupdstatus(bs, bp);
+			taskqueue_enqueue(taskqueue_swi, &bp->bp_mediatask);
 	} else
 		bp->bp_flags &= ~BSTP_PORT_AUTOPTP;
 	BSTP_UNLOCK(bs);
@@ -1770,85 +1769,93 @@ bstp_notify_rtage(void *arg, int pending)
 }
 
 void
-bstp_linkstate(struct ifnet *ifp, int state)
+bstp_linkstate(struct bstp_port *bp)
 {
-	struct bstp_state *bs;
-	struct bstp_port *bp;
+	struct bstp_state *bs = bp->bp_bs;
 
-	/* search for the stp port */
-	mtx_lock(&bstp_list_mtx);
-	LIST_FOREACH(bs, &bstp_list, bs_list) {
-		BSTP_LOCK(bs);
-		LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
-			if (bp->bp_ifp == ifp) {
-				bstp_ifupdstatus(bs, bp);
-				bstp_update_state(bs, bp);
-				/* it only exists once so return */
-				BSTP_UNLOCK(bs);
-				mtx_unlock(&bstp_list_mtx);
-				return;
-			}
-		}
-		BSTP_UNLOCK(bs);
-	}
-	mtx_unlock(&bstp_list_mtx);
+	if (!bp->bp_active)
+		return;
+
+	bstp_ifupdstatus(bp, 0);
+	BSTP_LOCK(bs);
+	bstp_update_state(bs, bp);
+	BSTP_UNLOCK(bs);
 }
 
 static void
-bstp_ifupdstatus(struct bstp_state *bs, struct bstp_port *bp)
+bstp_ifupdstatus(void *arg, int pending)
 {
+	struct bstp_port *bp = (struct bstp_port *)arg;
+	struct bstp_state *bs = bp->bp_bs;
 	struct ifnet *ifp = bp->bp_ifp;
 	struct ifmediareq ifmr;
-	int error = 0;
+	int error, changed;
 
-	BSTP_LOCK_ASSERT(bs);
+	if (!bp->bp_active)
+		return;
 
 	bzero((char *)&ifmr, sizeof(ifmr));
 	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
 
+	BSTP_LOCK(bs);
+	changed = 0;
 	if ((error == 0) && (ifp->if_flags & IFF_UP)) {
 		if (ifmr.ifm_status & IFM_ACTIVE) {
 			/* A full-duplex link is assumed to be point to point */
 			if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
-				bp->bp_ptp_link =
-				    ifmr.ifm_active & IFM_FDX ? 1 : 0;
+				int fdx;
+
+				fdx = ifmr.ifm_active & IFM_FDX ? 1 : 0;
+				if (bp->bp_ptp_link ^ fdx) {
+					bp->bp_ptp_link = fdx;
+					changed = 1;
+				}
 			}
 
 			/* Calc the cost if the link was down previously */
 			if (bp->bp_flags & BSTP_PORT_PNDCOST) {
-				bp->bp_path_cost = bstp_calc_path_cost(bp);
+				uint32_t cost;
+
+				cost = bstp_calc_path_cost(bp);
+				if (bp->bp_path_cost != cost) {
+					bp->bp_path_cost = cost;
+					changed = 1;
+				}
 				bp->bp_flags &= ~BSTP_PORT_PNDCOST;
 			}
 
-			if (bp->bp_role == BSTP_ROLE_DISABLED)
+			if (bp->bp_role == BSTP_ROLE_DISABLED) {
 				bstp_enable_port(bs, bp);
+				changed = 1;
+			}
 		} else {
 			if (bp->bp_role != BSTP_ROLE_DISABLED) {
 				bstp_disable_port(bs, bp);
+				changed = 1;
 				if ((bp->bp_flags & BSTP_PORT_ADMEDGE) &&
 				    bp->bp_protover == BSTP_PROTO_RSTP)
 					bp->bp_operedge = 1;
 			}
 		}
-		return;
-	}
-
-	if (bp->bp_infois != BSTP_INFO_DISABLED)
+	} else if (bp->bp_infois != BSTP_INFO_DISABLED) {
 		bstp_disable_port(bs, bp);
+		changed = 1;
+	}
+	if (changed)
+		bstp_assign_roles(bs);
+	BSTP_UNLOCK(bs);
 }
 
 static void
 bstp_enable_port(struct bstp_state *bs, struct bstp_port *bp)
 {
 	bp->bp_infois = BSTP_INFO_AGED;
-	bstp_assign_roles(bs);
 }
 
 static void
 bstp_disable_port(struct bstp_state *bs, struct bstp_port *bp)
 {
 	bp->bp_infois = BSTP_INFO_DISABLED;
-	bstp_assign_roles(bs);
 }
 
 static void
@@ -1862,30 +1869,34 @@ bstp_tick(void *arg)
 	if (bs->bs_running == 0)
 		return;
 
-	/* slow timer to catch missed link events */
-	if (bstp_timer_expired(&bs->bs_link_timer)) {
-		LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
-			bstp_ifupdstatus(bs, bp);
+	CURVNET_SET(bs->bs_vnet);
+
+	/* poll link events on interfaces that do not support linkstate */
+	if (bstp_timer_dectest(&bs->bs_link_timer)) {
+		LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+			if (!(bp->bp_ifp->if_capabilities & IFCAP_LINKSTATE))
+				taskqueue_enqueue(taskqueue_swi, &bp->bp_mediatask);
+		}
 		bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER);
 	}
 
 	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
 		/* no events need to happen for these */
-		bstp_timer_expired(&bp->bp_tc_timer);
-		bstp_timer_expired(&bp->bp_recent_root_timer);
-		bstp_timer_expired(&bp->bp_forward_delay_timer);
-		bstp_timer_expired(&bp->bp_recent_backup_timer);
+		bstp_timer_dectest(&bp->bp_tc_timer);
+		bstp_timer_dectest(&bp->bp_recent_root_timer);
+		bstp_timer_dectest(&bp->bp_forward_delay_timer);
+		bstp_timer_dectest(&bp->bp_recent_backup_timer);
 
-		if (bstp_timer_expired(&bp->bp_hello_timer))
+		if (bstp_timer_dectest(&bp->bp_hello_timer))
 			bstp_hello_timer_expiry(bs, bp);
 
-		if (bstp_timer_expired(&bp->bp_message_age_timer))
+		if (bstp_timer_dectest(&bp->bp_message_age_timer))
 			bstp_message_age_expiry(bs, bp);
 
-		if (bstp_timer_expired(&bp->bp_migrate_delay_timer))
+		if (bstp_timer_dectest(&bp->bp_migrate_delay_timer))
 			bstp_migrate_delay_expiry(bs, bp);
 
-		if (bstp_timer_expired(&bp->bp_edge_delay_timer))
+		if (bstp_timer_dectest(&bp->bp_edge_delay_timer))
 			bstp_edge_delay_expiry(bs, bp);
 
 		/* update the various state machines for the port */
@@ -1895,6 +1906,8 @@ bstp_tick(void *arg)
 			bp->bp_txcount--;
 	}
 
+	CURVNET_RESTORE();
+
 	callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs);
 }
 
@@ -1922,7 +1935,7 @@ bstp_timer_latch(struct bstp_timer *t)
 }
 
 static int
-bstp_timer_expired(struct bstp_timer *t)
+bstp_timer_dectest(struct bstp_timer *t)
 {
 	if (t->active == 0 || t->latched)
 		return (0);
@@ -2010,24 +2023,33 @@ bstp_reinit(struct bstp_state *bs)
 	struct bstp_port *bp;
 	struct ifnet *ifp, *mif;
 	u_char *e_addr;
+	void *bridgeptr;
 	static const u_char llzero[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
 
 	BSTP_LOCK_ASSERT(bs);
 
+	if (LIST_EMPTY(&bs->bs_bplist))
+		goto disablestp;
+
 	mif = NULL;
+	bridgeptr = LIST_FIRST(&bs->bs_bplist)->bp_ifp->if_bridge;
+	KASSERT(bridgeptr != NULL, ("Invalid bridge pointer"));
 	/*
 	 * Search through the Ethernet adapters and find the one with the
-	 * lowest value. The adapter which we take the MAC address from does
-	 * not need to be part of the bridge, it just needs to be a unique
-	 * value.
+	 * lowest value. Make sure the adapter which we take the MAC address
+	 * from is part of this bridge, so we can have more than one independent
+	 * bridge in the same STP domain.
 	 */
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (ifp->if_type != IFT_ETHER)
-			continue;
+			continue;	/* Not Ethernet */
+
+		if (ifp->if_bridge != bridgeptr)
+			continue;	/* Not part of our bridge */
 
 		if (bstp_addr_cmp(IF_LLADDR(ifp), llzero) == 0)
-			continue;
+			continue;	/* No mac address set */
 
 		if (mif == NULL) {
 			mif = ifp;
@@ -2039,21 +2061,8 @@ bstp_reinit(struct bstp_state *bs)
 		}
 	}
 	IFNET_RUNLOCK_NOSLEEP();
-
-	if (LIST_EMPTY(&bs->bs_bplist) || mif == NULL) {
-		/* Set the bridge and root id (lower bits) to zero */
-		bs->bs_bridge_pv.pv_dbridge_id =
-		    ((uint64_t)bs->bs_bridge_priority) << 48;
-		bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id;
-		bs->bs_root_pv = bs->bs_bridge_pv;
-		/* Disable any remaining ports, they will have no MAC address */
-		LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
-			bp->bp_infois = BSTP_INFO_DISABLED;
-			bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
-		}
-		callout_stop(&bs->bs_bstpcallout);
-		return;
-	}
+	if (mif == NULL)
+		goto disablestp;
 
 	e_addr = IF_LLADDR(mif);
 	bs->bs_bridge_pv.pv_dbridge_id =
@@ -2076,11 +2085,25 @@ bstp_reinit(struct bstp_state *bs)
 	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
 		bp->bp_port_id = (bp->bp_priority << 8) |
 		    (bp->bp_ifp->if_index  & 0xfff);
-		bstp_ifupdstatus(bs, bp);
+		taskqueue_enqueue(taskqueue_swi, &bp->bp_mediatask);
 	}
 
 	bstp_assign_roles(bs);
 	bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER);
+	return;
+
+disablestp:
+	/* Set the bridge and root id (lower bits) to zero */
+	bs->bs_bridge_pv.pv_dbridge_id =
+	    ((uint64_t)bs->bs_bridge_priority) << 48;
+	bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id;
+	bs->bs_root_pv = bs->bs_bridge_pv;
+	/* Disable any remaining ports, they will have no MAC address */
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		bp->bp_infois = BSTP_INFO_DISABLED;
+		bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+	}
+	callout_stop(&bs->bs_bstpcallout);
 }
 
 static int
@@ -2090,10 +2113,8 @@ bstp_modevent(module_t mod, int type, void *data)
 	case MOD_LOAD:
 		mtx_init(&bstp_list_mtx, "bridgestp list", NULL, MTX_DEF);
 		LIST_INIT(&bstp_list);
-		bstp_linkstate_p = bstp_linkstate;
 		break;
 	case MOD_UNLOAD:
-		bstp_linkstate_p = NULL;
 		mtx_destroy(&bstp_list_mtx);
 		break;
 	default:
@@ -2128,6 +2149,7 @@ bstp_attach(struct bstp_state *bs, struct bstp_cb_ops *cb)
 	bs->bs_protover = BSTP_PROTO_RSTP;
 	bs->bs_state_cb = cb->bcb_state;
 	bs->bs_rtage_cb = cb->bcb_rtage;
+	bs->bs_vnet = curvnet;
 
 	getmicrotime(&bs->bs_last_tc_time);
 
@@ -2184,6 +2206,7 @@ bstp_create(struct bstp_state *bs, struct bstp_port *bp, struct ifnet *ifp)
 	bp->bp_priority = BSTP_DEFAULT_PORT_PRIORITY;
 	TASK_INIT(&bp->bp_statetask, 0, bstp_notify_state, bp);
 	TASK_INIT(&bp->bp_rtagetask, 0, bstp_notify_rtage, bp);
+	TASK_INIT(&bp->bp_mediatask, 0, bstp_ifupdstatus, bp);
 
 	/* Init state */
 	bp->bp_infois = BSTP_INFO_DISABLED;
@@ -2247,4 +2270,5 @@ bstp_destroy(struct bstp_port *bp)
 	KASSERT(bp->bp_active == 0, ("port is still attached"));
 	taskqueue_drain(taskqueue_swi, &bp->bp_statetask);
 	taskqueue_drain(taskqueue_swi, &bp->bp_rtagetask);
+	taskqueue_drain(taskqueue_swi, &bp->bp_mediatask);
 }
diff --git a/freebsd/sys/net/bridgestp.h b/freebsd/sys/net/bridgestp.h
index 74086fce..cbb8d53c 100644
--- a/freebsd/sys/net/bridgestp.h
+++ b/freebsd/sys/net/bridgestp.h
@@ -326,6 +326,7 @@ struct bstp_port {
 	uint8_t			bp_txcount;
 	struct task		bp_statetask;
 	struct task		bp_rtagetask;
+	struct task		bp_mediatask;
 };
 
 /*
@@ -358,6 +359,7 @@ struct bstp_state {
 	LIST_HEAD(, bstp_port)	bs_bplist;
 	bstp_state_cb_t		bs_state_cb;
 	bstp_rtage_cb_t		bs_rtage_cb;
+	struct vnet		*bs_vnet;
 };
 
 #define	BSTP_LOCK_INIT(_bs)	mtx_init(&(_bs)->bs_mtx, "bstp", NULL, MTX_DEF)
@@ -368,8 +370,6 @@ struct bstp_state {
 
 extern const uint8_t bstp_etheraddr[];
 
-extern	void (*bstp_linkstate_p)(struct ifnet *ifp, int state);
-
 void	bstp_attach(struct bstp_state *, struct bstp_cb_ops *);
 void	bstp_detach(struct bstp_state *);
 void	bstp_init(struct bstp_state *);
@@ -378,7 +378,7 @@ int	bstp_create(struct bstp_state *, struct bstp_port *, struct ifnet *);
 int	bstp_enable(struct bstp_port *);
 void	bstp_disable(struct bstp_port *);
 void	bstp_destroy(struct bstp_port *);
-void	bstp_linkstate(struct ifnet *, int);
+void	bstp_linkstate(struct bstp_port *);
 int	bstp_set_htime(struct bstp_state *, int);
 int	bstp_set_fdelay(struct bstp_state *, int);
 int	bstp_set_maxage(struct bstp_state *, int);
@@ -391,6 +391,6 @@ int	bstp_set_edge(struct bstp_port *, int);
 int	bstp_set_autoedge(struct bstp_port *, int);
 int	bstp_set_ptp(struct bstp_port *, int);
 int	bstp_set_autoptp(struct bstp_port *, int);
-struct mbuf *bstp_input(struct bstp_port *, struct ifnet *, struct mbuf *);
+void	bstp_input(struct bstp_port *, struct ifnet *, struct mbuf *);
 
 #endif /* _KERNEL */
diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c
index 6e06ffe5..1b4418a2 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.c
+++ b/freebsd/sys/net/ieee8023ad_lacp.c
@@ -814,10 +814,10 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
 		return (NULL);
 	}
 
-	if (m->m_flags & M_FLOWID)
+	if (sc->use_flowid && (m->m_flags & M_FLOWID))
 		hash = m->m_pkthdr.flowid;
 	else
-		hash = lagg_hashmbuf(m, lsc->lsc_hashkey);
+		hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey);
 	hash %= pm->pm_count;
 	lp = pm->pm_map[hash];
 
diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c
index 918f8c4e..5dffd06d 100644
--- a/freebsd/sys/net/if.c
+++ b/freebsd/sys/net/if.c
@@ -60,6 +60,8 @@
 #include <sys/taskqueue.h>
 #include <sys/domain.h>
 #include <sys/jail.h>
+#include <sys/priv.h>
+
 #include <machine/stdarg.h>
 #include <vm/uma.h>
 
@@ -104,7 +106,7 @@ SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
 
 TUNABLE_INT("net.link.ifqmaxlen", &ifqmaxlen);
-SYSCTL_UINT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
+SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
     &ifqmaxlen, 0, "max send queue size");
 
 /* Log link state change events */
@@ -126,7 +128,7 @@ MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
 static struct sx ifdescr_sx;
 SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
 
-void	(*bstp_linkstate_p)(struct ifnet *ifp, int state);
+void	(*bridge_linkstate_p)(struct ifnet *ifp);
 void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
 void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
 /* These are external hooks for CARP. */
@@ -277,6 +279,7 @@ ifindex_alloc_locked(u_short *idxp)
 
 	IFNET_WLOCK_ASSERT();
 
+retry:
 	/*
 	 * Try to find an empty slot below V_if_index.  If we fail, take the
 	 * next slot.
@@ -289,10 +292,12 @@ ifindex_alloc_locked(u_short *idxp)
 	/* Catch if_index overflow. */
 	if (idx < 1)
 		return (ENOSPC);
+	if (idx >= V_if_indexlim) {
+		if_grow();
+		goto retry;
+	}
 	if (idx > V_if_index)
 		V_if_index = idx;
-	if (V_if_index >= V_if_indexlim)
-		if_grow();
 	*idxp = idx;
 	return (0);
 }
@@ -362,10 +367,12 @@ vnet_if_init(const void *unused __unused)
 
 	TAILQ_INIT(&V_ifnet);
 	TAILQ_INIT(&V_ifg_head);
+	IFNET_WLOCK();
 	if_grow();				/* create initial table */
+	IFNET_WUNLOCK();
 	vnet_if_clone_init();
 }
-VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_init,
+VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
     NULL);
 
 /* ARGSUSED*/
@@ -376,7 +383,7 @@ if_init(void *dummy __unused)
 	IFNET_LOCK_INIT();
 	if_clone_init();
 }
-SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL);
+SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL);
 
 
 #ifdef VIMAGE
@@ -384,8 +391,10 @@ static void
 vnet_if_uninit(const void *unused __unused)
 {
 
-	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet));
-	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head));
+	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
+	    "not empty", __func__, __LINE__, &V_ifnet));
+	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
+	    "not empty", __func__, __LINE__, &V_ifg_head));
 
 	free((caddr_t)V_ifindex_table, M_IFNET);
 }
@@ -396,16 +405,25 @@ VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
 static void
 if_grow(void)
 {
+	int oldlim;
 	u_int n;
 	struct ifindex_entry *e;
 
-	V_if_indexlim <<= 1;
-	n = V_if_indexlim * sizeof(*e);
+	IFNET_WLOCK_ASSERT();
+	oldlim = V_if_indexlim;
+	IFNET_WUNLOCK();
+	n = (oldlim << 1) * sizeof(*e);
 	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
+	IFNET_WLOCK();
+	if (V_if_indexlim != oldlim) {
+		free(e, M_IFNET);
+		return;
+	}
 	if (V_ifindex_table != NULL) {
 		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
 		free((caddr_t)V_ifindex_table, M_IFNET);
 	}
+	V_if_indexlim <<= 1;
 	V_ifindex_table = e;
 }
 
@@ -472,8 +490,8 @@ if_alloc(u_char type)
 }
 
 /*
- * Do the actual work of freeing a struct ifnet, associated index, and layer
- * 2 common structure.  This call is made when the last reference to an
+ * Do the actual work of freeing a struct ifnet and its layer 2 common
+ * structure.  This call is made when the last reference to an
  * interface is released.
  */
 static void
@@ -483,13 +501,6 @@ if_free_internal(struct ifnet *ifp)
 	KASSERT((ifp->if_flags & IFF_DYING),
 	    ("if_free_internal: interface not dying"));
 
-	IFNET_WLOCK();
-	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
-	    ("%s: freeing unallocated ifnet", ifp->if_xname));
-
-	ifindex_free_locked(ifp->if_index);
-	IFNET_WUNLOCK();
-
 	if (if_com_free[ifp->if_alloctype] != NULL)
 		if_com_free[ifp->if_alloctype](ifp->if_l2com,
 		    ifp->if_alloctype);
@@ -520,6 +531,14 @@ if_free_type(struct ifnet *ifp, u_char type)
 	    ifp->if_alloctype));
 
 	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
+
+	IFNET_WLOCK();
+	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
+	    ("%s: freeing unallocated ifnet", ifp->if_xname));
+
+	ifindex_free_locked(ifp->if_index);
+	IFNET_WUNLOCK();
+
 	if (!refcount_release(&ifp->if_refcount))
 		return;
 	if_free_internal(ifp);
@@ -818,10 +837,10 @@ if_purgemaddrs(struct ifnet *ifp)
 	struct ifmultiaddr *ifma;
 	struct ifmultiaddr *next;
 
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_WLOCK(ifp);
 	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
 		if_delmulti_locked(ifp, ifma, 1);
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_WUNLOCK(ifp);
 }
 
 /*
@@ -1165,10 +1184,10 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
 	ifgl->ifgl_group = ifg;
 	ifgm->ifgm_ifp = ifp;
 
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_WLOCK(ifp);
 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_WUNLOCK(ifp);
 
 	IFNET_WUNLOCK();
 
@@ -1195,9 +1214,9 @@ if_delgroup(struct ifnet *ifp, const char *groupname)
 		return (ENOENT);
 	}
 
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_WLOCK(ifp);
 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_WUNLOCK(ifp);
 
 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
 		if (ifgm->ifgm_ifp == ifp)
@@ -1238,9 +1257,9 @@ if_delgroups(struct ifnet *ifp)
 
 		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
 
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_WLOCK(ifp);
 		TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_WUNLOCK(ifp);
 
 		TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
 			if (ifgm->ifgm_ifp == ifp)
@@ -1282,33 +1301,33 @@ if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
 	struct ifgroupreq	*ifgr = data;
 
 	if (ifgr->ifgr_len == 0) {
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 			ifgr->ifgr_len += sizeof(struct ifg_req);
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 		return (0);
 	}
 
 	len = ifgr->ifgr_len;
 	ifgp = ifgr->ifgr_groups;
 	/* XXX: wire */
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
 		if (len < sizeof(ifgrq)) {
-			IF_ADDR_UNLOCK(ifp);
+			IF_ADDR_RUNLOCK(ifp);
 			return (EINVAL);
 		}
 		bzero(&ifgrq, sizeof ifgrq);
 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
 		    sizeof(ifgrq.ifgrq_group));
 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
-		    	IF_ADDR_UNLOCK(ifp);
+		    	IF_ADDR_RUNLOCK(ifp);
 			return (error);
 		}
 		len -= sizeof(ifgrq);
 		ifgp++;
 	}
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_RUNLOCK(ifp);
 
 	return (0);
 }
@@ -1415,28 +1434,28 @@ void
 if_addr_rlock(struct ifnet *ifp)
 {
 
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_RLOCK(ifp);
 }
 
 void
 if_addr_runlock(struct ifnet *ifp)
 {
 
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_RUNLOCK(ifp);
 }
 
 void
 if_maddr_rlock(struct ifnet *ifp)
 {
 
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_RLOCK(ifp);
 }
 
 void
 if_maddr_runlock(struct ifnet *ifp)
 {
 
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_RUNLOCK(ifp);
 }
 
 /*
@@ -1548,14 +1567,14 @@ ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
 
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
 			if (sa_equal(addr, ifa->ifa_addr)) {
 				if (getref)
 					ifa_ref(ifa);
-				IF_ADDR_UNLOCK(ifp);
+				IF_ADDR_RUNLOCK(ifp);
 				goto done;
 			}
 			/* IP6 doesn't have broadcast */
@@ -1565,11 +1584,11 @@ ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
 			    sa_equal(ifa->ifa_broadaddr, addr)) {
 				if (getref)
 					ifa_ref(ifa);
-				IF_ADDR_UNLOCK(ifp);
+				IF_ADDR_RUNLOCK(ifp);
 				goto done;
 			}
 		}
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 	}
 	ifa = NULL;
 done:
@@ -1603,7 +1622,7 @@ ifa_ifwithbroadaddr(struct sockaddr *addr)
 
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
@@ -1612,11 +1631,11 @@ ifa_ifwithbroadaddr(struct sockaddr *addr)
 			    ifa->ifa_broadaddr->sa_len != 0 &&
 			    sa_equal(ifa->ifa_broadaddr, addr)) {
 				ifa_ref(ifa);
-				IF_ADDR_UNLOCK(ifp);
+				IF_ADDR_RUNLOCK(ifp);
 				goto done;
 			}
 		}
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 	}
 	ifa = NULL;
 done:
@@ -1638,18 +1657,18 @@ ifa_ifwithdstaddr(struct sockaddr *addr)
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 			continue;
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
 			if (ifa->ifa_dstaddr != NULL &&
 			    sa_equal(addr, ifa->ifa_dstaddr)) {
 				ifa_ref(ifa);
-				IF_ADDR_UNLOCK(ifp);
+				IF_ADDR_RUNLOCK(ifp);
 				goto done;
 			}
 		}
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 	}
 	ifa = NULL;
 done:
@@ -1683,12 +1702,12 @@ ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp)
 	/*
 	 * Scan though each interface, looking for ones that have addresses
 	 * in this address family.  Maintain a reference on ifa_maybe once
-	 * we find one, as we release the IF_ADDR_LOCK() that kept it stable
+	 * we find one, as we release the IF_ADDR_RLOCK() that kept it stable
 	 * when we move onto the next interface.
 	 */
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			char *cp, *cp2, *cp3;
 
@@ -1707,7 +1726,7 @@ next:				continue;
 				if (ifa->ifa_dstaddr != NULL &&
 				    sa_equal(addr, ifa->ifa_dstaddr)) {
 					ifa_ref(ifa);
-					IF_ADDR_UNLOCK(ifp);
+					IF_ADDR_RUNLOCK(ifp);
 					goto done;
 				}
 			} else {
@@ -1718,7 +1737,7 @@ next:				continue;
 				if (ifa->ifa_claim_addr) {
 					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
 						ifa_ref(ifa);
-						IF_ADDR_UNLOCK(ifp);
+						IF_ADDR_RUNLOCK(ifp);
 						goto done;
 					}
 					continue;
@@ -1758,7 +1777,7 @@ next:				continue;
 				}
 			}
 		}
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 	}
 	ifa = ifa_maybe;
 	ifa_maybe = NULL;
@@ -1784,7 +1803,7 @@ ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
 
 	if (af >= AF_MAX)
 		return (NULL);
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != af)
 			continue;
@@ -1816,7 +1835,7 @@ ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
 done:
 	if (ifa != NULL)
 		ifa_ref(ifa);
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_RUNLOCK(ifp);
 	return (ifa);
 }
 
@@ -1936,14 +1955,10 @@ do_link_state_change(void *arg, int pending)
 		(*ng_ether_link_state_p)(ifp, link_state);
 	if (ifp->if_carp)
 		(*carp_linkstate_p)(ifp);
-	if (ifp->if_bridge) {
-		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
-		(*bstp_linkstate_p)(ifp, link_state);
-	}
-	if (ifp->if_lagg) {
-		KASSERT(lagg_linkstate_p != NULL,("if_lagg not loaded!"));
+	if (ifp->if_bridge)
+		(*bridge_linkstate_p)(ifp);
+	if (ifp->if_lagg)
 		(*lagg_linkstate_p)(ifp, link_state);
-	}
 
 	if (IS_DEFAULT_VNET(curvnet))
 		devctl_notify("IFNET", ifp->if_xname,
@@ -2180,6 +2195,20 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
 		free(odescrbuf, M_IFDESCR);
 		break;
 
+	case SIOCGIFFIB:
+		ifr->ifr_fib = ifp->if_fib;
+		break;
+
+	case SIOCSIFFIB:
+		error = priv_check(td, PRIV_NET_SETIFFIB);
+		if (error)
+			return (error);
+		if (ifr->ifr_fib >= rt_numfibs)
+			return (EINVAL);
+
+		ifp->if_fib = ifr->ifr_fib;
+		break;
+
 	case SIOCSIFFLAGS:
 		error = priv_check(td, PRIV_NET_SETIFFLAGS);
 		if (error)
@@ -2379,9 +2408,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
 			 * lose a race while we check if the membership
 			 * already exists.
 			 */
-			IF_ADDR_LOCK(ifp);
+			IF_ADDR_RLOCK(ifp);
 			ifma = if_findmulti(ifp, &ifr->ifr_addr);
-			IF_ADDR_UNLOCK(ifp);
+			IF_ADDR_RUNLOCK(ifp);
 			if (ifma != NULL)
 				error = EADDRINUSE;
 			else
@@ -2492,10 +2521,13 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
 	int error;
 	int oif_flags;
 
+	CURVNET_SET(so->so_vnet);
 	switch (cmd) {
 	case SIOCGIFCONF:
 	case OSIOCGIFCONF:
-		return (ifconf(cmd, data));
+		error = ifconf(cmd, data);
+		CURVNET_RESTORE();
+		return (error);
 
 #ifdef COMPAT_FREEBSD32
 	case SIOCGIFCONF32:
@@ -2507,7 +2539,11 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
 			ifc.ifc_len = ifc32->ifc_len;
 			ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
 
-			return (ifconf(SIOCGIFCONF, (void *)&ifc));
+			error = ifconf(SIOCGIFCONF, (void *)&ifc);
+			CURVNET_RESTORE();
+			if (error == 0)
+				ifc32->ifc_len = ifc.ifc_len;
+			return (error);
 		}
 #endif
 	}
@@ -2517,49 +2553,74 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
 #ifdef VIMAGE
 	case SIOCSIFRVNET:
 		error = priv_check(td, PRIV_NET_SETIFVNET);
-		if (error)
-			return (error);
-		return (if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid));
+		if (error == 0)
+			error = if_vmove_reclaim(td, ifr->ifr_name,
+			    ifr->ifr_jid);
+		CURVNET_RESTORE();
+		return (error);
 #endif
 	case SIOCIFCREATE:
 	case SIOCIFCREATE2:
 		error = priv_check(td, PRIV_NET_IFCREATE);
-		if (error)
-			return (error);
-		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
-			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
+		if (error == 0)
+			error = if_clone_create(ifr->ifr_name,
+			    sizeof(ifr->ifr_name),
+			    cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL);
+		CURVNET_RESTORE();
+		return (error);
 	case SIOCIFDESTROY:
 		error = priv_check(td, PRIV_NET_IFDESTROY);
-		if (error)
-			return (error);
-		return if_clone_destroy(ifr->ifr_name);
+		if (error == 0)
+			error = if_clone_destroy(ifr->ifr_name);
+		CURVNET_RESTORE();
+		return (error);
 
 	case SIOCIFGCLONERS:
-		return (if_clone_list((struct if_clonereq *)data));
+		error = if_clone_list((struct if_clonereq *)data);
+		CURVNET_RESTORE();
+		return (error);
 	case SIOCGIFGMEMB:
-		return (if_getgroupmembers((struct ifgroupreq *)data));
+		error = if_getgroupmembers((struct ifgroupreq *)data);
+		CURVNET_RESTORE();
+		return (error);
 	}
 
 	ifp = ifunit_ref(ifr->ifr_name);
-	if (ifp == NULL)
+	if (ifp == NULL) {
+		CURVNET_RESTORE();
 		return (ENXIO);
+	}
 
 	error = ifhwioctl(cmd, ifp, data, td);
 	if (error != ENOIOCTL) {
 		if_rele(ifp);
+		CURVNET_RESTORE();
 		return (error);
 	}
 
 	oif_flags = ifp->if_flags;
 	if (so->so_proto == NULL) {
 		if_rele(ifp);
+		CURVNET_RESTORE();
 		return (EOPNOTSUPP);
 	}
+
+	/*
+	 * Pass the request on to the socket control method, and if the
+	 * latter returns EOPNOTSUPP, directly to the interface.
+	 *
+	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
+	 * trust SIOCSIFADDR et al to come from an already privileged
+	 * layer, and do not perform any credentials checks or input
+	 * validation.
+	 */
 #ifndef COMPAT_43
 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
 								 data,
 								 ifp, td));
-	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL)
+	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
+	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
+	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 #else
 	{
@@ -2603,7 +2664,9 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
 								   data,
 								   ifp, td));
 		if (error == EOPNOTSUPP && ifp != NULL &&
-		    ifp->if_ioctl != NULL)
+		    ifp->if_ioctl != NULL &&
+		    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
+		    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
 			error = (*ifp->if_ioctl)(ifp, cmd, data);
 		switch (ocmd) {
 
@@ -2627,6 +2690,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
 #endif
 	}
 	if_rele(ifp);
+	CURVNET_RESTORE();
 	return (error);
 }
 
@@ -2776,7 +2840,7 @@ again:
 		}
 
 		addrs = 0;
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa = ifa->ifa_addr;
 
@@ -2808,7 +2872,7 @@ again:
 			if (!sbuf_overflowed(sb))
 				valid_len = sbuf_len(sb);
 		}
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 		if (addrs == 0) {
 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
 			sbuf_bcat(sb, &ifr, sizeof(ifr));
@@ -2966,13 +3030,13 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
 	 * If the address is already present, return a new reference to it;
 	 * otherwise, allocate storage and set up a new address.
 	 */
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_WLOCK(ifp);
 	ifma = if_findmulti(ifp, sa);
 	if (ifma != NULL) {
 		ifma->ifma_refcount++;
 		if (retifma != NULL)
 			*retifma = ifma;
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_WUNLOCK(ifp);
 		return (0);
 	}
 
@@ -3038,7 +3102,7 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
 	 * pointer is still valid.
 	 */
 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_WUNLOCK(ifp);
 
 	/*
 	 * We are certain we have added something, so call down to the
@@ -3058,7 +3122,7 @@ free_llsa_out:
 		free(llsa, M_IFMADDR);
 
 unlock_out:
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_WUNLOCK(ifp);
 	return (error);
 }
 
@@ -3092,12 +3156,12 @@ if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
 	if (ifp == NULL)
 		return (ENOENT);
 
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_WLOCK(ifp);
 	lastref = 0;
 	ifma = if_findmulti(ifp, sa);
 	if (ifma != NULL)
 		lastref = if_delmulti_locked(ifp, ifma, 0);
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_WUNLOCK(ifp);
 
 	if (ifma == NULL)
 		return (ENOENT);
@@ -3119,10 +3183,10 @@ if_delallmulti(struct ifnet *ifp)
 	struct ifmultiaddr *ifma;
 	struct ifmultiaddr *next;
 
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_WLOCK(ifp);
 	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
 		if_delmulti_locked(ifp, ifma, 0);
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_WUNLOCK(ifp);
 }
 
 /*
@@ -3159,7 +3223,7 @@ if_delmulti_ifma(struct ifmultiaddr *ifma)
 	 * If and only if the ifnet instance exists: Acquire the address lock.
 	 */
 	if (ifp != NULL)
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_WLOCK(ifp);
 
 	lastref = if_delmulti_locked(ifp, ifma, 0);
 
@@ -3169,7 +3233,7 @@ if_delmulti_ifma(struct ifmultiaddr *ifma)
 		 *  Release the address lock.
 		 *  If the group was left: update the hardware hash filter.
 		 */
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_WUNLOCK(ifp);
 		if (lastref && ifp->if_ioctl != NULL) {
 			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
 		}
@@ -3191,7 +3255,7 @@ if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
 	if (ifp != NULL && ifma->ifma_ifp != NULL) {
 		KASSERT(ifma->ifma_ifp == ifp,
 		    ("%s: inconsistent ifp %p", __func__, ifp));
-		IF_ADDR_LOCK_ASSERT(ifp);
+		IF_ADDR_WLOCK_ASSERT(ifp);
 	}
 
 	ifp = ifma->ifma_ifp;
@@ -3264,14 +3328,14 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
 	struct ifaddr *ifa;
 	struct ifreq ifr;
 
-	IF_ADDR_LOCK(ifp);
+	IF_ADDR_RLOCK(ifp);
 	ifa = ifp->if_addr;
 	if (ifa == NULL) {
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 		return (EINVAL);
 	}
 	ifa_ref(ifa);
-	IF_ADDR_UNLOCK(ifp);
+	IF_ADDR_RUNLOCK(ifp);
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	if (sdl == NULL) {
 		ifa_free(ifa);
diff --git a/freebsd/sys/net/if.h b/freebsd/sys/net/if.h
index 1a6423f6..25d43ac3 100644
--- a/freebsd/sys/net/if.h
+++ b/freebsd/sys/net/if.h
@@ -145,7 +145,7 @@ struct if_data {
 #define	IFF_LINK2	0x4000		/* per link layer defined bit */
 #define	IFF_ALTPHYS	IFF_LINK2	/* use alternate physical connection */
 #define	IFF_MULTICAST	0x8000		/* (i) supports multicast */
-/*			0x10000		*/
+#define	IFF_CANTCONFIG	0x10000		/* (i) unconfigurable using ioctl(2) */
 #define	IFF_PPROMISC	0x20000		/* (n) user-requested promisc mode */
 #define	IFF_MONITOR	0x40000		/* (n) user-requested monitor mode */
 #define	IFF_STATICARP	0x80000		/* (n) static ARP */
@@ -165,7 +165,7 @@ struct if_data {
 #define	IFF_CANTCHANGE \
 	(IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\
 	    IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_SMART|IFF_PROMISC|\
-	    IFF_DYING)
+	    IFF_DYING|IFF_CANTCONFIG)
 
 /*
  * Values for if_link_state.
@@ -220,6 +220,7 @@ struct if_data {
 #define	IFCAP_POLLING_NOCOUNT	0x20000 /* polling ticks cannot be fragmented */
 #define	IFCAP_VLAN_HWTSO	0x40000 /* can do IFCAP_TSO on VLANs */
 #define	IFCAP_LINKSTATE		0x80000 /* the runtime link state is dynamic */
+#define	IFCAP_NETMAP		0x100000 /* netmap mode supported/enabled */
 
 #define IFCAP_HWCSUM	(IFCAP_RXCSUM | IFCAP_TXCSUM)
 #define	IFCAP_TSO	(IFCAP_TSO4 | IFCAP_TSO6)
@@ -232,6 +233,7 @@ struct if_data {
 /*
  * Message format for use in obtaining information about interfaces
  * from getkerninfo and the routing socket
+ * For the new, extensible interface see struct if_msghdrl below.
  */
 struct if_msghdr {
 	u_short	ifm_msglen;	/* to skip over non-understood messages */
@@ -244,8 +246,34 @@ struct if_msghdr {
 };
 
 /*
+ * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL.  It is
+ * extensible after ifm_data_off or within ifm_data.  Both the if_msghdrl and
+ * if_data now have a member field detailing the struct length in addition to
+ * the routing message length.  Macros are provided to find the start of
+ * ifm_data and the start of the socket address structures immediately following
+ * struct if_msghdrl given a pointer to struct if_msghdrl.
+ */
+#define	IF_MSGHDRL_IFM_DATA(_l) \
+    (struct if_data *)((char *)(_l) + (_l)->ifm_data_off)
+#define	IF_MSGHDRL_RTA(_l) \
+    (void *)((uintptr_t)(_l) + (_l)->ifm_len)
+struct if_msghdrl {
+	u_short	ifm_msglen;	/* to skip over non-understood messages */
+	u_char	ifm_version;	/* future binary compatibility */
+	u_char	ifm_type;	/* message type */
+	int	ifm_addrs;	/* like rtm_addrs */
+	int	ifm_flags;	/* value of if_flags */
+	u_short	ifm_index;	/* index for associated ifp */
+	u_short _ifm_spare1;	/* spare space to grow if_index, see if_var.h */
+	u_short	ifm_len;	/* length of if_msghdrl incl. if_data */
+	u_short	ifm_data_off;	/* offset of if_data from beginning */
+	struct	if_data ifm_data;/* statistics and other data about if */
+};
+
+/*
  * Message format for use in obtaining information about interface addresses
  * from getkerninfo and the routing socket
+ * For the new, extensible interface see struct ifa_msghdrl below.
  */
 struct ifa_msghdr {
 	u_short	ifam_msglen;	/* to skip over non-understood messages */
@@ -258,6 +286,33 @@ struct ifa_msghdr {
 };
 
 /*
+ * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL.  It is
+ * extensible after ifam_metric or within ifam_data.  Both the ifa_msghdrl and
+ * if_data now have a member field detailing the struct length in addition to
+ * the routing message length.  Macros are provided to find the start of
+ * ifam_data and the start of the socket address structures immediately
+ * following struct ifa_msghdrl given a pointer to struct ifa_msghdrl.
+ */
+#define	IFA_MSGHDRL_IFAM_DATA(_l) \
+    (struct if_data *)((char *)(_l) + (_l)->ifam_data_off)
+#define	IFA_MSGHDRL_RTA(_l) \
+    (void *)((uintptr_t)(_l) + (_l)->ifam_len)
+struct ifa_msghdrl {
+	u_short	ifam_msglen;	/* to skip over non-understood messages */
+	u_char	ifam_version;	/* future binary compatibility */
+	u_char	ifam_type;	/* message type */
+	int	ifam_addrs;	/* like rtm_addrs */
+	int	ifam_flags;	/* value of ifa_flags */
+	u_short	ifam_index;	/* index for associated ifp */
+	u_short _ifam_spare1;	/* spare space to grow if_index, see if_var.h */
+	u_short	ifam_len;	/* length of ifa_msghdrl incl. if_data */
+	u_short	ifam_data_off;	/* offset of if_data from beginning */
+	int	ifam_metric;	/* value of ifa_metric */
+	struct	if_data ifam_data;/* statistics and other data about if or
+				 * address */
+};
+
+/*
  * Message format for use in obtaining information about multicast addresses
  * from the routing socket
  */
@@ -315,6 +370,7 @@ struct	ifreq {
 		int	ifru_media;
 		caddr_t	ifru_data;
 		int	ifru_cap[2];
+		u_int	ifru_fib;
 	} ifr_ifru;
 #define	ifr_addr	ifr_ifru.ifru_addr	/* address */
 #define	ifr_dstaddr	ifr_ifru.ifru_dstaddr	/* other end of p-to-p link */
@@ -331,6 +387,7 @@ struct	ifreq {
 #define	ifr_reqcap	ifr_ifru.ifru_cap[0]	/* requested capabilities */
 #define	ifr_curcap	ifr_ifru.ifru_cap[1]	/* current capabilities */
 #define	ifr_index	ifr_ifru.ifru_index	/* interface index */
+#define	ifr_fib		ifr_ifru.ifru_fib	/* interface fib */
 };
 
 #define	_SIZEOF_ADDR_IFREQ(ifr) \
diff --git a/freebsd/sys/net/if_arcsubr.c b/freebsd/sys/net/if_arcsubr.c
index dc75b445..e9422068 100644
--- a/freebsd/sys/net/if_arcsubr.c
+++ b/freebsd/sys/net/if_arcsubr.c
@@ -610,6 +610,7 @@ arc_input(struct ifnet *ifp, struct mbuf *m)
 		m_freem(m);
 		return;
 	}
+	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 }
 
diff --git a/freebsd/sys/net/if_atmsubr.c b/freebsd/sys/net/if_atmsubr.c
index 747bc936..e3ce4ea0 100644
--- a/freebsd/sys/net/if_atmsubr.c
+++ b/freebsd/sys/net/if_atmsubr.c
@@ -334,6 +334,7 @@ atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m,
 			return;
 		}
 	}
+	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 }
 
diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
index 5c15a78f..52146381 100644
--- a/freebsd/sys/net/if_bridge.c
+++ b/freebsd/sys/net/if_bridge.c
@@ -87,6 +87,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/malloc.h>
 #include <sys/protosw.h>
 #include <sys/systm.h>
+#include <sys/jail.h>
 #include <rtems/bsd/sys/time.h>
 #include <sys/socket.h> /* for net/if.h */
 #include <sys/sockio.h>
@@ -145,10 +146,10 @@ __FBSDID("$FreeBSD$");
 #define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
 
 /*
- * Maximum number of addresses to cache.
+ * Default maximum number of addresses to cache.
  */
 #ifndef BRIDGE_RTABLE_MAX
-#define	BRIDGE_RTABLE_MAX		100
+#define	BRIDGE_RTABLE_MAX		2000
 #endif
 
 /*
@@ -334,6 +335,10 @@ static int	bridge_ip6_checkbasic(struct mbuf **mp);
 #endif /* INET6 */
 static int	bridge_fragment(struct ifnet *, struct mbuf *,
 		    struct ether_header *, int, struct llc *);
+static void	bridge_linkstate(struct ifnet *ifp);
+static void	bridge_linkcheck(struct bridge_softc *sc);
+
+extern void (*bridge_linkstate_p)(struct ifnet *ifp);
 
 /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
 #define	VLANTAGOF(_m)	\
@@ -356,19 +361,26 @@ static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for
                                    locally destined packets */
 static int log_stp   = 0;   /* log STP state changes */
 static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
+TUNABLE_INT("net.link.bridge.pfil_onlyip", &pfil_onlyip);
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
+TUNABLE_INT("net.link.bridge.ipfw_arp", &pfil_ipfw_arp);
 SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW,
     &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2");
+TUNABLE_INT("net.link.bridge.pfil_bridge", &pfil_bridge);
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
     &pfil_bridge, 0, "Packet filter on the bridge interface");
+TUNABLE_INT("net.link.bridge.pfil_member", &pfil_member);
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
     &pfil_member, 0, "Packet filter on the member interface");
+TUNABLE_INT("net.link.bridge.pfil_local_phys", &pfil_local_phys);
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RW,
     &pfil_local_phys, 0,
     "Packet filter on the physical interface for locally destined packets");
+TUNABLE_INT("net.link.bridge.log_stp", &log_stp);
 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
     &log_stp, 0, "Log STP state changes");
+TUNABLE_INT("net.link.bridge.inherit_mac", &bridge_inherit_mac);
 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RW,
     &bridge_inherit_mac, 0,
     "Inherit MAC address from the first bridge member");
@@ -490,6 +502,7 @@ bridge_modevent(module_t mod, int type, void *data)
 		bridge_input_p = bridge_input;
 		bridge_output_p = bridge_output;
 		bridge_dn_p = bridge_dummynet;
+		bridge_linkstate_p = bridge_linkstate;
 		bridge_detach_cookie = EVENTHANDLER_REGISTER(
 		    ifnet_departure_event, bridge_ifdetach, NULL,
 		    EVENTHANDLER_PRI_ANY);
@@ -502,6 +515,7 @@ bridge_modevent(module_t mod, int type, void *data)
 		bridge_input_p = NULL;
 		bridge_output_p = NULL;
 		bridge_dn_p = NULL;
+		bridge_linkstate_p = NULL;
 		mtx_destroy(&bridge_list_mtx);
 		break;
 	default:
@@ -562,7 +576,8 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct bridge_softc *sc, *sc2;
 	struct ifnet *bifp, *ifp;
-	int retry;
+	int fb, retry;
+	unsigned long hostid;
 
 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -595,17 +610,30 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 	IFQ_SET_READY(&ifp->if_snd);
 
 	/*
-	 * Generate a random ethernet address with a locally administered
-	 * address.
+	 * Generate an ethernet address with a locally administered address.
 	 *
 	 * Since we are using random ethernet addresses for the bridge, it is
 	 * possible that we might have address collisions, so make sure that
 	 * this hardware address isn't already in use on another bridge.
+	 * The first try uses the hostid and falls back to arc4rand().
 	 */
+	fb = 0;
+	getcredhostid(curthread->td_ucred, &hostid);
 	for (retry = 1; retry != 0;) {
-		arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
-		sc->sc_defaddr[0] &= ~1;	/* clear multicast bit */
-		sc->sc_defaddr[0] |= 2;		/* set the LAA bit */
+		if (fb || hostid == 0) {
+			arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
+			sc->sc_defaddr[0] &= ~1;/* clear multicast bit */
+			sc->sc_defaddr[0] |= 2;	/* set the LAA bit */
+		} else {
+			sc->sc_defaddr[0] = 0x2;
+			sc->sc_defaddr[1] = (hostid >> 24) & 0xff;
+			sc->sc_defaddr[2] = (hostid >> 16) & 0xff;
+			sc->sc_defaddr[3] = (hostid >> 8 ) & 0xff;
+			sc->sc_defaddr[4] =  hostid        & 0xff;
+			sc->sc_defaddr[5] = ifp->if_dunit & 0xff;
+		}
+
+		fb = 1;
 		retry = 0;
 		mtx_lock(&bridge_list_mtx);
 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
@@ -939,6 +967,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
 		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
 	}
 
+	bridge_linkcheck(sc);
 	bridge_mutecaps(sc);	/* recalcuate now this interface is removed */
 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
 	KASSERT(bif->bif_addrcnt == 0,
@@ -1066,17 +1095,16 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
 
 	/* Set interface capabilities to the intersection set of all members */
 	bridge_mutecaps(sc);
+	bridge_linkcheck(sc);
 
+	/* Place the interface into promiscuous mode */
 	switch (ifs->if_type) {
-	case IFT_ETHER:
-	case IFT_L2VLAN:
-		/*
-		 * Place the interface into promiscuous mode.
-		 */
-		BRIDGE_UNLOCK(sc);
-		error = ifpromisc(ifs, 1);
-		BRIDGE_LOCK(sc);
-		break;
+		case IFT_ETHER:
+		case IFT_L2VLAN:
+			BRIDGE_UNLOCK(sc);
+			error = ifpromisc(ifs, 1);
+			BRIDGE_LOCK(sc);
+			break;
 	}
 	if (error)
 		bridge_delete_member(sc, bif, 0);
@@ -2195,11 +2223,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 		/* Tap off 802.1D packets; they do not get forwarded. */
 		if (memcmp(eh->ether_dhost, bstp_etheraddr,
 		    ETHER_ADDR_LEN) == 0) {
-			m = bstp_input(&bif->bif_stp, ifp, m);
-			if (m == NULL) {
-				BRIDGE_UNLOCK(sc);
-				return (NULL);
-			}
+			bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */
+			BRIDGE_UNLOCK(sc);
+			return (NULL);
 		}
 
 		if ((bif->bif_flags & IFBIF_STP) &&
@@ -3456,3 +3482,46 @@ out:
 		m_freem(m);
 	return (error);
 }
+
+static void
+bridge_linkstate(struct ifnet *ifp)
+{
+	struct bridge_softc *sc = ifp->if_bridge;
+	struct bridge_iflist *bif;
+
+	BRIDGE_LOCK(sc);
+	bif = bridge_lookup_member_if(sc, ifp);
+	if (bif == NULL) {
+		BRIDGE_UNLOCK(sc);
+		return;
+	}
+	bridge_linkcheck(sc);
+	BRIDGE_UNLOCK(sc);
+
+	bstp_linkstate(&bif->bif_stp);
+}
+
+static void
+bridge_linkcheck(struct bridge_softc *sc)
+{
+	struct bridge_iflist *bif;
+	int new_link, hasls;
+
+	BRIDGE_LOCK_ASSERT(sc);
+	new_link = LINK_STATE_DOWN;
+	hasls = 0;
+	/* Our link is considered up if at least one of our ports is active */
+	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+		if (bif->bif_ifp->if_capabilities & IFCAP_LINKSTATE)
+			hasls++;
+		if (bif->bif_ifp->if_link_state == LINK_STATE_UP) {
+			new_link = LINK_STATE_UP;
+			break;
+		}
+	}
+	if (!LIST_EMPTY(&sc->sc_iflist) && !hasls) {
+		/* If no interfaces support link-state then we default to up */
+		new_link = LINK_STATE_UP;
+	}
+	if_link_state_change(sc->sc_ifp, new_link);
+}
diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c
index cd7a6c79..fafc0259 100644
--- a/freebsd/sys/net/if_epair.c
+++ b/freebsd/sys/net/if_epair.c
@@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$");
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_clone.h>
+#include <net/if_media.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
@@ -94,6 +95,8 @@ static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *);
 static void epair_nh_drainedcpu(u_int);
 
 static void epair_start_locked(struct ifnet *);
+static int epair_media_change(struct ifnet *);
+static void epair_media_status(struct ifnet *, struct ifmediareq *);
 
 static int epair_clone_match(struct if_clone *, const char *);
 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
@@ -129,6 +132,7 @@ SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
 struct epair_softc {
 	struct ifnet	*ifp;		/* This ifp. */
 	struct ifnet	*oifp;		/* other ifp of pair. */
+	struct ifmedia	media;		/* Media config (fake). */
 	u_int		refcount;	/* # of mbufs in flight. */
 	u_int		cpuid;		/* CPU ID assigned upon creation. */
 	void		(*if_qflush)(struct ifnet *);
@@ -191,10 +195,7 @@ epair_dpcpu_init(void)
 	struct eid_list *s;
 	u_int cpuid;
 
-	for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
-		if (CPU_ABSENT(cpuid))
-			continue;
-
+	CPU_FOREACH(cpuid) {
 		epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
 
 		/* Initialize per-cpu lock. */
@@ -219,10 +220,7 @@ epair_dpcpu_detach(void)
 	struct epair_dpcpu *epair_dpcpu;
 	u_int cpuid;
 
-	for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
-		if (CPU_ABSENT(cpuid))
-			continue;
-
+	CPU_FOREACH(cpuid) {
 		epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
 
 		/* Destroy per-cpu lock. */
@@ -332,10 +330,7 @@ epair_remove_ifp_from_draining(struct ifnet *ifp)
 	struct epair_ifp_drain *elm, *tvar;
 	u_int cpuid;
 
-	for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
-		if (CPU_ABSENT(cpuid))
-			continue;
-
+	CPU_FOREACH(cpuid) {
 		epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
 		EPAIR_LOCK(epair_dpcpu);
 		STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list,
@@ -622,8 +617,25 @@ epair_qflush(struct ifnet *ifp)
 }
 
 static int
+epair_media_change(struct ifnet *ifp __unused)
+{
+
+	/* Do nothing. */
+	return (0);
+}
+
+static void
+epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr)
+{
+
+	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
+	imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX;
+}
+
+static int
 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
+	struct epair_softc *sc;
 	struct ifreq *ifr;
 	int error;
 
@@ -635,6 +647,12 @@ epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		error = 0;
 		break;
 
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		sc = ifp->if_softc;
+		error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd);
+		break;
+
 	case SIOCSIFMTU:
 		/* We basically allow all kinds of MTUs. */
 		ifp->if_mtu = ifr->ifr_mtu;
@@ -794,6 +812,8 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 	ifp->if_dname = ifc->ifc_name;
 	ifp->if_dunit = unit;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_capabilities = IFCAP_VLAN_MTU;
+	ifp->if_capenable = IFCAP_VLAN_MTU;
 	ifp->if_start = epair_start;
 	ifp->if_ioctl = epair_ioctl;
 	ifp->if_init  = epair_init;
@@ -818,6 +838,8 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 	ifp->if_dname = ifc->ifc_name;
 	ifp->if_dunit = unit;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_capabilities = IFCAP_VLAN_MTU;
+	ifp->if_capenable = IFCAP_VLAN_MTU;
 	ifp->if_start = epair_start;
 	ifp->if_ioctl = epair_ioctl;
 	ifp->if_init  = epair_init;
@@ -840,6 +862,14 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 	strlcpy(name, sca->ifp->if_xname, len);
 	DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
 
+	/* Initialise pseudo media types. */
+	ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
+	ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+	ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
+	ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
+	ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+	ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
+
 	/* Tell the world, that we are ready to rock. */
 	sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
@@ -876,37 +906,41 @@ epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
 	if_link_state_change(oifp, LINK_STATE_DOWN);
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	oifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
+	/*
+	 * Get rid of our second half. As the other of the two
+	 * interfaces may reside in a different vnet, we need to
+	 * switch before freeing them.
+	 */
+	CURVNET_SET_QUIET(oifp->if_vnet);
 	ether_ifdetach(oifp);
-	ether_ifdetach(ifp);
 	/*
 	 * Wait for all packets to be dispatched to if_input.
-	 * The numbers can only go down as the interfaces are
+	 * The numbers can only go down as the interface is
 	 * detached so there is no need to use atomics.
 	 */
-	DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount);
-	EPAIR_REFCOUNT_ASSERT(sca->refcount == 1 && scb->refcount == 1,
-	    ("%s: ifp=%p sca->refcount!=1: %d || ifp=%p scb->refcount!=1: %d",
-	    __func__, ifp, sca->refcount, oifp, scb->refcount));
-
-	/*
-	 * Get rid of our second half.
-	 */
+	DPRINTF("scb refcnt=%u\n", scb->refcount);
+	EPAIR_REFCOUNT_ASSERT(scb->refcount == 1,
+	    ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount));
 	oifp->if_softc = NULL;
 	error = if_clone_destroyif(ifc, oifp);
 	if (error)
 		panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
 		    __func__, error);
+	if_free(oifp);
+	ifmedia_removeall(&scb->media);
+	free(scb, M_EPAIR);
+	CURVNET_RESTORE();
 
+	ether_ifdetach(ifp);
 	/*
-	 * Finish cleaning up. Free them and release the unit.
-	 * As the other of the two interfaces my reside in a different vnet,
-	 * we need to switch before freeing them.
+	 * Wait for all packets to be dispatched to if_input.
 	 */
-	CURVNET_SET_QUIET(oifp->if_vnet);
-	if_free(oifp);
-	CURVNET_RESTORE();
+	DPRINTF("sca refcnt=%u\n", sca->refcount);
+	EPAIR_REFCOUNT_ASSERT(sca->refcount == 1,
+	    ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount));
 	if_free(ifp);
-	free(scb, M_EPAIR);
+	ifmedia_removeall(&sca->media);
 	free(sca, M_EPAIR);
 	ifc_free_unit(ifc, unit);
 
diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c
index 02a5d002..b7c48731 100644
--- a/freebsd/sys/net/if_ethersubr.c
+++ b/freebsd/sys/net/if_ethersubr.c
@@ -662,8 +662,10 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
 		m = (*lagg_input_p)(ifp, m);
 		if (m != NULL)
 			ifp = m->m_pkthdr.rcvif;
-		else 
+		else {
+			CURVNET_RESTORE();
 			return;
+		}
 	}
 
 	/*
@@ -682,6 +684,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
 #endif
 			ifp->if_ierrors++;
 			m_freem(m);
+			CURVNET_RESTORE();
 			return;
 		}
 
@@ -694,6 +697,8 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
 		m_adj(m, ETHER_VLAN_ENCAP_LEN);
 	}
 
+	M_SETFIB(m, ifp->if_fib);
+
 	/* Allow ng_ether(4) to claim this frame. */
 	if (IFP2AC(ifp)->ac_netgraph != NULL) {
 		KASSERT(ng_ether_input_p != NULL,
diff --git a/freebsd/sys/net/if_faith.c b/freebsd/sys/net/if_faith.c
index d99e16ea..58de362a 100644
--- a/freebsd/sys/net/if_faith.c
+++ b/freebsd/sys/net/if_faith.c
@@ -340,7 +340,7 @@ faithprefix(in6)
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_addr = *in6;
-	rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
+	rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB);
 	if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH &&
 	    (rt->rt_ifp->if_flags & IFF_UP) != 0)
 		ret = 1;
diff --git a/freebsd/sys/net/if_fddisubr.c b/freebsd/sys/net/if_fddisubr.c
index ba4db83f..154fe2fc 100644
--- a/freebsd/sys/net/if_fddisubr.c
+++ b/freebsd/sys/net/if_fddisubr.c
@@ -552,6 +552,7 @@ fddi_input(ifp, m)
 		ifp->if_noproto++;
 		goto dropanyway;
 	}
+	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 	return;
 
diff --git a/freebsd/sys/net/if_fwsubr.c b/freebsd/sys/net/if_fwsubr.c
index a9931419..df90d48d 100644
--- a/freebsd/sys/net/if_fwsubr.c
+++ b/freebsd/sys/net/if_fwsubr.c
@@ -629,6 +629,7 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
 		return;
 	}
 
+	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 }
 
diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c
index d9144419..1a8e4c8d 100644
--- a/freebsd/sys/net/if_gif.c
+++ b/freebsd/sys/net/if_gif.c
@@ -37,6 +37,7 @@
 
 #include <rtems/bsd/sys/param.h>
 #include <sys/systm.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
@@ -493,7 +494,7 @@ gif_input(m, af, ifp)
 	struct ifnet *ifp;
 {
 	int isr, n;
-	struct gif_softc *sc = ifp->if_softc;
+	struct gif_softc *sc;
 	struct etherip_header *eip;
 	struct ether_header *eh;
 	struct ifnet *oldifp;
@@ -503,7 +504,7 @@ gif_input(m, af, ifp)
 		m_freem(m);
 		return;
 	}
-
+	sc = ifp->if_softc;
 	m->m_pkthdr.rcvif = ifp;
 
 #ifdef MAC
@@ -614,6 +615,7 @@ gif_input(m, af, ifp)
 
 	ifp->if_ipackets++;
 	ifp->if_ibytes += m->m_pkthdr.len;
+	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 }
 
@@ -823,6 +825,12 @@ gif_ioctl(ifp, cmd, data)
 		}
 		if (src->sa_len > size)
 			return EINVAL;
+		error = prison_if(curthread->td_ucred, src);
+		if (error != 0)
+			return (error);
+		error = prison_if(curthread->td_ucred, dst);
+		if (error != 0)
+			return (error);
 		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
 #ifdef INET6
 		if (dst->sa_family == AF_INET6) {
diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c
index a75e52a4..21f39eb2 100644
--- a/freebsd/sys/net/if_gre.c
+++ b/freebsd/sys/net/if_gre.c
@@ -55,7 +55,9 @@
 #include <rtems/bsd/local/opt_inet6.h>
 
 #include <rtems/bsd/sys/param.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
+#include <sys/libkern.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
@@ -99,6 +101,14 @@
 
 #define GRENAME	"gre"
 
+#define	MTAG_COOKIE_GRE		1307983903
+#define	MTAG_GRE_NESTING	1
+struct mtag_gre_nesting {
+	uint16_t	count;
+	uint16_t	max;
+	struct ifnet	*ifp[];
+};
+
 /*
  * gre_mtx protects all global variables in if_gre.c.
  * XXX: gre_softc data not protected yet.
@@ -204,7 +214,6 @@ gre_clone_create(ifc, unit, params)
 	sc->g_proto = IPPROTO_GRE;
 	GRE2IFP(sc)->if_flags |= IFF_LINK0;
 	sc->encap = NULL;
-	sc->called = 0;
 #ifndef __rtems__
 	sc->gre_fibnum = curthread->td_proc->p_fibnum;
 #else /* __rtems__ */
@@ -252,23 +261,77 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
 	struct gre_softc *sc = ifp->if_softc;
 	struct greip *gh;
 	struct ip *ip;
+	struct m_tag *mtag;
+	struct mtag_gre_nesting *gt;
+	size_t len;
 	u_short gre_ip_id = 0;
 	uint8_t gre_ip_tos = 0;
 	u_int16_t etype = 0;
 	struct mobile_h mob_h;
 	u_int32_t af;
-	int extra = 0;
+	int extra = 0, max;
 
 	/*
-	 * gre may cause infinite recursion calls when misconfigured.
-	 * We'll prevent this by introducing upper limit.
+	 * gre may cause infinite recursion calls when misconfigured.  High
+	 * nesting level may cause stack exhaustion.  We'll prevent this by
+	 * detecting loops and by introducing upper limit.
 	 */
-	if (++(sc->called) > max_gre_nesting) {
-		printf("%s: gre_output: recursively called too many "
-		       "times(%d)\n", if_name(GRE2IFP(sc)), sc->called);
-		m_freem(m);
-		error = EIO;    /* is there better errno? */
-		goto end;
+	mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL);
+	if (mtag != NULL) {
+		struct ifnet **ifp2;
+
+		gt = (struct mtag_gre_nesting *)(mtag + 1);
+		gt->count++;
+		if (gt->count > min(gt->max,max_gre_nesting)) {
+			printf("%s: hit maximum recursion limit %u on %s\n",
+				__func__, gt->count - 1, ifp->if_xname);
+			m_freem(m);
+			error = EIO;	/* is there better errno? */
+			goto end;
+		}
+
+		ifp2 = gt->ifp;
+		for (max = gt->count - 1; max > 0; max--) {
+			if (*ifp2 == ifp)
+				break;
+			ifp2++;
+		}
+		if (*ifp2 == ifp) {
+			printf("%s: detected loop with nexting %u on %s\n",
+				__func__, gt->count-1, ifp->if_xname);
+			m_freem(m);
+			error = EIO;	/* is there better errno? */
+			goto end;
+		}
+		*ifp2 = ifp;
+
+	} else {
+		/*
+		 * Given that people should NOT increase max_gre_nesting beyond
+		 * their real needs, we allocate once per packet rather than
+		 * allocating an mtag once per passing through gre.
+		 *
+		 * Note: the sysctl does not actually check for saneness, so we
+		 * limit the maximum number of possible recursions here.
+		 */
+		max = imin(max_gre_nesting, 256);
+		/* If someone sets the sysctl <= 0, we want at least 1. */
+		max = imax(max, 1);
+		len = sizeof(struct mtag_gre_nesting) +
+		    max * sizeof(struct ifnet *);
+		mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len,
+		    M_NOWAIT);
+		if (mtag == NULL) {
+			m_freem(m);
+			error = ENOMEM;
+			goto end;
+		}
+		gt = (struct mtag_gre_nesting *)(mtag + 1);
+		bzero(gt, len);
+		gt->count = 1;
+		gt->max = max;
+		*gt->ifp = ifp;
+		m_tag_prepend(m, mtag);
 	}
 
 	if (!((ifp->if_flags & IFF_UP) &&
@@ -456,7 +519,6 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
 	error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
 	    (struct ip_moptions *)NULL, (struct inpcb *)NULL);
   end:
-	sc->called = 0;
 	if (error)
 		ifp->if_oerrors++;
 	return (error);
@@ -649,6 +711,9 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		si.sin_len = sizeof(struct sockaddr_in);
 		si.sin_addr.s_addr = sc->g_src.s_addr;
 		sa = sintosa(&si);
+		error = prison_if(curthread->td_ucred, sa);
+		if (error != 0)
+			break;
 		ifr->ifr_addr = *sa;
 		break;
 	case GREGADDRD:
@@ -657,6 +722,9 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		si.sin_len = sizeof(struct sockaddr_in);
 		si.sin_addr.s_addr = sc->g_dst.s_addr;
 		sa = sintosa(&si);
+		error = prison_if(curthread->td_ucred, sa);
+		if (error != 0)
+			break;
 		ifr->ifr_addr = *sa;
 		break;
 	case SIOCSIFPHYADDR:
@@ -720,8 +788,14 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		si.sin_family = AF_INET;
 		si.sin_len = sizeof(struct sockaddr_in);
 		si.sin_addr.s_addr = sc->g_src.s_addr;
+		error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+		if (error != 0)
+			break;
 		memcpy(&lifr->addr, &si, sizeof(si));
 		si.sin_addr.s_addr = sc->g_dst.s_addr;
+		error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+		if (error != 0)
+			break;
 		memcpy(&lifr->dstaddr, &si, sizeof(si));
 		break;
 	case SIOCGIFPSRCADDR:
@@ -736,6 +810,9 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		si.sin_family = AF_INET;
 		si.sin_len = sizeof(struct sockaddr_in);
 		si.sin_addr.s_addr = sc->g_src.s_addr;
+		error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+		if (error != 0)
+			break;
 		bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
 		break;
 	case SIOCGIFPDSTADDR:
@@ -750,6 +827,9 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		si.sin_family = AF_INET;
 		si.sin_len = sizeof(struct sockaddr_in);
 		si.sin_addr.s_addr = sc->g_dst.s_addr;
+		error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+		if (error != 0)
+			break;
 		bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
 		break;
 	case GRESKEY:
diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h
index 186d4cc6..13b882c8 100644
--- a/freebsd/sys/net/if_gre.h
+++ b/freebsd/sys/net/if_gre.h
@@ -68,8 +68,6 @@ struct gre_softc {
 
 	const struct encaptab *encap;	/* encapsulation cookie */
 
-	int called;		/* infinite recursion preventer */
-
 	uint32_t key;		/* key included in outgoing GRE packets */
 				/* zero means none */
 
diff --git a/freebsd/sys/net/if_iso88025subr.c b/freebsd/sys/net/if_iso88025subr.c
index 6a39956e..b52853a2 100644
--- a/freebsd/sys/net/if_iso88025subr.c
+++ b/freebsd/sys/net/if_iso88025subr.c
@@ -682,6 +682,7 @@ iso88025_input(ifp, m)
 		break;
 	}
 
+	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 	return;
 
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
index a1c90cdf..5d5064a4 100644
--- a/freebsd/sys/net/if_lagg.c
+++ b/freebsd/sys/net/if_lagg.c
@@ -169,6 +169,11 @@ static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
     &lagg_failover_rx_all, 0,
     "Accept input from any interface in a failover lagg");
+static int def_use_flowid = 1; /* Default value for using M_FLOWID */
+TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
+    &def_use_flowid, 0,
+    "Default setting for using flow id for load sharing");
 
 static int
 lagg_modevent(module_t mod, int type, void *data)
@@ -206,6 +211,7 @@ static moduledata_t lagg_mod = {
 };
 
 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_lagg, 1);
 
 #if __FreeBSD_version >= 800000
 /*
@@ -258,6 +264,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 	struct ifnet *ifp;
 	int i, error = 0;
 	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
+	struct sysctl_oid *oid;
+	char num[14];			/* sufficient for 32 bits */
 
 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -266,6 +274,17 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 		return (ENOSPC);
 	}
 
+	sysctl_ctx_init(&sc->ctx);
+	snprintf(num, sizeof(num), "%u", unit);
+	sc->use_flowid = def_use_flowid;
+	oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg),
+		OID_AUTO, num, CTLFLAG_RD, NULL, "");
+	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
+		"use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
+		"Use flow id for load sharing");
+	/* Hash all layers by default */
+	sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
+
 	sc->sc_proto = LAGG_PROTO_NONE;
 	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
 		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
@@ -345,6 +364,7 @@ lagg_clone_destroy(struct ifnet *ifp)
 
 	LAGG_WUNLOCK(sc);
 
+	sysctl_ctx_free(&sc->ctx);
 	ifmedia_removeall(&sc->sc_media);
 	ether_ifdetach(ifp);
 	if_free_type(ifp, IFT_ETHER);
@@ -738,28 +758,18 @@ fallback:
 	return (EINVAL);
 }
 
+/*
+ * For direct output to child ports.
+ */
 static int
 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
 	struct sockaddr *dst, struct route *ro)
 {
 	struct lagg_port *lp = ifp->if_lagg;
-	struct ether_header *eh;
-	short type = 0;
 
 	switch (dst->sa_family) {
 		case pseudo_AF_HDRCMPLT:
 		case AF_UNSPEC:
-			eh = (struct ether_header *)dst->sa_data;
-			type = eh->ether_type;
-			break;
-	}
-
-	/*
-	 * Only allow ethernet types required to initiate or maintain the link,
-	 * aggregated frames take a different path.
-	 */
-	switch (ntohs(type)) {
-		case ETHERTYPE_PAE:	/* EAPOL PAE/802.1x */
 			return ((*lp->lp_output)(ifp, m, dst, ro));
 	}
 
@@ -776,6 +786,9 @@ lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
 
 	if ((lp = ifp->if_lagg) == NULL)
 		return;
+	/* If the ifnet is just being renamed, don't do anything. */
+	if (ifp->if_flags & IFF_RENAMING)
+		return;
 
 	sc = lp->lp_softc;
 
@@ -871,6 +884,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	struct lagg_reqall *ra = (struct lagg_reqall *)data;
 	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
+	struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct lagg_port *lp;
 	struct ifnet *tpif;
@@ -923,11 +937,11 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 			error = EPROTONOSUPPORT;
 			break;
 		}
+		LAGG_WLOCK(sc);
 		if (sc->sc_proto != LAGG_PROTO_NONE) {
-			LAGG_WLOCK(sc);
-			error = sc->sc_detach(sc);
-			/* Reset protocol and pointers */
+			/* Reset protocol first in case detach unlocks */
 			sc->sc_proto = LAGG_PROTO_NONE;
+			error = sc->sc_detach(sc);
 			sc->sc_detach = NULL;
 			sc->sc_start = NULL;
 			sc->sc_input = NULL;
@@ -939,10 +953,14 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 			sc->sc_lladdr = NULL;
 			sc->sc_req = NULL;
 			sc->sc_portreq = NULL;
-			LAGG_WUNLOCK(sc);
+		} else if (sc->sc_input != NULL) {
+			/* Still detaching */
+			error = EBUSY;
 		}
-		if (error != 0)
+		if (error != 0) {
+			LAGG_WUNLOCK(sc);
 			break;
+		}
 		for (int i = 0; i < (sizeof(lagg_protos) /
 		    sizeof(lagg_protos[0])); i++) {
 			if (lagg_protos[i].ti_proto == ra->ra_proto) {
@@ -950,7 +968,6 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 					printf("%s: using proto %u\n",
 					    sc->sc_ifname,
 					    lagg_protos[i].ti_proto);
-				LAGG_WLOCK(sc);
 				sc->sc_proto = lagg_protos[i].ti_proto;
 				if (sc->sc_proto != LAGG_PROTO_NONE)
 					error = lagg_protos[i].ti_attach(sc);
@@ -958,8 +975,25 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 				return (error);
 			}
 		}
+		LAGG_WUNLOCK(sc);
 		error = EPROTONOSUPPORT;
 		break;
+	case SIOCGLAGGFLAGS:
+		rf->rf_flags = sc->sc_flags;
+		break;
+	case SIOCSLAGGHASH:
+		error = priv_check(td, PRIV_NET_LAGG);
+		if (error)
+			break;
+		if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
+			error = EINVAL;
+			break;
+		}
+		LAGG_WLOCK(sc);
+		sc->sc_flags &= ~LAGG_F_HASHMASK;
+		sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
+		LAGG_WUNLOCK(sc);
+		break;
 	case SIOCGLAGGPORT:
 		if (rp->rp_portname[0] == '\0' ||
 		    (tpif = ifunit(rp->rp_portname)) == NULL) {
@@ -1215,14 +1249,15 @@ lagg_input(struct ifnet *ifp, struct mbuf *m)
 	struct lagg_softc *sc = lp->lp_softc;
 	struct ifnet *scifp = sc->sc_ifp;
 
+	LAGG_RLOCK(sc);
 	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
 	    sc->sc_proto == LAGG_PROTO_NONE) {
+		LAGG_RUNLOCK(sc);
 		m_freem(m);
 		return (NULL);
 	}
 
-	LAGG_RLOCK(sc);
 	ETHER_BPF_MTAP(scifp, m);
 
 	m = (*sc->sc_input)(sc, lp, m);
@@ -1388,42 +1423,55 @@ lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
 }
 
 uint32_t
-lagg_hashmbuf(struct mbuf *m, uint32_t key)
+lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
 {
 	uint16_t etype;
-	uint32_t p = 0;
+	uint32_t p = key;
 	int off;
 	struct ether_header *eh;
-	struct ether_vlan_header vlanbuf;
 	const struct ether_vlan_header *vlan;
 #ifdef INET
 	const struct ip *ip;
-	struct ip ipbuf;
+	const uint32_t *ports;
+	int iphlen;
 #endif
 #ifdef INET6
 	const struct ip6_hdr *ip6;
-	struct ip6_hdr ip6buf;
 	uint32_t flow;
 #endif
+	union {
+#ifdef INET
+		struct ip ip;
+#endif
+#ifdef INET6
+		struct ip6_hdr ip6;
+#endif
+		struct ether_vlan_header vlan;
+		uint32_t port;
+	} buf;
+
 
 	off = sizeof(*eh);
 	if (m->m_len < off)
 		goto out;
 	eh = mtod(m, struct ether_header *);
 	etype = ntohs(eh->ether_type);
-	p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
-	p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+	if (sc->sc_flags & LAGG_F_HASHL2) {
+		p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
+		p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+	}
 
 	/* Special handling for encapsulating VLAN frames */
-	if (m->m_flags & M_VLANTAG) {
+	if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
 		p = hash32_buf(&m->m_pkthdr.ether_vtag,
 		    sizeof(m->m_pkthdr.ether_vtag), p);
 	} else if (etype == ETHERTYPE_VLAN) {
-		vlan = lagg_gethdr(m, off,  sizeof(*vlan), &vlanbuf);
+		vlan = lagg_gethdr(m, off,  sizeof(*vlan), &buf);
 		if (vlan == NULL)
 			goto out;
 
-		p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+		if (sc->sc_flags & LAGG_F_HASHL2)
+			p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
 		etype = ntohs(vlan->evl_proto);
 		off += sizeof(*vlan) - sizeof(*eh);
 	}
@@ -1431,17 +1479,37 @@ lagg_hashmbuf(struct mbuf *m, uint32_t key)
 	switch (etype) {
 #ifdef INET
 	case ETHERTYPE_IP:
-		ip = lagg_gethdr(m, off, sizeof(*ip), &ipbuf);
+		ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
 		if (ip == NULL)
 			goto out;
 
-		p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
-		p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+		if (sc->sc_flags & LAGG_F_HASHL3) {
+			p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
+			p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+		}
+		if (!(sc->sc_flags & LAGG_F_HASHL4))
+			break;
+		switch (ip->ip_p) {
+			case IPPROTO_TCP:
+			case IPPROTO_UDP:
+			case IPPROTO_SCTP:
+				iphlen = ip->ip_hl << 2;
+				if (iphlen < sizeof(*ip))
+					break;
+				off += iphlen;
+				ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
+				if (ports == NULL)
+					break;
+				p = hash32_buf(ports, sizeof(*ports), p);
+				break;
+		}
 		break;
 #endif
 #ifdef INET6
 	case ETHERTYPE_IPV6:
-		ip6 = lagg_gethdr(m, off, sizeof(*ip6), &ip6buf);
+		if (!(sc->sc_flags & LAGG_F_HASHL3))
+			break;
+		ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
 		if (ip6 == NULL)
 			goto out;
 
@@ -1668,10 +1736,10 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
 	struct lagg_port *lp = NULL;
 	uint32_t p = 0;
 
-	if (m->m_flags & M_FLOWID)
+	if (sc->use_flowid && (m->m_flags & M_FLOWID))
 		p = m->m_pkthdr.flowid;
 	else
-		p = lagg_hashmbuf(m, lb->lb_key);
+		p = lagg_hashmbuf(sc, m, lb->lb_key);
 	p %= sc->sc_count;
 	lp = lb->lb_ports[p];
 
@@ -1788,7 +1856,7 @@ lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 	etype = ntohs(eh->ether_type);
 
 	/* Tap off LACP control messages */
-	if (etype == ETHERTYPE_SLOW) {
+	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
 		m = lacp_input(lp, m);
 		if (m == NULL)
 			return (NULL);
diff --git a/freebsd/sys/net/if_lagg.h b/freebsd/sys/net/if_lagg.h
index 0034c617..27ab46f2 100644
--- a/freebsd/sys/net/if_lagg.h
+++ b/freebsd/sys/net/if_lagg.h
@@ -21,6 +21,8 @@
 #ifndef _NET_LAGG_H
 #define _NET_LAGG_H
 
+#include <sys/sysctl.h>
+
 /*
  * Global definitions
  */
@@ -29,6 +31,12 @@
 #define	LAGG_MAX_NAMESIZE	32	/* name of a protocol */
 #define	LAGG_MAX_STACKING	4	/* maximum number of stacked laggs */
 
+/* Lagg flags */
+#define	LAGG_F_HASHL2		0x00000001	/* hash layer 2 */
+#define	LAGG_F_HASHL3		0x00000002	/* hash layer 3 */
+#define	LAGG_F_HASHL4		0x00000004	/* hash layer 4 */
+#define	LAGG_F_HASHMASK		0x00000007
+
 /* Port flags */
 #define	LAGG_PORT_SLAVE		0x00000000	/* normal enslaved port */
 #define	LAGG_PORT_MASTER	0x00000001	/* primary port */
@@ -120,6 +128,14 @@ struct lagg_reqall {
 #define	SIOCGLAGG		_IOWR('i', 143, struct lagg_reqall)
 #define	SIOCSLAGG		 _IOW('i', 144, struct lagg_reqall)
 
+struct lagg_reqflags {
+	char			rf_ifname[IFNAMSIZ];	/* name of the lagg */
+	uint32_t		rf_flags;		/* lagg flags (LAGG_F_*) */
+};
+
+#define	SIOCGLAGGFLAGS		_IOWR('i', 145, struct lagg_reqflags)
+#define	SIOCSLAGGHASH		 _IOW('i', 146, struct lagg_reqflags)
+
 #ifdef _KERNEL
 /*
  * Internal kernel part
@@ -177,6 +193,7 @@ struct lagg_softc {
 	struct ifmedia			sc_media;	/* media config */
 	caddr_t				sc_psc;		/* protocol data */
 	uint32_t			sc_seq;		/* sequence counter */
+	uint32_t			sc_flags;
 
 	SLIST_HEAD(__tplhd, lagg_port)	sc_ports;	/* list of interfaces */
 	SLIST_ENTRY(lagg_softc)	sc_entries;
@@ -202,6 +219,8 @@ struct lagg_softc {
 	eventhandler_tag vlan_attach;
 	eventhandler_tag vlan_detach;
 #endif
+	struct sysctl_ctx_list		ctx;		/* sysctl variables */
+	int				use_flowid;	/* use M_FLOWID */
 };
 
 struct lagg_port {
@@ -240,7 +259,7 @@ extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
 extern void	(*lagg_linkstate_p)(struct ifnet *, int );
 
 int		lagg_enqueue(struct ifnet *, struct mbuf *);
-uint32_t	lagg_hashmbuf(struct mbuf *, uint32_t);
+uint32_t	lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t);
 
 #endif /* _KERNEL */
 
diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c
index 3ffcc21a..80888559 100644
--- a/freebsd/sys/net/if_llatbl.c
+++ b/freebsd/sys/net/if_llatbl.c
@@ -102,18 +102,35 @@ done:
  * This function is called by the timer functions
  * such as arptimer() and nd6_llinfo_timer(), and
  * the caller does the locking.
+ *
+ * Returns the number of held packets, if any, that were dropped.
  */
-void
+size_t
 llentry_free(struct llentry *lle)
 {
-	
+	size_t pkts_dropped;
+	struct mbuf *next;
+
+	pkts_dropped = 0;
 	LLE_WLOCK_ASSERT(lle);
 	LIST_REMOVE(lle, lle_next);
 
-	if (lle->la_hold != NULL)
+	while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
+		next = lle->la_hold->m_nextpkt;
 		m_freem(lle->la_hold);
+		lle->la_hold = next;
+		lle->la_numheld--;
+		pkts_dropped++;
+	}
+
+	KASSERT(lle->la_numheld == 0, 
+		("%s: la_numheld %d > 0, pkts_droped %zd", __func__, 
+		 lle->la_numheld, pkts_dropped));
 
+	lle->la_flags &= ~LLE_VALID;
 	LLE_FREE_LOCKED(lle);
+
+	return (pkts_dropped);
 }
 
 /*
@@ -214,7 +231,8 @@ lltable_drain(int af)
 #endif
 
 void
-lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask)
+lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask,
+	    u_int flags)
 {
 	struct lltable *llt;
 
@@ -223,7 +241,7 @@ lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask)
 		if (llt->llt_af != af)
 			continue;
 
-		llt->llt_prefix_free(llt, prefix, mask);
+		llt->llt_prefix_free(llt, prefix, mask, flags);
 	}
 	LLTABLE_RUNLOCK();
 }
@@ -414,6 +432,7 @@ llatbl_lle_show(struct llentry_sa *la)
 	db_printf(" lle_tbl=%p\n", lle->lle_tbl);
 	db_printf(" lle_head=%p\n", lle->lle_head);
 	db_printf(" la_hold=%p\n", lle->la_hold);
+	db_printf(" la_numheld=%d\n", lle->la_numheld);
 	db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire);
 	db_printf(" la_flags=0x%04x\n", lle->la_flags);
 	db_printf(" la_asked=%u\n", lle->la_asked);
diff --git a/freebsd/sys/net/if_llatbl.h b/freebsd/sys/net/if_llatbl.h
index a4d02ab0..8b15e5c8 100644
--- a/freebsd/sys/net/if_llatbl.h
+++ b/freebsd/sys/net/if_llatbl.h
@@ -58,6 +58,7 @@ struct llentry {
 	struct lltable		 *lle_tbl;
 	struct llentries	 *lle_head;
 	struct mbuf		 *la_hold;
+	int     		 la_numheld;  /* # of packets currently held */
 	time_t			 la_expire;
 	uint16_t		 la_flags;    
 	uint16_t		 la_asked;
@@ -115,19 +116,12 @@ struct llentry {
 		LLE_WUNLOCK(lle);				\
 	}							\
 	/* guard against invalid refs */			\
-	lle = 0;						\
+	lle = NULL;						\
 } while (0)
 
 #define	LLE_FREE(lle) do {					\
 	LLE_WLOCK(lle);						\
-	if ((lle)->lle_refcnt <= 1)				\
-		(lle)->lle_tbl->llt_free((lle)->lle_tbl, (lle));\
-	else {							\
-		(lle)->lle_refcnt--;				\
-		LLE_WUNLOCK(lle);				\
-	}							\
-	/* guard against invalid refs */			\
-	lle = NULL;						\
+	LLE_FREE_LOCKED(lle);					\
 } while (0)
 
 
@@ -152,15 +146,13 @@ struct lltable {
 	int			llt_af;
 	struct ifnet		*llt_ifp;
 
-	struct llentry *	(*llt_new)(const struct sockaddr *, u_int);
 	void			(*llt_free)(struct lltable *, struct llentry *);
 	void			(*llt_prefix_free)(struct lltable *,
 				    const struct sockaddr *prefix,
-				    const struct sockaddr *mask);
+				    const struct sockaddr *mask,
+				    u_int flags);
 	struct llentry *	(*llt_lookup)(struct lltable *, u_int flags,
 				    const struct sockaddr *l3addr);
-	int			(*llt_rtcheck)(struct ifnet *, u_int flags,
-				    const struct sockaddr *);
 	int			(*llt_dump)(struct lltable *,
 				     struct sysctl_req *);
 };
@@ -185,13 +177,13 @@ MALLOC_DECLARE(M_LLTABLE);
 struct lltable *lltable_init(struct ifnet *, int);
 void		lltable_free(struct lltable *);
 void		lltable_prefix_free(int, struct sockaddr *, 
-                       struct sockaddr *);
+                       struct sockaddr *, u_int);
 #if 0
 void		lltable_drain(int);
 #endif
 int		lltable_sysctl_dumparp(int, struct sysctl_req *);
 
-void		llentry_free(struct llentry *);
+size_t		llentry_free(struct llentry *);
 int		llentry_update(struct llentry **, struct lltable *,
                        struct sockaddr_storage *, struct ifnet *);
 
diff --git a/freebsd/sys/net/if_media.c b/freebsd/sys/net/if_media.c
index 46b57b42..3bc6122c 100644
--- a/freebsd/sys/net/if_media.c
+++ b/freebsd/sys/net/if_media.c
@@ -237,7 +237,7 @@ ifmedia_ioctl(ifp, ifr, ifm, cmd)
 		/*
 		 * If no change, we're done.
 		 * XXX Automedia may invole software intervention.
-		 *     Keep going in case the the connected media changed.
+		 *     Keep going in case the connected media changed.
 		 *     Similarly, if best match changed (kernel debugger?).
 		 */
 		if ((IFM_SUBTYPE(newmedia) != IFM_AUTO) &&
diff --git a/freebsd/sys/net/if_media.h b/freebsd/sys/net/if_media.h
index 337ad685..2c833228 100644
--- a/freebsd/sys/net/if_media.h
+++ b/freebsd/sys/net/if_media.h
@@ -36,7 +36,7 @@
  */
 
 #ifndef _NET_IF_MEDIA_H_
-#define _NET_IF_MEDIA_H_
+#define	_NET_IF_MEDIA_H_
 
 /*
  * Prototypes and definitions for BSD/OS-compatible network interface
@@ -144,13 +144,12 @@ uint64_t	ifmedia_baudrate(int);
 #define	IFM_10G_LR	18		/* 10GBase-LR 1310nm Single-mode */
 #define	IFM_10G_SR	19		/* 10GBase-SR 850nm Multi-mode */
 #define	IFM_10G_CX4	20		/* 10GBase CX4 copper */
-#define IFM_2500_SX	21		/* 2500BaseSX - multi-mode fiber */
-#define IFM_10G_TWINAX	22		/* 10GBase Twinax copper */
-#define IFM_10G_TWINAX_LONG	23	/* 10GBase Twinax Long copper */
-#define IFM_10G_LRM	24		/* 10GBase-LRM 850nm Multi-mode */
-#define IFM_UNKNOWN	25		/* media types not defined yet */
-#define IFM_10G_T	26		/* 10GBase-T - RJ45 */
-
+#define	IFM_2500_SX	21		/* 2500BaseSX - multi-mode fiber */
+#define	IFM_10G_TWINAX	22		/* 10GBase Twinax copper */
+#define	IFM_10G_TWINAX_LONG	23	/* 10GBase Twinax Long copper */
+#define	IFM_10G_LRM	24		/* 10GBase-LRM 850nm Multi-mode */
+#define	IFM_UNKNOWN	25		/* media types not defined yet */
+#define	IFM_10G_T	26		/* 10GBase-T - RJ45 */
 
 /* note 31 is the max! */
 
@@ -232,20 +231,20 @@ uint64_t	ifmedia_baudrate(int);
 /*
  * ATM
  */
-#define IFM_ATM	0x000000a0
-#define IFM_ATM_UNKNOWN		3
-#define IFM_ATM_UTP_25		4
-#define IFM_ATM_TAXI_100	5
-#define IFM_ATM_TAXI_140	6
-#define IFM_ATM_MM_155		7
-#define IFM_ATM_SM_155		8
-#define IFM_ATM_UTP_155		9
-#define IFM_ATM_MM_622		10
-#define IFM_ATM_SM_622		11
+#define	IFM_ATM	0x000000a0
+#define	IFM_ATM_UNKNOWN		3
+#define	IFM_ATM_UTP_25		4
+#define	IFM_ATM_TAXI_100	5
+#define	IFM_ATM_TAXI_140	6
+#define	IFM_ATM_MM_155		7
+#define	IFM_ATM_SM_155		8
+#define	IFM_ATM_UTP_155		9
+#define	IFM_ATM_MM_622		10
+#define	IFM_ATM_SM_622		11
 #define	IFM_ATM_VIRTUAL		12
-#define IFM_ATM_SDH		0x00000100	/* SDH instead of SONET */
-#define IFM_ATM_NOSCRAMB	0x00000200	/* no scrambling */
-#define IFM_ATM_UNASSIGNED	0x00000400	/* unassigned cells */
+#define	IFM_ATM_SDH		0x00000100	/* SDH instead of SONET */
+#define	IFM_ATM_NOSCRAMB	0x00000200	/* no scrambling */
+#define	IFM_ATM_UNASSIGNED	0x00000400	/* unassigned cells */
 
 /*
  * CARP Common Address Redundancy Protocol
@@ -295,22 +294,22 @@ uint64_t	ifmedia_baudrate(int);
 #define	IFM_STATUS_VALID	IFM_AVALID
 
 /* List of "status valid" bits, for ifconfig(8). */
-#define IFM_STATUS_VALID_LIST {						\
-        IFM_AVALID,							\
-        0								\
+#define	IFM_STATUS_VALID_LIST {						\
+	IFM_AVALID,							\
+	0								\
 }
 
 /*
  * Macros to extract various bits of information from the media word.
  */
-#define	IFM_TYPE(x)         ((x) & IFM_NMASK)
-#define	IFM_SUBTYPE(x)      ((x) & IFM_TMASK)
-#define	IFM_TYPE_OPTIONS(x) ((x) & IFM_OMASK)
-#define	IFM_INST(x)         (((x) & IFM_IMASK) >> IFM_ISHIFT)
-#define	IFM_OPTIONS(x)	((x) & (IFM_OMASK|IFM_GMASK))
-#define	IFM_MODE(x)	    ((x) & IFM_MMASK)
+#define	IFM_TYPE(x)		((x) & IFM_NMASK)
+#define	IFM_SUBTYPE(x)		((x) & IFM_TMASK)
+#define	IFM_TYPE_OPTIONS(x)	((x) & IFM_OMASK)
+#define	IFM_INST(x)		(((x) & IFM_IMASK) >> IFM_ISHIFT)
+#define	IFM_OPTIONS(x)		((x) & (IFM_OMASK | IFM_GMASK))
+#define	IFM_MODE(x)		((x) & IFM_MMASK)
 
-#define	IFM_INST_MAX	IFM_INST(IFM_IMASK)
+#define	IFM_INST_MAX		IFM_INST(IFM_IMASK)
 
 /*
  * Macro to create a media word.
@@ -371,6 +370,7 @@ struct ifmedia_description {
 }
 
 #define	IFM_SUBTYPE_ETHERNET_ALIASES {					\
+	{ IFM_10_T,	"10baseT" },					\
 	{ IFM_10_T,	"UTP" },					\
 	{ IFM_10_T,	"10UTP" },					\
 	{ IFM_10_2,	"BNC" },					\
@@ -390,6 +390,23 @@ struct ifmedia_description {
 	{ IFM_1000_T,	"1000TX" },					\
 	{ IFM_1000_T,	"1000T" },					\
 	{ IFM_2500_SX,	"2500SX" },					\
+									\
+	/*								\
+	 * Shorthands for common media+option combinations as announced	\
+	 * by miibus(4)							\
+	 */								\
+	{ IFM_10_T | IFM_FDX,			"10baseT-FDX" },	\
+	{ IFM_10_T | IFM_FDX | IFM_FLOW,	"10baseT-FDX-flow" },	\
+	{ IFM_100_TX | IFM_FDX,			"100baseTX-FDX" },	\
+	{ IFM_100_TX | IFM_FDX | IFM_FLOW,	"100baseTX-FDX-flow" },	\
+	{ IFM_1000_T | IFM_FDX,			"1000baseT-FDX" },	\
+	{ IFM_1000_T | IFM_FDX | IFM_FLOW,	"1000baseT-FDX-flow" },	\
+	{ IFM_1000_T | IFM_FDX | IFM_FLOW | IFM_ETH_MASTER,		\
+	    "1000baseT-FDX-flow-master" },				\
+	{ IFM_1000_T | IFM_FDX | IFM_ETH_MASTER,			\
+	    "1000baseT-FDX-master" },					\
+	{ IFM_1000_T | IFM_ETH_MASTER,		"1000baseT-master" },	\
+									\
 	{ 0, NULL },							\
 }
 
@@ -539,7 +556,7 @@ struct ifmedia_description {
 	{ 0, NULL },							\
 }
 
-# define IFM_SUBTYPE_ATM_DESCRIPTIONS {					\
+#define	IFM_SUBTYPE_ATM_DESCRIPTIONS {					\
 	{ IFM_ATM_UNKNOWN,	"Unknown" },				\
 	{ IFM_ATM_UTP_25,	"UTP/25.6MBit" },			\
 	{ IFM_ATM_TAXI_100,	"Taxi/100MBit" },			\
@@ -553,7 +570,7 @@ struct ifmedia_description {
 	{ 0, NULL },							\
 }
 
-# define IFM_SUBTYPE_ATM_ALIASES {					\
+#define	IFM_SUBTYPE_ATM_ALIASES {					\
 	{ IFM_ATM_UNKNOWN,	"UNKNOWN" },				\
 	{ IFM_ATM_UTP_25,	"UTP-25" },				\
 	{ IFM_ATM_TAXI_100,	"TAXI-100" },				\
@@ -574,7 +591,6 @@ struct ifmedia_description {
 	{ 0, NULL },							\
 }
 
-
 #define	IFM_SUBTYPE_SHARED_DESCRIPTIONS {				\
 	{ IFM_AUTO,	"autoselect" },					\
 	{ IFM_MANUAL,	"manual" },					\
@@ -584,6 +600,13 @@ struct ifmedia_description {
 
 #define	IFM_SUBTYPE_SHARED_ALIASES {					\
 	{ IFM_AUTO,	"auto" },					\
+									\
+	/*								\
+	 * Shorthands for common media+option combinations as announced	\
+	 * by miibus(4)							\
+	 */								\
+	{ IFM_AUTO | IFM_FLOW,	"auto-flow" },				\
+									\
 	{ 0, NULL },							\
 }
 
@@ -598,6 +621,15 @@ struct ifmedia_description {
 	{ 0, NULL },							\
 }
 
+#define	IFM_SHARED_OPTION_ALIASES {					\
+	{ IFM_FDX,	"fdx" },					\
+	{ IFM_HDX,	"hdx" },					\
+	{ IFM_FLOW,	"flow" },					\
+	{ IFM_LOOP,	"loop" },					\
+	{ IFM_LOOP,	"loopback" },					\
+	{ 0, NULL },							\
+}
+
 /*
  * Baudrate descriptions for the various media types.
  */
@@ -606,7 +638,7 @@ struct ifmedia_baudrate {
 	uint64_t	ifmb_baudrate;		/* corresponding baudrate */
 };
 
-#define IFM_BAUDRATE_DESCRIPTIONS {					\
+#define	IFM_BAUDRATE_DESCRIPTIONS {					\
 	{ IFM_ETHER | IFM_10_T,		IF_Mbps(10) },			\
 	{ IFM_ETHER | IFM_10_2,		IF_Mbps(10) },			\
 	{ IFM_ETHER | IFM_10_5,		IF_Mbps(10) },			\
@@ -670,10 +702,10 @@ struct ifmedia_status_description {
 	const char *ifms_string[2];
 };
 
-#define IFM_STATUS_DESC(ifms, bit)					\
+#define	IFM_STATUS_DESC(ifms, bit)					\
 	(ifms)->ifms_string[((ifms)->ifms_bit & (bit)) ? 1 : 0]
 
-#define IFM_STATUS_DESCRIPTIONS {					\
+#define	IFM_STATUS_DESCRIPTIONS {					\
 	{ IFM_ETHER,		IFM_AVALID,	IFM_ACTIVE,		\
 	    { "no carrier", "active" } },				\
 	{ IFM_FDDI,		IFM_AVALID,	IFM_ACTIVE,		\
diff --git a/freebsd/sys/net/if_spppfr.c b/freebsd/sys/net/if_spppfr.c
index be080a7d..f25bad7b 100644
--- a/freebsd/sys/net/if_spppfr.c
+++ b/freebsd/sys/net/if_spppfr.c
@@ -282,6 +282,8 @@ drop:		++ifp->if_ierrors;
 	if (! (ifp->if_flags & IFF_UP))
 		goto drop;
 
+	M_SETFIB(m, ifp->if_fib);
+
 	/* Check queue. */
 	if (netisr_queue(isr, m)) {	/* (0) on success. */
 		if (debug)
diff --git a/freebsd/sys/net/if_spppsubr.c b/freebsd/sys/net/if_spppsubr.c
index d5f3487a..01743f47 100644
--- a/freebsd/sys/net/if_spppsubr.c
+++ b/freebsd/sys/net/if_spppsubr.c
@@ -739,6 +739,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
 		goto drop;
 
 	SPPP_UNLOCK(sp);
+	M_SETFIB(m, ifp->if_fib);
 	/* Check queue. */
 	if (netisr_queue(isr, m)) {	/* (0) on success. */
 		if (debug)
diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c
index 79466119..a808548c 100644
--- a/freebsd/sys/net/if_stf.c
+++ b/freebsd/sys/net/if_stf.c
@@ -787,6 +787,7 @@ in_stf_input(m, off)
 	 */
 	ifp->if_ipackets++;
 	ifp->if_ibytes += m->m_pkthdr.len;
+	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(NETISR_IPV6, m);
 }
 
diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c
index cd775369..6e6b6a64 100644
--- a/freebsd/sys/net/if_tap.c
+++ b/freebsd/sys/net/if_tap.c
@@ -44,6 +44,7 @@
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/filio.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
@@ -66,8 +67,9 @@
 #include <net/if.h>
 #include <net/if_clone.h>
 #include <net/if_dl.h>
-#include <net/route.h>
 #include <net/if_types.h>
+#include <net/route.h>
+#include <net/vnet.h>
 
 #include <netinet/in.h>
 
@@ -216,6 +218,8 @@ tap_destroy(struct tap_softc *tp)
 	KASSERT(!(tp->tap_flags & TAP_OPEN),
 		("%s flags is out of sync", ifp->if_xname));
 
+	CURVNET_SET(ifp->if_vnet);
+	seldrain(&tp->tap_rsel);
 	knlist_destroy(&tp->tap_rsel.si_note);
 	destroy_dev(tp->tap_dev);
 	ether_ifdetach(ifp);
@@ -223,6 +227,7 @@ tap_destroy(struct tap_softc *tp)
 
 	mtx_destroy(&tp->tap_mtx);
 	free(tp, M_TAP);
+	CURVNET_RESTORE();
 }
 
 static void
@@ -364,6 +369,7 @@ tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **d
 	if (unit == -1)
 		append_unit = 1;
 
+	CURVNET_SET(CRED_TO_VNET(cred));
 	/* find any existing device, or allocate new unit number */
 	i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
 	if (i) {
@@ -382,6 +388,7 @@ tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **d
 	}
 
 	if_clone_create(name, namelen, NULL);
+	CURVNET_RESTORE();
 } /* tapclone */
 
 
@@ -526,6 +533,7 @@ tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
 
 	/* junk all pending output */
 	mtx_lock(&tp->tap_mtx);
+	CURVNET_SET(ifp->if_vnet);
 	IF_DRAIN(&ifp->if_snd);
 
 	/*
@@ -549,6 +557,8 @@ tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
 	}
 
 	if_link_state_change(ifp, LINK_STATE_DOWN);
+	CURVNET_RESTORE();
+
 	funsetown(&tp->tap_sigio);
 	selwakeuppri(&tp->tap_rsel, PZERO+1);
 	KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
@@ -950,7 +960,9 @@ tapwrite(struct cdev *dev, struct uio *uio, int flag)
 	}
 
 	/* Pass packet up to parent. */
+	CURVNET_SET(ifp->if_vnet);
 	(*ifp->if_input)(ifp, m);
+	CURVNET_RESTORE();
 	ifp->if_ipackets ++; /* ibytes are counted in parent */
 
 	return (0);
diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c
index b6fa0e5a..444113f4 100644
--- a/freebsd/sys/net/if_tun.c
+++ b/freebsd/sys/net/if_tun.c
@@ -128,7 +128,7 @@ static void	tunclone(void *arg, struct ucred *cred, char *name,
 		    int namelen, struct cdev **dev);
 static void	tuncreate(const char *name, struct cdev *dev);
 static int	tunifioctl(struct ifnet *, u_long, caddr_t);
-static int	tuninit(struct ifnet *);
+static void	tuninit(struct ifnet *);
 static int	tunmodevent(module_t, int, void *);
 static int	tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
 		    struct route *ro);
@@ -230,8 +230,8 @@ tunclone(void *arg, struct ucred *cred, char *name, int namelen,
 	i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
 	if (i) {
 		if (append_unit) {
-			namelen = snprintf(devname, sizeof(devname), "%s%d", name,
-			    u);
+			namelen = snprintf(devname, sizeof(devname), "%s%d",
+			    name, u);
 			name = devname;
 		}
 		/* No preexisting struct cdev *, create one */
@@ -261,6 +261,7 @@ tun_destroy(struct tun_softc *tp)
 	if_detach(TUN2IFP(tp));
 	if_free(TUN2IFP(tp));
 	destroy_dev(dev);
+	seldrain(&tp->tun_rsel);
 	knlist_destroy(&tp->tun_rsel.si_note);
 	mtx_destroy(&tp->tun_mtx);
 	cv_destroy(&tp->tun_cv);
@@ -504,14 +505,13 @@ tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
 	return (0);
 }
 
-static int
+static void
 tuninit(struct ifnet *ifp)
 {
 	struct tun_softc *tp = ifp->if_softc;
 #ifdef INET
 	struct ifaddr *ifa;
 #endif
-	int error = 0;
 
 	TUNDEBUG(ifp, "tuninit\n");
 
@@ -538,7 +538,6 @@ tuninit(struct ifnet *ifp)
 	if_addr_runlock(ifp);
 #endif
 	mtx_unlock(&tp->tun_mtx);
-	return (error);
 }
 
 /*
@@ -562,12 +561,12 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		mtx_unlock(&tp->tun_mtx);
 		break;
 	case SIOCSIFADDR:
-		error = tuninit(ifp);
-		TUNDEBUG(ifp, "address set, error=%d\n", error);
+		tuninit(ifp);
+		TUNDEBUG(ifp, "address set\n");
 		break;
 	case SIOCSIFDSTADDR:
-		error = tuninit(ifp);
-		TUNDEBUG(ifp, "destination address set, error=%d\n", error);
+		tuninit(ifp);
+		TUNDEBUG(ifp, "destination address set\n");
 		break;
 	case SIOCSIFMTU:
 		ifp->if_mtu = ifr->ifr_mtu;
@@ -587,11 +586,8 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
  * tunoutput - queue packets from higher level ready to put out.
  */
 static int
-tunoutput(
-	struct ifnet *ifp,
-	struct mbuf *m0,
-	struct sockaddr *dst,
-	struct route *ro)
+tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+    struct route *ro)
 {
 	struct tun_softc *tp = ifp->if_softc;
 	u_short cached_tun_flags;
@@ -671,10 +667,8 @@ tunoutput(
 	}
 
 	error = (ifp->if_transmit)(ifp, m0);
-	if (error) {
-		ifp->if_collisions++;
+	if (error)
 		return (ENOBUFS);
-	}
 	ifp->if_opackets++;
 	return (0);
 }
@@ -683,7 +677,8 @@ tunoutput(
  * the cdevsw interface is now pretty minimal.
  */
 static	int
-tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
+tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
+    struct thread *td)
 {
 	int		error;
 	struct tun_softc *tp = dev->si_drv1;
@@ -875,7 +870,6 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
 	struct tun_softc *tp = dev->si_drv1;
 	struct ifnet	*ifp = TUN2IFP(tp);
 	struct mbuf	*m;
-	int		error = 0;
 	uint32_t	family;
 	int 		isr;
 
@@ -895,7 +889,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
 
 	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) {
 		ifp->if_ierrors++;
-		return (error);
+		return (ENOBUFS);
 	}
 
 	m->m_pkthdr.rcvif = ifp;
@@ -950,6 +944,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
 	ifp->if_ibytes += m->m_pkthdr.len;
 	ifp->if_ipackets++;
 	CURVNET_SET(ifp->if_vnet);
+	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 	CURVNET_RESTORE();
 	return (0);
diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h
index 172ebe0e..c5c489fb 100644
--- a/freebsd/sys/net/if_var.h
+++ b/freebsd/sys/net/if_var.h
@@ -197,17 +197,18 @@ struct ifnet {
 					/* protected by if_addr_mtx */
 	void	*if_pf_kif;
 	void	*if_lagg;		/* lagg glue */
-	u_char	 if_alloctype;		/* if_type at time of allocation */
+	u_char	if_alloctype;		/* if_type at time of allocation */
 
 	/*
 	 * Spare fields are added so that we can modify sensitive data
 	 * structures without changing the kernel binary interface, and must
 	 * be used with care where binary compatibility is required.
 	 */
-	char	 if_cspare[3];
+	char	if_cspare[3];
 	char	*if_description;	/* interface description */
-	void	*if_pspare[7];
-	int	if_ispare[4];
+	void	*if_pspare[7];		/* 1 netmap, 6 TBD */
+	int	if_ispare[3];
+	u_int	if_fib;			/* interface FIB */
 };
 
 typedef void if_init_f_t(void *);
@@ -249,9 +250,15 @@ typedef void if_init_f_t(void *);
 #define	IF_ADDR_LOCK_INIT(if)	mtx_init(&(if)->if_addr_mtx,		\
 				    "if_addr_mtx", NULL, MTX_DEF)
 #define	IF_ADDR_LOCK_DESTROY(if)	mtx_destroy(&(if)->if_addr_mtx)
-#define	IF_ADDR_LOCK(if)	mtx_lock(&(if)->if_addr_mtx)
-#define	IF_ADDR_UNLOCK(if)	mtx_unlock(&(if)->if_addr_mtx)
+#define	IF_ADDR_WLOCK(if)	mtx_lock(&(if)->if_addr_mtx)
+#define	IF_ADDR_WUNLOCK(if)	mtx_unlock(&(if)->if_addr_mtx)
+#define	IF_ADDR_RLOCK(if)	mtx_lock(&(if)->if_addr_mtx)
+#define	IF_ADDR_RUNLOCK(if)	mtx_unlock(&(if)->if_addr_mtx)
 #define	IF_ADDR_LOCK_ASSERT(if)	mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
+#define	IF_ADDR_WLOCK_ASSERT(if)	mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
+/* XXX: Compat. */
+#define	IF_ADDR_LOCK(if)	IF_ADDR_WLOCK(if)
+#define	IF_ADDR_UNLOCK(if)	IF_ADDR_WUNLOCK(if)
 
 /*
  * Function variations on locking macros intended to be used by loadable
diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c
index 576243d9..81c151a5 100644
--- a/freebsd/sys/net/if_vlan.c
+++ b/freebsd/sys/net/if_vlan.c
@@ -36,9 +36,8 @@
  * we need to pretend to be enough of an Ethernet implementation
  * to make arp work.  The way we do this is by telling everyone
  * that we are an Ethernet, and then catch the packets that
- * ether_output() left on our output queue when it calls
- * if_start(), rewrite them for use by the real outgoing interface,
- * and ask it to send them.
+ * ether_output() sends to us via if_transmit(), rewrite them for
+ * use by the real outgoing interface, and ask it to send them.
  */
 
 #include <sys/cdefs.h>
@@ -181,16 +180,17 @@ static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
 #endif
 static	void trunk_destroy(struct ifvlantrunk *trunk);
 
-static	void vlan_start(struct ifnet *ifp);
 static	void vlan_init(void *foo);
 static	void vlan_input(struct ifnet *ifp, struct mbuf *m);
 static	int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
+static	void vlan_qflush(struct ifnet *ifp);
 static	int vlan_setflag(struct ifnet *ifp, int flag, int status,
     int (*func)(struct ifnet *, int));
 static	int vlan_setflags(struct ifnet *ifp, int status);
 static	int vlan_setmulti(struct ifnet *ifp);
+static	int vlan_transmit(struct ifnet *ifp, struct mbuf *m);
 static	void vlan_unconfig(struct ifnet *ifp);
-static	void vlan_unconfig_locked(struct ifnet *ifp);
+static	void vlan_unconfig_locked(struct ifnet *ifp, int departing);
 static	int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
 static	void vlan_link_state(struct ifnet *ifp, int link);
 static	void vlan_capabilities(struct ifvlan *ifv);
@@ -545,7 +545,7 @@ vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
 #ifdef VLAN_ARRAY
 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
 		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
-			vlan_unconfig_locked(ifv->ifv_ifp);
+			vlan_unconfig_locked(ifv->ifv_ifp, 1);
 			if (ifp->if_vlantrunk == NULL)
 				break;
 		}
@@ -553,7 +553,7 @@ vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
 restart:
 	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
 		if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
-			vlan_unconfig_locked(ifv->ifv_ifp);
+			vlan_unconfig_locked(ifv->ifv_ifp, 1);
 			if (ifp->if_vlantrunk)
 				goto restart;	/* trunk->hwidth can change */
 			else
@@ -809,9 +809,9 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 	/* NB: mtu is not set here */
 
 	ifp->if_init = vlan_init;
-	ifp->if_start = vlan_start;
+	ifp->if_transmit = vlan_transmit;
+	ifp->if_qflush = vlan_qflush;
 	ifp->if_ioctl = vlan_ioctl;
-	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 	ifp->if_flags = VLAN_IFFLAGS;
 	ether_ifattach(ifp, eaddr);
 	/* Now undo some of the damage... */
@@ -823,7 +823,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 		error = vlan_config(ifv, p, tag);
 		if (error != 0) {
 			/*
-			 * Since we've partialy failed, we need to back
+			 * Since we've partially failed, we need to back
 			 * out all the way, otherwise userland could get
 			 * confused.  Thus, we destroy the interface.
 			 */
@@ -867,99 +867,99 @@ vlan_init(void *foo __unused)
 }
 
 /*
- * The if_start method for vlan(4) interface. It doesn't
- * raises the IFF_DRV_OACTIVE flag, since it is called
- * only from IFQ_HANDOFF() macro in ether_output_frame().
- * If the interface queue is full, and vlan_start() is
- * not called, the queue would never get emptied and
- * interface would stall forever.
+ * The if_transmit method for vlan(4) interface.
  */
-static void
-vlan_start(struct ifnet *ifp)
+static int
+vlan_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ifvlan *ifv;
 	struct ifnet *p;
-	struct mbuf *m;
-	int error;
+	int error, len, mcast;
 
 	ifv = ifp->if_softc;
 	p = PARENT(ifv);
+	len = m->m_pkthdr.len;
+	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 
-	for (;;) {
-		IF_DEQUEUE(&ifp->if_snd, m);
-		if (m == NULL)
-			break;
-		BPF_MTAP(ifp, m);
+	BPF_MTAP(ifp, m);
 
-		/*
-		 * Do not run parent's if_start() if the parent is not up,
-		 * or parent's driver will cause a system crash.
-		 */
-		if (!UP_AND_RUNNING(p)) {
-			m_freem(m);
-			ifp->if_collisions++;
-			continue;
-		}
+	/*
+	 * Do not run parent's if_transmit() if the parent is not up,
+	 * or parent's driver will cause a system crash.
+	 */
+	if (!UP_AND_RUNNING(p)) {
+		m_freem(m);
+		ifp->if_oerrors++;
+		return (0);
+	}
 
-		/*
-		 * Pad the frame to the minimum size allowed if told to.
-		 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
-		 * paragraph C.4.4.3.b.  It can help to work around buggy
-		 * bridges that violate paragraph C.4.4.3.a from the same
-		 * document, i.e., fail to pad short frames after untagging.
-		 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
-		 * untagging it will produce a 62-byte frame, which is a runt
-		 * and requires padding.  There are VLAN-enabled network
-		 * devices that just discard such runts instead or mishandle
-		 * them somehow.
-		 */
-		if (soft_pad) {
-			static char pad[8];	/* just zeros */
-			int n;
-
-			for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
-			     n > 0; n -= sizeof(pad))
-				if (!m_append(m, min(n, sizeof(pad)), pad))
-					break;
-
-			if (n > 0) {
-				if_printf(ifp, "cannot pad short frame\n");
-				ifp->if_oerrors++;
-				m_freem(m);
-				continue;
-			}
-		}
+	/*
+	 * Pad the frame to the minimum size allowed if told to.
+	 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
+	 * paragraph C.4.4.3.b.  It can help to work around buggy
+	 * bridges that violate paragraph C.4.4.3.a from the same
+	 * document, i.e., fail to pad short frames after untagging.
+	 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
+	 * untagging it will produce a 62-byte frame, which is a runt
+	 * and requires padding.  There are VLAN-enabled network
+	 * devices that just discard such runts instead or mishandle
+	 * them somehow.
+	 */
+	if (soft_pad) {
+		static char pad[8];	/* just zeros */
+		int n;
 
-		/*
-		 * If underlying interface can do VLAN tag insertion itself,
-		 * just pass the packet along. However, we need some way to
-		 * tell the interface where the packet came from so that it
-		 * knows how to find the VLAN tag to use, so we attach a
-		 * packet tag that holds it.
-		 */
-		if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
-			m->m_pkthdr.ether_vtag = ifv->ifv_tag;
-			m->m_flags |= M_VLANTAG;
-		} else {
-			m = ether_vlanencap(m, ifv->ifv_tag);
-			if (m == NULL) {
-				if_printf(ifp,
-				    "unable to prepend VLAN header\n");
-				ifp->if_oerrors++;
-				continue;
-			}
+		for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
+		     n > 0; n -= sizeof(pad))
+			if (!m_append(m, min(n, sizeof(pad)), pad))
+				break;
+
+		if (n > 0) {
+			if_printf(ifp, "cannot pad short frame\n");
+			ifp->if_oerrors++;
+			m_freem(m);
+			return (0);
 		}
+	}
 
-		/*
-		 * Send it, precisely as ether_output() would have.
-		 * We are already running at splimp.
-		 */
-		error = (p->if_transmit)(p, m);
-		if (!error)
-			ifp->if_opackets++;
-		else
+	/*
+	 * If underlying interface can do VLAN tag insertion itself,
+	 * just pass the packet along. However, we need some way to
+	 * tell the interface where the packet came from so that it
+	 * knows how to find the VLAN tag to use, so we store it in
+	 * the mbuf packet header and set M_VLANTAG.
+	 */
+	if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
+		m->m_pkthdr.ether_vtag = ifv->ifv_tag;
+		m->m_flags |= M_VLANTAG;
+	} else {
+		m = ether_vlanencap(m, ifv->ifv_tag);
+		if (m == NULL) {
+			if_printf(ifp, "unable to prepend VLAN header\n");
 			ifp->if_oerrors++;
+			return (0);
+		}
 	}
+
+	/*
+	 * Send it, precisely as ether_output() would have.
+	 */
+	error = (p->if_transmit)(p, m);
+	if (!error) {
+		ifp->if_opackets++;
+		ifp->if_omcasts += mcast;
+		ifp->if_obytes += len;
+	} else
+		ifp->if_oerrors++;
+	return (error);
+}
+
+/*
+ * The ifp->if_qflush entry point for vlan(4) is a no-op.
+ */
+static void
+vlan_qflush(struct ifnet *ifp __unused)
+{
 }
 
 static void
@@ -1165,17 +1165,18 @@ vlan_unconfig(struct ifnet *ifp)
 {
 
 	VLAN_LOCK();
-	vlan_unconfig_locked(ifp);
+	vlan_unconfig_locked(ifp, 0);
 	VLAN_UNLOCK();
 }
 
 static void
-vlan_unconfig_locked(struct ifnet *ifp)
+vlan_unconfig_locked(struct ifnet *ifp, int departing)
 {
 	struct ifvlantrunk *trunk;
 	struct vlan_mc_entry *mc;
 	struct ifvlan *ifv;
 	struct ifnet  *parent;
+	int error;
 
 	VLAN_LOCK_ASSERT();
 
@@ -1206,13 +1207,21 @@ vlan_unconfig_locked(struct ifnet *ifp)
 			    ETHER_ADDR_LEN);
 
 			/*
-			 * This may fail if the parent interface is
-			 * being detached.  Regardless, we should do a
-			 * best effort to free this interface as much
-			 * as possible as all callers expect vlan
-			 * destruction to succeed.
+			 * If the parent interface is being detached,
+			 * all its multicast addresses have already
+			 * been removed.  Warn about errors if
+			 * if_delmulti() does fail, but don't abort as
+			 * all callers expect vlan destruction to
+			 * succeed.
 			 */
-			(void)if_delmulti(parent, (struct sockaddr *)&sdl);
+			if (!departing) {
+				error = if_delmulti(parent,
+				    (struct sockaddr *)&sdl);
+				if (error)
+					if_printf(ifp,
+		    "Failed to delete multicast address from parent: %d\n",
+					    error);
+			}
 			SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
 			free(mc, M_VLAN);
 		}
diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c
index 465b0b29..6ba71233 100644
--- a/freebsd/sys/net/netisr.c
+++ b/freebsd/sys/net/netisr.c
@@ -2,8 +2,12 @@
 
 /*-
  * Copyright (c) 2007-2009 Robert N. M. Watson
+ * Copyright (c) 2010 Juniper Networks, Inc.
  * All rights reserved.
  *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -34,13 +38,13 @@ __FBSDID("$FreeBSD$");
  * dispatched) and asynchronous (deferred dispatch) processing of packets by
  * registered protocol handlers.  Callers pass a protocol identifier and
  * packet to netisr, along with a direct dispatch hint, and work will either
- * be immediately processed with the registered handler, or passed to a
- * kernel software interrupt (SWI) thread for deferred dispatch.  Callers
- * will generally select one or the other based on:
+ * be immediately processed by the registered handler, or passed to a
+ * software interrupt (SWI) thread for deferred dispatch.  Callers will
+ * generally select one or the other based on:
  *
- * - Might directly dispatching a netisr handler lead to code reentrance or
+ * - Whether directly dispatching a netisr handler leads to code reentrance or
  *   lock recursion, such as entering the socket code from the socket code.
- * - Might directly dispatching a netisr handler lead to recursive
+ * - Whether directly dispatching a netisr handler leads to recursive
  *   processing, such as when decapsulating several wrapped layers of tunnel
  *   information (IPSEC within IPSEC within ...).
  *
@@ -56,9 +60,9 @@ __FBSDID("$FreeBSD$");
  * more than one flow.
  *
  * netisr supports several policy variations, represented by the
- * NETISR_POLICY_* constants, allowing protocols to play a varying role in
+ * NETISR_POLICY_* constants, allowing protocols to play various roles in
  * identifying flows, assigning work to CPUs, etc.  These are described in
- * detail in netisr.h.
+ * netisr.h.
  */
 
 #include <rtems/bsd/local/opt_ddb.h>
@@ -85,9 +89,11 @@ __FBSDID("$FreeBSD$");
 #include <ddb/ddb.h>
 #endif
 
+#define	_WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
+#include <net/netisr_internal.h>
 #include <net/vnet.h>
 
 /*-
@@ -97,13 +103,13 @@ __FBSDID("$FreeBSD$");
  *
  * The following data structures and fields are protected by this lock:
  *
- * - The np array, including all fields of struct netisr_proto.
+ * - The netisr_proto array, including all fields of struct netisr_proto.
  * - The nws array, including all fields of struct netisr_worker.
  * - The nws_array array.
  *
  * Note: the NETISR_LOCKING define controls whether read locks are acquired
  * in packet processing paths requiring netisr registration stability.  This
- * is disabled by default as it can lead to a measurable performance
+ * is disabled by default as it can lead to measurable performance
  * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
  * because netisr registration and unregistration is extremely rare at
  * runtime.  If it becomes more common, this decision should be revisited.
@@ -158,111 +164,58 @@ SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW,
  */
 static int	netisr_maxthreads = -1;		/* Max number of threads. */
 TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
-SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RD,
+SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
     &netisr_maxthreads, 0,
     "Use at most this many CPUs for netisr processing");
 
 static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
 TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads);
-SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RD,
+SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
     &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");
 
 /*
- * Limit per-workstream queues to at most net.isr.maxqlimit, both for initial
- * configuration and later modification using netisr_setqlimit().
+ * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit,
+ * both for initial configuration and later modification using
+ * netisr_setqlimit().
  */
 #define	NETISR_DEFAULT_MAXQLIMIT	10240
 static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
 TUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit);
-SYSCTL_INT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RD,
+SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
     &netisr_maxqlimit, 0,
     "Maximum netisr per-protocol, per-CPU queue depth.");
 
 /*
- * The default per-workstream queue limit for protocols that don't initialize
- * the nh_qlimit field of their struct netisr_handler.  If this is set above
- * netisr_maxqlimit, we truncate it to the maximum during boot.
+ * The default per-workstream mbuf queue limit for protocols that don't
+ * initialize the nh_qlimit field of their struct netisr_handler.  If this is
+ * set above netisr_maxqlimit, we truncate it to the maximum during boot.
  */
 #define	NETISR_DEFAULT_DEFAULTQLIMIT	256
 static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
 TUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit);
-SYSCTL_INT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RD,
+SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
     &netisr_defaultqlimit, 0,
     "Default netisr per-protocol, per-CPU queue limit if not set by protocol");
 
 /*
- * Each protocol is described by a struct netisr_proto, which holds all
- * global per-protocol information.  This data structure is set up by
- * netisr_register(), and derived from the public struct netisr_handler.
- */
-struct netisr_proto {
-	const char	*np_name;	/* Character string protocol name. */
-	netisr_handler_t *np_handler;	/* Protocol handler. */
-	netisr_m2flow_t	*np_m2flow;	/* Query flow for untagged packet. */
-	netisr_m2cpuid_t *np_m2cpuid;	/* Query CPU to process packet on. */
-	netisr_drainedcpu_t *np_drainedcpu; /* Callback when drained a queue. */
-	u_int		 np_qlimit;	/* Maximum per-CPU queue depth. */
-	u_int		 np_policy;	/* Work placement policy. */
-};
-
-#define	NETISR_MAXPROT		16		/* Compile-time limit. */
-
-/*
- * The np array describes all registered protocols, indexed by protocol
- * number.
+ * Store and export the compile-time constant NETISR_MAXPROT limit on the
+ * number of protocols that can register with netisr at a time.  This is
+ * required for crashdump analysis, as it sizes netisr_proto[].
  */
-static struct netisr_proto	np[NETISR_MAXPROT];
-
-/*
- * Protocol-specific work for each workstream is described by struct
- * netisr_work.  Each work descriptor consists of an mbuf queue and
- * statistics.
- */
-struct netisr_work {
-	/*
-	 * Packet queue, linked by m_nextpkt.
-	 */
-	struct mbuf	*nw_head;
-	struct mbuf	*nw_tail;
-	u_int		 nw_len;
-	u_int		 nw_qlimit;
-	u_int		 nw_watermark;
-
-	/*
-	 * Statistics -- written unlocked, but mostly from curcpu.
-	 */
-	u_int64_t	 nw_dispatched; /* Number of direct dispatches. */
-	u_int64_t	 nw_hybrid_dispatched; /* "" hybrid dispatches. */
-	u_int64_t	 nw_qdrops;	/* "" drops. */
-	u_int64_t	 nw_queued;	/* "" enqueues. */
-	u_int64_t	 nw_handled;	/* "" handled in worker. */
-};
+static u_int	netisr_maxprot = NETISR_MAXPROT;
+SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
+    &netisr_maxprot, 0,
+    "Compile-time limit on the number of protocols supported by netisr.");
 
 /*
- * Workstreams hold a set of ordered work across each protocol, and are
- * described by netisr_workstream.  Each workstream is associated with a
- * worker thread, which in turn is pinned to a CPU.  Work associated with a
- * workstream can be processd in other threads during direct dispatch;
- * concurrent processing is prevented by the NWS_RUNNING flag, which
- * indicates that a thread is already processing the work queue.
+ * The netisr_proto array describes all registered protocols, indexed by
+ * protocol number.  See netisr_internal.h for more details.
  */
-struct netisr_workstream {
-	struct intr_event *nws_intr_event;	/* Handler for stream. */
-	void		*nws_swi_cookie;	/* swi(9) cookie for stream. */
-	struct mtx	 nws_mtx;		/* Synchronize work. */
-	u_int		 nws_cpu;		/* CPU pinning. */
-	u_int		 nws_flags;		/* Wakeup flags. */
-	u_int		 nws_pendingbits;	/* Scheduled protocols. */
-
-	/*
-	 * Each protocol has per-workstream data.
-	 */
-	struct netisr_work	nws_work[NETISR_MAXPROT];
-} __aligned(CACHE_LINE_SIZE);
+static struct netisr_proto	netisr_proto[NETISR_MAXPROT];
 
 #ifndef __rtems__
 /*
- * Per-CPU workstream data.
+ * Per-CPU workstream data.  See netisr_internal.h for more details.
  */
 DPCPU_DEFINE(struct netisr_workstream, nws);
 
@@ -278,20 +231,13 @@ static u_int				 nws_array[MAXCPU];
  * CPUs once fully started.
  */
 static u_int				 nws_count;
-SYSCTL_INT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
+SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
     &nws_count, 0, "Number of extant netisr threads.");
 #else /* __rtems__ */
 static struct netisr_workstream rtems_bsd_nws;
 #endif /* __rtems__ */
 
 /*
- * Per-workstream flags.
- */
-#define	NWS_RUNNING	0x00000001	/* Currently running in a thread. */
-#define	NWS_DISPATCHING	0x00000002	/* Currently being direct-dispatched. */
-#define	NWS_SCHEDULED	0x00000004	/* Signal issued. */
-
-/*
  * Synchronization for each workstream: a mutex protects all mutable fields
  * in each stream, including per-protocol state (mbuf queues).  The SWI is
  * woken up if asynchronous dispatch is required.
@@ -324,7 +270,7 @@ netisr_get_cpuid(u_int cpunumber)
 }
 
 /*
- * The default implementation of -> CPU ID mapping.
+ * The default implementation of flow -> CPU ID mapping.
  *
  * Non-static so that protocols can use it to map their own work to specific
  * CPUs in a manner consistent to netisr for affinity purposes.
@@ -381,36 +327,34 @@ netisr_register(const struct netisr_handler *nhp)
 	 * Test that no existing registration exists for this protocol.
 	 */
 	NETISR_WLOCK();
-	KASSERT(np[proto].np_name == NULL,
+	KASSERT(netisr_proto[proto].np_name == NULL,
 	    ("%s(%u, %s): name present", __func__, proto, name));
-	KASSERT(np[proto].np_handler == NULL,
+	KASSERT(netisr_proto[proto].np_handler == NULL,
 	    ("%s(%u, %s): handler present", __func__, proto, name));
 
-	np[proto].np_name = name;
-	np[proto].np_handler = nhp->nh_handler;
-	np[proto].np_m2flow = nhp->nh_m2flow;
-	np[proto].np_m2cpuid = nhp->nh_m2cpuid;
-	np[proto].np_drainedcpu = nhp->nh_drainedcpu;
+	netisr_proto[proto].np_name = name;
+	netisr_proto[proto].np_handler = nhp->nh_handler;
+	netisr_proto[proto].np_m2flow = nhp->nh_m2flow;
+	netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid;
+	netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu;
 	if (nhp->nh_qlimit == 0)
-		np[proto].np_qlimit = netisr_defaultqlimit;
+		netisr_proto[proto].np_qlimit = netisr_defaultqlimit;
 	else if (nhp->nh_qlimit > netisr_maxqlimit) {
 		printf("%s: %s requested queue limit %u capped to "
 		    "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
 		    netisr_maxqlimit);
-		np[proto].np_qlimit = netisr_maxqlimit;
+		netisr_proto[proto].np_qlimit = netisr_maxqlimit;
 	} else
-		np[proto].np_qlimit = nhp->nh_qlimit;
-	np[proto].np_policy = nhp->nh_policy;
-	for (i = 0; i <= mp_maxid; i++) {
-		if (CPU_ABSENT(i))
-			continue;
+		netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
+	netisr_proto[proto].np_policy = nhp->nh_policy;
+	CPU_FOREACH(i) {
 #ifndef __rtems__
 		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
 #else /* __rtems__ */
 		npwp = &rtems_bsd_nws.nws_work[proto];
 #endif /* __rtems__ */
 		bzero(npwp, sizeof(*npwp));
-		npwp->nw_qlimit = np[proto].np_qlimit;
+		npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
 	}
 	NETISR_WUNLOCK();
 }
@@ -435,13 +379,11 @@ netisr_clearqdrops(const struct netisr_handler *nhp)
 	    ("%s(%u): protocol too big for %s", __func__, proto, name));
 
 	NETISR_WLOCK();
-	KASSERT(np[proto].np_handler != NULL,
+	KASSERT(netisr_proto[proto].np_handler != NULL,
 	    ("%s(%u): protocol not registered for %s", __func__, proto,
 	    name));
 
-	for (i = 0; i <= mp_maxid; i++) {
-		if (CPU_ABSENT(i))
-			continue;
+	CPU_FOREACH(i) {
 #ifndef __rtems__
 		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
 #else /* __rtems__ */
@@ -453,7 +395,7 @@ netisr_clearqdrops(const struct netisr_handler *nhp)
 }
 
 /*
- * Query the current drop counters across all workstreams for a protocol.
+ * Query current drop counters across all workstreams for a protocol.
  */
 void
 netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
@@ -474,13 +416,11 @@ netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
 	    ("%s(%u): protocol too big for %s", __func__, proto, name));
 
 	NETISR_RLOCK(&tracker);
-	KASSERT(np[proto].np_handler != NULL,
+	KASSERT(netisr_proto[proto].np_handler != NULL,
 	    ("%s(%u): protocol not registered for %s", __func__, proto,
 	    name));
 
-	for (i = 0; i <= mp_maxid; i++) {
-		if (CPU_ABSENT(i))
-			continue;
+	CPU_FOREACH(i) {
 #ifndef __rtems__
 		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
 #else /* __rtems__ */
@@ -492,7 +432,7 @@ netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
 }
 
 /*
- * Query the current queue limit for per-workstream queues for a protocol.
+ * Query current per-workstream queue limit for a protocol.
  */
 void
 netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
@@ -511,10 +451,10 @@ netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
 	    ("%s(%u): protocol too big for %s", __func__, proto, name));
 
 	NETISR_RLOCK(&tracker);
-	KASSERT(np[proto].np_handler != NULL,
+	KASSERT(netisr_proto[proto].np_handler != NULL,
 	    ("%s(%u): protocol not registered for %s", __func__, proto,
 	    name));
-	*qlimitp = np[proto].np_qlimit;
+	*qlimitp = netisr_proto[proto].np_qlimit;
 	NETISR_RUNLOCK(&tracker);
 }
 
@@ -543,14 +483,12 @@ netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
 	    ("%s(%u): protocol too big for %s", __func__, proto, name));
 
 	NETISR_WLOCK();
-	KASSERT(np[proto].np_handler != NULL,
+	KASSERT(netisr_proto[proto].np_handler != NULL,
 	    ("%s(%u): protocol not registered for %s", __func__, proto,
 	    name));
 
-	np[proto].np_qlimit = qlimit;
-	for (i = 0; i <= mp_maxid; i++) {
-		if (CPU_ABSENT(i))
-			continue;
+	netisr_proto[proto].np_qlimit = qlimit;
+	CPU_FOREACH(i) {
 #ifndef __rtems__
 		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
 #else /* __rtems__ */
@@ -608,19 +546,17 @@ netisr_unregister(const struct netisr_handler *nhp)
 	    ("%s(%u): protocol too big for %s", __func__, proto, name));
 
 	NETISR_WLOCK();
-	KASSERT(np[proto].np_handler != NULL,
+	KASSERT(netisr_proto[proto].np_handler != NULL,
 	    ("%s(%u): protocol not registered for %s", __func__, proto,
 	    name));
 
-	np[proto].np_name = NULL;
-	np[proto].np_handler = NULL;
-	np[proto].np_m2flow = NULL;
-	np[proto].np_m2cpuid = NULL;
-	np[proto].np_qlimit = 0;
-	np[proto].np_policy = 0;
-	for (i = 0; i <= mp_maxid; i++) {
-		if (CPU_ABSENT(i))
-			continue;
+	netisr_proto[proto].np_name = NULL;
+	netisr_proto[proto].np_handler = NULL;
+	netisr_proto[proto].np_m2flow = NULL;
+	netisr_proto[proto].np_m2cpuid = NULL;
+	netisr_proto[proto].np_qlimit = 0;
+	netisr_proto[proto].np_policy = 0;
+	CPU_FOREACH(i) {
 #ifndef __rtems__
 		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
 #else /* __rtems__ */
@@ -744,22 +680,23 @@ netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
 		if (local_npw.nw_head == NULL)
 			local_npw.nw_tail = NULL;
 		local_npw.nw_len--;
-		VNET_ASSERT(m->m_pkthdr.rcvif != NULL);
+		VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
+		    ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
 		CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
-		np[proto].np_handler(m);
+		netisr_proto[proto].np_handler(m);
 		CURVNET_RESTORE();
 	}
 	KASSERT(local_npw.nw_len == 0,
 	    ("%s(%u): len %u", __func__, proto, local_npw.nw_len));
-	if (np[proto].np_drainedcpu)
-		np[proto].np_drainedcpu(nwsp->nws_cpu);
+	if (netisr_proto[proto].np_drainedcpu)
+		netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu);
 	NWS_LOCK(nwsp);
 	npwp->nw_handled += handled;
 	return (handled);
 }
 
 /*
- * SWI handler for netisr -- processes prackets in a set of workstreams that
+ * SWI handler for netisr -- processes packets in a set of workstreams that
  * it owns, woken up by calls to NWS_SIGNAL().  If this workstream is already
  * being direct dispatched, go back to sleep and wait for the dispatching
  * thread to wake us up again.
@@ -827,6 +764,11 @@ netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
 		npwp->nw_len++;
 		if (npwp->nw_len > npwp->nw_watermark)
 			npwp->nw_watermark = npwp->nw_len;
+
+		/*
+		 * We must set the bit regardless of NWS_RUNNING, so that
+		 * swi_net() keeps calling netisr_process_workstream_proto().
+		 */
 		nwsp->nws_pendingbits |= (1 << proto);
 		if (!(nwsp->nws_flags & 
 		    (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
@@ -887,10 +829,10 @@ netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
 #ifdef NETISR_LOCKING
 	NETISR_RLOCK(&tracker);
 #endif
-	KASSERT(np[proto].np_handler != NULL,
+	KASSERT(netisr_proto[proto].np_handler != NULL,
 	    ("%s: invalid proto %u", __func__, proto));
 
-	m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
+	m = netisr_select_cpuid(&netisr_proto[proto], source, m, &cpuid);
 	if (m != NULL) {
 		KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
 		    cpuid));
@@ -911,7 +853,7 @@ netisr_queue(u_int proto, struct mbuf *m)
 }
 
 /*
- * Dispatch a packet for netisr processing, direct dispatch permitted by
+ * Dispatch a packet for netisr processing; direct dispatch is permitted by
  * calling context.
  */
 int
@@ -936,7 +878,7 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
 #ifdef NETISR_LOCKING
 	NETISR_RLOCK(&tracker);
 #endif
-	KASSERT(np[proto].np_handler != NULL,
+	KASSERT(netisr_proto[proto].np_handler != NULL,
 	    ("%s: invalid proto %u", __func__, proto));
 
 	/*
@@ -951,7 +893,7 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
 		npwp = &nwsp->nws_work[proto];
 		npwp->nw_dispatched++;
 		npwp->nw_handled++;
-		np[proto].np_handler(m);
+		netisr_proto[proto].np_handler(m);
 		error = 0;
 		goto out_unlock;
 	}
@@ -961,7 +903,7 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
 	 * dispatch if we're on the right CPU and the netisr worker isn't
 	 * already running.
 	 */
-	m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
+	m = netisr_select_cpuid(&netisr_proto[proto], source, m, &cpuid);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto out_unlock;
@@ -1000,7 +942,7 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
 	 */
 	nwsp->nws_flags |= NWS_DISPATCHING;
 	NWS_UNLOCK(nwsp);
-	np[proto].np_handler(m);
+	netisr_proto[proto].np_handler(m);
 	NWS_LOCK(nwsp);
 	nwsp->nws_flags &= ~NWS_DISPATCHING;
 	npwp->nw_handled++;
@@ -1171,6 +1113,166 @@ netisr_start(void *arg)
 SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
 #endif /* __rtems__ */
 
+/*
+ * Sysctl monitoring for netisr: query a list of registered protocols.
+ */
+static int
+sysctl_netisr_proto(SYSCTL_HANDLER_ARGS)
+{
+	struct rm_priotracker tracker;
+	struct sysctl_netisr_proto *snpp, *snp_array;
+	struct netisr_proto *npp;
+	u_int counter, proto;
+	int error;
+
+	if (req->newptr != NULL)	/* Read-only: refuse new values. */
+		return (EINVAL);
+	snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP,
+	    M_ZERO | M_WAITOK);
+	counter = 0;
+	NETISR_RLOCK(&tracker);
+	for (proto = 0; proto < NETISR_MAXPROT; proto++) {
+		npp = &netisr_proto[proto];
+		if (npp->np_name == NULL)
+			continue;
+		snpp = &snp_array[counter];
+		snpp->snp_version = sizeof(*snpp);
+		strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN);
+		snpp->snp_proto = proto;
+		snpp->snp_qlimit = npp->np_qlimit;
+		snpp->snp_policy = npp->np_policy;
+		if (npp->np_m2flow != NULL)
+			snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW;
+		if (npp->np_m2cpuid != NULL)
+			snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID;
+		if (npp->np_drainedcpu != NULL)
+			snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU;
+		counter++;
+	}
+	NETISR_RUNLOCK(&tracker);
+	KASSERT(counter <= NETISR_MAXPROT,
+	    ("sysctl_netisr_proto: counter too big (%u)", counter));
+	error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter);
+	free(snp_array, M_TEMP);
+	return (error);
+}
+
+SYSCTL_PROC(_net_isr, OID_AUTO, proto,
+    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto,
+    "S,sysctl_netisr_proto",
+    "Return list of protocols registered with netisr");
+
+/*
+ * Sysctl monitoring for netisr: query a list of workstreams.
+ */
+static int
+sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS)
+{
+	struct rm_priotracker tracker;
+	struct sysctl_netisr_workstream *snwsp, *snws_array;
+	struct netisr_workstream *nwsp;
+	u_int counter, cpuid;
+	int error;
+
+	if (req->newptr != NULL)	/* Read-only: refuse new values. */
+		return (EINVAL);
+	snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP,
+	    M_ZERO | M_WAITOK);
+	counter = 0;
+	NETISR_RLOCK(&tracker);
+	CPU_FOREACH(cpuid) {
+		nwsp = DPCPU_ID_PTR(cpuid, nws);
+		if (nwsp->nws_intr_event == NULL)
+			continue;
+		NWS_LOCK(nwsp);
+		snwsp = &snws_array[counter];
+		snwsp->snws_version = sizeof(*snwsp);
+
+		/*
+		 * For now, we equate workstream IDs and CPU IDs in the
+		 * kernel, but expose them independently to userspace in case
+		 * that assumption changes in the future.
+		 */
+		snwsp->snws_wsid = cpuid;
+		snwsp->snws_cpu = cpuid;
+		if (nwsp->nws_intr_event != NULL)
+			snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR;
+		NWS_UNLOCK(nwsp);
+		counter++;
+	}
+	NETISR_RUNLOCK(&tracker);
+	KASSERT(counter <= MAXCPU,
+	    ("sysctl_netisr_workstream: counter too big (%u)", counter));
+	error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter);
+	free(snws_array, M_TEMP);
+	return (error);
+}
+
+SYSCTL_PROC(_net_isr, OID_AUTO, workstream,
+    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream,
+    "S,sysctl_netisr_workstream",
+    "Return list of workstreams implemented by netisr");
+
+/*
+ * Sysctl monitoring for netisr: query per-protocol data across all
+ * workstreams.
+ */
+static int
+sysctl_netisr_work(SYSCTL_HANDLER_ARGS)
+{
+	struct rm_priotracker tracker;
+	struct sysctl_netisr_work *snwp, *snw_array;
+	struct netisr_workstream *nwsp;
+	struct netisr_proto *npp;
+	struct netisr_work *nwp;
+	u_int counter, cpuid, proto;
+	int error;
+
+	if (req->newptr != NULL)	/* Read-only: refuse new values. */
+		return (EINVAL);
+	snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT,
+	    M_TEMP, M_ZERO | M_WAITOK);
+	counter = 0;
+	NETISR_RLOCK(&tracker);
+	CPU_FOREACH(cpuid) {
+		nwsp = DPCPU_ID_PTR(cpuid, nws);
+		if (nwsp->nws_intr_event == NULL)
+			continue;
+		NWS_LOCK(nwsp);
+		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
+			npp = &netisr_proto[proto];
+			if (npp->np_name == NULL)
+				continue;
+			nwp = &nwsp->nws_work[proto];
+			snwp = &snw_array[counter];
+			snwp->snw_version = sizeof(*snwp);
+			snwp->snw_wsid = cpuid;		/* See comment above. */
+			snwp->snw_proto = proto;
+			snwp->snw_len = nwp->nw_len;
+			snwp->snw_watermark = nwp->nw_watermark;
+			snwp->snw_dispatched = nwp->nw_dispatched;
+			snwp->snw_hybrid_dispatched =
+			    nwp->nw_hybrid_dispatched;
+			snwp->snw_qdrops = nwp->nw_qdrops;
+			snwp->snw_queued = nwp->nw_queued;
+			snwp->snw_handled = nwp->nw_handled;
+			counter++;
+		}
+		NWS_UNLOCK(nwsp);
+	}
+	NETISR_RUNLOCK(&tracker);
+	KASSERT(counter <= MAXCPU * NETISR_MAXPROT,
+	    ("sysctl_netisr_work: counter too big (%u)", counter));
+	error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter);
+	free(snw_array, M_TEMP);
+	return (error);
+}
+
+SYSCTL_PROC(_net_isr, OID_AUTO, work,
+    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work,
+    "S,sysctl_netisr_work",
+    "Return list of per-workstream, per-protocol work in netisr");
+
 #ifdef DDB
 DB_SHOW_COMMAND(netisr, db_show_netisr)
 {
@@ -1181,15 +1283,13 @@ DB_SHOW_COMMAND(netisr, db_show_netisr)
 
 	db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
 	    "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
-	for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
-		if (CPU_ABSENT(cpuid))
-			continue;
+	CPU_FOREACH(cpuid) {
 		nwsp = DPCPU_ID_PTR(cpuid, nws);
 		if (nwsp->nws_intr_event == NULL)
 			continue;
 		first = 1;
 		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
-			if (np[proto].np_handler == NULL)
+			if (netisr_proto[proto].np_handler == NULL)
 				continue;
 			nwp = &nwsp->nws_work[proto];
 			if (first) {
@@ -1199,7 +1299,7 @@ DB_SHOW_COMMAND(netisr, db_show_netisr)
 				db_printf("%3s ", "");
 			db_printf(
 			    "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
-			    np[proto].np_name, nwp->nw_len,
+			    netisr_proto[proto].np_name, nwp->nw_len,
 			    nwp->nw_watermark, nwp->nw_qlimit,
 			    nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
 			    nwp->nw_qdrops, nwp->nw_queued);
diff --git a/freebsd/sys/net/netisr.h b/freebsd/sys/net/netisr.h
index 72e7f17f..cd692f6d 100644
--- a/freebsd/sys/net/netisr.h
+++ b/freebsd/sys/net/netisr.h
@@ -1,7 +1,11 @@
 /*-
  * Copyright (c) 2007-2009 Robert N. M. Watson
+ * Copyright (c) 2010 Juniper Networks, Inc.
  * All rights reserved.
  *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -28,7 +32,6 @@
 
 #ifndef _NET_NETISR_H_
 #define _NET_NETISR_H_
-#ifdef _KERNEL
 
 /*
  * The netisr (network interrupt service routine) provides a deferred
@@ -39,6 +42,13 @@
  * Historically, this was implemented by the BSD software ISR facility; it is
  * now implemented via a software ithread (SWI).
  */
+
+/*
+ * Protocol numbers, which are encoded in monitoring applications and kernel
+ * modules.  Internally, these are used in bit shift operations so must have
+ * a value 0 < proto < 32; we currently further limit at compile-time to 16
+ * for array-sizing purposes.
+ */
 #define	NETISR_IP	1
 #define	NETISR_IGMP	2		/* IGMPv3 output queue */
 #define	NETISR_ROUTE	3		/* routing socket */
@@ -52,6 +62,78 @@
 #define	NETISR_NATM	11
 #define	NETISR_EPAIR	12		/* if_epair(4) */
 
+/*
+ * Protocol ordering and affinity policy constants.  See the detailed
+ * discussion of policies later in the file.
+ */
+#define	NETISR_POLICY_SOURCE	1	/* Maintain source ordering. */
+#define	NETISR_POLICY_FLOW	2	/* Maintain flow ordering. */
+#define	NETISR_POLICY_CPU	3	/* Protocol determines CPU placement. */
+
+/*
+ * Monitoring data structures, exported by sysctl(2).
+ *
+ * Three sysctls are defined.  First, a per-protocol structure exported by
+ * net.isr.proto.
+ */
+#define	NETISR_NAMEMAXLEN	32
+struct sysctl_netisr_proto {
+	u_int	snp_version;			/* Length of struct. */
+	char	snp_name[NETISR_NAMEMAXLEN];	/* nh_name */
+	u_int	snp_proto;			/* nh_proto */
+	u_int	snp_qlimit;			/* nh_qlimit */
+	u_int	snp_policy;			/* nh_policy */
+	u_int	snp_flags;			/* Various flags. */
+	u_int	_snp_ispare[7];
+};
+
+/*
+ * Flags for sysctl_netisr_proto.snp_flags.
+ */
+#define	NETISR_SNP_FLAGS_M2FLOW		0x00000001	/* nh_m2flow */
+#define	NETISR_SNP_FLAGS_M2CPUID	0x00000002	/* nh_m2cpuid */
+#define	NETISR_SNP_FLAGS_DRAINEDCPU	0x00000004	/* nh_drainedcpu */
+
+/*
+ * Next, a structure per-workstream, with per-protocol data, exported as
+ * net.isr.workstream.
+ */
+struct sysctl_netisr_workstream {
+	u_int	snws_version;			/* Length of struct. */
+	u_int	snws_flags;			/* Various flags. */
+	u_int	snws_wsid;			/* Workstream ID. */
+	u_int	snws_cpu;			/* nws_cpu */
+	u_int	_snws_ispare[12];
+};
+
+/*
+ * Flags for sysctl_netisr_workstream.snws_flags
+ */
+#define	NETISR_SNWS_FLAGS_INTR		0x00000001	/* nws_intr_event */
+
+/*
+ * Finally, a per-workstream-per-protocol structure, exported as
+ * net.isr.work.
+ */
+struct sysctl_netisr_work {
+	u_int	snw_version;			/* Length of struct. */
+	u_int	snw_wsid;			/* Workstream ID. */
+	u_int	snw_proto;			/* Protocol number. */
+	u_int	snw_len;			/* nw_len */
+	u_int	snw_watermark;			/* nw_watermark */
+	u_int	_snw_ispare[3];
+
+	uint64_t	snw_dispatched;		/* nw_dispatched */
+	uint64_t	snw_hybrid_dispatched;	/* nw_hybrid_dispatched */
+	uint64_t	snw_qdrops;		/* nw_qdrops */
+	uint64_t	snw_queued;		/* nw_queued */
+	uint64_t	snw_handled;		/* nw_handled */
+
+	uint64_t	_snw_llspare[7];
+};
+
+#ifdef _KERNEL
+
 /*-
  * Protocols express ordering constraints and affinity preferences by
  * implementing one or neither of nh_m2flow and nh_m2cpuid, which are used by
@@ -91,10 +173,6 @@ typedef struct mbuf	*netisr_m2cpuid_t(struct mbuf *m, uintptr_t source,
 typedef	struct mbuf	*netisr_m2flow_t(struct mbuf *m, uintptr_t source);
 typedef void		 netisr_drainedcpu_t(u_int cpuid);
 
-#define	NETISR_POLICY_SOURCE	1	/* Maintain source ordering. */
-#define	NETISR_POLICY_FLOW	2	/* Maintain flow ordering. */
-#define	NETISR_POLICY_CPU	3	/* Protocol determines CPU placement. */
-
 /*
  * Data structure describing a protocol handler.
  */
diff --git a/freebsd/sys/net/netisr_internal.h b/freebsd/sys/net/netisr_internal.h
new file mode 100644
index 00000000..40afaf16
--- /dev/null
+++ b/freebsd/sys/net/netisr_internal.h
@@ -0,0 +1,127 @@
+/*-
+ * Copyright (c) 2007-2009 Robert N. M. Watson
+ * Copyright (c) 2010 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_NETISR_INTERNAL_H_
+#define	_NET_NETISR_INTERNAL_H_
+
+#ifndef _WANT_NETISR_INTERNAL
+#error "no user-serviceable parts inside"
+#endif
+
+/*
+ * These definitions are private to the netisr implementation, but provided
+ * here for use by post-mortem crashdump analysis tools.  They should not be
+ * used in any other context as they can and will change.  Public definitions
+ * may be found in netisr.h.
+ */
+
+#ifndef _KERNEL
+typedef void *netisr_handler_t;
+typedef void *netisr_m2flow_t;
+typedef void *netisr_m2cpuid_t;
+typedef void *netisr_drainedcpu_t;
+#endif
+
+/*
+ * Each protocol is described by a struct netisr_proto, which holds all
+ * global per-protocol information.  This data structure is set up by
+ * netisr_register(), and derived from the public struct netisr_handler.
+ */
+struct netisr_proto {
+	const char	*np_name;	/* Character string protocol name. */
+	netisr_handler_t *np_handler;	/* Protocol handler. */
+	netisr_m2flow_t	*np_m2flow;	/* Query flow for untagged packet. */
+	netisr_m2cpuid_t *np_m2cpuid;	/* Query CPU to process packet on. */
+	netisr_drainedcpu_t *np_drainedcpu; /* Callback when drained a queue. */
+	u_int		 np_qlimit;	/* Maximum per-CPU queue depth. */
+	u_int		 np_policy;	/* Work placement policy. */
+};
+
+#define	NETISR_MAXPROT	16		/* Compile-time limit. */
+
+/*
+ * Protocol-specific work for each workstream is described by struct
+ * netisr_work.  Each work descriptor consists of an mbuf queue and
+ * statistics.
+ */
+struct netisr_work {
+	/*
+	 * Packet queue, linked by m_nextpkt.
+	 */
+	struct mbuf	*nw_head;
+	struct mbuf	*nw_tail;
+	u_int		 nw_len;
+	u_int		 nw_qlimit;
+	u_int		 nw_watermark;
+
+	/*
+	 * Statistics -- written unlocked, but mostly from curcpu.
+	 */
+	u_int64_t	 nw_dispatched; /* Number of direct dispatches. */
+	u_int64_t	 nw_hybrid_dispatched; /* "" hybrid dispatches. */
+	u_int64_t	 nw_qdrops;	/* "" drops. */
+	u_int64_t	 nw_queued;	/* "" enqueues. */
+	u_int64_t	 nw_handled;	/* "" handled in worker. */
+};
+
+/*
+ * Workstreams hold a queue of ordered work across each protocol, and are
+ * described by netisr_workstream.  Each workstream is associated with a
+ * worker thread, which in turn is pinned to a CPU.  Work associated with a
+ * workstream can be processed in other threads during direct dispatch;
+ * concurrent processing is prevented by the NWS_RUNNING flag, which
+ * indicates that a thread is already processing the work queue.  It is
+ * important to prevent a directly dispatched packet from "skipping ahead" of
+ * work already in the workstream queue.
+ */
+struct netisr_workstream {
+	struct intr_event *nws_intr_event;	/* Handler for stream. */
+	void		*nws_swi_cookie;	/* swi(9) cookie for stream. */
+	struct mtx	 nws_mtx;		/* Synchronize work. */
+	u_int		 nws_cpu;		/* CPU pinning. */
+	u_int		 nws_flags;		/* Wakeup flags. */
+	u_int		 nws_pendingbits;	/* Scheduled protocols. */
+
+	/*
+	 * Each protocol has per-workstream data.
+	 */
+	struct netisr_work	nws_work[NETISR_MAXPROT];
+} __aligned(CACHE_LINE_SIZE);
+
+/*
+ * Per-workstream flags.
+ */
+#define	NWS_RUNNING	0x00000001	/* Currently running in a thread. */
+#define	NWS_DISPATCHING	0x00000002	/* Currently being direct-dispatched. */
+#define	NWS_SCHEDULED	0x00000004	/* Signal issued. */
+
+#endif /* !_NET_NETISR_INTERNAL_H_ */
diff --git a/freebsd/sys/net/radix_mpath.c b/freebsd/sys/net/radix_mpath.c
index bb7b6fd4..6a3e3ef7 100644
--- a/freebsd/sys/net/radix_mpath.c
+++ b/freebsd/sys/net/radix_mpath.c
@@ -102,10 +102,7 @@ rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
 {
 	struct radix_node *rn;
 
-	if (!rn_mpath_next((struct radix_node *)rt))
-		return rt;
-
-	if (!gate)
+	if (!gate || !rt->rt_gateway)
 		return NULL;
 
 	/* beyond here, we use rn as the master copy */
diff --git a/freebsd/sys/net/raw_cb.h b/freebsd/sys/net/raw_cb.h
index 35b546c5..1b347e02 100644
--- a/freebsd/sys/net/raw_cb.h
+++ b/freebsd/sys/net/raw_cb.h
@@ -70,9 +70,14 @@ pr_init_t	raw_init;
  * Library routines for raw socket usrreq functions; will always be wrapped
  * so that protocol-specific functions can be handled.
  */
+typedef int (*raw_input_cb_fn)(struct mbuf *, struct sockproto *,
+    struct sockaddr *, struct rawcb *);
+
 int	 raw_attach(struct socket *, int);
 void	 raw_detach(struct rawcb *);
 void	 raw_input(struct mbuf *, struct sockproto *, struct sockaddr *);
+void	 raw_input_ext(struct mbuf *, struct sockproto *, struct sockaddr *,
+	    raw_input_cb_fn);
 
 /*
  * Generic pr_usrreqs entries for raw socket protocols, usually wrapped so
diff --git a/freebsd/sys/net/raw_usrreq.c b/freebsd/sys/net/raw_usrreq.c
index 0723799f..0d7973e9 100644
--- a/freebsd/sys/net/raw_usrreq.c
+++ b/freebsd/sys/net/raw_usrreq.c
@@ -73,6 +73,14 @@ raw_init(void)
 void
 raw_input(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src)
 {
+
+	raw_input_ext(m0, proto, src, NULL);	/* NULL: no per-rawcb filter. */
+}
+
+void
+raw_input_ext(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src,
+    raw_input_cb_fn cb)
+{
 	struct rawcb *rp;
 	struct mbuf *m = m0;
 	struct socket *last;
@@ -85,6 +93,8 @@ raw_input(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src)
 		if (rp->rcb_proto.sp_protocol  &&
 		    rp->rcb_proto.sp_protocol != proto->sp_protocol)
 			continue;
+		if (cb != NULL && (*cb)(m, proto, src, rp) != 0)
+			continue;
 		if (last) {
 			struct mbuf *n;
 			n = m_copy(m, 0, (int)M_COPYALL);
diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c
index 5827cc00..3821c208 100644
--- a/freebsd/sys/net/route.c
+++ b/freebsd/sys/net/route.c
@@ -37,6 +37,7 @@
  ***********************************************************************/
 
 #include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
 #include <rtems/bsd/local/opt_route.h>
 #include <rtems/bsd/local/opt_mrouting.h>
 #include <rtems/bsd/local/opt_mpath.h>
@@ -69,12 +70,34 @@
 
 #include <vm/uma.h>
 
+/* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBS. */
+#define	RT_MAXFIBS	16
+
+/* Kernel config default option. */
+#ifdef ROUTETABLES
+#if ROUTETABLES <= 0
+#error "ROUTETABLES defined too low"
+#endif
+#if ROUTETABLES > RT_MAXFIBS
+#error "ROUTETABLES defined too big"
+#endif
+#define	RT_NUMFIBS	ROUTETABLES
+#endif /* ROUTETABLES */
+/* Initialize to default if not otherwise set. */
+#ifndef	RT_NUMFIBS
+#define	RT_NUMFIBS	1
+#endif
+
 u_int rt_numfibs = RT_NUMFIBS;
-SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
+SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
 /*
  * Allow the boot code to allow LESS than RT_MAXFIBS to be used.
  * We can't do more because storage is statically allocated for now.
- * (for compatibility reasons.. this will change).
+ * (for compatibility reasons.. this will change. When this changes, code should
+ * be refactored to protocol independent parts and protocol dependent parts,
+ * probably hanging off domain(9) specific storage to not need the full
+ * fib * af RNH allocation etc. but allow tuning the number of tables per
+ * address family).
  */
 TUNABLE_INT("net.fibs", &rt_numfibs);
 
@@ -84,9 +107,12 @@ TUNABLE_INT("net.fibs", &rt_numfibs);
  * changes for the FIB of the caller when adding a new set of addresses
  * to an interface.  XXX this is a shotgun aproach to a problem that needs
  * a more fine grained solution.. that will come.
+ * XXX This also has the problem of getting the FIB from curthread, which
+ * will not always work given the fib can be overridden and prefixes can be
+ * added from the network stack context.
  */
 u_int rt_add_addr_allfibs = 1;
-SYSCTL_INT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
+SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
     &rt_add_addr_allfibs, 0, "");
 TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);
 
@@ -118,12 +144,6 @@ VNET_DEFINE(int, rttrash);		/* routes not in table but not freed */
 static VNET_DEFINE(uma_zone_t, rtzone);		/* Routing table UMA zone. */
 #define	V_rtzone	VNET(rtzone)
 
-#if 0
-/* default fib for tunnels to use */
-u_int tunnel_fib = 0;
-SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, "");
-#endif
-
 #ifndef __rtems__
 /*
  * handler for net.my_fibnum
@@ -206,27 +226,23 @@ vnet_route_init(const void *unused __unused)
 	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
 	for (dom = domains; dom; dom = dom->dom_next) {
-		if (dom->dom_rtattach)  {
-			for  (table = 0; table < rt_numfibs; table++) {
-				if ( (fam = dom->dom_family) == AF_INET ||
-				    table == 0) {
- 			        	/* for now only AF_INET has > 1 table */
-					/* XXX MRT
-					 * rtattach will be also called
-					 * from vfs_export.c but the
-					 * offset will be 0
-					 * (only for AF_INET and AF_INET6
-					 * which don't need it anyhow)
-					 */
-					rnh = rt_tables_get_rnh_ptr(table, fam);
-					if (rnh == NULL)
-						panic("%s: rnh NULL", __func__);
-					dom->dom_rtattach((void **)rnh,
-				    	    dom->dom_rtoffset);
-				} else {
-					break;
-				}
-			}
+		if (dom->dom_rtattach == NULL)
+			continue;
+
+		for  (table = 0; table < rt_numfibs; table++) {
+			fam = dom->dom_family;
+			if (table != 0 && fam != AF_INET6 && fam != AF_INET)
+				break;
+
+			/*
+			 * XXX MRT rtattach will be also called from
+			 * vfs_export.c but the offset will be 0 (only for
+			 * AF_INET and AF_INET6 which don't need it anyhow).
+			 */
+			rnh = rt_tables_get_rnh_ptr(table, fam);
+			if (rnh == NULL)
+				panic("%s: rnh NULL", __func__);
+			dom->dom_rtattach((void **)rnh, dom->dom_rtoffset);
 		}
 	}
 }
@@ -243,20 +259,19 @@ vnet_route_uninit(const void *unused __unused)
 	struct radix_node_head **rnh;
 
 	for (dom = domains; dom; dom = dom->dom_next) {
-		if (dom->dom_rtdetach) {
-			for (table = 0; table < rt_numfibs; table++) {
-				if ( (fam = dom->dom_family) == AF_INET ||
-				    table == 0) {
-					/* For now only AF_INET has > 1 tbl. */
-					rnh = rt_tables_get_rnh_ptr(table, fam);
-					if (rnh == NULL)
-						panic("%s: rnh NULL", __func__);
-					dom->dom_rtdetach((void **)rnh,
-					    dom->dom_rtoffset);
-				} else {
-					break;
-				}
-			}
+		if (dom->dom_rtdetach == NULL)
+			continue;
+
+		for (table = 0; table < rt_numfibs; table++) {
+			fam = dom->dom_family;
+
+			if (table != 0 && fam != AF_INET6 && fam != AF_INET)
+				break;
+
+			rnh = rt_tables_get_rnh_ptr(table, fam);
+			if (rnh == NULL)
+				panic("%s: rnh NULL", __func__);
+			dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset);
 		}
 	}
 }
@@ -286,7 +301,8 @@ setfib(struct thread *td, struct setfib_args *uap)
 void
 rtalloc(struct route *ro)
 {
-	rtalloc_ign_fib(ro, 0UL, 0);
+
+	rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB);
 }
 
 void
@@ -306,7 +322,7 @@ rtalloc_ign(struct route *ro, u_long ignore)
 		RTFREE(rt);
 		ro->ro_rt = NULL;
 	}
-	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0);
+	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB);
 	if (ro->ro_rt)
 		RT_UNLOCK(ro->ro_rt);
 }
@@ -336,7 +352,8 @@ rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
 struct rtentry *
 rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
 {
-	return (rtalloc1_fib(dst, report, ignflags, 0));
+
+	return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB));
 }
 
 struct rtentry *
@@ -344,7 +361,6 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
 		    u_int fibnum)
 {
 	struct radix_node_head *rnh;
-	struct rtentry *rt;
 	struct radix_node *rn;
 	struct rtentry *newrt;
 	struct rt_addrinfo info;
@@ -352,17 +368,23 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
 	int needlock;
 
 	KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
-	if (dst->sa_family != AF_INET)	/* Only INET supports > 1 fib now */
-		fibnum = 0;
+	switch (dst->sa_family) {
+	case AF_INET6:
+	case AF_INET:
+		/* We support multiple FIBs. */
+		break;
+	default:
+		fibnum = RT_DEFAULT_FIB;
+		break;
+	}
 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
 	newrt = NULL;
+	if (rnh == NULL)
+		goto miss;
+
 	/*
 	 * Look up the address in the table for that Address Family
 	 */
-	if (rnh == NULL) {
-		V_rtstat.rts_unreach++;
-		goto miss;
-	}
 	needlock = !(ignflags & RTF_RNH_LOCKED);
 	if (needlock)
 		RADIX_NODE_HEAD_RLOCK(rnh);
@@ -372,7 +394,7 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
 #endif
 	rn = rnh->rnh_matchaddr(dst, rnh);
 	if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
-		newrt = rt = RNTORT(rn);
+		newrt = RNTORT(rn);
 		RT_LOCK(newrt);
 		RT_ADDREF(newrt);
 		if (needlock)
@@ -387,8 +409,9 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
 	 * Which basically means
 	 * "caint get there frm here"
 	 */
-	V_rtstat.rts_unreach++;
 miss:
+	V_rtstat.rts_unreach++;
+
 	if (report) {
 		/*
 		 * If required, report the failure to the supervising
@@ -397,8 +420,8 @@ miss:
 		 */
 		bzero(&info, sizeof(info));
 		info.rti_info[RTAX_DST] = dst;
-		rt_missmsg(msgtype, &info, 0, err);
-	}
+		rt_missmsg_fib(msgtype, &info, 0, err, fibnum);
+	}	
 done:
 	if (newrt)
 		RT_LOCK_ASSERT(newrt);
@@ -499,7 +522,8 @@ rtredirect(struct sockaddr *dst,
 	int flags,
 	struct sockaddr *src)
 {
-	rtredirect_fib(dst, gateway, netmask, flags, src, 0);
+
+	rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB);
 }
 
 void
@@ -545,7 +569,7 @@ rtredirect_fib(struct sockaddr *dst,
 		goto done;
 	/*
 	 * Create a new entry if we just got back a wildcard entry
-	 * or the the lookup failed.  This is necessary for hosts
+	 * or the lookup failed.  This is necessary for hosts
 	 * which use routing redirects generated by smart gateways
 	 * to dynamically build the routing tables.
 	 */
@@ -622,7 +646,7 @@ out:
 	info.rti_info[RTAX_GATEWAY] = gateway;
 	info.rti_info[RTAX_NETMASK] = netmask;
 	info.rti_info[RTAX_AUTHOR] = src;
-	rt_missmsg(RTM_REDIRECT, &info, flags, error);
+	rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum);
 	if (ifa != NULL)
 		ifa_free(ifa);
 }
@@ -630,7 +654,8 @@ out:
 int
 rtioctl(u_long req, caddr_t data)
 {
-	return (rtioctl_fib(req, data, 0));
+
+	return (rtioctl_fib(req, data, RT_DEFAULT_FIB));
 }
 
 /*
@@ -660,7 +685,8 @@ rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
 struct ifaddr *
 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
 {
-	return (ifa_ifwithroute_fib(flags, dst, gateway, 0));
+
+	return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB));
 }
 
 struct ifaddr *
@@ -745,7 +771,9 @@ rtrequest(int req,
 	int flags,
 	struct rtentry **ret_nrt)
 {
-	return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0));
+
+	return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt,
+	    RT_DEFAULT_FIB));
 }
 
 int
@@ -784,7 +812,8 @@ rtrequest_fib(int req,
 int
 rt_getifa(struct rt_addrinfo *info)
 {
-	return (rt_getifa_fib(info, 0));
+
+	return (rt_getifa_fib(info, RT_DEFAULT_FIB));
 }
 
 /*
@@ -1038,11 +1067,20 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 	register struct radix_node_head *rnh;
 	struct ifaddr *ifa;
 	struct sockaddr *ndst;
+	struct sockaddr_storage mdst;
 #define senderr(x) { error = x ; goto bad; }
 
 	KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
-	if (dst->sa_family != AF_INET)	/* Only INET supports > 1 fib now */
-		fibnum = 0;
+	switch (dst->sa_family) {
+	case AF_INET6:
+	case AF_INET:
+		/* We support multiple FIBs. */
+		break;
+	default:
+		fibnum = RT_DEFAULT_FIB;
+		break;
+	}
+
 	/*
 	 * Find the correct routing tree to use for this Address Family
 	 */
@@ -1064,6 +1102,10 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 
 	switch (req) {
 	case RTM_DELETE:
+		if (netmask) {
+			rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
+			dst = (struct sockaddr *)&mdst;
+		}
 #ifdef RADIX_MPATH
 		if (rn_mpath_capable(rnh)) {
 			error = rn_mpath_update(req, info, rnh, ret_nrt);
@@ -1144,8 +1186,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 		rt->rt_flags = RTF_UP | flags;
 		rt->rt_fibnum = fibnum;
 		/*
-		 * Add the gateway. Possibly re-malloc-ing the storage for it
-		 *
+		 * Add the gateway. Possibly re-malloc-ing the storage for it.
 		 */
 		RT_LOCK(rt);
 		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
@@ -1194,11 +1235,17 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 
 #ifdef FLOWTABLE
 		rt0 = NULL;
-		/* XXX
-		 * "flow-table" only support IPv4 at the moment.
-		 */
+		/* "flow-table" only supports IPv6 and IPv4 at the moment. */
+		switch (dst->sa_family) {
+#ifdef notyet
+#ifdef INET6
+		case AF_INET6:
+#endif
+#endif
 #ifdef INET
-		if (dst->sa_family == AF_INET) {
+		case AF_INET:
+#endif
+#if defined(INET6) || defined(INET)
 			rn = rnh->rnh_matchaddr(dst, rnh);
 			if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
 				struct sockaddr *mask;
@@ -1237,9 +1284,9 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 					}
 				}
 			}
+#endif/* INET6 || INET */
 		}
-#endif
-#endif
+#endif /* FLOWTABLE */
 
 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
 		rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
@@ -1261,9 +1308,20 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 		}
 #ifdef FLOWTABLE
 		else if (rt0 != NULL) {
+			switch (dst->sa_family) {
+#ifdef notyet
+#ifdef INET6
+			case AF_INET6:
+				flowtable_route_flush(V_ip6_ft, rt0);
+				break;
+#endif
+#endif
 #ifdef INET
-			flowtable_route_flush(V_ip_ft, rt0);
+			case AF_INET:
+				flowtable_route_flush(V_ip_ft, rt0);
+				break;
 #endif
+			}
 			RTFREE(rt0);
 		}
 #endif
@@ -1395,8 +1453,17 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
 		dst = ifa->ifa_addr;
 		netmask = ifa->ifa_netmask;
 	}
-	if ( dst->sa_family != AF_INET)
-		fibnum = 0;
+	if (dst->sa_len == 0)
+		return(EINVAL);
+	switch (dst->sa_family) {
+	case AF_INET6:
+	case AF_INET:
+		/* We support multiple FIBs. */
+		break;
+	default:
+		fibnum = RT_DEFAULT_FIB;
+		break;
+	}
 	if (fibnum == -1) {
 		if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
 #ifndef __rtems__
@@ -1413,8 +1480,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
 		startfib = fibnum;
 		endfib = fibnum;
 	}
-	if (dst->sa_len == 0)
-		return(EINVAL);
 
 	/*
 	 * If it's a delete, check that if it exists,
@@ -1438,9 +1503,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
 	 * Now go through all the requested tables (fibs) and do the
 	 * requested action. Realistically, this will either be fib 0
 	 * for protocols that don't do multiple tables or all the
-	 * tables for those that do. XXX For this version only AF_INET.
-	 * When that changes code should be refactored to protocol
-	 * independent parts and protocol dependent parts.
+	 * tables for those that do.
 	 */
 	for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
 		if (cmd == RTM_DELETE) {
@@ -1494,7 +1557,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
 		 */
 		bzero((caddr_t)&info, sizeof(info));
 		info.rti_ifa = ifa;
-		info.rti_flags = flags | ifa->ifa_flags;
+		info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF);
 		info.rti_info[RTAX_DST] = dst;
 		/*
 		 * doing this for compatibility reasons
@@ -1514,10 +1577,10 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
 #ifdef RADIX_MPATH
 			/*
 			 * in case address alias finds the first address
-			 * e.g. ifconfig bge0 192.103.54.246/24
-			 * e.g. ifconfig bge0 192.103.54.247/24
-			 * the address set in the route is 192.103.54.246
-			 * so we need to replace it with 192.103.54.247
+			 * e.g. ifconfig bge0 192.0.2.246/24
+			 * e.g. ifconfig bge0 192.0.2.247/24
+			 * the address set in the route is 192.0.2.246
+			 * so we need to replace it with 192.0.2.247
 			 */
 			if (memcmp(rt->rt_ifa->ifa_addr,
 			    ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
@@ -1538,7 +1601,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
 			}
 			RT_ADDREF(rt);
 			RT_UNLOCK(rt);
-			rt_newaddrmsg(cmd, ifa, error, rt);
+			rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum);
 			RT_LOCK(rt);
 			RT_REMREF(rt);
 			if (cmd == RTM_DELETE) {
@@ -1580,12 +1643,14 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
 	return (error);
 }
 
+#ifndef BURN_BRIDGES
 /* special one for inet internal use. may not use. */
 int
 rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
 {
 	return (rtinit1(ifa, cmd, flags, -1));
 }
+#endif
 
 /*
  * Set up a routing table entry, normally
@@ -1595,7 +1660,7 @@ int
 rtinit(struct ifaddr *ifa, int cmd, int flags)
 {
 	struct sockaddr *dst;
-	int fib = 0;
+	int fib = RT_DEFAULT_FIB;
 
 	if (flags & RTF_HOST) {
 		dst = ifa->ifa_dstaddr;
@@ -1603,7 +1668,12 @@ rtinit(struct ifaddr *ifa, int cmd, int flags)
 		dst = ifa->ifa_addr;
 	}
 
-	if (dst->sa_family == AF_INET)
+	switch (dst->sa_family) {
+	case AF_INET6:
+	case AF_INET:
+		/* We do support multiple FIBs. */
 		fib = -1;
+		break;
+	}
 	return (rtinit1(ifa, cmd, flags, fib));
 }
diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h
index 4014b3f6..b26ac441 100644
--- a/freebsd/sys/net/route.h
+++ b/freebsd/sys/net/route.h
@@ -86,30 +86,8 @@ struct rt_metrics {
 #define	RTM_RTTUNIT	1000000	/* units for rtt, rttvar, as units per sec */
 #define	RTTTOPRHZ(r)	((r) / (RTM_RTTUNIT / PR_SLOWHZ))
 
-/* MRT compile-time constants */
-#ifdef _KERNEL
- #ifndef ROUTETABLES
-  #define RT_NUMFIBS 1
-  #define RT_MAXFIBS 1
- #else
-  /* while we use 4 bits in the mbuf flags, we are limited to 16 */
-  #define RT_MAXFIBS 16
-  #if ROUTETABLES > RT_MAXFIBS
-   #define RT_NUMFIBS RT_MAXFIBS
-   #error "ROUTETABLES defined too big"
-  #else
-   #if ROUTETABLES == 0
-    #define RT_NUMFIBS 1
-   #else
-    #define RT_NUMFIBS ROUTETABLES
-   #endif
-  #endif
- #endif
-#endif
-
+#define	RT_DEFAULT_FIB	0	/* Explicitly mark fib=0 restricted cases */
 extern u_int rt_numfibs;	/* number fo usable routing tables */
-extern u_int tunnel_fib;	/* tunnels use these */
-extern u_int fwd_fib;		/* packets being forwarded use these routes */
 /*
  * XXX kernel function pointer `rt_output' is visible to applications.
  */
@@ -325,7 +303,6 @@ struct rt_addrinfo {
 #define	RT_LOCK_INIT(_rt) \
 	mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
 #define	RT_LOCK(_rt)		mtx_lock(&(_rt)->rt_mtx)
-#define	RT_TRYLOCK(_rt)		mtx_trylock(&(_rt)->rt_mtx)
 #define	RT_UNLOCK(_rt)		mtx_unlock(&(_rt)->rt_mtx)
 #define	RT_LOCK_DESTROY(_rt)	mtx_destroy(&(_rt)->rt_mtx)
 #define	RT_LOCK_ASSERT(_rt)	mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
@@ -360,22 +337,6 @@ struct rt_addrinfo {
 	RTFREE_LOCKED(_rt);					\
 } while (0)
 
-#define RT_TEMP_UNLOCK(_rt) do {				\
-	RT_ADDREF(_rt);						\
-	RT_UNLOCK(_rt);						\
-} while (0)
-
-#define RT_RELOCK(_rt) do {					\
-	RT_LOCK(_rt);						\
-	if ((_rt)->rt_refcnt <= 1) {				\
-		rtfree(_rt);					\
-		_rt = 0; /*  signal that it went away */	\
-	} else {						\
-		RT_REMREF(_rt);					\
-		/* note that _rt is still valid */		\
-	}							\
-} while (0)
-
 struct radix_node_head *rt_tables_get_rnh(int, int);
 
 struct ifmultiaddr;
@@ -384,7 +345,9 @@ void	 rt_ieee80211msg(struct ifnet *, int, void *, size_t);
 void	 rt_ifannouncemsg(struct ifnet *, int);
 void	 rt_ifmsg(struct ifnet *);
 void	 rt_missmsg(int, struct rt_addrinfo *, int, int);
+void	 rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int);
 void	 rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
+void	 rt_newaddrmsg_fib(int, struct ifaddr *, int, struct rtentry *, int);
 void	 rt_newmaddrmsg(int, struct ifmultiaddr *);
 int	 rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
 void 	 rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
@@ -418,8 +381,10 @@ void	 rtredirect(struct sockaddr *, struct sockaddr *,
 int	 rtrequest(int, struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *, int, struct rtentry **);
 
+#ifndef BURN_BRIDGES
 /* defaults to "all" FIBs */
 int	 rtinit_fib(struct ifaddr *, int, int);
+#endif
 
 /* XXX MRT NEW VERSIONS THAT USE FIBs
  * For now the protocol indepedent versions are the same as the AF_INET ones
diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c
index bfdecf87..beca84da 100644
--- a/freebsd/sys/net/rtsock.c
+++ b/freebsd/sys/net/rtsock.c
@@ -116,7 +116,34 @@ struct if_msghdr32 {
 	uint16_t ifm_index;
 	struct	if_data32 ifm_data;
 };
-#endif
+
+struct if_msghdrl32 {	/* NET_RT_IFLISTL RTM_IFINFO header, SCTL_MASK32 case */
+	uint16_t ifm_msglen;
+	uint8_t	ifm_version;
+	uint8_t	ifm_type;
+	int32_t	ifm_addrs;	/* filled from rt_addrinfo rti_addrs */
+	int32_t	ifm_flags;	/* if_flags | if_drv_flags */
+	uint16_t ifm_index;	/* ifp->if_index */
+	uint16_t _ifm_spare1;	/* zeroed by sysctl_iflist_ifml() */
+	uint16_t ifm_len;	/* sizeof(struct if_msghdrl32) */
+	uint16_t ifm_data_off;	/* offsetof(struct if_msghdrl32, ifm_data) */
+	struct	if_data32 ifm_data;	/* filled by copy_ifdata32() */
+};
+
+struct ifa_msghdrl32 {	/* NET_RT_IFLISTL address header, SCTL_MASK32 case */
+	uint16_t ifam_msglen;
+	uint8_t	ifam_version;
+	uint8_t	ifam_type;
+	int32_t	ifam_addrs;	/* filled from rt_addrinfo rti_addrs */
+	int32_t	ifam_flags;	/* ifa->ifa_flags */
+	uint16_t ifam_index;	/* ifa->ifa_ifp->if_index */
+	uint16_t _ifam_spare1;	/* zeroed by sysctl_iflist_ifaml() */
+	uint16_t ifam_len;	/* sizeof(struct ifa_msghdrl32) */
+	uint16_t ifam_data_off;	/* offsetof(struct ifa_msghdrl32, ifam_data) */
+	int32_t	ifam_metric;	/* ifa->ifa_metric */
+	struct	if_data32 ifam_data;	/* filled by copy_ifdata32() */
+};
+#endif /* COMPAT_FREEBSD32 */
 
 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
 
@@ -124,6 +151,13 @@ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
 static struct	sockaddr route_src = { 2, PF_ROUTE, };
 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
 
+/*
+ * Used by rtsock/raw_input callback code to decide whether to filter the update
+ * notification to a socket bound to a particular FIB.
+ */
+#define	RTS_FILTER_FIB	M_PROTO8
+#define	RTS_ALLFIBS	-1
+
 static struct {
 	int	ip_count;	/* attached w/ AF_INET */
 	int	ip6_count;	/* attached w/ AF_INET6 */
@@ -161,7 +195,7 @@ static void	rt_setmetrics(u_long which, const struct rt_metrics *in,
 			struct rt_metrics_lite *out);
 static void	rt_getmetrics(const struct rt_metrics_lite *in,
 			struct rt_metrics *out);
-static void	rt_dispatch(struct mbuf *, const struct sockaddr *);
+static void	rt_dispatch(struct mbuf *, sa_family_t);
 
 static struct netisr_handler rtsock_nh = {
 	.nh_name = "rtsock",
@@ -200,6 +234,31 @@ rts_init(void)
 }
 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
 
+static int
+raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
+    struct rawcb *rp)	/* src is not examined here */
+{
+	int fibnum;	/* FIB read from the mbuf via M_GETFIB() */
+
+	KASSERT(m != NULL, ("%s: m is NULL", __func__));
+	KASSERT(proto != NULL, ("%s: proto is NULL", __func__));
+	KASSERT(rp != NULL, ("%s: rp is NULL", __func__));
+
+	/* No filtering requested: RTS_FILTER_FIB is not set on the mbuf. */
+	if ((m->m_flags & RTS_FILTER_FIB) == 0)
+		return (0);
+
+	/* Pass unless this is PF_ROUTE and the socket's FIB differs. */
+	fibnum = M_GETFIB(m);
+	if (proto->sp_family != PF_ROUTE ||
+	    rp->rcb_socket == NULL ||	/* no socket to compare against */
+	    rp->rcb_socket->so_fibnum == fibnum)
+		return (0);
+
+	/* Filtering requested and no match, the socket shall be skipped. */
+	return (1);
+}
+
 static void
 rts_input(struct mbuf *m)
 {
@@ -216,7 +275,7 @@ rts_input(struct mbuf *m)
 	} else
 		route_proto.sp_protocol = 0;
 
-	raw_input(m, &route_proto, &route_src);
+	raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb);
 }
 
 /*
@@ -428,7 +487,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
 		 * Try to find an address on the given outgoing interface
 		 * that belongs to the jail.
 		 */
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa;
 			sa = ifa->ifa_addr;
@@ -440,7 +499,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
 				break;
 			}
 		}
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 		if (!found) {
 			/*
 			 * As a last resort return the 'default' jail address.
@@ -470,7 +529,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
 		 * Try to find an address on the given outgoing interface
 		 * that belongs to the jail.
 		 */
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa;
 			sa = ifa->ifa_addr;
@@ -483,7 +542,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
 				break;
 			}
 		}
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 		if (!found) {
 			/*
 			 * As a last resort return the 'default' jail address.
@@ -521,6 +580,7 @@ route_output(struct mbuf *m, struct socket *so)
 	int len, error = 0;
 	struct ifnet *ifp = NULL;
 	union sockaddr_union saun;
+	sa_family_t saf = AF_UNSPEC;
 
 #define senderr(e) { error = e; goto flush;}
 	if (m == NULL || ((m->m_len < sizeof(long)) &&
@@ -561,6 +621,7 @@ route_output(struct mbuf *m, struct socket *so)
 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
 	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
 		senderr(EINVAL);
+	saf = info.rti_info[RTAX_DST]->sa_family;
 	/*
 	 * Verify that the caller has the appropriate privilege; RTM_GET
 	 * is the only operation the non-superuser is allowed.
@@ -898,6 +959,8 @@ flush:
 		Free(rtm);
 	}
 	if (m) {
+		M_SETFIB(m, so->so_fibnum);
+		m->m_flags |= RTS_FILTER_FIB;
 		if (rp) {
 			/*
 			 * XXX insure we don't get a copy by
@@ -905,10 +968,10 @@ flush:
 			 */
 			unsigned short family = rp->rcb_proto.sp_family;
 			rp->rcb_proto.sp_family = 0;
-			rt_dispatch(m, info.rti_info[RTAX_DST]);
+			rt_dispatch(m, saf);
 			rp->rcb_proto.sp_family = family;
 		} else
-			rt_dispatch(m, info.rti_info[RTAX_DST]);
+			rt_dispatch(m, saf);
 	}
     }
 	return (error);
@@ -984,6 +1047,9 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
 	return (0);
 }
 
+/*
+ * Used by the routing socket.
+ */
 static struct mbuf *
 rt_msg1(int type, struct rt_addrinfo *rtinfo)
 {
@@ -1051,6 +1117,9 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
 	return (m);
 }
 
+/*
+ * Used by the sysctl code and routing socket.
+ */
 static int
 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
 {
@@ -1064,17 +1133,31 @@ again:
 
 	case RTM_DELADDR:
 	case RTM_NEWADDR:
-		len = sizeof(struct ifa_msghdr);
+		if (w != NULL && w->w_op == NET_RT_IFLISTL) {
+#ifdef COMPAT_FREEBSD32
+			if (w->w_req->flags & SCTL_MASK32)
+				len = sizeof(struct ifa_msghdrl32);
+			else
+#endif
+				len = sizeof(struct ifa_msghdrl);
+		} else
+			len = sizeof(struct ifa_msghdr);
 		break;
 
 	case RTM_IFINFO:
 #ifdef COMPAT_FREEBSD32
 		if (w != NULL && w->w_req->flags & SCTL_MASK32) {
-			len = sizeof(struct if_msghdr32);
+			if (w->w_op == NET_RT_IFLISTL)
+				len = sizeof(struct if_msghdrl32);
+			else
+				len = sizeof(struct if_msghdr32);
 			break;
 		}
 #endif
-		len = sizeof(struct if_msghdr);
+		if (w != NULL && w->w_op == NET_RT_IFLISTL)
+			len = sizeof(struct if_msghdrl);
+		else
+			len = sizeof(struct if_msghdr);
 		break;
 
 	case RTM_NEWMADDR:
@@ -1137,7 +1220,8 @@ again:
  * destination.
  */
 void
-rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
+rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
+    int fibnum)
 {
 	struct rt_msghdr *rtm;
 	struct mbuf *m;
@@ -1148,11 +1232,26 @@ rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
 	m = rt_msg1(type, rtinfo);
 	if (m == NULL)
 		return;
+
+	if (fibnum != RTS_ALLFIBS) {
+		KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
+		    "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
+		M_SETFIB(m, fibnum);
+		m->m_flags |= RTS_FILTER_FIB;
+	}
+
 	rtm = mtod(m, struct rt_msghdr *);
 	rtm->rtm_flags = RTF_DONE | flags;
 	rtm->rtm_errno = error;
 	rtm->rtm_addrs = rtinfo->rti_addrs;
-	rt_dispatch(m, sa);
+	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
+}
+
+void
+rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
+{
+
+	rt_missmsg_fib(type, rtinfo, flags, error, RTS_ALLFIBS);
 }
 
 /*
@@ -1177,7 +1276,7 @@ rt_ifmsg(struct ifnet *ifp)
 	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 	ifm->ifm_data = ifp->if_data;
 	ifm->ifm_addrs = 0;
-	rt_dispatch(m, NULL);
+	rt_dispatch(m, AF_UNSPEC);
 }
 
 /*
@@ -1189,7 +1288,8 @@ rt_ifmsg(struct ifnet *ifp)
  * copies of it.
  */
 void
-rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
+rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt,
+    int fibnum)
 {
 	struct rt_addrinfo info;
 	struct sockaddr *sa = NULL;
@@ -1247,10 +1347,24 @@ rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
 			rtm->rtm_errno = error;
 			rtm->rtm_addrs = info.rti_addrs;
 		}
-		rt_dispatch(m, sa);
+		if (fibnum != RTS_ALLFIBS) {
+			KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: "
+			    "fibnum out of range 0 <= %d < %d", __func__,
+			     fibnum, rt_numfibs));
+			M_SETFIB(m, fibnum);
+			m->m_flags |= RTS_FILTER_FIB;
+		}
+		rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
 	}
 }
 
+void
+rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
+{
+
+	rt_newaddrmsg_fib(cmd, ifa, error, rt, RTS_ALLFIBS);
+}
+
 /*
  * This is the analogue to the rt_newaddrmsg which performs the same
  * function but for multicast group memberhips.  This is easier since
@@ -1283,7 +1397,7 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
 	    __func__));
 	ifmam->ifmam_index = ifp->if_index;
 	ifmam->ifmam_addrs = info.rti_addrs;
-	rt_dispatch(m, ifma->ifma_addr);
+	rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
 }
 
 static struct mbuf *
@@ -1343,7 +1457,7 @@ rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
 		if (m->m_flags & M_PKTHDR)
 			m->m_pkthdr.len += data_len;
 		mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
-		rt_dispatch(m, NULL);
+		rt_dispatch(m, AF_UNSPEC);
 	}
 }
 
@@ -1359,11 +1473,11 @@ rt_ifannouncemsg(struct ifnet *ifp, int what)
 
 	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
 	if (m != NULL)
-		rt_dispatch(m, NULL);
+		rt_dispatch(m, AF_UNSPEC);
 }
 
 static void
-rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
+rt_dispatch(struct mbuf *m, sa_family_t saf)
 {
 	struct m_tag *tag;
 
@@ -1372,14 +1486,14 @@ rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
 	 * use when injecting the mbuf into the routing socket buffer from
 	 * the netisr.
 	 */
-	if (sa != NULL) {
+	if (saf != AF_UNSPEC) {
 		tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
 		    M_NOWAIT);
 		if (tag == NULL) {
 			m_freem(m);
 			return;
 		}
-		*(unsigned short *)(tag + 1) = sa->sa_family;
+		*(unsigned short *)(tag + 1) = saf;
 		m_tag_prepend(m, tag);
 	}
 #ifdef VIMAGE
@@ -1473,6 +1587,127 @@ copy_ifdata32(struct if_data *src, struct if_data32 *dst)
 #endif
 
 static int
+sysctl_iflist_ifml(struct ifnet *ifp, struct rt_addrinfo *info,
+    struct walkarg *w, int len)	/* len bytes at w->w_tmem go to SYSCTL_OUT */
+{
+	struct if_msghdrl *ifm;	/* native-layout view of w->w_tmem */
+
+#ifdef COMPAT_FREEBSD32
+	if (w->w_req->flags & SCTL_MASK32) {	/* SCTL_MASK32: 32-bit layout */
+		struct if_msghdrl32 *ifm32;
+
+		ifm32 = (struct if_msghdrl32 *)w->w_tmem;
+		ifm32->ifm_addrs = info->rti_addrs;
+		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+		ifm32->ifm_index = ifp->if_index;
+		ifm32->_ifm_spare1 = 0;
+		ifm32->ifm_len = sizeof(*ifm32);
+		ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
+
+		copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
+
+		return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
+	}
+#endif /* COMPAT_FREEBSD32 */
+	ifm = (struct if_msghdrl *)w->w_tmem;
+	ifm->ifm_addrs = info->rti_addrs;
+	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;	/* both flag words */
+	ifm->ifm_index = ifp->if_index;
+	ifm->_ifm_spare1 = 0;
+	ifm->ifm_len = sizeof(*ifm);	/* header size, ifm_data included */
+	ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
+
+	ifm->ifm_data = ifp->if_data;	/* structure copy of the interface data */
+
+	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
+}
+
+static int
+sysctl_iflist_ifm(struct ifnet *ifp, struct rt_addrinfo *info,
+    struct walkarg *w, int len)	/* len bytes at w->w_tmem go to SYSCTL_OUT */
+{
+	struct if_msghdr *ifm;	/* native-layout view of w->w_tmem */
+
+#ifdef COMPAT_FREEBSD32
+	if (w->w_req->flags & SCTL_MASK32) {	/* SCTL_MASK32: 32-bit layout */
+		struct if_msghdr32 *ifm32;
+
+		ifm32 = (struct if_msghdr32 *)w->w_tmem;
+		ifm32->ifm_addrs = info->rti_addrs;
+		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+		ifm32->ifm_index = ifp->if_index;
+
+		copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
+
+		return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
+	}
+#endif /* COMPAT_FREEBSD32 */
+	ifm = (struct if_msghdr *)w->w_tmem;
+	ifm->ifm_addrs = info->rti_addrs;
+	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;	/* both flag words */
+	ifm->ifm_index = ifp->if_index;
+
+	ifm->ifm_data = ifp->if_data;	/* structure copy of the interface data */
+
+	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
+}
+
+static int
+sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
+    struct walkarg *w, int len)	/* len bytes at w->w_tmem go to SYSCTL_OUT */
+{
+	struct ifa_msghdrl *ifam;	/* native-layout view of w->w_tmem */
+
+#ifdef COMPAT_FREEBSD32
+	if (w->w_req->flags & SCTL_MASK32) {	/* SCTL_MASK32: 32-bit layout */
+		struct ifa_msghdrl32 *ifam32;
+
+		ifam32 = (struct ifa_msghdrl32 *)w->w_tmem;
+		ifam32->ifam_addrs = info->rti_addrs;
+		ifam32->ifam_flags = ifa->ifa_flags;
+		ifam32->ifam_index = ifa->ifa_ifp->if_index;
+		ifam32->_ifam_spare1 = 0;
+		ifam32->ifam_len = sizeof(*ifam32);
+		ifam32->ifam_data_off =
+		    offsetof(struct ifa_msghdrl32, ifam_data);
+		ifam32->ifam_metric = ifa->ifa_metric;
+
+		copy_ifdata32(&ifa->ifa_ifp->if_data, &ifam32->ifam_data);
+
+		return (SYSCTL_OUT(w->w_req, (caddr_t)ifam32, len));
+	}
+#endif /* COMPAT_FREEBSD32 */
+
+	ifam = (struct ifa_msghdrl *)w->w_tmem;
+	ifam->ifam_addrs = info->rti_addrs;
+	ifam->ifam_flags = ifa->ifa_flags;
+	ifam->ifam_index = ifa->ifa_ifp->if_index;
+	ifam->_ifam_spare1 = 0;
+	ifam->ifam_len = sizeof(*ifam);	/* header size, ifam_data included */
+	ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
+	ifam->ifam_metric = ifa->ifa_metric;
+
+	ifam->ifam_data = ifa->if_data;	/* NOTE(review): 32-bit path copies ifa->ifa_ifp->if_data; confirm intent */
+
+	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));	/* w_tmem is what ifam points at */
+}
+
+static int
+sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
+    struct walkarg *w, int len)	/* len bytes at w->w_tmem go to SYSCTL_OUT */
+{
+	struct ifa_msghdr *ifam;	/* no SCTL_MASK32 variant in this function */
+
+	ifam = (struct ifa_msghdr *)w->w_tmem;
+	ifam->ifam_addrs = info->rti_addrs;
+	ifam->ifam_flags = ifa->ifa_flags;
+	ifam->ifam_index = ifa->ifa_ifp->if_index;
+	ifam->ifam_metric = ifa->ifa_metric;
+
+	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));	/* w_tmem is what ifam points at */
+}
+
+static int
 sysctl_iflist(int af, struct walkarg *w)
 {
 	struct ifnet *ifp;
@@ -1485,38 +1720,16 @@ sysctl_iflist(int af, struct walkarg *w)
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_RLOCK(ifp);
 		ifa = ifp->if_addr;
 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 		len = rt_msg2(RTM_IFINFO, &info, NULL, w);
 		info.rti_info[RTAX_IFP] = NULL;
 		if (w->w_req && w->w_tmem) {
-			struct if_msghdr *ifm;
-
-#ifdef COMPAT_FREEBSD32
-			if (w->w_req->flags & SCTL_MASK32) {
-				struct if_msghdr32 *ifm32;
-
-				ifm32 = (struct if_msghdr32 *)w->w_tmem;
-				ifm32->ifm_index = ifp->if_index;
-				ifm32->ifm_flags = ifp->if_flags |
-				    ifp->if_drv_flags;
-				copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
-				ifm32->ifm_addrs = info.rti_addrs;
-				error = SYSCTL_OUT(w->w_req, (caddr_t)ifm32,
-				    len);
-				goto sysctl_out;
-			}
-#endif
-			ifm = (struct if_msghdr *)w->w_tmem;
-			ifm->ifm_index = ifp->if_index;
-			ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
-			ifm->ifm_data = ifp->if_data;
-			ifm->ifm_addrs = info.rti_addrs;
-			error = SYSCTL_OUT(w->w_req, (caddr_t)ifm, len);
-#ifdef COMPAT_FREEBSD32
-		sysctl_out:
-#endif
+			if (w->w_op == NET_RT_IFLISTL)
+				error = sysctl_iflist_ifml(ifp, &info, w, len);
+			else
+				error = sysctl_iflist_ifm(ifp, &info, w, len);
 			if (error)
 				goto done;
 		}
@@ -1531,25 +1744,23 @@ sysctl_iflist(int af, struct walkarg *w)
 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 			len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
 			if (w->w_req && w->w_tmem) {
-				struct ifa_msghdr *ifam;
-
-				ifam = (struct ifa_msghdr *)w->w_tmem;
-				ifam->ifam_index = ifa->ifa_ifp->if_index;
-				ifam->ifam_flags = ifa->ifa_flags;
-				ifam->ifam_metric = ifa->ifa_metric;
-				ifam->ifam_addrs = info.rti_addrs;
-				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
+				if (w->w_op == NET_RT_IFLISTL)
+					error = sysctl_iflist_ifaml(ifa, &info,
+					    w, len);
+				else
+					error = sysctl_iflist_ifam(ifa, &info,
+					    w, len);
 				if (error)
 					goto done;
 			}
 		}
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
 			info.rti_info[RTAX_BRD] = NULL;
 	}
 done:
 	if (ifp != NULL)
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 	IFNET_RUNLOCK();
 	return (error);
 }
@@ -1570,7 +1781,7 @@ sysctl_ifmalist(int af, struct walkarg *w)
 			continue;
 		ifa = ifp->if_addr;
 		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
-		IF_ADDR_LOCK(ifp);
+		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (af && af != ifma->ifma_addr->sa_family)
 				continue;
@@ -1591,12 +1802,12 @@ sysctl_ifmalist(int af, struct walkarg *w)
 				ifmam->ifmam_addrs = info.rti_addrs;
 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
 				if (error) {
-					IF_ADDR_UNLOCK(ifp);
+					IF_ADDR_RUNLOCK(ifp);
 					goto done;
 				}
 			}
 		}
-		IF_ADDR_UNLOCK(ifp);
+		IF_ADDR_RUNLOCK(ifp);
 	}
 done:
 	IFNET_RUNLOCK();
@@ -1662,16 +1873,17 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
 			rnh = rt_tables_get_rnh(BSD_DEFAULT_FIB, i);
 #endif /* __rtems__ */
 			if (rnh != NULL) {
-				RADIX_NODE_HEAD_LOCK(rnh);
+				RADIX_NODE_HEAD_RLOCK(rnh); 
 			    	error = rnh->rnh_walktree(rnh,
 				    sysctl_dumpentry, &w);
-				RADIX_NODE_HEAD_UNLOCK(rnh);
+				RADIX_NODE_HEAD_RUNLOCK(rnh);
 			} else if (af != 0)
 				error = EAFNOSUPPORT;
 		}
 		break;
 
 	case NET_RT_IFLIST:
+	case NET_RT_IFLISTL:
 		error = sysctl_iflist(af, &w);
 		break;
 
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h
index d3c426a4..8ef1c00d 100644
--- a/freebsd/sys/net/vnet.h
+++ b/freebsd/sys/net/vnet.h
@@ -116,22 +116,27 @@ void	vnet_destroy(struct vnet *vnet);
  * Various macros -- get and set the current network stack, but also
  * assertions.
  */
+#if defined(INVARIANTS) || defined(VNET_DEBUG)
+#define	VNET_ASSERT(exp, msg)	do {					\
+	if (!(exp))							\
+		panic msg;						\
+} while (0)
+#else
+#define	VNET_ASSERT(exp, msg)	do {					\
+} while (0)
+#endif
+
 #ifdef VNET_DEBUG
 void vnet_log_recursion(struct vnet *, const char *, int);
 
-#define	VNET_ASSERT(condition)						\
-	if (!(condition)) {						\
-		printf("VNET_ASSERT @ %s:%d %s():\n",			\
-			__FILE__, __LINE__, __FUNCTION__);		\
-		panic(#condition);					\
-	}
-
 #define	CURVNET_SET_QUIET(arg)						\
-	VNET_ASSERT((arg)->vnet_magic_n == VNET_MAGIC_N);		\
+	VNET_ASSERT((arg) != NULL && (arg)->vnet_magic_n == VNET_MAGIC_N, \
+	    ("CURVNET_SET at %s:%d %s() curvnet=%p vnet=%p",		\
+	    __FILE__, __LINE__, __func__, curvnet, (arg)));		\
 	struct vnet *saved_vnet = curvnet;				\
 	const char *saved_vnet_lpush = curthread->td_vnet_lpush;	\
 	curvnet = arg;							\
-	curthread->td_vnet_lpush = __FUNCTION__;
+	curthread->td_vnet_lpush = __func__;
  
 #define	CURVNET_SET_VERBOSE(arg)					\
 	CURVNET_SET_QUIET(arg)						\
@@ -141,21 +146,31 @@ void vnet_log_recursion(struct vnet *, const char *, int);
 #define	CURVNET_SET(arg)	CURVNET_SET_VERBOSE(arg)
  
 #define	CURVNET_RESTORE()						\
-	VNET_ASSERT(saved_vnet == NULL ||				\
-		    saved_vnet->vnet_magic_n == VNET_MAGIC_N);		\
+	VNET_ASSERT(curvnet != NULL && (saved_vnet == NULL ||		\
+	    saved_vnet->vnet_magic_n == VNET_MAGIC_N),			\
+	    ("CURVNET_RESTORE at %s:%d %s() curvnet=%p saved_vnet=%p",	\
+	    __FILE__, __LINE__, __func__, curvnet, saved_vnet));	\
 	curvnet = saved_vnet;						\
 	curthread->td_vnet_lpush = saved_vnet_lpush;
 #else /* !VNET_DEBUG */
-#define	VNET_ASSERT(condition)
 
-#define	CURVNET_SET(arg)						\
+#define	CURVNET_SET_QUIET(arg)						\
+	VNET_ASSERT((arg) != NULL && (arg)->vnet_magic_n == VNET_MAGIC_N, \
+	    ("CURVNET_SET at %s:%d %s() curvnet=%p vnet=%p",		\
+	    __FILE__, __LINE__, __func__, curvnet, (arg)));		\
 	struct vnet *saved_vnet = curvnet;				\
 	curvnet = arg;	
  
-#define	CURVNET_SET_VERBOSE(arg)	CURVNET_SET(arg)
-#define	CURVNET_SET_QUIET(arg)		CURVNET_SET(arg)
+#define	CURVNET_SET_VERBOSE(arg)					\
+	CURVNET_SET_QUIET(arg)
+
+#define	CURVNET_SET(arg)	CURVNET_SET_VERBOSE(arg)
  
 #define	CURVNET_RESTORE()						\
+	VNET_ASSERT(curvnet != NULL && (saved_vnet == NULL ||		\
+	    saved_vnet->vnet_magic_n == VNET_MAGIC_N),			\
+	    ("CURVNET_RESTORE at %s:%d %s() curvnet=%p saved_vnet=%p",	\
+	    __FILE__, __LINE__, __func__, curvnet, saved_vnet));	\
 	curvnet = saved_vnet;
 #endif /* VNET_DEBUG */
 
@@ -191,15 +206,6 @@ extern struct sx vnet_sxlock;
  * Virtual network stack memory allocator, which allows global variables to
  * be automatically instantiated for each network stack instance.
  */
-__asm__(
-#if defined(__arm__)
-	".section " VNET_SETNAME ", \"aw\", %progbits\n"
-#else
-	".section " VNET_SETNAME ", \"aw\", @progbits\n"
-#endif
-	"\t.p2align " __XSTRING(CACHE_LINE_SHIFT) "\n"
-	"\t.previous");
-
 #define	VNET_NAME(n)		vnet_entry_##n
 #define	VNET_DECLARE(t, n)	extern t VNET_NAME(n)
 #define	VNET_DEFINE(t, n)	t VNET_NAME(n) __section(VNET_SETNAME) __used
@@ -357,7 +363,7 @@ do {									\
  */
 #define	curvnet			NULL
 
-#define	VNET_ASSERT(condition)
+#define	VNET_ASSERT(exp, msg)
 #define	CURVNET_SET(arg)
 #define	CURVNET_SET_QUIET(arg)
 #define	CURVNET_RESTORE()
author	Sebastian Huber <sebastian.huber@embedded-brains.de>	2013-11-04 11:33:00 +0100
committer	Sebastian Huber <sebastian.huber@embedded-brains.de>	2013-11-04 15:28:21 +0100
commit	af5333e0a02b2295304d4e029b15ee15a4fe2b3a (patch)
tree	c5c43680d374f58b487eeeaf18fb7ec6b84ba074 /freebsd/sys/net
parent	BUS_SPACE(9): Use simple memory model for ARM (diff)
download	rtems-libbsd-af5333e0a02b2295304d4e029b15ee15a4fe2b3a.tar.bz2