author    Sebastian Huber <sebastian.huber@embedded-brains.de>  2013-11-04 11:33:00 +0100
committer Sebastian Huber <sebastian.huber@embedded-brains.de>  2013-11-04 15:28:21 +0100
commit    af5333e0a02b2295304d4e029b15ee15a4fe2b3a (patch)
tree      c5c43680d374f58b487eeeaf18fb7ec6b84ba074 /freebsd/sys/net
parent    BUS_SPACE(9): Use simple memory model for ARM (diff)
Update to FreeBSD 8.4
Diffstat (limited to 'freebsd/sys/net')
-rw-r--r--  freebsd/sys/net/bpf.c                633
-rw-r--r--  freebsd/sys/net/bpf.h                  9
-rw-r--r--  freebsd/sys/net/bpf_buffer.c          51
-rw-r--r--  freebsd/sys/net/bpf_buffer.h           3
-rw-r--r--  freebsd/sys/net/bpf_zerocopy.h         2
-rw-r--r--  freebsd/sys/net/bpfdesc.h             23
-rw-r--r--  freebsd/sys/net/bridgestp.c          174
-rw-r--r--  freebsd/sys/net/bridgestp.h            8
-rw-r--r--  freebsd/sys/net/ieee8023ad_lacp.c      4
-rw-r--r--  freebsd/sys/net/if.c                 256
-rw-r--r--  freebsd/sys/net/if.h                  61
-rw-r--r--  freebsd/sys/net/if_arcsubr.c           1
-rw-r--r--  freebsd/sys/net/if_atmsubr.c           1
-rw-r--r--  freebsd/sys/net/if_bridge.c          113
-rw-r--r--  freebsd/sys/net/if_epair.c            92
-rw-r--r--  freebsd/sys/net/if_ethersubr.c         7
-rw-r--r--  freebsd/sys/net/if_faith.c             2
-rw-r--r--  freebsd/sys/net/if_fddisubr.c          1
-rw-r--r--  freebsd/sys/net/if_fwsubr.c            1
-rw-r--r--  freebsd/sys/net/if_gif.c              12
-rw-r--r--  freebsd/sys/net/if_gre.c             102
-rw-r--r--  freebsd/sys/net/if_gre.h               2
-rw-r--r--  freebsd/sys/net/if_iso88025subr.c      1
-rw-r--r--  freebsd/sys/net/if_lagg.c            142
-rw-r--r--  freebsd/sys/net/if_lagg.h             21
-rw-r--r--  freebsd/sys/net/if_llatbl.c           29
-rw-r--r--  freebsd/sys/net/if_llatbl.h           22
-rw-r--r--  freebsd/sys/net/if_media.c             2
-rw-r--r--  freebsd/sys/net/if_media.h           106
-rw-r--r--  freebsd/sys/net/if_spppfr.c            2
-rw-r--r--  freebsd/sys/net/if_spppsubr.c          1
-rw-r--r--  freebsd/sys/net/if_stf.c               1
-rw-r--r--  freebsd/sys/net/if_tap.c              14
-rw-r--r--  freebsd/sys/net/if_tun.c              37
-rw-r--r--  freebsd/sys/net/if_var.h              19
-rw-r--r--  freebsd/sys/net/if_vlan.c            201
-rw-r--r--  freebsd/sys/net/netisr.c             402
-rw-r--r--  freebsd/sys/net/netisr.h              88
-rw-r--r--  freebsd/sys/net/netisr_internal.h    127
-rw-r--r--  freebsd/sys/net/radix_mpath.c          5
-rw-r--r--  freebsd/sys/net/raw_cb.h               5
-rw-r--r--  freebsd/sys/net/raw_usrreq.c          10
-rw-r--r--  freebsd/sys/net/route.c              252
-rw-r--r--  freebsd/sys/net/route.h               45
-rw-r--r--  freebsd/sys/net/rtsock.c             342
-rw-r--r--  freebsd/sys/net/vnet.h                56
46 files changed, 2446 insertions, 1042 deletions
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c
index d9223313..179d5f0a 100644
--- a/freebsd/sys/net/bpf.c
+++ b/freebsd/sys/net/bpf.c
@@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/types.h>
#include <rtems/bsd/sys/param.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
@@ -68,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <net/if.h>
+#define BPF_INTERNAL
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
@@ -141,6 +144,7 @@ static int bpf_bpfd_cnt;
static void bpf_attachd(struct bpf_d *, struct bpf_if *);
static void bpf_detachd(struct bpf_d *);
+static void bpf_detachd_locked(struct bpf_d *);
static void bpf_freed(struct bpf_d *);
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
struct sockaddr *, int *, struct bpf_insn *);
@@ -152,7 +156,7 @@ static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
struct timeval *);
static void reset_d(struct bpf_d *);
-static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
+static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int bpf_setdlt(struct bpf_d *, u_int);
static void filt_bpfdetach(struct knote *);
@@ -170,6 +174,12 @@ SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
bpf_stats_sysctl, "bpf statistics portal");
+static VNET_DEFINE(int, bpf_optimize_writers) = 0;
+#define V_bpf_optimize_writers VNET(bpf_optimize_writers)
+SYSCTL_VNET_INT(_net_bpf, OID_AUTO, optimize_writers,
+ CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0,
+ "Do not send packets until BPF program is set");
+
static d_open_t bpfopen;
static d_read_t bpfread;
static d_write_t bpfwrite;
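The net.bpf.optimize_writers knob introduced above defaults to 0. A minimal sketch of enabling it from a userland management tool via sysctlbyname(3) (assumes sufficient privilege; equivalent to running `sysctl net.bpf.optimize_writers=1`):

	#include <sys/types.h>
	#include <sys/sysctl.h>

	/* Enable delayed attach for writer-only BPF consumers. */
	static int
	enable_bpf_writer_opt(void)
	{
		int one = 1;

		return (sysctlbyname("net.bpf.optimize_writers", NULL, NULL,
		    &one, sizeof(one)));
	}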
@@ -191,6 +201,37 @@ static struct cdevsw bpf_cdevsw = {
static struct filterops bpfread_filtops =
{ 1, NULL, filt_bpfdetach, filt_bpfread };
+eventhandler_tag bpf_ifdetach_cookie = NULL;
+
+/*
+ * LOCKING MODEL USED BY BPF:
+ * Locks:
+ * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
+ * some global counters and every bpf_if reference.
+ * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
+ * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
+ * used by bpf_mtap code.
+ *
+ * Lock order:
+ *
+ * Global lock, interface lock, descriptor lock
+ *
+ * We have to acquire the interface lock before the descriptor main lock because
+ * of the way BPF_MTAP[2] works. In many places (like bpf_detachd) we start with
+ * a BPF descriptor (and we need at least an rlock on it to get a reliable
+ * interface pointer). This creates a potential LOR. As a result, we use the
+ * global lock to protect against bpf_if changes in every such place.
+ *
+ * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
+ * 3) descriptor main wlock.
+ * Reading bd_bif can be protected by any of these locks, typically global lock.
+ *
+ * Changing read/write BPF filter is protected by the same three locks,
+ * the same applies for reading.
+ *
+ * Sleeping while holding the global lock is not allowed because bpfdetach() uses it.
+ */
+
/*
* Wrapper functions for various buffering methods. If the set of buffer
* modes expands, we will probably want to introduce a switch data structure
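A rough userland analogue of the three-level lock order described in the comment above (POSIX primitives with illustrative names only — not kernel code):

	#include <pthread.h>

	static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;	/* BPF_LOCK */
	static pthread_rwlock_t if_lock = PTHREAD_RWLOCK_INITIALIZER;	/* BPFIF_*LOCK */
	static pthread_mutex_t desc_lock = PTHREAD_MUTEX_INITIALIZER;	/* BPFD_LOCK */

	static void
	attach_path(void)
	{
		pthread_mutex_lock(&global_lock);	/* 1) global first */
		pthread_rwlock_wrlock(&if_lock);	/* 2) then interface */
		pthread_mutex_lock(&desc_lock);		/* 3) then descriptor */
		/* ... mutate bd_bif and the descriptor lists here ... */
		pthread_mutex_unlock(&desc_lock);
		pthread_rwlock_unlock(&if_lock);
		pthread_mutex_unlock(&global_lock);
	}

Every path that takes more than one of these locks must take them in this order; a path that starts from a descriptor must fall back to the global lock, as the comment explains.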
@@ -284,7 +325,6 @@ bpf_canfreebuf(struct bpf_d *d)
static int
bpf_canwritebuf(struct bpf_d *d)
{
-
BPFD_LOCK_ASSERT(d);
switch (d->bd_bufmode) {
@@ -563,17 +603,92 @@ bad:
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
+ int op_w;
+
+ BPF_LOCK_ASSERT();
+
+ /*
+ * Snapshot the sysctl value so a concurrent change cannot
+ * be observed between reads.
+ */
+ op_w = V_bpf_optimize_writers;
+
+ if (d->bd_bif != NULL)
+ bpf_detachd_locked(d);
/*
- * Point d at bp, and add d to the interface's list of listeners.
- * Finally, point the driver's bpf cookie at the interface so
- * it will divert packets to bpf.
+ * Point d at bp, and add d to the interface's list.
+ * Since many applications use BPF for
+ * sending raw packets only (dhcpd and cdpd are good examples),
+ * we can delay adding d to the list of active listeners until
+ * some filter is configured.
*/
- BPFIF_LOCK(bp);
+
+ BPFIF_WLOCK(bp);
+ BPFD_LOCK(d);
+
d->bd_bif = bp;
- LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+ if (op_w != 0) {
+ /* Add to writers-only list */
+ LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
+ /*
+ * We decrement bd_writer on every filter set operation.
+ * The first BIOCSETF is done by pcap_open_live() to set up the
+ * snap length. After that the application usually sets its own filter.
+ */
+ d->bd_writer = 2;
+ } else
+ LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+ BPFD_UNLOCK(d);
+ BPFIF_WUNLOCK(bp);
bpf_bpfd_cnt++;
- BPFIF_UNLOCK(bp);
+
+ CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
+ __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
+
+ if (op_w == 0)
+ EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
+}
+
+/*
+ * Add d to the list of active bp filters.
+ * Requires bpf_attachd() to be called first.
+ */
+static void
+bpf_upgraded(struct bpf_d *d)
+{
+ struct bpf_if *bp;
+
+ BPF_LOCK_ASSERT();
+
+ bp = d->bd_bif;
+
+ /*
+ * Filter can be set several times without specifying interface.
+ * Mark d as reader and exit.
+ */
+ if (bp == NULL) {
+ BPFD_LOCK(d);
+ d->bd_writer = 0;
+ BPFD_UNLOCK(d);
+ return;
+ }
+
+ BPFIF_WLOCK(bp);
+ BPFD_LOCK(d);
+
+ /* Remove from writers-only list */
+ LIST_REMOVE(d, bd_next);
+ LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+ /* Mark d as reader */
+ d->bd_writer = 0;
+
+ BPFD_UNLOCK(d);
+ BPFIF_WUNLOCK(bp);
+
+ CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}
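The writer-only path above targets send-only consumers such as dhcpd. A minimal sketch of such a consumer (standard bpf(4) ioctls; assumes the cloning /dev/bpf device; error handling abbreviated) — with optimize_writers enabled, this descriptor stays on bif_wlist and costs nothing in the capture path:

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <net/bpf.h>
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	int
	open_bpf_writer(const char *ifname)
	{
		struct ifreq ifr;
		int fd;

		if ((fd = open("/dev/bpf", O_WRONLY)) < 0)
			return (-1);
		memset(&ifr, 0, sizeof(ifr));
		strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
		/* With net.bpf.optimize_writers=1 this lands on bif_wlist only. */
		if (ioctl(fd, BIOCSETIF, &ifr) == -1) {
			close(fd);
			return (-1);
		}
		return (fd);	/* write(2) raw frames; no filter is ever set */
	}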
@@ -584,26 +699,47 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
static void
bpf_detachd(struct bpf_d *d)
{
+ BPF_LOCK();
+ bpf_detachd_locked(d);
+ BPF_UNLOCK();
+}
+
+static void
+bpf_detachd_locked(struct bpf_d *d)
+{
int error;
struct bpf_if *bp;
struct ifnet *ifp;
- bp = d->bd_bif;
- BPFIF_LOCK(bp);
+ CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
+
+ BPF_LOCK_ASSERT();
+
+ /* Check if descriptor is attached */
+ if ((bp = d->bd_bif) == NULL)
+ return;
+
+ BPFIF_WLOCK(bp);
BPFD_LOCK(d);
- ifp = d->bd_bif->bif_ifp;
+
+ /* Save bd_writer value */
+ error = d->bd_writer;
/*
* Remove d from the interface's descriptor list.
*/
LIST_REMOVE(d, bd_next);
- bpf_bpfd_cnt--;
+ ifp = bp->bif_ifp;
d->bd_bif = NULL;
BPFD_UNLOCK(d);
- BPFIF_UNLOCK(bp);
+ BPFIF_WUNLOCK(bp);
+
+ bpf_bpfd_cnt--;
- EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
+ /* Call event handler iff d is attached */
+ if (error == 0)
+ EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
/*
* Check if this descriptor had requested promiscuous mode.
@@ -642,14 +778,11 @@ bpf_dtor(void *data)
d->bd_state = BPF_IDLE;
BPFD_UNLOCK(d);
funsetown(&d->bd_sigio);
- mtx_lock(&bpf_mtx);
- if (d->bd_bif)
- bpf_detachd(d);
- mtx_unlock(&bpf_mtx);
- selwakeuppri(&d->bd_sel, PRINET);
+ bpf_detachd(d);
#ifdef MAC
mac_bpfdesc_destroy(d);
#endif /* MAC */
+ seldrain(&d->bd_sel);
knlist_destroy(&d->bd_sel.si_note);
callout_drain(&d->bd_callout);
bpf_freed(d);
@@ -665,7 +798,7 @@ static int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
struct bpf_d *d;
- int error;
+ int error, size;
d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
error = devfs_set_cdevpriv(d, bpf_dtor);
@@ -683,14 +816,18 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
d->bd_bufmode = BPF_BUFMODE_BUFFER;
d->bd_sig = SIGIO;
d->bd_direction = BPF_D_INOUT;
- d->bd_pid = td->td_proc->p_pid;
+ BPF_PID_REFRESH(d, td);
#ifdef MAC
mac_bpfdesc_init(d);
mac_bpfdesc_create(td->td_ucred, d);
#endif
- mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
- callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
- knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
+ mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
+ callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
+ knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
+
+ /* Allocate default buffers */
+ size = d->bd_bufsize;
+ bpf_buffer_ioctl_sblen(d, &size);
return (0);
}
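Because bpfopen() now preallocates default buffers, a reader can go straight from open(2) to BIOCSETIF to read(2) without issuing BIOCSBLEN first. A hedged userland sketch (standard bpf(4) ioctls; walking the bpf_hdr records is elided):

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <net/bpf.h>
	#include <fcntl.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int
	capture_once(const char *ifname)
	{
		struct ifreq ifr;
		u_int blen;
		char *buf;
		ssize_t n;
		int fd;

		if ((fd = open("/dev/bpf", O_RDONLY)) < 0)
			return (-1);
		memset(&ifr, 0, sizeof(ifr));
		strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
		if (ioctl(fd, BIOCSETIF, &ifr) == -1 ||
		    ioctl(fd, BIOCGBLEN, &blen) == -1) {
			close(fd);
			return (-1);
		}
		if ((buf = malloc(blen)) == NULL) {
			close(fd);
			return (-1);
		}
		n = read(fd, buf, blen);	/* no BIOCSBLEN needed first */
		/* ... walk the bpf_hdr records in buf[0..n) ... */
		free(buf);
		close(fd);
		return (n < 0 ? -1 : 0);
	}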
@@ -720,7 +857,7 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag)
non_block = ((ioflag & O_NONBLOCK) != 0);
BPFD_LOCK(d);
- d->bd_pid = curthread->td_proc->p_pid;
+ BPF_PID_REFRESH_CUR(d);
if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
BPFD_UNLOCK(d);
return (EOPNOTSUPP);
@@ -766,7 +903,7 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag)
BPFD_UNLOCK(d);
return (EWOULDBLOCK);
}
- error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
+ error = msleep(d, &d->bd_lock, PRINET|PCATCH,
"bpf", d->bd_rtout);
if (error == EINTR || error == ERESTART) {
BPFD_UNLOCK(d);
@@ -883,8 +1020,9 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
if (error != 0)
return (error);
- d->bd_pid = curthread->td_proc->p_pid;
+ BPF_PID_REFRESH_CUR(d);
d->bd_wcount++;
+ /* XXX: locking required */
if (d->bd_bif == NULL) {
d->bd_wdcount++;
return (ENXIO);
@@ -905,6 +1043,7 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
bzero(&dst, sizeof(dst));
m = NULL;
hlen = 0;
+ /* XXX: bpf_movein() can sleep */
error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
&m, &dst, &hlen, d->bd_wfilter);
if (error) {
@@ -964,7 +1103,7 @@ static void
reset_d(struct bpf_d *d)
{
- mtx_assert(&d->bd_mtx, MA_OWNED);
+ BPFD_LOCK_ASSERT(d);
if ((d->bd_hbuf != NULL) &&
(d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
@@ -1030,7 +1169,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
* Refresh PID associated with this descriptor.
*/
BPFD_LOCK(d);
- d->bd_pid = td->td_proc->p_pid;
+ BPF_PID_REFRESH(d, td);
if (d->bd_state == BPF_WAITING)
callout_stop(&d->bd_callout);
d->bd_state = BPF_IDLE;
@@ -1081,7 +1220,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
case BIOCGDLTLIST32:
case BIOCGRTIMEOUT32:
case BIOCSRTIMEOUT32:
+ BPFD_LOCK(d);
d->bd_compat32 = 1;
+ BPFD_UNLOCK(d);
}
#endif
@@ -1126,7 +1267,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
* Get buffer len [for read()].
*/
case BIOCGBLEN:
+ BPFD_LOCK(d);
*(u_int *)addr = d->bd_bufsize;
+ BPFD_UNLOCK(d);
break;
/*
@@ -1181,10 +1324,12 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
* Get current data link type.
*/
case BIOCGDLT:
+ BPF_LOCK();
if (d->bd_bif == NULL)
error = EINVAL;
else
*(u_int *)addr = d->bd_bif->bif_dlt;
+ BPF_UNLOCK();
break;
/*
@@ -1199,6 +1344,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
list32 = (struct bpf_dltlist32 *)addr;
dltlist.bfl_len = list32->bfl_len;
dltlist.bfl_list = PTRIN(list32->bfl_list);
+ BPF_LOCK();
if (d->bd_bif == NULL)
error = EINVAL;
else {
@@ -1206,31 +1352,37 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
if (error == 0)
list32->bfl_len = dltlist.bfl_len;
}
+ BPF_UNLOCK();
break;
}
#endif
case BIOCGDLTLIST:
+ BPF_LOCK();
if (d->bd_bif == NULL)
error = EINVAL;
else
error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
+ BPF_UNLOCK();
break;
/*
* Set data link type.
*/
case BIOCSDLT:
+ BPF_LOCK();
if (d->bd_bif == NULL)
error = EINVAL;
else
error = bpf_setdlt(d, *(u_int *)addr);
+ BPF_UNLOCK();
break;
/*
* Get interface name.
*/
case BIOCGETIF:
+ BPF_LOCK();
if (d->bd_bif == NULL)
error = EINVAL;
else {
@@ -1240,13 +1392,16 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
strlcpy(ifr->ifr_name, ifp->if_xname,
sizeof(ifr->ifr_name));
}
+ BPF_UNLOCK();
break;
/*
* Set interface.
*/
case BIOCSETIF:
+ BPF_LOCK();
error = bpf_setif(d, (struct ifreq *)addr);
+ BPF_UNLOCK();
break;
/*
@@ -1329,7 +1484,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
* Set immediate mode.
*/
case BIOCIMMEDIATE:
+ BPFD_LOCK(d);
d->bd_immediate = *(u_int *)addr;
+ BPFD_UNLOCK(d);
break;
case BIOCVERSION:
@@ -1345,21 +1502,27 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
* Get "header already complete" flag
*/
case BIOCGHDRCMPLT:
+ BPFD_LOCK(d);
*(u_int *)addr = d->bd_hdrcmplt;
+ BPFD_UNLOCK(d);
break;
/*
* Set "header already complete" flag
*/
case BIOCSHDRCMPLT:
+ BPFD_LOCK(d);
d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
+ BPFD_UNLOCK(d);
break;
/*
* Get packet direction flag
*/
case BIOCGDIRECTION:
+ BPFD_LOCK(d);
*(u_int *)addr = d->bd_direction;
+ BPFD_UNLOCK(d);
break;
/*
@@ -1374,7 +1537,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
case BPF_D_IN:
case BPF_D_INOUT:
case BPF_D_OUT:
+ BPFD_LOCK(d);
d->bd_direction = direction;
+ BPFD_UNLOCK(d);
break;
default:
error = EINVAL;
@@ -1383,26 +1548,38 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
break;
case BIOCFEEDBACK:
+ BPFD_LOCK(d);
d->bd_feedback = *(u_int *)addr;
+ BPFD_UNLOCK(d);
break;
case BIOCLOCK:
+ BPFD_LOCK(d);
d->bd_locked = 1;
+ BPFD_UNLOCK(d);
break;
case FIONBIO: /* Non-blocking I/O */
break;
case FIOASYNC: /* Send signal on receive packets */
+ BPFD_LOCK(d);
d->bd_async = *(int *)addr;
+ BPFD_UNLOCK(d);
break;
case FIOSETOWN:
+ /*
+ * XXX: Add some sort of locking here?
+ * fsetown() can sleep.
+ */
error = fsetown(*(int *)addr, &d->bd_sigio);
break;
case FIOGETOWN:
+ BPFD_LOCK(d);
*(int *)addr = fgetown(&d->bd_sigio);
+ BPFD_UNLOCK(d);
break;
/* This is deprecated, FIOSETOWN should be used instead. */
@@ -1423,16 +1600,23 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
if (sig >= NSIG)
error = EINVAL;
- else
+ else {
+ BPFD_LOCK(d);
d->bd_sig = sig;
+ BPFD_UNLOCK(d);
+ }
break;
}
case BIOCGRSIG:
+ BPFD_LOCK(d);
*(u_int *)addr = d->bd_sig;
+ BPFD_UNLOCK(d);
break;
case BIOCGETBUFMODE:
+ BPFD_LOCK(d);
*(u_int *)addr = d->bd_bufmode;
+ BPFD_UNLOCK(d);
break;
case BIOCSETBUFMODE:
@@ -1487,95 +1671,130 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
/*
* Set d's packet filter program to fp. If this file already has a filter,
* free it and replace it. Returns EINVAL for bogus requests.
+ *
+ * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
+ * since reading d->bd_bif can't be protected by d or interface lock due to
+ * lock order.
+ *
+ * Additionally, we have to acquire the interface write lock, since bpf_mtap()
+ * uses the interface read lock to read all filters.
+ *
*/
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
+#ifdef COMPAT_FREEBSD32
+ struct bpf_program fp_swab;
+ struct bpf_program32 *fp32;
+#endif
struct bpf_insn *fcode, *old;
- u_int wfilter, flen, size;
#ifdef BPF_JITTER
- bpf_jit_filter *ofunc;
+ bpf_jit_filter *jfunc, *ofunc;
#endif
-#ifdef COMPAT_FREEBSD32
- struct bpf_program32 *fp32;
- struct bpf_program fp_swab;
+ size_t size;
+ u_int flen;
+ int need_upgrade;
- if (cmd == BIOCSETWF32 || cmd == BIOCSETF32 || cmd == BIOCSETFNR32) {
+#ifdef COMPAT_FREEBSD32
+ switch (cmd) {
+ case BIOCSETF32:
+ case BIOCSETWF32:
+ case BIOCSETFNR32:
fp32 = (struct bpf_program32 *)fp;
fp_swab.bf_len = fp32->bf_len;
fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
fp = &fp_swab;
- if (cmd == BIOCSETWF32)
+ switch (cmd) {
+ case BIOCSETF32:
+ cmd = BIOCSETF;
+ break;
+ case BIOCSETWF32:
cmd = BIOCSETWF;
+ break;
+ }
+ break;
}
#endif
- if (cmd == BIOCSETWF) {
- old = d->bd_wfilter;
- wfilter = 1;
-#ifdef BPF_JITTER
- ofunc = NULL;
-#endif
- } else {
- wfilter = 0;
- old = d->bd_rfilter;
+
+ fcode = NULL;
#ifdef BPF_JITTER
- ofunc = d->bd_bfilter;
+ jfunc = ofunc = NULL;
#endif
- }
- if (fp->bf_insns == NULL) {
- if (fp->bf_len != 0)
+ need_upgrade = 0;
+
+ /*
+ * Check the new filter for validity before acquiring any locks.
+ * Allocate memory for new filter, if needed.
+ */
+ flen = fp->bf_len;
+ if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
+ return (EINVAL);
+ size = flen * sizeof(*fp->bf_insns);
+ if (size > 0) {
+ /* We're setting up new filter. Copy and check actual data. */
+ fcode = malloc(size, M_BPF, M_WAITOK);
+ if (copyin(fp->bf_insns, fcode, size) != 0 ||
+ !bpf_validate(fcode, flen)) {
+ free(fcode, M_BPF);
return (EINVAL);
- BPFD_LOCK(d);
- if (wfilter)
- d->bd_wfilter = NULL;
- else {
- d->bd_rfilter = NULL;
-#ifdef BPF_JITTER
- d->bd_bfilter = NULL;
-#endif
- if (cmd == BIOCSETF)
- reset_d(d);
}
- BPFD_UNLOCK(d);
- if (old != NULL)
- free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
- if (ofunc != NULL)
- bpf_destroy_jit_filter(ofunc);
+ /* Filter is copied inside fcode and is perfectly valid. */
+ jfunc = bpf_jitter(fcode, flen);
#endif
- return (0);
}
- flen = fp->bf_len;
- if (flen > bpf_maxinsns)
- return (EINVAL);
- size = flen * sizeof(*fp->bf_insns);
- fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
- if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
- bpf_validate(fcode, (int)flen)) {
- BPFD_LOCK(d);
- if (wfilter)
- d->bd_wfilter = fcode;
- else {
- d->bd_rfilter = fcode;
+ BPF_LOCK();
+
+ /*
+ * Set up new filter.
+ * Protect filter change by interface lock.
+ * Additionally, we are protected by global lock here.
+ */
+ if (d->bd_bif != NULL)
+ BPFIF_WLOCK(d->bd_bif);
+ BPFD_LOCK(d);
+ if (cmd == BIOCSETWF) {
+ old = d->bd_wfilter;
+ d->bd_wfilter = fcode;
+ } else {
+ old = d->bd_rfilter;
+ d->bd_rfilter = fcode;
#ifdef BPF_JITTER
- d->bd_bfilter = bpf_jitter(fcode, flen);
+ ofunc = d->bd_bfilter;
+ d->bd_bfilter = jfunc;
#endif
- if (cmd == BIOCSETF)
- reset_d(d);
+ if (cmd == BIOCSETF)
+ reset_d(d);
+
+ if (fcode != NULL) {
+ /*
+ * Do not require an upgrade on the first BIOCSETF
+ * (issued by pcap_open_live() to set the snaplen).
+ */
+ if (d->bd_writer != 0 && --d->bd_writer == 0)
+ need_upgrade = 1;
+ CTR4(KTR_NET, "%s: filter function set by pid %d, "
+ "bd_writer counter %d, need_upgrade %d",
+ __func__, d->bd_pid, d->bd_writer, need_upgrade);
}
- BPFD_UNLOCK(d);
- if (old != NULL)
- free((caddr_t)old, M_BPF);
+ }
+ BPFD_UNLOCK(d);
+ if (d->bd_bif != NULL)
+ BPFIF_WUNLOCK(d->bd_bif);
+ if (old != NULL)
+ free(old, M_BPF);
#ifdef BPF_JITTER
- if (ofunc != NULL)
- bpf_destroy_jit_filter(ofunc);
+ if (ofunc != NULL)
+ bpf_destroy_jit_filter(ofunc);
#endif
- return (0);
- }
- free((caddr_t)fcode, M_BPF);
- return (EINVAL);
+ /* Move d to active readers list. */
+ if (need_upgrade)
+ bpf_upgraded(d);
+
+ BPF_UNLOCK();
+ return (0);
}
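The restructured bpf_setf() above follows a validate-then-swap discipline: allocate and validate the new filter with no locks held, publish it under the locks, and free the old one after unlocking. A generic self-contained userland sketch of the same pattern (validate_and_copy() is a trivial stand-in for the kernel's copyin()+bpf_validate() step):

	#include <errno.h>
	#include <pthread.h>
	#include <stdlib.h>

	struct filter { int insns; };

	static pthread_mutex_t d_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct filter *cur_filter;

	/* Stand-in validation/copy step; may sleep or fail, so no locks held. */
	static struct filter *
	validate_and_copy(const struct filter *src)
	{
		struct filter *f;

		if (src == NULL)
			return (NULL);
		if ((f = malloc(sizeof(*f))) != NULL)
			*f = *src;
		return (f);
	}

	static int
	set_filter(const struct filter *src)
	{
		struct filter *new, *old;

		new = validate_and_copy(src);	/* sleeping work, lock-free */
		if (new == NULL)
			return (EINVAL);
		pthread_mutex_lock(&d_lock);
		old = cur_filter;		/* the swap itself never sleeps */
		cur_filter = new;
		pthread_mutex_unlock(&d_lock);
		free(old);			/* free(NULL) is a no-op */
		return (0);
	}

The same split is what lets bpf_setf() hold the interface write lock only for the pointer exchange, keeping bpf_mtap() readers off the slow path.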
/*
@@ -1589,28 +1808,30 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
struct bpf_if *bp;
struct ifnet *theywant;
+ BPF_LOCK_ASSERT();
+
theywant = ifunit(ifr->ifr_name);
if (theywant == NULL || theywant->if_bpf == NULL)
return (ENXIO);
bp = theywant->if_bpf;
+ /* Check if interface is not being detached from BPF */
+ BPFIF_RLOCK(bp);
+ if (bp->flags & BPFIF_FLAG_DYING) {
+ BPFIF_RUNLOCK(bp);
+ return (ENXIO);
+ }
+ BPFIF_RUNLOCK(bp);
+
/*
* Behavior here depends on the buffering model. If we're using
* kernel memory buffers, then we can allocate them here. If we're
* using zero-copy, then the user process must have registered
* buffers by the time we get here. If not, return an error.
- *
- * XXXRW: There are locking issues here with multi-threaded use: what
- * if two threads try to set the interface at once?
*/
switch (d->bd_bufmode) {
case BPF_BUFMODE_BUFFER:
- if (d->bd_sbuf == NULL)
- bpf_buffer_alloc(d);
- KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
- break;
-
case BPF_BUFMODE_ZBUF:
if (d->bd_sbuf == NULL)
return (EINVAL);
@@ -1619,15 +1840,8 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
default:
panic("bpf_setif: bufmode %d", d->bd_bufmode);
}
- if (bp != d->bd_bif) {
- if (d->bd_bif)
- /*
- * Detach if attached to something else.
- */
- bpf_detachd(d);
-
+ if (bp != d->bd_bif)
bpf_attachd(d, bp);
- }
BPFD_LOCK(d);
reset_d(d);
BPFD_UNLOCK(d);
@@ -1655,7 +1869,7 @@ bpfpoll(struct cdev *dev, int events, struct thread *td)
*/
revents = events & (POLLOUT | POLLWRNORM);
BPFD_LOCK(d);
- d->bd_pid = td->td_proc->p_pid;
+ BPF_PID_REFRESH(d, td);
if (events & (POLLIN | POLLRDNORM)) {
if (bpf_ready(d))
revents |= events & (POLLIN | POLLRDNORM);
@@ -1690,7 +1904,7 @@ bpfkqfilter(struct cdev *dev, struct knote *kn)
* Refresh PID associated with this descriptor.
*/
BPFD_LOCK(d);
- d->bd_pid = curthread->td_proc->p_pid;
+ BPF_PID_REFRESH_CUR(d);
kn->kn_fop = &bpfread_filtops;
kn->kn_hook = d;
knlist_add(&d->bd_sel.si_note, kn, 1);
@@ -1746,9 +1960,19 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
struct timeval tv;
gottime = 0;
- BPFIF_LOCK(bp);
+
+ BPFIF_RLOCK(bp);
+
LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
- BPFD_LOCK(d);
+ /*
+ * We are not using any locks for d here because:
+ * 1) any filter change is protected by interface
+ * write lock
+ * 2) destroying/detaching d is protected by interface
+ * write lock, too
+ */
+
+ /* XXX: Do not protect counter for the sake of performance. */
++d->bd_rcount;
/*
* NB: We dont call BPF_CHECK_DIRECTION() here since there is no
@@ -1764,6 +1988,11 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
#endif
slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
if (slen != 0) {
+ /*
+ * Filter matches. Acquire the descriptor lock.
+ */
+ BPFD_LOCK(d);
+
d->bd_fcount++;
if (!gottime) {
microtime(&tv);
@@ -1774,10 +2003,10 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
#endif
catchpacket(d, pkt, pktlen, slen,
bpf_append_bytes, &tv);
+ BPFD_UNLOCK(d);
}
- BPFD_UNLOCK(d);
}
- BPFIF_UNLOCK(bp);
+ BPFIF_RUNLOCK(bp);
}
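The payoff of the rwlock conversion shows here: concurrent taps share the interface read lock, and a descriptor mutex is touched only on a filter match. A self-contained userland sketch of that fast path (single descriptor, trivial stand-in filter):

	#include <pthread.h>
	#include <string.h>

	struct desc {
		pthread_mutex_t lock;
		char buf[2048];
	};

	static pthread_rwlock_t if_rwlock = PTHREAD_RWLOCK_INITIALIZER;
	static struct desc d0 = { PTHREAD_MUTEX_INITIALIZER };

	/* Stand-in for bpf_filter(): nonzero means "capture this packet". */
	static int
	match(const char *pkt, size_t len)
	{
		(void)pkt;
		return (len > 0);
	}

	static void
	tap_like(const char *pkt, size_t len)
	{
		pthread_rwlock_rdlock(&if_rwlock);	/* taps run concurrently */
		if (match(pkt, len)) {			/* miss: no mutex at all */
			pthread_mutex_lock(&d0.lock);	/* hit: serialize buffers */
			memcpy(d0.buf, pkt,
			    len < sizeof(d0.buf) ? len : sizeof(d0.buf));
			pthread_mutex_unlock(&d0.lock);
		}
		pthread_rwlock_unlock(&if_rwlock);
	}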
#define BPF_CHECK_DIRECTION(d, r, i) \
@@ -1786,6 +2015,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
/*
* Incoming linkage from device drivers, when packet is in an mbuf chain.
+ * Locking model is explained in bpf_tap().
*/
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
@@ -1808,11 +2038,11 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
pktlen = m_length(m, NULL);
- BPFIF_LOCK(bp);
+ BPFIF_RLOCK(bp);
+
LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
continue;
- BPFD_LOCK(d);
++d->bd_rcount;
#ifdef BPF_JITTER
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
@@ -1823,6 +2053,8 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
#endif
slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
if (slen != 0) {
+ BPFD_LOCK(d);
+
d->bd_fcount++;
if (!gottime) {
microtime(&tv);
@@ -1833,10 +2065,10 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
#endif
catchpacket(d, (u_char *)m, pktlen, slen,
bpf_append_mbuf, &tv);
+ BPFD_UNLOCK(d);
}
- BPFD_UNLOCK(d);
}
- BPFIF_UNLOCK(bp);
+ BPFIF_RUNLOCK(bp);
}
/*
@@ -1871,14 +2103,17 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
mb.m_len = dlen;
pktlen += dlen;
- BPFIF_LOCK(bp);
+
+ BPFIF_RLOCK(bp);
+
LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
continue;
- BPFD_LOCK(d);
++d->bd_rcount;
slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
if (slen != 0) {
+ BPFD_LOCK(d);
+
d->bd_fcount++;
if (!gottime) {
microtime(&tv);
@@ -1889,10 +2124,10 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
#endif
catchpacket(d, (u_char *)&mb, pktlen, slen,
bpf_append_mbuf, &tv);
+ BPFD_UNLOCK(d);
}
- BPFD_UNLOCK(d);
}
- BPFIF_UNLOCK(bp);
+ BPFIF_RUNLOCK(bp);
}
#undef BPF_CHECK_DIRECTION
@@ -2042,7 +2277,7 @@ bpf_freed(struct bpf_d *d)
}
if (d->bd_wfilter != NULL)
free((caddr_t)d->bd_wfilter, M_BPF);
- mtx_destroy(&d->bd_mtx);
+ mtx_destroy(&d->bd_lock);
}
/*
@@ -2072,15 +2307,16 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
panic("bpfattach");
LIST_INIT(&bp->bif_dlist);
+ LIST_INIT(&bp->bif_wlist);
bp->bif_ifp = ifp;
bp->bif_dlt = dlt;
- mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
+ rw_init(&bp->bif_lock, "bpf interface lock");
KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
*driverp = bp;
- mtx_lock(&bpf_mtx);
+ BPF_LOCK();
LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
- mtx_unlock(&bpf_mtx);
+ BPF_UNLOCK();
/*
* Compute the length of the bpf header. This is not necessarily
@@ -2095,42 +2331,95 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
}
/*
- * Detach bpf from an interface. This involves detaching each descriptor
- * associated with the interface, and leaving bd_bif NULL. Notify each
- * descriptor as it's detached so that any sleepers wake up and get
- * ENXIO.
+ * Detach bpf from an interface. This involves detaching each descriptor
+ * associated with the interface. Notify each descriptor as it's detached
+ * so that any sleepers wake up and get ENXIO.
*/
void
bpfdetach(struct ifnet *ifp)
{
struct bpf_if *bp;
struct bpf_d *d;
+#ifdef INVARIANTS
+ int ndetached;
- /* Locate BPF interface information */
- mtx_lock(&bpf_mtx);
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- if (ifp == bp->bif_ifp)
- break;
- }
+ ndetached = 0;
+#endif
+
+ BPF_LOCK();
+	/* Find all bpf_if structs which reference ifp and detach them. */
+ do {
+ LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ if (ifp == bp->bif_ifp)
+ break;
+ }
+ if (bp != NULL)
+ LIST_REMOVE(bp, bif_next);
+
+ if (bp != NULL) {
+#ifdef INVARIANTS
+ ndetached++;
+#endif
+ while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
+ bpf_detachd_locked(d);
+ BPFD_LOCK(d);
+ bpf_wakeup(d);
+ BPFD_UNLOCK(d);
+ }
+ /* Free writer-only descriptors */
+ while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
+ bpf_detachd_locked(d);
+ BPFD_LOCK(d);
+ bpf_wakeup(d);
+ BPFD_UNLOCK(d);
+ }
+
+ /*
+	 * Delay freeing bp till interface is detached
+ * and all routes through this interface are removed.
+ * Mark bp as detached to restrict new consumers.
+ */
+ BPFIF_WLOCK(bp);
+ bp->flags |= BPFIF_FLAG_DYING;
+ BPFIF_WUNLOCK(bp);
+ }
+ } while (bp != NULL);
+ BPF_UNLOCK();
- /* Interface wasn't attached */
- if ((bp == NULL) || (bp->bif_ifp == NULL)) {
- mtx_unlock(&bpf_mtx);
+#ifdef INVARIANTS
+ if (ndetached == 0)
printf("bpfdetach: %s was not attached\n", ifp->if_xname);
+#endif
+}
+
+/*
+ * Interface departure handler.
+ * Note that a departure event does not guarantee the interface is going down.
+ */
+static void
+bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+ struct bpf_if *bp;
+
+ BPF_LOCK();
+ if ((bp = ifp->if_bpf) == NULL) {
+ BPF_UNLOCK();
return;
}
- LIST_REMOVE(bp, bif_next);
- mtx_unlock(&bpf_mtx);
-
- while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
- bpf_detachd(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
+ /* Check if bpfdetach() was called previously */
+ if ((bp->flags & BPFIF_FLAG_DYING) == 0) {
+ BPF_UNLOCK();
+ return;
}
- mtx_destroy(&bp->bif_mtx);
+	CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
+ __func__, bp, ifp);
+
+ ifp->if_bpf = NULL;
+ BPF_UNLOCK();
+
+ rw_destroy(&bp->bif_lock);
free(bp, M_BPF);
}
@@ -2144,24 +2433,22 @@ bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
struct ifnet *ifp;
struct bpf_if *bp;
+ BPF_LOCK_ASSERT();
+
ifp = d->bd_bif->bif_ifp;
n = 0;
error = 0;
- mtx_lock(&bpf_mtx);
LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp != ifp)
continue;
if (bfl->bfl_list != NULL) {
- if (n >= bfl->bfl_len) {
- mtx_unlock(&bpf_mtx);
+ if (n >= bfl->bfl_len)
return (ENOMEM);
- }
error = copyout(&bp->bif_dlt,
bfl->bfl_list + n, sizeof(u_int));
}
n++;
}
- mtx_unlock(&bpf_mtx);
bfl->bfl_len = n;
return (error);
}
@@ -2176,18 +2463,19 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
struct ifnet *ifp;
struct bpf_if *bp;
+ BPF_LOCK_ASSERT();
+
if (d->bd_bif->bif_dlt == dlt)
return (0);
ifp = d->bd_bif->bif_ifp;
- mtx_lock(&bpf_mtx);
+
LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
break;
}
- mtx_unlock(&bpf_mtx);
+
if (bp != NULL) {
opromisc = d->bd_promisc;
- bpf_detachd(d);
bpf_attachd(d, bp);
BPFD_LOCK(d);
reset_d(d);
@@ -2216,6 +2504,11 @@ bpf_drvinit(void *unused)
dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
/* For compatibility */
make_dev_alias(dev, "bpf0");
+
+ /* Register interface departure handler */
+ bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
+ ifnet_departure_event, bpf_ifdetach, NULL,
+ EVENTHANDLER_PRI_ANY);
}
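The registration above keeps its tag in bpf_ifdetach_cookie. A matching teardown is not part of this patch; as a hedged sketch of the usual eventhandler(9) pairing it would look like:

	#include <sys/param.h>
	#include <sys/eventhandler.h>

	static void
	bpf_drvuninit_sketch(void)
	{
		if (bpf_ifdetach_cookie != NULL)
			EVENTHANDLER_DEREGISTER(ifnet_departure_event,
			    bpf_ifdetach_cookie);
	}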
/*
@@ -2229,9 +2522,9 @@ bpf_zero_counters(void)
struct bpf_if *bp;
struct bpf_d *bd;
- mtx_lock(&bpf_mtx);
+ BPF_LOCK();
LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- BPFIF_LOCK(bp);
+ BPFIF_RLOCK(bp);
LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
BPFD_LOCK(bd);
bd->bd_rcount = 0;
@@ -2242,11 +2535,14 @@ bpf_zero_counters(void)
bd->bd_zcopy = 0;
BPFD_UNLOCK(bd);
}
- BPFIF_UNLOCK(bp);
+ BPFIF_RUNLOCK(bp);
}
- mtx_unlock(&bpf_mtx);
+ BPF_UNLOCK();
}
+/*
+ * Fill filter statistics
+ */
static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{
@@ -2254,6 +2550,7 @@ bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
bzero(d, sizeof(*d));
BPFD_LOCK_ASSERT(bd);
d->bd_structsize = sizeof(*d);
+ /* XXX: reading should be protected by global lock */
d->bd_immediate = bd->bd_immediate;
d->bd_promisc = bd->bd_promisc;
d->bd_hdrcmplt = bd->bd_hdrcmplt;
@@ -2278,6 +2575,9 @@ bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
d->bd_bufmode = bd->bd_bufmode;
}
+/*
+ * Handle `netstat -B' stats request
+ */
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -2315,24 +2615,31 @@ bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
if (bpf_bpfd_cnt == 0)
return (SYSCTL_OUT(req, 0, 0));
xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
- mtx_lock(&bpf_mtx);
+ BPF_LOCK();
if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
- mtx_unlock(&bpf_mtx);
+ BPF_UNLOCK();
free(xbdbuf, M_BPF);
return (ENOMEM);
}
index = 0;
LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- BPFIF_LOCK(bp);
+ BPFIF_RLOCK(bp);
+ /* Send writers-only first */
+ LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
+ xbd = &xbdbuf[index++];
+ BPFD_LOCK(bd);
+ bpfstats_fill_xbpf(xbd, bd);
+ BPFD_UNLOCK(bd);
+ }
LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
xbd = &xbdbuf[index++];
BPFD_LOCK(bd);
bpfstats_fill_xbpf(xbd, bd);
BPFD_UNLOCK(bd);
}
- BPFIF_UNLOCK(bp);
+ BPFIF_RUNLOCK(bp);
}
- mtx_unlock(&bpf_mtx);
+ BPF_UNLOCK();
error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
free(xbdbuf, M_BPF);
return (error);
diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h
index 726483a5..004815ad 100644
--- a/freebsd/sys/net/bpf.h
+++ b/freebsd/sys/net/bpf.h
@@ -917,14 +917,21 @@ SYSCTL_DECL(_net_bpf);
/*
* Descriptor associated with each attached hardware interface.
+ * FIXME: this structure is exposed to external callers to speed up the
+ * bpf_peers_present() call. However, all fields not needed by that
+ * function are hidden behind the BPF_INTERNAL define.
*/
struct bpf_if {
LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */
+#ifdef BPF_INTERNAL
u_int bif_dlt; /* link layer type */
u_int bif_hdrlen; /* length of header (with padding) */
struct ifnet *bif_ifp; /* corresponding interface */
- struct mtx bif_mtx; /* mutex for interface */
+ struct rwlock bif_lock; /* interface lock */
+ LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
+ int flags; /* Interface flags */
+#endif
};
void bpf_bufheld(struct bpf_d *d);
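The BPF_INTERNAL split keeps bif_next and bif_dlist visible so a driver's hot path can test for listeners without a function call into bpf.c. A sketch of the standard bpf(9) idiom this serves, as used in driver transmit/receive paths:

	/* Only hand the mbuf to BPF when someone is actually listening. */
	if (bpf_peers_present(ifp->if_bpf))
		bpf_mtap(ifp->if_bpf, m);

Writer-only descriptors sit on the hidden bif_wlist, so they do not make bpf_peers_present() return true and do not slow down this check.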
diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c
index 7ebfb0a8..382497f6 100644
--- a/freebsd/sys/net/bpf_buffer.c
+++ b/freebsd/sys/net/bpf_buffer.c
@@ -4,7 +4,7 @@
* Copyright (c) 2007 Seccuris Inc.
* All rights reserved.
*
- * This sofware was developed by Robert N. M. Watson under contract to
+ * This software was developed by Robert N. M. Watson under contract to
* Seccuris Inc.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,21 +95,6 @@ static int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
&bpf_maxbufsize, 0, "Default capture buffer in bytes");
-void
-bpf_buffer_alloc(struct bpf_d *d)
-{
-
- KASSERT(d->bd_fbuf == NULL, ("bpf_buffer_alloc: bd_fbuf != NULL"));
- KASSERT(d->bd_sbuf == NULL, ("bpf_buffer_alloc: bd_sbuf != NULL"));
- KASSERT(d->bd_hbuf == NULL, ("bpf_buffer_alloc: bd_hbuf != NULL"));
-
- d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
- d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
- d->bd_hbuf = NULL;
- d->bd_slen = 0;
- d->bd_hlen = 0;
-}
-
/*
* Simple data copy to the current kernel buffer.
*/
@@ -185,18 +170,42 @@ int
bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i)
{
u_int size;
+ caddr_t fbuf, sbuf;
- BPFD_LOCK(d);
- if (d->bd_bif != NULL) {
- BPFD_UNLOCK(d);
- return (EINVAL);
- }
size = *i;
if (size > bpf_maxbufsize)
*i = size = bpf_maxbufsize;
else if (size < BPF_MINBUFSIZE)
*i = size = BPF_MINBUFSIZE;
+
+ /* Allocate buffers immediately */
+ fbuf = (caddr_t)malloc(size, M_BPF, M_WAITOK);
+ sbuf = (caddr_t)malloc(size, M_BPF, M_WAITOK);
+
+ BPFD_LOCK(d);
+ if (d->bd_bif != NULL) {
+ /* Interface already attached, unable to change buffers */
+ BPFD_UNLOCK(d);
+ free(fbuf, M_BPF);
+ free(sbuf, M_BPF);
+ return (EINVAL);
+ }
+
+ /* Free old buffers if set */
+ if (d->bd_fbuf != NULL)
+ free(d->bd_fbuf, M_BPF);
+ if (d->bd_sbuf != NULL)
+ free(d->bd_sbuf, M_BPF);
+
+ /* Fill in new data */
d->bd_bufsize = size;
+ d->bd_fbuf = fbuf;
+ d->bd_sbuf = sbuf;
+
+ d->bd_hbuf = NULL;
+ d->bd_slen = 0;
+ d->bd_hlen = 0;
+
BPFD_UNLOCK(d);
return (0);
}
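From userland the contract is unchanged: BIOCSBLEN only succeeds before an interface is attached, and the kernel may clamp the requested size. A short sketch (standard ioctls; the kernel now allocates both buffers inside this call rather than at BIOCSETIF time):

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <net/bpf.h>

	static int
	set_buffer_len(int bpf_fd, u_int want)
	{
		u_int blen = want;

		/* Must precede BIOCSETIF; blen is updated if clamped. */
		if (ioctl(bpf_fd, BIOCSBLEN, &blen) == -1)
			return (-1);
		return ((int)blen);	/* size now backed by real buffers */
	}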
diff --git a/freebsd/sys/net/bpf_buffer.h b/freebsd/sys/net/bpf_buffer.h
index 82d0310b..c1dc1f3a 100644
--- a/freebsd/sys/net/bpf_buffer.h
+++ b/freebsd/sys/net/bpf_buffer.h
@@ -2,7 +2,7 @@
* Copyright (c) 2007 Seccuris Inc.
* All rights reserved.
*
- * This sofware was developed by Robert N. M. Watson under contract to
+ * This software was developed by Robert N. M. Watson under contract to
* Seccuris Inc.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,7 +36,6 @@
#error "no user-serviceable parts inside"
#endif
-void bpf_buffer_alloc(struct bpf_d *d);
void bpf_buffer_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
void *src, u_int len);
void bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
diff --git a/freebsd/sys/net/bpf_zerocopy.h b/freebsd/sys/net/bpf_zerocopy.h
index c541a15d..a5709b86 100644
--- a/freebsd/sys/net/bpf_zerocopy.h
+++ b/freebsd/sys/net/bpf_zerocopy.h
@@ -2,7 +2,7 @@
* Copyright (c) 2007 Seccuris Inc.
* All rights reserved.
*
- * This sofware was developed by Robert N. M. Watson under contract to
+ * This software was developed by Robert N. M. Watson under contract to
* Seccuris Inc.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/net/bpfdesc.h b/freebsd/sys/net/bpfdesc.h
index 03cb20dd..c3265ce1 100644
--- a/freebsd/sys/net/bpfdesc.h
+++ b/freebsd/sys/net/bpfdesc.h
@@ -79,6 +79,7 @@ struct bpf_d {
u_char bd_promisc; /* true if listening promiscuously */
u_char bd_state; /* idle, waiting, or timed out */
u_char bd_immediate; /* true to return on packet arrival */
+ u_char bd_writer; /* non-zero if d is writer-only */
int bd_hdrcmplt; /* false to fill in src lladdr automatically */
int bd_direction; /* select packet direction */
int bd_feedback; /* true to feed back sent packets */
@@ -86,7 +87,7 @@ struct bpf_d {
int bd_sig; /* signal to send upon packet reception */
struct sigio * bd_sigio; /* information for async I/O */
struct selinfo bd_sel; /* bsd select info */
- struct mtx bd_mtx; /* mutex for this descriptor */
+ struct mtx bd_lock; /* per-descriptor lock */
struct callout bd_callout; /* for BPF timeouts with select */
struct label *bd_label; /* MAC label for descriptor */
u_int64_t bd_fcount; /* number of packets which matched filter */
@@ -105,10 +106,16 @@ struct bpf_d {
#define BPF_WAITING 1 /* waiting for read timeout in select */
#define BPF_TIMED_OUT 2 /* read timeout has expired in select */
-#define BPFD_LOCK(bd) mtx_lock(&(bd)->bd_mtx)
-#define BPFD_UNLOCK(bd) mtx_unlock(&(bd)->bd_mtx)
-#define BPFD_LOCK_ASSERT(bd) mtx_assert(&(bd)->bd_mtx, MA_OWNED)
+#define BPFD_LOCK(bd) mtx_lock(&(bd)->bd_lock)
+#define BPFD_UNLOCK(bd) mtx_unlock(&(bd)->bd_lock)
+#define BPFD_LOCK_ASSERT(bd) mtx_assert(&(bd)->bd_lock, MA_OWNED)
+#define BPF_PID_REFRESH(bd, td) (bd)->bd_pid = (td)->td_proc->p_pid
+#define BPF_PID_REFRESH_CUR(bd) (bd)->bd_pid = curthread->td_proc->p_pid
+
+#define BPF_LOCK() mtx_lock(&bpf_mtx)
+#define BPF_UNLOCK() mtx_unlock(&bpf_mtx)
+#define BPF_LOCK_ASSERT() mtx_assert(&bpf_mtx, MA_OWNED)
/*
* External representation of the bpf descriptor
*/
@@ -143,7 +150,11 @@ struct xbpf_d {
u_int64_t bd_spare[4];
};
-#define BPFIF_LOCK(bif) mtx_lock(&(bif)->bif_mtx)
-#define BPFIF_UNLOCK(bif) mtx_unlock(&(bif)->bif_mtx)
+#define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock)
+#define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock)
+#define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock)
+#define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock)
+
+#define BPFIF_FLAG_DYING 1 /* Reject new bpf consumers */
#endif
diff --git a/freebsd/sys/net/bridgestp.c b/freebsd/sys/net/bridgestp.c
index cc7f4e6f..1b2ef7cf 100644
--- a/freebsd/sys/net/bridgestp.c
+++ b/freebsd/sys/net/bridgestp.c
@@ -129,14 +129,14 @@ static int bstp_rerooted(struct bstp_state *, struct bstp_port *);
static uint32_t bstp_calc_path_cost(struct bstp_port *);
static void bstp_notify_state(void *, int);
static void bstp_notify_rtage(void *, int);
-static void bstp_ifupdstatus(struct bstp_state *, struct bstp_port *);
+static void bstp_ifupdstatus(void *, int);
static void bstp_enable_port(struct bstp_state *, struct bstp_port *);
static void bstp_disable_port(struct bstp_state *, struct bstp_port *);
static void bstp_tick(void *);
static void bstp_timer_start(struct bstp_timer *, uint16_t);
static void bstp_timer_stop(struct bstp_timer *);
static void bstp_timer_latch(struct bstp_timer *);
-static int bstp_timer_expired(struct bstp_timer *);
+static int bstp_timer_dectest(struct bstp_timer *);
static void bstp_hello_timer_expiry(struct bstp_state *,
struct bstp_port *);
static void bstp_message_age_expiry(struct bstp_state *,
@@ -448,7 +448,7 @@ bstp_pdu_flags(struct bstp_port *bp)
return (flags);
}
-struct mbuf *
+void
bstp_input(struct bstp_port *bp, struct ifnet *ifp, struct mbuf *m)
{
struct bstp_state *bs = bp->bp_bs;
@@ -458,7 +458,7 @@ bstp_input(struct bstp_port *bp, struct ifnet *ifp, struct mbuf *m)
if (bp->bp_active == 0) {
m_freem(m);
- return (NULL);
+ return;
}
BSTP_LOCK(bs);
@@ -523,7 +523,6 @@ out:
BSTP_UNLOCK(bs);
if (m)
m_freem(m);
- return (NULL);
}
static void
@@ -1680,7 +1679,7 @@ bstp_set_autoptp(struct bstp_port *bp, int set)
if (set) {
bp->bp_flags |= BSTP_PORT_AUTOPTP;
if (bp->bp_role != BSTP_ROLE_DISABLED)
- bstp_ifupdstatus(bs, bp);
+ taskqueue_enqueue(taskqueue_swi, &bp->bp_mediatask);
} else
bp->bp_flags &= ~BSTP_PORT_AUTOPTP;
BSTP_UNLOCK(bs);
@@ -1770,85 +1769,93 @@ bstp_notify_rtage(void *arg, int pending)
}
void
-bstp_linkstate(struct ifnet *ifp, int state)
+bstp_linkstate(struct bstp_port *bp)
{
- struct bstp_state *bs;
- struct bstp_port *bp;
+ struct bstp_state *bs = bp->bp_bs;
- /* search for the stp port */
- mtx_lock(&bstp_list_mtx);
- LIST_FOREACH(bs, &bstp_list, bs_list) {
- BSTP_LOCK(bs);
- LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
- if (bp->bp_ifp == ifp) {
- bstp_ifupdstatus(bs, bp);
- bstp_update_state(bs, bp);
- /* it only exists once so return */
- BSTP_UNLOCK(bs);
- mtx_unlock(&bstp_list_mtx);
- return;
- }
- }
- BSTP_UNLOCK(bs);
- }
- mtx_unlock(&bstp_list_mtx);
+ if (!bp->bp_active)
+ return;
+
+ bstp_ifupdstatus(bp, 0);
+ BSTP_LOCK(bs);
+ bstp_update_state(bs, bp);
+ BSTP_UNLOCK(bs);
}
static void
-bstp_ifupdstatus(struct bstp_state *bs, struct bstp_port *bp)
+bstp_ifupdstatus(void *arg, int pending)
{
+ struct bstp_port *bp = (struct bstp_port *)arg;
+ struct bstp_state *bs = bp->bp_bs;
struct ifnet *ifp = bp->bp_ifp;
struct ifmediareq ifmr;
- int error = 0;
+ int error, changed;
- BSTP_LOCK_ASSERT(bs);
+ if (!bp->bp_active)
+ return;
bzero((char *)&ifmr, sizeof(ifmr));
error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
+ BSTP_LOCK(bs);
+ changed = 0;
if ((error == 0) && (ifp->if_flags & IFF_UP)) {
if (ifmr.ifm_status & IFM_ACTIVE) {
/* A full-duplex link is assumed to be point to point */
if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
- bp->bp_ptp_link =
- ifmr.ifm_active & IFM_FDX ? 1 : 0;
+ int fdx;
+
+ fdx = ifmr.ifm_active & IFM_FDX ? 1 : 0;
+ if (bp->bp_ptp_link ^ fdx) {
+ bp->bp_ptp_link = fdx;
+ changed = 1;
+ }
}
/* Calc the cost if the link was down previously */
if (bp->bp_flags & BSTP_PORT_PNDCOST) {
- bp->bp_path_cost = bstp_calc_path_cost(bp);
+ uint32_t cost;
+
+ cost = bstp_calc_path_cost(bp);
+ if (bp->bp_path_cost != cost) {
+ bp->bp_path_cost = cost;
+ changed = 1;
+ }
bp->bp_flags &= ~BSTP_PORT_PNDCOST;
}
- if (bp->bp_role == BSTP_ROLE_DISABLED)
+ if (bp->bp_role == BSTP_ROLE_DISABLED) {
bstp_enable_port(bs, bp);
+ changed = 1;
+ }
} else {
if (bp->bp_role != BSTP_ROLE_DISABLED) {
bstp_disable_port(bs, bp);
+ changed = 1;
if ((bp->bp_flags & BSTP_PORT_ADMEDGE) &&
bp->bp_protover == BSTP_PROTO_RSTP)
bp->bp_operedge = 1;
}
}
- return;
- }
-
- if (bp->bp_infois != BSTP_INFO_DISABLED)
+ } else if (bp->bp_infois != BSTP_INFO_DISABLED) {
bstp_disable_port(bs, bp);
+ changed = 1;
+ }
+ if (changed)
+ bstp_assign_roles(bs);
+ BSTP_UNLOCK(bs);
}
static void
bstp_enable_port(struct bstp_state *bs, struct bstp_port *bp)
{
bp->bp_infois = BSTP_INFO_AGED;
- bstp_assign_roles(bs);
}
static void
bstp_disable_port(struct bstp_state *bs, struct bstp_port *bp)
{
bp->bp_infois = BSTP_INFO_DISABLED;
- bstp_assign_roles(bs);
}
static void
@@ -1862,30 +1869,34 @@ bstp_tick(void *arg)
if (bs->bs_running == 0)
return;
- /* slow timer to catch missed link events */
- if (bstp_timer_expired(&bs->bs_link_timer)) {
- LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
- bstp_ifupdstatus(bs, bp);
+ CURVNET_SET(bs->bs_vnet);
+
+ /* poll link events on interfaces that do not support linkstate */
+ if (bstp_timer_dectest(&bs->bs_link_timer)) {
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+ if (!(bp->bp_ifp->if_capabilities & IFCAP_LINKSTATE))
+ taskqueue_enqueue(taskqueue_swi, &bp->bp_mediatask);
+ }
bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER);
}
LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
/* no events need to happen for these */
- bstp_timer_expired(&bp->bp_tc_timer);
- bstp_timer_expired(&bp->bp_recent_root_timer);
- bstp_timer_expired(&bp->bp_forward_delay_timer);
- bstp_timer_expired(&bp->bp_recent_backup_timer);
+ bstp_timer_dectest(&bp->bp_tc_timer);
+ bstp_timer_dectest(&bp->bp_recent_root_timer);
+ bstp_timer_dectest(&bp->bp_forward_delay_timer);
+ bstp_timer_dectest(&bp->bp_recent_backup_timer);
- if (bstp_timer_expired(&bp->bp_hello_timer))
+ if (bstp_timer_dectest(&bp->bp_hello_timer))
bstp_hello_timer_expiry(bs, bp);
- if (bstp_timer_expired(&bp->bp_message_age_timer))
+ if (bstp_timer_dectest(&bp->bp_message_age_timer))
bstp_message_age_expiry(bs, bp);
- if (bstp_timer_expired(&bp->bp_migrate_delay_timer))
+ if (bstp_timer_dectest(&bp->bp_migrate_delay_timer))
bstp_migrate_delay_expiry(bs, bp);
- if (bstp_timer_expired(&bp->bp_edge_delay_timer))
+ if (bstp_timer_dectest(&bp->bp_edge_delay_timer))
bstp_edge_delay_expiry(bs, bp);
/* update the various state machines for the port */
@@ -1895,6 +1906,8 @@ bstp_tick(void *arg)
bp->bp_txcount--;
}
+ CURVNET_RESTORE();
+
callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs);
}
@@ -1922,7 +1935,7 @@ bstp_timer_latch(struct bstp_timer *t)
}
static int
-bstp_timer_expired(struct bstp_timer *t)
+bstp_timer_dectest(struct bstp_timer *t)
{
if (t->active == 0 || t->latched)
return (0);
@@ -2010,24 +2023,33 @@ bstp_reinit(struct bstp_state *bs)
struct bstp_port *bp;
struct ifnet *ifp, *mif;
u_char *e_addr;
+ void *bridgeptr;
static const u_char llzero[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
BSTP_LOCK_ASSERT(bs);
+ if (LIST_EMPTY(&bs->bs_bplist))
+ goto disablestp;
+
mif = NULL;
+ bridgeptr = LIST_FIRST(&bs->bs_bplist)->bp_ifp->if_bridge;
+ KASSERT(bridgeptr != NULL, ("Invalid bridge pointer"));
/*
* Search through the Ethernet adapters and find the one with the
- * lowest value. The adapter which we take the MAC address from does
- * not need to be part of the bridge, it just needs to be a unique
- * value.
+	 * lowest value. Make sure the adapter which we take the MAC address
+	 * from is part of this bridge, so that more than one independent
+	 * bridge can exist in the same STP domain.
*/
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (ifp->if_type != IFT_ETHER)
- continue;
+ continue; /* Not Ethernet */
+
+ if (ifp->if_bridge != bridgeptr)
+ continue; /* Not part of our bridge */
if (bstp_addr_cmp(IF_LLADDR(ifp), llzero) == 0)
- continue;
+ continue; /* No mac address set */
if (mif == NULL) {
mif = ifp;
@@ -2039,21 +2061,8 @@ bstp_reinit(struct bstp_state *bs)
}
}
IFNET_RUNLOCK_NOSLEEP();
-
- if (LIST_EMPTY(&bs->bs_bplist) || mif == NULL) {
- /* Set the bridge and root id (lower bits) to zero */
- bs->bs_bridge_pv.pv_dbridge_id =
- ((uint64_t)bs->bs_bridge_priority) << 48;
- bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id;
- bs->bs_root_pv = bs->bs_bridge_pv;
- /* Disable any remaining ports, they will have no MAC address */
- LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
- bp->bp_infois = BSTP_INFO_DISABLED;
- bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
- }
- callout_stop(&bs->bs_bstpcallout);
- return;
- }
+ if (mif == NULL)
+ goto disablestp;
e_addr = IF_LLADDR(mif);
bs->bs_bridge_pv.pv_dbridge_id =
@@ -2076,11 +2085,25 @@ bstp_reinit(struct bstp_state *bs)
LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
bp->bp_port_id = (bp->bp_priority << 8) |
(bp->bp_ifp->if_index & 0xfff);
- bstp_ifupdstatus(bs, bp);
+ taskqueue_enqueue(taskqueue_swi, &bp->bp_mediatask);
}
bstp_assign_roles(bs);
bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER);
+ return;
+
+disablestp:
+ /* Set the bridge and root id (lower bits) to zero */
+ bs->bs_bridge_pv.pv_dbridge_id =
+ ((uint64_t)bs->bs_bridge_priority) << 48;
+ bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id;
+ bs->bs_root_pv = bs->bs_bridge_pv;
+ /* Disable any remaining ports, they will have no MAC address */
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+ bp->bp_infois = BSTP_INFO_DISABLED;
+ bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+ }
+ callout_stop(&bs->bs_bstpcallout);
}
static int
@@ -2090,10 +2113,8 @@ bstp_modevent(module_t mod, int type, void *data)
case MOD_LOAD:
mtx_init(&bstp_list_mtx, "bridgestp list", NULL, MTX_DEF);
LIST_INIT(&bstp_list);
- bstp_linkstate_p = bstp_linkstate;
break;
case MOD_UNLOAD:
- bstp_linkstate_p = NULL;
mtx_destroy(&bstp_list_mtx);
break;
default:
@@ -2128,6 +2149,7 @@ bstp_attach(struct bstp_state *bs, struct bstp_cb_ops *cb)
bs->bs_protover = BSTP_PROTO_RSTP;
bs->bs_state_cb = cb->bcb_state;
bs->bs_rtage_cb = cb->bcb_rtage;
+ bs->bs_vnet = curvnet;
getmicrotime(&bs->bs_last_tc_time);
@@ -2184,6 +2206,7 @@ bstp_create(struct bstp_state *bs, struct bstp_port *bp, struct ifnet *ifp)
bp->bp_priority = BSTP_DEFAULT_PORT_PRIORITY;
TASK_INIT(&bp->bp_statetask, 0, bstp_notify_state, bp);
TASK_INIT(&bp->bp_rtagetask, 0, bstp_notify_rtage, bp);
+ TASK_INIT(&bp->bp_mediatask, 0, bstp_ifupdstatus, bp);
/* Init state */
bp->bp_infois = BSTP_INFO_DISABLED;
@@ -2247,4 +2270,5 @@ bstp_destroy(struct bstp_port *bp)
KASSERT(bp->bp_active == 0, ("port is still attached"));
taskqueue_drain(taskqueue_swi, &bp->bp_statetask);
taskqueue_drain(taskqueue_swi, &bp->bp_rtagetask);
+ taskqueue_drain(taskqueue_swi, &bp->bp_mediatask);
}
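With bstp_ifupdstatus() converted to a task handler, media polling now runs from taskqueue_swi instead of inside the callout with the bridge lock held. A condensed sketch of the pattern (kernel taskqueue(9) API, illustrative context struct):

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/taskqueue.h>

	struct port_ctx {
		struct task media_task;
		/* ... media/port state ... */
	};

	static void
	media_task_fn(void *arg, int pending __unused)
	{
		struct port_ctx *pc = arg;

		/*
		 * Runs in taskqueue_swi context: safe to call driver
		 * ioctls and take subsystem locks without a LOR against
		 * the callout path.
		 */
		(void)pc;
	}

	static void
	port_init(struct port_ctx *pc)
	{
		TASK_INIT(&pc->media_task, 0, media_task_fn, pc);
	}

	static void
	on_link_event(struct port_ctx *pc)
	{
		/* Cheap and safe from any context, including callouts. */
		taskqueue_enqueue(taskqueue_swi, &pc->media_task);
	}

On teardown, taskqueue_drain(taskqueue_swi, &pc->media_task) must complete before freeing the context, which is exactly what the bstp_destroy() hunk above adds.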
diff --git a/freebsd/sys/net/bridgestp.h b/freebsd/sys/net/bridgestp.h
index 74086fce..cbb8d53c 100644
--- a/freebsd/sys/net/bridgestp.h
+++ b/freebsd/sys/net/bridgestp.h
@@ -326,6 +326,7 @@ struct bstp_port {
uint8_t bp_txcount;
struct task bp_statetask;
struct task bp_rtagetask;
+ struct task bp_mediatask;
};
/*
@@ -358,6 +359,7 @@ struct bstp_state {
LIST_HEAD(, bstp_port) bs_bplist;
bstp_state_cb_t bs_state_cb;
bstp_rtage_cb_t bs_rtage_cb;
+ struct vnet *bs_vnet;
};
#define BSTP_LOCK_INIT(_bs) mtx_init(&(_bs)->bs_mtx, "bstp", NULL, MTX_DEF)
@@ -368,8 +370,6 @@ struct bstp_state {
extern const uint8_t bstp_etheraddr[];
-extern void (*bstp_linkstate_p)(struct ifnet *ifp, int state);
-
void bstp_attach(struct bstp_state *, struct bstp_cb_ops *);
void bstp_detach(struct bstp_state *);
void bstp_init(struct bstp_state *);
@@ -378,7 +378,7 @@ int bstp_create(struct bstp_state *, struct bstp_port *, struct ifnet *);
int bstp_enable(struct bstp_port *);
void bstp_disable(struct bstp_port *);
void bstp_destroy(struct bstp_port *);
-void bstp_linkstate(struct ifnet *, int);
+void bstp_linkstate(struct bstp_port *);
int bstp_set_htime(struct bstp_state *, int);
int bstp_set_fdelay(struct bstp_state *, int);
int bstp_set_maxage(struct bstp_state *, int);
@@ -391,6 +391,6 @@ int bstp_set_edge(struct bstp_port *, int);
int bstp_set_autoedge(struct bstp_port *, int);
int bstp_set_ptp(struct bstp_port *, int);
int bstp_set_autoptp(struct bstp_port *, int);
-struct mbuf *bstp_input(struct bstp_port *, struct ifnet *, struct mbuf *);
+void bstp_input(struct bstp_port *, struct ifnet *, struct mbuf *);
#endif /* _KERNEL */
diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c
index 6e06ffe5..1b4418a2 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.c
+++ b/freebsd/sys/net/ieee8023ad_lacp.c
@@ -814,10 +814,10 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
return (NULL);
}
- if (m->m_flags & M_FLOWID)
+ if (sc->use_flowid && (m->m_flags & M_FLOWID))
hash = m->m_pkthdr.flowid;
else
- hash = lagg_hashmbuf(m, lsc->lsc_hashkey);
+ hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey);
hash %= pm->pm_count;
lp = pm->pm_map[hash];
diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c
index 918f8c4e..5dffd06d 100644
--- a/freebsd/sys/net/if.c
+++ b/freebsd/sys/net/if.c
@@ -60,6 +60,8 @@
#include <sys/taskqueue.h>
#include <sys/domain.h>
#include <sys/jail.h>
+#include <sys/priv.h>
+
#include <machine/stdarg.h>
#include <vm/uma.h>
@@ -104,7 +106,7 @@ SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
TUNABLE_INT("net.link.ifqmaxlen", &ifqmaxlen);
-SYSCTL_UINT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
+SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
&ifqmaxlen, 0, "max send queue size");
/* Log link state change events */
@@ -126,7 +128,7 @@ MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
static struct sx ifdescr_sx;
SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
-void (*bstp_linkstate_p)(struct ifnet *ifp, int state);
+void (*bridge_linkstate_p)(struct ifnet *ifp);
void (*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void (*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
@@ -277,6 +279,7 @@ ifindex_alloc_locked(u_short *idxp)
IFNET_WLOCK_ASSERT();
+retry:
/*
* Try to find an empty slot below V_if_index. If we fail, take the
* next slot.
@@ -289,10 +292,12 @@ ifindex_alloc_locked(u_short *idxp)
/* Catch if_index overflow. */
if (idx < 1)
return (ENOSPC);
+ if (idx >= V_if_indexlim) {
+ if_grow();
+ goto retry;
+ }
if (idx > V_if_index)
V_if_index = idx;
- if (V_if_index >= V_if_indexlim)
- if_grow();
*idxp = idx;
return (0);
}
@@ -362,10 +367,12 @@ vnet_if_init(const void *unused __unused)
TAILQ_INIT(&V_ifnet);
TAILQ_INIT(&V_ifg_head);
+ IFNET_WLOCK();
if_grow(); /* create initial table */
+ IFNET_WUNLOCK();
vnet_if_clone_init();
}
-VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_init,
+VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
NULL);
/* ARGSUSED*/
@@ -376,7 +383,7 @@ if_init(void *dummy __unused)
IFNET_LOCK_INIT();
if_clone_init();
}
-SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL);
+SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL);
#ifdef VIMAGE
@@ -384,8 +391,10 @@ static void
vnet_if_uninit(const void *unused __unused)
{
- VNET_ASSERT(TAILQ_EMPTY(&V_ifnet));
- VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head));
+ VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
+ "not empty", __func__, __LINE__, &V_ifnet));
+ VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
+ "not empty", __func__, __LINE__, &V_ifg_head));
free((caddr_t)V_ifindex_table, M_IFNET);
}
@@ -396,16 +405,25 @@ VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
static void
if_grow(void)
{
+ int oldlim;
u_int n;
struct ifindex_entry *e;
- V_if_indexlim <<= 1;
- n = V_if_indexlim * sizeof(*e);
+ IFNET_WLOCK_ASSERT();
+ oldlim = V_if_indexlim;
+ IFNET_WUNLOCK();
+ n = (oldlim << 1) * sizeof(*e);
e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
+ IFNET_WLOCK();
+ if (V_if_indexlim != oldlim) {
+ free(e, M_IFNET);
+ return;
+ }
if (V_ifindex_table != NULL) {
memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
free((caddr_t)V_ifindex_table, M_IFNET);
}
+ V_if_indexlim <<= 1;
V_ifindex_table = e;
}
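The reworked if_grow() drops the ifnet lock around the sleeping allocation and revalidates afterwards; a lost race simply frees the extra table, and the caller retries via the goto in ifindex_alloc_locked(). A userland analogue of the same drop-allocate-revalidate pattern:

	#include <pthread.h>
	#include <stdlib.h>
	#include <string.h>

	static pthread_mutex_t tbl_lock = PTHREAD_MUTEX_INITIALIZER;
	static void **table;
	static size_t tbl_lim = 8;

	/* Called with tbl_lock held; returns with it held. */
	static void
	table_grow(void)
	{
		size_t oldlim = tbl_lim;
		void **e;

		pthread_mutex_unlock(&tbl_lock);	/* allocation may sleep */
		e = calloc(oldlim * 2, sizeof(*e));
		pthread_mutex_lock(&tbl_lock);
		if (e == NULL)
			return;
		if (tbl_lim != oldlim) {		/* someone else grew it */
			free(e);
			return;
		}
		if (table != NULL) {
			memcpy(e, table, oldlim * sizeof(*e));
			free(table);
		}
		table = e;
		tbl_lim = oldlim * 2;
	}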
@@ -472,8 +490,8 @@ if_alloc(u_char type)
}
/*
- * Do the actual work of freeing a struct ifnet, associated index, and layer
- * 2 common structure. This call is made when the last reference to an
+ * Do the actual work of freeing a struct ifnet, and layer 2 common
+ * structure. This call is made when the last reference to an
* interface is released.
*/
static void
@@ -483,13 +501,6 @@ if_free_internal(struct ifnet *ifp)
KASSERT((ifp->if_flags & IFF_DYING),
("if_free_internal: interface not dying"));
- IFNET_WLOCK();
- KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
- ("%s: freeing unallocated ifnet", ifp->if_xname));
-
- ifindex_free_locked(ifp->if_index);
- IFNET_WUNLOCK();
-
if (if_com_free[ifp->if_alloctype] != NULL)
if_com_free[ifp->if_alloctype](ifp->if_l2com,
ifp->if_alloctype);
@@ -520,6 +531,14 @@ if_free_type(struct ifnet *ifp, u_char type)
ifp->if_alloctype));
ifp->if_flags |= IFF_DYING; /* XXX: Locking */
+
+ IFNET_WLOCK();
+ KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
+ ("%s: freeing unallocated ifnet", ifp->if_xname));
+
+ ifindex_free_locked(ifp->if_index);
+ IFNET_WUNLOCK();
+
if (!refcount_release(&ifp->if_refcount))
return;
if_free_internal(ifp);
@@ -818,10 +837,10 @@ if_purgemaddrs(struct ifnet *ifp)
struct ifmultiaddr *ifma;
struct ifmultiaddr *next;
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_WLOCK(ifp);
TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
if_delmulti_locked(ifp, ifma, 1);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_WUNLOCK(ifp);
}
/*
@@ -1165,10 +1184,10 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
ifgl->ifgl_group = ifg;
ifgm->ifgm_ifp = ifp;
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_WLOCK(ifp);
TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_WUNLOCK(ifp);
IFNET_WUNLOCK();
@@ -1195,9 +1214,9 @@ if_delgroup(struct ifnet *ifp, const char *groupname)
return (ENOENT);
}
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_WLOCK(ifp);
TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_WUNLOCK(ifp);
TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
if (ifgm->ifgm_ifp == ifp)
@@ -1238,9 +1257,9 @@ if_delgroups(struct ifnet *ifp)
strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_WLOCK(ifp);
TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_WUNLOCK(ifp);
TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
if (ifgm->ifgm_ifp == ifp)
@@ -1282,33 +1301,33 @@ if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
struct ifgroupreq *ifgr = data;
if (ifgr->ifgr_len == 0) {
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
ifgr->ifgr_len += sizeof(struct ifg_req);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
return (0);
}
len = ifgr->ifgr_len;
ifgp = ifgr->ifgr_groups;
/* XXX: wire */
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
if (len < sizeof(ifgrq)) {
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
return (EINVAL);
}
bzero(&ifgrq, sizeof ifgrq);
strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
sizeof(ifgrq.ifgrq_group));
if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
return (error);
}
len -= sizeof(ifgrq);
ifgp++;
}
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
return (0);
}
@@ -1415,28 +1434,28 @@ void
if_addr_rlock(struct ifnet *ifp)
{
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
}
void
if_addr_runlock(struct ifnet *ifp)
{
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
}
void
if_maddr_rlock(struct ifnet *ifp)
{
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
}
void
if_maddr_runlock(struct ifnet *ifp)
{
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
}
/*
@@ -1548,14 +1567,14 @@ ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family)
continue;
if (sa_equal(addr, ifa->ifa_addr)) {
if (getref)
ifa_ref(ifa);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
goto done;
}
/* IP6 doesn't have broadcast */
@@ -1565,11 +1584,11 @@ ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
sa_equal(ifa->ifa_broadaddr, addr)) {
if (getref)
ifa_ref(ifa);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
goto done;
}
}
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
}
ifa = NULL;
done:
@@ -1603,7 +1622,7 @@ ifa_ifwithbroadaddr(struct sockaddr *addr)
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family)
continue;
@@ -1612,11 +1631,11 @@ ifa_ifwithbroadaddr(struct sockaddr *addr)
ifa->ifa_broadaddr->sa_len != 0 &&
sa_equal(ifa->ifa_broadaddr, addr)) {
ifa_ref(ifa);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
goto done;
}
}
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
}
ifa = NULL;
done:
@@ -1638,18 +1657,18 @@ ifa_ifwithdstaddr(struct sockaddr *addr)
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
continue;
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family)
continue;
if (ifa->ifa_dstaddr != NULL &&
sa_equal(addr, ifa->ifa_dstaddr)) {
ifa_ref(ifa);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
goto done;
}
}
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
}
ifa = NULL;
done:
@@ -1683,12 +1702,12 @@ ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp)
/*
* Scan through each interface, looking for ones that have addresses
* in this address family. Maintain a reference on ifa_maybe once
- * we find one, as we release the IF_ADDR_LOCK() that kept it stable
+ * we find one, as we release the IF_ADDR_RLOCK() that kept it stable
* when we move onto the next interface.
*/
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
char *cp, *cp2, *cp3;
@@ -1707,7 +1726,7 @@ next: continue;
if (ifa->ifa_dstaddr != NULL &&
sa_equal(addr, ifa->ifa_dstaddr)) {
ifa_ref(ifa);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
goto done;
}
} else {
@@ -1718,7 +1737,7 @@ next: continue;
if (ifa->ifa_claim_addr) {
if ((*ifa->ifa_claim_addr)(ifa, addr)) {
ifa_ref(ifa);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
goto done;
}
continue;
@@ -1758,7 +1777,7 @@ next: continue;
}
}
}
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
}
ifa = ifa_maybe;
ifa_maybe = NULL;
@@ -1784,7 +1803,7 @@ ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
if (af >= AF_MAX)
return (NULL);
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != af)
continue;
@@ -1816,7 +1835,7 @@ ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
done:
if (ifa != NULL)
ifa_ref(ifa);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
return (ifa);
}
@@ -1936,14 +1955,10 @@ do_link_state_change(void *arg, int pending)
(*ng_ether_link_state_p)(ifp, link_state);
if (ifp->if_carp)
(*carp_linkstate_p)(ifp);
- if (ifp->if_bridge) {
- KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
- (*bstp_linkstate_p)(ifp, link_state);
- }
- if (ifp->if_lagg) {
- KASSERT(lagg_linkstate_p != NULL,("if_lagg not loaded!"));
+ if (ifp->if_bridge)
+ (*bridge_linkstate_p)(ifp);
+ if (ifp->if_lagg)
(*lagg_linkstate_p)(ifp, link_state);
- }
if (IS_DEFAULT_VNET(curvnet))
devctl_notify("IFNET", ifp->if_xname,
@@ -2180,6 +2195,20 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
free(odescrbuf, M_IFDESCR);
break;
+ case SIOCGIFFIB:
+ ifr->ifr_fib = ifp->if_fib;
+ break;
+
+ case SIOCSIFFIB:
+ error = priv_check(td, PRIV_NET_SETIFFIB);
+ if (error)
+ return (error);
+ if (ifr->ifr_fib >= rt_numfibs)
+ return (EINVAL);
+
+ ifp->if_fib = ifr->ifr_fib;
+ break;
+
case SIOCSIFFLAGS:
error = priv_check(td, PRIV_NET_SETIFFLAGS);
if (error)
@@ -2379,9 +2408,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
* lose a race while we check if the membership
* already exists.
*/
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
ifma = if_findmulti(ifp, &ifr->ifr_addr);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
if (ifma != NULL)
error = EADDRINUSE;
else
@@ -2492,10 +2521,13 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
int error;
int oif_flags;
+ CURVNET_SET(so->so_vnet);
switch (cmd) {
case SIOCGIFCONF:
case OSIOCGIFCONF:
- return (ifconf(cmd, data));
+ error = ifconf(cmd, data);
+ CURVNET_RESTORE();
+ return (error);
#ifdef COMPAT_FREEBSD32
case SIOCGIFCONF32:
@@ -2507,7 +2539,11 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
ifc.ifc_len = ifc32->ifc_len;
ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
- return (ifconf(SIOCGIFCONF, (void *)&ifc));
+ error = ifconf(SIOCGIFCONF, (void *)&ifc);
+ CURVNET_RESTORE();
+ if (error == 0)
+ ifc32->ifc_len = ifc.ifc_len;
+ return (error);
}
#endif
}
@@ -2517,49 +2553,74 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
#ifdef VIMAGE
case SIOCSIFRVNET:
error = priv_check(td, PRIV_NET_SETIFVNET);
- if (error)
- return (error);
- return (if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid));
+ if (error == 0)
+ error = if_vmove_reclaim(td, ifr->ifr_name,
+ ifr->ifr_jid);
+ CURVNET_RESTORE();
+ return (error);
#endif
case SIOCIFCREATE:
case SIOCIFCREATE2:
error = priv_check(td, PRIV_NET_IFCREATE);
- if (error)
- return (error);
- return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
- cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
+ if (error == 0)
+ error = if_clone_create(ifr->ifr_name,
+ sizeof(ifr->ifr_name),
+ cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL);
+ CURVNET_RESTORE();
+ return (error);
case SIOCIFDESTROY:
error = priv_check(td, PRIV_NET_IFDESTROY);
- if (error)
- return (error);
- return if_clone_destroy(ifr->ifr_name);
+ if (error == 0)
+ error = if_clone_destroy(ifr->ifr_name);
+ CURVNET_RESTORE();
+ return (error);
case SIOCIFGCLONERS:
- return (if_clone_list((struct if_clonereq *)data));
+ error = if_clone_list((struct if_clonereq *)data);
+ CURVNET_RESTORE();
+ return (error);
case SIOCGIFGMEMB:
- return (if_getgroupmembers((struct ifgroupreq *)data));
+ error = if_getgroupmembers((struct ifgroupreq *)data);
+ CURVNET_RESTORE();
+ return (error);
}
ifp = ifunit_ref(ifr->ifr_name);
- if (ifp == NULL)
+ if (ifp == NULL) {
+ CURVNET_RESTORE();
return (ENXIO);
+ }
error = ifhwioctl(cmd, ifp, data, td);
if (error != ENOIOCTL) {
if_rele(ifp);
+ CURVNET_RESTORE();
return (error);
}
oif_flags = ifp->if_flags;
if (so->so_proto == NULL) {
if_rele(ifp);
+ CURVNET_RESTORE();
return (EOPNOTSUPP);
}
+
+ /*
+ * Pass the request on to the socket control method, and if the
+ * latter returns EOPNOTSUPP, directly to the interface.
+ *
+ * Make an exception for the legacy SIOCSIF* requests. Drivers
+ * trust SIOCSIFADDR et al to come from an already privileged
+ * layer, and do not perform any credentials checks or input
+ * validation.
+ */
#ifndef COMPAT_43
error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
data,
ifp, td));
- if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL)
+ if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
+ cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
+ cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
error = (*ifp->if_ioctl)(ifp, cmd, data);
#else
{
@@ -2603,7 +2664,9 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
data,
ifp, td));
if (error == EOPNOTSUPP && ifp != NULL &&
- ifp->if_ioctl != NULL)
+ ifp->if_ioctl != NULL &&
+ cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
+ cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
error = (*ifp->if_ioctl)(ifp, cmd, data);
switch (ocmd) {
@@ -2627,6 +2690,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
#endif
}
if_rele(ifp);
+ CURVNET_RESTORE();
return (error);
}
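
ifioctl() now brackets the whole request in CURVNET_SET(so->so_vnet)/CURVNET_RESTORE(), so every early return above had to be rewritten to restore the vnet context first. A hedged userland analogue of that discipline; push_ctx()/pop_ctx() are made-up stand-ins for the CURVNET macros, not a real API:

#include <errno.h>

static int cur_ctx;			/* stand-in for the per-thread curvnet */

struct saved_ctx { int saved; };

static void push_ctx(struct saved_ctx *c, int nv) { c->saved = cur_ctx; cur_ctx = nv; }
static void pop_ctx(struct saved_ctx *c)          { cur_ctx = c->saved; }

/* Every exit path restores the context exactly once. */
static int
do_ioctl(int so_ctx, unsigned long cmd)
{
	struct saved_ctx c;
	int error;

	push_ctx(&c, so_ctx);		/* CURVNET_SET(so->so_vnet) */
	switch (cmd) {
	case 0x1:			/* a request we handle */
		error = 0;
		break;
	default:			/* unknown request, etc. */
		error = ENXIO;
		break;
	}
	pop_ctx(&c);			/* CURVNET_RESTORE() before return */
	return (error);
}
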
@@ -2776,7 +2840,7 @@ again:
}
addrs = 0;
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct sockaddr *sa = ifa->ifa_addr;
@@ -2808,7 +2872,7 @@ again:
if (!sbuf_overflowed(sb))
valid_len = sbuf_len(sb);
}
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
if (addrs == 0) {
bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
sbuf_bcat(sb, &ifr, sizeof(ifr));
@@ -2966,13 +3030,13 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
* If the address is already present, return a new reference to it;
* otherwise, allocate storage and set up a new address.
*/
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_WLOCK(ifp);
ifma = if_findmulti(ifp, sa);
if (ifma != NULL) {
ifma->ifma_refcount++;
if (retifma != NULL)
*retifma = ifma;
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_WUNLOCK(ifp);
return (0);
}
@@ -3038,7 +3102,7 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
* pointer is still valid.
*/
rt_newmaddrmsg(RTM_NEWMADDR, ifma);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_WUNLOCK(ifp);
/*
* We are certain we have added something, so call down to the
@@ -3058,7 +3122,7 @@ free_llsa_out:
free(llsa, M_IFMADDR);
unlock_out:
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_WUNLOCK(ifp);
return (error);
}
@@ -3092,12 +3156,12 @@ if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
if (ifp == NULL)
return (ENOENT);
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_WLOCK(ifp);
lastref = 0;
ifma = if_findmulti(ifp, sa);
if (ifma != NULL)
lastref = if_delmulti_locked(ifp, ifma, 0);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_WUNLOCK(ifp);
if (ifma == NULL)
return (ENOENT);
@@ -3119,10 +3183,10 @@ if_delallmulti(struct ifnet *ifp)
struct ifmultiaddr *ifma;
struct ifmultiaddr *next;
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_WLOCK(ifp);
TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
if_delmulti_locked(ifp, ifma, 0);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_WUNLOCK(ifp);
}
/*
@@ -3159,7 +3223,7 @@ if_delmulti_ifma(struct ifmultiaddr *ifma)
* If and only if the ifnet instance exists: Acquire the address lock.
*/
if (ifp != NULL)
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_WLOCK(ifp);
lastref = if_delmulti_locked(ifp, ifma, 0);
@@ -3169,7 +3233,7 @@ if_delmulti_ifma(struct ifmultiaddr *ifma)
* Release the address lock.
* If the group was left: update the hardware hash filter.
*/
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_WUNLOCK(ifp);
if (lastref && ifp->if_ioctl != NULL) {
(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
}
@@ -3191,7 +3255,7 @@ if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
if (ifp != NULL && ifma->ifma_ifp != NULL) {
KASSERT(ifma->ifma_ifp == ifp,
("%s: inconsistent ifp %p", __func__, ifp));
- IF_ADDR_LOCK_ASSERT(ifp);
+ IF_ADDR_WLOCK_ASSERT(ifp);
}
ifp = ifma->ifma_ifp;
@@ -3264,14 +3328,14 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
struct ifaddr *ifa;
struct ifreq ifr;
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
ifa = ifp->if_addr;
if (ifa == NULL) {
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
return (EINVAL);
}
ifa_ref(ifa);
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
sdl = (struct sockaddr_dl *)ifa->ifa_addr;
if (sdl == NULL) {
ifa_free(ifa);
diff --git a/freebsd/sys/net/if.h b/freebsd/sys/net/if.h
index 1a6423f6..25d43ac3 100644
--- a/freebsd/sys/net/if.h
+++ b/freebsd/sys/net/if.h
@@ -145,7 +145,7 @@ struct if_data {
#define IFF_LINK2 0x4000 /* per link layer defined bit */
#define IFF_ALTPHYS IFF_LINK2 /* use alternate physical connection */
#define IFF_MULTICAST 0x8000 /* (i) supports multicast */
-/* 0x10000 */
+#define IFF_CANTCONFIG 0x10000 /* (i) unconfigurable using ioctl(2) */
#define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */
#define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */
#define IFF_STATICARP 0x80000 /* (n) static ARP */
@@ -165,7 +165,7 @@ struct if_data {
#define IFF_CANTCHANGE \
(IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\
IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_SMART|IFF_PROMISC|\
- IFF_DYING)
+ IFF_DYING|IFF_CANTCONFIG)
/*
* Values for if_link_state.
@@ -220,6 +220,7 @@ struct if_data {
#define IFCAP_POLLING_NOCOUNT 0x20000 /* polling ticks cannot be fragmented */
#define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */
#define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */
+#define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */
#define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM)
#define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6)
@@ -232,6 +233,7 @@ struct if_data {
/*
* Message format for use in obtaining information about interfaces
* from getkerninfo and the routing socket
+ * For the new, extensible interface, see struct if_msghdrl below.
*/
struct if_msghdr {
u_short ifm_msglen; /* to skip over non-understood messages */
@@ -244,8 +246,34 @@ struct if_msghdr {
};
/*
+ * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is
+ * extensible after ifm_data_off or within ifm_data. Both the if_msghdrl and
+ * if_data now have a member field detailing the struct length in addition to
+ * the routing message length. Macros are provided to find the start of
+ * ifm_data and the start of the socket address structures immediately following
+ * struct if_msghdrl given a pointer to struct if_msghdrl.
+ */
+#define IF_MSGHDRL_IFM_DATA(_l) \
+ (struct if_data *)((char *)(_l) + (_l)->ifm_data_off)
+#define IF_MSGHDRL_RTA(_l) \
+ (void *)((uintptr_t)(_l) + (_l)->ifm_len)
+struct if_msghdrl {
+ u_short ifm_msglen; /* to skip over non-understood messages */
+ u_char ifm_version; /* future binary compatibility */
+ u_char ifm_type; /* message type */
+ int ifm_addrs; /* like rtm_addrs */
+ int ifm_flags; /* value of if_flags */
+ u_short ifm_index; /* index for associated ifp */
+ u_short _ifm_spare1; /* spare space to grow if_index, see if_var.h */
+ u_short ifm_len; /* length of if_msghdrl incl. if_data */
+ u_short ifm_data_off; /* offset of if_data from beginning */
+ struct if_data ifm_data;/* statistics and other data about if */
+};
+
+/*
* Message format for use in obtaining information about interface addresses
* from getkerninfo and the routing socket
+ * For the new, extensible interface, see struct ifa_msghdrl below.
*/
struct ifa_msghdr {
u_short ifam_msglen; /* to skip over non-understood messages */
@@ -258,6 +286,33 @@ struct ifa_msghdr {
};
/*
+ * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is
+ * extensible after ifam_metric or within ifam_data. Both the ifa_msghdrl and
+ * if_data now have a member field detailing the struct length in addition to
+ * the routing message length. Macros are provided to find the start of
+ * ifam_data and the start of the socket address structures immediately following
+ * struct ifa_msghdrl given a pointer to struct ifa_msghdrl.
+ */
+#define IFA_MSGHDRL_IFAM_DATA(_l) \
+ (struct if_data *)((char *)(_l) + (_l)->ifam_data_off)
+#define IFA_MSGHDRL_RTA(_l) \
+ (void *)((uintptr_t)(_l) + (_l)->ifam_len)
+struct ifa_msghdrl {
+ u_short ifam_msglen; /* to skip over non-understood messages */
+ u_char ifam_version; /* future binary compatibility */
+ u_char ifam_type; /* message type */
+ int ifam_addrs; /* like rtm_addrs */
+ int ifam_flags; /* value of ifa_flags */
+ u_short ifam_index; /* index for associated ifp */
+ u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */
+ u_short ifam_len; /* length of ifa_msghdrl incl. if_data */
+ u_short ifam_data_off; /* offset of if_data from beginning */
+ int ifam_metric; /* value of ifa_metric */
+ struct if_data ifam_data;/* statistics and other data about if or
+ * address */
+};
+
+/*
* Message format for use in obtaining information about multicast addresses
* from the routing socket
*/
@@ -315,6 +370,7 @@ struct ifreq {
int ifru_media;
caddr_t ifru_data;
int ifru_cap[2];
+ u_int ifru_fib;
} ifr_ifru;
#define ifr_addr ifr_ifru.ifru_addr /* address */
#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */
@@ -331,6 +387,7 @@ struct ifreq {
#define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */
#define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */
#define ifr_index ifr_ifru.ifru_index /* interface index */
+#define ifr_fib ifr_ifru.ifru_fib /* interface fib */
};
#define _SIZEOF_ADDR_IFREQ(ifr) \
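
Consumers of the new extensible headers are expected to navigate by the lengths and offsets carried in the message rather than by sizeof(), which is what the IF_MSGHDRL_* macros encode. A sketch of a NET_RT_IFLISTL walker, assuming a system whose <net/if.h> already provides the new structures and that buf/len came from sysctl(3):

#include <sys/types.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/route.h>		/* RTM_IFINFO */

static void
walk_iflistl(char *buf, size_t len)
{
	char *next, *lim;
	struct if_msghdrl *ifm;
	struct if_data *ifd;

	lim = buf + len;
	for (next = buf; next < lim; next += ifm->ifm_msglen) {
		ifm = (struct if_msghdrl *)(void *)next;
		if (ifm->ifm_type != RTM_IFINFO)
			continue;
		/* Honor ifm_data_off so newer kernels can grow the header. */
		ifd = IF_MSGHDRL_IFM_DATA(ifm);
		(void)ifd;		/* e.g. inspect ifd->ifi_mtu */
	}
}
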
diff --git a/freebsd/sys/net/if_arcsubr.c b/freebsd/sys/net/if_arcsubr.c
index dc75b445..e9422068 100644
--- a/freebsd/sys/net/if_arcsubr.c
+++ b/freebsd/sys/net/if_arcsubr.c
@@ -610,6 +610,7 @@ arc_input(struct ifnet *ifp, struct mbuf *m)
m_freem(m);
return;
}
+ M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
}
diff --git a/freebsd/sys/net/if_atmsubr.c b/freebsd/sys/net/if_atmsubr.c
index 747bc936..e3ce4ea0 100644
--- a/freebsd/sys/net/if_atmsubr.c
+++ b/freebsd/sys/net/if_atmsubr.c
@@ -334,6 +334,7 @@ atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m,
return;
}
}
+ M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
}
diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
index 5c15a78f..52146381 100644
--- a/freebsd/sys/net/if_bridge.c
+++ b/freebsd/sys/net/if_bridge.c
@@ -87,6 +87,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/protosw.h>
#include <sys/systm.h>
+#include <sys/jail.h>
#include <rtems/bsd/sys/time.h>
#include <sys/socket.h> /* for net/if.h */
#include <sys/sockio.h>
@@ -145,10 +146,10 @@ __FBSDID("$FreeBSD$");
#define BRIDGE_RTHASH_MASK (BRIDGE_RTHASH_SIZE - 1)
/*
- * Maximum number of addresses to cache.
+ * Default maximum number of addresses to cache.
*/
#ifndef BRIDGE_RTABLE_MAX
-#define BRIDGE_RTABLE_MAX 100
+#define BRIDGE_RTABLE_MAX 2000
#endif
/*
@@ -334,6 +335,10 @@ static int bridge_ip6_checkbasic(struct mbuf **mp);
#endif /* INET6 */
static int bridge_fragment(struct ifnet *, struct mbuf *,
struct ether_header *, int, struct llc *);
+static void bridge_linkstate(struct ifnet *ifp);
+static void bridge_linkcheck(struct bridge_softc *sc);
+
+extern void (*bridge_linkstate_p)(struct ifnet *ifp);
/* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
#define VLANTAGOF(_m) \
@@ -356,19 +361,26 @@ static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for
locally destined packets */
static int log_stp = 0; /* log STP state changes */
static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
+TUNABLE_INT("net.link.bridge.pfil_onlyip", &pfil_onlyip);
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
&pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
+TUNABLE_INT("net.link.bridge.ipfw_arp", &pfil_ipfw_arp);
SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW,
&pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2");
+TUNABLE_INT("net.link.bridge.pfil_bridge", &pfil_bridge);
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
&pfil_bridge, 0, "Packet filter on the bridge interface");
+TUNABLE_INT("net.link.bridge.pfil_member", &pfil_member);
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
&pfil_member, 0, "Packet filter on the member interface");
+TUNABLE_INT("net.link.bridge.pfil_local_phys", &pfil_local_phys);
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RW,
&pfil_local_phys, 0,
"Packet filter on the physical interface for locally destined packets");
+TUNABLE_INT("net.link.bridge.log_stp", &log_stp);
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
&log_stp, 0, "Log STP state changes");
+TUNABLE_INT("net.link.bridge.inherit_mac", &bridge_inherit_mac);
SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RW,
&bridge_inherit_mac, 0,
"Inherit MAC address from the first bridge member");
@@ -490,6 +502,7 @@ bridge_modevent(module_t mod, int type, void *data)
bridge_input_p = bridge_input;
bridge_output_p = bridge_output;
bridge_dn_p = bridge_dummynet;
+ bridge_linkstate_p = bridge_linkstate;
bridge_detach_cookie = EVENTHANDLER_REGISTER(
ifnet_departure_event, bridge_ifdetach, NULL,
EVENTHANDLER_PRI_ANY);
@@ -502,6 +515,7 @@ bridge_modevent(module_t mod, int type, void *data)
bridge_input_p = NULL;
bridge_output_p = NULL;
bridge_dn_p = NULL;
+ bridge_linkstate_p = NULL;
mtx_destroy(&bridge_list_mtx);
break;
default:
@@ -562,7 +576,8 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct bridge_softc *sc, *sc2;
struct ifnet *bifp, *ifp;
- int retry;
+ int fb, retry;
+ unsigned long hostid;
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -595,17 +610,30 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
IFQ_SET_READY(&ifp->if_snd);
/*
- * Generate a random ethernet address with a locally administered
- * address.
+ * Generate an ethernet address with the locally administered bit set.
*
* Since we are using random ethernet addresses for the bridge, it is
* possible that we might have address collisions, so make sure that
* this hardware address isn't already in use on another bridge.
+ * The first try uses the hostid and falls back to arc4rand().
*/
+ fb = 0;
+ getcredhostid(curthread->td_ucred, &hostid);
for (retry = 1; retry != 0;) {
- arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
- sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
- sc->sc_defaddr[0] |= 2; /* set the LAA bit */
+ if (fb || hostid == 0) {
+ arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
+ sc->sc_defaddr[0] &= ~1;/* clear multicast bit */
+ sc->sc_defaddr[0] |= 2; /* set the LAA bit */
+ } else {
+ sc->sc_defaddr[0] = 0x2;
+ sc->sc_defaddr[1] = (hostid >> 24) & 0xff;
+ sc->sc_defaddr[2] = (hostid >> 16) & 0xff;
+ sc->sc_defaddr[3] = (hostid >> 8 ) & 0xff;
+ sc->sc_defaddr[4] = hostid & 0xff;
+ sc->sc_defaddr[5] = ifp->if_dunit & 0xff;
+ }
+
+ fb = 1;
retry = 0;
mtx_lock(&bridge_list_mtx);
LIST_FOREACH(sc2, &bridge_list, sc_list) {
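
The new default-MAC scheme is easy to check in isolation: byte 0 carries the locally-administered bit, bytes 1-4 the hostid, and byte 5 the unit number, with arc4rand() used only when hostid is 0 or a collision forces a retry. A small standalone sketch of the deterministic branch (function name and sample hostid are illustrative):

#include <stdint.h>
#include <stdio.h>

static void
bridge_defaddr(uint8_t mac[6], unsigned long hostid, int unit)
{
	mac[0] = 0x02;			/* LAA bit set, multicast bit clear */
	mac[1] = (hostid >> 24) & 0xff;
	mac[2] = (hostid >> 16) & 0xff;
	mac[3] = (hostid >> 8) & 0xff;
	mac[4] = hostid & 0xff;
	mac[5] = unit & 0xff;
}

int
main(void)
{
	uint8_t mac[6];

	bridge_defaddr(mac, 0x1234abcdUL, 0);	/* bridge0 on a sample hostid */
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	    mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return (0);
}
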
@@ -939,6 +967,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
}
+ bridge_linkcheck(sc);
bridge_mutecaps(sc); /* recalculate now this interface is removed */
bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
KASSERT(bif->bif_addrcnt == 0,
@@ -1066,17 +1095,16 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
/* Set interface capabilities to the intersection set of all members */
bridge_mutecaps(sc);
+ bridge_linkcheck(sc);
+ /* Place the interface into promiscuous mode */
switch (ifs->if_type) {
- case IFT_ETHER:
- case IFT_L2VLAN:
- /*
- * Place the interface into promiscuous mode.
- */
- BRIDGE_UNLOCK(sc);
- error = ifpromisc(ifs, 1);
- BRIDGE_LOCK(sc);
- break;
+ case IFT_ETHER:
+ case IFT_L2VLAN:
+ BRIDGE_UNLOCK(sc);
+ error = ifpromisc(ifs, 1);
+ BRIDGE_LOCK(sc);
+ break;
}
if (error)
bridge_delete_member(sc, bif, 0);
@@ -2195,11 +2223,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
/* Tap off 802.1D packets; they do not get forwarded. */
if (memcmp(eh->ether_dhost, bstp_etheraddr,
ETHER_ADDR_LEN) == 0) {
- m = bstp_input(&bif->bif_stp, ifp, m);
- if (m == NULL) {
- BRIDGE_UNLOCK(sc);
- return (NULL);
- }
+ bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */
+ BRIDGE_UNLOCK(sc);
+ return (NULL);
}
if ((bif->bif_flags & IFBIF_STP) &&
@@ -3456,3 +3482,46 @@ out:
m_freem(m);
return (error);
}
+
+static void
+bridge_linkstate(struct ifnet *ifp)
+{
+ struct bridge_softc *sc = ifp->if_bridge;
+ struct bridge_iflist *bif;
+
+ BRIDGE_LOCK(sc);
+ bif = bridge_lookup_member_if(sc, ifp);
+ if (bif == NULL) {
+ BRIDGE_UNLOCK(sc);
+ return;
+ }
+ bridge_linkcheck(sc);
+ BRIDGE_UNLOCK(sc);
+
+ bstp_linkstate(&bif->bif_stp);
+}
+
+static void
+bridge_linkcheck(struct bridge_softc *sc)
+{
+ struct bridge_iflist *bif;
+ int new_link, hasls;
+
+ BRIDGE_LOCK_ASSERT(sc);
+ new_link = LINK_STATE_DOWN;
+ hasls = 0;
+ /* Our link is considered up if at least one of our ports is active */
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ if (bif->bif_ifp->if_capabilities & IFCAP_LINKSTATE)
+ hasls++;
+ if (bif->bif_ifp->if_link_state == LINK_STATE_UP) {
+ new_link = LINK_STATE_UP;
+ break;
+ }
+ }
+ if (!LIST_EMPTY(&sc->sc_iflist) && !hasls) {
+ /* If no interfaces support link-state then we default to up */
+ new_link = LINK_STATE_UP;
+ }
+ if_link_state_change(sc->sc_ifp, new_link);
+}
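
bridge_linkcheck() reduces to a simple rule: the bridge is up if any member reports link up, and defaults to up when members exist but none of them can report link state (no IFCAP_LINKSTATE). A standalone model of that rule, with illustrative names:

#include <stdbool.h>
#include <stddef.h>

struct member {
	bool has_linkstate;	/* member advertises IFCAP_LINKSTATE */
	bool link_up;
};

static bool
bridge_up(const struct member *m, size_t n)
{
	size_t i, hasls = 0;

	for (i = 0; i < n; i++) {
		if (m[i].has_linkstate)
			hasls++;
		if (m[i].link_up)
			return (true);	/* any active port brings us up */
	}
	/* Members exist but none can report link state: assume up. */
	return (n > 0 && hasls == 0);
}
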
diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c
index cd7a6c79..fafc0259 100644
--- a/freebsd/sys/net/if_epair.c
+++ b/freebsd/sys/net/if_epair.c
@@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_clone.h>
+#include <net/if_media.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/netisr.h>
@@ -94,6 +95,8 @@ static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *);
static void epair_nh_drainedcpu(u_int);
static void epair_start_locked(struct ifnet *);
+static int epair_media_change(struct ifnet *);
+static void epair_media_status(struct ifnet *, struct ifmediareq *);
static int epair_clone_match(struct if_clone *, const char *);
static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
@@ -129,6 +132,7 @@ SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
struct epair_softc {
struct ifnet *ifp; /* This ifp. */
struct ifnet *oifp; /* other ifp of pair. */
+ struct ifmedia media; /* Media config (fake). */
u_int refcount; /* # of mbufs in flight. */
u_int cpuid; /* CPU ID assigned upon creation. */
void (*if_qflush)(struct ifnet *);
@@ -191,10 +195,7 @@ epair_dpcpu_init(void)
struct eid_list *s;
u_int cpuid;
- for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
- if (CPU_ABSENT(cpuid))
- continue;
-
+ CPU_FOREACH(cpuid) {
epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
/* Initialize per-cpu lock. */
@@ -219,10 +220,7 @@ epair_dpcpu_detach(void)
struct epair_dpcpu *epair_dpcpu;
u_int cpuid;
- for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
- if (CPU_ABSENT(cpuid))
- continue;
-
+ CPU_FOREACH(cpuid) {
epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
/* Destroy per-cpu lock. */
@@ -332,10 +330,7 @@ epair_remove_ifp_from_draining(struct ifnet *ifp)
struct epair_ifp_drain *elm, *tvar;
u_int cpuid;
- for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
- if (CPU_ABSENT(cpuid))
- continue;
-
+ CPU_FOREACH(cpuid) {
epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
EPAIR_LOCK(epair_dpcpu);
STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list,
@@ -622,8 +617,25 @@ epair_qflush(struct ifnet *ifp)
}
static int
+epair_media_change(struct ifnet *ifp __unused)
+{
+
+ /* Do nothing. */
+ return (0);
+}
+
+static void
+epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr)
+{
+
+ imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
+ imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX;
+}
+
+static int
epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
+ struct epair_softc *sc;
struct ifreq *ifr;
int error;
@@ -635,6 +647,12 @@ epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = 0;
break;
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ sc = ifp->if_softc;
+ error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd);
+ break;
+
case SIOCSIFMTU:
/* We basically allow all kinds of MTUs. */
ifp->if_mtu = ifr->ifr_mtu;
@@ -794,6 +812,8 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifp->if_dname = ifc->ifc_name;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_capabilities = IFCAP_VLAN_MTU;
+ ifp->if_capenable = IFCAP_VLAN_MTU;
ifp->if_start = epair_start;
ifp->if_ioctl = epair_ioctl;
ifp->if_init = epair_init;
@@ -818,6 +838,8 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifp->if_dname = ifc->ifc_name;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_capabilities = IFCAP_VLAN_MTU;
+ ifp->if_capenable = IFCAP_VLAN_MTU;
ifp->if_start = epair_start;
ifp->if_ioctl = epair_ioctl;
ifp->if_init = epair_init;
@@ -840,6 +862,14 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
strlcpy(name, sca->ifp->if_xname, len);
DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
+ /* Initialise pseudo media types. */
+ ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
+ ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+ ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
+ ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
+ ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+ ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
+
/* Tell the world, that we are ready to rock. */
sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
@@ -876,37 +906,41 @@ epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
if_link_state_change(oifp, LINK_STATE_DOWN);
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
oifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
+ /*
+ * Get rid of our second half. As the other of the two
+ * interfaces may reside in a different vnet, we need to
+ * switch before freeing them.
+ */
+ CURVNET_SET_QUIET(oifp->if_vnet);
ether_ifdetach(oifp);
- ether_ifdetach(ifp);
/*
* Wait for all packets to be dispatched to if_input.
- * The numbers can only go down as the interfaces are
+ * The numbers can only go down as the interface is
* detached so there is no need to use atomics.
*/
- DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount);
- EPAIR_REFCOUNT_ASSERT(sca->refcount == 1 && scb->refcount == 1,
- ("%s: ifp=%p sca->refcount!=1: %d || ifp=%p scb->refcount!=1: %d",
- __func__, ifp, sca->refcount, oifp, scb->refcount));
-
- /*
- * Get rid of our second half.
- */
+ DPRINTF("scb refcnt=%u\n", scb->refcount);
+ EPAIR_REFCOUNT_ASSERT(scb->refcount == 1,
+ ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount));
oifp->if_softc = NULL;
error = if_clone_destroyif(ifc, oifp);
if (error)
panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
__func__, error);
+ if_free(oifp);
+ ifmedia_removeall(&scb->media);
+ free(scb, M_EPAIR);
+ CURVNET_RESTORE();
+ ether_ifdetach(ifp);
/*
- * Finish cleaning up. Free them and release the unit.
- * As the other of the two interfaces my reside in a different vnet,
- * we need to switch before freeing them.
+ * Wait for all packets to be dispatched to if_input.
*/
- CURVNET_SET_QUIET(oifp->if_vnet);
- if_free(oifp);
- CURVNET_RESTORE();
+ DPRINTF("sca refcnt=%u\n", sca->refcount);
+ EPAIR_REFCOUNT_ASSERT(sca->refcount == 1,
+ ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount));
if_free(ifp);
- free(scb, M_EPAIR);
+ ifmedia_removeall(&sca->media);
free(sca, M_EPAIR);
ifc_free_unit(ifc, unit);
diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c
index 02a5d002..b7c48731 100644
--- a/freebsd/sys/net/if_ethersubr.c
+++ b/freebsd/sys/net/if_ethersubr.c
@@ -662,8 +662,10 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
m = (*lagg_input_p)(ifp, m);
if (m != NULL)
ifp = m->m_pkthdr.rcvif;
- else
+ else {
+ CURVNET_RESTORE();
return;
+ }
}
/*
@@ -682,6 +684,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
#endif
ifp->if_ierrors++;
m_freem(m);
+ CURVNET_RESTORE();
return;
}
@@ -694,6 +697,8 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
m_adj(m, ETHER_VLAN_ENCAP_LEN);
}
+ M_SETFIB(m, ifp->if_fib);
+
/* Allow ng_ether(4) to claim this frame. */
if (IFP2AC(ifp)->ac_netgraph != NULL) {
KASSERT(ng_ether_input_p != NULL,
diff --git a/freebsd/sys/net/if_faith.c b/freebsd/sys/net/if_faith.c
index d99e16ea..58de362a 100644
--- a/freebsd/sys/net/if_faith.c
+++ b/freebsd/sys/net/if_faith.c
@@ -340,7 +340,7 @@ faithprefix(in6)
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(struct sockaddr_in6);
sin6.sin6_addr = *in6;
- rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
+ rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB);
if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH &&
(rt->rt_ifp->if_flags & IFF_UP) != 0)
ret = 1;
diff --git a/freebsd/sys/net/if_fddisubr.c b/freebsd/sys/net/if_fddisubr.c
index ba4db83f..154fe2fc 100644
--- a/freebsd/sys/net/if_fddisubr.c
+++ b/freebsd/sys/net/if_fddisubr.c
@@ -552,6 +552,7 @@ fddi_input(ifp, m)
ifp->if_noproto++;
goto dropanyway;
}
+ M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
return;
diff --git a/freebsd/sys/net/if_fwsubr.c b/freebsd/sys/net/if_fwsubr.c
index a9931419..df90d48d 100644
--- a/freebsd/sys/net/if_fwsubr.c
+++ b/freebsd/sys/net/if_fwsubr.c
@@ -629,6 +629,7 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
return;
}
+ M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
}
diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c
index d9144419..1a8e4c8d 100644
--- a/freebsd/sys/net/if_gif.c
+++ b/freebsd/sys/net/if_gif.c
@@ -37,6 +37,7 @@
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -493,7 +494,7 @@ gif_input(m, af, ifp)
struct ifnet *ifp;
{
int isr, n;
- struct gif_softc *sc = ifp->if_softc;
+ struct gif_softc *sc;
struct etherip_header *eip;
struct ether_header *eh;
struct ifnet *oldifp;
@@ -503,7 +504,7 @@ gif_input(m, af, ifp)
m_freem(m);
return;
}
-
+ sc = ifp->if_softc;
m->m_pkthdr.rcvif = ifp;
#ifdef MAC
@@ -614,6 +615,7 @@ gif_input(m, af, ifp)
ifp->if_ipackets++;
ifp->if_ibytes += m->m_pkthdr.len;
+ M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
}
@@ -823,6 +825,12 @@ gif_ioctl(ifp, cmd, data)
}
if (src->sa_len > size)
return EINVAL;
+ error = prison_if(curthread->td_ucred, src);
+ if (error != 0)
+ return (error);
+ error = prison_if(curthread->td_ucred, dst);
+ if (error != 0)
+ return (error);
bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
#ifdef INET6
if (dst->sa_family == AF_INET6) {
diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c
index a75e52a4..21f39eb2 100644
--- a/freebsd/sys/net/if_gre.c
+++ b/freebsd/sys/net/if_gre.c
@@ -55,7 +55,9 @@
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mbuf.h>
@@ -99,6 +101,14 @@
#define GRENAME "gre"
+#define MTAG_COOKIE_GRE 1307983903
+#define MTAG_GRE_NESTING 1
+struct mtag_gre_nesting {
+ uint16_t count;
+ uint16_t max;
+ struct ifnet *ifp[];
+};
+
/*
* gre_mtx protects all global variables in if_gre.c.
* XXX: gre_softc data not protected yet.
@@ -204,7 +214,6 @@ gre_clone_create(ifc, unit, params)
sc->g_proto = IPPROTO_GRE;
GRE2IFP(sc)->if_flags |= IFF_LINK0;
sc->encap = NULL;
- sc->called = 0;
#ifndef __rtems__
sc->gre_fibnum = curthread->td_proc->p_fibnum;
#else /* __rtems__ */
@@ -252,23 +261,77 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
struct gre_softc *sc = ifp->if_softc;
struct greip *gh;
struct ip *ip;
+ struct m_tag *mtag;
+ struct mtag_gre_nesting *gt;
+ size_t len;
u_short gre_ip_id = 0;
uint8_t gre_ip_tos = 0;
u_int16_t etype = 0;
struct mobile_h mob_h;
u_int32_t af;
- int extra = 0;
+ int extra = 0, max;
/*
- * gre may cause infinite recursion calls when misconfigured.
- * We'll prevent this by introducing upper limit.
+ * gre may cause infinite recursion when misconfigured. A high
+ * nesting level may also exhaust the stack. We'll prevent this by
+ * detecting loops and by introducing an upper limit.
*/
- if (++(sc->called) > max_gre_nesting) {
- printf("%s: gre_output: recursively called too many "
- "times(%d)\n", if_name(GRE2IFP(sc)), sc->called);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
+ mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL);
+ if (mtag != NULL) {
+ struct ifnet **ifp2;
+
+ gt = (struct mtag_gre_nesting *)(mtag + 1);
+ gt->count++;
+ if (gt->count > min(gt->max, max_gre_nesting)) {
+ printf("%s: hit maximum recursion limit %u on %s\n",
+ __func__, gt->count - 1, ifp->if_xname);
+ m_freem(m);
+ error = EIO; /* is there better errno? */
+ goto end;
+ }
+
+ ifp2 = gt->ifp;
+ for (max = gt->count - 1; max > 0; max--) {
+ if (*ifp2 == ifp)
+ break;
+ ifp2++;
+ }
+ if (*ifp2 == ifp) {
+ printf("%s: detected loop with nexting %u on %s\n",
+ __func__, gt->count-1, ifp->if_xname);
+ m_freem(m);
+ error = EIO; /* is there better errno? */
+ goto end;
+ }
+ *ifp2 = ifp;
+
+ } else {
+ /*
+ * Given that people should NOT increase max_gre_nesting beyond
+ * their real needs, we allocate once per packet rather than
+ * allocating an mtag once per pass through gre.
+ *
+ * Note: the sysctl does not actually sanity-check the value, so we
+ * limit the maximum number of possible recursions here.
+ */
+ max = imin(max_gre_nesting, 256);
+ /* If someone sets the sysctl <= 0, we want at least 1. */
+ max = imax(max, 1);
+ len = sizeof(struct mtag_gre_nesting) +
+ max * sizeof(struct ifnet *);
+ mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len,
+ M_NOWAIT);
+ if (mtag == NULL) {
+ m_freem(m);
+ error = ENOMEM;
+ goto end;
+ }
+ gt = (struct mtag_gre_nesting *)(mtag + 1);
+ bzero(gt, len);
+ gt->count = 1;
+ gt->max = max;
+ *gt->ifp = ifp;
+ m_tag_prepend(m, mtag);
}
if (!((ifp->if_flags & IFF_UP) &&
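
The mtag replaces the old per-softc 'called' counter, which could not tell recursion apart from a loop and was shared across concurrent transmissions. A userland sketch of the per-packet guard it implements: a bounded array of interfaces the packet has already left through, where a repeat entry signals a loop (struct and function names here are illustrative):

#include <stdint.h>
#include <stdlib.h>

struct nest_tag {
	uint16_t count;		/* entries used */
	uint16_t max;		/* capacity */
	const void *ifp[];	/* interfaces already traversed */
};

static struct nest_tag *
nest_alloc(uint16_t max)
{
	struct nest_tag *gt;

	gt = calloc(1, sizeof(*gt) + max * sizeof(gt->ifp[0]));
	if (gt != NULL)
		gt->max = max;
	return (gt);
}

/* Returns 0 if ifp may transmit; -1 on depth overflow or loop. */
static int
nest_enter(struct nest_tag *gt, const void *ifp)
{
	uint16_t i;

	if (gt->count >= gt->max)
		return (-1);		/* recursion limit hit */
	for (i = 0; i < gt->count; i++)
		if (gt->ifp[i] == ifp)
			return (-1);	/* same interface twice: a loop */
	gt->ifp[gt->count++] = ifp;
	return (0);
}
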
@@ -456,7 +519,6 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
(struct ip_moptions *)NULL, (struct inpcb *)NULL);
end:
- sc->called = 0;
if (error)
ifp->if_oerrors++;
return (error);
@@ -649,6 +711,9 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
si.sin_len = sizeof(struct sockaddr_in);
si.sin_addr.s_addr = sc->g_src.s_addr;
sa = sintosa(&si);
+ error = prison_if(curthread->td_ucred, sa);
+ if (error != 0)
+ break;
ifr->ifr_addr = *sa;
break;
case GREGADDRD:
@@ -657,6 +722,9 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
si.sin_len = sizeof(struct sockaddr_in);
si.sin_addr.s_addr = sc->g_dst.s_addr;
sa = sintosa(&si);
+ error = prison_if(curthread->td_ucred, sa);
+ if (error != 0)
+ break;
ifr->ifr_addr = *sa;
break;
case SIOCSIFPHYADDR:
@@ -720,8 +788,14 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
si.sin_family = AF_INET;
si.sin_len = sizeof(struct sockaddr_in);
si.sin_addr.s_addr = sc->g_src.s_addr;
+ error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+ if (error != 0)
+ break;
memcpy(&lifr->addr, &si, sizeof(si));
si.sin_addr.s_addr = sc->g_dst.s_addr;
+ error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+ if (error != 0)
+ break;
memcpy(&lifr->dstaddr, &si, sizeof(si));
break;
case SIOCGIFPSRCADDR:
@@ -736,6 +810,9 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
si.sin_family = AF_INET;
si.sin_len = sizeof(struct sockaddr_in);
si.sin_addr.s_addr = sc->g_src.s_addr;
+ error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+ if (error != 0)
+ break;
bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
break;
case SIOCGIFPDSTADDR:
@@ -750,6 +827,9 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
si.sin_family = AF_INET;
si.sin_len = sizeof(struct sockaddr_in);
si.sin_addr.s_addr = sc->g_dst.s_addr;
+ error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+ if (error != 0)
+ break;
bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
break;
case GRESKEY:
diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h
index 186d4cc6..13b882c8 100644
--- a/freebsd/sys/net/if_gre.h
+++ b/freebsd/sys/net/if_gre.h
@@ -68,8 +68,6 @@ struct gre_softc {
const struct encaptab *encap; /* encapsulation cookie */
- int called; /* infinite recursion preventer */
-
uint32_t key; /* key included in outgoing GRE packets */
/* zero means none */
diff --git a/freebsd/sys/net/if_iso88025subr.c b/freebsd/sys/net/if_iso88025subr.c
index 6a39956e..b52853a2 100644
--- a/freebsd/sys/net/if_iso88025subr.c
+++ b/freebsd/sys/net/if_iso88025subr.c
@@ -682,6 +682,7 @@ iso88025_input(ifp, m)
break;
}
+ M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
return;
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
index a1c90cdf..5d5064a4 100644
--- a/freebsd/sys/net/if_lagg.c
+++ b/freebsd/sys/net/if_lagg.c
@@ -169,6 +169,11 @@ static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
&lagg_failover_rx_all, 0,
"Accept input from any interface in a failover lagg");
+static int def_use_flowid = 1; /* Default value for using M_FLOWID */
+TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
+ &def_use_flowid, 0,
+ "Default setting for using flow id for load sharing");
static int
lagg_modevent(module_t mod, int type, void *data)
@@ -206,6 +211,7 @@ static moduledata_t lagg_mod = {
};
DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_lagg, 1);
#if __FreeBSD_version >= 800000
/*
@@ -258,6 +264,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
struct ifnet *ifp;
int i, error = 0;
static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
+ struct sysctl_oid *oid;
+ char num[14]; /* sufficient for 32 bits */
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -266,6 +274,17 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
return (ENOSPC);
}
+ sysctl_ctx_init(&sc->ctx);
+ snprintf(num, sizeof(num), "%u", unit);
+ sc->use_flowid = def_use_flowid;
+ oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg),
+ OID_AUTO, num, CTLFLAG_RD, NULL, "");
+ SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
+ "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
+ "Use flow id for load sharing");
+ /* Hash all layers by default */
+ sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
+
sc->sc_proto = LAGG_PROTO_NONE;
for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
@@ -345,6 +364,7 @@ lagg_clone_destroy(struct ifnet *ifp)
LAGG_WUNLOCK(sc);
+ sysctl_ctx_free(&sc->ctx);
ifmedia_removeall(&sc->sc_media);
ether_ifdetach(ifp);
if_free_type(ifp, IFT_ETHER);
@@ -738,28 +758,18 @@ fallback:
return (EINVAL);
}
+/*
+ * For direct output to child ports.
+ */
static int
lagg_port_output(struct ifnet *ifp, struct mbuf *m,
struct sockaddr *dst, struct route *ro)
{
struct lagg_port *lp = ifp->if_lagg;
- struct ether_header *eh;
- short type = 0;
switch (dst->sa_family) {
case pseudo_AF_HDRCMPLT:
case AF_UNSPEC:
- eh = (struct ether_header *)dst->sa_data;
- type = eh->ether_type;
- break;
- }
-
- /*
- * Only allow ethernet types required to initiate or maintain the link,
- * aggregated frames take a different path.
- */
- switch (ntohs(type)) {
- case ETHERTYPE_PAE: /* EAPOL PAE/802.1x */
return ((*lp->lp_output)(ifp, m, dst, ro));
}
@@ -776,6 +786,9 @@ lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
if ((lp = ifp->if_lagg) == NULL)
return;
+ /* If the ifnet is just being renamed, don't do anything. */
+ if (ifp->if_flags & IFF_RENAMING)
+ return;
sc = lp->lp_softc;
@@ -871,6 +884,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
struct lagg_reqall *ra = (struct lagg_reqall *)data;
struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
+ struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
struct ifreq *ifr = (struct ifreq *)data;
struct lagg_port *lp;
struct ifnet *tpif;
@@ -923,11 +937,11 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = EPROTONOSUPPORT;
break;
}
+ LAGG_WLOCK(sc);
if (sc->sc_proto != LAGG_PROTO_NONE) {
- LAGG_WLOCK(sc);
- error = sc->sc_detach(sc);
- /* Reset protocol and pointers */
+ /* Reset protocol first in case detach unlocks */
sc->sc_proto = LAGG_PROTO_NONE;
+ error = sc->sc_detach(sc);
sc->sc_detach = NULL;
sc->sc_start = NULL;
sc->sc_input = NULL;
@@ -939,10 +953,14 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
sc->sc_lladdr = NULL;
sc->sc_req = NULL;
sc->sc_portreq = NULL;
- LAGG_WUNLOCK(sc);
+ } else if (sc->sc_input != NULL) {
+ /* Still detaching */
+ error = EBUSY;
}
- if (error != 0)
+ if (error != 0) {
+ LAGG_WUNLOCK(sc);
break;
+ }
for (int i = 0; i < (sizeof(lagg_protos) /
sizeof(lagg_protos[0])); i++) {
if (lagg_protos[i].ti_proto == ra->ra_proto) {
@@ -950,7 +968,6 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
printf("%s: using proto %u\n",
sc->sc_ifname,
lagg_protos[i].ti_proto);
- LAGG_WLOCK(sc);
sc->sc_proto = lagg_protos[i].ti_proto;
if (sc->sc_proto != LAGG_PROTO_NONE)
error = lagg_protos[i].ti_attach(sc);
@@ -958,8 +975,25 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
return (error);
}
}
+ LAGG_WUNLOCK(sc);
error = EPROTONOSUPPORT;
break;
+ case SIOCGLAGGFLAGS:
+ rf->rf_flags = sc->sc_flags;
+ break;
+ case SIOCSLAGGHASH:
+ error = priv_check(td, PRIV_NET_LAGG);
+ if (error)
+ break;
+ if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
+ error = EINVAL;
+ break;
+ }
+ LAGG_WLOCK(sc);
+ sc->sc_flags &= ~LAGG_F_HASHMASK;
+ sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
+ LAGG_WUNLOCK(sc);
+ break;
case SIOCGLAGGPORT:
if (rp->rp_portname[0] == '\0' ||
(tpif = ifunit(rp->rp_portname)) == NULL) {
@@ -1215,14 +1249,15 @@ lagg_input(struct ifnet *ifp, struct mbuf *m)
struct lagg_softc *sc = lp->lp_softc;
struct ifnet *scifp = sc->sc_ifp;
+ LAGG_RLOCK(sc);
if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
(lp->lp_flags & LAGG_PORT_DISABLED) ||
sc->sc_proto == LAGG_PROTO_NONE) {
+ LAGG_RUNLOCK(sc);
m_freem(m);
return (NULL);
}
- LAGG_RLOCK(sc);
ETHER_BPF_MTAP(scifp, m);
m = (*sc->sc_input)(sc, lp, m);
@@ -1388,42 +1423,55 @@ lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
}
uint32_t
-lagg_hashmbuf(struct mbuf *m, uint32_t key)
+lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
{
uint16_t etype;
- uint32_t p = 0;
+ uint32_t p = key;
int off;
struct ether_header *eh;
- struct ether_vlan_header vlanbuf;
const struct ether_vlan_header *vlan;
#ifdef INET
const struct ip *ip;
- struct ip ipbuf;
+ const uint32_t *ports;
+ int iphlen;
#endif
#ifdef INET6
const struct ip6_hdr *ip6;
- struct ip6_hdr ip6buf;
uint32_t flow;
#endif
+ union {
+#ifdef INET
+ struct ip ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr ip6;
+#endif
+ struct ether_vlan_header vlan;
+ uint32_t port;
+ } buf;
+
off = sizeof(*eh);
if (m->m_len < off)
goto out;
eh = mtod(m, struct ether_header *);
etype = ntohs(eh->ether_type);
- p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
- p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+ if (sc->sc_flags & LAGG_F_HASHL2) {
+ p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
+ p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+ }
/* Special handling for encapsulating VLAN frames */
- if (m->m_flags & M_VLANTAG) {
+ if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
p = hash32_buf(&m->m_pkthdr.ether_vtag,
sizeof(m->m_pkthdr.ether_vtag), p);
} else if (etype == ETHERTYPE_VLAN) {
- vlan = lagg_gethdr(m, off, sizeof(*vlan), &vlanbuf);
+ vlan = lagg_gethdr(m, off, sizeof(*vlan), &buf);
if (vlan == NULL)
goto out;
- p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+ if (sc->sc_flags & LAGG_F_HASHL2)
+ p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
etype = ntohs(vlan->evl_proto);
off += sizeof(*vlan) - sizeof(*eh);
}
@@ -1431,17 +1479,37 @@ lagg_hashmbuf(struct mbuf *m, uint32_t key)
switch (etype) {
#ifdef INET
case ETHERTYPE_IP:
- ip = lagg_gethdr(m, off, sizeof(*ip), &ipbuf);
+ ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
if (ip == NULL)
goto out;
- p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
- p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+ if (sc->sc_flags & LAGG_F_HASHL3) {
+ p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
+ p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+ }
+ if (!(sc->sc_flags & LAGG_F_HASHL4))
+ break;
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_SCTP:
+ iphlen = ip->ip_hl << 2;
+ if (iphlen < sizeof(*ip))
+ break;
+ off += iphlen;
+ ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
+ if (ports == NULL)
+ break;
+ p = hash32_buf(ports, sizeof(*ports), p);
+ break;
+ }
break;
#endif
#ifdef INET6
case ETHERTYPE_IPV6:
- ip6 = lagg_gethdr(m, off, sizeof(*ip6), &ip6buf);
+ if (!(sc->sc_flags & LAGG_F_HASHL3))
+ break;
+ ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
if (ip6 == NULL)
goto out;
@@ -1668,10 +1736,10 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
struct lagg_port *lp = NULL;
uint32_t p = 0;
- if (m->m_flags & M_FLOWID)
+ if (sc->use_flowid && (m->m_flags & M_FLOWID))
p = m->m_pkthdr.flowid;
else
- p = lagg_hashmbuf(m, lb->lb_key);
+ p = lagg_hashmbuf(sc, m, lb->lb_key);
p %= sc->sc_count;
lp = lb->lb_ports[p];
@@ -1788,7 +1856,7 @@ lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
etype = ntohs(eh->ether_type);
/* Tap off LACP control messages */
- if (etype == ETHERTYPE_SLOW) {
+ if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
m = lacp_input(lp, m);
if (m == NULL)
return (NULL);
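
With LAGG_F_HASHL2/L3/L4 and the per-lagg use_flowid sysctl, port selection is now tunable per protocol layer. A standalone sketch of the selectable folding; the hash routine is a djb2-style stand-in for the kernel's hash32_buf(), and the flattened header arguments are illustrative:

#include <stdint.h>
#include <stddef.h>

#define HASHL2	0x1	/* MAC addresses and VLAN tag */
#define HASHL3	0x2	/* IP addresses */
#define HASHL4	0x4	/* TCP/UDP/SCTP ports */

/* djb2-style stand-in for the kernel's hash32_buf(). */
static uint32_t
hash32(const void *buf, size_t len, uint32_t hash)
{
	const unsigned char *p = buf;

	while (len--)
		hash = (hash << 5) + hash + *p++;
	return (hash);
}

static uint32_t
flow_hash(uint32_t flags, const uint8_t eth[12],
    const uint8_t ipaddrs[8], const uint8_t ports[4], uint32_t key)
{
	uint32_t p = key;

	if (flags & HASHL2)
		p = hash32(eth, 12, p);		/* src + dst MAC */
	if (flags & HASHL3)
		p = hash32(ipaddrs, 8, p);	/* src + dst IPv4 address */
	if (flags & HASHL4)
		p = hash32(ports, 4, p);	/* src + dst port pair */
	return (p);
}
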
diff --git a/freebsd/sys/net/if_lagg.h b/freebsd/sys/net/if_lagg.h
index 0034c617..27ab46f2 100644
--- a/freebsd/sys/net/if_lagg.h
+++ b/freebsd/sys/net/if_lagg.h
@@ -21,6 +21,8 @@
#ifndef _NET_LAGG_H
#define _NET_LAGG_H
+#include <sys/sysctl.h>
+
/*
* Global definitions
*/
@@ -29,6 +31,12 @@
#define LAGG_MAX_NAMESIZE 32 /* name of a protocol */
#define LAGG_MAX_STACKING 4 /* maximum number of stacked laggs */
+/* Lagg flags */
+#define LAGG_F_HASHL2 0x00000001 /* hash layer 2 */
+#define LAGG_F_HASHL3 0x00000002 /* hash layer 3 */
+#define LAGG_F_HASHL4 0x00000004 /* hash layer 4 */
+#define LAGG_F_HASHMASK 0x00000007
+
/* Port flags */
#define LAGG_PORT_SLAVE 0x00000000 /* normal enslaved port */
#define LAGG_PORT_MASTER 0x00000001 /* primary port */
@@ -120,6 +128,14 @@ struct lagg_reqall {
#define SIOCGLAGG _IOWR('i', 143, struct lagg_reqall)
#define SIOCSLAGG _IOW('i', 144, struct lagg_reqall)
+struct lagg_reqflags {
+ char rf_ifname[IFNAMSIZ]; /* name of the lagg */
+ uint32_t rf_flags; /* lagg protocol */
+};
+
+#define SIOCGLAGGFLAGS _IOWR('i', 145, struct lagg_reqflags)
+#define SIOCSLAGGHASH _IOW('i', 146, struct lagg_reqflags)
+
#ifdef _KERNEL
/*
* Internal kernel part
@@ -177,6 +193,7 @@ struct lagg_softc {
struct ifmedia sc_media; /* media config */
caddr_t sc_psc; /* protocol data */
uint32_t sc_seq; /* sequence counter */
+ uint32_t sc_flags;
SLIST_HEAD(__tplhd, lagg_port) sc_ports; /* list of interfaces */
SLIST_ENTRY(lagg_softc) sc_entries;
@@ -202,6 +219,8 @@ struct lagg_softc {
eventhandler_tag vlan_attach;
eventhandler_tag vlan_detach;
#endif
+ struct sysctl_ctx_list ctx; /* sysctl variables */
+ int use_flowid; /* use M_FLOWID */
};
struct lagg_port {
@@ -240,7 +259,7 @@ extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
extern void (*lagg_linkstate_p)(struct ifnet *, int );
int lagg_enqueue(struct ifnet *, struct mbuf *);
-uint32_t lagg_hashmbuf(struct mbuf *, uint32_t);
+uint32_t lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t);
#endif /* _KERNEL */
diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c
index 3ffcc21a..80888559 100644
--- a/freebsd/sys/net/if_llatbl.c
+++ b/freebsd/sys/net/if_llatbl.c
@@ -102,18 +102,35 @@ done:
* This function is called by the timer functions
* such as arptimer() and nd6_llinfo_timer(), and
* the caller does the locking.
+ *
+ * Returns the number of held packets, if any, that were dropped.
*/
-void
+size_t
llentry_free(struct llentry *lle)
{
-
+ size_t pkts_dropped;
+ struct mbuf *next;
+
+ pkts_dropped = 0;
LLE_WLOCK_ASSERT(lle);
LIST_REMOVE(lle, lle_next);
- if (lle->la_hold != NULL)
+ while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
+ next = lle->la_hold->m_nextpkt;
m_freem(lle->la_hold);
+ lle->la_hold = next;
+ lle->la_numheld--;
+ pkts_dropped++;
+ }
+
+ KASSERT(lle->la_numheld == 0,
+ ("%s: la_numheld %d > 0, pkts_droped %zd", __func__,
+ lle->la_numheld, pkts_dropped));
+ lle->la_flags &= ~LLE_VALID;
LLE_FREE_LOCKED(lle);
+
+ return (pkts_dropped);
}
/*
@@ -214,7 +231,8 @@ lltable_drain(int af)
#endif
void
-lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask)
+lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask,
+ u_int flags)
{
struct lltable *llt;
@@ -223,7 +241,7 @@ lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask)
if (llt->llt_af != af)
continue;
- llt->llt_prefix_free(llt, prefix, mask);
+ llt->llt_prefix_free(llt, prefix, mask, flags);
}
LLTABLE_RUNLOCK();
}
@@ -414,6 +432,7 @@ llatbl_lle_show(struct llentry_sa *la)
db_printf(" lle_tbl=%p\n", lle->lle_tbl);
db_printf(" lle_head=%p\n", lle->lle_head);
db_printf(" la_hold=%p\n", lle->la_hold);
+ db_printf(" la_numheld=%d\n", lle->la_numheld);
db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire);
db_printf(" la_flags=0x%04x\n", lle->la_flags);
db_printf(" la_asked=%u\n", lle->la_asked);
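
llentry_free() now walks the la_hold chain via m_nextpkt, freeing every held packet and returning the count so callers can account for the drops. A minimal userland model of that drain loop, with struct pkt standing in for an mbuf:

#include <stdlib.h>

struct pkt {
	struct pkt *nextpkt;	/* like mbuf m_nextpkt */
};

static size_t
drain_held(struct pkt **hold, int *numheld)
{
	struct pkt *next;
	size_t dropped = 0;

	while (*numheld > 0 && *hold != NULL) {
		next = (*hold)->nextpkt;
		free(*hold);
		*hold = next;
		(*numheld)--;
		dropped++;
	}
	return (dropped);
}
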
diff --git a/freebsd/sys/net/if_llatbl.h b/freebsd/sys/net/if_llatbl.h
index a4d02ab0..8b15e5c8 100644
--- a/freebsd/sys/net/if_llatbl.h
+++ b/freebsd/sys/net/if_llatbl.h
@@ -58,6 +58,7 @@ struct llentry {
struct lltable *lle_tbl;
struct llentries *lle_head;
struct mbuf *la_hold;
+ int la_numheld; /* # of packets currently held */
time_t la_expire;
uint16_t la_flags;
uint16_t la_asked;
@@ -115,19 +116,12 @@ struct llentry {
LLE_WUNLOCK(lle); \
} \
/* guard against invalid refs */ \
- lle = 0; \
+ lle = NULL; \
} while (0)
#define LLE_FREE(lle) do { \
LLE_WLOCK(lle); \
- if ((lle)->lle_refcnt <= 1) \
- (lle)->lle_tbl->llt_free((lle)->lle_tbl, (lle));\
- else { \
- (lle)->lle_refcnt--; \
- LLE_WUNLOCK(lle); \
- } \
- /* guard against invalid refs */ \
- lle = NULL; \
+ LLE_FREE_LOCKED(lle); \
} while (0)
@@ -152,15 +146,13 @@ struct lltable {
int llt_af;
struct ifnet *llt_ifp;
- struct llentry * (*llt_new)(const struct sockaddr *, u_int);
void (*llt_free)(struct lltable *, struct llentry *);
void (*llt_prefix_free)(struct lltable *,
const struct sockaddr *prefix,
- const struct sockaddr *mask);
+ const struct sockaddr *mask,
+ u_int flags);
struct llentry * (*llt_lookup)(struct lltable *, u_int flags,
const struct sockaddr *l3addr);
- int (*llt_rtcheck)(struct ifnet *, u_int flags,
- const struct sockaddr *);
int (*llt_dump)(struct lltable *,
struct sysctl_req *);
};
@@ -185,13 +177,13 @@ MALLOC_DECLARE(M_LLTABLE);
struct lltable *lltable_init(struct ifnet *, int);
void lltable_free(struct lltable *);
void lltable_prefix_free(int, struct sockaddr *,
- struct sockaddr *);
+ struct sockaddr *, u_int);
#if 0
void lltable_drain(int);
#endif
int lltable_sysctl_dumparp(int, struct sysctl_req *);
-void llentry_free(struct llentry *);
+size_t llentry_free(struct llentry *);
int llentry_update(struct llentry **, struct lltable *,
struct sockaddr_storage *, struct ifnet *);
diff --git a/freebsd/sys/net/if_media.c b/freebsd/sys/net/if_media.c
index 46b57b42..3bc6122c 100644
--- a/freebsd/sys/net/if_media.c
+++ b/freebsd/sys/net/if_media.c
@@ -237,7 +237,7 @@ ifmedia_ioctl(ifp, ifr, ifm, cmd)
/*
* If no change, we're done.
 * XXX Automedia may involve software intervention.
- * Keep going in case the the connected media changed.
+ * Keep going in case the connected media changed.
* Similarly, if best match changed (kernel debugger?).
*/
if ((IFM_SUBTYPE(newmedia) != IFM_AUTO) &&
diff --git a/freebsd/sys/net/if_media.h b/freebsd/sys/net/if_media.h
index 337ad685..2c833228 100644
--- a/freebsd/sys/net/if_media.h
+++ b/freebsd/sys/net/if_media.h
@@ -36,7 +36,7 @@
*/
#ifndef _NET_IF_MEDIA_H_
-#define _NET_IF_MEDIA_H_
+#define _NET_IF_MEDIA_H_
/*
* Prototypes and definitions for BSD/OS-compatible network interface
@@ -144,13 +144,12 @@ uint64_t ifmedia_baudrate(int);
#define IFM_10G_LR 18 /* 10GBase-LR 1310nm Single-mode */
#define IFM_10G_SR 19 /* 10GBase-SR 850nm Multi-mode */
#define IFM_10G_CX4 20 /* 10GBase CX4 copper */
-#define IFM_2500_SX 21 /* 2500BaseSX - multi-mode fiber */
-#define IFM_10G_TWINAX 22 /* 10GBase Twinax copper */
-#define IFM_10G_TWINAX_LONG 23 /* 10GBase Twinax Long copper */
-#define IFM_10G_LRM 24 /* 10GBase-LRM 850nm Multi-mode */
-#define IFM_UNKNOWN 25 /* media types not defined yet */
-#define IFM_10G_T 26 /* 10GBase-T - RJ45 */
-
+#define IFM_2500_SX 21 /* 2500BaseSX - multi-mode fiber */
+#define IFM_10G_TWINAX 22 /* 10GBase Twinax copper */
+#define IFM_10G_TWINAX_LONG 23 /* 10GBase Twinax Long copper */
+#define IFM_10G_LRM 24 /* 10GBase-LRM 850nm Multi-mode */
+#define IFM_UNKNOWN 25 /* media types not defined yet */
+#define IFM_10G_T 26 /* 10GBase-T - RJ45 */
/* note 31 is the max! */
@@ -232,20 +231,20 @@ uint64_t ifmedia_baudrate(int);
/*
* ATM
*/
-#define IFM_ATM 0x000000a0
-#define IFM_ATM_UNKNOWN 3
-#define IFM_ATM_UTP_25 4
-#define IFM_ATM_TAXI_100 5
-#define IFM_ATM_TAXI_140 6
-#define IFM_ATM_MM_155 7
-#define IFM_ATM_SM_155 8
-#define IFM_ATM_UTP_155 9
-#define IFM_ATM_MM_622 10
-#define IFM_ATM_SM_622 11
+#define IFM_ATM 0x000000a0
+#define IFM_ATM_UNKNOWN 3
+#define IFM_ATM_UTP_25 4
+#define IFM_ATM_TAXI_100 5
+#define IFM_ATM_TAXI_140 6
+#define IFM_ATM_MM_155 7
+#define IFM_ATM_SM_155 8
+#define IFM_ATM_UTP_155 9
+#define IFM_ATM_MM_622 10
+#define IFM_ATM_SM_622 11
#define IFM_ATM_VIRTUAL 12
-#define IFM_ATM_SDH 0x00000100 /* SDH instead of SONET */
-#define IFM_ATM_NOSCRAMB 0x00000200 /* no scrambling */
-#define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */
+#define IFM_ATM_SDH 0x00000100 /* SDH instead of SONET */
+#define IFM_ATM_NOSCRAMB 0x00000200 /* no scrambling */
+#define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */
/*
* CARP Common Address Redundancy Protocol
@@ -295,22 +294,22 @@ uint64_t ifmedia_baudrate(int);
#define IFM_STATUS_VALID IFM_AVALID
/* List of "status valid" bits, for ifconfig(8). */
-#define IFM_STATUS_VALID_LIST { \
- IFM_AVALID, \
- 0 \
+#define IFM_STATUS_VALID_LIST { \
+ IFM_AVALID, \
+ 0 \
}
/*
* Macros to extract various bits of information from the media word.
*/
-#define IFM_TYPE(x) ((x) & IFM_NMASK)
-#define IFM_SUBTYPE(x) ((x) & IFM_TMASK)
-#define IFM_TYPE_OPTIONS(x) ((x) & IFM_OMASK)
-#define IFM_INST(x) (((x) & IFM_IMASK) >> IFM_ISHIFT)
-#define IFM_OPTIONS(x) ((x) & (IFM_OMASK|IFM_GMASK))
-#define IFM_MODE(x) ((x) & IFM_MMASK)
+#define IFM_TYPE(x) ((x) & IFM_NMASK)
+#define IFM_SUBTYPE(x) ((x) & IFM_TMASK)
+#define IFM_TYPE_OPTIONS(x) ((x) & IFM_OMASK)
+#define IFM_INST(x) (((x) & IFM_IMASK) >> IFM_ISHIFT)
+#define IFM_OPTIONS(x) ((x) & (IFM_OMASK | IFM_GMASK))
+#define IFM_MODE(x) ((x) & IFM_MMASK)
-#define IFM_INST_MAX IFM_INST(IFM_IMASK)
+#define IFM_INST_MAX IFM_INST(IFM_IMASK)
/*
* Macro to create a media word.
@@ -371,6 +370,7 @@ struct ifmedia_description {
}
#define IFM_SUBTYPE_ETHERNET_ALIASES { \
+ { IFM_10_T, "10baseT" }, \
{ IFM_10_T, "UTP" }, \
{ IFM_10_T, "10UTP" }, \
{ IFM_10_2, "BNC" }, \
@@ -390,6 +390,23 @@ struct ifmedia_description {
{ IFM_1000_T, "1000TX" }, \
{ IFM_1000_T, "1000T" }, \
{ IFM_2500_SX, "2500SX" }, \
+ \
+ /* \
+ * Shorthands for common media+option combinations as announced \
+ * by miibus(4) \
+ */ \
+ { IFM_10_T | IFM_FDX, "10baseT-FDX" }, \
+ { IFM_10_T | IFM_FDX | IFM_FLOW, "10baseT-FDX-flow" }, \
+ { IFM_100_TX | IFM_FDX, "100baseTX-FDX" }, \
+ { IFM_100_TX | IFM_FDX | IFM_FLOW, "100baseTX-FDX-flow" }, \
+ { IFM_1000_T | IFM_FDX, "1000baseT-FDX" }, \
+ { IFM_1000_T | IFM_FDX | IFM_FLOW, "1000baseT-FDX-flow" }, \
+ { IFM_1000_T | IFM_FDX | IFM_FLOW | IFM_ETH_MASTER, \
+ "1000baseT-FDX-flow-master" }, \
+ { IFM_1000_T | IFM_FDX | IFM_ETH_MASTER, \
+ "1000baseT-FDX-master" }, \
+ { IFM_1000_T | IFM_ETH_MASTER, "1000baseT-master" }, \
+ \
{ 0, NULL }, \
}
@@ -539,7 +556,7 @@ struct ifmedia_description {
{ 0, NULL }, \
}
-# define IFM_SUBTYPE_ATM_DESCRIPTIONS { \
+#define IFM_SUBTYPE_ATM_DESCRIPTIONS { \
{ IFM_ATM_UNKNOWN, "Unknown" }, \
{ IFM_ATM_UTP_25, "UTP/25.6MBit" }, \
{ IFM_ATM_TAXI_100, "Taxi/100MBit" }, \
@@ -553,7 +570,7 @@ struct ifmedia_description {
{ 0, NULL }, \
}
-# define IFM_SUBTYPE_ATM_ALIASES { \
+#define IFM_SUBTYPE_ATM_ALIASES { \
{ IFM_ATM_UNKNOWN, "UNKNOWN" }, \
{ IFM_ATM_UTP_25, "UTP-25" }, \
{ IFM_ATM_TAXI_100, "TAXI-100" }, \
@@ -574,7 +591,6 @@ struct ifmedia_description {
{ 0, NULL }, \
}
-
#define IFM_SUBTYPE_SHARED_DESCRIPTIONS { \
{ IFM_AUTO, "autoselect" }, \
{ IFM_MANUAL, "manual" }, \
@@ -584,6 +600,13 @@ struct ifmedia_description {
#define IFM_SUBTYPE_SHARED_ALIASES { \
{ IFM_AUTO, "auto" }, \
+ \
+ /* \
+ * Shorthands for common media+option combinations as announced \
+ * by miibus(4) \
+ */ \
+ { IFM_AUTO | IFM_FLOW, "auto-flow" }, \
+ \
{ 0, NULL }, \
}
@@ -598,6 +621,15 @@ struct ifmedia_description {
{ 0, NULL }, \
}
+#define IFM_SHARED_OPTION_ALIASES { \
+ { IFM_FDX, "fdx" }, \
+ { IFM_HDX, "hdx" }, \
+ { IFM_FLOW, "flow" }, \
+ { IFM_LOOP, "loop" }, \
+ { IFM_LOOP, "loopback" }, \
+ { 0, NULL }, \
+}
+
/*
* Baudrate descriptions for the various media types.
*/
@@ -606,7 +638,7 @@ struct ifmedia_baudrate {
uint64_t ifmb_baudrate; /* corresponding baudrate */
};
-#define IFM_BAUDRATE_DESCRIPTIONS { \
+#define IFM_BAUDRATE_DESCRIPTIONS { \
{ IFM_ETHER | IFM_10_T, IF_Mbps(10) }, \
{ IFM_ETHER | IFM_10_2, IF_Mbps(10) }, \
{ IFM_ETHER | IFM_10_5, IF_Mbps(10) }, \
@@ -670,10 +702,10 @@ struct ifmedia_status_description {
const char *ifms_string[2];
};
-#define IFM_STATUS_DESC(ifms, bit) \
+#define IFM_STATUS_DESC(ifms, bit) \
(ifms)->ifms_string[((ifms)->ifms_bit & (bit)) ? 1 : 0]
-#define IFM_STATUS_DESCRIPTIONS { \
+#define IFM_STATUS_DESCRIPTIONS { \
{ IFM_ETHER, IFM_AVALID, IFM_ACTIVE, \
{ "no carrier", "active" } }, \
{ IFM_FDDI, IFM_AVALID, IFM_ACTIVE, \
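
To make the extraction macros above concrete, the following hypothetical snippet builds a media word for 1000baseT full duplex with flow control, the same combination the new "1000baseT-FDX-flow" alias names, and takes it apart again:

        #include <net/if_media.h>

        int media = IFM_ETHER | IFM_1000_T | IFM_FDX | IFM_FLOW;

        int type = IFM_TYPE(media);             /* IFM_ETHER */
        int subtype = IFM_SUBTYPE(media);       /* IFM_1000_T */
        int options = IFM_OPTIONS(media);       /* IFM_FDX | IFM_FLOW */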
diff --git a/freebsd/sys/net/if_spppfr.c b/freebsd/sys/net/if_spppfr.c
index be080a7d..f25bad7b 100644
--- a/freebsd/sys/net/if_spppfr.c
+++ b/freebsd/sys/net/if_spppfr.c
@@ -282,6 +282,8 @@ drop: ++ifp->if_ierrors;
if (! (ifp->if_flags & IFF_UP))
goto drop;
+ M_SETFIB(m, ifp->if_fib);
+
/* Check queue. */
if (netisr_queue(isr, m)) { /* (0) on success. */
if (debug)
diff --git a/freebsd/sys/net/if_spppsubr.c b/freebsd/sys/net/if_spppsubr.c
index d5f3487a..01743f47 100644
--- a/freebsd/sys/net/if_spppsubr.c
+++ b/freebsd/sys/net/if_spppsubr.c
@@ -739,6 +739,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
goto drop;
SPPP_UNLOCK(sp);
+ M_SETFIB(m, ifp->if_fib);
/* Check queue. */
if (netisr_queue(isr, m)) { /* (0) on success. */
if (debug)
diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c
index 79466119..a808548c 100644
--- a/freebsd/sys/net/if_stf.c
+++ b/freebsd/sys/net/if_stf.c
@@ -787,6 +787,7 @@ in_stf_input(m, off)
*/
ifp->if_ipackets++;
ifp->if_ibytes += m->m_pkthdr.len;
+ M_SETFIB(m, ifp->if_fib);
netisr_dispatch(NETISR_IPV6, m);
}
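
The M_SETFIB() additions here and in if_spppfr.c, if_spppsubr.c and if_tun.c all follow the same pattern: stamp the mbuf with the receiving interface's FIB before handing it to netisr, so multi-FIB configurations route the packet using the correct table. A condensed sketch of the recurring input-path shape, with assumed names:

        static void
        example_input(struct ifnet *ifp, struct mbuf *m, u_int isr)
        {
                m->m_pkthdr.rcvif = ifp;        /* record the receiving interface */
                M_SETFIB(m, ifp->if_fib);       /* select that interface's routing table */
                netisr_dispatch(isr, m);        /* hand off for protocol processing */
        }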
diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c
index cd775369..6e6b6a64 100644
--- a/freebsd/sys/net/if_tap.c
+++ b/freebsd/sys/net/if_tap.c
@@ -44,6 +44,7 @@
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -66,8 +67,9 @@
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
-#include <net/route.h>
#include <net/if_types.h>
+#include <net/route.h>
+#include <net/vnet.h>
#include <netinet/in.h>
@@ -216,6 +218,8 @@ tap_destroy(struct tap_softc *tp)
KASSERT(!(tp->tap_flags & TAP_OPEN),
("%s flags is out of sync", ifp->if_xname));
+ CURVNET_SET(ifp->if_vnet);
+ seldrain(&tp->tap_rsel);
knlist_destroy(&tp->tap_rsel.si_note);
destroy_dev(tp->tap_dev);
ether_ifdetach(ifp);
@@ -223,6 +227,7 @@ tap_destroy(struct tap_softc *tp)
mtx_destroy(&tp->tap_mtx);
free(tp, M_TAP);
+ CURVNET_RESTORE();
}
static void
@@ -364,6 +369,7 @@ tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **d
if (unit == -1)
append_unit = 1;
+ CURVNET_SET(CRED_TO_VNET(cred));
/* find any existing device, or allocate new unit number */
i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
if (i) {
@@ -382,6 +388,7 @@ tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **d
}
if_clone_create(name, namelen, NULL);
+ CURVNET_RESTORE();
} /* tapclone */
@@ -526,6 +533,7 @@ tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
/* junk all pending output */
mtx_lock(&tp->tap_mtx);
+ CURVNET_SET(ifp->if_vnet);
IF_DRAIN(&ifp->if_snd);
/*
@@ -549,6 +557,8 @@ tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
}
if_link_state_change(ifp, LINK_STATE_DOWN);
+ CURVNET_RESTORE();
+
funsetown(&tp->tap_sigio);
selwakeuppri(&tp->tap_rsel, PZERO+1);
KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
@@ -950,7 +960,9 @@ tapwrite(struct cdev *dev, struct uio *uio, int flag)
}
/* Pass packet up to parent. */
+ CURVNET_SET(ifp->if_vnet);
(*ifp->if_input)(ifp, m);
+ CURVNET_RESTORE();
ifp->if_ipackets ++; /* ibytes are counted in parent */
return (0);
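
The CURVNET_SET()/CURVNET_RESTORE() pairs added above all bracket code that touches per-vnet state (packet input, queue draining, link-state changes) from cdev entry points, where curvnet is not otherwise established. The shape is always the same:

        CURVNET_SET(ifp->if_vnet);      /* enter the interface's vnet context */
        (*ifp->if_input)(ifp, m);       /* may reach per-vnet data structures */
        CURVNET_RESTORE();              /* restore the caller's vnet */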
diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c
index b6fa0e5a..444113f4 100644
--- a/freebsd/sys/net/if_tun.c
+++ b/freebsd/sys/net/if_tun.c
@@ -128,7 +128,7 @@ static void tunclone(void *arg, struct ucred *cred, char *name,
int namelen, struct cdev **dev);
static void tuncreate(const char *name, struct cdev *dev);
static int tunifioctl(struct ifnet *, u_long, caddr_t);
-static int tuninit(struct ifnet *);
+static void tuninit(struct ifnet *);
static int tunmodevent(module_t, int, void *);
static int tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
struct route *ro);
@@ -230,8 +230,8 @@ tunclone(void *arg, struct ucred *cred, char *name, int namelen,
i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
if (i) {
if (append_unit) {
- namelen = snprintf(devname, sizeof(devname), "%s%d", name,
- u);
+ namelen = snprintf(devname, sizeof(devname), "%s%d",
+ name, u);
name = devname;
}
/* No preexisting struct cdev *, create one */
@@ -261,6 +261,7 @@ tun_destroy(struct tun_softc *tp)
if_detach(TUN2IFP(tp));
if_free(TUN2IFP(tp));
destroy_dev(dev);
+ seldrain(&tp->tun_rsel);
knlist_destroy(&tp->tun_rsel.si_note);
mtx_destroy(&tp->tun_mtx);
cv_destroy(&tp->tun_cv);
@@ -504,14 +505,13 @@ tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
return (0);
}
-static int
+static void
tuninit(struct ifnet *ifp)
{
struct tun_softc *tp = ifp->if_softc;
#ifdef INET
struct ifaddr *ifa;
#endif
- int error = 0;
TUNDEBUG(ifp, "tuninit\n");
@@ -538,7 +538,6 @@ tuninit(struct ifnet *ifp)
if_addr_runlock(ifp);
#endif
mtx_unlock(&tp->tun_mtx);
- return (error);
}
/*
@@ -562,12 +561,12 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
mtx_unlock(&tp->tun_mtx);
break;
case SIOCSIFADDR:
- error = tuninit(ifp);
- TUNDEBUG(ifp, "address set, error=%d\n", error);
+ tuninit(ifp);
+ TUNDEBUG(ifp, "address set\n");
break;
case SIOCSIFDSTADDR:
- error = tuninit(ifp);
- TUNDEBUG(ifp, "destination address set, error=%d\n", error);
+ tuninit(ifp);
+ TUNDEBUG(ifp, "destination address set\n");
break;
case SIOCSIFMTU:
ifp->if_mtu = ifr->ifr_mtu;
@@ -587,11 +586,8 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
* tunoutput - queue packets from higher level ready to put out.
*/
static int
-tunoutput(
- struct ifnet *ifp,
- struct mbuf *m0,
- struct sockaddr *dst,
- struct route *ro)
+tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+ struct route *ro)
{
struct tun_softc *tp = ifp->if_softc;
u_short cached_tun_flags;
@@ -671,10 +667,8 @@ tunoutput(
}
error = (ifp->if_transmit)(ifp, m0);
- if (error) {
- ifp->if_collisions++;
+ if (error)
return (ENOBUFS);
- }
ifp->if_opackets++;
return (0);
}
@@ -683,7 +677,8 @@ tunoutput(
* the cdevsw interface is now pretty minimal.
*/
static int
-tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
+tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
+ struct thread *td)
{
int error;
struct tun_softc *tp = dev->si_drv1;
@@ -875,7 +870,6 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
struct tun_softc *tp = dev->si_drv1;
struct ifnet *ifp = TUN2IFP(tp);
struct mbuf *m;
- int error = 0;
uint32_t family;
int isr;
@@ -895,7 +889,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) {
ifp->if_ierrors++;
- return (error);
+ return (ENOBUFS);
}
m->m_pkthdr.rcvif = ifp;
@@ -950,6 +944,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
ifp->if_ibytes += m->m_pkthdr.len;
ifp->if_ipackets++;
CURVNET_SET(ifp->if_vnet);
+ M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
CURVNET_RESTORE();
return (0);
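
The seldrain() calls inserted into tap_destroy() and tun_destroy() close a teardown race: select()/poll() waiters must be woken and their selinfo references drained before the knote list and device node disappear. A sketch of the required ordering, with assumed softc field names:

        seldrain(&sc->sc_rsel);                 /* wake and drain select() waiters */
        knlist_destroy(&sc->sc_rsel.si_note);   /* then tear down kqueue state */
        destroy_dev(sc->sc_dev);                /* finally remove the device node */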
diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h
index 172ebe0e..c5c489fb 100644
--- a/freebsd/sys/net/if_var.h
+++ b/freebsd/sys/net/if_var.h
@@ -197,17 +197,18 @@ struct ifnet {
/* protected by if_addr_mtx */
void *if_pf_kif;
void *if_lagg; /* lagg glue */
- u_char if_alloctype; /* if_type at time of allocation */
+ u_char if_alloctype; /* if_type at time of allocation */
/*
* Spare fields are added so that we can modify sensitive data
* structures without changing the kernel binary interface, and must
* be used with care where binary compatibility is required.
*/
- char if_cspare[3];
+ char if_cspare[3];
char *if_description; /* interface description */
- void *if_pspare[7];
- int if_ispare[4];
+ void *if_pspare[7]; /* 1 netmap, 6 TBD */
+ int if_ispare[3];
+ u_int if_fib; /* interface FIB */
};
typedef void if_init_f_t(void *);
@@ -249,9 +250,15 @@ typedef void if_init_f_t(void *);
#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_mtx, \
"if_addr_mtx", NULL, MTX_DEF)
#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_mtx)
-#define IF_ADDR_LOCK(if) mtx_lock(&(if)->if_addr_mtx)
-#define IF_ADDR_UNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
+#define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_mtx)
+#define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
+#define IF_ADDR_RLOCK(if) mtx_lock(&(if)->if_addr_mtx)
+#define IF_ADDR_RUNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
#define IF_ADDR_LOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
+#define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
+/* XXX: Compat. */
+#define IF_ADDR_LOCK(if) IF_ADDR_WLOCK(if)
+#define IF_ADDR_UNLOCK(if) IF_ADDR_WUNLOCK(if)
/*
* Function variations on locking macros intended to be used by loadable
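
The new read/write names are forward-looking: in this version they all map onto the same mutex, but code written against them keeps working if the address-list lock is later converted to a real rwlock. A usage sketch for the read side:

        struct ifaddr *ifa;

        IF_ADDR_RLOCK(ifp);
        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
                /* read-only inspection of ifa->ifa_addr ... */
        }
        IF_ADDR_RUNLOCK(ifp);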
diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c
index 576243d9..81c151a5 100644
--- a/freebsd/sys/net/if_vlan.c
+++ b/freebsd/sys/net/if_vlan.c
@@ -36,9 +36,8 @@
* we need to pretend to be enough of an Ethernet implementation
* to make arp work. The way we do this is by telling everyone
* that we are an Ethernet, and then catch the packets that
- * ether_output() left on our output queue when it calls
- * if_start(), rewrite them for use by the real outgoing interface,
- * and ask it to send them.
+ * ether_output() sends to us via if_transmit(), rewrite them for
+ * use by the real outgoing interface, and ask it to send them.
*/
#include <sys/cdefs.h>
@@ -181,16 +180,17 @@ static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
#endif
static void trunk_destroy(struct ifvlantrunk *trunk);
-static void vlan_start(struct ifnet *ifp);
static void vlan_init(void *foo);
static void vlan_input(struct ifnet *ifp, struct mbuf *m);
static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
+static void vlan_qflush(struct ifnet *ifp);
static int vlan_setflag(struct ifnet *ifp, int flag, int status,
int (*func)(struct ifnet *, int));
static int vlan_setflags(struct ifnet *ifp, int status);
static int vlan_setmulti(struct ifnet *ifp);
+static int vlan_transmit(struct ifnet *ifp, struct mbuf *m);
static void vlan_unconfig(struct ifnet *ifp);
-static void vlan_unconfig_locked(struct ifnet *ifp);
+static void vlan_unconfig_locked(struct ifnet *ifp, int departing);
static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
static void vlan_link_state(struct ifnet *ifp, int link);
static void vlan_capabilities(struct ifvlan *ifv);
@@ -545,7 +545,7 @@ vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
#ifdef VLAN_ARRAY
for (i = 0; i < VLAN_ARRAY_SIZE; i++)
if ((ifv = ifp->if_vlantrunk->vlans[i])) {
- vlan_unconfig_locked(ifv->ifv_ifp);
+ vlan_unconfig_locked(ifv->ifv_ifp, 1);
if (ifp->if_vlantrunk == NULL)
break;
}
@@ -553,7 +553,7 @@ vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
restart:
for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
- vlan_unconfig_locked(ifv->ifv_ifp);
+ vlan_unconfig_locked(ifv->ifv_ifp, 1);
if (ifp->if_vlantrunk)
goto restart; /* trunk->hwidth can change */
else
@@ -809,9 +809,9 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
/* NB: mtu is not set here */
ifp->if_init = vlan_init;
- ifp->if_start = vlan_start;
+ ifp->if_transmit = vlan_transmit;
+ ifp->if_qflush = vlan_qflush;
ifp->if_ioctl = vlan_ioctl;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
ifp->if_flags = VLAN_IFFLAGS;
ether_ifattach(ifp, eaddr);
/* Now undo some of the damage... */
@@ -823,7 +823,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
error = vlan_config(ifv, p, tag);
if (error != 0) {
/*
- * Since we've partialy failed, we need to back
+ * Since we've partially failed, we need to back
* out all the way, otherwise userland could get
* confused. Thus, we destroy the interface.
*/
@@ -867,99 +867,99 @@ vlan_init(void *foo __unused)
}
/*
- * The if_start method for vlan(4) interface. It doesn't
- * raises the IFF_DRV_OACTIVE flag, since it is called
- * only from IFQ_HANDOFF() macro in ether_output_frame().
- * If the interface queue is full, and vlan_start() is
- * not called, the queue would never get emptied and
- * interface would stall forever.
+ * The if_transmit method for vlan(4) interface.
*/
-static void
-vlan_start(struct ifnet *ifp)
+static int
+vlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct ifvlan *ifv;
struct ifnet *p;
- struct mbuf *m;
- int error;
+ int error, len, mcast;
ifv = ifp->if_softc;
p = PARENT(ifv);
+ len = m->m_pkthdr.len;
+ mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
- for (;;) {
- IF_DEQUEUE(&ifp->if_snd, m);
- if (m == NULL)
- break;
- BPF_MTAP(ifp, m);
+ BPF_MTAP(ifp, m);
- /*
- * Do not run parent's if_start() if the parent is not up,
- * or parent's driver will cause a system crash.
- */
- if (!UP_AND_RUNNING(p)) {
- m_freem(m);
- ifp->if_collisions++;
- continue;
- }
+ /*
+ * Do not run parent's if_transmit() if the parent is not up,
+ * or parent's driver will cause a system crash.
+ */
+ if (!UP_AND_RUNNING(p)) {
+ m_freem(m);
+ ifp->if_oerrors++;
+ return (0);
+ }
- /*
- * Pad the frame to the minimum size allowed if told to.
- * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
- * paragraph C.4.4.3.b. It can help to work around buggy
- * bridges that violate paragraph C.4.4.3.a from the same
- * document, i.e., fail to pad short frames after untagging.
- * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
- * untagging it will produce a 62-byte frame, which is a runt
- * and requires padding. There are VLAN-enabled network
- * devices that just discard such runts instead or mishandle
- * them somehow.
- */
- if (soft_pad) {
- static char pad[8]; /* just zeros */
- int n;
-
- for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
- n > 0; n -= sizeof(pad))
- if (!m_append(m, min(n, sizeof(pad)), pad))
- break;
-
- if (n > 0) {
- if_printf(ifp, "cannot pad short frame\n");
- ifp->if_oerrors++;
- m_freem(m);
- continue;
- }
- }
+ /*
+ * Pad the frame to the minimum size allowed if told to.
+ * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
+ * paragraph C.4.4.3.b. It can help to work around buggy
+ * bridges that violate paragraph C.4.4.3.a from the same
+ * document, i.e., fail to pad short frames after untagging.
+ * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
+ * untagging it will produce a 62-byte frame, which is a runt
+ * and requires padding. There are VLAN-enabled network
+	 * devices that just discard such runts instead, or that mishandle
+	 * them somehow.
+ */
+ if (soft_pad) {
+ static char pad[8]; /* just zeros */
+ int n;
- /*
- * If underlying interface can do VLAN tag insertion itself,
- * just pass the packet along. However, we need some way to
- * tell the interface where the packet came from so that it
- * knows how to find the VLAN tag to use, so we attach a
- * packet tag that holds it.
- */
- if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
- m->m_pkthdr.ether_vtag = ifv->ifv_tag;
- m->m_flags |= M_VLANTAG;
- } else {
- m = ether_vlanencap(m, ifv->ifv_tag);
- if (m == NULL) {
- if_printf(ifp,
- "unable to prepend VLAN header\n");
- ifp->if_oerrors++;
- continue;
- }
+ for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
+ n > 0; n -= sizeof(pad))
+ if (!m_append(m, min(n, sizeof(pad)), pad))
+ break;
+
+ if (n > 0) {
+ if_printf(ifp, "cannot pad short frame\n");
+ ifp->if_oerrors++;
+ m_freem(m);
+ return (0);
}
+ }
- /*
- * Send it, precisely as ether_output() would have.
- * We are already running at splimp.
- */
- error = (p->if_transmit)(p, m);
- if (!error)
- ifp->if_opackets++;
- else
+ /*
+ * If underlying interface can do VLAN tag insertion itself,
+ * just pass the packet along. However, we need some way to
+ * tell the interface where the packet came from so that it
+ * knows how to find the VLAN tag to use, so we attach a
+ * packet tag that holds it.
+ */
+ if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
+ m->m_pkthdr.ether_vtag = ifv->ifv_tag;
+ m->m_flags |= M_VLANTAG;
+ } else {
+ m = ether_vlanencap(m, ifv->ifv_tag);
+ if (m == NULL) {
+ if_printf(ifp, "unable to prepend VLAN header\n");
ifp->if_oerrors++;
+ return (0);
+ }
}
+
+ /*
+ * Send it, precisely as ether_output() would have.
+ */
+ error = (p->if_transmit)(p, m);
+ if (!error) {
+ ifp->if_opackets++;
+ ifp->if_omcasts += mcast;
+ ifp->if_obytes += len;
+ } else
+ ifp->if_oerrors++;
+ return (error);
+}
+
+/*
+ * The ifp->if_qflush entry point for vlan(4) is a no-op.
+ */
+static void
+vlan_qflush(struct ifnet *ifp __unused)
+{
}
static void
@@ -1165,17 +1165,18 @@ vlan_unconfig(struct ifnet *ifp)
{
VLAN_LOCK();
- vlan_unconfig_locked(ifp);
+ vlan_unconfig_locked(ifp, 0);
VLAN_UNLOCK();
}
static void
-vlan_unconfig_locked(struct ifnet *ifp)
+vlan_unconfig_locked(struct ifnet *ifp, int departing)
{
struct ifvlantrunk *trunk;
struct vlan_mc_entry *mc;
struct ifvlan *ifv;
struct ifnet *parent;
+ int error;
VLAN_LOCK_ASSERT();
@@ -1206,13 +1207,21 @@ vlan_unconfig_locked(struct ifnet *ifp)
ETHER_ADDR_LEN);
/*
- * This may fail if the parent interface is
- * being detached. Regardless, we should do a
- * best effort to free this interface as much
- * as possible as all callers expect vlan
- * destruction to succeed.
+ * If the parent interface is being detached,
+ * all its multicast addresses have already
+ * been removed. Warn about errors if
+ * if_delmulti() does fail, but don't abort as
+ * all callers expect vlan destruction to
+ * succeed.
*/
- (void)if_delmulti(parent, (struct sockaddr *)&sdl);
+ if (!departing) {
+ error = if_delmulti(parent,
+ (struct sockaddr *)&sdl);
+ if (error)
+ if_printf(ifp,
+ "Failed to delete multicast address from parent: %d\n",
+ error);
+ }
SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
free(mc, M_VLAN);
}
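
One contract worth spelling out in the conversion above: an if_transmit method always consumes the mbuf. vlan_transmit() therefore frees the packet and returns 0 on local failures (parent down, padding or encapsulation failure) so the caller does not retry, and only propagates an error when the parent's own if_transmit fails. A stripped-down sketch of that contract, with invented names:

        struct example_softc {
                struct ifnet *ex_parent;
        };

        static int
        example_transmit(struct ifnet *ifp, struct mbuf *m)
        {
                struct ifnet *p = ((struct example_softc *)ifp->if_softc)->ex_parent;

                if (!(p->if_flags & IFF_UP) ||
                    !(p->if_drv_flags & IFF_DRV_RUNNING)) {
                        m_freem(m);     /* if_transmit always consumes m */
                        ifp->if_oerrors++;
                        return (0);     /* local drop: caller must not retry */
                }
                return ((p->if_transmit)(p, m));        /* parent errors propagate */
        }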
diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c
index 465b0b29..6ba71233 100644
--- a/freebsd/sys/net/netisr.c
+++ b/freebsd/sys/net/netisr.c
@@ -2,8 +2,12 @@
/*-
* Copyright (c) 2007-2009 Robert N. M. Watson
+ * Copyright (c) 2010 Juniper Networks, Inc.
* All rights reserved.
*
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,13 +38,13 @@ __FBSDID("$FreeBSD$");
* dispatched) and asynchronous (deferred dispatch) processing of packets by
* registered protocol handlers. Callers pass a protocol identifier and
* packet to netisr, along with a direct dispatch hint, and work will either
- * be immediately processed with the registered handler, or passed to a
- * kernel software interrupt (SWI) thread for deferred dispatch. Callers
- * will generally select one or the other based on:
+ * be immediately processed by the registered handler, or passed to a
+ * software interrupt (SWI) thread for deferred dispatch. Callers will
+ * generally select one or the other based on:
*
- * - Might directly dispatching a netisr handler lead to code reentrance or
+ * - Whether directly dispatching a netisr handler might lead to code reentrance
* lock recursion, such as entering the socket code from the socket code.
- * - Might directly dispatching a netisr handler lead to recursive
+ * - Whether directly dispatching a netisr handler might lead to recursive
* processing, such as when decapsulating several wrapped layers of tunnel
* information (IPSEC within IPSEC within ...).
*
@@ -56,9 +60,9 @@ __FBSDID("$FreeBSD$");
* more than one flow.
*
* netisr supports several policy variations, represented by the
- * NETISR_POLICY_* constants, allowing protocols to play a varying role in
+ * NETISR_POLICY_* constants, allowing protocols to play various roles in
* identifying flows, assigning work to CPUs, etc. These are described in
- * detail in netisr.h.
+ * netisr.h.
*/
#include <rtems/bsd/local/opt_ddb.h>
@@ -85,9 +89,11 @@ __FBSDID("$FreeBSD$");
#include <ddb/ddb.h>
#endif
+#define _WANT_NETISR_INTERNAL /* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
+#include <net/netisr_internal.h>
#include <net/vnet.h>
/*-
@@ -97,13 +103,13 @@ __FBSDID("$FreeBSD$");
*
* The following data structures and fields are protected by this lock:
*
- * - The np array, including all fields of struct netisr_proto.
+ * - The netisr_proto array, including all fields of struct netisr_proto.
* - The nws array, including all fields of struct netisr_worker.
* - The nws_array array.
*
* Note: the NETISR_LOCKING define controls whether read locks are acquired
* in packet processing paths requiring netisr registration stability. This
- * is disabled by default as it can lead to a measurable performance
+ * is disabled by default as it can lead to measurable performance
* degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
* because netisr registration and unregistration is extremely rare at
* runtime. If it becomes more common, this decision should be revisited.
@@ -158,111 +164,58 @@ SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW,
*/
static int netisr_maxthreads = -1; /* Max number of threads. */
TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
-SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RD,
+SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
&netisr_maxthreads, 0,
"Use at most this many CPUs for netisr processing");
static int netisr_bindthreads = 0; /* Bind threads to CPUs. */
TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads);
-SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RD,
+SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
&netisr_bindthreads, 0, "Bind netisr threads to CPUs.");
/*
- * Limit per-workstream queues to at most net.isr.maxqlimit, both for initial
- * configuration and later modification using netisr_setqlimit().
+ * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit,
+ * both for initial configuration and later modification using
+ * netisr_setqlimit().
*/
#define NETISR_DEFAULT_MAXQLIMIT 10240
static u_int netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
TUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit);
-SYSCTL_INT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RD,
+SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
&netisr_maxqlimit, 0,
"Maximum netisr per-protocol, per-CPU queue depth.");
/*
- * The default per-workstream queue limit for protocols that don't initialize
- * the nh_qlimit field of their struct netisr_handler. If this is set above
- * netisr_maxqlimit, we truncate it to the maximum during boot.
+ * The default per-workstream mbuf queue limit for protocols that don't
+ * initialize the nh_qlimit field of their struct netisr_handler. If this is
+ * set above netisr_maxqlimit, we truncate it to the maximum during boot.
*/
#define NETISR_DEFAULT_DEFAULTQLIMIT 256
static u_int netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
TUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit);
-SYSCTL_INT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RD,
+SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
&netisr_defaultqlimit, 0,
"Default netisr per-protocol, per-CPU queue limit if not set by protocol");
/*
- * Each protocol is described by a struct netisr_proto, which holds all
- * global per-protocol information. This data structure is set up by
- * netisr_register(), and derived from the public struct netisr_handler.
- */
-struct netisr_proto {
- const char *np_name; /* Character string protocol name. */
- netisr_handler_t *np_handler; /* Protocol handler. */
- netisr_m2flow_t *np_m2flow; /* Query flow for untagged packet. */
- netisr_m2cpuid_t *np_m2cpuid; /* Query CPU to process packet on. */
- netisr_drainedcpu_t *np_drainedcpu; /* Callback when drained a queue. */
- u_int np_qlimit; /* Maximum per-CPU queue depth. */
- u_int np_policy; /* Work placement policy. */
-};
-
-#define NETISR_MAXPROT 16 /* Compile-time limit. */
-
-/*
- * The np array describes all registered protocols, indexed by protocol
- * number.
+ * Store and export the compile-time constant NETISR_MAXPROT limit on the
+ * number of protocols that can register with netisr at a time. This is
+ * required for crashdump analysis, as it sizes netisr_proto[].
*/
-static struct netisr_proto np[NETISR_MAXPROT];
-
-/*
- * Protocol-specific work for each workstream is described by struct
- * netisr_work. Each work descriptor consists of an mbuf queue and
- * statistics.
- */
-struct netisr_work {
- /*
- * Packet queue, linked by m_nextpkt.
- */
- struct mbuf *nw_head;
- struct mbuf *nw_tail;
- u_int nw_len;
- u_int nw_qlimit;
- u_int nw_watermark;
-
- /*
- * Statistics -- written unlocked, but mostly from curcpu.
- */
- u_int64_t nw_dispatched; /* Number of direct dispatches. */
- u_int64_t nw_hybrid_dispatched; /* "" hybrid dispatches. */
- u_int64_t nw_qdrops; /* "" drops. */
- u_int64_t nw_queued; /* "" enqueues. */
- u_int64_t nw_handled; /* "" handled in worker. */
-};
+static u_int netisr_maxprot = NETISR_MAXPROT;
+SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
+ &netisr_maxprot, 0,
+ "Compile-time limit on the number of protocols supported by netisr.");
/*
- * Workstreams hold a set of ordered work across each protocol, and are
- * described by netisr_workstream. Each workstream is associated with a
- * worker thread, which in turn is pinned to a CPU. Work associated with a
- * workstream can be processd in other threads during direct dispatch;
- * concurrent processing is prevented by the NWS_RUNNING flag, which
- * indicates that a thread is already processing the work queue.
+ * The netisr_proto array describes all registered protocols, indexed by
+ * protocol number. See netisr_internal.h for more details.
*/
-struct netisr_workstream {
- struct intr_event *nws_intr_event; /* Handler for stream. */
- void *nws_swi_cookie; /* swi(9) cookie for stream. */
- struct mtx nws_mtx; /* Synchronize work. */
- u_int nws_cpu; /* CPU pinning. */
- u_int nws_flags; /* Wakeup flags. */
- u_int nws_pendingbits; /* Scheduled protocols. */
-
- /*
- * Each protocol has per-workstream data.
- */
- struct netisr_work nws_work[NETISR_MAXPROT];
-} __aligned(CACHE_LINE_SIZE);
+static struct netisr_proto netisr_proto[NETISR_MAXPROT];
#ifndef __rtems__
/*
- * Per-CPU workstream data.
+ * Per-CPU workstream data. See netisr_internal.h for more details.
*/
DPCPU_DEFINE(struct netisr_workstream, nws);
@@ -278,20 +231,13 @@ static u_int nws_array[MAXCPU];
* CPUs once fully started.
*/
static u_int nws_count;
-SYSCTL_INT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
+SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
&nws_count, 0, "Number of extant netisr threads.");
#else /* __rtems__ */
static struct netisr_workstream rtems_bsd_nws;
#endif /* __rtems__ */
/*
- * Per-workstream flags.
- */
-#define NWS_RUNNING 0x00000001 /* Currently running in a thread. */
-#define NWS_DISPATCHING 0x00000002 /* Currently being direct-dispatched. */
-#define NWS_SCHEDULED 0x00000004 /* Signal issued. */
-
-/*
* Synchronization for each workstream: a mutex protects all mutable fields
* in each stream, including per-protocol state (mbuf queues). The SWI is
* woken up if asynchronous dispatch is required.
@@ -324,7 +270,7 @@ netisr_get_cpuid(u_int cpunumber)
}
/*
- * The default implementation of -> CPU ID mapping.
+ * The default implementation of flow -> CPU ID mapping.
*
* Non-static so that protocols can use it to map their own work to specific
* CPUs in a manner consistent to netisr for affinity purposes.
@@ -381,36 +327,34 @@ netisr_register(const struct netisr_handler *nhp)
* Test that no existing registration exists for this protocol.
*/
NETISR_WLOCK();
- KASSERT(np[proto].np_name == NULL,
+ KASSERT(netisr_proto[proto].np_name == NULL,
("%s(%u, %s): name present", __func__, proto, name));
- KASSERT(np[proto].np_handler == NULL,
+ KASSERT(netisr_proto[proto].np_handler == NULL,
("%s(%u, %s): handler present", __func__, proto, name));
- np[proto].np_name = name;
- np[proto].np_handler = nhp->nh_handler;
- np[proto].np_m2flow = nhp->nh_m2flow;
- np[proto].np_m2cpuid = nhp->nh_m2cpuid;
- np[proto].np_drainedcpu = nhp->nh_drainedcpu;
+ netisr_proto[proto].np_name = name;
+ netisr_proto[proto].np_handler = nhp->nh_handler;
+ netisr_proto[proto].np_m2flow = nhp->nh_m2flow;
+ netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid;
+ netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu;
if (nhp->nh_qlimit == 0)
- np[proto].np_qlimit = netisr_defaultqlimit;
+ netisr_proto[proto].np_qlimit = netisr_defaultqlimit;
else if (nhp->nh_qlimit > netisr_maxqlimit) {
printf("%s: %s requested queue limit %u capped to "
"net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
netisr_maxqlimit);
- np[proto].np_qlimit = netisr_maxqlimit;
+ netisr_proto[proto].np_qlimit = netisr_maxqlimit;
} else
- np[proto].np_qlimit = nhp->nh_qlimit;
- np[proto].np_policy = nhp->nh_policy;
- for (i = 0; i <= mp_maxid; i++) {
- if (CPU_ABSENT(i))
- continue;
+ netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
+ netisr_proto[proto].np_policy = nhp->nh_policy;
+ CPU_FOREACH(i) {
#ifndef __rtems__
npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
#else /* __rtems__ */
npwp = &rtems_bsd_nws.nws_work[proto];
#endif /* __rtems__ */
bzero(npwp, sizeof(*npwp));
- npwp->nw_qlimit = np[proto].np_qlimit;
+ npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
}
NETISR_WUNLOCK();
}
@@ -435,13 +379,11 @@ netisr_clearqdrops(const struct netisr_handler *nhp)
("%s(%u): protocol too big for %s", __func__, proto, name));
NETISR_WLOCK();
- KASSERT(np[proto].np_handler != NULL,
+ KASSERT(netisr_proto[proto].np_handler != NULL,
("%s(%u): protocol not registered for %s", __func__, proto,
name));
- for (i = 0; i <= mp_maxid; i++) {
- if (CPU_ABSENT(i))
- continue;
+ CPU_FOREACH(i) {
#ifndef __rtems__
npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
#else /* __rtems__ */
@@ -453,7 +395,7 @@ netisr_clearqdrops(const struct netisr_handler *nhp)
}
/*
- * Query the current drop counters across all workstreams for a protocol.
+ * Query current drop counters across all workstreams for a protocol.
*/
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
@@ -474,13 +416,11 @@ netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
("%s(%u): protocol too big for %s", __func__, proto, name));
NETISR_RLOCK(&tracker);
- KASSERT(np[proto].np_handler != NULL,
+ KASSERT(netisr_proto[proto].np_handler != NULL,
("%s(%u): protocol not registered for %s", __func__, proto,
name));
- for (i = 0; i <= mp_maxid; i++) {
- if (CPU_ABSENT(i))
- continue;
+ CPU_FOREACH(i) {
#ifndef __rtems__
npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
#else /* __rtems__ */
@@ -492,7 +432,7 @@ netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
}
/*
- * Query the current queue limit for per-workstream queues for a protocol.
+ * Query current per-workstream queue limit for a protocol.
*/
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
@@ -511,10 +451,10 @@ netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
("%s(%u): protocol too big for %s", __func__, proto, name));
NETISR_RLOCK(&tracker);
- KASSERT(np[proto].np_handler != NULL,
+ KASSERT(netisr_proto[proto].np_handler != NULL,
("%s(%u): protocol not registered for %s", __func__, proto,
name));
- *qlimitp = np[proto].np_qlimit;
+ *qlimitp = netisr_proto[proto].np_qlimit;
NETISR_RUNLOCK(&tracker);
}
@@ -543,14 +483,12 @@ netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
("%s(%u): protocol too big for %s", __func__, proto, name));
NETISR_WLOCK();
- KASSERT(np[proto].np_handler != NULL,
+ KASSERT(netisr_proto[proto].np_handler != NULL,
("%s(%u): protocol not registered for %s", __func__, proto,
name));
- np[proto].np_qlimit = qlimit;
- for (i = 0; i <= mp_maxid; i++) {
- if (CPU_ABSENT(i))
- continue;
+ netisr_proto[proto].np_qlimit = qlimit;
+ CPU_FOREACH(i) {
#ifndef __rtems__
npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
#else /* __rtems__ */
@@ -608,19 +546,17 @@ netisr_unregister(const struct netisr_handler *nhp)
("%s(%u): protocol too big for %s", __func__, proto, name));
NETISR_WLOCK();
- KASSERT(np[proto].np_handler != NULL,
+ KASSERT(netisr_proto[proto].np_handler != NULL,
("%s(%u): protocol not registered for %s", __func__, proto,
name));
- np[proto].np_name = NULL;
- np[proto].np_handler = NULL;
- np[proto].np_m2flow = NULL;
- np[proto].np_m2cpuid = NULL;
- np[proto].np_qlimit = 0;
- np[proto].np_policy = 0;
- for (i = 0; i <= mp_maxid; i++) {
- if (CPU_ABSENT(i))
- continue;
+ netisr_proto[proto].np_name = NULL;
+ netisr_proto[proto].np_handler = NULL;
+ netisr_proto[proto].np_m2flow = NULL;
+ netisr_proto[proto].np_m2cpuid = NULL;
+ netisr_proto[proto].np_qlimit = 0;
+ netisr_proto[proto].np_policy = 0;
+ CPU_FOREACH(i) {
#ifndef __rtems__
npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
#else /* __rtems__ */
@@ -744,22 +680,23 @@ netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
if (local_npw.nw_head == NULL)
local_npw.nw_tail = NULL;
local_npw.nw_len--;
- VNET_ASSERT(m->m_pkthdr.rcvif != NULL);
+ VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
+ ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
- np[proto].np_handler(m);
+ netisr_proto[proto].np_handler(m);
CURVNET_RESTORE();
}
KASSERT(local_npw.nw_len == 0,
("%s(%u): len %u", __func__, proto, local_npw.nw_len));
- if (np[proto].np_drainedcpu)
- np[proto].np_drainedcpu(nwsp->nws_cpu);
+ if (netisr_proto[proto].np_drainedcpu)
+ netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu);
NWS_LOCK(nwsp);
npwp->nw_handled += handled;
return (handled);
}
/*
- * SWI handler for netisr -- processes prackets in a set of workstreams that
+ * SWI handler for netisr -- processes packets in a set of workstreams that
* it owns, woken up by calls to NWS_SIGNAL(). If this workstream is already
* being direct dispatched, go back to sleep and wait for the dispatching
* thread to wake us up again.
@@ -827,6 +764,11 @@ netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
npwp->nw_len++;
if (npwp->nw_len > npwp->nw_watermark)
npwp->nw_watermark = npwp->nw_len;
+
+ /*
+ * We must set the bit regardless of NWS_RUNNING, so that
+ * swi_net() keeps calling netisr_process_workstream_proto().
+ */
nwsp->nws_pendingbits |= (1 << proto);
if (!(nwsp->nws_flags &
(NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
@@ -887,10 +829,10 @@ netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
#ifdef NETISR_LOCKING
NETISR_RLOCK(&tracker);
#endif
- KASSERT(np[proto].np_handler != NULL,
+ KASSERT(netisr_proto[proto].np_handler != NULL,
("%s: invalid proto %u", __func__, proto));
- m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
+ m = netisr_select_cpuid(&netisr_proto[proto], source, m, &cpuid);
if (m != NULL) {
KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
cpuid));
@@ -911,7 +853,7 @@ netisr_queue(u_int proto, struct mbuf *m)
}
/*
- * Dispatch a packet for netisr processing, direct dispatch permitted by
+ * Dispatch a packet for netisr processing; direct dispatch is permitted by
* calling context.
*/
int
@@ -936,7 +878,7 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
#ifdef NETISR_LOCKING
NETISR_RLOCK(&tracker);
#endif
- KASSERT(np[proto].np_handler != NULL,
+ KASSERT(netisr_proto[proto].np_handler != NULL,
("%s: invalid proto %u", __func__, proto));
/*
@@ -951,7 +893,7 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
npwp = &nwsp->nws_work[proto];
npwp->nw_dispatched++;
npwp->nw_handled++;
- np[proto].np_handler(m);
+ netisr_proto[proto].np_handler(m);
error = 0;
goto out_unlock;
}
@@ -961,7 +903,7 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
* dispatch if we're on the right CPU and the netisr worker isn't
* already running.
*/
- m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
+ m = netisr_select_cpuid(&netisr_proto[proto], source, m, &cpuid);
if (m == NULL) {
error = ENOBUFS;
goto out_unlock;
@@ -1000,7 +942,7 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
*/
nwsp->nws_flags |= NWS_DISPATCHING;
NWS_UNLOCK(nwsp);
- np[proto].np_handler(m);
+ netisr_proto[proto].np_handler(m);
NWS_LOCK(nwsp);
nwsp->nws_flags &= ~NWS_DISPATCHING;
npwp->nw_handled++;
@@ -1171,6 +1113,166 @@ netisr_start(void *arg)
SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
#endif /* __rtems__ */
+/*
+ * Sysctl monitoring for netisr: query a list of registered protocols.
+ */
+static int
+sysctl_netisr_proto(SYSCTL_HANDLER_ARGS)
+{
+ struct rm_priotracker tracker;
+ struct sysctl_netisr_proto *snpp, *snp_array;
+ struct netisr_proto *npp;
+ u_int counter, proto;
+ int error;
+
+ if (req->newptr != NULL)
+ return (EINVAL);
+ snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP,
+ M_ZERO | M_WAITOK);
+ counter = 0;
+ NETISR_RLOCK(&tracker);
+ for (proto = 0; proto < NETISR_MAXPROT; proto++) {
+ npp = &netisr_proto[proto];
+ if (npp->np_name == NULL)
+ continue;
+ snpp = &snp_array[counter];
+ snpp->snp_version = sizeof(*snpp);
+ strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN);
+ snpp->snp_proto = proto;
+ snpp->snp_qlimit = npp->np_qlimit;
+ snpp->snp_policy = npp->np_policy;
+ if (npp->np_m2flow != NULL)
+ snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW;
+ if (npp->np_m2cpuid != NULL)
+ snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID;
+ if (npp->np_drainedcpu != NULL)
+ snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU;
+ counter++;
+ }
+ NETISR_RUNLOCK(&tracker);
+ KASSERT(counter <= NETISR_MAXPROT,
+ ("sysctl_netisr_proto: counter too big (%d)", counter));
+ error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter);
+ free(snp_array, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_isr, OID_AUTO, proto,
+ CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto,
+ "S,sysctl_netisr_proto",
+ "Return list of protocols registered with netisr");
+
+/*
+ * Sysctl monitoring for netisr: query a list of workstreams.
+ */
+static int
+sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS)
+{
+ struct rm_priotracker tracker;
+ struct sysctl_netisr_workstream *snwsp, *snws_array;
+ struct netisr_workstream *nwsp;
+ u_int counter, cpuid;
+ int error;
+
+ if (req->newptr != NULL)
+ return (EINVAL);
+ snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP,
+ M_ZERO | M_WAITOK);
+ counter = 0;
+ NETISR_RLOCK(&tracker);
+ CPU_FOREACH(cpuid) {
+ nwsp = DPCPU_ID_PTR(cpuid, nws);
+ if (nwsp->nws_intr_event == NULL)
+ continue;
+ NWS_LOCK(nwsp);
+ snwsp = &snws_array[counter];
+ snwsp->snws_version = sizeof(*snwsp);
+
+ /*
+ * For now, we equate workstream IDs and CPU IDs in the
+ * kernel, but expose them independently to userspace in case
+ * that assumption changes in the future.
+ */
+ snwsp->snws_wsid = cpuid;
+ snwsp->snws_cpu = cpuid;
+ if (nwsp->nws_intr_event != NULL)
+ snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR;
+ NWS_UNLOCK(nwsp);
+ counter++;
+ }
+ NETISR_RUNLOCK(&tracker);
+ KASSERT(counter <= MAXCPU,
+ ("sysctl_netisr_workstream: counter too big (%d)", counter));
+ error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter);
+ free(snws_array, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_isr, OID_AUTO, workstream,
+ CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream,
+ "S,sysctl_netisr_workstream",
+ "Return list of workstreams implemented by netisr");
+
+/*
+ * Sysctl monitoring for netisr: query per-protocol data across all
+ * workstreams.
+ */
+static int
+sysctl_netisr_work(SYSCTL_HANDLER_ARGS)
+{
+ struct rm_priotracker tracker;
+ struct sysctl_netisr_work *snwp, *snw_array;
+ struct netisr_workstream *nwsp;
+ struct netisr_proto *npp;
+ struct netisr_work *nwp;
+ u_int counter, cpuid, proto;
+ int error;
+
+ if (req->newptr != NULL)
+ return (EINVAL);
+ snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT,
+ M_TEMP, M_ZERO | M_WAITOK);
+ counter = 0;
+ NETISR_RLOCK(&tracker);
+ CPU_FOREACH(cpuid) {
+ nwsp = DPCPU_ID_PTR(cpuid, nws);
+ if (nwsp->nws_intr_event == NULL)
+ continue;
+ NWS_LOCK(nwsp);
+ for (proto = 0; proto < NETISR_MAXPROT; proto++) {
+ npp = &netisr_proto[proto];
+ if (npp->np_name == NULL)
+ continue;
+ nwp = &nwsp->nws_work[proto];
+ snwp = &snw_array[counter];
+ snwp->snw_version = sizeof(*snwp);
+ snwp->snw_wsid = cpuid; /* See comment above. */
+ snwp->snw_proto = proto;
+ snwp->snw_len = nwp->nw_len;
+ snwp->snw_watermark = nwp->nw_watermark;
+ snwp->snw_dispatched = nwp->nw_dispatched;
+ snwp->snw_hybrid_dispatched =
+ nwp->nw_hybrid_dispatched;
+ snwp->snw_qdrops = nwp->nw_qdrops;
+ snwp->snw_queued = nwp->nw_queued;
+ snwp->snw_handled = nwp->nw_handled;
+ counter++;
+ }
+ NWS_UNLOCK(nwsp);
+ }
+ KASSERT(counter <= MAXCPU * NETISR_MAXPROT,
+ ("sysctl_netisr_work: counter too big (%d)", counter));
+ NETISR_RUNLOCK(&tracker);
+ error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter);
+ free(snw_array, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_isr, OID_AUTO, work,
+ CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work,
+ "S,sysctl_netisr_work",
+ "Return list of per-workstream, per-protocol work in netisr");
+
#ifdef DDB
DB_SHOW_COMMAND(netisr, db_show_netisr)
{
@@ -1181,15 +1283,13 @@ DB_SHOW_COMMAND(netisr, db_show_netisr)
db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
"Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
- for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
- if (CPU_ABSENT(cpuid))
- continue;
+ CPU_FOREACH(cpuid) {
nwsp = DPCPU_ID_PTR(cpuid, nws);
if (nwsp->nws_intr_event == NULL)
continue;
first = 1;
for (proto = 0; proto < NETISR_MAXPROT; proto++) {
- if (np[proto].np_handler == NULL)
+ if (netisr_proto[proto].np_handler == NULL)
continue;
nwp = &nwsp->nws_work[proto];
if (first) {
@@ -1199,7 +1299,7 @@ DB_SHOW_COMMAND(netisr, db_show_netisr)
db_printf("%3s ", "");
db_printf(
"%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
- np[proto].np_name, nwp->nw_len,
+ netisr_proto[proto].np_name, nwp->nw_len,
nwp->nw_watermark, nwp->nw_qlimit,
nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
nwp->nw_qdrops, nwp->nw_queued);
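
These three sysctls are what a monitoring tool (netstat's -Q support, for example) can consume from userland. A minimal illustrative consumer that dumps the registered protocols; error handling is abbreviated and the program is a sketch, not shipped code:

        #include <sys/types.h>
        #include <sys/sysctl.h>
        #include <net/netisr.h>

        #include <err.h>
        #include <stdio.h>
        #include <stdlib.h>

        int
        main(void)
        {
                struct sysctl_netisr_proto *snpp;
                size_t i, len;

                if (sysctlbyname("net.isr.proto", NULL, &len, NULL, 0) < 0)
                        err(1, "sysctlbyname");
                if ((snpp = malloc(len)) == NULL)
                        err(1, "malloc");
                if (sysctlbyname("net.isr.proto", snpp, &len, NULL, 0) < 0)
                        err(1, "sysctlbyname");
                for (i = 0; i < len / sizeof(*snpp); i++)
                        printf("proto %u: %s qlimit %u policy %u\n",
                            snpp[i].snp_proto, snpp[i].snp_name,
                            snpp[i].snp_qlimit, snpp[i].snp_policy);
                free(snpp);
                return (0);
        }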
diff --git a/freebsd/sys/net/netisr.h b/freebsd/sys/net/netisr.h
index 72e7f17f..cd692f6d 100644
--- a/freebsd/sys/net/netisr.h
+++ b/freebsd/sys/net/netisr.h
@@ -1,7 +1,11 @@
/*-
* Copyright (c) 2007-2009 Robert N. M. Watson
+ * Copyright (c) 2010 Juniper Networks, Inc.
* All rights reserved.
*
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -28,7 +32,6 @@
#ifndef _NET_NETISR_H_
#define _NET_NETISR_H_
-#ifdef _KERNEL
/*
* The netisr (network interrupt service routine) provides a deferred
@@ -39,6 +42,13 @@
* Historically, this was implemented by the BSD software ISR facility; it is
* now implemented via a software ithread (SWI).
*/
+
+/*
+ * Protocol numbers, which are encoded in monitoring applications and kernel
+ * modules. Internally, these are used in bit shift operations, so they
+ * must have a value 0 < proto < 32; we currently limit this further at
+ * compile time to 16 for array-sizing purposes.
+ */
#define NETISR_IP 1
#define NETISR_IGMP 2 /* IGMPv3 output queue */
#define NETISR_ROUTE 3 /* routing socket */
@@ -52,6 +62,78 @@
#define NETISR_NATM 11
#define NETISR_EPAIR 12 /* if_epair(4) */
+/*
+ * Protocol ordering and affinity policy constants. See the detailed
+ * discussion of policies later in the file.
+ */
+#define NETISR_POLICY_SOURCE 1 /* Maintain source ordering. */
+#define NETISR_POLICY_FLOW 2 /* Maintain flow ordering. */
+#define NETISR_POLICY_CPU 3 /* Protocol determines CPU placement. */
+
+/*
+ * Monitoring data structures, exported by sysctl(2).
+ *
+ * Three sysctls are defined. First, a per-protocol structure exported by
+ * net.isr.proto.
+ */
+#define NETISR_NAMEMAXLEN 32
+struct sysctl_netisr_proto {
+ u_int snp_version; /* Length of struct. */
+ char snp_name[NETISR_NAMEMAXLEN]; /* nh_name */
+ u_int snp_proto; /* nh_proto */
+ u_int snp_qlimit; /* nh_qlimit */
+ u_int snp_policy; /* nh_policy */
+ u_int snp_flags; /* Various flags. */
+ u_int _snp_ispare[7];
+};
+
+/*
+ * Flags for sysctl_netisr_proto.snp_flags.
+ */
+#define NETISR_SNP_FLAGS_M2FLOW 0x00000001 /* nh_m2flow */
+#define NETISR_SNP_FLAGS_M2CPUID 0x00000002 /* nh_m2cpuid */
+#define NETISR_SNP_FLAGS_DRAINEDCPU 0x00000004 /* nh_drainedcpu */
+
+/*
+ * Next, a structure per-workstream, with per-protocol data, exported as
+ * net.isr.workstream.
+ */
+struct sysctl_netisr_workstream {
+ u_int snws_version; /* Length of struct. */
+ u_int snws_flags; /* Various flags. */
+ u_int snws_wsid; /* Workstream ID. */
+ u_int snws_cpu; /* nws_cpu */
+ u_int _snws_ispare[12];
+};
+
+/*
+ * Flags for sysctl_netisr_workstream.snws_flags
+ */
+#define NETISR_SNWS_FLAGS_INTR 0x00000001 /* nws_intr_event */
+
+/*
+ * Finally, a per-workstream-per-protocol structure, exported as
+ * net.isr.work.
+ */
+struct sysctl_netisr_work {
+ u_int snw_version; /* Length of struct. */
+ u_int snw_wsid; /* Workstream ID. */
+ u_int snw_proto; /* Protocol number. */
+ u_int snw_len; /* nw_len */
+ u_int snw_watermark; /* nw_watermark */
+ u_int _snw_ispare[3];
+
+ uint64_t snw_dispatched; /* nw_dispatched */
+ uint64_t snw_hybrid_dispatched; /* nw_hybrid_dispatched */
+ uint64_t snw_qdrops; /* nw_qdrops */
+ uint64_t snw_queued; /* nw_queued */
+ uint64_t snw_handled; /* nw_handled */
+
+ uint64_t _snw_llspare[7];
+};
+
+#ifdef _KERNEL
+
/*-
* Protocols express ordering constraints and affinity preferences by
* implementing one or neither of nh_m2flow and nh_m2cpuid, which are used by
@@ -91,10 +173,6 @@ typedef struct mbuf *netisr_m2cpuid_t(struct mbuf *m, uintptr_t source,
typedef struct mbuf *netisr_m2flow_t(struct mbuf *m, uintptr_t source);
typedef void netisr_drainedcpu_t(u_int cpuid);
-#define NETISR_POLICY_SOURCE 1 /* Maintain source ordering. */
-#define NETISR_POLICY_FLOW 2 /* Maintain flow ordering. */
-#define NETISR_POLICY_CPU 3 /* Protocol determines CPU placement. */
-
/*
* Data structure describing a protocol handler.
*/
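
For context on how these structures get populated, here is a registration sketch for a hypothetical protocol using the kernel-side API declared later in this header; the name, slot number and handler are invented for illustration:

        static void
        myproto_input(struct mbuf *m)
        {
                /* ... protocol processing ... */
                m_freem(m);
        }

        static struct netisr_handler myproto_nh = {
                .nh_name = "myproto",
                .nh_handler = myproto_input,
                .nh_proto = 13,                 /* hypothetical unused slot, < 16 */
                .nh_policy = NETISR_POLICY_SOURCE, /* preserve source ordering */
        };

        static void
        myproto_init(void)
        {
                netisr_register(&myproto_nh);   /* nh_qlimit 0 -> net.isr.defaultqlimit */
        }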
diff --git a/freebsd/sys/net/netisr_internal.h b/freebsd/sys/net/netisr_internal.h
new file mode 100644
index 00000000..40afaf16
--- /dev/null
+++ b/freebsd/sys/net/netisr_internal.h
@@ -0,0 +1,127 @@
+/*-
+ * Copyright (c) 2007-2009 Robert N. M. Watson
+ * Copyright (c) 2010 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_NETISR_INTERNAL_H_
+#define _NET_NETISR_INTERNAL_H_
+
+#ifndef _WANT_NETISR_INTERNAL
+#error "no user-serviceable parts inside"
+#endif
+
+/*
+ * These definitions are private to the netisr implementation, but provided
+ * here for use by post-mortem crashdump analysis tools. They should not be
+ * used in any other context as they can and will change. Public definitions
+ * may be found in netisr.h.
+ */
+
+#ifndef _KERNEL
+typedef void *netisr_handler_t;
+typedef void *netisr_m2flow_t;
+typedef void *netisr_m2cpuid_t;
+typedef void *netisr_drainedcpu_t;
+#endif
+
+/*
+ * Each protocol is described by a struct netisr_proto, which holds all
+ * global per-protocol information. This data structure is set up by
+ * netisr_register(), and derived from the public struct netisr_handler.
+ */
+struct netisr_proto {
+ const char *np_name; /* Character string protocol name. */
+ netisr_handler_t *np_handler; /* Protocol handler. */
+ netisr_m2flow_t *np_m2flow; /* Query flow for untagged packet. */
+ netisr_m2cpuid_t *np_m2cpuid; /* Query CPU to process packet on. */
+ netisr_drainedcpu_t *np_drainedcpu; /* Callback when drained a queue. */
+ u_int np_qlimit; /* Maximum per-CPU queue depth. */
+ u_int np_policy; /* Work placement policy. */
+};
+
+#define NETISR_MAXPROT 16 /* Compile-time limit. */
+
+/*
+ * Protocol-specific work for each workstream is described by struct
+ * netisr_work. Each work descriptor consists of an mbuf queue and
+ * statistics.
+ */
+struct netisr_work {
+ /*
+ * Packet queue, linked by m_nextpkt.
+ */
+ struct mbuf *nw_head;
+ struct mbuf *nw_tail;
+ u_int nw_len;
+ u_int nw_qlimit;
+ u_int nw_watermark;
+
+ /*
+ * Statistics -- written unlocked, but mostly from curcpu.
+ */
+ u_int64_t nw_dispatched; /* Number of direct dispatches. */
+ u_int64_t nw_hybrid_dispatched; /* "" hybrid dispatches. */
+ u_int64_t nw_qdrops; /* "" drops. */
+ u_int64_t nw_queued; /* "" enqueues. */
+ u_int64_t nw_handled; /* "" handled in worker. */
+};
+
+/*
+ * Workstreams hold a queue of ordered work across each protocol, and are
+ * described by netisr_workstream. Each workstream is associated with a
+ * worker thread, which in turn is pinned to a CPU. Work associated with a
+ * workstream can be processed in other threads during direct dispatch;
+ * concurrent processing is prevented by the NWS_RUNNING flag, which
+ * indicates that a thread is already processing the work queue. It is
+ * important to prevent a directly dispatched packet from "skipping ahead" of
+ * work already in the workstream queue.
+ */
+struct netisr_workstream {
+ struct intr_event *nws_intr_event; /* Handler for stream. */
+ void *nws_swi_cookie; /* swi(9) cookie for stream. */
+ struct mtx nws_mtx; /* Synchronize work. */
+ u_int nws_cpu; /* CPU pinning. */
+ u_int nws_flags; /* Wakeup flags. */
+ u_int nws_pendingbits; /* Scheduled protocols. */
+
+ /*
+ * Each protocol has per-workstream data.
+ */
+ struct netisr_work nws_work[NETISR_MAXPROT];
+} __aligned(CACHE_LINE_SIZE);
+
+/*
+ * Per-workstream flags.
+ */
+#define NWS_RUNNING 0x00000001 /* Currently running in a thread. */
+#define NWS_DISPATCHING 0x00000002 /* Currently being direct-dispatched. */
+#define NWS_SCHEDULED 0x00000004 /* Signal issued. */
+
+#endif /* !_NET_NETISR_INTERNAL_H_ */
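The NWS_* flags above are what keep a directly dispatched packet from overtaking
work already queued on the workstream. The following is a minimal sketch of the
serialization they enable; it is illustrative only (not the code from netisr.c),
and assumes the struct definitions above plus the standard mtx(9) and mbuf(9)
kernel primitives.

	/*
	 * Illustrative sketch: direct dispatch may only run the handler when
	 * no thread is already draining this workstream; otherwise the packet
	 * is appended to the per-protocol queue to preserve ordering.
	 */
	static int
	example_direct_dispatch(struct netisr_workstream *nwsp, u_int proto,
	    struct mbuf *m)
	{
		struct netisr_work *npwp = &nwsp->nws_work[proto];

		mtx_lock(&nwsp->nws_mtx);
		if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING)) {
			/* A thread is active: enqueue instead of running. */
			if (npwp->nw_len >= npwp->nw_qlimit) {
				npwp->nw_qdrops++;
				mtx_unlock(&nwsp->nws_mtx);
				m_freem(m);
				return (ENOBUFS);
			}
			if (npwp->nw_head == NULL)
				npwp->nw_head = m;
			else
				npwp->nw_tail->m_nextpkt = m;
			npwp->nw_tail = m;
			npwp->nw_len++;
			npwp->nw_queued++;
			mtx_unlock(&nwsp->nws_mtx);
			return (0);
		}
		nwsp->nws_flags |= NWS_DISPATCHING;
		mtx_unlock(&nwsp->nws_mtx);
		/* ... run the protocol handler, then clear NWS_DISPATCHING ... */
		return (0);
	}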
diff --git a/freebsd/sys/net/radix_mpath.c b/freebsd/sys/net/radix_mpath.c
index bb7b6fd4..6a3e3ef7 100644
--- a/freebsd/sys/net/radix_mpath.c
+++ b/freebsd/sys/net/radix_mpath.c
@@ -102,10 +102,7 @@ rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
{
struct radix_node *rn;
- if (!rn_mpath_next((struct radix_node *)rt))
- return rt;
-
- if (!gate)
+ if (!gate || !rt->rt_gateway)
return NULL;
/* beyond here, we use rn as the master copy */
diff --git a/freebsd/sys/net/raw_cb.h b/freebsd/sys/net/raw_cb.h
index 35b546c5..1b347e02 100644
--- a/freebsd/sys/net/raw_cb.h
+++ b/freebsd/sys/net/raw_cb.h
@@ -70,9 +70,14 @@ pr_init_t raw_init;
* Library routines for raw socket usrreq functions; will always be wrapped
* so that protocol-specific functions can be handled.
*/
+typedef int (*raw_input_cb_fn)(struct mbuf *, struct sockproto *,
+ struct sockaddr *, struct rawcb *);
+
int raw_attach(struct socket *, int);
void raw_detach(struct rawcb *);
void raw_input(struct mbuf *, struct sockproto *, struct sockaddr *);
+void raw_input_ext(struct mbuf *, struct sockproto *, struct sockaddr *,
+ raw_input_cb_fn);
/*
* Generic pr_usrreqs entries for raw socket protocols, usually wrapped so
diff --git a/freebsd/sys/net/raw_usrreq.c b/freebsd/sys/net/raw_usrreq.c
index 0723799f..0d7973e9 100644
--- a/freebsd/sys/net/raw_usrreq.c
+++ b/freebsd/sys/net/raw_usrreq.c
@@ -73,6 +73,14 @@ raw_init(void)
void
raw_input(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src)
{
+
+ return (raw_input_ext(m0, proto, src, NULL));
+}
+
+void
+raw_input_ext(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src,
+ raw_input_cb_fn cb)
+{
struct rawcb *rp;
struct mbuf *m = m0;
struct socket *last;
@@ -85,6 +93,8 @@ raw_input(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src)
if (rp->rcb_proto.sp_protocol &&
rp->rcb_proto.sp_protocol != proto->sp_protocol)
continue;
+ if (cb != NULL && (*cb)(m, proto, src, rp) != 0)
+ continue;
if (last) {
struct mbuf *n;
n = m_copy(m, 0, (int)M_COPYALL);
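The new raw_input_ext() lets the caller veto delivery per control block: the
callback runs once for each rawcb and a non-zero return skips that socket. A
hedged sketch of such a filter follows; the function name and policy here are
hypothetical (the in-tree consumer is raw_input_rts_cb in the rtsock.c changes
below).

	/*
	 * Hypothetical raw_input_ext() filter: skip delivery to any control
	 * block whose protocol family differs from the incoming message.
	 * Returning non-zero skips the rawcb; zero delivers as usual.
	 */
	static int
	example_raw_filter(struct mbuf *m, struct sockproto *proto,
	    struct sockaddr *src, struct rawcb *rp)
	{

		if (rp->rcb_socket != NULL &&
		    rp->rcb_proto.sp_family != proto->sp_family)
			return (1);	/* Skip this socket. */
		return (0);		/* Deliver. */
	}

	/* Usage: raw_input_ext(m, &proto, &src, example_raw_filter); */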
diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c
index 5827cc00..3821c208 100644
--- a/freebsd/sys/net/route.c
+++ b/freebsd/sys/net/route.c
@@ -37,6 +37,7 @@
***********************************************************************/
#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_route.h>
#include <rtems/bsd/local/opt_mrouting.h>
#include <rtems/bsd/local/opt_mpath.h>
@@ -69,12 +70,34 @@
#include <vm/uma.h>
+/* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBs. */
+#define RT_MAXFIBS 16
+
+/* Kernel config default option. */
+#ifdef ROUTETABLES
+#if ROUTETABLES <= 0
+#error "ROUTETABLES defined too low"
+#endif
+#if ROUTETABLES > RT_MAXFIBS
+#error "ROUTETABLES defined too big"
+#endif
+#define RT_NUMFIBS ROUTETABLES
+#endif /* ROUTETABLES */
+/* Initialize to default if not otherwise set. */
+#ifndef RT_NUMFIBS
+#define RT_NUMFIBS 1
+#endif
+
u_int rt_numfibs = RT_NUMFIBS;
-SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
+SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
/*
* Allow the boot code to allow LESS than RT_MAXFIBS to be used.
* We can't do more because storage is statically allocated for now.
- * (for compatibility reasons.. this will change).
+ * (for compatibility reasons.. this will change. When this changes, code should
+ * be refactored to protocol independent parts and protocol dependent parts,
+ * probably hanging off domain(9)-specific storage to not need the full
+ * fib * af RNH allocation etc. but allow tuning the number of tables per
+ * address family).
*/
TUNABLE_INT("net.fibs", &rt_numfibs);
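For context, a hedged example of how the number of FIBs is selected with these
knobs; the option and tunable names are taken from the code above, the values
are illustrative only.

	# Compile time, in the kernel configuration file
	# (bounded by RT_MAXFIBS == 16):
	options 	ROUTETABLES=4

	# Boot time, in /boot/loader.conf (typically used to select
	# fewer tables than the compiled-in maximum):
	net.fibs="4"
	net.add_addr_allfibs="0"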
@@ -84,9 +107,12 @@ TUNABLE_INT("net.fibs", &rt_numfibs);
* changes for the FIB of the caller when adding a new set of addresses
 * to an interface. XXX this is a shotgun approach to a problem that needs
 * a more fine-grained solution; that will come.
+ * XXX it also has problems getting the FIB from curthread, which will not
+ * always work given the fib can be overridden and prefixes can be added
+ * from the network stack context.
*/
u_int rt_add_addr_allfibs = 1;
-SYSCTL_INT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
+SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
&rt_add_addr_allfibs, 0, "");
TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);
@@ -118,12 +144,6 @@ VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */
#define V_rtzone VNET(rtzone)
-#if 0
-/* default fib for tunnels to use */
-u_int tunnel_fib = 0;
-SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, "");
-#endif
-
#ifndef __rtems__
/*
* handler for net.my_fibnum
@@ -206,27 +226,23 @@ vnet_route_init(const void *unused __unused)
V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, 0);
for (dom = domains; dom; dom = dom->dom_next) {
- if (dom->dom_rtattach) {
- for (table = 0; table < rt_numfibs; table++) {
- if ( (fam = dom->dom_family) == AF_INET ||
- table == 0) {
- /* for now only AF_INET has > 1 table */
- /* XXX MRT
- * rtattach will be also called
- * from vfs_export.c but the
- * offset will be 0
- * (only for AF_INET and AF_INET6
- * which don't need it anyhow)
- */
- rnh = rt_tables_get_rnh_ptr(table, fam);
- if (rnh == NULL)
- panic("%s: rnh NULL", __func__);
- dom->dom_rtattach((void **)rnh,
- dom->dom_rtoffset);
- } else {
- break;
- }
- }
+ if (dom->dom_rtattach == NULL)
+ continue;
+
+ for (table = 0; table < rt_numfibs; table++) {
+ fam = dom->dom_family;
+ if (table != 0 && fam != AF_INET6 && fam != AF_INET)
+ break;
+
+ /*
+ * XXX MRT rtattach will be also called from
+ * vfs_export.c but the offset will be 0 (only for
+ * AF_INET and AF_INET6 which don't need it anyhow).
+ */
+ rnh = rt_tables_get_rnh_ptr(table, fam);
+ if (rnh == NULL)
+ panic("%s: rnh NULL", __func__);
+ dom->dom_rtattach((void **)rnh, dom->dom_rtoffset);
}
}
}
@@ -243,20 +259,19 @@ vnet_route_uninit(const void *unused __unused)
struct radix_node_head **rnh;
for (dom = domains; dom; dom = dom->dom_next) {
- if (dom->dom_rtdetach) {
- for (table = 0; table < rt_numfibs; table++) {
- if ( (fam = dom->dom_family) == AF_INET ||
- table == 0) {
- /* For now only AF_INET has > 1 tbl. */
- rnh = rt_tables_get_rnh_ptr(table, fam);
- if (rnh == NULL)
- panic("%s: rnh NULL", __func__);
- dom->dom_rtdetach((void **)rnh,
- dom->dom_rtoffset);
- } else {
- break;
- }
- }
+ if (dom->dom_rtdetach == NULL)
+ continue;
+
+ for (table = 0; table < rt_numfibs; table++) {
+ fam = dom->dom_family;
+
+ if (table != 0 && fam != AF_INET6 && fam != AF_INET)
+ break;
+
+ rnh = rt_tables_get_rnh_ptr(table, fam);
+ if (rnh == NULL)
+ panic("%s: rnh NULL", __func__);
+ dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset);
}
}
}
@@ -286,7 +301,8 @@ setfib(struct thread *td, struct setfib_args *uap)
void
rtalloc(struct route *ro)
{
- rtalloc_ign_fib(ro, 0UL, 0);
+
+ rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB);
}
void
@@ -306,7 +322,7 @@ rtalloc_ign(struct route *ro, u_long ignore)
RTFREE(rt);
ro->ro_rt = NULL;
}
- ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0);
+ ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB);
if (ro->ro_rt)
RT_UNLOCK(ro->ro_rt);
}
@@ -336,7 +352,8 @@ rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
struct rtentry *
rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
{
- return (rtalloc1_fib(dst, report, ignflags, 0));
+
+ return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB));
}
struct rtentry *
@@ -344,7 +361,6 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
u_int fibnum)
{
struct radix_node_head *rnh;
- struct rtentry *rt;
struct radix_node *rn;
struct rtentry *newrt;
struct rt_addrinfo info;
@@ -352,17 +368,23 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
int needlock;
KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
- if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */
- fibnum = 0;
+ switch (dst->sa_family) {
+ case AF_INET6:
+ case AF_INET:
+ /* We support multiple FIBs. */
+ break;
+ default:
+ fibnum = RT_DEFAULT_FIB;
+ break;
+ }
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
newrt = NULL;
+ if (rnh == NULL)
+ goto miss;
+
/*
* Look up the address in the table for that Address Family
*/
- if (rnh == NULL) {
- V_rtstat.rts_unreach++;
- goto miss;
- }
needlock = !(ignflags & RTF_RNH_LOCKED);
if (needlock)
RADIX_NODE_HEAD_RLOCK(rnh);
@@ -372,7 +394,7 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
#endif
rn = rnh->rnh_matchaddr(dst, rnh);
if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
- newrt = rt = RNTORT(rn);
+ newrt = RNTORT(rn);
RT_LOCK(newrt);
RT_ADDREF(newrt);
if (needlock)
@@ -387,8 +409,9 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
* Which basically means
* "caint get there frm here"
*/
- V_rtstat.rts_unreach++;
miss:
+ V_rtstat.rts_unreach++;
+
if (report) {
/*
* If required, report the failure to the supervising
@@ -397,8 +420,8 @@ miss:
*/
bzero(&info, sizeof(info));
info.rti_info[RTAX_DST] = dst;
- rt_missmsg(msgtype, &info, 0, err);
- }
+ rt_missmsg_fib(msgtype, &info, 0, err, fibnum);
+ }
done:
if (newrt)
RT_LOCK_ASSERT(newrt);
@@ -499,7 +522,8 @@ rtredirect(struct sockaddr *dst,
int flags,
struct sockaddr *src)
{
- rtredirect_fib(dst, gateway, netmask, flags, src, 0);
+
+ rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB);
}
void
@@ -545,7 +569,7 @@ rtredirect_fib(struct sockaddr *dst,
goto done;
/*
* Create a new entry if we just got back a wildcard entry
- * or the the lookup failed. This is necessary for hosts
+ * or the lookup failed. This is necessary for hosts
* which use routing redirects generated by smart gateways
* to dynamically build the routing tables.
*/
@@ -622,7 +646,7 @@ out:
info.rti_info[RTAX_GATEWAY] = gateway;
info.rti_info[RTAX_NETMASK] = netmask;
info.rti_info[RTAX_AUTHOR] = src;
- rt_missmsg(RTM_REDIRECT, &info, flags, error);
+ rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum);
if (ifa != NULL)
ifa_free(ifa);
}
@@ -630,7 +654,8 @@ out:
int
rtioctl(u_long req, caddr_t data)
{
- return (rtioctl_fib(req, data, 0));
+
+ return (rtioctl_fib(req, data, RT_DEFAULT_FIB));
}
/*
@@ -660,7 +685,8 @@ rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
{
- return (ifa_ifwithroute_fib(flags, dst, gateway, 0));
+
+ return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB));
}
struct ifaddr *
@@ -745,7 +771,9 @@ rtrequest(int req,
int flags,
struct rtentry **ret_nrt)
{
- return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0));
+
+ return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt,
+ RT_DEFAULT_FIB));
}
int
@@ -784,7 +812,8 @@ rtrequest_fib(int req,
int
rt_getifa(struct rt_addrinfo *info)
{
- return (rt_getifa_fib(info, 0));
+
+ return (rt_getifa_fib(info, RT_DEFAULT_FIB));
}
/*
@@ -1038,11 +1067,20 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
register struct radix_node_head *rnh;
struct ifaddr *ifa;
struct sockaddr *ndst;
+ struct sockaddr_storage mdst;
#define senderr(x) { error = x ; goto bad; }
KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
- if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */
- fibnum = 0;
+ switch (dst->sa_family) {
+ case AF_INET6:
+ case AF_INET:
+ /* We support multiple FIBs. */
+ break;
+ default:
+ fibnum = RT_DEFAULT_FIB;
+ break;
+ }
+
/*
* Find the correct routing tree to use for this Address Family
*/
@@ -1064,6 +1102,10 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
switch (req) {
case RTM_DELETE:
+ if (netmask) {
+ rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
+ dst = (struct sockaddr *)&mdst;
+ }
#ifdef RADIX_MPATH
if (rn_mpath_capable(rnh)) {
error = rn_mpath_update(req, info, rnh, ret_nrt);
@@ -1144,8 +1186,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
rt->rt_flags = RTF_UP | flags;
rt->rt_fibnum = fibnum;
/*
- * Add the gateway. Possibly re-malloc-ing the storage for it
- *
+ * Add the gateway. Possibly re-malloc-ing the storage for it.
*/
RT_LOCK(rt);
if ((error = rt_setgate(rt, dst, gateway)) != 0) {
@@ -1194,11 +1235,17 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
#ifdef FLOWTABLE
rt0 = NULL;
- /* XXX
- * "flow-table" only support IPv4 at the moment.
- */
+ /* "flow-table" only supports IPv6 and IPv4 at the moment. */
+ switch (dst->sa_family) {
+#ifdef notyet
+#ifdef INET6
+ case AF_INET6:
+#endif
+#endif
#ifdef INET
- if (dst->sa_family == AF_INET) {
+ case AF_INET:
+#endif
+#if defined(INET6) || defined(INET)
rn = rnh->rnh_matchaddr(dst, rnh);
if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
struct sockaddr *mask;
@@ -1237,9 +1284,9 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
}
}
}
+#endif /* INET6 || INET */
}
-#endif
-#endif
+#endif /* FLOWTABLE */
/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
@@ -1261,9 +1308,20 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
}
#ifdef FLOWTABLE
else if (rt0 != NULL) {
+ switch (dst->sa_family) {
+#ifdef notyet
+#ifdef INET6
+ case AF_INET6:
+ flowtable_route_flush(V_ip6_ft, rt0);
+ break;
+#endif
+#endif
#ifdef INET
- flowtable_route_flush(V_ip_ft, rt0);
+ case AF_INET:
+ flowtable_route_flush(V_ip_ft, rt0);
+ break;
#endif
+ }
RTFREE(rt0);
}
#endif
@@ -1395,8 +1453,17 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
dst = ifa->ifa_addr;
netmask = ifa->ifa_netmask;
}
- if ( dst->sa_family != AF_INET)
- fibnum = 0;
+ if (dst->sa_len == 0)
+		return (EINVAL);
+ switch (dst->sa_family) {
+ case AF_INET6:
+ case AF_INET:
+ /* We support multiple FIBs. */
+ break;
+ default:
+ fibnum = RT_DEFAULT_FIB;
+ break;
+ }
if (fibnum == -1) {
if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
#ifndef __rtems__
@@ -1413,8 +1480,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
startfib = fibnum;
endfib = fibnum;
}
- if (dst->sa_len == 0)
- return(EINVAL);
/*
* If it's a delete, check that if it exists,
@@ -1438,9 +1503,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
* Now go through all the requested tables (fibs) and do the
* requested action. Realistically, this will either be fib 0
* for protocols that don't do multiple tables or all the
- * tables for those that do. XXX For this version only AF_INET.
- * When that changes code should be refactored to protocol
- * independent parts and protocol dependent parts.
+ * tables for those that do.
*/
for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
if (cmd == RTM_DELETE) {
@@ -1494,7 +1557,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
*/
bzero((caddr_t)&info, sizeof(info));
info.rti_ifa = ifa;
- info.rti_flags = flags | ifa->ifa_flags;
+ info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF);
info.rti_info[RTAX_DST] = dst;
/*
* doing this for compatibility reasons
@@ -1514,10 +1577,10 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
#ifdef RADIX_MPATH
/*
* in case address alias finds the first address
- * e.g. ifconfig bge0 192.103.54.246/24
- * e.g. ifconfig bge0 192.103.54.247/24
- * the address set in the route is 192.103.54.246
- * so we need to replace it with 192.103.54.247
+ * e.g. ifconfig bge0 192.0.2.246/24
+ * e.g. ifconfig bge0 192.0.2.247/24
+ * the address set in the route is 192.0.2.246
+ * so we need to replace it with 192.0.2.247
*/
if (memcmp(rt->rt_ifa->ifa_addr,
ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
@@ -1538,7 +1601,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
}
RT_ADDREF(rt);
RT_UNLOCK(rt);
- rt_newaddrmsg(cmd, ifa, error, rt);
+ rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum);
RT_LOCK(rt);
RT_REMREF(rt);
if (cmd == RTM_DELETE) {
@@ -1580,12 +1643,14 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
return (error);
}
+#ifndef BURN_BRIDGES
/* special one for inet internal use. may not use. */
int
rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
{
return (rtinit1(ifa, cmd, flags, -1));
}
+#endif
/*
* Set up a routing table entry, normally
@@ -1595,7 +1660,7 @@ int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
struct sockaddr *dst;
- int fib = 0;
+ int fib = RT_DEFAULT_FIB;
if (flags & RTF_HOST) {
dst = ifa->ifa_dstaddr;
@@ -1603,7 +1668,12 @@ rtinit(struct ifaddr *ifa, int cmd, int flags)
dst = ifa->ifa_addr;
}
- if (dst->sa_family == AF_INET)
+ switch (dst->sa_family) {
+ case AF_INET6:
+ case AF_INET:
+ /* We do support multiple FIBs. */
fib = -1;
+ break;
+ }
return (rtinit1(ifa, cmd, flags, fib));
}
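With this change the legacy entry points (rtalloc(), rtrequest(), rtinit(),
...) are thin wrappers that pass RT_DEFAULT_FIB; code that targets a specific
table calls the *_fib variants directly. A minimal sketch, with the sockaddr
setup elided and fibnum assumed to be valid (< rt_numfibs):

	/*
	 * Illustrative only: install a gateway route into an explicit FIB.
	 * dst, gateway and netmask are assumed to be filled-in sockaddrs;
	 * passing NULL for ret_nrt sidesteps rtentry reference management.
	 */
	int error;

	error = rtrequest_fib(RTM_ADD, dst, gateway, netmask,
	    RTF_UP | RTF_GATEWAY | RTF_STATIC, NULL, fibnum);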
diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h
index 4014b3f6..b26ac441 100644
--- a/freebsd/sys/net/route.h
+++ b/freebsd/sys/net/route.h
@@ -86,30 +86,8 @@ struct rt_metrics {
#define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */
#define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ))
-/* MRT compile-time constants */
-#ifdef _KERNEL
- #ifndef ROUTETABLES
- #define RT_NUMFIBS 1
- #define RT_MAXFIBS 1
- #else
- /* while we use 4 bits in the mbuf flags, we are limited to 16 */
- #define RT_MAXFIBS 16
- #if ROUTETABLES > RT_MAXFIBS
- #define RT_NUMFIBS RT_MAXFIBS
- #error "ROUTETABLES defined too big"
- #else
- #if ROUTETABLES == 0
- #define RT_NUMFIBS 1
- #else
- #define RT_NUMFIBS ROUTETABLES
- #endif
- #endif
- #endif
-#endif
-
+#define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */
 extern u_int rt_numfibs; /* number of usable routing tables */
-extern u_int tunnel_fib; /* tunnels use these */
-extern u_int fwd_fib; /* packets being forwarded use these routes */
/*
* XXX kernel function pointer `rt_output' is visible to applications.
*/
@@ -325,7 +303,6 @@ struct rt_addrinfo {
#define RT_LOCK_INIT(_rt) \
mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
#define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx)
-#define RT_TRYLOCK(_rt) mtx_trylock(&(_rt)->rt_mtx)
#define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx)
#define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx)
#define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
@@ -360,22 +337,6 @@ struct rt_addrinfo {
RTFREE_LOCKED(_rt); \
} while (0)
-#define RT_TEMP_UNLOCK(_rt) do { \
- RT_ADDREF(_rt); \
- RT_UNLOCK(_rt); \
-} while (0)
-
-#define RT_RELOCK(_rt) do { \
- RT_LOCK(_rt); \
- if ((_rt)->rt_refcnt <= 1) { \
- rtfree(_rt); \
- _rt = 0; /* signal that it went away */ \
- } else { \
- RT_REMREF(_rt); \
- /* note that _rt is still valid */ \
- } \
-} while (0)
-
struct radix_node_head *rt_tables_get_rnh(int, int);
struct ifmultiaddr;
@@ -384,7 +345,9 @@ void rt_ieee80211msg(struct ifnet *, int, void *, size_t);
void rt_ifannouncemsg(struct ifnet *, int);
void rt_ifmsg(struct ifnet *);
void rt_missmsg(int, struct rt_addrinfo *, int, int);
+void rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int);
void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
+void rt_newaddrmsg_fib(int, struct ifaddr *, int, struct rtentry *, int);
void rt_newmaddrmsg(int, struct ifmultiaddr *);
int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
@@ -418,8 +381,10 @@ void rtredirect(struct sockaddr *, struct sockaddr *,
int rtrequest(int, struct sockaddr *,
struct sockaddr *, struct sockaddr *, int, struct rtentry **);
+#ifndef BURN_BRIDGES
/* defaults to "all" FIBs */
int rtinit_fib(struct ifaddr *, int, int);
+#endif
/* XXX MRT NEW VERSIONS THAT USE FIBs
 * For now the protocol independent versions are the same as the AF_INET ones
diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c
index bfdecf87..beca84da 100644
--- a/freebsd/sys/net/rtsock.c
+++ b/freebsd/sys/net/rtsock.c
@@ -116,7 +116,34 @@ struct if_msghdr32 {
uint16_t ifm_index;
struct if_data32 ifm_data;
};
-#endif
+
+struct if_msghdrl32 {
+ uint16_t ifm_msglen;
+ uint8_t ifm_version;
+ uint8_t ifm_type;
+ int32_t ifm_addrs;
+ int32_t ifm_flags;
+ uint16_t ifm_index;
+ uint16_t _ifm_spare1;
+ uint16_t ifm_len;
+ uint16_t ifm_data_off;
+ struct if_data32 ifm_data;
+};
+
+struct ifa_msghdrl32 {
+ uint16_t ifam_msglen;
+ uint8_t ifam_version;
+ uint8_t ifam_type;
+ int32_t ifam_addrs;
+ int32_t ifam_flags;
+ uint16_t ifam_index;
+ uint16_t _ifam_spare1;
+ uint16_t ifam_len;
+ uint16_t ifam_data_off;
+ int32_t ifam_metric;
+ struct if_data32 ifam_data;
+};
+#endif /* COMPAT_FREEBSD32 */
MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
@@ -124,6 +151,13 @@ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
static struct sockaddr route_src = { 2, PF_ROUTE, };
static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
+/*
+ * Used by rtsock/raw_input callback code to decide whether to filter the update
+ * notification to a socket bound to a particular FIB.
+ */
+#define RTS_FILTER_FIB M_PROTO8
+#define RTS_ALLFIBS -1
+
static struct {
int ip_count; /* attached w/ AF_INET */
int ip6_count; /* attached w/ AF_INET6 */
@@ -161,7 +195,7 @@ static void rt_setmetrics(u_long which, const struct rt_metrics *in,
struct rt_metrics_lite *out);
static void rt_getmetrics(const struct rt_metrics_lite *in,
struct rt_metrics *out);
-static void rt_dispatch(struct mbuf *, const struct sockaddr *);
+static void rt_dispatch(struct mbuf *, sa_family_t);
static struct netisr_handler rtsock_nh = {
.nh_name = "rtsock",
@@ -200,6 +234,31 @@ rts_init(void)
}
SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
+static int
+raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
+ struct rawcb *rp)
+{
+ int fibnum;
+
+ KASSERT(m != NULL, ("%s: m is NULL", __func__));
+ KASSERT(proto != NULL, ("%s: proto is NULL", __func__));
+ KASSERT(rp != NULL, ("%s: rp is NULL", __func__));
+
+ /* No filtering requested. */
+ if ((m->m_flags & RTS_FILTER_FIB) == 0)
+ return (0);
+
+	/* Check if it is an rts message and the fib matches that of the socket. */
+ fibnum = M_GETFIB(m);
+ if (proto->sp_family != PF_ROUTE ||
+ rp->rcb_socket == NULL ||
+ rp->rcb_socket->so_fibnum == fibnum)
+ return (0);
+
+ /* Filtering requested and no match, the socket shall be skipped. */
+ return (1);
+}
+
static void
rts_input(struct mbuf *m)
{
@@ -216,7 +275,7 @@ rts_input(struct mbuf *m)
} else
route_proto.sp_protocol = 0;
- raw_input(m, &route_proto, &route_src);
+ raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb);
}
/*
@@ -428,7 +487,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
* Try to find an address on the given outgoing interface
* that belongs to the jail.
*/
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct sockaddr *sa;
sa = ifa->ifa_addr;
@@ -440,7 +499,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
break;
}
}
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
if (!found) {
/*
* As a last resort return the 'default' jail address.
@@ -470,7 +529,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
* Try to find an address on the given outgoing interface
* that belongs to the jail.
*/
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct sockaddr *sa;
sa = ifa->ifa_addr;
@@ -483,7 +542,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
break;
}
}
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
if (!found) {
/*
* As a last resort return the 'default' jail address.
@@ -521,6 +580,7 @@ route_output(struct mbuf *m, struct socket *so)
int len, error = 0;
struct ifnet *ifp = NULL;
union sockaddr_union saun;
+ sa_family_t saf = AF_UNSPEC;
#define senderr(e) { error = e; goto flush;}
if (m == NULL || ((m->m_len < sizeof(long)) &&
@@ -561,6 +621,7 @@ route_output(struct mbuf *m, struct socket *so)
(info.rti_info[RTAX_GATEWAY] != NULL &&
info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
senderr(EINVAL);
+ saf = info.rti_info[RTAX_DST]->sa_family;
/*
* Verify that the caller has the appropriate privilege; RTM_GET
* is the only operation the non-superuser is allowed.
@@ -898,6 +959,8 @@ flush:
Free(rtm);
}
if (m) {
+ M_SETFIB(m, so->so_fibnum);
+ m->m_flags |= RTS_FILTER_FIB;
if (rp) {
/*
 * XXX ensure we don't get a copy by
@@ -905,10 +968,10 @@ flush:
*/
unsigned short family = rp->rcb_proto.sp_family;
rp->rcb_proto.sp_family = 0;
- rt_dispatch(m, info.rti_info[RTAX_DST]);
+ rt_dispatch(m, saf);
rp->rcb_proto.sp_family = family;
} else
- rt_dispatch(m, info.rti_info[RTAX_DST]);
+ rt_dispatch(m, saf);
}
}
return (error);
@@ -984,6 +1047,9 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
return (0);
}
+/*
+ * Used by the routing socket.
+ */
static struct mbuf *
rt_msg1(int type, struct rt_addrinfo *rtinfo)
{
@@ -1051,6 +1117,9 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
return (m);
}
+/*
+ * Used by the sysctl code and routing socket.
+ */
static int
rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
{
@@ -1064,17 +1133,31 @@ again:
case RTM_DELADDR:
case RTM_NEWADDR:
- len = sizeof(struct ifa_msghdr);
+ if (w != NULL && w->w_op == NET_RT_IFLISTL) {
+#ifdef COMPAT_FREEBSD32
+ if (w->w_req->flags & SCTL_MASK32)
+ len = sizeof(struct ifa_msghdrl32);
+ else
+#endif
+ len = sizeof(struct ifa_msghdrl);
+ } else
+ len = sizeof(struct ifa_msghdr);
break;
case RTM_IFINFO:
#ifdef COMPAT_FREEBSD32
if (w != NULL && w->w_req->flags & SCTL_MASK32) {
- len = sizeof(struct if_msghdr32);
+ if (w->w_op == NET_RT_IFLISTL)
+ len = sizeof(struct if_msghdrl32);
+ else
+ len = sizeof(struct if_msghdr32);
break;
}
#endif
- len = sizeof(struct if_msghdr);
+ if (w != NULL && w->w_op == NET_RT_IFLISTL)
+ len = sizeof(struct if_msghdrl);
+ else
+ len = sizeof(struct if_msghdr);
break;
case RTM_NEWMADDR:
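The if_msghdrl/ifa_msghdrl variants returned for NET_RT_IFLISTL carry their own
length and data offset, so userland stays binary compatible as struct if_data
grows. A hedged userland sketch of walking that list (error handling
abbreviated, field use illustrative):

	#include <sys/param.h>
	#include <sys/socket.h>
	#include <sys/sysctl.h>
	#include <net/if.h>
	#include <net/route.h>
	#include <stdlib.h>

	/*
	 * Sketch: dump interface messages via NET_RT_IFLISTL.  Always step
	 * by ifm_msglen and locate if_data through ifm_data_off instead of
	 * assuming a fixed struct layout.
	 */
	static void
	walk_iflistl(void)
	{
		int mib[6] = { CTL_NET, PF_ROUTE, 0, 0 /* AF_UNSPEC */,
		    NET_RT_IFLISTL, 0 };
		size_t len;
		char *buf, *next;

		if (sysctl(mib, 6, NULL, &len, NULL, 0) != 0)
			return;
		if ((buf = malloc(len)) == NULL ||
		    sysctl(mib, 6, buf, &len, NULL, 0) != 0)
			return;
		for (next = buf; next < buf + len; ) {
			struct if_msghdrl *ifm = (struct if_msghdrl *)next;

			if (ifm->ifm_type == RTM_IFINFO) {
				struct if_data *ifd = (struct if_data *)
				    ((char *)ifm + ifm->ifm_data_off);
				/* ... consume ifm->ifm_index, ifd ... */
				(void)ifd;
			}
			next += ifm->ifm_msglen;
		}
		free(buf);
	}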
@@ -1137,7 +1220,8 @@ again:
* destination.
*/
void
-rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
+rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
+ int fibnum)
{
struct rt_msghdr *rtm;
struct mbuf *m;
@@ -1148,11 +1232,26 @@ rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
m = rt_msg1(type, rtinfo);
if (m == NULL)
return;
+
+ if (fibnum != RTS_ALLFIBS) {
+ KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
+ "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
+ M_SETFIB(m, fibnum);
+ m->m_flags |= RTS_FILTER_FIB;
+ }
+
rtm = mtod(m, struct rt_msghdr *);
rtm->rtm_flags = RTF_DONE | flags;
rtm->rtm_errno = error;
rtm->rtm_addrs = rtinfo->rti_addrs;
- rt_dispatch(m, sa);
+ rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
+}
+
+void
+rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
+{
+
+ rt_missmsg_fib(type, rtinfo, flags, error, RTS_ALLFIBS);
}
/*
@@ -1177,7 +1276,7 @@ rt_ifmsg(struct ifnet *ifp)
ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
ifm->ifm_data = ifp->if_data;
ifm->ifm_addrs = 0;
- rt_dispatch(m, NULL);
+ rt_dispatch(m, AF_UNSPEC);
}
/*
@@ -1189,7 +1288,8 @@ rt_ifmsg(struct ifnet *ifp)
* copies of it.
*/
void
-rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
+rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt,
+ int fibnum)
{
struct rt_addrinfo info;
struct sockaddr *sa = NULL;
@@ -1247,10 +1347,24 @@ rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
rtm->rtm_errno = error;
rtm->rtm_addrs = info.rti_addrs;
}
- rt_dispatch(m, sa);
+ if (fibnum != RTS_ALLFIBS) {
+ KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: "
+ "fibnum out of range 0 <= %d < %d", __func__,
+ fibnum, rt_numfibs));
+ M_SETFIB(m, fibnum);
+ m->m_flags |= RTS_FILTER_FIB;
+ }
+ rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
}
}
+void
+rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
+{
+
+ rt_newaddrmsg_fib(cmd, ifa, error, rt, RTS_ALLFIBS);
+}
+
/*
* This is the analogue to the rt_newaddrmsg which performs the same
 * function but for multicast group memberships. This is easier since
@@ -1283,7 +1397,7 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
__func__));
ifmam->ifmam_index = ifp->if_index;
ifmam->ifmam_addrs = info.rti_addrs;
- rt_dispatch(m, ifma->ifma_addr);
+	rt_dispatch(m,
+	    ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
}
static struct mbuf *
@@ -1343,7 +1457,7 @@ rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
if (m->m_flags & M_PKTHDR)
m->m_pkthdr.len += data_len;
mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
- rt_dispatch(m, NULL);
+ rt_dispatch(m, AF_UNSPEC);
}
}
@@ -1359,11 +1473,11 @@ rt_ifannouncemsg(struct ifnet *ifp, int what)
m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
if (m != NULL)
- rt_dispatch(m, NULL);
+ rt_dispatch(m, AF_UNSPEC);
}
static void
-rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
+rt_dispatch(struct mbuf *m, sa_family_t saf)
{
struct m_tag *tag;
@@ -1372,14 +1486,14 @@ rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
* use when injecting the mbuf into the routing socket buffer from
* the netisr.
*/
- if (sa != NULL) {
+ if (saf != AF_UNSPEC) {
tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
M_NOWAIT);
if (tag == NULL) {
m_freem(m);
return;
}
- *(unsigned short *)(tag + 1) = sa->sa_family;
+ *(unsigned short *)(tag + 1) = saf;
m_tag_prepend(m, tag);
}
#ifdef VIMAGE
@@ -1473,6 +1587,127 @@ copy_ifdata32(struct if_data *src, struct if_data32 *dst)
#endif
static int
+sysctl_iflist_ifml(struct ifnet *ifp, struct rt_addrinfo *info,
+ struct walkarg *w, int len)
+{
+ struct if_msghdrl *ifm;
+
+#ifdef COMPAT_FREEBSD32
+ if (w->w_req->flags & SCTL_MASK32) {
+ struct if_msghdrl32 *ifm32;
+
+ ifm32 = (struct if_msghdrl32 *)w->w_tmem;
+ ifm32->ifm_addrs = info->rti_addrs;
+ ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm32->ifm_index = ifp->if_index;
+ ifm32->_ifm_spare1 = 0;
+ ifm32->ifm_len = sizeof(*ifm32);
+ ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
+
+ copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
+
+ return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
+ }
+#endif
+ ifm = (struct if_msghdrl *)w->w_tmem;
+ ifm->ifm_addrs = info->rti_addrs;
+ ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm->ifm_index = ifp->if_index;
+ ifm->_ifm_spare1 = 0;
+ ifm->ifm_len = sizeof(*ifm);
+ ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
+
+ ifm->ifm_data = ifp->if_data;
+
+ return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
+}
+
+static int
+sysctl_iflist_ifm(struct ifnet *ifp, struct rt_addrinfo *info,
+ struct walkarg *w, int len)
+{
+ struct if_msghdr *ifm;
+
+#ifdef COMPAT_FREEBSD32
+ if (w->w_req->flags & SCTL_MASK32) {
+ struct if_msghdr32 *ifm32;
+
+ ifm32 = (struct if_msghdr32 *)w->w_tmem;
+ ifm32->ifm_addrs = info->rti_addrs;
+ ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm32->ifm_index = ifp->if_index;
+
+ copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
+
+ return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
+ }
+#endif
+ ifm = (struct if_msghdr *)w->w_tmem;
+ ifm->ifm_addrs = info->rti_addrs;
+ ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm->ifm_index = ifp->if_index;
+
+ ifm->ifm_data = ifp->if_data;
+
+ return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
+}
+
+static int
+sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
+ struct walkarg *w, int len)
+{
+ struct ifa_msghdrl *ifam;
+
+#ifdef COMPAT_FREEBSD32
+ if (w->w_req->flags & SCTL_MASK32) {
+ struct ifa_msghdrl32 *ifam32;
+
+ ifam32 = (struct ifa_msghdrl32 *)w->w_tmem;
+ ifam32->ifam_addrs = info->rti_addrs;
+ ifam32->ifam_flags = ifa->ifa_flags;
+ ifam32->ifam_index = ifa->ifa_ifp->if_index;
+ ifam32->_ifam_spare1 = 0;
+ ifam32->ifam_len = sizeof(*ifam32);
+ ifam32->ifam_data_off =
+ offsetof(struct ifa_msghdrl32, ifam_data);
+ ifam32->ifam_metric = ifa->ifa_metric;
+
+ copy_ifdata32(&ifa->ifa_ifp->if_data, &ifam32->ifam_data);
+
+ return (SYSCTL_OUT(w->w_req, (caddr_t)ifam32, len));
+ }
+#endif
+
+ ifam = (struct ifa_msghdrl *)w->w_tmem;
+ ifam->ifam_addrs = info->rti_addrs;
+ ifam->ifam_flags = ifa->ifa_flags;
+ ifam->ifam_index = ifa->ifa_ifp->if_index;
+ ifam->_ifam_spare1 = 0;
+ ifam->ifam_len = sizeof(*ifam);
+ ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
+ ifam->ifam_metric = ifa->ifa_metric;
+
+ ifam->ifam_data = ifa->if_data;
+
+ return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
+}
+
+static int
+sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
+ struct walkarg *w, int len)
+{
+ struct ifa_msghdr *ifam;
+
+ ifam = (struct ifa_msghdr *)w->w_tmem;
+ ifam->ifam_addrs = info->rti_addrs;
+ ifam->ifam_flags = ifa->ifa_flags;
+ ifam->ifam_index = ifa->ifa_ifp->if_index;
+ ifam->ifam_metric = ifa->ifa_metric;
+
+ return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
+}
+
+static int
sysctl_iflist(int af, struct walkarg *w)
{
struct ifnet *ifp;
@@ -1485,38 +1720,16 @@ sysctl_iflist(int af, struct walkarg *w)
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
ifa = ifp->if_addr;
info.rti_info[RTAX_IFP] = ifa->ifa_addr;
len = rt_msg2(RTM_IFINFO, &info, NULL, w);
info.rti_info[RTAX_IFP] = NULL;
if (w->w_req && w->w_tmem) {
- struct if_msghdr *ifm;
-
-#ifdef COMPAT_FREEBSD32
- if (w->w_req->flags & SCTL_MASK32) {
- struct if_msghdr32 *ifm32;
-
- ifm32 = (struct if_msghdr32 *)w->w_tmem;
- ifm32->ifm_index = ifp->if_index;
- ifm32->ifm_flags = ifp->if_flags |
- ifp->if_drv_flags;
- copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
- ifm32->ifm_addrs = info.rti_addrs;
- error = SYSCTL_OUT(w->w_req, (caddr_t)ifm32,
- len);
- goto sysctl_out;
- }
-#endif
- ifm = (struct if_msghdr *)w->w_tmem;
- ifm->ifm_index = ifp->if_index;
- ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
- ifm->ifm_data = ifp->if_data;
- ifm->ifm_addrs = info.rti_addrs;
- error = SYSCTL_OUT(w->w_req, (caddr_t)ifm, len);
-#ifdef COMPAT_FREEBSD32
- sysctl_out:
-#endif
+ if (w->w_op == NET_RT_IFLISTL)
+ error = sysctl_iflist_ifml(ifp, &info, w, len);
+ else
+ error = sysctl_iflist_ifm(ifp, &info, w, len);
if (error)
goto done;
}
@@ -1531,25 +1744,23 @@ sysctl_iflist(int af, struct walkarg *w)
info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
if (w->w_req && w->w_tmem) {
- struct ifa_msghdr *ifam;
-
- ifam = (struct ifa_msghdr *)w->w_tmem;
- ifam->ifam_index = ifa->ifa_ifp->if_index;
- ifam->ifam_flags = ifa->ifa_flags;
- ifam->ifam_metric = ifa->ifa_metric;
- ifam->ifam_addrs = info.rti_addrs;
- error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
+ if (w->w_op == NET_RT_IFLISTL)
+ error = sysctl_iflist_ifaml(ifa, &info,
+ w, len);
+ else
+ error = sysctl_iflist_ifam(ifa, &info,
+ w, len);
if (error)
goto done;
}
}
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
info.rti_info[RTAX_BRD] = NULL;
}
done:
if (ifp != NULL)
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
IFNET_RUNLOCK();
return (error);
}
@@ -1570,7 +1781,7 @@ sysctl_ifmalist(int af, struct walkarg *w)
continue;
ifa = ifp->if_addr;
info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
- IF_ADDR_LOCK(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (af && af != ifma->ifma_addr->sa_family)
continue;
@@ -1591,12 +1802,12 @@ sysctl_ifmalist(int af, struct walkarg *w)
ifmam->ifmam_addrs = info.rti_addrs;
error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
if (error) {
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
goto done;
}
}
}
- IF_ADDR_UNLOCK(ifp);
+ IF_ADDR_RUNLOCK(ifp);
}
done:
IFNET_RUNLOCK();
@@ -1662,16 +1873,17 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
rnh = rt_tables_get_rnh(BSD_DEFAULT_FIB, i);
#endif /* __rtems__ */
if (rnh != NULL) {
- RADIX_NODE_HEAD_LOCK(rnh);
+ RADIX_NODE_HEAD_RLOCK(rnh);
error = rnh->rnh_walktree(rnh,
sysctl_dumpentry, &w);
- RADIX_NODE_HEAD_UNLOCK(rnh);
+ RADIX_NODE_HEAD_RUNLOCK(rnh);
} else if (af != 0)
error = EAFNOSUPPORT;
}
break;
case NET_RT_IFLIST:
+ case NET_RT_IFLISTL:
error = sysctl_iflist(af, &w);
break;
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h
index d3c426a4..8ef1c00d 100644
--- a/freebsd/sys/net/vnet.h
+++ b/freebsd/sys/net/vnet.h
@@ -116,22 +116,27 @@ void vnet_destroy(struct vnet *vnet);
* Various macros -- get and set the current network stack, but also
* assertions.
*/
+#if defined(INVARIANTS) || defined(VNET_DEBUG)
+#define VNET_ASSERT(exp, msg) do { \
+ if (!(exp)) \
+ panic msg; \
+} while (0)
+#else
+#define VNET_ASSERT(exp, msg) do { \
+} while (0)
+#endif
+
#ifdef VNET_DEBUG
void vnet_log_recursion(struct vnet *, const char *, int);
-#define VNET_ASSERT(condition) \
- if (!(condition)) { \
- printf("VNET_ASSERT @ %s:%d %s():\n", \
- __FILE__, __LINE__, __FUNCTION__); \
- panic(#condition); \
- }
-
#define CURVNET_SET_QUIET(arg) \
- VNET_ASSERT((arg)->vnet_magic_n == VNET_MAGIC_N); \
+ VNET_ASSERT((arg) != NULL && (arg)->vnet_magic_n == VNET_MAGIC_N, \
+ ("CURVNET_SET at %s:%d %s() curvnet=%p vnet=%p", \
+ __FILE__, __LINE__, __func__, curvnet, (arg))); \
struct vnet *saved_vnet = curvnet; \
const char *saved_vnet_lpush = curthread->td_vnet_lpush; \
curvnet = arg; \
- curthread->td_vnet_lpush = __FUNCTION__;
+ curthread->td_vnet_lpush = __func__;
#define CURVNET_SET_VERBOSE(arg) \
CURVNET_SET_QUIET(arg) \
@@ -141,21 +146,31 @@ void vnet_log_recursion(struct vnet *, const char *, int);
#define CURVNET_SET(arg) CURVNET_SET_VERBOSE(arg)
#define CURVNET_RESTORE() \
- VNET_ASSERT(saved_vnet == NULL || \
- saved_vnet->vnet_magic_n == VNET_MAGIC_N); \
+ VNET_ASSERT(curvnet != NULL && (saved_vnet == NULL || \
+ saved_vnet->vnet_magic_n == VNET_MAGIC_N), \
+ ("CURVNET_RESTORE at %s:%d %s() curvnet=%p saved_vnet=%p", \
+ __FILE__, __LINE__, __func__, curvnet, saved_vnet)); \
curvnet = saved_vnet; \
curthread->td_vnet_lpush = saved_vnet_lpush;
#else /* !VNET_DEBUG */
-#define VNET_ASSERT(condition)
-#define CURVNET_SET(arg) \
+#define CURVNET_SET_QUIET(arg) \
+ VNET_ASSERT((arg) != NULL && (arg)->vnet_magic_n == VNET_MAGIC_N, \
+ ("CURVNET_SET at %s:%d %s() curvnet=%p vnet=%p", \
+ __FILE__, __LINE__, __func__, curvnet, (arg))); \
struct vnet *saved_vnet = curvnet; \
curvnet = arg;
-#define CURVNET_SET_VERBOSE(arg) CURVNET_SET(arg)
-#define CURVNET_SET_QUIET(arg) CURVNET_SET(arg)
+#define CURVNET_SET_VERBOSE(arg) \
+ CURVNET_SET_QUIET(arg)
+
+#define CURVNET_SET(arg) CURVNET_SET_VERBOSE(arg)
#define CURVNET_RESTORE() \
+ VNET_ASSERT(curvnet != NULL && (saved_vnet == NULL || \
+ saved_vnet->vnet_magic_n == VNET_MAGIC_N), \
+ ("CURVNET_RESTORE at %s:%d %s() curvnet=%p saved_vnet=%p", \
+ __FILE__, __LINE__, __func__, curvnet, saved_vnet)); \
curvnet = saved_vnet;
#endif /* VNET_DEBUG */
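The strengthened assertions fire at set/restore time; the usage pattern they
guard is unchanged. A minimal sketch, assuming an ifnet whose home vnet the
caller wants to enter before touching virtualized globals:

	/*
	 * Sketch of the canonical CURVNET pattern: enter the vnet owning
	 * an ifnet, do the work, restore on every exit path.  With
	 * INVARIANTS or VNET_DEBUG, the assertions above catch a NULL or
	 * stale vnet here.
	 */
	static void
	example_with_vnet(struct ifnet *ifp)
	{
		CURVNET_SET(ifp->if_vnet);
		/* ... access VNET() variables, e.g. V_ifnet ... */
		CURVNET_RESTORE();
	}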
@@ -191,15 +206,6 @@ extern struct sx vnet_sxlock;
* Virtual network stack memory allocator, which allows global variables to
* be automatically instantiated for each network stack instance.
*/
-__asm__(
-#if defined(__arm__)
- ".section " VNET_SETNAME ", \"aw\", %progbits\n"
-#else
- ".section " VNET_SETNAME ", \"aw\", @progbits\n"
-#endif
- "\t.p2align " __XSTRING(CACHE_LINE_SHIFT) "\n"
- "\t.previous");
-
#define VNET_NAME(n) vnet_entry_##n
#define VNET_DECLARE(t, n) extern t VNET_NAME(n)
#define VNET_DEFINE(t, n) t VNET_NAME(n) __section(VNET_SETNAME) __used
@@ -357,7 +363,7 @@ do { \
*/
#define curvnet NULL
-#define VNET_ASSERT(condition)
+#define VNET_ASSERT(exp, msg)
#define CURVNET_SET(arg)
#define CURVNET_SET_QUIET(arg)
#define CURVNET_RESTORE()