author     Sebastian Huber <sebastian.huber@embedded-brains.de>  2019-09-24 11:05:03 +0200
committer  Sebastian Huber <sebastian.huber@embedded-brains.de>  2019-11-13 10:47:04 +0100
commit     a5ddb0ea69f21c16b7697a935d7a0c16bb3cffcf (patch)
tree       db091fb0f7d091804482156c9f3f55879ac93d5b /freebsd/sys/net
parent     test/syscalls01: Fix sporadic test failures (diff)
download   rtems-libbsd-a5ddb0ea69f21c16b7697a935d7a0c16bb3cffcf.tar.bz2
Update to FreeBSD head 2019-09-24
Git mirror commit 6b0307a0a5184339393f555d5d424190d8a8277a.
Diffstat (limited to 'freebsd/sys/net')
-rw-r--r--  freebsd/sys/net/altq/altq_cbq.c | 5
-rw-r--r--  freebsd/sys/net/altq/altq_codel.c | 5
-rw-r--r--  freebsd/sys/net/altq/altq_fairq.c | 5
-rw-r--r--  freebsd/sys/net/altq/altq_hfsc.c | 15
-rw-r--r--  freebsd/sys/net/altq/altq_hfsc.h | 1
-rw-r--r--  freebsd/sys/net/altq/altq_priq.c | 5
-rw-r--r--  freebsd/sys/net/altq/altq_subr.c | 21
-rw-r--r--  freebsd/sys/net/altq/altq_var.h | 12
-rw-r--r--  freebsd/sys/net/bpf.c | 778
-rw-r--r--  freebsd/sys/net/bpf.h | 16
-rw-r--r--  freebsd/sys/net/bpf_buffer.c | 13
-rw-r--r--  freebsd/sys/net/bpfdesc.h | 8
-rw-r--r--  freebsd/sys/net/bridgestp.c | 8
-rw-r--r--  freebsd/sys/net/ethernet.h | 5
-rw-r--r--  freebsd/sys/net/ieee8023ad_lacp.c | 49
-rw-r--r--  freebsd/sys/net/ieee8023ad_lacp.h | 9
-rw-r--r--  freebsd/sys/net/ieee_oui.h | 85
-rw-r--r--  freebsd/sys/net/if.c | 193
-rw-r--r--  freebsd/sys/net/if_arp.h | 3
-rw-r--r--  freebsd/sys/net/if_bridge.c | 208
-rw-r--r--  freebsd/sys/net/if_clone.h | 4
-rw-r--r--  freebsd/sys/net/if_dead.c | 18
-rw-r--r--  freebsd/sys/net/if_enc.c | 10
-rw-r--r--  freebsd/sys/net/if_ethersubr.c | 104
-rw-r--r--  freebsd/sys/net/if_gre.c | 153
-rw-r--r--  freebsd/sys/net/if_gre.h | 66
-rw-r--r--  freebsd/sys/net/if_lagg.c | 242
-rw-r--r--  freebsd/sys/net/if_lagg.h | 6
-rw-r--r--  freebsd/sys/net/if_llatbl.c | 11
-rw-r--r--  freebsd/sys/net/if_llatbl.h | 2
-rw-r--r--  freebsd/sys/net/if_spppsubr.c | 20
-rw-r--r--  freebsd/sys/net/if_stf.c | 1
-rw-r--r--  freebsd/sys/net/if_tap.c | 1133
-rw-r--r--  freebsd/sys/net/if_tap.h | 24
-rw-r--r--  freebsd/sys/net/if_tapvar.h | 71
-rw-r--r--  freebsd/sys/net/if_tun.c | 1055
-rw-r--r--  freebsd/sys/net/if_tun.h | 1
-rw-r--r--  freebsd/sys/net/if_tuntap.c | 1734
-rw-r--r--  freebsd/sys/net/if_var.h | 75
-rw-r--r--  freebsd/sys/net/if_vlan.c | 282
-rw-r--r--  freebsd/sys/net/if_vlan_var.h | 4
-rw-r--r--  freebsd/sys/net/iflib.h | 65
-rw-r--r--  freebsd/sys/net/netisr.c | 1
-rw-r--r--  freebsd/sys/net/pfil.c | 882
-rw-r--r--  freebsd/sys/net/pfil.h | 233
-rw-r--r--  freebsd/sys/net/pfvar.h | 47
-rw-r--r--  freebsd/sys/net/route.c | 86
-rw-r--r--  freebsd/sys/net/route.h | 1
-rw-r--r--  freebsd/sys/net/route_var.h | 1
-rw-r--r--  freebsd/sys/net/rtsock.c | 448
-rw-r--r--  freebsd/sys/net/sff8472.h | 79
-rw-r--r--  freebsd/sys/net/vnet.h | 3
52 files changed, 4664 insertions, 3642 deletions
diff --git a/freebsd/sys/net/altq/altq_cbq.c b/freebsd/sys/net/altq/altq_cbq.c
index 015e35bf..7c99f8a8 100644
--- a/freebsd/sys/net/altq/altq_cbq.c
+++ b/freebsd/sys/net/altq/altq_cbq.c
@@ -225,12 +225,11 @@ cbq_pfattach(struct pf_altq *a)
}
int
-cbq_add_altq(struct pf_altq *a)
+cbq_add_altq(struct ifnet *ifp, struct pf_altq *a)
{
cbq_state_t *cbqp;
- struct ifnet *ifp;
- if ((ifp = ifunit(a->ifname)) == NULL)
+ if (ifp == NULL)
return (EINVAL);
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
diff --git a/freebsd/sys/net/altq/altq_codel.c b/freebsd/sys/net/altq/altq_codel.c
index 4a55cdbe..375fc382 100644
--- a/freebsd/sys/net/altq/altq_codel.c
+++ b/freebsd/sys/net/altq/altq_codel.c
@@ -91,13 +91,12 @@ codel_pfattach(struct pf_altq *a)
}
int
-codel_add_altq(struct pf_altq *a)
+codel_add_altq(struct ifnet *ifp, struct pf_altq *a)
{
struct codel_if *cif;
- struct ifnet *ifp;
struct codel_opts *opts;
- if ((ifp = ifunit(a->ifname)) == NULL)
+ if (ifp == NULL)
return (EINVAL);
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
diff --git a/freebsd/sys/net/altq/altq_fairq.c b/freebsd/sys/net/altq/altq_fairq.c
index a1bc3fdb..5b7646e2 100644
--- a/freebsd/sys/net/altq/altq_fairq.c
+++ b/freebsd/sys/net/altq/altq_fairq.c
@@ -150,12 +150,11 @@ fairq_pfattach(struct pf_altq *a)
}
int
-fairq_add_altq(struct pf_altq *a)
+fairq_add_altq(struct ifnet *ifp, struct pf_altq *a)
{
struct fairq_if *pif;
- struct ifnet *ifp;
- if ((ifp = ifunit(a->ifname)) == NULL)
+ if (ifp == NULL)
return (EINVAL);
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
diff --git a/freebsd/sys/net/altq/altq_hfsc.c b/freebsd/sys/net/altq/altq_hfsc.c
index 202915a8..024055e3 100644
--- a/freebsd/sys/net/altq/altq_hfsc.c
+++ b/freebsd/sys/net/altq/altq_hfsc.c
@@ -161,12 +161,11 @@ hfsc_pfattach(struct pf_altq *a)
}
int
-hfsc_add_altq(struct pf_altq *a)
+hfsc_add_altq(struct ifnet *ifp, struct pf_altq *a)
{
struct hfsc_if *hif;
- struct ifnet *ifp;
- if ((ifp = ifunit(a->ifname)) == NULL)
+ if (ifp == NULL)
return (EINVAL);
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
@@ -508,6 +507,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
goto err_ret;
}
}
+ cl->cl_slot = i;
if (flags & HFCF_DEFAULTCLASS)
hif->hif_defaultclass = cl;
@@ -560,7 +560,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
static int
hfsc_class_destroy(struct hfsc_class *cl)
{
- int i, s;
+ int s;
if (cl == NULL)
return (0);
@@ -591,12 +591,7 @@ hfsc_class_destroy(struct hfsc_class *cl)
ASSERT(p != NULL);
}
- for (i = 0; i < HFSC_MAX_CLASSES; i++)
- if (cl->cl_hif->hif_class_tbl[i] == cl) {
- cl->cl_hif->hif_class_tbl[i] = NULL;
- break;
- }
-
+ cl->cl_hif->hif_class_tbl[cl->cl_slot] = NULL;
cl->cl_hif->hif_classes--;
IFQ_UNLOCK(cl->cl_hif->hif_ifq);
splx(s);
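
[Editor's note] The two hfsc hunks above trade a linear scan of hif_class_tbl[] at
destroy time for a slot index cached in the class when it is created. A minimal
sketch of the pattern, with illustrative names rather than the actual HFSC
structures:

    #define MAX_CLASSES     64

    struct entry {
            unsigned slot;          /* index into tbl[], set on insert */
    };

    static struct entry *tbl[MAX_CLASSES];

    /* Insert: record the chosen slot in the entry itself. */
    static int
    entry_insert(struct entry *e, unsigned i)
    {
            if (i >= MAX_CLASSES || tbl[i] != NULL)
                    return (-1);
            tbl[i] = e;
            e->slot = i;
            return (0);
    }

    /* Remove: O(1), replacing the old scan of the whole table. */
    static void
    entry_remove(struct entry *e)
    {
            tbl[e->slot] = NULL;
    }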
diff --git a/freebsd/sys/net/altq/altq_hfsc.h b/freebsd/sys/net/altq/altq_hfsc.h
index fa4aa811..c43c6671 100644
--- a/freebsd/sys/net/altq/altq_hfsc.h
+++ b/freebsd/sys/net/altq/altq_hfsc.h
@@ -214,6 +214,7 @@ struct runtime_sc {
struct hfsc_class {
u_int cl_id; /* class id (just for debug) */
+ u_int cl_slot; /* slot in hif class table */
u_int32_t cl_handle; /* class handle */
struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */
int cl_flags; /* misc flags */
diff --git a/freebsd/sys/net/altq/altq_priq.c b/freebsd/sys/net/altq/altq_priq.c
index 5e77aef2..0090d8fa 100644
--- a/freebsd/sys/net/altq/altq_priq.c
+++ b/freebsd/sys/net/altq/altq_priq.c
@@ -97,12 +97,11 @@ priq_pfattach(struct pf_altq *a)
}
int
-priq_add_altq(struct pf_altq *a)
+priq_add_altq(struct ifnet *ifp, struct pf_altq *a)
{
struct priq_if *pif;
- struct ifnet *ifp;
- if ((ifp = ifunit(a->ifname)) == NULL)
+ if (ifp == NULL)
return (EINVAL);
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
diff --git a/freebsd/sys/net/altq/altq_subr.c b/freebsd/sys/net/altq/altq_subr.c
index 61aaec59..151bdf10 100644
--- a/freebsd/sys/net/altq/altq_subr.c
+++ b/freebsd/sys/net/altq/altq_subr.c
@@ -412,11 +412,11 @@ tbr_timeout(arg)
{
VNET_ITERATOR_DECL(vnet_iter);
struct ifnet *ifp;
- int active, s;
+ struct epoch_tracker et;
+ int active;
active = 0;
- s = splnet();
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
@@ -433,8 +433,7 @@ tbr_timeout(arg)
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
- IFNET_RUNLOCK_NOSLEEP();
- splx(s);
+ NET_EPOCH_EXIT(et);
if (active > 0)
CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
else
@@ -523,7 +522,7 @@ altq_pfdetach(struct pf_altq *a)
* malloc with WAITOK, also it is not yet clear which lock to use.
*/
int
-altq_add(struct pf_altq *a)
+altq_add(struct ifnet *ifp, struct pf_altq *a)
{
int error = 0;
@@ -538,27 +537,27 @@ altq_add(struct pf_altq *a)
switch (a->scheduler) {
#ifdef ALTQ_CBQ
case ALTQT_CBQ:
- error = cbq_add_altq(a);
+ error = cbq_add_altq(ifp, a);
break;
#endif
#ifdef ALTQ_PRIQ
case ALTQT_PRIQ:
- error = priq_add_altq(a);
+ error = priq_add_altq(ifp, a);
break;
#endif
#ifdef ALTQ_HFSC
case ALTQT_HFSC:
- error = hfsc_add_altq(a);
+ error = hfsc_add_altq(ifp, a);
break;
#endif
#ifdef ALTQ_FAIRQ
case ALTQT_FAIRQ:
- error = fairq_add_altq(a);
+ error = fairq_add_altq(ifp, a);
break;
#endif
#ifdef ALTQ_CODEL
case ALTQT_CODEL:
- error = codel_add_altq(a);
+ error = codel_add_altq(ifp, a);
break;
#endif
default:
diff --git a/freebsd/sys/net/altq/altq_var.h b/freebsd/sys/net/altq/altq_var.h
index 47326a03..f711e093 100644
--- a/freebsd/sys/net/altq/altq_var.h
+++ b/freebsd/sys/net/altq/altq_var.h
@@ -199,40 +199,40 @@ int tbr_set(struct ifaltq *, struct tb_profile *);
int altq_pfattach(struct pf_altq *);
int altq_pfdetach(struct pf_altq *);
-int altq_add(struct pf_altq *);
+int altq_add(struct ifnet *, struct pf_altq *);
int altq_remove(struct pf_altq *);
int altq_add_queue(struct pf_altq *);
int altq_remove_queue(struct pf_altq *);
int altq_getqstats(struct pf_altq *, void *, int *, int);
int cbq_pfattach(struct pf_altq *);
-int cbq_add_altq(struct pf_altq *);
+int cbq_add_altq(struct ifnet *, struct pf_altq *);
int cbq_remove_altq(struct pf_altq *);
int cbq_add_queue(struct pf_altq *);
int cbq_remove_queue(struct pf_altq *);
int cbq_getqstats(struct pf_altq *, void *, int *, int);
int codel_pfattach(struct pf_altq *);
-int codel_add_altq(struct pf_altq *);
+int codel_add_altq(struct ifnet *, struct pf_altq *);
int codel_remove_altq(struct pf_altq *);
int codel_getqstats(struct pf_altq *, void *, int *, int);
int priq_pfattach(struct pf_altq *);
-int priq_add_altq(struct pf_altq *);
+int priq_add_altq(struct ifnet *, struct pf_altq *);
int priq_remove_altq(struct pf_altq *);
int priq_add_queue(struct pf_altq *);
int priq_remove_queue(struct pf_altq *);
int priq_getqstats(struct pf_altq *, void *, int *, int);
int hfsc_pfattach(struct pf_altq *);
-int hfsc_add_altq(struct pf_altq *);
+int hfsc_add_altq(struct ifnet *, struct pf_altq *);
int hfsc_remove_altq(struct pf_altq *);
int hfsc_add_queue(struct pf_altq *);
int hfsc_remove_queue(struct pf_altq *);
int hfsc_getqstats(struct pf_altq *, void *, int *, int);
int fairq_pfattach(struct pf_altq *);
-int fairq_add_altq(struct pf_altq *);
+int fairq_add_altq(struct ifnet *, struct pf_altq *);
int fairq_remove_altq(struct pf_altq *);
int fairq_add_queue(struct pf_altq *);
int fairq_remove_queue(struct pf_altq *);
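
[Editor's note] The prototype changes above complete the API shift: every
*_add_altq() now receives the ifnet from its caller instead of looking it up
with ifunit(a->ifname) itself, so the interface can be resolved once before
dispatch. A hedged caller-side sketch (pf_altq_add_sketch is an illustrative
name, not the actual pf(4) ioctl path):

    static int
    pf_altq_add_sketch(struct pf_altq *a)
    {
            struct ifnet *ifp;

            /* Resolve the name once; may be NULL, altq_add() checks. */
            ifp = ifunit(a->ifname);
            return (altq_add(ifp, a));
    }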
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c
index edee632b..101ac4e0 100644
--- a/freebsd/sys/net/bpf.c
+++ b/freebsd/sys/net/bpf.c
@@ -5,6 +5,7 @@
*
* Copyright (c) 1990, 1991, 1993
* The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2019 Andrey V. Elsukov <ae@FreeBSD.org>
*
* This code is derived from the Stanford/CMU enet packet filter,
* (net/enet.c) distributed as part of 4.3BSD, and code contributed
@@ -45,16 +46,16 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_netgraph.h>
-#include <sys/types.h>
#include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/rwlock.h>
-#include <sys/systm.h>
#include <sys/conf.h>
+#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/mutex.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
@@ -64,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ttycom.h>
#include <sys/uio.h>
#include <sys/sysent.h>
+#include <sys/systm.h>
#include <sys/event.h>
#include <sys/file.h>
@@ -99,14 +101,16 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
#ifdef __rtems__
#include <rtems/imfs.h>
+#undef devfs_get_cdevpriv
#define devfs_get_cdevpriv(x) 0
+#undef devtoname
#define devtoname(x) "bpf"
#endif /* __rtems__ */
MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
static struct bpf_if_ext dead_bpf_if = {
- .bif_dlist = LIST_HEAD_INITIALIZER()
+ .bif_dlist = CK_LIST_HEAD_INITIALIZER()
};
struct bpf_if {
@@ -115,19 +119,22 @@ struct bpf_if {
struct bpf_if_ext bif_ext; /* public members */
u_int bif_dlt; /* link layer type */
u_int bif_hdrlen; /* length of link header */
+ struct bpfd_list bif_wlist; /* writer-only list */
struct ifnet *bif_ifp; /* corresponding interface */
- struct rwlock bif_lock; /* interface lock */
- LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
- int bif_flags; /* Interface flags */
struct bpf_if **bif_bpf; /* Pointer to pointer to us */
+ volatile u_int bif_refcnt;
+ struct epoch_context epoch_ctx;
};
CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
-#define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock)
-#define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock)
-#define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock)
-#define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock)
+struct bpf_program_buffer {
+ struct epoch_context epoch_ctx;
+#ifdef BPF_JITTER
+ bpf_jit_filter *func;
+#endif
+ void *buffer[0];
+};
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
@@ -187,18 +194,24 @@ struct bpf_dltlist32 {
#define BPF_LOCK_ASSERT() sx_assert(&bpf_sx, SA_XLOCKED)
/*
* bpf_iflist is a list of BPF interface structures, each corresponding to a
- * specific DLT. The same network interface might have several BPF interface
+ * specific DLT. The same network interface might have several BPF interface
* structures registered by different layers in the stack (i.e., 802.11
* frames, ethernet frames, etc).
*/
-static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist;
+CK_LIST_HEAD(bpf_iflist, bpf_if);
+static struct bpf_iflist bpf_iflist;
static struct sx bpf_sx; /* bpf global lock */
static int bpf_bpfd_cnt;
+static void bpfif_ref(struct bpf_if *);
+static void bpfif_rele(struct bpf_if *);
+
+static void bpfd_ref(struct bpf_d *);
+static void bpfd_rele(struct bpf_d *);
static void bpf_attachd(struct bpf_d *, struct bpf_if *);
static void bpf_detachd(struct bpf_d *);
-static void bpf_detachd_locked(struct bpf_d *);
-static void bpf_freed(struct bpf_d *);
+static void bpf_detachd_locked(struct bpf_d *, bool);
+static void bpfd_free(epoch_context_t);
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
struct sockaddr *, int *, struct bpf_d *);
static int bpf_setif(struct bpf_d *, struct ifreq *);
@@ -261,37 +274,106 @@ static struct filterops bpfread_filtops = {
.f_event = filt_bpfread,
};
-eventhandler_tag bpf_ifdetach_cookie = NULL;
-
/*
- * LOCKING MODEL USED BY BPF:
+ * LOCKING MODEL USED BY BPF
+ *
* Locks:
- * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
- * some global counters and every bpf_if reference.
- * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
- * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
- * used by bpf_mtap code.
+ * 1) global lock (BPF_LOCK). An sx lock, used to protect some global
+ * counters and all bpf_iflist changes; it also serializes ioctl access
+ * to bpf descriptors.
+ * 2) Descriptor lock. Mutex, used to protect BPF buffers and various
+ * structure fields used by bpf_*tap* code.
+ *
+ * Lock order: global lock, then descriptor lock.
*
- * Lock order:
+ * There are several possible consumers:
*
- * Global lock, interface lock, descriptor lock
+ * 1. The kernel registers an interface pointer with bpfattach().
+ * Each call allocates a new bpf_if structure, references the ifnet
+ * pointer and links the bpf_if into the bpf_iflist chain. This is
+ * protected by the global lock.
*
- * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
- * working model. In many places (like bpf_detachd) we start with BPF descriptor
- * (and we need to at least rlock it to get reliable interface pointer). This
- * gives us potential LOR. As a result, we use global lock to protect from bpf_if
- * change in every such place.
+ * 2. A userland application uses ioctl() calls on a bpf_d descriptor.
+ * All such calls are serialized with the global lock. BPF filters can
+ * be changed, but the pointer to the old filter will be freed using
+ * epoch_call(). Thus it should be safe for the bpf_tap/bpf_mtap* code
+ * to access filter pointers, even if a change happens during bpf_tap
+ * execution. Destroying a bpf_d descriptor is also done using epoch_call().
*
- * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
- * 3) descriptor main wlock.
- * Reading bd_bif can be protected by any of these locks, typically global lock.
+ * 3. A userland application can write packets into a bpf_d descriptor.
+ * Here we need to be sure that the ifnet won't disappear during bpfwrite().
*
- * Changing read/write BPF filter is protected by the same three locks,
- * the same applies for reading.
+ * 4. The kernel invokes the bpf_tap/bpf_mtap* functions. The access to
+ * bif_dlist is protected by a net_epoch_preempt section, so it should
+ * be safe to access the bpf_d descriptors inside the section.
*
- * Sleeping in global lock is not allowed due to bpfdetach() using it.
+ * 5. The kernel invokes bpfdetach() when an interface is destroyed. All
+ * lists are modified with the global lock held and the actual free() is
+ * done using epoch_call().
*/
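
[Editor's note] The numbered consumers above follow the standard net epoch
pattern: readers walk the CK_LIST inside an epoch section, while writers
unlink entries under BPF_LOCK and defer the free with epoch_call(). A reduced
reader-side sketch of what the bpf_*tap*() routines below do
(bpf_epoch_reader_sketch is an illustrative name):

    static void
    bpf_epoch_reader_sketch(struct bpf_if *bp)
    {
            struct epoch_tracker et;
            struct bpf_d *d;

            NET_EPOCH_ENTER(et);
            CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
                    /*
                     * The epoch section guarantees d is not freed under
                     * us; mutable per-descriptor state still needs
                     * BPFD_LOCK(d).
                     */
                    counter_u64_add(d->bd_rcount, 1);
            }
            NET_EPOCH_EXIT(et);
    }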
+static void
+bpfif_free(epoch_context_t ctx)
+{
+ struct bpf_if *bp;
+
+ bp = __containerof(ctx, struct bpf_if, epoch_ctx);
+ if_rele(bp->bif_ifp);
+ free(bp, M_BPF);
+}
+
+static void
+bpfif_ref(struct bpf_if *bp)
+{
+
+ refcount_acquire(&bp->bif_refcnt);
+}
+
+static void
+bpfif_rele(struct bpf_if *bp)
+{
+
+ if (!refcount_release(&bp->bif_refcnt))
+ return;
+ epoch_call(net_epoch_preempt, &bp->epoch_ctx, bpfif_free);
+}
+
+static void
+bpfd_ref(struct bpf_d *d)
+{
+
+ refcount_acquire(&d->bd_refcnt);
+}
+
+static void
+bpfd_rele(struct bpf_d *d)
+{
+
+ if (!refcount_release(&d->bd_refcnt))
+ return;
+ epoch_call(net_epoch_preempt, &d->epoch_ctx, bpfd_free);
+}
+
+static struct bpf_program_buffer*
+bpf_program_buffer_alloc(size_t size, int flags)
+{
+
+ return (malloc(sizeof(struct bpf_program_buffer) + size,
+ M_BPF, flags));
+}
+
+static void
+bpf_program_buffer_free(epoch_context_t ctx)
+{
+ struct bpf_program_buffer *ptr;
+
+ ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
+#ifdef BPF_JITTER
+ if (ptr->func != NULL)
+ bpf_destroy_jit_filter(ptr->func);
+#endif
+ free(ptr, M_BPF);
+}
+
/*
* Wrapper functions for various buffering methods. If the set of buffer
* modes expands, we will probably want to introduce a switch data structure
@@ -673,7 +755,8 @@ bad:
}
/*
- * Attach file to the bpf interface, i.e. make d listen on bp.
+ * Attach descriptor to the bpf interface, i.e. make d listen on bp,
+ * then reset its buffers and counters with reset_d().
*/
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
@@ -689,7 +772,7 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
op_w = V_bpf_optimize_writers || d->bd_writer;
if (d->bd_bif != NULL)
- bpf_detachd_locked(d);
+ bpf_detachd_locked(d, false);
/*
* Point d at bp, and add d to the interface's list.
* Since there are many applications using BPF for
@@ -698,26 +781,27 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
* some filter is configured.
*/
- BPFIF_WLOCK(bp);
BPFD_LOCK(d);
-
+ /*
+ * Hold a reference to the bpf_if while the descriptor uses this interface.
+ */
+ bpfif_ref(bp);
d->bd_bif = bp;
-
if (op_w != 0) {
/* Add to writers-only list */
- LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
+ CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
/*
* We decrement bd_writer on every filter set operation.
* First BIOCSETF is done by pcap_open_live() to set up
- * snap length. After that appliation usually sets its own filter
+ * snap length. After that the application usually sets its
+ * own filter.
*/
d->bd_writer = 2;
} else
- LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+ CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+ reset_d(d);
BPFD_UNLOCK(d);
- BPFIF_WUNLOCK(bp);
-
bpf_bpfd_cnt++;
CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
@@ -731,7 +815,8 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
* Check if we need to upgrade our descriptor @d from write-only mode.
*/
static int
-bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
+bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode,
+ int flen)
{
int is_snap, need_upgrade;
@@ -751,7 +836,8 @@ bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
* we'd prefer to treat k=0 (deny ALL) case the same way: e.g.
* do not consider upgrading immediately
*/
- if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET | BPF_K))
+ if (cmd == BIOCSETF && flen == 1 &&
+ fcode[0].code == (BPF_RET | BPF_K))
is_snap = 1;
else
is_snap = 0;
@@ -789,88 +875,45 @@ bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
}
/*
- * Add d to the list of active bp filters.
- * Requires bpf_attachd() to be called before.
- */
-static void
-bpf_upgraded(struct bpf_d *d)
-{
- struct bpf_if *bp;
-
- BPF_LOCK_ASSERT();
-
- bp = d->bd_bif;
-
- /*
- * Filter can be set several times without specifying interface.
- * Mark d as reader and exit.
- */
- if (bp == NULL) {
- BPFD_LOCK(d);
- d->bd_writer = 0;
- BPFD_UNLOCK(d);
- return;
- }
-
- BPFIF_WLOCK(bp);
- BPFD_LOCK(d);
-
- /* Remove from writers-only list */
- LIST_REMOVE(d, bd_next);
- LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
- /* Mark d as reader */
- d->bd_writer = 0;
-
- BPFD_UNLOCK(d);
- BPFIF_WUNLOCK(bp);
-
- CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
-
- EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
-}
-
-/*
* Detach a file from its interface.
*/
static void
bpf_detachd(struct bpf_d *d)
{
BPF_LOCK();
- bpf_detachd_locked(d);
+ bpf_detachd_locked(d, false);
BPF_UNLOCK();
}
static void
-bpf_detachd_locked(struct bpf_d *d)
+bpf_detachd_locked(struct bpf_d *d, bool detached_ifp)
{
- int error;
struct bpf_if *bp;
struct ifnet *ifp;
-
- CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
+ int error;
BPF_LOCK_ASSERT();
+ CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
/* Check if descriptor is attached */
if ((bp = d->bd_bif) == NULL)
return;
- BPFIF_WLOCK(bp);
BPFD_LOCK(d);
-
+ /* Remove d from the interface's descriptor list. */
+ CK_LIST_REMOVE(d, bd_next);
/* Save bd_writer value */
error = d->bd_writer;
-
- /*
- * Remove d from the interface's descriptor list.
- */
- LIST_REMOVE(d, bd_next);
-
ifp = bp->bif_ifp;
d->bd_bif = NULL;
+ if (detached_ifp) {
+ /*
+ * Notify the descriptor that it is detached, so that any
+ * sleepers wake up and get ENXIO.
+ */
+ bpf_wakeup(d);
+ }
BPFD_UNLOCK(d);
- BPFIF_WUNLOCK(bp);
-
bpf_bpfd_cnt--;
/* Call event handler iff d is attached */
@@ -879,9 +922,9 @@ bpf_detachd_locked(struct bpf_d *d)
/*
* Check if this descriptor had requested promiscuous mode.
- * If so, turn it off.
+ * If so and ifnet is not detached, turn it off.
*/
- if (d->bd_promisc) {
+ if (d->bd_promisc && !detached_ifp) {
d->bd_promisc = 0;
CURVNET_SET(ifp->if_vnet);
error = ifpromisc(ifp, 0);
@@ -897,6 +940,7 @@ bpf_detachd_locked(struct bpf_d *d)
"bpf_detach: ifpromisc failed (%d)\n", error);
}
}
+ bpfif_rele(bp);
}
/*
@@ -921,8 +965,7 @@ bpf_dtor(void *data)
seldrain(&d->bd_sel);
knlist_destroy(&d->bd_sel.si_note);
callout_drain(&d->bd_callout);
- bpf_freed(d);
- free(d, M_BPF);
+ bpfd_rele(d);
}
/*
@@ -975,6 +1018,7 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
d->bd_bufmode = BPF_BUFMODE_BUFFER;
d->bd_sig = SIGIO;
d->bd_direction = BPF_D_INOUT;
+ d->bd_refcnt = 1;
BPF_PID_REFRESH(d, td);
#ifdef MAC
mac_bpfdesc_init(d);
@@ -1162,7 +1206,8 @@ bpf_timed_out(void *arg)
BPFD_LOCK_ASSERT(d);
- if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
+ if (callout_pending(&d->bd_callout) ||
+ !callout_active(&d->bd_callout))
return;
if (d->bd_state == BPF_WAITING) {
d->bd_state = BPF_TIMED_OUT;
@@ -1192,49 +1237,73 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
#endif /* __rtems__ */
{
+ struct route ro;
+ struct sockaddr dst;
+ struct epoch_tracker et;
+ struct bpf_if *bp;
#ifndef __rtems__
struct bpf_d *d;
#endif /* __rtems__ */
struct ifnet *ifp;
struct mbuf *m, *mc;
- struct sockaddr dst;
- struct route ro;
int error, hlen;
error = devfs_get_cdevpriv((void **)&d);
if (error != 0)
return (error);
+ NET_EPOCH_ENTER(et);
+ BPFD_LOCK(d);
BPF_PID_REFRESH_CUR(d);
counter_u64_add(d->bd_wcount, 1);
- /* XXX: locking required */
- if (d->bd_bif == NULL) {
- counter_u64_add(d->bd_wdcount, 1);
- return (ENXIO);
+ if ((bp = d->bd_bif) == NULL) {
+ error = ENXIO;
+ goto out_locked;
}
- ifp = d->bd_bif->bif_ifp;
-
+ ifp = bp->bif_ifp;
if ((ifp->if_flags & IFF_UP) == 0) {
- counter_u64_add(d->bd_wdcount, 1);
- return (ENETDOWN);
+ error = ENETDOWN;
+ goto out_locked;
}
- if (uio->uio_resid == 0) {
- counter_u64_add(d->bd_wdcount, 1);
- return (0);
- }
+ if (uio->uio_resid == 0)
+ goto out_locked;
bzero(&dst, sizeof(dst));
m = NULL;
hlen = 0;
- /* XXX: bpf_movein() can sleep */
- error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
+
+ /*
+ * Take an extra reference, unlock d and exit the epoch section,
+ * since bpf_movein() can sleep.
+ */
+ bpfd_ref(d);
+ NET_EPOCH_EXIT(et);
+ BPFD_UNLOCK(d);
+
+ error = bpf_movein(uio, (int)bp->bif_dlt, ifp,
&m, &dst, &hlen, d);
- if (error) {
+
+ if (error != 0) {
counter_u64_add(d->bd_wdcount, 1);
+ bpfd_rele(d);
return (error);
}
+
+ BPFD_LOCK(d);
+ /*
+ * Check that the descriptor is still attached to the interface.
+ * It can be detached by a concurrent bpfdetach(). To avoid
+ * accessing a detached ifnet, free the mbuf and return ENXIO.
+ */
+ if (d->bd_bif == NULL) {
+ counter_u64_add(d->bd_wdcount, 1);
+ BPFD_UNLOCK(d);
+ bpfd_rele(d);
+ m_freem(m);
+ return (ENXIO);
+ }
counter_u64_add(d->bd_wfcount, 1);
if (d->bd_hdrcmplt)
dst.sa_family = pseudo_AF_HDRCMPLT;
@@ -1255,11 +1324,9 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
CURVNET_SET(ifp->if_vnet);
#ifdef MAC
- BPFD_LOCK(d);
mac_bpfdesc_create_mbuf(d, m);
if (mc != NULL)
mac_bpfdesc_create_mbuf(d, mc);
- BPFD_UNLOCK(d);
#endif
bzero(&ro, sizeof(ro));
@@ -1269,6 +1336,9 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
ro.ro_flags = RT_HAS_HEADER;
}
+ /* Avoid possible recursion on BPFD_LOCK(). */
+ NET_EPOCH_ENTER(et);
+ BPFD_UNLOCK(d);
error = (*ifp->if_output)(ifp, m, &dst, &ro);
if (error)
counter_u64_add(d->bd_wdcount, 1);
@@ -1279,8 +1349,15 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
else
m_freem(mc);
}
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
+ bpfd_rele(d);
+ return (error);
+out_locked:
+ counter_u64_add(d->bd_wdcount, 1);
+ NET_EPOCH_EXIT(et);
+ BPFD_UNLOCK(d);
return (error);
}
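
[Editor's note] The rewritten bpfwrite() above has to drop both the epoch
section and the descriptor mutex around bpf_movein(), which can sleep; the
extra bpfd_ref() keeps d alive across that window, and bd_bif is re-checked
afterwards because bpfdetach() may have run in the meantime. The shape of the
pattern, reduced to a fragment sketch (do_sleeping_work() stands in for
bpf_movein()):

    bpfd_ref(d);                    /* keep d alive with no lock held */
    NET_EPOCH_EXIT(et);
    BPFD_UNLOCK(d);

    error = do_sleeping_work();     /* e.g. bpf_movein() */

    BPFD_LOCK(d);
    if (d->bd_bif == NULL) {        /* lost a race with bpfdetach() */
            BPFD_UNLOCK(d);
            bpfd_rele(d);
            return (ENXIO);
    }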
@@ -1916,16 +1993,11 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
}
/*
- * Set d's packet filter program to fp. If this file already has a filter,
- * free it and replace it. Returns EINVAL for bogus requests.
- *
- * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
- * since reading d->bd_bif can't be protected by d or interface lock due to
- * lock order.
- *
- * Additionally, we have to acquire interface write lock due to bpf_mtap() uses
- * interface read lock to read all filers.
+ * Set d's packet filter program to fp. If this file already has a filter,
+ * free it and replace it. Returns EINVAL for bogus requests.
*
+ * Note we use global lock here to serialize bpf_setf() and bpf_setif()
+ * calls.
*/
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
@@ -1934,13 +2006,14 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
struct bpf_program fp_swab;
struct bpf_program32 *fp32;
#endif
- struct bpf_insn *fcode, *old;
+ struct bpf_program_buffer *fcode;
+ struct bpf_insn *filter;
#ifdef BPF_JITTER
- bpf_jit_filter *jfunc, *ofunc;
+ bpf_jit_filter *jfunc;
#endif
size_t size;
u_int flen;
- int need_upgrade;
+ bool track_event;
#ifdef COMPAT_FREEBSD32
switch (cmd) {
@@ -1949,7 +2022,8 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
case BIOCSETFNR32:
fp32 = (struct bpf_program32 *)fp;
fp_swab.bf_len = fp32->bf_len;
- fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
+ fp_swab.bf_insns =
+ (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
fp = &fp_swab;
switch (cmd) {
case BIOCSETF32:
@@ -1963,12 +2037,10 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
}
#endif
- fcode = NULL;
+ filter = NULL;
#ifdef BPF_JITTER
- jfunc = ofunc = NULL;
+ jfunc = NULL;
#endif
- need_upgrade = 0;
-
/*
* Check new filter validness before acquiring any locks.
* Allocate memory for new filter, if needed.
@@ -1978,10 +2050,11 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
return (EINVAL);
size = flen * sizeof(*fp->bf_insns);
if (size > 0) {
- /* We're setting up new filter. Copy and check actual data. */
- fcode = malloc(size, M_BPF, M_WAITOK);
- if (copyin(fp->bf_insns, fcode, size) != 0 ||
- !bpf_validate(fcode, flen)) {
+ /* We're setting up a new filter. Copy and check the actual data. */
+ fcode = bpf_program_buffer_alloc(size, M_WAITOK);
+ filter = (struct bpf_insn *)fcode->buffer;
+ if (copyin(fp->bf_insns, filter, size) != 0 ||
+ !bpf_validate(filter, flen)) {
free(fcode, M_BPF);
return (EINVAL);
}
@@ -1991,49 +2064,72 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
* Filter is copied inside fcode and is
* perfectly valid.
*/
- jfunc = bpf_jitter(fcode, flen);
+ jfunc = bpf_jitter(filter, flen);
}
#endif
}
- BPF_LOCK();
+ track_event = false;
+ fcode = NULL;
- /*
- * Set up new filter.
- * Protect filter change by interface lock.
- * Additionally, we are protected by global lock here.
- */
- if (d->bd_bif != NULL)
- BPFIF_WLOCK(d->bd_bif);
+ BPF_LOCK();
BPFD_LOCK(d);
+ /* Set up new filter. */
if (cmd == BIOCSETWF) {
- old = d->bd_wfilter;
- d->bd_wfilter = fcode;
+ if (d->bd_wfilter != NULL) {
+ fcode = __containerof((void *)d->bd_wfilter,
+ struct bpf_program_buffer, buffer);
+#ifdef BPF_JITTER
+ fcode->func = NULL;
+#endif
+ }
+ d->bd_wfilter = filter;
} else {
- old = d->bd_rfilter;
- d->bd_rfilter = fcode;
+ if (d->bd_rfilter != NULL) {
+ fcode = __containerof((void *)d->bd_rfilter,
+ struct bpf_program_buffer, buffer);
+#ifdef BPF_JITTER
+ fcode->func = d->bd_bfilter;
+#endif
+ }
+ d->bd_rfilter = filter;
#ifdef BPF_JITTER
- ofunc = d->bd_bfilter;
d->bd_bfilter = jfunc;
#endif
if (cmd == BIOCSETF)
reset_d(d);
- need_upgrade = bpf_check_upgrade(cmd, d, fcode, flen);
+ if (bpf_check_upgrade(cmd, d, filter, flen) != 0) {
+ /*
+ * A filter can be set several times without
+ * specifying an interface. In this case just
+ * mark d as a reader.
+ */
+ d->bd_writer = 0;
+ if (d->bd_bif != NULL) {
+ /*
+ * Remove descriptor from writers-only list
+ * and add it to active readers list.
+ */
+ CK_LIST_REMOVE(d, bd_next);
+ CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist,
+ d, bd_next);
+ CTR2(KTR_NET,
+ "%s: upgrade required by pid %d",
+ __func__, d->bd_pid);
+ track_event = true;
+ }
+ }
}
BPFD_UNLOCK(d);
- if (d->bd_bif != NULL)
- BPFIF_WUNLOCK(d->bd_bif);
- if (old != NULL)
- free(old, M_BPF);
-#ifdef BPF_JITTER
- if (ofunc != NULL)
- bpf_destroy_jit_filter(ofunc);
-#endif
- /* Move d to active readers list. */
- if (need_upgrade != 0)
- bpf_upgraded(d);
+ if (fcode != NULL)
+ epoch_call(net_epoch_preempt, &fcode->epoch_ctx,
+ bpf_program_buffer_free);
+
+ if (track_event)
+ EVENTHANDLER_INVOKE(bpf_track,
+ d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1);
BPF_UNLOCK();
return (0);
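
[Editor's note] Since only the instruction array (bd_rfilter/bd_wfilter) is
stored in the descriptor, bpf_setf() recovers the enclosing
bpf_program_buffer header with __containerof() and stashes the JIT handle in
it, so one deferred callback releases both once every epoch reader has
drained. The core of that pattern, as a sketch of the hunk above:

    struct bpf_program_buffer *old;

    /* Recover the header from the embedded instruction pointer. */
    old = __containerof((void *)d->bd_rfilter,
        struct bpf_program_buffer, buffer);
    #ifdef BPF_JITTER
    old->func = d->bd_bfilter;      /* freed together in the callback */
    #endif
    epoch_call(net_epoch_preempt, &old->epoch_ctx,
        bpf_program_buffer_free);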
@@ -2057,15 +2153,6 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
return (ENXIO);
bp = theywant->if_bpf;
-
- /* Check if interface is not being detached from BPF */
- BPFIF_RLOCK(bp);
- if (bp->bif_flags & BPFIF_FLAG_DYING) {
- BPFIF_RUNLOCK(bp);
- return (ENXIO);
- }
- BPFIF_RUNLOCK(bp);
-
/*
* At this point, we expect the buffer is already allocated. If not,
* return an error.
@@ -2084,9 +2171,11 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
}
if (bp != d->bd_bif)
bpf_attachd(d, bp);
- BPFD_LOCK(d);
- reset_d(d);
- BPFD_UNLOCK(d);
+ else {
+ BPFD_LOCK(d);
+ reset_d(d);
+ BPFD_UNLOCK(d);
+ }
return (0);
}
@@ -2253,6 +2342,7 @@ bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
+ struct epoch_tracker et;
struct bintime bt;
struct bpf_d *d;
#ifdef BPF_JITTER
@@ -2262,24 +2352,14 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
int gottime;
gottime = BPF_TSTAMP_NONE;
-
- BPFIF_RLOCK(bp);
-
- LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
- /*
- * We are not using any locks for d here because:
- * 1) any filter change is protected by interface
- * write lock
- * 2) destroying/detaching d is protected by interface
- * write lock, too
- */
-
+ NET_EPOCH_ENTER(et);
+ CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
counter_u64_add(d->bd_rcount, 1);
/*
- * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
- * way for the caller to indiciate to us whether this packet
- * is inbound or outbound. In the bpf_mtap() routines, we use
- * the interface pointers on the mbuf to figure it out.
+ * NB: We don't call BPF_CHECK_DIRECTION() here since there
+ * is no way for the caller to indicate to us whether this
+ * packet is inbound or outbound. In the bpf_mtap() routines,
+ * we use the interface pointers on the mbuf to figure it out.
*/
#ifdef BPF_JITTER
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
@@ -2293,10 +2373,10 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
* Filter matches. Let's acquire the write lock.
*/
BPFD_LOCK(d);
-
counter_u64_add(d->bd_fcount, 1);
if (gottime < bpf_ts_quality(d->bd_tstamp))
- gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
+ gottime = bpf_gettime(&bt, d->bd_tstamp,
+ NULL);
#ifdef MAC
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
@@ -2305,7 +2385,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
BPFD_UNLOCK(d);
}
}
- BPFIF_RUNLOCK(bp);
+ NET_EPOCH_EXIT(et);
}
#define BPF_CHECK_DIRECTION(d, r, i) \
@@ -2319,6 +2399,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
+ struct epoch_tracker et;
struct bintime bt;
struct bpf_d *d;
#ifdef BPF_JITTER
@@ -2328,7 +2409,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
int gottime;
/* Skip outgoing duplicate packets. */
- if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
+ if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
m->m_flags &= ~M_PROMISC;
return;
}
@@ -2336,17 +2417,17 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
pktlen = m_length(m, NULL);
gottime = BPF_TSTAMP_NONE;
- BPFIF_RLOCK(bp);
-
- LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
- if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
+ NET_EPOCH_ENTER(et);
+ CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
+ if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
continue;
counter_u64_add(d->bd_rcount, 1);
#ifdef BPF_JITTER
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
/* XXX We cannot handle multiple mbufs. */
if (bf != NULL && m->m_next == NULL)
- slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
+ slen = (*(bf->func))(mtod(m, u_char *), pktlen,
+ pktlen);
else
#endif
slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
@@ -2364,7 +2445,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
BPFD_UNLOCK(d);
}
}
- BPFIF_RUNLOCK(bp);
+ NET_EPOCH_EXIT(et);
}
/*
@@ -2374,6 +2455,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
+ struct epoch_tracker et;
struct bintime bt;
struct mbuf mb;
struct bpf_d *d;
@@ -2392,6 +2474,7 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
* Note that we cut corners here; we only setup what's
* absolutely needed--this mbuf should never go anywhere else.
*/
+ mb.m_flags = 0;
mb.m_next = m;
mb.m_data = data;
mb.m_len = dlen;
@@ -2399,9 +2482,8 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
gottime = BPF_TSTAMP_NONE;
- BPFIF_RLOCK(bp);
-
- LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
+ NET_EPOCH_ENTER(et);
+ CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
continue;
counter_u64_add(d->bd_rcount, 1);
@@ -2420,11 +2502,10 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
BPFD_UNLOCK(d);
}
}
- BPFIF_RUNLOCK(bp);
+ NET_EPOCH_EXIT(et);
}
#undef BPF_CHECK_DIRECTION
-
#undef BPF_TSTAMP_NONE
#undef BPF_TSTAMP_FAST
#undef BPF_TSTAMP_NORMAL
@@ -2514,6 +2595,11 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
int tstype;
BPFD_LOCK_ASSERT(d);
+ if (d->bd_bif == NULL) {
+ /* Descriptor was detached in concurrent thread */
+ counter_u64_add(d->bd_dcount, 1);
+ return;
+ }
/*
* Detect whether user space has released a buffer back to us, and if
@@ -2643,26 +2729,36 @@ copy:
* Called on close.
*/
static void
-bpf_freed(struct bpf_d *d)
+bpfd_free(epoch_context_t ctx)
{
+ struct bpf_d *d;
+ struct bpf_program_buffer *p;
/*
* We don't need to lock out interrupts since this descriptor has
* been detached from its interface and it hasn't yet been marked
* free.
*/
+ d = __containerof(ctx, struct bpf_d, epoch_ctx);
bpf_free(d);
if (d->bd_rfilter != NULL) {
- free((caddr_t)d->bd_rfilter, M_BPF);
+ p = __containerof((void *)d->bd_rfilter,
+ struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
- if (d->bd_bfilter != NULL)
- bpf_destroy_jit_filter(d->bd_bfilter);
+ p->func = d->bd_bfilter;
#endif
+ bpf_program_buffer_free(&p->epoch_ctx);
+ }
+ if (d->bd_wfilter != NULL) {
+ p = __containerof((void *)d->bd_wfilter,
+ struct bpf_program_buffer, buffer);
+#ifdef BPF_JITTER
+ p->func = NULL;
+#endif
+ bpf_program_buffer_free(&p->epoch_ctx);
}
- if (d->bd_wfilter != NULL)
- free((caddr_t)d->bd_wfilter, M_BPF);
- mtx_destroy(&d->bd_lock);
+ mtx_destroy(&d->bd_lock);
counter_u64_free(d->bd_rcount);
counter_u64_free(d->bd_dcount);
counter_u64_free(d->bd_fcount);
@@ -2670,7 +2766,7 @@ bpf_freed(struct bpf_d *d)
counter_u64_free(d->bd_wfcount);
counter_u64_free(d->bd_wdcount);
counter_u64_free(d->bd_zcopy);
-
+ free(d, M_BPF);
}
/*
@@ -2691,29 +2787,33 @@ bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
* headers are not yet supported).
*/
void
-bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
+bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen,
+ struct bpf_if **driverp)
{
struct bpf_if *bp;
- bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
- if (bp == NULL)
- panic("bpfattach");
+ KASSERT(*driverp == NULL,
+ ("bpfattach2: driverp already initialized"));
- LIST_INIT(&bp->bif_dlist);
- LIST_INIT(&bp->bif_wlist);
+ bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);
+
+ CK_LIST_INIT(&bp->bif_dlist);
+ CK_LIST_INIT(&bp->bif_wlist);
bp->bif_ifp = ifp;
bp->bif_dlt = dlt;
- rw_init(&bp->bif_lock, "bpf interface lock");
- KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
+ bp->bif_hdrlen = hdrlen;
bp->bif_bpf = driverp;
+ bp->bif_refcnt = 1;
*driverp = bp;
-
+ /*
+ * Reference the ifnet pointer, so it won't be freed until
+ * we release it.
+ */
+ if_ref(ifp);
BPF_LOCK();
- LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
+ CK_LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
BPF_UNLOCK();
- bp->bif_hdrlen = hdrlen;
-
if (bootverbose && IS_DEFAULT_VNET(curvnet))
if_printf(ifp, "bpf attached\n");
}
@@ -2752,98 +2852,32 @@ bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
void
bpfdetach(struct ifnet *ifp)
{
- struct bpf_if *bp, *bp_temp;
- struct bpf_d *d;
- int ndetached;
-
- ndetached = 0;
+ struct bpf_if *bp, *bp_temp;
+ struct bpf_d *d;
BPF_LOCK();
/* Find all bpf_if struct's which reference ifp and detach them. */
- LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
+ CK_LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
if (ifp != bp->bif_ifp)
continue;
- LIST_REMOVE(bp, bif_next);
- /* Add to to-be-freed list */
- LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
-
- ndetached++;
- /*
- * Delay freeing bp till interface is detached
- * and all routes through this interface are removed.
- * Mark bp as detached to restrict new consumers.
- */
- BPFIF_WLOCK(bp);
- bp->bif_flags |= BPFIF_FLAG_DYING;
+ CK_LIST_REMOVE(bp, bif_next);
*bp->bif_bpf = (struct bpf_if *)&dead_bpf_if;
- BPFIF_WUNLOCK(bp);
- CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p",
+ CTR4(KTR_NET,
+ "%s: sheduling free for encap %d (%p) for if %p",
__func__, bp->bif_dlt, bp, ifp);
- /* Free common descriptors */
- while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
+ /* Detach common descriptors */
+ while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
+ bpf_detachd_locked(d, true);
}
- /* Free writer-only descriptors */
- while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
+ /* Detach writer-only descriptors */
+ while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
+ bpf_detachd_locked(d, true);
}
- }
- BPF_UNLOCK();
-
-#ifdef INVARIANTS
- if (ndetached == 0)
- printf("bpfdetach: %s was not attached\n", ifp->if_xname);
-#endif
-}
-
-/*
- * Interface departure handler.
- * Note departure event does not guarantee interface is going down.
- * Interface renaming is currently done via departure/arrival event set.
- *
- * Departure handled is called after all routes pointing to
- * given interface are removed and interface is in down state
- * restricting any packets to be sent/received. We assume it is now safe
- * to free data allocated by BPF.
- */
-static void
-bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
-{
- struct bpf_if *bp, *bp_temp;
- int nmatched = 0;
-
- /* Ignore ifnet renaming. */
- if (ifp->if_flags & IFF_RENAMING)
- return;
-
- BPF_LOCK();
- /*
- * Find matching entries in free list.
- * Nothing should be found if bpfdetach() was not called.
- */
- LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
- if (ifp != bp->bif_ifp)
- continue;
-
- CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
- __func__, bp, ifp);
-
- LIST_REMOVE(bp, bif_next);
-
- rw_destroy(&bp->bif_lock);
- free(bp, M_BPF);
-
- nmatched++;
+ bpfif_rele(bp);
}
BPF_UNLOCK();
}
@@ -2862,9 +2896,8 @@ bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
BPF_LOCK_ASSERT();
ifp = d->bd_bif->bif_ifp;
-again:
n1 = 0;
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp == ifp)
n1++;
}
@@ -2874,24 +2907,16 @@ again:
}
if (n1 > bfl->bfl_len)
return (ENOMEM);
- BPF_UNLOCK();
+
lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
n = 0;
- BPF_LOCK();
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp != ifp)
continue;
- if (n >= n1) {
- free(lst, M_TEMP);
- goto again;
- }
- lst[n] = bp->bif_dlt;
- n++;
+ lst[n++] = bp->bif_dlt;
}
- BPF_UNLOCK();
error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
free(lst, M_TEMP);
- BPF_LOCK();
bfl->bfl_len = n;
return (error);
}
@@ -2907,33 +2932,34 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
struct bpf_if *bp;
BPF_LOCK_ASSERT();
+ MPASS(d->bd_bif != NULL);
+ /*
+ * It is safe to check bd_bif without BPFD_LOCK; it cannot be
+ * changed while we hold the global lock.
+ */
if (d->bd_bif->bif_dlt == dlt)
return (0);
- ifp = d->bd_bif->bif_ifp;
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ ifp = d->bd_bif->bif_ifp;
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
break;
}
+ if (bp == NULL)
+ return (EINVAL);
- if (bp != NULL) {
- opromisc = d->bd_promisc;
- bpf_attachd(d, bp);
- BPFD_LOCK(d);
- reset_d(d);
- BPFD_UNLOCK(d);
- if (opromisc) {
- error = ifpromisc(bp->bif_ifp, 1);
- if (error)
- if_printf(bp->bif_ifp,
- "bpf_setdlt: ifpromisc failed (%d)\n",
- error);
- else
- d->bd_promisc = 1;
- }
+ opromisc = d->bd_promisc;
+ bpf_attachd(d, bp);
+ if (opromisc) {
+ error = ifpromisc(bp->bif_ifp, 1);
+ if (error)
+ if_printf(bp->bif_ifp, "%s: ifpromisc failed (%d)\n",
+ __func__, error);
+ else
+ d->bd_promisc = 1;
}
- return (bp == NULL ? EINVAL : 0);
+ return (0);
}
#ifdef __rtems__
static struct bpf_d *
@@ -2973,7 +2999,7 @@ bpf_imfs_readv(rtems_libio_t *iop, const struct iovec *iov, int iovcnt, ssize_t
struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
struct thread *td = rtems_bsd_get_curthread_or_null();
struct uio uio = {
- .uio_iov = iov,
+ .uio_iov = RTEMS_DECONST(struct iovec *, iov),
.uio_iovcnt = iovcnt,
.uio_offset = 0,
.uio_resid = total,
@@ -3014,7 +3040,7 @@ bpf_imfs_writev(rtems_libio_t *iop, const struct iovec *iov, int iovcnt, ssize_t
struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
struct thread *td = rtems_bsd_get_curthread_or_null();
struct uio uio = {
- .uio_iov = iov,
+ .uio_iov = RTEMS_DECONST(struct iovec *, iov),
.uio_iovcnt = iovcnt,
.uio_offset = 0,
.uio_resid = total,
@@ -3042,7 +3068,7 @@ static ssize_t
bpf_imfs_write(rtems_libio_t *iop, const void *buffer, size_t count)
{
struct iovec iov = {
- .iov_base = buffer,
+ .iov_base = RTEMS_DECONST(void *, buffer),
.iov_len = count
};
@@ -3115,24 +3141,23 @@ bpf_drvinit(void *unused)
#endif /* __rtems__ */
sx_init(&bpf_sx, "bpf global lock");
- LIST_INIT(&bpf_iflist);
- LIST_INIT(&bpf_freelist);
+ CK_LIST_INIT(&bpf_iflist);
#ifndef __rtems__
dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
/* For compatibility */
make_dev_alias(dev, "bpf0");
-#else /* __rtems__ */
- rv = IMFS_make_generic_node("/dev/bpf", mode, &bpf_imfs_control, NULL);
- BSD_ASSERT(rv == 0);
- rv = symlink("/dev/bpf", "/dev/bpf0");
- BSD_ASSERT(rv == 0);
-#endif /* __rtems__ */
/* Register interface departure handler */
bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
ifnet_departure_event, bpf_ifdetach, NULL,
EVENTHANDLER_PRI_ANY);
+#else /* __rtems__ */
+ rv = IMFS_make_generic_node("/dev/bpf", mode, &bpf_imfs_control, NULL);
+ BSD_ASSERT(rv == 0);
+ rv = symlink("/dev/bpf", "/dev/bpf0");
+ BSD_ASSERT(rv == 0);
+#endif /* __rtems__ */
}
/*
@@ -3147,19 +3172,19 @@ bpf_zero_counters(void)
struct bpf_d *bd;
BPF_LOCK();
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- BPFIF_RLOCK(bp);
- LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
- BPFD_LOCK(bd);
+ /*
+ * We are protected by the global lock here; interfaces and
+ * descriptors cannot be deleted while we hold it.
+ */
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
counter_u64_zero(bd->bd_rcount);
counter_u64_zero(bd->bd_dcount);
counter_u64_zero(bd->bd_fcount);
counter_u64_zero(bd->bd_wcount);
counter_u64_zero(bd->bd_wfcount);
counter_u64_zero(bd->bd_zcopy);
- BPFD_UNLOCK(bd);
}
- BPFIF_RUNLOCK(bp);
}
BPF_UNLOCK();
}
@@ -3171,10 +3196,9 @@ static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{
+ BPF_LOCK_ASSERT();
bzero(d, sizeof(*d));
- BPFD_LOCK_ASSERT(bd);
d->bd_structsize = sizeof(*d);
- /* XXX: reading should be protected by global lock */
d->bd_immediate = bd->bd_immediate;
d->bd_promisc = bd->bd_promisc;
d->bd_hdrcmplt = bd->bd_hdrcmplt;
@@ -3251,22 +3275,16 @@ bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
return (ENOMEM);
}
index = 0;
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- BPFIF_RLOCK(bp);
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
/* Send writers-only first */
- LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
+ CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
xbd = &xbdbuf[index++];
- BPFD_LOCK(bd);
bpfstats_fill_xbpf(xbd, bd);
- BPFD_UNLOCK(bd);
}
- LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
+ CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
xbd = &xbdbuf[index++];
- BPFD_LOCK(bd);
bpfstats_fill_xbpf(xbd, bd);
- BPFD_UNLOCK(bd);
}
- BPFIF_RUNLOCK(bp);
}
BPF_UNLOCK();
error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
@@ -3346,10 +3364,10 @@ bpf_show_bpf_if(struct bpf_if *bpf_if)
/* bif_ext.bif_dlist */
BPF_DB_PRINTF("%#x", bif_dlt);
BPF_DB_PRINTF("%u", bif_hdrlen);
- BPF_DB_PRINTF("%p", bif_ifp);
- /* bif_lock */
/* bif_wlist */
- BPF_DB_PRINTF("%#x", bif_flags);
+ BPF_DB_PRINTF("%p", bif_ifp);
+ BPF_DB_PRINTF("%p", bif_bpf);
+ BPF_DB_PRINTF("%u", bif_refcnt);
}
DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h
index d8eb7ff4..55b03b54 100644
--- a/freebsd/sys/net/bpf.h
+++ b/freebsd/sys/net/bpf.h
@@ -42,6 +42,10 @@
#ifndef _NET_BPF_H_
#define _NET_BPF_H_
+#include <sys/_eventhandler.h>
+#include <sys/ck.h>
+#include <net/dlt.h>
+
#if defined(__rtems__) && !defined(__FreeBSD__)
#define __FreeBSD__ 1
#endif /* defined(__rtems__) && !defined(__FreeBSD__) */
@@ -236,9 +240,6 @@ struct bpf_zbuf_header {
u_int _bzh_pad[5];
};
-/* Pull in data-link level type codes. */
-#include <net/dlt.h>
-
/*
* The instruction encodings.
*
@@ -412,10 +413,11 @@ SYSCTL_DECL(_net_bpf);
* bpf_peers_present() calls.
*/
struct bpf_if;
+CK_LIST_HEAD(bpfd_list, bpf_d);
struct bpf_if_ext {
- LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
- LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */
+ CK_LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
+ struct bpfd_list bif_dlist; /* descriptor list */
};
void bpf_bufheld(struct bpf_d *d);
@@ -439,7 +441,7 @@ bpf_peers_present(struct bpf_if *bpf)
struct bpf_if_ext *ext;
ext = (struct bpf_if_ext *)bpf;
- if (!LIST_EMPTY(&ext->bif_dlist))
+ if (!CK_LIST_EMPTY(&ext->bif_dlist))
return (1);
return (0);
}
@@ -467,12 +469,10 @@ bpf_peers_present(struct bpf_if *bpf)
*/
#define BPF_MEMWORDS 16
-#ifdef _SYS_EVENTHANDLER_H_
/* BPF attach/detach events */
struct ifnet;
typedef void (*bpf_track_fn)(void *, struct ifnet *, int /* dlt */,
int /* 1 =>'s attach */);
EVENTHANDLER_DECLARE(bpf_track, bpf_track_fn);
-#endif /* _SYS_EVENTHANDLER_H_ */
#endif /* _NET_BPF_H_ */
diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c
index 7a182a61..daa9e267 100644
--- a/freebsd/sys/net/bpf_buffer.c
+++ b/freebsd/sys/net/bpf_buffer.c
@@ -71,8 +71,10 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_bpf.h>
#include <sys/param.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/kernel.h>
@@ -119,19 +121,10 @@ bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
{
const struct mbuf *m;
u_char *dst;
- u_int count;
m = (struct mbuf *)src;
dst = (u_char *)buf + offset;
- while (len > 0) {
- if (m == NULL)
- panic("bpf_mcopy");
- count = min(m->m_len, len);
- bcopy(mtod(m, void *), dst, count);
- m = m->m_next;
- dst += count;
- len -= count;
- }
+ m_copydata(m, 0, len, dst);
}
/*
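
[Editor's note] For reference, the loop removed above implemented exactly the
semantics of m_copydata(9), which copies len bytes from an mbuf chain into a
flat buffer, walking m_next internally and panicking on a short chain.
Reassembled as a standalone sketch:

    static void
    mbuf_flatten_sketch(struct mbuf *m, u_char *dst, u_int len)
    {
            u_int count;

            while (len > 0) {
                    if (m == NULL)
                            panic("mbuf chain too short");
                    count = min(m->m_len, len);
                    bcopy(mtod(m, void *), dst, count);
                    m = m->m_next;
                    dst += count;
                    len -= count;
            }
    }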
diff --git a/freebsd/sys/net/bpfdesc.h b/freebsd/sys/net/bpfdesc.h
index 2ce9204b..c28a74f9 100644
--- a/freebsd/sys/net/bpfdesc.h
+++ b/freebsd/sys/net/bpfdesc.h
@@ -43,9 +43,10 @@
#include <sys/callout.h>
#include <sys/selinfo.h>
-#include <sys/queue.h>
+#include <sys/ck.h>
#include <sys/conf.h>
#include <sys/counter.h>
+#include <sys/epoch.h>
#include <net/if.h>
/*
@@ -53,7 +54,7 @@
*/
struct zbuf;
struct bpf_d {
- LIST_ENTRY(bpf_d) bd_next; /* Linked list of descriptors */
+ CK_LIST_ENTRY(bpf_d) bd_next; /* Linked list of descriptors */
/*
* Buffer slots: two memory buffers store the incoming packets.
* The model has three slots. Sbuf is always occupied.
@@ -106,6 +107,9 @@ struct bpf_d {
counter_u64_t bd_wdcount; /* number of packets dropped during a write */
counter_u64_t bd_zcopy; /* number of zero copy operations */
u_char bd_compat32; /* 32-bit stream on LP64 system */
+
+ volatile u_int bd_refcnt;
+ struct epoch_context epoch_ctx;
};
/* Values for bd_state */
diff --git a/freebsd/sys/net/bridgestp.c b/freebsd/sys/net/bridgestp.c
index 49e772b3..424f4d69 100644
--- a/freebsd/sys/net/bridgestp.c
+++ b/freebsd/sys/net/bridgestp.c
@@ -2024,6 +2024,7 @@ bstp_same_bridgeid(uint64_t id1, uint64_t id2)
void
bstp_reinit(struct bstp_state *bs)
{
+ struct epoch_tracker et;
struct bstp_port *bp;
struct ifnet *ifp, *mif;
u_char *e_addr;
@@ -2044,7 +2045,7 @@ bstp_reinit(struct bstp_state *bs)
* from is part of this bridge, so we can have more than one independent
* bridges in the same STP domain.
*/
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (ifp->if_type != IFT_ETHER)
continue; /* Not Ethernet */
@@ -2064,7 +2065,7 @@ bstp_reinit(struct bstp_state *bs)
continue;
}
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
if (mif == NULL)
goto disablestp;
@@ -2275,4 +2276,7 @@ bstp_destroy(struct bstp_port *bp)
taskqueue_drain(taskqueue_swi, &bp->bp_statetask);
taskqueue_drain(taskqueue_swi, &bp->bp_rtagetask);
taskqueue_drain(taskqueue_swi, &bp->bp_mediatask);
+
+ if (bp->bp_bs->bs_root_port == bp)
+ bstp_assign_roles(bp->bp_bs);
}
diff --git a/freebsd/sys/net/ethernet.h b/freebsd/sys/net/ethernet.h
index fa75c1df..7ceb9b80 100644
--- a/freebsd/sys/net/ethernet.h
+++ b/freebsd/sys/net/ethernet.h
@@ -401,6 +401,8 @@ struct ether_vlan_header {
#ifdef _KERNEL
+#include <sys/_eventhandler.h>
+
struct ifnet;
struct mbuf;
struct route;
@@ -422,12 +424,11 @@ void ether_vlan_mtap(struct bpf_if *, struct mbuf *,
struct mbuf *ether_vlanencap(struct mbuf *, uint16_t);
bool ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
uint16_t vid, uint8_t pcp);
+void ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr);
-#ifdef _SYS_EVENTHANDLER_H_
/* new ethernet interface attached event */
typedef void (*ether_ifattach_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ether_ifattach_event, ether_ifattach_event_handler_t);
-#endif
#else /* _KERNEL */
diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c
index 9a70d6a1..46076a23 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.c
+++ b/freebsd/sys/net/ieee8023ad_lacp.c
@@ -34,6 +34,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_ratelimit.h>
#include <sys/param.h>
@@ -837,7 +838,9 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
struct lacp_softc *lsc = LACP_SOFTC(sc);
struct lacp_portmap *pm;
struct lacp_port *lp;
+ struct lacp_port **map;
uint32_t hash;
+ int count;
if (__predict_false(lsc->lsc_suppress_distributing)) {
LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
@@ -850,13 +853,31 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
return (NULL);
}
+#ifdef NUMA
+ if ((sc->sc_opts & LAGG_OPT_USE_NUMA) &&
+ pm->pm_num_dom > 1 && m->m_pkthdr.numa_domain < MAXMEMDOM) {
+ count = pm->pm_numa[m->m_pkthdr.numa_domain].count;
+ if (count > 0) {
+ map = pm->pm_numa[m->m_pkthdr.numa_domain].map;
+ } else {
+ /* No ports on this domain; use global hash. */
+ map = pm->pm_map;
+ count = pm->pm_count;
+ }
+ } else
+#endif
+ {
+ map = pm->pm_map;
+ count = pm->pm_count;
+ }
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
hash = m->m_pkthdr.flowid >> sc->flowid_shift;
else
hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
- hash %= pm->pm_count;
- lp = pm->pm_map[hash];
+
+ hash %= count;
+ lp = map[hash];
KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
("aggregated port is not distributing"));
@@ -864,7 +885,7 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
return (lp->lp_lagg);
}
-#ifdef RATELIMIT
+#if defined(RATELIMIT) || defined(KERN_TLS)
struct lagg_port *
lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t flowid)
{
@@ -1046,6 +1067,10 @@ lacp_update_portmap(struct lacp_softc *lsc)
uint64_t speed;
u_int newmap;
int i;
+#ifdef NUMA
+ int count;
+ uint8_t domain;
+#endif
newmap = lsc->lsc_activemap == 0 ? 1 : 0;
p = &lsc->lsc_pmap[newmap];
@@ -1056,9 +1081,25 @@ lacp_update_portmap(struct lacp_softc *lsc)
if (la != NULL && la->la_nports > 0) {
p->pm_count = la->la_nports;
i = 0;
- TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q)
+ TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) {
p->pm_map[i++] = lp;
+#ifdef NUMA
+ domain = lp->lp_ifp->if_numa_domain;
+ if (domain >= MAXMEMDOM)
+ continue;
+ count = p->pm_numa[domain].count;
+ p->pm_numa[domain].map[count] = lp;
+ p->pm_numa[domain].count++;
+#endif
+ }
KASSERT(i == p->pm_count, ("Invalid port count"));
+
+#ifdef NUMA
+ for (i = 0; i < MAXMEMDOM; i++) {
+ if (p->pm_numa[i].count != 0)
+ p->pm_num_dom++;
+ }
+#endif
speed = lacp_aggregator_bandwidth(la);
}
sc->sc_ifp->if_baudrate = speed;
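
[Editor's note] With the NUMA hunks above, lacp_select_tx_port() prefers the
per-domain port map when the mbuf carries a NUMA domain that actually has
distributing ports, and falls back to the global map otherwise; the final
pick is still an index computed as hash % count. A worked example with
assumed values:

    /* Illustrative values only, not real driver state. */
    uint32_t hash = 0x9e3779b9;     /* flowid or m_ether_tcpip_hash() */
    int count = 4;                  /* ports in the chosen map */
    struct lacp_port *lp;

    /* 0x9e3779b9 % 4 == 1, so map[1] carries this flow. */
    lp = map[hash % count];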
diff --git a/freebsd/sys/net/ieee8023ad_lacp.h b/freebsd/sys/net/ieee8023ad_lacp.h
index 5ae48ceb..b6a0860f 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.h
+++ b/freebsd/sys/net/ieee8023ad_lacp.h
@@ -197,8 +197,15 @@ enum lacp_mux_state {
#define LACP_MAX_PORTS 32
+struct lacp_numa {
+ int count;
+ struct lacp_port *map[LACP_MAX_PORTS];
+};
+
struct lacp_portmap {
int pm_count;
+ int pm_num_dom;
+ struct lacp_numa pm_numa[MAXMEMDOM];
struct lacp_port *pm_map[LACP_MAX_PORTS];
};
@@ -286,7 +293,7 @@ struct lacp_softc {
struct mbuf *lacp_input(struct lagg_port *, struct mbuf *);
struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
-#ifdef RATELIMIT
+#if defined(RATELIMIT) || defined(KERN_TLS)
struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t);
#endif
void lacp_attach(struct lagg_softc *);
diff --git a/freebsd/sys/net/ieee_oui.h b/freebsd/sys/net/ieee_oui.h
new file mode 100644
index 00000000..068328d8
--- /dev/null
+++ b/freebsd/sys/net/ieee_oui.h
@@ -0,0 +1,85 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * Author: George V. Neville-Neil
+ *
+ */
+
+/* Organizationally Unique Identifier assigned by IEEE 14 Nov 2013 */
+#define OUI_FREEBSD_BASE 0x589cfc000000
+#define OUI_FREEBSD(nic) (OUI_FREEBSD_BASE | (nic))
+
+/*
+ * OUIs are most often used to uniquely identify network interfaces
+ * and occupy the first 3 bytes of both destination and source MAC
+ * addresses. The following allocations exist so that various
+ * software systems associated with FreeBSD can have unique IDs in the
+ * absence of hardware. The use of OUIs for this purpose is not fully
+ * fleshed out but is now in common use in virtualization technology.
+ *
+ * Allocations from this range are expected to be made using COMMON
+ * SENSE by developers. Do NOT take a large range just because
+ * they're currently wide open. Take the smallest useful range for
+ * your system. We have (2^24 - 2) available addresses (see Reserved
+ * Values below) but that is far from infinite.
+ *
+ * In the event of a conflict arbitration of allocation in this file
+ * is subject to core@ approval.
+ *
+ * Applications are differentiated based on the high order bit(s) of
+ * the remaining three bytes. Our first allocation has all 0s, the
+ * next allocation has the highest bit set. Allocating in this way
+ * gives us 254 allocations of 64K addresses. Address blocks can be
+ * concatenated if necessary.
+ *
+ * Reserved Values: 0x000000 and 0xffffff are reserved and MUST NOT BE
+ * allocated for any reason.
+ */
+
+/* Allocate 20 bits to bhyve */
+#define OUI_FREEBSD_BHYVE_LOW OUI_FREEBSD(0x000001)
+#define OUI_FREEBSD_BHYVE_HIGH OUI_FREEBSD(0x0fffff)
+
+/*
+ * Allocate 16 bits for a pool to give to various interfaces that need a
+ * generated address, but don't quite need to slice off a whole section of
+ * the OUI (e.g. cloned interfaces, one-off NICs of various vendors).
+ *
+ * ether_gen_addr should be used to generate an address from this pool.
+ */
+#define OUI_FREEBSD_GENERATED_MASK 0x10ffff
+#define OUI_FREEBSD_GENERATED_LOW OUI_FREEBSD(0x100000)
+#define OUI_FREEBSD_GENERATED_HIGH OUI_FREEBSD(OUI_FREEBSD_GENERATED_MASK)
+
+/* Allocate 16 bits for emulated NVMe devices */
+#define OUI_FREEBSD_NVME_MASK 0x20ffff
+#define OUI_FREEBSD_NVME_LOW OUI_FREEBSD(0x200000)
+#define OUI_FREEBSD_NVME_HIGH OUI_FREEBSD(OUI_FREEBSD_NVME_MASK)
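OUI_FREEBSD() simply ORs a 24-bit allocation index into the low three bytes of the FreeBSD Foundation OUI, so each range above is a contiguous block of MAC addresses. A standalone sketch (userland, with the constants copied from this header) showing the composition of the first bhyve address:

#include <stdio.h>
#include <stdint.h>

#define OUI_FREEBSD_BASE 0x589cfc000000ULL
#define OUI_FREEBSD(nic) (OUI_FREEBSD_BASE | (nic))

int
main(void)
{
	/* First bhyve allocation: prints 589cfc000001, i.e. 58:9c:fc:00:00:01 */
	uint64_t mac = OUI_FREEBSD(0x000001);

	printf("%012jx\n", (uintmax_t)mac);
	return (0);
}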
diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c
index 9d233444..c1fd928e 100644
--- a/freebsd/sys/net/if.c
+++ b/freebsd/sys/net/if.c
@@ -38,9 +38,10 @@
#include <rtems/bsd/local/opt_inet.h>
#include <sys/param.h>
-#include <sys/types.h>
#include <sys/conf.h>
+#include <sys/eventhandler.h>
#include <sys/malloc.h>
+#include <sys/domainset.h>
#include <sys/sbuf.h>
#include <sys/bus.h>
#include <sys/epoch.h>
@@ -175,14 +176,14 @@ struct ifmediareq32 {
#define SIOCGIFXMEDIA32 _IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32)
#define _CASE_IOC_IFGROUPREQ_32(cmd) \
- case _IOC_NEWTYPE((cmd), struct ifgroupreq32):
+ _IOC_NEWTYPE((cmd), struct ifgroupreq32): case
#else /* !COMPAT_FREEBSD32 */
#define _CASE_IOC_IFGROUPREQ_32(cmd)
#endif /* !COMPAT_FREEBSD32 */
#define CASE_IOC_IFGROUPREQ(cmd) \
_CASE_IOC_IFGROUPREQ_32(cmd) \
- case (cmd)
+ (cmd)
union ifreq_union {
struct ifreq ifr;
@@ -270,7 +271,6 @@ static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
-static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void do_link_state_change(void *, int);
static int if_getgroup(struct ifgroupreq *, struct ifnet *);
@@ -358,16 +358,17 @@ ifnet_byindex(u_short idx)
struct ifnet *
ifnet_byindex_ref(u_short idx)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
ifp = ifnet_byindex_locked(idx);
if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (NULL);
}
if_ref(ifp);
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (ifp);
}
@@ -431,14 +432,15 @@ ifnet_setbyindex(u_short idx, struct ifnet *ifp)
struct ifaddr *
ifaddr_byindex(u_short idx)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
struct ifaddr *ifa = NULL;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
ifp = ifnet_byindex_locked(idx);
if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
ifa_ref(ifa);
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (ifa);
}
@@ -531,13 +533,23 @@ if_grow(void)
* registered for the passed type.
*/
struct ifnet *
-if_alloc(u_char type)
+if_alloc_domain(u_char type, int numa_domain)
{
struct ifnet *ifp;
u_short idx;
void *old;
- ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
+#ifndef __rtems__
+ KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large"));
+ if (numa_domain == IF_NODOM)
+#endif /* __rtems__ */
+ ifp = malloc(sizeof(struct ifnet), M_IFNET,
+ M_WAITOK | M_ZERO);
+#ifndef __rtems__
+ else
+ ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET,
+ DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO);
+#endif /* __rtems__ */
restart:
IFNET_WLOCK();
idx = ifindex_alloc(&old);
@@ -552,6 +564,9 @@ if_alloc(u_char type)
ifp->if_index = idx;
ifp->if_type = type;
ifp->if_alloctype = type;
+#ifndef __rtems__
+ ifp->if_numa_domain = numa_domain;
+#endif /* __rtems__ */
#ifdef VIMAGE
ifp->if_vnet = curvnet;
#endif
@@ -585,6 +600,22 @@ if_alloc(u_char type)
return (ifp);
}
+struct ifnet *
+if_alloc_dev(u_char type, device_t dev)
+{
+ int numa_domain;
+
+ if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0)
+ return (if_alloc_domain(type, IF_NODOM));
+ return (if_alloc_domain(type, numa_domain));
+}
+
+struct ifnet *
+if_alloc(u_char type)
+{
+
+ return (if_alloc_domain(type, IF_NODOM));
+}
/*
* Do the actual work of freeing a struct ifnet, and layer 2 common
* structure. This call is made when the last reference to an
@@ -613,7 +644,14 @@ if_free_internal(struct ifnet *ifp)
free(ifp->if_description, M_IFDESCR);
free(ifp->if_hw_addr, M_IFADDR);
- free(ifp, M_IFNET);
+#ifndef __rtems__
+ if (ifp->if_numa_domain == IF_NODOM)
+#endif /* __rtems__ */
+ free(ifp, M_IFNET);
+#ifndef __rtems__
+ else
+ free_domain(ifp, M_IFNET);
+#endif /* __rtems__ */
}
static void
@@ -840,7 +878,6 @@ if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
sdl->sdl_type = ifp->if_type;
ifp->if_addr = ifa;
ifa->ifa_ifp = ifp;
- ifa->ifa_rtrequest = link_rtrequest;
ifa->ifa_addr = (struct sockaddr *)sdl;
sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
ifa->ifa_netmask = (struct sockaddr *)sdl;
@@ -976,12 +1013,14 @@ if_purgeaddrs(struct ifnet *ifp)
struct ifaddr *ifa;
while (1) {
- NET_EPOCH_ENTER();
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_LINK)
break;
}
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
if (ifa == NULL)
break;
@@ -1107,6 +1146,15 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
curvnet->vnet_ifcnt--;
#endif
epoch_wait_preempt(net_epoch_preempt);
+
+ /*
+ * Ensure all pending EPOCH(9) callbacks have been executed. This
+ * fixes issues with the late destruction of multicast options,
+ * which triggers leave-group calls that in turn access the
+ * ifnet structure being destroyed.
+ */
+ epoch_drain_callbacks(net_epoch_preempt);
+
/*
* In any case (destroy or vmove) detach us from the groups
* and remove/wait for pending events on the taskq.
@@ -1618,38 +1666,39 @@ ifgr_groups_get(void *ifgrp)
static int
if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
{
+ struct epoch_tracker et;
int len, error;
struct ifg_list *ifgl;
struct ifg_req ifgrq, *ifgp;
if (ifgr->ifgr_len == 0) {
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
ifgr->ifgr_len += sizeof(struct ifg_req);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (0);
}
len = ifgr->ifgr_len;
ifgp = ifgr_groups_get(ifgr);
/* XXX: wire */
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
if (len < sizeof(ifgrq)) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (EINVAL);
}
bzero(&ifgrq, sizeof ifgrq);
strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
sizeof(ifgrq.ifgrq_group));
if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (error);
}
len -= sizeof(ifgrq);
ifgp++;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (0);
}
@@ -1869,6 +1918,7 @@ static int
ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
struct sockaddr *ia)
{
+ struct epoch_tracker et;
int error;
struct rt_addrinfo info;
struct sockaddr_dl null_sdl;
@@ -1879,6 +1929,16 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
bzero(&info, sizeof(info));
if (cmd != RTM_DELETE)
info.rti_ifp = V_loif;
+ if (cmd == RTM_ADD) {
+ /* explicitly specify (loopback) ifa */
+ if (info.rti_ifp != NULL) {
+ NET_EPOCH_ENTER(et);
+ info.rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp);
+ if (info.rti_ifa != NULL)
+ ifa_ref(info.rti_ifa);
+ NET_EPOCH_EXIT(et);
+ }
+ }
info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
info.rti_info[RTAX_DST] = ia;
info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
@@ -1963,11 +2023,12 @@ done:
int
ifa_ifwithaddr_check(const struct sockaddr *addr)
{
+ struct epoch_tracker et;
int rc;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
rc = (ifa_ifwithaddr(addr) != NULL);
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (rc);
}
@@ -2057,9 +2118,7 @@ ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
/*
* Scan though each interface, looking for ones that have addresses
- * in this address family and the requested fib. Maintain a reference
- * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
- * kept it stable when we move onto the next interface.
+ * in this address family and the requested fib.
*/
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
@@ -2188,38 +2247,6 @@ ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
((*carp_master_p)(next) && !(*carp_master_p)(cur))));
}
-#include <net/if_llatbl.h>
-
-/*
- * Default action when installing a route with a Link Level gateway.
- * Lookup an appropriate real ifa to point to.
- * This should be moved to /sys/net/link.c eventually.
- */
-static void
-link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
-{
- struct ifaddr *ifa, *oifa;
- struct sockaddr *dst;
- struct ifnet *ifp;
-
- if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
- ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
- return;
- NET_EPOCH_ENTER();
- ifa = ifaof_ifpforaddr(dst, ifp);
- if (ifa) {
- oifa = rt->rt_ifa;
- if (oifa != ifa) {
- ifa_free(oifa);
- ifa_ref(ifa);
- }
- rt->rt_ifa = ifa;
- if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
- ifa->ifa_rtrequest(cmd, rt, info);
- }
- NET_EPOCH_EXIT();
-}
-
struct sockaddr_dl *
link_alloc_sdl(size_t size, int flags)
{
@@ -2418,9 +2445,10 @@ if_qflush(struct ifnet *ifp)
struct ifnet *
ifunit_ref(const char *name)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
!(ifp->if_flags & IFF_DYING))
@@ -2428,21 +2456,22 @@ ifunit_ref(const char *name)
}
if (ifp != NULL)
if_ref(ifp);
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (ifp);
}
struct ifnet *
ifunit(const char *name)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
break;
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (ifp);
}
@@ -2706,6 +2735,8 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
if (strlen(new_name) == IFNAMSIZ-1)
return (EINVAL);
}
+ if (strcmp(new_name, ifp->if_xname) == 0)
+ break;
if (ifunit(new_name) != NULL)
return (EEXIST);
@@ -2830,6 +2861,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (EINVAL);
if (cmd == SIOCADDMULTI) {
+ struct epoch_tracker et;
struct ifmultiaddr *ifma;
/*
@@ -2839,9 +2871,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
* lose a race while we check if the membership
* already exists.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
ifma = if_findmulti(ifp, &ifr->ifr_addr);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (ifma != NULL)
error = EADDRINUSE;
else
@@ -2878,6 +2910,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
case SIOCGIFGENERIC:
case SIOCGIFRSSKEY:
case SIOCGIFRSSHASH:
+ case SIOCGIFDOWNREASON:
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
error = (*ifp->if_ioctl)(ifp, cmd, data);
@@ -2895,7 +2928,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
error = if_gethwaddr(ifp, ifr);
break;
- CASE_IOC_IFGROUPREQ(SIOCAIFGROUP):
+ case CASE_IOC_IFGROUPREQ(SIOCAIFGROUP):
error = priv_check(td, PRIV_NET_ADDIFGROUP);
if (error)
return (error);
@@ -2904,12 +2937,12 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (error);
break;
- CASE_IOC_IFGROUPREQ(SIOCGIFGROUP):
+ case CASE_IOC_IFGROUPREQ(SIOCGIFGROUP):
if ((error = if_getgroup((struct ifgroupreq *)data, ifp)))
return (error);
break;
- CASE_IOC_IFGROUPREQ(SIOCDIFGROUP):
+ case CASE_IOC_IFGROUPREQ(SIOCDIFGROUP):
error = priv_check(td, PRIV_NET_DELIFGROUP);
if (error)
return (error);
@@ -3080,7 +3113,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
error = if_clone_list((struct if_clonereq *)data);
goto out_noref;
- CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
+ case CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
error = if_getgroupmembers((struct ifgroupreq *)data);
goto out_noref;
@@ -3280,6 +3313,7 @@ again:
IFNET_RLOCK();
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ struct epoch_tracker et;
int addrs;
/*
@@ -3296,7 +3330,7 @@ again:
}
addrs = 0;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct sockaddr *sa = ifa->ifa_addr;
@@ -3324,7 +3358,7 @@ again:
if (sbuf_error(sb) == 0)
valid_len = sbuf_len(sb);
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (addrs == 0) {
sbuf_bcat(sb, &ifr, sizeof(ifr));
max_len += sizeof(ifr);
@@ -3631,15 +3665,16 @@ if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
struct ifmultiaddr *ifma;
int lastref;
#ifdef INVARIANTS
+ struct epoch_tracker et;
struct ifnet *oifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link)
if (ifp == oifp)
break;
if (ifp != oifp)
ifp = NULL;
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
#endif
@@ -3705,15 +3740,16 @@ if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags)
if (ifp == NULL) {
printf("%s: ifma_ifp seems to be detached\n", __func__);
} else {
+ struct epoch_tracker et;
struct ifnet *oifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link)
if (ifp == oifp)
break;
if (ifp != oifp)
ifp = NULL;
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
}
#endif
/*
@@ -3837,10 +3873,11 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
struct sockaddr_dl *sdl;
struct ifaddr *ifa;
struct ifreq ifr;
+ struct epoch_tracker et;
int rc;
rc = 0;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
ifa = ifp->if_addr;
if (ifa == NULL) {
rc = EINVAL;
@@ -3874,7 +3911,7 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
* to re-init it in order to reprogram its
* address filter.
*/
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
if ((ifp->if_flags & IFF_UP) != 0) {
if (ifp->if_ioctl) {
ifp->if_flags &= ~IFF_UP;
@@ -3890,7 +3927,7 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
EVENTHANDLER_INVOKE(iflladdr_event, ifp);
return (0);
out:
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (rc);
}
@@ -4305,6 +4342,8 @@ if_getsoftc(if_t ifp)
void
if_setrcvif(struct mbuf *m, if_t ifp)
{
+
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (struct ifnet *)ifp;
}
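Most of the if.c churn above replaces the IFNET_RLOCK_NOSLEEP()/IF_ADDR_RLOCK() read paths with epoch(9) sections, whose API now requires a caller-supplied on-stack tracker. A kernel-style sketch of the resulting consumer pattern; find_by_name() is a hypothetical epoch-safe lookup:

/*
 * Sketch of the NET_EPOCH_ENTER/EXIT pattern introduced above; the
 * tracker now lives on the caller's stack instead of being implicit.
 */
static struct ifnet *
lookup_something(const char *name)
{
	struct epoch_tracker et;
	struct ifnet *ifp;

	NET_EPOCH_ENTER(et);
	ifp = find_by_name(name);	/* hypothetical epoch-safe walk */
	if (ifp != NULL)
		if_ref(ifp);	/* take a reference before leaving the epoch */
	NET_EPOCH_EXIT(et);
	return (ifp);
}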
diff --git a/freebsd/sys/net/if_arp.h b/freebsd/sys/net/if_arp.h
index 070dbafe..f4c3bec2 100644
--- a/freebsd/sys/net/if_arp.h
+++ b/freebsd/sys/net/if_arp.h
@@ -105,8 +105,9 @@ struct arpstat {
uint64_t rxrequests; /* # of ARP requests received by this host. */
uint64_t rxreplies; /* # of ARP replies received by this host. */
uint64_t received; /* # of ARP packets received by this host. */
+ uint64_t txerrors; /* # of ARP requests failed to send. */
- uint64_t arp_spares[4]; /* For either the upper or lower half. */
+ uint64_t arp_spares[3]; /* For either the upper or lower half. */
/* Abnormal event and error counting: */
uint64_t dropped; /* # of packets dropped waiting for a reply. */
uint64_t timeouts; /* # of times with entries removed */
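The new txerrors counter is carved out of arp_spares[], which shrinks from 4 to 3 entries so that sizeof(struct arpstat) and the offsets of the fields that follow stay unchanged. A simplified compile-time sketch of that invariant (the real struct has more leading fields):

#include <stdint.h>

/* Simplified stand-ins for the counters preceding the spare slots. */
struct arpstat_old {
	uint64_t counters[3];
	uint64_t arp_spares[4];
};
struct arpstat_new {
	uint64_t counters[3];
	uint64_t txerrors;	/* new counter takes one spare slot */
	uint64_t arp_spares[3];
};
_Static_assert(sizeof(struct arpstat_old) == sizeof(struct arpstat_new),
    "consuming a spare must not change the structure size");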
diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
index aa56be48..18e0e7bf 100644
--- a/freebsd/sys/net/if_bridge.c
+++ b/freebsd/sys/net/if_bridge.c
@@ -228,7 +228,7 @@ struct bridge_softc {
struct bstp_state sc_stp; /* STP state */
uint32_t sc_brtexceeded; /* # of cache drops */
struct ifnet *sc_ifaddr; /* member mac copied from */
- u_char sc_defaddr[6]; /* Default MAC address */
+ struct ether_addr sc_defaddr; /* Default MAC address */
};
VNET_DEFINE_STATIC(struct mtx, bridge_list_mtx);
@@ -237,7 +237,8 @@ static eventhandler_tag bridge_detach_cookie;
int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
-uma_zone_t bridge_rtnode_zone;
+VNET_DEFINE_STATIC(uma_zone_t, bridge_rtnode_zone);
+#define V_bridge_rtnode_zone VNET(bridge_rtnode_zone)
static int bridge_clone_create(struct if_clone *, int, caddr_t);
static void bridge_clone_destroy(struct ifnet *);
@@ -529,6 +530,9 @@ static void
vnet_bridge_init(const void *unused __unused)
{
+ V_bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
+ sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
BRIDGE_LIST_LOCK_INIT();
LIST_INIT(&V_bridge_list);
V_bridge_cloner = if_clone_simple(bridge_name,
@@ -544,6 +548,7 @@ vnet_bridge_uninit(const void *unused __unused)
if_clone_detach(V_bridge_cloner);
V_bridge_cloner = NULL;
BRIDGE_LIST_LOCK_DESTROY();
+ uma_zdestroy(V_bridge_rtnode_zone);
}
VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
vnet_bridge_uninit, NULL);
@@ -554,9 +559,6 @@ bridge_modevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
- bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
- sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
- UMA_ALIGN_PTR, 0);
bridge_dn_p = bridge_dummynet;
bridge_detach_cookie = EVENTHANDLER_REGISTER(
ifnet_departure_event, bridge_ifdetach, NULL,
@@ -565,7 +567,6 @@ bridge_modevent(module_t mod, int type, void *data)
case MOD_UNLOAD:
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
bridge_detach_cookie);
- uma_zdestroy(bridge_rtnode_zone);
bridge_dn_p = NULL;
break;
default:
@@ -672,16 +673,14 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
getcredhostid(curthread->td_ucred, &hostid);
do {
if (fb || hostid == 0) {
- arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
- sc->sc_defaddr[0] &= ~1;/* clear multicast bit */
- sc->sc_defaddr[0] |= 2; /* set the LAA bit */
+ ether_gen_addr(ifp, &sc->sc_defaddr);
} else {
- sc->sc_defaddr[0] = 0x2;
- sc->sc_defaddr[1] = (hostid >> 24) & 0xff;
- sc->sc_defaddr[2] = (hostid >> 16) & 0xff;
- sc->sc_defaddr[3] = (hostid >> 8 ) & 0xff;
- sc->sc_defaddr[4] = hostid & 0xff;
- sc->sc_defaddr[5] = ifp->if_dunit & 0xff;
+ sc->sc_defaddr.octet[0] = 0x2;
+ sc->sc_defaddr.octet[1] = (hostid >> 24) & 0xff;
+ sc->sc_defaddr.octet[2] = (hostid >> 16) & 0xff;
+ sc->sc_defaddr.octet[3] = (hostid >> 8 ) & 0xff;
+ sc->sc_defaddr.octet[4] = hostid & 0xff;
+ sc->sc_defaddr.octet[5] = ifp->if_dunit & 0xff;
}
fb = 1;
@@ -689,7 +688,7 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
BRIDGE_LIST_LOCK();
LIST_FOREACH(sc2, &V_bridge_list, sc_list) {
bifp = sc2->sc_ifp;
- if (memcmp(sc->sc_defaddr,
+ if (memcmp(sc->sc_defaddr.octet,
IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
retry = 1;
break;
@@ -699,7 +698,7 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
} while (retry == 1);
bstp_attach(&sc->sc_stp, &bridge_ops);
- ether_ifattach(ifp, sc->sc_defaddr);
+ ether_ifattach(ifp, sc->sc_defaddr.octet);
/* Now undo some of the damage... */
ifp->if_baudrate = 0;
ifp->if_type = IFT_BRIDGE;
@@ -734,6 +733,9 @@ bridge_clone_destroy(struct ifnet *ifp)
bridge_delete_span(sc, bif);
}
+ /* Tear down the routing table. */
+ bridge_rtable_fini(sc);
+
BRIDGE_UNLOCK(sc);
callout_drain(&sc->sc_brcallout);
@@ -746,9 +748,6 @@ bridge_clone_destroy(struct ifnet *ifp)
ether_ifdetach(ifp);
if_free(ifp);
- /* Tear down the routing table. */
- bridge_rtable_fini(sc);
-
BRIDGE_LOCK_DESTROY(sc);
free(sc, M_DEVBUF);
}
@@ -927,7 +926,7 @@ bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
{
struct ifnet *ifp = bif->bif_ifp;
struct ifreq ifr;
- int error;
+ int error, mask, stuck;
BRIDGE_UNLOCK_ASSERT(sc);
@@ -940,10 +939,12 @@ bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
if_printf(sc->sc_ifp,
"error setting capabilities on %s: %d\n",
ifp->if_xname, error);
- if ((ifp->if_capenable & ~set) != 0)
+ mask = BRIDGE_IFCAPS_MASK | BRIDGE_IFCAPS_STRIP;
+ stuck = ifp->if_capenable & mask & ~set;
+ if (stuck != 0)
if_printf(sc->sc_ifp,
"can't disable some capabilities on %s: 0x%x\n",
- ifp->if_xname, ifp->if_capenable & ~set);
+ ifp->if_xname, stuck);
}
}
@@ -1018,7 +1019,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
*/
if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) {
if (LIST_EMPTY(&sc->sc_iflist)) {
- bcopy(sc->sc_defaddr,
+ bcopy(&sc->sc_defaddr,
IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
sc->sc_ifaddr = NULL;
} else {
@@ -1189,7 +1190,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
* the default randomly generated one.
*/
if (V_bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) &&
- !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) {
+ !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr.octet, ETHER_ADDR_LEN)) {
bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
sc->sc_ifaddr = ifs;
EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
@@ -1972,9 +1973,9 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
return;
}
- if (PFIL_HOOKED(&V_inet_pfil_hook)
+ if (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
) {
if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0)
@@ -2001,7 +2002,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
struct rtentry *rt)
{
struct ether_header *eh;
- struct ifnet *dst_if;
+ struct ifnet *bifp, *dst_if;
struct bridge_softc *sc;
uint16_t vlan;
@@ -2016,13 +2017,14 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
vlan = VLANTAGOF(m);
BRIDGE_LOCK(sc);
+ bifp = sc->sc_ifp;
/*
* If bridge is down, but the original output interface is up,
* go ahead and send out that interface. Otherwise, the packet
* is dropped below.
*/
- if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
dst_if = ifp;
goto sendunicast;
}
@@ -2035,6 +2037,9 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
dst_if = NULL;
else
dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
+ /* Tap any traffic not passing back out the originating interface */
+ if (dst_if != ifp)
+ ETHER_BPF_MTAP(bifp, m);
if (dst_if == NULL) {
struct bridge_iflist *bif;
struct mbuf *mc;
@@ -2072,7 +2077,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
} else {
mc = m_copypacket(m, M_NOWAIT);
if (mc == NULL) {
- if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
+ if_inc_counter(bifp, IFCOUNTER_OERRORS, 1);
continue;
}
}
@@ -2232,9 +2237,9 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
ETHER_BPF_MTAP(ifp, m);
/* run the packet filter */
- if (PFIL_HOOKED(&V_inet_pfil_hook)
+ if (PFIL_HOOKED_IN(V_inet_pfil_head)
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_IN(V_inet6_pfil_head)
#endif
) {
BRIDGE_UNLOCK(sc);
@@ -2272,9 +2277,9 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
BRIDGE_UNLOCK(sc);
- if (PFIL_HOOKED(&V_inet_pfil_hook)
+ if (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
) {
if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
@@ -2411,7 +2416,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
#ifdef INET6
# define OR_PFIL_HOOKED_INET6 \
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_IN(V_inet6_pfil_head)
#else
# define OR_PFIL_HOOKED_INET6
#endif
@@ -2423,22 +2428,6 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
if (memcmp(IF_LLADDR((iface)), eh->ether_dhost, ETHER_ADDR_LEN) == 0 \
OR_CARP_CHECK_WE_ARE_DST((iface)) \
) { \
- if ((iface)->if_type == IFT_BRIDGE) { \
- ETHER_BPF_MTAP(iface, m); \
- if_inc_counter(iface, IFCOUNTER_IPACKETS, 1); \
- if_inc_counter(iface, IFCOUNTER_IBYTES, m->m_pkthdr.len); \
- /* Filter on the physical interface. */ \
- if (V_pfil_local_phys && \
- (PFIL_HOOKED(&V_inet_pfil_hook) \
- OR_PFIL_HOOKED_INET6)) { \
- if (bridge_pfil(&m, NULL, ifp, \
- PFIL_IN) != 0 || m == NULL) { \
- BRIDGE_UNLOCK(sc); \
- return (NULL); \
- } \
- eh = mtod(m, struct ether_header *); \
- } \
- } \
if (bif->bif_flags & IFBIF_LEARNING) { \
error = bridge_rtupdate(sc, eh->ether_shost, \
vlan, bif, 0, IFBAF_DYNAMIC); \
@@ -2449,6 +2438,26 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
} \
} \
m->m_pkthdr.rcvif = iface; \
+ if ((iface) == ifp) { \
+ /* Skip bridge processing... src == dest */ \
+ BRIDGE_UNLOCK(sc); \
+ return (m); \
+ } \
+ /* It's passing over or to the bridge, locally. */ \
+ ETHER_BPF_MTAP(bifp, m); \
+ if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1); \
+ if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); \
+ /* Filter on the physical interface. */ \
+ if (V_pfil_local_phys && (PFIL_HOOKED_IN(V_inet_pfil_head) \
+ OR_PFIL_HOOKED_INET6)) { \
+ if (bridge_pfil(&m, NULL, ifp, \
+ PFIL_IN) != 0 || m == NULL) { \
+ BRIDGE_UNLOCK(sc); \
+ return (NULL); \
+ } \
+ } \
+ if ((iface) != bifp) \
+ ETHER_BPF_MTAP(iface, m); \
BRIDGE_UNLOCK(sc); \
return (m); \
} \
@@ -2519,9 +2528,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
}
/* Filter on the bridge interface before broadcasting */
- if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook)
+ if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
)) {
if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0)
@@ -2566,9 +2575,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
* pointer so we do not redundantly filter on the bridge for
* each interface we broadcast on.
*/
- if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook)
+ if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
)) {
if (used == 0) {
@@ -2671,7 +2680,7 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
* initialize the expiration time and Ethernet
* address.
*/
- brt = uma_zalloc(bridge_rtnode_zone, M_NOWAIT | M_ZERO);
+ brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO);
if (brt == NULL)
return (ENOMEM);
@@ -2684,7 +2693,7 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
brt->brt_vlan = vlan;
if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
- uma_zfree(bridge_rtnode_zone, brt);
+ uma_zfree(V_bridge_rtnode_zone, brt);
return (error);
}
brt->brt_dst = bif;
@@ -2768,11 +2777,14 @@ bridge_timer(void *arg)
BRIDGE_LOCK_ASSERT(sc);
+ /* Destruction of rtnodes requires a proper vnet context */
+ CURVNET_SET(sc->sc_ifp->if_vnet);
bridge_rtage(sc);
if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
callout_reset(&sc->sc_brcallout,
bridge_rtable_prune_period * hz, bridge_timer, sc);
+ CURVNET_RESTORE();
}
/*
@@ -3030,7 +3042,7 @@ bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
LIST_REMOVE(brt, brt_list);
sc->sc_brtcnt--;
brt->brt_dst->bif_addrcnt--;
- uma_zfree(bridge_rtnode_zone, brt);
+ uma_zfree(V_bridge_rtnode_zone, brt);
}
/*
@@ -3044,6 +3056,7 @@ bridge_rtable_expire(struct ifnet *ifp, int age)
struct bridge_softc *sc = ifp->if_bridge;
struct bridge_rtnode *brt;
+ CURVNET_SET(ifp->if_vnet);
BRIDGE_LOCK(sc);
/*
@@ -3062,6 +3075,7 @@ bridge_rtable_expire(struct ifnet *ifp, int age)
}
}
BRIDGE_UNLOCK(sc);
+ CURVNET_RESTORE();
}
/*
@@ -3103,6 +3117,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
struct ip *ip;
struct llc llc1;
u_int16_t ether_type;
+ pfil_return_t rv;
snap = 0;
error = -1; /* Default error if not error == 0 */
@@ -3174,14 +3189,14 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
}
/* Run the packet through pfil before stripping link headers */
- if (PFIL_HOOKED(&V_link_pfil_hook) && V_pfil_ipfw != 0 &&
- dir == PFIL_OUT && ifp != NULL) {
-
- error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, 0,
- NULL);
-
- if (*mp == NULL || error != 0) /* packet consumed by filter */
- return (error);
+ if (PFIL_HOOKED_OUT(V_link_pfil_head) && V_pfil_ipfw != 0 &&
+ dir == PFIL_OUT && ifp != NULL) {
+ switch (pfil_run_hooks(V_link_pfil_head, mp, ifp, dir, NULL)) {
+ case PFIL_DROPPED:
+ return (EPERM);
+ case PFIL_CONSUMED:
+ return (0);
+ }
}
/* Strip off the Ethernet header and keep a copy. */
@@ -3219,6 +3234,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
/*
* Run the packet through pfil
*/
+ rv = PFIL_PASS;
switch (ether_type) {
case ETHERTYPE_IP:
/*
@@ -3228,25 +3244,19 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
* Keep the order:
* in_if -> bridge_if -> out_if
*/
- if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL)
- error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
- dir, 0, NULL);
-
- if (*mp == NULL || error != 0) /* filter may consume */
+ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv =
+ pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) !=
+ PFIL_PASS)
break;
- if (V_pfil_member && ifp != NULL)
- error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp,
- dir, 0, NULL);
-
- if (*mp == NULL || error != 0) /* filter may consume */
+ if (V_pfil_member && ifp != NULL && (rv =
+ pfil_run_hooks(V_inet_pfil_head, mp, ifp, dir, NULL)) !=
+ PFIL_PASS)
break;
- if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL)
- error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
- dir, 0, NULL);
-
- if (*mp == NULL || error != 0) /* filter may consume */
+ if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv =
+ pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) !=
+ PFIL_PASS)
break;
/* check if we need to fragment the packet */
@@ -3282,35 +3292,33 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
break;
#ifdef INET6
case ETHERTYPE_IPV6:
- if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL)
- error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
- dir, 0, NULL);
-
- if (*mp == NULL || error != 0) /* filter may consume */
+ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv =
+ pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) !=
+ PFIL_PASS)
break;
- if (V_pfil_member && ifp != NULL)
- error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp,
- dir, 0, NULL);
-
- if (*mp == NULL || error != 0) /* filter may consume */
+ if (V_pfil_member && ifp != NULL && (rv =
+ pfil_run_hooks(V_inet6_pfil_head, mp, ifp, dir, NULL)) !=
+ PFIL_PASS)
break;
- if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL)
- error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
- dir, 0, NULL);
+ if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv =
+ pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) !=
+ PFIL_PASS)
+ break;
break;
#endif
+ }
+
+ switch (rv) {
+ case PFIL_CONSUMED:
+ return (0);
+ case PFIL_DROPPED:
+ return (EPERM);
default:
- error = 0;
break;
}
- if (*mp == NULL)
- return (error);
- if (error != 0)
- goto bad;
-
error = -1;
/*
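The bridge hunks above move to the reworked pfil(9) API: hook presence is tested per direction with PFIL_HOOKED_IN()/PFIL_HOOKED_OUT(), and pfil_run_hooks() returns a pfil_return_t instead of an errno paired with a possibly-NULLed mbuf pointer. A kernel-style sketch of the consumer idiom, mirroring the bridge's error mapping (EPERM on drop, 0 on consume):

/*
 * Sketch of the new pfil(9) consumer pattern. On PFIL_PASS *mp is
 * still valid and processing continues; on PFIL_DROPPED or
 * PFIL_CONSUMED the mbuf is gone.
 */
static int
filter_out(pfil_head_t head, struct mbuf **mp, struct ifnet *ifp)
{
	if (!PFIL_HOOKED_OUT(head))
		return (0);
	switch (pfil_run_hooks(head, mp, ifp, PFIL_OUT, NULL)) {
	case PFIL_DROPPED:
		return (EPERM);		/* framework freed the packet */
	case PFIL_CONSUMED:
		return (0);		/* a hook took ownership */
	default:			/* PFIL_PASS */
		break;
	}
	/* *mp survived the hooks; keep processing. */
	return (0);
}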
diff --git a/freebsd/sys/net/if_clone.h b/freebsd/sys/net/if_clone.h
index 5dceacf6..30d604f3 100644
--- a/freebsd/sys/net/if_clone.h
+++ b/freebsd/sys/net/if_clone.h
@@ -37,6 +37,8 @@
#ifdef _KERNEL
+#include <sys/_eventhandler.h>
+
#define IFC_NOGROUP 0x1
struct if_clone;
@@ -65,11 +67,9 @@ const char *ifc_name(struct if_clone *);
void ifc_flags_set(struct if_clone *, int flags);
int ifc_flags_get(struct if_clone *);
-#ifdef _SYS_EVENTHANDLER_H_
/* Interface clone event. */
typedef void (*if_clone_event_handler_t)(void *, struct if_clone *);
EVENTHANDLER_DECLARE(if_clone_event, if_clone_event_handler_t);
-#endif
/* The below interfaces used only by net/if.c. */
void vnet_if_clone_init(void);
diff --git a/freebsd/sys/net/if_dead.c b/freebsd/sys/net/if_dead.c
index 552be13f..ff73ceaf 100644
--- a/freebsd/sys/net/if_dead.c
+++ b/freebsd/sys/net/if_dead.c
@@ -128,6 +128,23 @@ ifdead_snd_tag_free(struct m_snd_tag *pmt)
{
}
+static void
+ifdead_ratelimit_query(struct ifnet *ifp __unused,
+ struct if_ratelimit_query_results *q)
+{
+ /*
+ * A dead interface cannot support rate limiting, so report
+ * that no rates are available; the interface should not be
+ * carrying a capability flag that claims otherwise.
+ */
+ q->rate_table = NULL;
+ q->flags = RT_NOSUPPORT;
+ q->max_flows = 0;
+ q->number_of_rates = 0;
+}
+
void
if_dead(struct ifnet *ifp)
{
@@ -144,4 +161,5 @@ if_dead(struct ifnet *ifp)
ifp->if_snd_tag_modify = ifdead_snd_tag_modify;
ifp->if_snd_tag_query = ifdead_snd_tag_query;
ifp->if_snd_tag_free = ifdead_snd_tag_free;
+ ifp->if_ratelimit_query = ifdead_ratelimit_query;
}
diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c
index ebfbf5cb..9e7fcc53 100644
--- a/freebsd/sys/net/if_enc.c
+++ b/freebsd/sys/net/if_enc.c
@@ -287,24 +287,24 @@ enc_hhook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data,
switch (hhook_id) {
#ifdef INET
case AF_INET:
- ph = &V_inet_pfil_hook;
+ ph = V_inet_pfil_head;
break;
#endif
#ifdef INET6
case AF_INET6:
- ph = &V_inet6_pfil_hook;
+ ph = V_inet6_pfil_head;
break;
#endif
default:
ph = NULL;
}
- if (ph == NULL || !PFIL_HOOKED(ph))
+ if (ph == NULL || (pdir == PFIL_OUT && !PFIL_HOOKED_OUT(ph)) ||
+ (pdir == PFIL_IN && !PFIL_HOOKED_IN(ph)))
return (0);
/* Make a packet looks like it was received on enc(4) */
rcvif = (*ctx->mp)->m_pkthdr.rcvif;
(*ctx->mp)->m_pkthdr.rcvif = ifp;
- if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, 0, ctx->inp) != 0 ||
- *ctx->mp == NULL) {
+ if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, ctx->inp) != PFIL_PASS) {
*ctx->mp = NULL; /* consumed by filter */
return (EACCES);
}
diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c
index 96ed309a..6c5c2ccb 100644
--- a/freebsd/sys/net/if_ethersubr.c
+++ b/freebsd/sys/net/if_ethersubr.c
@@ -44,11 +44,13 @@
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/eventhandler.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mbuf.h>
+#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/random.h>
#include <sys/socket.h>
@@ -56,6 +58,7 @@
#include <sys/sysctl.h>
#include <sys/uuid.h>
+#include <net/ieee_oui.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
@@ -87,12 +90,14 @@
#endif
#include <security/mac/mac_framework.h>
+#include <crypto/sha1.h>
+
#ifdef CTASSERT
CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
#endif
-VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */
+VNET_DEFINE(pfil_head_t, link_pfil_head); /* Packet filter hooks */
/* netgraph node hooks for ng_ether(4) */
void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
@@ -459,7 +464,6 @@ ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp)
int
ether_output_frame(struct ifnet *ifp, struct mbuf *m)
{
- int error;
uint8_t pcp;
pcp = ifp->if_pcp;
@@ -467,27 +471,27 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m)
!ether_set_pcp(&m, ifp, pcp))
return (0);
- if (PFIL_HOOKED(&V_link_pfil_hook)) {
- error = pfil_run_hooks(&V_link_pfil_hook, &m, ifp,
- PFIL_OUT, 0, NULL);
- if (error != 0)
+ if (PFIL_HOOKED_OUT(V_link_pfil_head))
+ switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT,
+ NULL)) {
+ case PFIL_DROPPED:
return (EACCES);
-
- if (m == NULL)
+ case PFIL_CONSUMED:
return (0);
- }
+ }
#ifdef EXPERIMENTAL
#if defined(INET6) && defined(INET)
/* draft-ietf-6man-ipv6only-flag */
- /* Catch ETHERTYPE_IP, and ETHERTYPE_ARP if we are v6-only. */
- if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY) != 0) {
+ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) {
struct ether_header *eh;
eh = mtod(m, struct ether_header *);
switch (ntohs(eh->ether_type)) {
case ETHERTYPE_IP:
case ETHERTYPE_ARP:
+ case ETHERTYPE_REVARP:
m_freem(m);
return (EAFNOSUPPORT);
/* NOTREACHED */
@@ -538,6 +542,25 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
etype = ntohs(eh->ether_type);
random_harvest_queue_ether(m, sizeof(*m));
+#ifdef EXPERIMENTAL
+#if defined(INET6) && defined(INET)
+ /* draft-ietf-6man-ipv6only-flag */
+ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) {
+
+ switch (etype) {
+ case ETHERTYPE_IP:
+ case ETHERTYPE_ARP:
+ case ETHERTYPE_REVARP:
+ m_freem(m);
+ return;
+ /* NOTREACHED */
+ break;
+ };
+ }
+#endif
+#endif
+
CURVNET_SET_QUIET(ifp->if_vnet);
if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
@@ -739,14 +762,14 @@ SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
static void
vnet_ether_init(__unused void *arg)
{
- int i;
+ struct pfil_head_args args;
+
+ args.pa_version = PFIL_VERSION;
+ args.pa_flags = PFIL_IN | PFIL_OUT;
+ args.pa_type = PFIL_TYPE_ETHERNET;
+ args.pa_headname = PFIL_ETHER_NAME;
+ V_link_pfil_head = pfil_head_register(&args);
- /* Initialize packet filter hooks. */
- V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
- V_link_pfil_hook.ph_af = AF_LINK;
- if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
- printf("%s: WARNING: unable to register pfil link hook, "
- "error %d\n", __func__, i);
#ifdef VIMAGE
netisr_register_vnet(&ether_nh);
#endif
@@ -758,11 +781,8 @@ VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
static void
vnet_ether_pfil_destroy(__unused void *arg)
{
- int i;
- if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0)
- printf("%s: WARNING: unable to unregister pfil link hook, "
- "error %d\n", __func__, i);
+ pfil_head_unregister(V_link_pfil_head);
}
VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
vnet_ether_pfil_destroy, NULL);
@@ -798,6 +818,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
* We will rely on rcvif being set properly in the deferred context,
* so assert it is correct here.
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
"rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
CURVNET_SET_QUIET(ifp->if_vnet);
@@ -820,10 +841,8 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
/* Do not grab PROMISC frames in case we are re-entered. */
- if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
- i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, 0,
- NULL);
-
+ if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) {
+ i = pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_IN, NULL);
if (i != 0 || m == NULL)
return;
}
@@ -1390,5 +1409,38 @@ ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
return (true);
}
+/*
+ * Allocate an address from the FreeBSD Foundation OUI. This uses a
+ * cryptographic hash function on the containing jail's UUID and the interface
+ * name to attempt to provide a unique but stable address. Pseudo-interfaces
+ * which require a MAC address should use this function to allocate
+ * non-locally-administered addresses.
+ */
+void
+ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr)
+{
+#define ETHER_GEN_ADDR_BUFSIZ HOSTUUIDLEN + IFNAMSIZ + 2
+ SHA1_CTX ctx;
+ char buf[ETHER_GEN_ADDR_BUFSIZ];
+ char uuid[HOSTUUIDLEN + 1];
+ uint64_t addr;
+ int i, sz;
+ char digest[SHA1_RESULTLEN];
+
+ getcredhostuuid(curthread->td_ucred, uuid, sizeof(uuid));
+ sz = snprintf(buf, ETHER_GEN_ADDR_BUFSIZ, "%s-%s", uuid, ifp->if_xname);
+ SHA1Init(&ctx);
+ SHA1Update(&ctx, buf, sz);
+ SHA1Final(digest, &ctx);
+
+ addr = ((digest[0] << 16) | (digest[1] << 8) | digest[2]) &
+ OUI_FREEBSD_GENERATED_MASK;
+ addr = OUI_FREEBSD(addr);
+ for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+ hwaddr->octet[i] = addr >> ((ETHER_ADDR_LEN - i - 1) * 8) &
+ 0xFF;
+ }
+}
+
DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
MODULE_VERSION(ether, 1);
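ether_gen_addr() hashes the jail's UUID together with the interface name and folds the low 24 bits of the digest into the generated-address block of the FreeBSD OUI, so the result is stable across reboots yet distinct per interface and jail. A hedged sketch of how a pseudo-interface might use it at clone time; foo_clone_create() and the elided initialization are hypothetical:

/*
 * Sketch: a hypothetical cloner allocating a stable generated MAC.
 * ether_gen_addr() reads if_xname, so the name must be set first.
 */
static int
foo_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
	struct ether_addr ea;
	struct ifnet *ifp;

	ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		return (ENOSPC);
	if_initname(ifp, "foo", unit);
	/* ... softc, if_ioctl, if_transmit, flags ... */
	ether_gen_addr(ifp, &ea);	/* stable per jail + ifname */
	ether_ifattach(ifp, ea.octet);
	return (0);
}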
diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c
index 4fbc105e..5aeb8266 100644
--- a/freebsd/sys/net/if_gre.c
+++ b/freebsd/sys/net/if_gre.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <sys/param.h>
#include <sys/kernel.h>
@@ -51,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
@@ -67,19 +69,27 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <netinet/in.h>
+#include <netinet/in_pcb.h>
#ifdef INET
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
+#ifdef RSS
+#include <netinet/in_rss.h>
+#endif
#endif
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
+#ifdef RSS
+#include <netinet6/in6_rss.h>
+#endif
#endif
#include <netinet/ip_encap.h>
+#include <netinet/udp.h>
#include <net/bpf.h>
#include <net/if_gre.h>
@@ -153,6 +163,7 @@ vnet_gre_uninit(const void *unused __unused)
#ifdef INET6
in6_gre_uninit();
#endif
+ /* XXX: epoch_call drain */
}
VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_gre_uninit, NULL);
@@ -272,6 +283,7 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
case GRESKEY:
case GRESOPTS:
+ case GRESPORT:
if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
@@ -287,23 +299,45 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
if (sc->gre_options == opt)
break;
+ } else if (cmd == GRESPORT) {
+ if (opt != 0 && (opt < V_ipport_hifirstauto ||
+ opt > V_ipport_hilastauto)) {
+ error = EINVAL;
+ break;
+ }
+ if (sc->gre_port == opt)
+ break;
+ if ((sc->gre_options & GRE_UDPENCAP) == 0) {
+ /*
+ * UDP encapsulation is not enabled, so
+ * there is no need to reattach the softc.
+ */
+ sc->gre_port = opt;
+ break;
+ }
}
switch (sc->gre_family) {
#ifdef INET
case AF_INET:
- in_gre_setopts(sc, cmd, opt);
+ error = in_gre_setopts(sc, cmd, opt);
break;
#endif
#ifdef INET6
case AF_INET6:
- in6_gre_setopts(sc, cmd, opt);
+ error = in6_gre_setopts(sc, cmd, opt);
break;
#endif
default:
+ /*
+ * The tunnel is not yet configured, so any
+ * parameter may be changed directly.
+ */
if (cmd == GRESKEY)
sc->gre_key = opt;
- else
+ if (cmd == GRESOPTS)
sc->gre_options = opt;
+ if (cmd == GRESPORT)
+ sc->gre_port = opt;
break;
}
/*
@@ -319,6 +353,10 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr),
sizeof(sc->gre_options));
break;
+ case GREGPORT:
+ error = copyout(&sc->gre_port, ifr_data_get_ptr(ifr),
+ sizeof(sc->gre_port));
+ break;
default:
error = EINVAL;
break;
@@ -343,6 +381,7 @@ end:
static void
gre_delete_tunnel(struct gre_softc *sc)
{
+ struct gre_socket *gs;
sx_assert(&gre_ioctl_sx, SA_XLOCKED);
if (sc->gre_family != 0) {
@@ -352,6 +391,16 @@ gre_delete_tunnel(struct gre_softc *sc)
free(sc->gre_hdr, M_GRE);
sc->gre_family = 0;
}
+ /*
+ * If this tunnel was the last user of the UDP socket,
+ * unlink the socket from the hash table and close it.
+ */
+ if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) {
+ CK_LIST_REMOVE(gs, chain);
+ soclose(gs->so);
+ epoch_call(net_epoch_preempt, &gs->epoch_ctx, gre_sofree);
+ sc->gre_so = NULL;
+ }
GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN);
}
@@ -378,7 +427,38 @@ gre_hashdestroy(struct gre_list *hash)
}
void
-gre_updatehdr(struct gre_softc *sc, struct grehdr *gh)
+gre_sofree(epoch_context_t ctx)
+{
+ struct gre_socket *gs;
+
+ gs = __containerof(ctx, struct gre_socket, epoch_ctx);
+ free(gs, M_GRE);
+}
+
+static __inline uint16_t
+gre_cksum_add(uint16_t sum, uint16_t a)
+{
+ uint16_t res;
+
+ res = sum + a;
+ return (res + (res < a));
+}
+
+void
+gre_update_udphdr(struct gre_softc *sc, struct udphdr *udp, uint16_t csum)
+{
+
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ MPASS(sc->gre_options & GRE_UDPENCAP);
+
+ udp->uh_dport = htons(GRE_UDPPORT);
+ udp->uh_sport = htons(sc->gre_port);
+ udp->uh_sum = csum;
+ udp->uh_ulen = 0;
+}
+
+void
+gre_update_hdr(struct gre_softc *sc, struct grehdr *gh)
{
uint32_t *opts;
uint16_t flags;
@@ -545,6 +625,52 @@ gre_setseqn(struct grehdr *gh, uint32_t seq)
*opts = htonl(seq);
}
+static uint32_t
+gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af)
+{
+ uint32_t flowid;
+
+ if ((sc->gre_options & GRE_UDPENCAP) == 0 || sc->gre_port != 0)
+ return (0);
+#ifndef RSS
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ flowid = mtod(m, struct ip *)->ip_src.s_addr ^
+ mtod(m, struct ip *)->ip_dst.s_addr;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^
+ mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3];
+ break;
+#endif
+ default:
+ flowid = 0;
+ }
+#else /* RSS */
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src,
+ mtod(m, struct ip *)->ip_dst);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ flowid = rss_hash_ip6_2tuple(
+ &mtod(m, struct ip6_hdr *)->ip6_src,
+ &mtod(m, struct ip6_hdr *)->ip6_dst);
+ break;
+#endif
+ default:
+ flowid = 0;
+ }
+#endif
+ return (flowid);
+}
+
#define MTAG_GRE 1307983903
static int
gre_transmit(struct ifnet *ifp, struct mbuf *m)
@@ -552,7 +678,8 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
GRE_RLOCK_TRACKER;
struct gre_softc *sc;
struct grehdr *gh;
- uint32_t af;
+ struct udphdr *uh;
+ uint32_t af, flowid;
int error, len;
uint16_t proto;
@@ -579,6 +706,7 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
af = m->m_pkthdr.csum_data;
BPF_MTAP2(ifp, &af, sizeof(af), m);
m->m_flags &= ~(M_BCAST|M_MCAST);
+ flowid = gre_flowid(sc, m, af);
M_SETFIB(m, sc->gre_fibnum);
M_PREPEND(m, sc->gre_hlen, M_NOWAIT);
if (m == NULL) {
@@ -620,6 +748,19 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
error = ENETDOWN;
goto drop;
}
+ if (sc->gre_options & GRE_UDPENCAP) {
+ uh = (struct udphdr *)mtodo(m, len);
+ uh->uh_sport |= htons(V_ipport_hifirstauto) |
+ (flowid >> 16) | (flowid & 0xFFFF);
+ uh->uh_sport = htons(ntohs(uh->uh_sport) %
+ V_ipport_hilastauto);
+ uh->uh_ulen = htons(m->m_pkthdr.len - len);
+ uh->uh_sum = gre_cksum_add(uh->uh_sum,
+ htons(m->m_pkthdr.len - len + IPPROTO_UDP));
+ m->m_pkthdr.csum_flags = sc->gre_csumflags;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ len += sizeof(struct udphdr);
+ }
gh = (struct grehdr *)mtodo(m, len);
gh->gre_proto = proto;
if (sc->gre_options & GRE_ENABLE_SEQ)
@@ -637,7 +778,7 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
#endif
#ifdef INET6
case AF_INET6:
- error = in6_gre_output(m, af, sc->gre_hlen);
+ error = in6_gre_output(m, af, sc->gre_hlen, flowid);
break;
#endif
default:
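gre_cksum_add() above is an incremental one's-complement update in the style of RFC 1624: it adds two 16-bit words and folds any carry back into the low bit via res + (res < a). A standalone worked example:

#include <assert.h>
#include <stdint.h>

static uint16_t
cksum_add(uint16_t sum, uint16_t a)
{
	uint16_t res = sum + a;

	/* End-around carry: the 16-bit add wrapped iff res < a. */
	return (res + (res < a));
}

int
main(void)
{
	assert(cksum_add(0x0001, 0x0002) == 0x0003);	/* no carry */
	assert(cksum_add(0xffff, 0x0001) == 0x0001);	/* wrap adds 1 */
	assert(cksum_add(0xffff, 0xffff) == 0xffff);	/* -0 + -0 = -0 */
	return (0);
}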
diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h
index 4b93321a..de3c5979 100644
--- a/freebsd/sys/net/if_gre.h
+++ b/freebsd/sys/net/if_gre.h
@@ -53,14 +53,35 @@ struct greip {
struct ip gi_ip;
struct grehdr gi_gre;
} __packed;
-#endif
+
+struct greudp {
+ struct ip gi_ip;
+ struct udphdr gi_udp;
+ struct grehdr gi_gre;
+} __packed;
+#endif /* INET */
#ifdef INET6
struct greip6 {
struct ip6_hdr gi6_ip6;
struct grehdr gi6_gre;
} __packed;
-#endif
+
+struct greudp6 {
+ struct ip6_hdr gi6_ip6;
+ struct udphdr gi6_udp;
+ struct grehdr gi6_gre;
+} __packed;
+#endif /* INET6 */
+
+CK_LIST_HEAD(gre_list, gre_softc);
+CK_LIST_HEAD(gre_sockets, gre_socket);
+struct gre_socket {
+ struct socket *so;
+ struct gre_list list;
+ CK_LIST_ENTRY(gre_socket) chain;
+ struct epoch_context epoch_ctx;
+};
struct gre_softc {
struct ifnet *gre_ifp;
@@ -69,22 +90,26 @@ struct gre_softc {
uint32_t gre_oseq;
uint32_t gre_key;
uint32_t gre_options;
+ uint32_t gre_csumflags;
+ uint32_t gre_port;
u_int gre_fibnum;
u_int gre_hlen; /* header size */
union {
void *hdr;
#ifdef INET
- struct greip *gihdr;
+ struct greip *iphdr;
+ struct greudp *udphdr;
#endif
#ifdef INET6
- struct greip6 *gi6hdr;
+ struct greip6 *ip6hdr;
+ struct greudp6 *udp6hdr;
#endif
} gre_uhdr;
+ struct gre_socket *gre_so;
CK_LIST_ENTRY(gre_softc) chain;
CK_LIST_ENTRY(gre_softc) srchash;
};
-CK_LIST_HEAD(gre_list, gre_softc);
MALLOC_DECLARE(M_GRE);
#ifndef GRE_HASH_SIZE
@@ -98,28 +123,35 @@ MALLOC_DECLARE(M_GRE);
#define GRE_WAIT() epoch_wait_preempt(net_epoch_preempt)
#define gre_hdr gre_uhdr.hdr
-#define gre_gihdr gre_uhdr.gihdr
-#define gre_gi6hdr gre_uhdr.gi6hdr
-#define gre_oip gre_gihdr->gi_ip
-#define gre_oip6 gre_gi6hdr->gi6_ip6
+#define gre_iphdr gre_uhdr.iphdr
+#define gre_ip6hdr gre_uhdr.ip6hdr
+#define gre_udphdr gre_uhdr.udphdr
+#define gre_udp6hdr gre_uhdr.udp6hdr
+
+#define gre_oip gre_iphdr->gi_ip
+#define gre_udp gre_udphdr->gi_udp
+#define gre_oip6 gre_ip6hdr->gi6_ip6
+#define gre_udp6 gre_udp6hdr->gi6_udp
struct gre_list *gre_hashinit(void);
void gre_hashdestroy(struct gre_list *);
int gre_input(struct mbuf *, int, int, void *);
-void gre_updatehdr(struct gre_softc *, struct grehdr *);
+void gre_update_hdr(struct gre_softc *, struct grehdr *);
+void gre_update_udphdr(struct gre_softc *, struct udphdr *, uint16_t);
+void gre_sofree(epoch_context_t);
void in_gre_init(void);
void in_gre_uninit(void);
-void in_gre_setopts(struct gre_softc *, u_long, uint32_t);
+int in_gre_setopts(struct gre_softc *, u_long, uint32_t);
int in_gre_ioctl(struct gre_softc *, u_long, caddr_t);
int in_gre_output(struct mbuf *, int, int);
void in6_gre_init(void);
void in6_gre_uninit(void);
-void in6_gre_setopts(struct gre_softc *, u_long, uint32_t);
+int in6_gre_setopts(struct gre_softc *, u_long, uint32_t);
int in6_gre_ioctl(struct gre_softc *, u_long, caddr_t);
-int in6_gre_output(struct mbuf *, int, int);
+int in6_gre_output(struct mbuf *, int, int, uint32_t);
/*
* CISCO uses special type for GRE tunnel created as part of WCCP
* connection, while in fact those packets are just IPv4 encapsulated
@@ -139,9 +171,15 @@ int in6_gre_output(struct mbuf *, int, int);
#define GRESKEY _IOW('i', 108, struct ifreq)
#define GREGOPTS _IOWR('i', 109, struct ifreq)
#define GRESOPTS _IOW('i', 110, struct ifreq)
+#define GREGPORT _IOWR('i', 111, struct ifreq)
+#define GRESPORT _IOW('i', 112, struct ifreq)
+
+/* GRE-in-UDP encapsulation destination port as defined in RFC8086 */
+#define GRE_UDPPORT 4754
#define GRE_ENABLE_CSUM 0x0001
#define GRE_ENABLE_SEQ 0x0002
-#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
+#define GRE_UDPENCAP 0x0004
+#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ|GRE_UDPENCAP)
#endif /* _NET_IF_GRE_H_ */
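GREGPORT/GRESPORT and the GRE_UDPENCAP option are the userland-visible knobs for the new GRE-in-UDP (RFC 8086) support; the set path in gre_ioctl() accepts a source port only inside the high auto-port range. A hedged userland sketch (gre0 and port 50000 are assumptions, and GRESOPTS as written replaces the whole option mask):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_gre.h>
#include <err.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct ifreq ifr;
	uint32_t opt;
	int s;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		err(1, "socket");
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "gre0", sizeof(ifr.ifr_name));
	ifr.ifr_data = (caddr_t)&opt;

	opt = GRE_UDPENCAP;	/* enable UDP encapsulation */
	if (ioctl(s, GRESOPTS, &ifr) < 0)
		err(1, "GRESOPTS");

	opt = 50000;		/* must lie in the high auto-port range */
	if (ioctl(s, GRESPORT, &ifr) < 0)
		err(1, "GRESPORT");

	close(s);
	return (0);
}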
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
index 85099115..b82313eb 100644
--- a/freebsd/sys/net/if_lagg.c
+++ b/freebsd/sys/net/if_lagg.c
@@ -25,6 +25,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_ratelimit.h>
#include <sys/param.h>
@@ -97,6 +98,11 @@ static struct {
{0, NULL}
};
+struct lagg_snd_tag {
+ struct m_snd_tag com;
+ struct m_snd_tag *tag;
+};
+
VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
#define V_lagg_list VNET(lagg_list)
VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
@@ -113,6 +119,7 @@ static void lagg_clone_destroy(struct ifnet *);
VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner);
#define V_lagg_cloner VNET(lagg_cloner)
static const char laggname[] = "lagg";
+static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface");
static void lagg_capabilities(struct lagg_softc *);
static int lagg_port_create(struct lagg_softc *, struct ifnet *);
@@ -131,10 +138,17 @@ static void lagg_port2req(struct lagg_port *, struct lagg_reqport *);
static void lagg_init(void *);
static void lagg_stop(struct lagg_softc *);
static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
static int lagg_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *,
struct m_snd_tag **);
+static int lagg_snd_tag_modify(struct m_snd_tag *,
+ union if_snd_tag_modify_params *);
+static int lagg_snd_tag_query(struct m_snd_tag *,
+ union if_snd_tag_query_params *);
+static void lagg_snd_tag_free(struct m_snd_tag *);
+static void lagg_ratelimit_query(struct ifnet *,
+ struct if_ratelimit_query_results *);
#endif
static int lagg_setmulti(struct lagg_port *);
static int lagg_clrmulti(struct lagg_port *);
@@ -264,6 +278,13 @@ SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
&VNET_NAME(def_use_flowid), 0,
"Default setting for using flow id for load sharing");
+/* Default value for using numa */
+VNET_DEFINE_STATIC(int, def_use_numa) = 1;
+#define V_def_use_numa VNET(def_use_numa)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN,
+ &VNET_NAME(def_use_numa), 0,
+ "Use numa to steer flows");
+
/* Default value for flowid shift */
VNET_DEFINE_STATIC(int, def_flowid_shift) = 16;
#define V_def_flowid_shift VNET(def_flowid_shift)
@@ -480,10 +501,10 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
struct ifnet *ifp;
static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
- sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+ sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO);
ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
if (ifp == NULL) {
- free(sc, M_DEVBUF);
+ free(sc, M_LAGG);
return (ENOSPC);
}
LAGG_SX_INIT(sc);
@@ -491,6 +512,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
LAGG_XLOCK(sc);
if (V_def_use_flowid)
sc->sc_opts |= LAGG_OPT_USE_FLOWID;
+ if (V_def_use_numa)
+ sc->sc_opts |= LAGG_OPT_USE_NUMA;
sc->flowid_shift = V_def_flowid_shift;
/* Hash all layers by default */
@@ -514,12 +537,14 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ifp->if_ioctl = lagg_ioctl;
ifp->if_get_counter = lagg_get_counter;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
- ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS | IFCAP_TXRTLMT;
-#else
- ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
+ ifp->if_snd_tag_modify = lagg_snd_tag_modify;
+ ifp->if_snd_tag_query = lagg_snd_tag_query;
+ ifp->if_snd_tag_free = lagg_snd_tag_free;
+ ifp->if_ratelimit_query = lagg_ratelimit_query;
#endif
+ ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
/*
* Attach as an ordinary ethernet device, children will be attached
@@ -572,7 +597,7 @@ lagg_clone_destroy(struct ifnet *ifp)
LAGG_LIST_UNLOCK();
LAGG_SX_DESTROY(sc);
- free(sc, M_DEVBUF);
+ free(sc, M_LAGG);
}
static void
@@ -686,7 +711,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
ifr.ifr_mtu = oldmtu;
}
- lp = malloc(sizeof(struct lagg_port), M_DEVBUF, M_WAITOK|M_ZERO);
+ lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK|M_ZERO);
lp->lp_softc = sc;
/* Check if port is a stacked lagg */
@@ -694,7 +719,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
if (ifp == sc_ptr->sc_ifp) {
LAGG_LIST_UNLOCK();
- free(lp, M_DEVBUF);
+ free(lp, M_LAGG);
if (oldmtu != -1)
(*ifp->if_ioctl)(ifp, SIOCSIFMTU,
(caddr_t)&ifr);
@@ -705,7 +730,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
if (lagg_port_checkstacking(sc_ptr) >=
LAGG_MAX_STACKING) {
LAGG_LIST_UNLOCK();
- free(lp, M_DEVBUF);
+ free(lp, M_LAGG);
if (oldmtu != -1)
(*ifp->if_ioctl)(ifp, SIOCSIFMTU,
(caddr_t)&ifr);
@@ -753,7 +778,6 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
* is predictable and `ifconfig laggN create ...` command
* will lead to the same result each time.
*/
- LAGG_RLOCK();
CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
if (tlp->lp_ifp->if_index < ifp->if_index && (
CK_SLIST_NEXT(tlp, lp_entries) == NULL ||
@@ -761,7 +785,6 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
ifp->if_index))
break;
}
- LAGG_RUNLOCK();
if (tlp != NULL)
CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries);
else
@@ -816,7 +839,7 @@ lagg_port_destroy_cb(epoch_context_t ec)
ifp = lp->lp_ifp;
if_rele(ifp);
- free(lp, M_DEVBUF);
+ free(lp, M_LAGG);
}
static int
@@ -1250,6 +1273,8 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
switch (ro->ro_opts) {
case LAGG_OPT_USE_FLOWID:
case -LAGG_OPT_USE_FLOWID:
+ case LAGG_OPT_USE_NUMA:
+ case -LAGG_OPT_USE_NUMA:
case LAGG_OPT_FLOWIDSHIFT:
valid = 1;
lacp = 0;
@@ -1528,49 +1553,142 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
return (error);
}
-#ifdef RATELIMIT
-static int
-lagg_snd_tag_alloc(struct ifnet *ifp,
- union if_snd_tag_alloc_params *params,
- struct m_snd_tag **ppmt)
+#if defined(KERN_TLS) || defined(RATELIMIT)
+static inline struct lagg_snd_tag *
+mst_to_lst(struct m_snd_tag *mst)
{
- struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+
+ return (__containerof(mst, struct lagg_snd_tag, com));
+}
+
+/*
+ * Look up the port used by a specific flow. This only works for lagg
+ * protocols with deterministic port mappings (e.g., not roundrobin).
+ * In addition, protocols that use a hash to map flows to ports must
+ * be configured to use the mbuf flowid rather than hashing packet
+ * contents.
+ */
+static struct lagg_port *
+lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
+{
+ struct lagg_softc *sc;
struct lagg_port *lp;
struct lagg_lb *lb;
uint32_t p;
+ sc = ifp->if_softc;
+
switch (sc->sc_proto) {
case LAGG_PROTO_FAILOVER:
- lp = lagg_link_active(sc, sc->sc_primary);
- break;
+ return (lagg_link_active(sc, sc->sc_primary));
case LAGG_PROTO_LOADBALANCE:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
- params->hdr.flowtype == M_HASHTYPE_NONE)
- return (EOPNOTSUPP);
- p = params->hdr.flowid >> sc->flowid_shift;
+ flowtype == M_HASHTYPE_NONE)
+ return (NULL);
+ p = flowid >> sc->flowid_shift;
p %= sc->sc_count;
lb = (struct lagg_lb *)sc->sc_psc;
lp = lb->lb_ports[p];
- lp = lagg_link_active(sc, lp);
- break;
+ return (lagg_link_active(sc, lp));
case LAGG_PROTO_LACP:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
- params->hdr.flowtype == M_HASHTYPE_NONE)
- return (EOPNOTSUPP);
- lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid);
- break;
+ flowtype == M_HASHTYPE_NONE)
+ return (NULL);
+ return (lacp_select_tx_port_by_hash(sc, flowid));
default:
- return (EOPNOTSUPP);
+ return (NULL);
}
- if (lp == NULL)
+}
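
For the load-balance case the port is derived purely from the mbuf flowid, which is what makes the mapping deterministic enough to allocate a send tag against it. The index computation in isolation (illustrative helper, not driver code):

	/* Map a 32-bit flowid onto one of port_count ports. */
	static uint32_t
	lb_port_index(uint32_t flowid, u_int flowid_shift, u_int port_count)
	{
		uint32_t p;

		p = flowid >> flowid_shift;	/* drop the low-order hash bits */
		p %= port_count;		/* fold into the port table */
		return (p);
	}
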
+
+static int
+lagg_snd_tag_alloc(struct ifnet *ifp,
+ union if_snd_tag_alloc_params *params,
+ struct m_snd_tag **ppmt)
+{
+ struct lagg_snd_tag *lst;
+ struct lagg_softc *sc;
+ struct lagg_port *lp;
+ struct ifnet *lp_ifp;
+ int error;
+
+ sc = ifp->if_softc;
+
+ LAGG_RLOCK();
+ lp = lookup_snd_tag_port(ifp, params->hdr.flowid, params->hdr.flowtype);
+ if (lp == NULL) {
+ LAGG_RUNLOCK();
return (EOPNOTSUPP);
- ifp = lp->lp_ifp;
- if (ifp == NULL || ifp->if_snd_tag_alloc == NULL ||
- (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
+ }
+ if (lp->lp_ifp == NULL || lp->lp_ifp->if_snd_tag_alloc == NULL) {
+ LAGG_RUNLOCK();
return (EOPNOTSUPP);
+ }
+ lp_ifp = lp->lp_ifp;
+ if_ref(lp_ifp);
+ LAGG_RUNLOCK();
+
+ lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT);
+ if (lst == NULL) {
+ if_rele(lp_ifp);
+ return (ENOMEM);
+ }
+
+ error = lp_ifp->if_snd_tag_alloc(lp_ifp, params, &lst->tag);
+ if_rele(lp_ifp);
+ if (error) {
+ free(lst, M_LAGG);
+ return (error);
+ }
+
+ m_snd_tag_init(&lst->com, ifp);
- /* forward allocation request */
- return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+ *ppmt = &lst->com;
+ return (0);
+}
+
+static int
+lagg_snd_tag_modify(struct m_snd_tag *mst,
+ union if_snd_tag_modify_params *params)
+{
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ return (lst->tag->ifp->if_snd_tag_modify(lst->tag, params));
+}
+
+static int
+lagg_snd_tag_query(struct m_snd_tag *mst,
+ union if_snd_tag_query_params *params)
+{
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ return (lst->tag->ifp->if_snd_tag_query(lst->tag, params));
+}
+
+static void
+lagg_snd_tag_free(struct m_snd_tag *mst)
+{
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ m_snd_tag_rele(lst->tag);
+ free(lst, M_LAGG);
+}
+
+static void
+lagg_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q)
+{
+	/*
+	 * For lagg, we have an indirect interface. The caller needs to
+	 * get a ratelimit tag on the actual interface the flow will go on.
+	 */
+ q->rate_table = NULL;
+ q->flags = RT_IS_INDIRECT;
+ q->max_flows = 0;
+ q->number_of_rates = 0;
}
#endif
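
The wrapper-tag scheme above is the usual FreeBSD embed-and-containerof pattern: the tag handed to the stack is embedded in the lagg structure, while the tag allocated on the child interface hangs off it, and modify/query/free simply forward to the child. The generic shape, with hypothetical names:

	/* Generic wrapper-tag layout; names are illustrative. */
	struct outer_tag {
		struct m_snd_tag com;		/* embedded tag seen by the stack */
		struct m_snd_tag *child;	/* tag allocated on the real ifp */
	};

	static inline struct outer_tag *
	mst_to_outer(struct m_snd_tag *mst)
	{
		/* Recover the wrapper from a pointer to its embedded member. */
		return (__containerof(mst, struct outer_tag, com));
	}
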
@@ -1588,7 +1706,7 @@ lagg_setmulti(struct lagg_port *lp)
CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
- mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
+ mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT);
if (mc == NULL) {
IF_ADDR_WUNLOCK(scifp);
return (ENOMEM);
@@ -1619,7 +1737,7 @@ lagg_clrmulti(struct lagg_port *lp)
SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
if (mc->mc_ifma && lp->lp_detaching == 0)
if_delmulti_ifma(mc->mc_ifma);
- free(mc, M_DEVBUF);
+ free(mc, M_LAGG);
}
return (0);
}
@@ -1696,6 +1814,10 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m)
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error;
+#if defined(KERN_TLS) || defined(RATELIMIT)
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+#endif
LAGG_RLOCK();
/* We need a Tx algorithm and at least one port */
if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
@@ -1848,12 +1970,20 @@ struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{
struct lagg_port *lp_next, *rval = NULL;
- struct epoch_tracker net_et;
/*
* Search a port which reports an active link state.
*/
+#ifdef INVARIANTS
+ /*
+ * This is called with either LAGG_RLOCK() held or
+ * LAGG_XLOCK(sc) held.
+ */
+ if (!in_epoch(net_epoch_preempt))
+ LAGG_XLOCK_ASSERT(sc);
+#endif
+
if (lp == NULL)
goto search;
if (LAGG_PORTACTIVE(lp)) {
@@ -1866,15 +1996,12 @@ lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
goto found;
}
- search:
- epoch_enter_preempt(net_epoch_preempt, &net_et);
+search:
CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
if (LAGG_PORTACTIVE(lp_next)) {
- epoch_exit_preempt(net_epoch_preempt, &net_et);
return (lp_next);
}
}
- epoch_exit_preempt(net_epoch_preempt, &net_et);
found:
return (rval);
}
@@ -1883,6 +2010,21 @@ int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
+#if defined(KERN_TLS) || defined(RATELIMIT)
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ struct lagg_snd_tag *lst;
+ struct m_snd_tag *mst;
+
+ mst = m->m_pkthdr.snd_tag;
+ lst = mst_to_lst(mst);
+ if (lst->tag->ifp != ifp) {
+ m_freem(m);
+ return (EAGAIN);
+ }
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag);
+ m_snd_tag_rele(mst);
+ }
+#endif
return (ifp->if_transmit)(ifp, m);
}
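
Before handing the packet to the selected port, lagg_enqueue() swaps the wrapper tag in the mbuf for the child tag, so the hardware driver only ever sees a tag it allocated itself. The reference discipline is to take the new reference before dropping the old one, sketched in isolation:

	/* Swap an mbuf's send tag from the wrapper to the child tag. */
	static void
	snd_tag_handoff(struct mbuf *m, struct m_snd_tag *child)
	{
		struct m_snd_tag *old = m->m_pkthdr.snd_tag;

		m->m_pkthdr.snd_tag = m_snd_tag_ref(child);	/* +1 on child */
		m_snd_tag_rele(old);				/* -1 on wrapper */
	}
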
@@ -1956,7 +2098,7 @@ lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
struct lagg_port *lp, *last = NULL;
struct mbuf *m0;
- LAGG_RLOCK();
+ LAGG_RLOCK_ASSERT();
CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
if (!LAGG_PORTACTIVE(lp))
continue;
@@ -1977,7 +2119,6 @@ lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
}
last = lp;
}
- LAGG_RUNLOCK();
if (last == NULL) {
m_freem(m);
@@ -2063,7 +2204,7 @@ lagg_lb_attach(struct lagg_softc *sc)
struct lagg_lb *lb;
LAGG_XLOCK_ASSERT(sc);
- lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
+ lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO);
lb->lb_key = m_ether_tcpip_hash_init();
sc->sc_psc = lb;
@@ -2078,7 +2219,7 @@ lagg_lb_detach(struct lagg_softc *sc)
lb = (struct lagg_lb *)sc->sc_psc;
if (lb != NULL)
- free(lb, M_DEVBUF);
+ free(lb, M_LAGG);
}
static int
@@ -2090,7 +2231,7 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
rv = 0;
bzero(&lb->lb_ports, sizeof(lb->lb_ports));
- LAGG_RLOCK();
+ LAGG_XLOCK_ASSERT(sc);
CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
if (lp_next == lp)
continue;
@@ -2103,7 +2244,6 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
lb->lb_ports[i++] = lp_next;
}
- LAGG_RUNLOCK();
return (rv);
}
diff --git a/freebsd/sys/net/if_lagg.h b/freebsd/sys/net/if_lagg.h
index f1e2d8f4..2c566c0d 100644
--- a/freebsd/sys/net/if_lagg.h
+++ b/freebsd/sys/net/if_lagg.h
@@ -143,6 +143,7 @@ struct lagg_reqopts {
#define LAGG_OPT_USE_FLOWID 0x01 /* enable use of flowid */
/* Pseudo flags which are used in ro_opts but not stored into sc_opts. */
#define LAGG_OPT_FLOWIDSHIFT 0x02 /* set flowid shift */
+#define LAGG_OPT_USE_NUMA 0x04 /* enable use of numa */
#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */
#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */
#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */
@@ -158,8 +159,9 @@ struct lagg_reqopts {
#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts)
#define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts)
-#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \
- "\006LACP_TXTEST\007LACP_RXTEST"
+#define LAGG_OPT_BITS "\020\001USE_FLOWID\003USE_NUMA" \
+ "\005LACP_STRICT\006LACP_TXTEST" \
+ "\007LACP_RXTEST"
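
LAGG_OPT_BITS is a printf(9) "%b" conversion string: the leading \020 (decimal 16) selects hexadecimal output for the value, and each following \NNN octal byte gives the 1-based bit position of the name after it, so the new \003USE_NUMA entry labels bit 3, i.e. 0x04. Kernel-only usage sketch:

	/* Prints "opts=4<USE_NUMA>" when only LAGG_OPT_USE_NUMA is set. */
	printf("opts=%b\n", LAGG_OPT_USE_NUMA, LAGG_OPT_BITS);
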
#ifdef _KERNEL
diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c
index b220d7aa..e79b9ba9 100644
--- a/freebsd/sys/net/if_llatbl.c
+++ b/freebsd/sys/net/if_llatbl.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/syslog.h>
@@ -92,6 +93,7 @@ static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f,
static int
lltable_dump_af(struct lltable *llt, struct sysctl_req *wr)
{
+ struct epoch_tracker et;
int error;
LLTABLE_LIST_LOCK_ASSERT();
@@ -100,10 +102,10 @@ lltable_dump_af(struct lltable *llt, struct sysctl_req *wr)
return (0);
error = 0;
- IF_AFDATA_RLOCK(llt->llt_ifp);
+ NET_EPOCH_ENTER(et);
error = lltable_foreach_lle(llt,
(llt_foreach_cb_t *)llt->llt_dump_entry, wr);
- IF_AFDATA_RUNLOCK(llt->llt_ifp);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -455,11 +457,12 @@ struct llentry *
llentry_alloc(struct ifnet *ifp, struct lltable *lt,
struct sockaddr_storage *dst)
{
+ struct epoch_tracker et;
struct llentry *la, *la_tmp;
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (la != NULL) {
LLE_ADDREF(la);
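
The IF_AFDATA_RLOCK() conversions above follow the read-side network epoch pattern used throughout this update: CK_ list traversal and read-only lookups are safe inside the epoch section, with no per-ifnet reader lock. A sketch of the shape, assuming a plain lookup with no lock flags:

	/* Epoch-protected read-only check; illustrative, not from the file. */
	static int
	entry_exists(struct lltable *lt, struct sockaddr *dst)
	{
		struct epoch_tracker et;
		int found;

		NET_EPOCH_ENTER(et);		/* pin the network epoch */
		found = (lla_lookup(lt, 0, dst) != NULL);
		NET_EPOCH_EXIT(et);		/* entry may be freed after this */
		return (found);
	}
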
diff --git a/freebsd/sys/net/if_llatbl.h b/freebsd/sys/net/if_llatbl.h
index 74301284..7bf57bdb 100644
--- a/freebsd/sys/net/if_llatbl.h
+++ b/freebsd/sys/net/if_llatbl.h
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#ifndef _NET_IF_LLATBL_H_
#define _NET_IF_LLATBL_H_
+#include <sys/_eventhandler.h>
#include <sys/_rwlock.h>
#include <netinet/in.h>
#include <sys/epoch.h>
@@ -267,7 +268,6 @@ llentry_mark_used(struct llentry *lle)
int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
-#include <sys/eventhandler.h>
enum {
LLENTRY_RESOLVED,
LLENTRY_TIMEDOUT,
diff --git a/freebsd/sys/net/if_spppsubr.c b/freebsd/sys/net/if_spppsubr.c
index f5b78dec..9aec7cd1 100644
--- a/freebsd/sys/net/if_spppsubr.c
+++ b/freebsd/sys/net/if_spppsubr.c
@@ -1062,15 +1062,13 @@ sppp_detach(struct ifnet *ifp)
KASSERT(mtx_initialized(&sp->mtx), ("sppp mutex is not initialized"));
/* Stop keepalive handler. */
- if (!callout_drain(&sp->keepalive_callout))
- callout_stop(&sp->keepalive_callout);
+ callout_drain(&sp->keepalive_callout);
for (i = 0; i < IDX_COUNT; i++) {
- if (!callout_drain(&sp->ch[i]))
- callout_stop(&sp->ch[i]);
+ callout_drain(&sp->ch[i]);
}
- if (!callout_drain(&sp->pap_my_to_ch))
- callout_stop(&sp->pap_my_to_ch);
+ callout_drain(&sp->pap_my_to_ch);
+
mtx_destroy(&sp->pp_cpq.ifq_mtx);
mtx_destroy(&sp->pp_fastq.ifq_mtx);
mtx_destroy(&sp->mtx);
@@ -4339,16 +4337,12 @@ sppp_chap_tld(struct sppp *sp)
static void
sppp_chap_scr(struct sppp *sp)
{
- u_long *ch, seed;
+ u_long *ch;
u_char clen;
/* Compute random challenge. */
ch = (u_long *)sp->myauth.challenge;
- read_random(&seed, sizeof seed);
- ch[0] = seed ^ random();
- ch[1] = seed ^ random();
- ch[2] = seed ^ random();
- ch[3] = seed ^ random();
+ arc4random_buf(ch, 4 * sizeof(*ch));
clen = AUTHKEYLEN;
sp->confid[IDX_CHAP] = ++sp->pp_seq[IDX_CHAP];
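
arc4random_buf(9) draws directly from the kernel CSPRNG, replacing the old construction that XORed a random(9) stream with a single read_random() seed and produced predictable challenges. The equivalent call in isolation:

	/* Fill the CHAP challenge words with cryptographically strong bytes. */
	u_long ch[4];

	arc4random_buf(ch, sizeof(ch));	/* same size as 4 * sizeof(*ch) above */
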
@@ -4809,7 +4803,7 @@ sppp_keepalive(void *dummy)
sppp_cisco_send (sp, CISCO_KEEPALIVE_REQ,
++sp->pp_seq[IDX_LCP], sp->pp_rseq[IDX_LCP]);
else if (sp->pp_phase >= PHASE_AUTHENTICATE) {
- long nmagic = htonl (sp->lcp.magic);
+ uint32_t nmagic = htonl(sp->lcp.magic);
sp->lcp.echoid = ++sp->pp_seq[IDX_LCP];
sppp_cp_send (sp, PPP_LCP, ECHO_REQ,
sp->lcp.echoid, 4, &nmagic);
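
The nmagic change is a width fix: htonl() produces a 32-bit value and the LCP magic is a 4-byte wire field, but long is 8 bytes on LP64 targets, so passing &nmagic described the wrong object (on big-endian LP64 the first four bytes would even be zero). Sketch of the concern, with a hypothetical transmit helper:

	/* host_magic is the host-order LCP magic (illustrative fragment). */
	uint32_t nmagic = htonl(host_magic);	/* exactly the 4 bytes sent */
	send_bytes(&nmagic, sizeof(nmagic));	/* hypothetical transmit helper */
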
diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c
index 3ba9f8c0..7185fb8d 100644
--- a/freebsd/sys/net/if_stf.c
+++ b/freebsd/sys/net/if_stf.c
@@ -730,6 +730,7 @@ stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
ifp->if_flags |= IFF_UP;
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
break;
case SIOCADDMULTI:
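
Setting IFF_DRV_RUNNING alongside IFF_UP matters because FreeBSD output paths gate on the driver flag rather than on IFF_UP. A typical guard that the fixed ioctl path now satisfies (illustrative):

	/* Common transmit-path check in FreeBSD interface drivers: */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return (ENETDOWN);	/* would previously have stopped stf output */
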
diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c
deleted file mode 100644
index dbf3e599..00000000
--- a/freebsd/sys/net/if_tap.c
+++ /dev/null
@@ -1,1133 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * BASED ON:
- * -------------------------------------------------------------------------
- *
- * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
- * Nottingham University 1987.
- */
-
-/*
- * $FreeBSD$
- * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
- */
-
-#include <rtems/bsd/local/opt_inet.h>
-
-#include <sys/param.h>
-#include <sys/conf.h>
-#include <sys/fcntl.h>
-#include <sys/filio.h>
-#include <sys/jail.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/poll.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/selinfo.h>
-#include <sys/signalvar.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-#include <sys/sysctl.h>
-#include <sys/systm.h>
-#include <sys/ttycom.h>
-#include <sys/uio.h>
-#include <sys/queue.h>
-
-#include <net/bpf.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/if_clone.h>
-#include <net/if_dl.h>
-#include <net/if_media.h>
-#include <net/if_types.h>
-#include <net/route.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-
-#include <net/if_tapvar.h>
-#include <net/if_tap.h>
-
-#define CDEV_NAME "tap"
-#define TAPDEBUG if (tapdebug) printf
-
-static const char tapname[] = "tap";
-static const char vmnetname[] = "vmnet";
-#define TAPMAXUNIT 0x7fff
-#define VMNET_DEV_MASK CLONE_FLAG0
-
-/* module */
-static int tapmodevent(module_t, int, void *);
-
-/* device */
-static void tapclone(void *, struct ucred *, char *, int,
- struct cdev **);
-static void tapcreate(struct cdev *);
-
-/* network interface */
-static void tapifstart(struct ifnet *);
-static int tapifioctl(struct ifnet *, u_long, caddr_t);
-static void tapifinit(void *);
-
-static int tap_clone_create(struct if_clone *, int, caddr_t);
-static void tap_clone_destroy(struct ifnet *);
-static struct if_clone *tap_cloner;
-static int vmnet_clone_create(struct if_clone *, int, caddr_t);
-static void vmnet_clone_destroy(struct ifnet *);
-static struct if_clone *vmnet_cloner;
-
-/* character device */
-static d_open_t tapopen;
-static d_close_t tapclose;
-static d_read_t tapread;
-static d_write_t tapwrite;
-static d_ioctl_t tapioctl;
-static d_poll_t tappoll;
-static d_kqfilter_t tapkqfilter;
-
-/* kqueue(2) */
-static int tapkqread(struct knote *, long);
-static int tapkqwrite(struct knote *, long);
-static void tapkqdetach(struct knote *);
-
-static struct filterops tap_read_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tapkqdetach,
- .f_event = tapkqread,
-};
-
-static struct filterops tap_write_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tapkqdetach,
- .f_event = tapkqwrite,
-};
-
-static struct cdevsw tap_cdevsw = {
- .d_version = D_VERSION,
- .d_flags = D_NEEDMINOR,
- .d_open = tapopen,
- .d_close = tapclose,
- .d_read = tapread,
- .d_write = tapwrite,
- .d_ioctl = tapioctl,
- .d_poll = tappoll,
- .d_name = CDEV_NAME,
- .d_kqfilter = tapkqfilter,
-};
-
-/*
- * All global variables in if_tap.c are locked with tapmtx, with the
- * exception of tapdebug, which is accessed unlocked; tapclones is
- * static at runtime.
- */
-static struct mtx tapmtx;
-static int tapdebug = 0; /* debug flag */
-static int tapuopen = 0; /* allow user open() */
-static int tapuponopen = 0; /* IFF_UP on open() */
-static int tapdclone = 1; /* enable devfs cloning */
-static SLIST_HEAD(, tap_softc) taphead; /* first device */
-static struct clonedevs *tapclones;
-
-MALLOC_DECLARE(M_TAP);
-MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
-SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
-
-SYSCTL_DECL(_net_link);
-static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
- "Ethernet tunnel software network interface");
-SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
- "Allow user to open /dev/tap (based on node permissions)");
-SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
- "Bring interface up when /dev/tap is opened");
-SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
- "Enable legacy devfs interface creation");
-SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");
-
-DEV_MODULE(if_tap, tapmodevent, NULL);
-
-static int
-tap_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
- struct cdev *dev;
- int i;
-
- /* Find any existing device, or allocate new unit number. */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0);
- if (i) {
- dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600,
- "%s%d", tapname, unit);
- }
-
- tapcreate(dev);
- return (0);
-}
-
-/* vmnet devices are tap devices in disguise */
-static int
-vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
- struct cdev *dev;
- int i;
-
- /* Find any existing device, or allocate new unit number. */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK);
- if (i) {
- dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT,
- GID_WHEEL, 0600, "%s%d", vmnetname, unit);
- }
-
- tapcreate(dev);
- return (0);
-}
-
-static void
-tap_destroy(struct tap_softc *tp)
-{
- struct ifnet *ifp = tp->tap_ifp;
-
- CURVNET_SET(ifp->if_vnet);
- destroy_dev(tp->tap_dev);
- seldrain(&tp->tap_rsel);
- knlist_clear(&tp->tap_rsel.si_note, 0);
- knlist_destroy(&tp->tap_rsel.si_note);
- ether_ifdetach(ifp);
- if_free(ifp);
-
- mtx_destroy(&tp->tap_mtx);
- free(tp, M_TAP);
- CURVNET_RESTORE();
-}
-
-static void
-tap_clone_destroy(struct ifnet *ifp)
-{
- struct tap_softc *tp = ifp->if_softc;
-
- mtx_lock(&tapmtx);
- SLIST_REMOVE(&taphead, tp, tap_softc, tap_next);
- mtx_unlock(&tapmtx);
- tap_destroy(tp);
-}
-
-/* vmnet devices are tap devices in disguise */
-static void
-vmnet_clone_destroy(struct ifnet *ifp)
-{
- tap_clone_destroy(ifp);
-}
-
-/*
- * tapmodevent
- *
- * module event handler
- */
-static int
-tapmodevent(module_t mod, int type, void *data)
-{
- static eventhandler_tag eh_tag = NULL;
- struct tap_softc *tp = NULL;
- struct ifnet *ifp = NULL;
-
- switch (type) {
- case MOD_LOAD:
-
-		/* initialize device */
-
- mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
- SLIST_INIT(&taphead);
-
- clone_setup(&tapclones);
- eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
- if (eh_tag == NULL) {
- clone_cleanup(&tapclones);
- mtx_destroy(&tapmtx);
- return (ENOMEM);
- }
- tap_cloner = if_clone_simple(tapname, tap_clone_create,
- tap_clone_destroy, 0);
- vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create,
- vmnet_clone_destroy, 0);
- return (0);
-
- case MOD_UNLOAD:
- /*
- * The EBUSY algorithm here can't quite atomically
- * guarantee that this is race-free since we have to
- * release the tap mtx to deregister the clone handler.
- */
- mtx_lock(&tapmtx);
- SLIST_FOREACH(tp, &taphead, tap_next) {
- mtx_lock(&tp->tap_mtx);
- if (tp->tap_flags & TAP_OPEN) {
- mtx_unlock(&tp->tap_mtx);
- mtx_unlock(&tapmtx);
- return (EBUSY);
- }
- mtx_unlock(&tp->tap_mtx);
- }
- mtx_unlock(&tapmtx);
-
- EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
- if_clone_detach(tap_cloner);
- if_clone_detach(vmnet_cloner);
- drain_dev_clone_events();
-
- mtx_lock(&tapmtx);
- while ((tp = SLIST_FIRST(&taphead)) != NULL) {
- SLIST_REMOVE_HEAD(&taphead, tap_next);
- mtx_unlock(&tapmtx);
-
- ifp = tp->tap_ifp;
-
- TAPDEBUG("detaching %s\n", ifp->if_xname);
-
- tap_destroy(tp);
- mtx_lock(&tapmtx);
- }
- mtx_unlock(&tapmtx);
- clone_cleanup(&tapclones);
-
- mtx_destroy(&tapmtx);
-
- break;
-
- default:
- return (EOPNOTSUPP);
- }
-
- return (0);
-} /* tapmodevent */
-
-
-/*
- * DEVFS handler
- *
- * We need to support two kinds of devices - tap and vmnet
- */
-static void
-tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev)
-{
- char devname[SPECNAMELEN + 1];
- int i, unit, append_unit;
- int extra;
-
- if (*dev != NULL)
- return;
-
- if (!tapdclone ||
- (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE) != 0))
- return;
-
- unit = 0;
- append_unit = 0;
- extra = 0;
-
- /* We're interested in only tap/vmnet devices. */
- if (strcmp(name, tapname) == 0) {
- unit = -1;
- } else if (strcmp(name, vmnetname) == 0) {
- unit = -1;
- extra = VMNET_DEV_MASK;
- } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) {
- if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) {
- return;
- } else {
- extra = VMNET_DEV_MASK;
- }
- }
-
- if (unit == -1)
- append_unit = 1;
-
- CURVNET_SET(CRED_TO_VNET(cred));
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
- if (i) {
- if (append_unit) {
- /*
- * We were passed 'tun' or 'tap', with no unit specified
- * so we'll need to append it now.
- */
- namelen = snprintf(devname, sizeof(devname), "%s%d", name,
- unit);
- name = devname;
- }
-
- *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra,
- cred, UID_ROOT, GID_WHEEL, 0600, "%s", name);
- }
-
- if_clone_create(name, namelen, NULL);
- CURVNET_RESTORE();
-} /* tapclone */
-
-
-/*
- * tapcreate
- *
- * to create interface
- */
-static void
-tapcreate(struct cdev *dev)
-{
- struct ifnet *ifp = NULL;
- struct tap_softc *tp = NULL;
- unsigned short macaddr_hi;
- uint32_t macaddr_mid;
- int unit;
- const char *name = NULL;
- u_char eaddr[6];
-
- /* allocate driver storage and create device */
- tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
- mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
- mtx_lock(&tapmtx);
- SLIST_INSERT_HEAD(&taphead, tp, tap_next);
- mtx_unlock(&tapmtx);
-
- unit = dev2unit(dev);
-
- /* select device: tap or vmnet */
- if (unit & VMNET_DEV_MASK) {
- name = vmnetname;
- tp->tap_flags |= TAP_VMNET;
- } else
- name = tapname;
-
- unit &= TAPMAXUNIT;
-
- TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, dev2unit(dev));
-
- /* generate fake MAC address: 00 bd xx xx xx unit_no */
- macaddr_hi = htons(0x00bd);
- macaddr_mid = (uint32_t) ticks;
- bcopy(&macaddr_hi, eaddr, sizeof(short));
- bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t));
- eaddr[5] = (u_char)unit;
-
- /* fill the rest and attach interface */
- ifp = tp->tap_ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL)
- panic("%s%d: can not if_alloc()", name, unit);
- ifp->if_softc = tp;
- if_initname(ifp, name, unit);
- ifp->if_init = tapifinit;
- ifp->if_start = tapifstart;
- ifp->if_ioctl = tapifioctl;
- ifp->if_mtu = ETHERMTU;
- ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
- IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
- ifp->if_capabilities |= IFCAP_LINKSTATE;
- ifp->if_capenable |= IFCAP_LINKSTATE;
-
- dev->si_drv1 = tp;
- tp->tap_dev = dev;
-
- ether_ifattach(ifp, eaddr);
-
- mtx_lock(&tp->tap_mtx);
- tp->tap_flags |= TAP_INITED;
- mtx_unlock(&tp->tap_mtx);
-
- knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx);
-
- TAPDEBUG("interface %s is created. minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-} /* tapcreate */
-
-
-/*
- * tapopen
- *
- * to open tunnel. must be superuser
- */
-static int
-tapopen(struct cdev *dev, int flag, int mode, struct thread *td)
-{
- struct tap_softc *tp = NULL;
- struct ifnet *ifp = NULL;
- int error;
-
- if (tapuopen == 0) {
- error = priv_check(td, PRIV_NET_TAP);
- if (error)
- return (error);
- }
-
- if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
- return (ENXIO);
-
- tp = dev->si_drv1;
-
- mtx_lock(&tp->tap_mtx);
- if (tp->tap_flags & TAP_OPEN) {
- mtx_unlock(&tp->tap_mtx);
- return (EBUSY);
- }
-
- bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
-#ifndef __rtems__
- tp->tap_pid = td->td_proc->p_pid;
-#else /* __rtems__ */
- tp->tap_pid = BSD_DEFAULT_PID;
-#endif /* __rtems__ */
- tp->tap_flags |= TAP_OPEN;
- ifp = tp->tap_ifp;
-
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- if (tapuponopen)
- ifp->if_flags |= IFF_UP;
- if_link_state_change(ifp, LINK_STATE_UP);
- mtx_unlock(&tp->tap_mtx);
-
- TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev));
-
- return (0);
-} /* tapopen */
-
-
-/*
- * tapclose
- *
- * close the device - mark i/f down & delete routing info
- */
-static int
-tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
-{
- struct ifaddr *ifa;
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
-
- /* junk all pending output */
- mtx_lock(&tp->tap_mtx);
- CURVNET_SET(ifp->if_vnet);
- IF_DRAIN(&ifp->if_snd);
-
- /*
- * Do not bring the interface down, and do not do anything else with
- * the interface, if we are in VMnet mode. Just close the device.
- */
- if (((tp->tap_flags & TAP_VMNET) == 0) &&
- (ifp->if_flags & (IFF_UP | IFF_LINK0)) == IFF_UP) {
- mtx_unlock(&tp->tap_mtx);
- if_down(ifp);
- mtx_lock(&tp->tap_mtx);
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- mtx_unlock(&tp->tap_mtx);
- CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- rtinit(ifa, (int)RTM_DELETE, 0);
- }
- if_purgeaddrs(ifp);
- mtx_lock(&tp->tap_mtx);
- }
- }
-
- if_link_state_change(ifp, LINK_STATE_DOWN);
- CURVNET_RESTORE();
-
- funsetown(&tp->tap_sigio);
- selwakeuppri(&tp->tap_rsel, PZERO+1);
- KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
-
- tp->tap_flags &= ~TAP_OPEN;
- tp->tap_pid = 0;
- mtx_unlock(&tp->tap_mtx);
-
- TAPDEBUG("%s is closed. minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- return (0);
-} /* tapclose */
-
-
-/*
- * tapifinit
- *
- * network interface initialization function
- */
-static void
-tapifinit(void *xtp)
-{
- struct tap_softc *tp = (struct tap_softc *)xtp;
- struct ifnet *ifp = tp->tap_ifp;
-
- TAPDEBUG("initializing %s\n", ifp->if_xname);
-
- mtx_lock(&tp->tap_mtx);
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- mtx_unlock(&tp->tap_mtx);
-
- /* attempt to start output */
- tapifstart(ifp);
-} /* tapifinit */
-
-
-/*
- * tapifioctl
- *
- * Process an ioctl request on network interface
- */
-static int
-tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct tap_softc *tp = ifp->if_softc;
- struct ifreq *ifr = (struct ifreq *)data;
- struct ifstat *ifs = NULL;
- struct ifmediareq *ifmr = NULL;
- int dummy, error = 0;
-
- switch (cmd) {
- case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- break;
-
- case SIOCGIFMEDIA:
- ifmr = (struct ifmediareq *)data;
- dummy = ifmr->ifm_count;
- ifmr->ifm_count = 1;
- ifmr->ifm_status = IFM_AVALID;
- ifmr->ifm_active = IFM_ETHER;
- if (tp->tap_flags & TAP_OPEN)
- ifmr->ifm_status |= IFM_ACTIVE;
- ifmr->ifm_current = ifmr->ifm_active;
- if (dummy >= 1) {
- int media = IFM_ETHER;
- error = copyout(&media, ifmr->ifm_ulist,
- sizeof(int));
- }
- break;
-
- case SIOCSIFMTU:
- ifp->if_mtu = ifr->ifr_mtu;
- break;
-
- case SIOCGIFSTATUS:
- ifs = (struct ifstat *)data;
- mtx_lock(&tp->tap_mtx);
- if (tp->tap_pid != 0)
- snprintf(ifs->ascii, sizeof(ifs->ascii),
- "\tOpened by PID %d\n", tp->tap_pid);
- else
- ifs->ascii[0] = '\0';
- mtx_unlock(&tp->tap_mtx);
- break;
-
- default:
- error = ether_ioctl(ifp, cmd, data);
- break;
- }
-
- return (error);
-} /* tapifioctl */
-
-
-/*
- * tapifstart
- *
- * queue packets from higher level ready to put out
- */
-static void
-tapifstart(struct ifnet *ifp)
-{
- struct tap_softc *tp = ifp->if_softc;
-
- TAPDEBUG("%s starting\n", ifp->if_xname);
-
- /*
- * do not junk pending output if we are in VMnet mode.
- * XXX: can this do any harm because of queue overflow?
- */
-
- mtx_lock(&tp->tap_mtx);
- if (((tp->tap_flags & TAP_VMNET) == 0) &&
- ((tp->tap_flags & TAP_READY) != TAP_READY)) {
- struct mbuf *m;
-
- /* Unlocked read. */
- TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
- tp->tap_flags);
-
- for (;;) {
- IF_DEQUEUE(&ifp->if_snd, m);
- if (m != NULL) {
- m_freem(m);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- } else
- break;
- }
- mtx_unlock(&tp->tap_mtx);
-
- return;
- }
-
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
-
- if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
- if (tp->tap_flags & TAP_RWAIT) {
- tp->tap_flags &= ~TAP_RWAIT;
- wakeup(tp);
- }
-
- if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
- mtx_unlock(&tp->tap_mtx);
- pgsigio(&tp->tap_sigio, SIGIO, 0);
- mtx_lock(&tp->tap_mtx);
- }
-
- selwakeuppri(&tp->tap_rsel, PZERO+1);
- KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
- }
-
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- mtx_unlock(&tp->tap_mtx);
-} /* tapifstart */
-
-
-/*
- * tapioctl
- *
- * the cdevsw interface is now pretty minimal
- */
-static int
-tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
-{
- struct ifreq ifr;
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- struct tapinfo *tapp = NULL;
- int f;
- int error;
-#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
- defined(COMPAT_FREEBSD4)
- int ival;
-#endif
-
- switch (cmd) {
- case TAPSIFINFO:
- tapp = (struct tapinfo *)data;
- if (ifp->if_type != tapp->type)
- return (EPROTOTYPE);
- mtx_lock(&tp->tap_mtx);
- if (ifp->if_mtu != tapp->mtu) {
- strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ);
- ifr.ifr_mtu = tapp->mtu;
- CURVNET_SET(ifp->if_vnet);
- error = ifhwioctl(SIOCSIFMTU, ifp,
- (caddr_t)&ifr, td);
- CURVNET_RESTORE();
- if (error) {
- mtx_unlock(&tp->tap_mtx);
- return (error);
- }
- }
- ifp->if_baudrate = tapp->baudrate;
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case TAPGIFINFO:
- tapp = (struct tapinfo *)data;
- mtx_lock(&tp->tap_mtx);
- tapp->mtu = ifp->if_mtu;
- tapp->type = ifp->if_type;
- tapp->baudrate = ifp->if_baudrate;
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case TAPSDEBUG:
- tapdebug = *(int *)data;
- break;
-
- case TAPGDEBUG:
- *(int *)data = tapdebug;
- break;
-
- case TAPGIFNAME: {
- struct ifreq *ifr = (struct ifreq *) data;
-
- strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
- } break;
-
- case FIONBIO:
- break;
-
- case FIOASYNC:
- mtx_lock(&tp->tap_mtx);
- if (*(int *)data)
- tp->tap_flags |= TAP_ASYNC;
- else
- tp->tap_flags &= ~TAP_ASYNC;
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case FIONREAD:
- if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
- struct mbuf *mb;
-
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, mb);
- for (*(int *)data = 0; mb != NULL;
- mb = mb->m_next)
- *(int *)data += mb->m_len;
- IFQ_UNLOCK(&ifp->if_snd);
- } else
- *(int *)data = 0;
- break;
-
- case FIOSETOWN:
- return (fsetown(*(int *)data, &tp->tap_sigio));
-
- case FIOGETOWN:
- *(int *)data = fgetown(&tp->tap_sigio);
- return (0);
-
- /* this is deprecated, FIOSETOWN should be used instead */
- case TIOCSPGRP:
- return (fsetown(-(*(int *)data), &tp->tap_sigio));
-
- /* this is deprecated, FIOGETOWN should be used instead */
- case TIOCGPGRP:
- *(int *)data = -fgetown(&tp->tap_sigio);
- return (0);
-
- /* VMware/VMnet port ioctl's */
-
-#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
- defined(COMPAT_FREEBSD4)
- case _IO('V', 0):
- ival = IOCPARM_IVAL(data);
- data = (caddr_t)&ival;
- /* FALLTHROUGH */
-#endif
- case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
- f = *(int *)data;
- f &= 0x0fff;
- f &= ~IFF_CANTCHANGE;
- f |= IFF_UP;
-
- mtx_lock(&tp->tap_mtx);
- ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case SIOCGIFADDR: /* get MAC address of the remote side */
- mtx_lock(&tp->tap_mtx);
- bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case SIOCSIFADDR: /* set MAC address of the remote side */
- mtx_lock(&tp->tap_mtx);
- bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
- mtx_unlock(&tp->tap_mtx);
- break;
-
- default:
- return (ENOTTY);
- }
- return (0);
-} /* tapioctl */
-
-
-/*
- * tapread
- *
- * the cdevsw read interface - reads a packet at a time, or at
- * least as much of a packet as can be read
- */
-static int
-tapread(struct cdev *dev, struct uio *uio, int flag)
-{
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- struct mbuf *m = NULL;
- int error = 0, len;
-
- TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev));
-
- mtx_lock(&tp->tap_mtx);
- if ((tp->tap_flags & TAP_READY) != TAP_READY) {
- mtx_unlock(&tp->tap_mtx);
-
- /* Unlocked read. */
- TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
- ifp->if_xname, dev2unit(dev), tp->tap_flags);
-
- return (EHOSTDOWN);
- }
-
- tp->tap_flags &= ~TAP_RWAIT;
-
- /* sleep until we get a packet */
- do {
- IF_DEQUEUE(&ifp->if_snd, m);
-
- if (m == NULL) {
- if (flag & O_NONBLOCK) {
- mtx_unlock(&tp->tap_mtx);
- return (EWOULDBLOCK);
- }
-
- tp->tap_flags |= TAP_RWAIT;
- error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1),
- "taprd", 0);
- if (error) {
- mtx_unlock(&tp->tap_mtx);
- return (error);
- }
- }
- } while (m == NULL);
- mtx_unlock(&tp->tap_mtx);
-
- /* feed packet to bpf */
- BPF_MTAP(ifp, m);
-
- /* xfer packet to user space */
- while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
- len = min(uio->uio_resid, m->m_len);
- if (len == 0)
- break;
-
- error = uiomove(mtod(m, void *), len, uio);
- m = m_free(m);
- }
-
- if (m != NULL) {
- TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
- dev2unit(dev));
- m_freem(m);
- }
-
- return (error);
-} /* tapread */
-
-
-/*
- * tapwrite
- *
- * the cdevsw write interface - an atomic write is a packet - or else!
- */
-static int
-tapwrite(struct cdev *dev, struct uio *uio, int flag)
-{
- struct ether_header *eh;
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- struct mbuf *m;
-
- TAPDEBUG("%s writing, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- if (uio->uio_resid == 0)
- return (0);
-
- if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
- TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n",
- ifp->if_xname, uio->uio_resid, dev2unit(dev));
-
- return (EIO);
- }
-
- if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN,
- M_PKTHDR)) == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return (ENOBUFS);
- }
-
- m->m_pkthdr.rcvif = ifp;
-
- /*
- * Only pass a unicast frame to ether_input(), if it would actually
- * have been received by non-virtual hardware.
- */
- if (m->m_len < sizeof(struct ether_header)) {
- m_freem(m);
- return (0);
- }
- eh = mtod(m, struct ether_header *);
-
- if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
- !ETHER_IS_MULTICAST(eh->ether_dhost) &&
- bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
- m_freem(m);
- return (0);
- }
-
- /* Pass packet up to parent. */
- CURVNET_SET(ifp->if_vnet);
- (*ifp->if_input)(ifp, m);
- CURVNET_RESTORE();
- if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */
-
- return (0);
-} /* tapwrite */
-
-
-/*
- * tappoll
- *
- * the poll interface, this is only useful on reads
- * really. the write detect always returns true, write never blocks
- * anyway, it either accepts the packet or drops it
- */
-static int
-tappoll(struct cdev *dev, int events, struct thread *td)
-{
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- int revents = 0;
-
- TAPDEBUG("%s polling, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- if (events & (POLLIN | POLLRDNORM)) {
- IFQ_LOCK(&ifp->if_snd);
- if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
- TAPDEBUG("%s have data in queue. len = %d, " \
- "minor = %#x\n", ifp->if_xname,
- ifp->if_snd.ifq_len, dev2unit(dev));
-
- revents |= (events & (POLLIN | POLLRDNORM));
- } else {
- TAPDEBUG("%s waiting for data, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- selrecord(td, &tp->tap_rsel);
- }
- IFQ_UNLOCK(&ifp->if_snd);
- }
-
- if (events & (POLLOUT | POLLWRNORM))
- revents |= (events & (POLLOUT | POLLWRNORM));
-
- return (revents);
-} /* tappoll */
-
-
-/*
- * tap_kqfilter
- *
- * support for kevent() system call
- */
-static int
-tapkqfilter(struct cdev *dev, struct knote *kn)
-{
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
-
- switch (kn->kn_filter) {
- case EVFILT_READ:
- TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tap_read_filterops;
- break;
-
- case EVFILT_WRITE:
- TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tap_write_filterops;
- break;
-
- default:
- TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- return (EINVAL);
- /* NOT REACHED */
- }
-
- kn->kn_hook = tp;
- knlist_add(&tp->tap_rsel.si_note, kn, 0);
-
- return (0);
-} /* tapkqfilter */
-
-
-/*
- * tap_kqread
- *
- * Return true if there is data in the interface queue
- */
-static int
-tapkqread(struct knote *kn, long hint)
-{
- int ret;
- struct tap_softc *tp = kn->kn_hook;
- struct cdev *dev = tp->tap_dev;
- struct ifnet *ifp = tp->tap_ifp;
-
- if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
- TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n",
- ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
- ret = 1;
- } else {
- TAPDEBUG("%s waiting for data, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- ret = 0;
- }
-
- return (ret);
-} /* tapkqread */
-
-
-/*
- * tap_kqwrite
- *
- * Can always write. Return the MTU in kn->data.
- */
-static int
-tapkqwrite(struct knote *kn, long hint)
-{
- struct tap_softc *tp = kn->kn_hook;
- struct ifnet *ifp = tp->tap_ifp;
-
- kn->kn_data = ifp->if_mtu;
-
- return (1);
-} /* tapkqwrite */
-
-
-static void
-tapkqdetach(struct knote *kn)
-{
- struct tap_softc *tp = kn->kn_hook;
-
- knlist_remove(&tp->tap_rsel.si_note, kn, 0);
-} /* tapkqdetach */
-
diff --git a/freebsd/sys/net/if_tap.h b/freebsd/sys/net/if_tap.h
index 34f44b38..9718cee4 100644
--- a/freebsd/sys/net/if_tap.h
+++ b/freebsd/sys/net/if_tap.h
@@ -40,24 +40,22 @@
#ifndef _NET_IF_TAP_H_
#define _NET_IF_TAP_H_
-/* refer to if_tapvar.h for the softc stuff */
+#include <net/if_tun.h>
/* maximum receive packet size (hard limit) */
#define TAPMRU 16384
-struct tapinfo {
- int baudrate; /* linespeed */
- short mtu; /* maximum transmission unit */
- u_char type; /* ethernet, tokenring, etc. */
- u_char dummy; /* place holder */
-};
+#define tapinfo tuninfo
-/* ioctl's for get/set debug */
-#define TAPSDEBUG _IOW('t', 90, int)
-#define TAPGDEBUG _IOR('t', 89, int)
-#define TAPSIFINFO _IOW('t', 91, struct tapinfo)
-#define TAPGIFINFO _IOR('t', 92, struct tapinfo)
-#define TAPGIFNAME _IOR('t', 93, struct ifreq)
+/*
+ * ioctl's for get/set debug; these are aliases of TUN* ioctls, see net/if_tun.h
+ * for details.
+ */
+#define TAPSDEBUG TUNSDEBUG
+#define TAPGDEBUG TUNGDEBUG
+#define TAPSIFINFO TUNSIFINFO
+#define TAPGIFINFO TUNGIFINFO
+#define TAPGIFNAME TUNGIFNAME
/* VMware ioctl's */
#define VMIO_SIOCSIFFLAGS _IOWINT('V', 0)
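
With tap and tun merged into if_tuntap.c, the TAP* ioctls become pure aliases, so existing userspace keeps compiling and working. A minimal userspace sketch querying the interface name behind an already-created tap device (assumes /dev/tap0 exists; error handling trimmed):

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <net/if_tap.h>
	#include <fcntl.h>
	#include <stdio.h>

	int
	main(void)
	{
		struct ifreq ifr;
		int fd = open("/dev/tap0", O_RDWR);

		if (fd < 0)
			return (1);
		if (ioctl(fd, TAPGIFNAME, &ifr) == 0)	/* alias of TUNGIFNAME */
			printf("attached to %s\n", ifr.ifr_name);
		return (0);
	}
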
diff --git a/freebsd/sys/net/if_tapvar.h b/freebsd/sys/net/if_tapvar.h
deleted file mode 100644
index f5cf9f3e..00000000
--- a/freebsd/sys/net/if_tapvar.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * BASED ON:
- * -------------------------------------------------------------------------
- *
- * Copyright (c) 1998 Brian Somers <brian@Awfulhak.org>
- * All rights reserved.
- *
- * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
- * Nottingham University 1987.
- */
-
-/*
- * $FreeBSD$
- * $Id: if_tapvar.h,v 0.6 2000/07/11 02:16:08 max Exp $
- */
-
-#ifndef _NET_IF_TAPVAR_H_
-#define _NET_IF_TAPVAR_H_
-
-/*
- * tap_mtx locks tap_flags, tap_pid. tap_next locked with global tapmtx.
- * Other fields locked by owning subsystems.
- */
-struct tap_softc {
- struct ifnet *tap_ifp;
- u_short tap_flags; /* misc flags */
-#define TAP_OPEN (1 << 0)
-#define TAP_INITED (1 << 1)
-#define TAP_RWAIT (1 << 2)
-#define TAP_ASYNC (1 << 3)
-#define TAP_READY (TAP_OPEN|TAP_INITED)
-#define TAP_VMNET (1 << 4)
-
- u_int8_t ether_addr[ETHER_ADDR_LEN]; /* ether addr of the remote side */
-
- pid_t tap_pid; /* PID of process to open */
- struct sigio *tap_sigio; /* information for async I/O */
- struct selinfo tap_rsel; /* read select */
-
- SLIST_ENTRY(tap_softc) tap_next; /* next device in chain */
- struct cdev *tap_dev;
- struct mtx tap_mtx; /* per-softc mutex */
-};
-
-#endif /* !_NET_IF_TAPVAR_H_ */
diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c
deleted file mode 100644
index 44441773..00000000
--- a/freebsd/sys/net/if_tun.c
+++ /dev/null
@@ -1,1055 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */
-
-/*-
- * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
- * Nottingham University 1987.
- *
- * This source may be freely distributed, however I would be interested
- * in any changes that are made.
- *
- * This driver takes packets off the IP i/f and hands them up to a
- * user process to have its wicked way with. This driver has it's
- * roots in a similar driver written by Phil Cockcroft (formerly) at
- * UCL. This driver is based much more on read/write/poll mode of
- * operation though.
- *
- * $FreeBSD$
- */
-
-#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_inet6.h>
-
-#include <sys/param.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/systm.h>
-#include <sys/jail.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/socket.h>
-#include <sys/fcntl.h>
-#include <sys/filio.h>
-#include <sys/sockio.h>
-#include <sys/ttycom.h>
-#include <sys/poll.h>
-#include <sys/selinfo.h>
-#include <sys/signalvar.h>
-#include <sys/filedesc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-#include <sys/conf.h>
-#include <sys/uio.h>
-#include <sys/malloc.h>
-#include <sys/random.h>
-
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/if_clone.h>
-#include <net/if_types.h>
-#include <net/netisr.h>
-#include <net/route.h>
-#include <net/vnet.h>
-#ifdef INET
-#include <netinet/in.h>
-#endif
-#include <net/bpf.h>
-#include <net/if_tun.h>
-
-#include <sys/queue.h>
-#include <sys/condvar.h>
-
-#include <security/mac/mac_framework.h>
-
-/*
- * tun_list is protected by global tunmtx. Other mutable fields are
- * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is
- * static for the duration of a tunnel interface.
- */
-struct tun_softc {
- TAILQ_ENTRY(tun_softc) tun_list;
- struct cdev *tun_dev;
- u_short tun_flags; /* misc flags */
-#define TUN_OPEN 0x0001
-#define TUN_INITED 0x0002
-#define TUN_RCOLL 0x0004
-#define TUN_IASET 0x0008
-#define TUN_DSTADDR 0x0010
-#define TUN_LMODE 0x0020
-#define TUN_RWAIT 0x0040
-#define TUN_ASYNC 0x0080
-#define TUN_IFHEAD 0x0100
-
-#define TUN_READY (TUN_OPEN | TUN_INITED)
-
- /*
- * XXXRW: tun_pid is used to exclusively lock /dev/tun. Is this
- * actually needed? Can we just return EBUSY if already open?
- * Problem is that this involved inherent races when a tun device
- * is handed off from one process to another, as opposed to just
- * being slightly stale informationally.
- */
- pid_t tun_pid; /* owning pid */
- struct ifnet *tun_ifp; /* the interface */
- struct sigio *tun_sigio; /* information for async I/O */
- struct selinfo tun_rsel; /* read select */
- struct mtx tun_mtx; /* protect mutable softc fields */
- struct cv tun_cv; /* protect against ref'd dev destroy */
-};
-#define TUN2IFP(sc) ((sc)->tun_ifp)
-
-#define TUNDEBUG if (tundebug) if_printf
-
-/*
- * All mutable global variables in if_tun are locked using tunmtx, with
- * the exception of tundebug, which is used unlocked, and tunclones,
- * which is static after setup.
- */
-static struct mtx tunmtx;
-static const char tunname[] = "tun";
-static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
-static int tundebug = 0;
-static int tundclone = 1;
-static struct clonedevs *tunclones;
-static TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
-SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
-
-SYSCTL_DECL(_net_link);
-static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
- "IP tunnel software network interface.");
-SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
- "Enable legacy devfs interface creation.");
-
-static void tunclone(void *arg, struct ucred *cred, char *name,
- int namelen, struct cdev **dev);
-static void tuncreate(const char *name, struct cdev *dev);
-static int tunifioctl(struct ifnet *, u_long, caddr_t);
-static void tuninit(struct ifnet *);
-static int tunmodevent(module_t, int, void *);
-static int tunoutput(struct ifnet *, struct mbuf *,
- const struct sockaddr *, struct route *ro);
-static void tunstart(struct ifnet *);
-
-static int tun_clone_create(struct if_clone *, int, caddr_t);
-static void tun_clone_destroy(struct ifnet *);
-static struct if_clone *tun_cloner;
-
-static d_open_t tunopen;
-static d_close_t tunclose;
-static d_read_t tunread;
-static d_write_t tunwrite;
-static d_ioctl_t tunioctl;
-static d_poll_t tunpoll;
-static d_kqfilter_t tunkqfilter;
-
-static int tunkqread(struct knote *, long);
-static int tunkqwrite(struct knote *, long);
-static void tunkqdetach(struct knote *);
-
-static struct filterops tun_read_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tunkqdetach,
- .f_event = tunkqread,
-};
-
-static struct filterops tun_write_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tunkqdetach,
- .f_event = tunkqwrite,
-};
-
-static struct cdevsw tun_cdevsw = {
- .d_version = D_VERSION,
- .d_flags = D_NEEDMINOR,
- .d_open = tunopen,
- .d_close = tunclose,
- .d_read = tunread,
- .d_write = tunwrite,
- .d_ioctl = tunioctl,
- .d_poll = tunpoll,
- .d_kqfilter = tunkqfilter,
- .d_name = tunname,
-};
-
-static int
-tun_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
- struct cdev *dev;
- int i;
-
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0);
- if (i) {
- /* No preexisting struct cdev *, create one */
- dev = make_dev(&tun_cdevsw, unit,
- UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit);
- }
- tuncreate(tunname, dev);
-
- return (0);
-}
-
-static void
-tunclone(void *arg, struct ucred *cred, char *name, int namelen,
- struct cdev **dev)
-{
- char devname[SPECNAMELEN + 1];
- int u, i, append_unit;
-
- if (*dev != NULL)
- return;
-
- /*
- * If tun cloning is enabled, only the superuser can create an
- * interface.
- */
- if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE) != 0)
- return;
-
- if (strcmp(name, tunname) == 0) {
- u = -1;
- } else if (dev_stdclone(name, NULL, tunname, &u) != 1)
- return; /* Don't recognise the name */
- if (u != -1 && u > IF_MAXUNIT)
- return; /* Unit number too high */
-
- if (u == -1)
- append_unit = 1;
- else
- append_unit = 0;
-
- CURVNET_SET(CRED_TO_VNET(cred));
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
- if (i) {
- if (append_unit) {
- namelen = snprintf(devname, sizeof(devname), "%s%d",
- name, u);
- name = devname;
- }
- /* No preexisting struct cdev *, create one */
- *dev = make_dev_credf(MAKEDEV_REF, &tun_cdevsw, u, cred,
- UID_UUCP, GID_DIALER, 0600, "%s", name);
- }
-
- if_clone_create(name, namelen, NULL);
- CURVNET_RESTORE();
-}
-
-static void
-tun_destroy(struct tun_softc *tp)
-{
- struct cdev *dev;
-
- mtx_lock(&tp->tun_mtx);
- if ((tp->tun_flags & TUN_OPEN) != 0)
- cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
- else
- mtx_unlock(&tp->tun_mtx);
-
- CURVNET_SET(TUN2IFP(tp)->if_vnet);
- dev = tp->tun_dev;
- bpfdetach(TUN2IFP(tp));
- if_detach(TUN2IFP(tp));
- if_free(TUN2IFP(tp));
- destroy_dev(dev);
- seldrain(&tp->tun_rsel);
- knlist_clear(&tp->tun_rsel.si_note, 0);
- knlist_destroy(&tp->tun_rsel.si_note);
- mtx_destroy(&tp->tun_mtx);
- cv_destroy(&tp->tun_cv);
- free(tp, M_TUN);
- CURVNET_RESTORE();
-}
-
-static void
-tun_clone_destroy(struct ifnet *ifp)
-{
- struct tun_softc *tp = ifp->if_softc;
-
- mtx_lock(&tunmtx);
- TAILQ_REMOVE(&tunhead, tp, tun_list);
- mtx_unlock(&tunmtx);
- tun_destroy(tp);
-}
-
-static int
-tunmodevent(module_t mod, int type, void *data)
-{
- static eventhandler_tag tag;
- struct tun_softc *tp;
-
- switch (type) {
- case MOD_LOAD:
- mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
- clone_setup(&tunclones);
- tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
- if (tag == NULL)
- return (ENOMEM);
- tun_cloner = if_clone_simple(tunname, tun_clone_create,
- tun_clone_destroy, 0);
- break;
- case MOD_UNLOAD:
- if_clone_detach(tun_cloner);
- EVENTHANDLER_DEREGISTER(dev_clone, tag);
- drain_dev_clone_events();
-
- mtx_lock(&tunmtx);
- while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
- TAILQ_REMOVE(&tunhead, tp, tun_list);
- mtx_unlock(&tunmtx);
- tun_destroy(tp);
- mtx_lock(&tunmtx);
- }
- mtx_unlock(&tunmtx);
- clone_cleanup(&tunclones);
- mtx_destroy(&tunmtx);
- break;
- default:
- return EOPNOTSUPP;
- }
- return 0;
-}
-
-static moduledata_t tun_mod = {
- "if_tun",
- tunmodevent,
- 0
-};
-
-DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
-MODULE_VERSION(if_tun, 1);
-
-static void
-tunstart(struct ifnet *ifp)
-{
- struct tun_softc *tp = ifp->if_softc;
- struct mbuf *m;
-
- TUNDEBUG(ifp,"%s starting\n", ifp->if_xname);
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, m);
- if (m == NULL) {
- IFQ_UNLOCK(&ifp->if_snd);
- return;
- }
- IFQ_UNLOCK(&ifp->if_snd);
- }
-
- mtx_lock(&tp->tun_mtx);
- if (tp->tun_flags & TUN_RWAIT) {
- tp->tun_flags &= ~TUN_RWAIT;
- wakeup(tp);
- }
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
- KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
- if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
- mtx_unlock(&tp->tun_mtx);
- pgsigio(&tp->tun_sigio, SIGIO, 0);
- } else
- mtx_unlock(&tp->tun_mtx);
-}
-
-/* XXX: should return an error code so it can fail. */
-static void
-tuncreate(const char *name, struct cdev *dev)
-{
- struct tun_softc *sc;
- struct ifnet *ifp;
-
- sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
- mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
- cv_init(&sc->tun_cv, "tun_condvar");
- sc->tun_flags = TUN_INITED;
- sc->tun_dev = dev;
- mtx_lock(&tunmtx);
- TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
- mtx_unlock(&tunmtx);
-
- ifp = sc->tun_ifp = if_alloc(IFT_PPP);
- if (ifp == NULL)
- panic("%s%d: failed to if_alloc() interface.\n",
- name, dev2unit(dev));
- if_initname(ifp, name, dev2unit(dev));
- ifp->if_mtu = TUNMTU;
- ifp->if_ioctl = tunifioctl;
- ifp->if_output = tunoutput;
- ifp->if_start = tunstart;
- ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
- ifp->if_softc = sc;
- IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
- ifp->if_snd.ifq_drv_maxlen = 0;
- IFQ_SET_READY(&ifp->if_snd);
- knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
- ifp->if_capabilities |= IFCAP_LINKSTATE;
- ifp->if_capenable |= IFCAP_LINKSTATE;
-
- if_attach(ifp);
- bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
- dev->si_drv1 = sc;
- TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-}
-
-static int
-tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
-{
- struct ifnet *ifp;
- struct tun_softc *tp;
-
- /*
- * XXXRW: Non-atomic test and set of dev->si_drv1 requires
- * synchronization.
- */
- tp = dev->si_drv1;
- if (!tp) {
- tuncreate(tunname, dev);
- tp = dev->si_drv1;
- }
-
- /*
- * XXXRW: This use of tun_pid is subject to error due to the
- * fact that a reference to the tunnel can live beyond the
- * death of the process that created it. Can we replace this
- * with a simple busy flag?
- */
- mtx_lock(&tp->tun_mtx);
-#ifndef __rtems__
- if (tp->tun_pid != 0 && tp->tun_pid != td->td_proc->p_pid) {
-#else /* __rtems__ */
- if (tp->tun_pid != 0 && tp->tun_pid != BSD_DEFAULT_PID) {
-#endif /* __rtems__ */
- mtx_unlock(&tp->tun_mtx);
- return (EBUSY);
- }
-#ifndef __rtems__
- tp->tun_pid = td->td_proc->p_pid;
-#else /* __rtems__ */
- tp->tun_pid = BSD_DEFAULT_PID;
-#endif /* __rtems__ */
-
- tp->tun_flags |= TUN_OPEN;
- ifp = TUN2IFP(tp);
- if_link_state_change(ifp, LINK_STATE_UP);
- TUNDEBUG(ifp, "open\n");
- mtx_unlock(&tp->tun_mtx);
-
- return (0);
-}
-
-/*
- * tunclose - close the device - mark i/f down & delete
- * routing info
- */
-static int
-tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
-{
- struct tun_softc *tp;
- struct ifnet *ifp;
-
- tp = dev->si_drv1;
- ifp = TUN2IFP(tp);
-
- mtx_lock(&tp->tun_mtx);
- tp->tun_flags &= ~TUN_OPEN;
- tp->tun_pid = 0;
-
- /*
- * junk all pending output
- */
- CURVNET_SET(ifp->if_vnet);
- IFQ_PURGE(&ifp->if_snd);
-
- if (ifp->if_flags & IFF_UP) {
- mtx_unlock(&tp->tun_mtx);
- if_down(ifp);
- mtx_lock(&tp->tun_mtx);
- }
-
- /* Delete all addresses and routes which reference this interface. */
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- struct ifaddr *ifa;
-
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- mtx_unlock(&tp->tun_mtx);
- CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- /* deal w/IPv4 PtP destination; unlocked read */
- if (ifa->ifa_addr->sa_family == AF_INET) {
- rtinit(ifa, (int)RTM_DELETE,
- tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
- } else {
- rtinit(ifa, (int)RTM_DELETE, 0);
- }
- }
- if_purgeaddrs(ifp);
- mtx_lock(&tp->tun_mtx);
- }
- if_link_state_change(ifp, LINK_STATE_DOWN);
- CURVNET_RESTORE();
-
- funsetown(&tp->tun_sigio);
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
- KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
- TUNDEBUG (ifp, "closed\n");
-
- cv_broadcast(&tp->tun_cv);
- mtx_unlock(&tp->tun_mtx);
- return (0);
-}
-
-static void
-tuninit(struct ifnet *ifp)
-{
- struct tun_softc *tp = ifp->if_softc;
-#ifdef INET
- struct ifaddr *ifa;
-#endif
-
- TUNDEBUG(ifp, "tuninit\n");
-
- mtx_lock(&tp->tun_mtx);
- ifp->if_flags |= IFF_UP;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- getmicrotime(&ifp->if_lastchange);
-
-#ifdef INET
- if_addr_rlock(ifp);
- CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family == AF_INET) {
- struct sockaddr_in *si;
-
- si = (struct sockaddr_in *)ifa->ifa_addr;
- if (si->sin_addr.s_addr)
- tp->tun_flags |= TUN_IASET;
-
- si = (struct sockaddr_in *)ifa->ifa_dstaddr;
- if (si && si->sin_addr.s_addr)
- tp->tun_flags |= TUN_DSTADDR;
- }
- }
- if_addr_runlock(ifp);
-#endif
- mtx_unlock(&tp->tun_mtx);
-}
-
-/*
- * Process an ioctl request.
- */
-static int
-tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct ifreq *ifr = (struct ifreq *)data;
- struct tun_softc *tp = ifp->if_softc;
- struct ifstat *ifs;
- int error = 0;
-
- switch(cmd) {
- case SIOCGIFSTATUS:
- ifs = (struct ifstat *)data;
- mtx_lock(&tp->tun_mtx);
- if (tp->tun_pid)
- snprintf(ifs->ascii, sizeof(ifs->ascii),
- "\tOpened by PID %d\n", tp->tun_pid);
- else
- ifs->ascii[0] = '\0';
- mtx_unlock(&tp->tun_mtx);
- break;
- case SIOCSIFADDR:
- tuninit(ifp);
- TUNDEBUG(ifp, "address set\n");
- break;
- case SIOCSIFMTU:
- ifp->if_mtu = ifr->ifr_mtu;
- TUNDEBUG(ifp, "mtu set\n");
- break;
- case SIOCSIFFLAGS:
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- break;
- default:
- error = EINVAL;
- }
- return (error);
-}
-
-/*
- * tunoutput - queue packets from higher level ready to put out.
- */
-static int
-tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
- struct route *ro)
-{
- struct tun_softc *tp = ifp->if_softc;
- u_short cached_tun_flags;
- int error;
- u_int32_t af;
-
- TUNDEBUG (ifp, "tunoutput\n");
-
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m0);
- if (error) {
- m_freem(m0);
- return (error);
- }
-#endif
-
- /* Could be unlocked read? */
- mtx_lock(&tp->tun_mtx);
- cached_tun_flags = tp->tun_flags;
- mtx_unlock(&tp->tun_mtx);
- if ((cached_tun_flags & TUN_READY) != TUN_READY) {
- TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
- m_freem (m0);
- return (EHOSTDOWN);
- }
-
- if ((ifp->if_flags & IFF_UP) != IFF_UP) {
- m_freem (m0);
- return (EHOSTDOWN);
- }
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC)
- bcopy(dst->sa_data, &af, sizeof(af));
- else
- af = dst->sa_family;
-
- if (bpf_peers_present(ifp->if_bpf))
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
-
- /* prepend sockaddr? this may abort if the mbuf allocation fails */
- if (cached_tun_flags & TUN_LMODE) {
- /* allocate space for sockaddr */
- M_PREPEND(m0, dst->sa_len, M_NOWAIT);
-
- /* if allocation failed drop packet */
- if (m0 == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (ENOBUFS);
- } else {
- bcopy(dst, m0->m_data, dst->sa_len);
- }
- }
-
- if (cached_tun_flags & TUN_IFHEAD) {
- /* Prepend the address family */
- M_PREPEND(m0, 4, M_NOWAIT);
-
- /* if allocation failed drop packet */
- if (m0 == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (ENOBUFS);
- } else
- *(u_int32_t *)m0->m_data = htonl(af);
- } else {
-#ifdef INET
- if (af != AF_INET)
-#endif
- {
- m_freem(m0);
- return (EAFNOSUPPORT);
- }
- }
-
- error = (ifp->if_transmit)(ifp, m0);
- if (error)
- return (ENOBUFS);
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
- return (0);
-}
-
-/*
- * the cdevsw interface is now pretty minimal.
- */
-static int
-tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
- struct thread *td)
-{
- struct ifreq ifr;
- struct tun_softc *tp = dev->si_drv1;
- struct tuninfo *tunp;
- int error;
-
- switch (cmd) {
- case TUNSIFINFO:
- tunp = (struct tuninfo *)data;
- if (TUN2IFP(tp)->if_type != tunp->type)
- return (EPROTOTYPE);
- mtx_lock(&tp->tun_mtx);
- if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
- strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
- ifr.ifr_mtu = tunp->mtu;
- CURVNET_SET(TUN2IFP(tp)->if_vnet);
- error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
- (caddr_t)&ifr, td);
- CURVNET_RESTORE();
- if (error) {
- mtx_unlock(&tp->tun_mtx);
- return (error);
- }
- }
- TUN2IFP(tp)->if_baudrate = tunp->baudrate;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNGIFINFO:
- tunp = (struct tuninfo *)data;
- mtx_lock(&tp->tun_mtx);
- tunp->mtu = TUN2IFP(tp)->if_mtu;
- tunp->type = TUN2IFP(tp)->if_type;
- tunp->baudrate = TUN2IFP(tp)->if_baudrate;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNSDEBUG:
- tundebug = *(int *)data;
- break;
- case TUNGDEBUG:
- *(int *)data = tundebug;
- break;
- case TUNSLMODE:
- mtx_lock(&tp->tun_mtx);
- if (*(int *)data) {
- tp->tun_flags |= TUN_LMODE;
- tp->tun_flags &= ~TUN_IFHEAD;
- } else
- tp->tun_flags &= ~TUN_LMODE;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNSIFHEAD:
- mtx_lock(&tp->tun_mtx);
- if (*(int *)data) {
- tp->tun_flags |= TUN_IFHEAD;
- tp->tun_flags &= ~TUN_LMODE;
- } else
- tp->tun_flags &= ~TUN_IFHEAD;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNGIFHEAD:
- mtx_lock(&tp->tun_mtx);
- *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNSIFMODE:
- /* deny this if UP */
- if (TUN2IFP(tp)->if_flags & IFF_UP)
- return(EBUSY);
-
- switch (*(int *)data & ~IFF_MULTICAST) {
- case IFF_POINTOPOINT:
- case IFF_BROADCAST:
- mtx_lock(&tp->tun_mtx);
- TUN2IFP(tp)->if_flags &=
- ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
- TUN2IFP(tp)->if_flags |= *(int *)data;
- mtx_unlock(&tp->tun_mtx);
- break;
- default:
- return(EINVAL);
- }
- break;
- case TUNSIFPID:
- mtx_lock(&tp->tun_mtx);
-#ifndef __rtems__
- tp->tun_pid = curthread->td_proc->p_pid;
-#else /* __rtems__ */
- tp->tun_pid = BSD_DEFAULT_PID;
-#endif /* __rtems__ */
- mtx_unlock(&tp->tun_mtx);
- break;
- case FIONBIO:
- break;
- case FIOASYNC:
- mtx_lock(&tp->tun_mtx);
- if (*(int *)data)
- tp->tun_flags |= TUN_ASYNC;
- else
- tp->tun_flags &= ~TUN_ASYNC;
- mtx_unlock(&tp->tun_mtx);
- break;
- case FIONREAD:
- if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
- struct mbuf *mb;
- IFQ_LOCK(&TUN2IFP(tp)->if_snd);
- IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
- for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
- *(int *)data += mb->m_len;
- IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
- } else
- *(int *)data = 0;
- break;
- case FIOSETOWN:
- return (fsetown(*(int *)data, &tp->tun_sigio));
-
- case FIOGETOWN:
- *(int *)data = fgetown(&tp->tun_sigio);
- return (0);
-
- /* This is deprecated, FIOSETOWN should be used instead. */
- case TIOCSPGRP:
- return (fsetown(-(*(int *)data), &tp->tun_sigio));
-
- /* This is deprecated, FIOGETOWN should be used instead. */
- case TIOCGPGRP:
- *(int *)data = -fgetown(&tp->tun_sigio);
- return (0);
-
- default:
- return (ENOTTY);
- }
- return (0);
-}
-
-/*
- * The cdevsw read interface - reads a packet at a time, or at
- * least as much of a packet as can be read.
- */
-static int
-tunread(struct cdev *dev, struct uio *uio, int flag)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
- struct mbuf *m;
- int error=0, len;
-
- TUNDEBUG (ifp, "read\n");
- mtx_lock(&tp->tun_mtx);
- if ((tp->tun_flags & TUN_READY) != TUN_READY) {
- mtx_unlock(&tp->tun_mtx);
- TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
- return (EHOSTDOWN);
- }
-
- tp->tun_flags &= ~TUN_RWAIT;
-
- do {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- if (m == NULL) {
- if (flag & O_NONBLOCK) {
- mtx_unlock(&tp->tun_mtx);
- return (EWOULDBLOCK);
- }
- tp->tun_flags |= TUN_RWAIT;
- error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
- "tunread", 0);
- if (error != 0) {
- mtx_unlock(&tp->tun_mtx);
- return (error);
- }
- }
- } while (m == NULL);
- mtx_unlock(&tp->tun_mtx);
-
- while (m && uio->uio_resid > 0 && error == 0) {
- len = min(uio->uio_resid, m->m_len);
- if (len != 0)
- error = uiomove(mtod(m, void *), len, uio);
- m = m_free(m);
- }
-
- if (m) {
- TUNDEBUG(ifp, "Dropping mbuf\n");
- m_freem(m);
- }
- return (error);
-}
-
-/*
- * the cdevsw write interface - an atomic write is a packet - or else!
- */
-static int
-tunwrite(struct cdev *dev, struct uio *uio, int flag)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
- struct mbuf *m;
- uint32_t family, mru;
- int isr;
-
- TUNDEBUG(ifp, "tunwrite\n");
-
- if ((ifp->if_flags & IFF_UP) != IFF_UP)
- /* ignore silently */
- return (0);
-
- if (uio->uio_resid == 0)
- return (0);
-
- mru = TUNMRU;
- if (tp->tun_flags & TUN_IFHEAD)
- mru += sizeof(family);
- if (uio->uio_resid < 0 || uio->uio_resid > mru) {
- TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
- return (EIO);
- }
-
- if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return (ENOBUFS);
- }
-
- m->m_pkthdr.rcvif = ifp;
-#ifdef MAC
- mac_ifnet_create_mbuf(ifp, m);
-#endif
-
- /* Could be unlocked read? */
- mtx_lock(&tp->tun_mtx);
- if (tp->tun_flags & TUN_IFHEAD) {
- mtx_unlock(&tp->tun_mtx);
- if (m->m_len < sizeof(family) &&
- (m = m_pullup(m, sizeof(family))) == NULL)
- return (ENOBUFS);
- family = ntohl(*mtod(m, u_int32_t *));
- m_adj(m, sizeof(family));
- } else {
- mtx_unlock(&tp->tun_mtx);
- family = AF_INET;
- }
-
- BPF_MTAP2(ifp, &family, sizeof(family), m);
-
- switch (family) {
-#ifdef INET
- case AF_INET:
- isr = NETISR_IP;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- isr = NETISR_IPV6;
- break;
-#endif
- default:
- m_freem(m);
- return (EAFNOSUPPORT);
- }
- random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
- if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
- if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
- CURVNET_SET(ifp->if_vnet);
- M_SETFIB(m, ifp->if_fib);
- netisr_dispatch(isr, m);
- CURVNET_RESTORE();
- return (0);
-}
-
-/*
- * tunpoll - the poll interface; it is really only useful for reads.
- * Write detection always returns true: a write never blocks, the
- * packet is either accepted or dropped.
- */
-static int
-tunpoll(struct cdev *dev, int events, struct thread *td)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
- int revents = 0;
- struct mbuf *m;
-
- TUNDEBUG(ifp, "tunpoll\n");
-
- if (events & (POLLIN | POLLRDNORM)) {
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, m);
- if (m != NULL) {
- TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
- revents |= events & (POLLIN | POLLRDNORM);
- } else {
- TUNDEBUG(ifp, "tunpoll waiting\n");
- selrecord(td, &tp->tun_rsel);
- }
- IFQ_UNLOCK(&ifp->if_snd);
- }
- if (events & (POLLOUT | POLLWRNORM))
- revents |= events & (POLLOUT | POLLWRNORM);
-
- return (revents);
-}
-
-/*
- * tunkqfilter - support for the kevent() system call.
- */
-static int
-tunkqfilter(struct cdev *dev, struct knote *kn)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
-
- switch(kn->kn_filter) {
- case EVFILT_READ:
- TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tun_read_filterops;
- break;
-
- case EVFILT_WRITE:
- TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tun_write_filterops;
- break;
-
- default:
- TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- return(EINVAL);
- }
-
- kn->kn_hook = tp;
- knlist_add(&tp->tun_rsel.si_note, kn, 0);
-
- return (0);
-}
-
-/*
- * Return true if there is data in the interface queue.
- */
-static int
-tunkqread(struct knote *kn, long hint)
-{
- int ret;
- struct tun_softc *tp = kn->kn_hook;
- struct cdev *dev = tp->tun_dev;
- struct ifnet *ifp = TUN2IFP(tp);
-
- if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
- TUNDEBUG(ifp,
- "%s have data in the queue. Len = %d, minor = %#x\n",
- ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
- ret = 1;
- } else {
- TUNDEBUG(ifp,
- "%s waiting for data, minor = %#x\n", ifp->if_xname,
- dev2unit(dev));
- ret = 0;
- }
-
- return (ret);
-}
-
-/*
- * Always can write, always return MTU in kn->data.
- */
-static int
-tunkqwrite(struct knote *kn, long hint)
-{
- struct tun_softc *tp = kn->kn_hook;
- struct ifnet *ifp = TUN2IFP(tp);
-
- kn->kn_data = ifp->if_mtu;
-
- return (1);
-}
-
-static void
-tunkqdetach(struct knote *kn)
-{
- struct tun_softc *tp = kn->kn_hook;
-
- knlist_remove(&tp->tun_rsel.si_note, kn, 0);
-}
diff --git a/freebsd/sys/net/if_tun.h b/freebsd/sys/net/if_tun.h
index 1ea375f7..a44c87bd 100644
--- a/freebsd/sys/net/if_tun.h
+++ b/freebsd/sys/net/if_tun.h
@@ -40,6 +40,7 @@ struct tuninfo {
#define TUNSIFINFO _IOW('t', 91, struct tuninfo)
#define TUNGIFINFO _IOR('t', 92, struct tuninfo)
#define TUNSLMODE _IOW('t', 93, int)
+#define TUNGIFNAME _IOR('t', 93, struct ifreq)
#define TUNSIFMODE _IOW('t', 94, int)
#define TUNSIFPID _IO('t', 95)
#define TUNSIFHEAD _IOW('t', 96, int)
diff --git a/freebsd/sys/net/if_tuntap.c b/freebsd/sys/net/if_tuntap.c
new file mode 100644
index 00000000..3516d82b
--- /dev/null
+++ b/freebsd/sys/net/if_tuntap.c
@@ -0,0 +1,1734 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
+ * All rights reserved.
+ * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * BASED ON:
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
+ * Nottingham University 1987.
+ *
+ * This source may be freely distributed, however I would be interested
+ * in any changes that are made.
+ *
+ * This driver takes packets off the IP i/f and hands them up to a
+ * user process to have its wicked way with. This driver has its
+ * roots in a similar driver written by Phil Cockcroft (formerly) at
+ * UCL. This driver is based much more on read/write/poll mode of
+ * operation though.
+ *
+ * $FreeBSD$
+ */
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/jail.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/socket.h>
+#include <sys/eventhandler.h>
+#include <sys/fcntl.h>
+#include <sys/filio.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/ttycom.h>
+#include <sys/poll.h>
+#include <sys/selinfo.h>
+#include <sys/signalvar.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/random.h>
+#include <sys/ctype.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <net/vnet.h>
+#ifdef INET
+#include <netinet/in.h>
+#endif
+#include <net/bpf.h>
+#include <net/if_tap.h>
+#include <net/if_tun.h>
+
+#include <sys/queue.h>
+#include <sys/condvar.h>
+#include <security/mac/mac_framework.h>
+
+struct tuntap_driver;
+
+/*
+ * tun_list is protected by global tunmtx. Other mutable fields are
+ * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is
+ * static for the duration of a tunnel interface.
+ */
+struct tuntap_softc {
+ TAILQ_ENTRY(tuntap_softc) tun_list;
+ struct cdev *tun_dev;
+ u_short tun_flags; /* misc flags */
+#define TUN_OPEN 0x0001
+#define TUN_INITED 0x0002
+#define TUN_IASET 0x0008
+#define TUN_DSTADDR 0x0010
+#define TUN_LMODE 0x0020
+#define TUN_RWAIT 0x0040
+#define TUN_ASYNC 0x0080
+#define TUN_IFHEAD 0x0100
+#define TUN_DYING 0x0200
+#define TUN_L2 0x0400
+#define TUN_VMNET 0x0800
+
+#define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET)
+#define TUN_READY (TUN_OPEN | TUN_INITED)
+
+#ifndef __rtems__
+ pid_t tun_pid; /* owning pid */
+#endif /* __rtems__ */
+ struct ifnet *tun_ifp; /* the interface */
+ struct sigio *tun_sigio; /* async I/O info */
+ struct tuntap_driver *tun_drv; /* appropriate driver */
+ struct selinfo tun_rsel; /* read select */
+ struct mtx tun_mtx; /* softc field mutex */
+ struct cv tun_cv; /* for ref'd dev destroy */
+ struct ether_addr tun_ether; /* remote address */
+};
+#define TUN2IFP(sc) ((sc)->tun_ifp)
+
+#define TUNDEBUG if (tundebug) if_printf
+
+#define TUN_LOCK(tp) mtx_lock(&(tp)->tun_mtx)
+#define TUN_UNLOCK(tp) mtx_unlock(&(tp)->tun_mtx)
+
+#define TUN_VMIO_FLAG_MASK 0x0fff
+
+/*
+ * All mutable global variables in if_tuntap are locked using tunmtx, with
+ * the exception of tundebug, which is used unlocked, and the drivers' *clones,
+ * which are static after setup.
+ */
+static struct mtx tunmtx;
+static eventhandler_tag tag;
+static const char tunname[] = "tun";
+static const char tapname[] = "tap";
+static const char vmnetname[] = "vmnet";
+static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
+static int tundebug = 0;
+static int tundclone = 1;
+static int tap_allow_uopen = 0; /* allow user open() */
+static int tapuponopen = 0; /* IFF_UP on open() */
+static int tapdclone = 1; /* enable devfs cloning */
+
+static TAILQ_HEAD(,tuntap_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
+SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
+
+static struct sx tun_ioctl_sx;
+SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");
+
+SYSCTL_DECL(_net_link);
+/* tun */
+static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
+ "IP tunnel software network interface");
+SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
+ "Enable legacy devfs interface creation");
+
+/* tap */
+static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
+ "Ethernet tunnel software network interface");
+SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0,
+ "Allow user to open /dev/tap (based on node permissions)");
+SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
+ "Bring interface up when /dev/tap is opened");
+SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
+ "Enable legacy devfs interface creation");
+SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, "");
+
+static int tuntap_name2info(const char *name, int *unit, int *flags);
+static void tunclone(void *arg, struct ucred *cred, char *name,
+ int namelen, struct cdev **dev);
+static void tuncreate(struct cdev *dev, struct tuntap_driver *);
+static int tunifioctl(struct ifnet *, u_long, caddr_t);
+static void tuninit(struct ifnet *);
+static void tunifinit(void *xtp);
+static int tuntapmodevent(module_t, int, void *);
+static int tunoutput(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *ro);
+static void tunstart(struct ifnet *);
+static void tunstart_l2(struct ifnet *);
+
+static int tun_clone_match(struct if_clone *ifc, const char *name);
+static int tap_clone_match(struct if_clone *ifc, const char *name);
+static int vmnet_clone_match(struct if_clone *ifc, const char *name);
+static int tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
+static int tun_clone_destroy(struct if_clone *, struct ifnet *);
+
+static d_open_t tunopen;
+static d_close_t tunclose;
+static d_read_t tunread;
+static d_write_t tunwrite;
+static d_ioctl_t tunioctl;
+static d_poll_t tunpoll;
+static d_kqfilter_t tunkqfilter;
+
+static int tunkqread(struct knote *, long);
+static int tunkqwrite(struct knote *, long);
+static void tunkqdetach(struct knote *);
+
+static struct filterops tun_read_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = tunkqdetach,
+ .f_event = tunkqread,
+};
+
+static struct filterops tun_write_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = tunkqdetach,
+ .f_event = tunkqwrite,
+};
+
+static struct tuntap_driver {
+ struct cdevsw cdevsw;
+ int ident_flags;
+ struct unrhdr *unrhdr;
+ struct clonedevs *clones;
+ ifc_match_t *clone_match_fn;
+ ifc_create_t *clone_create_fn;
+ ifc_destroy_t *clone_destroy_fn;
+} tuntap_drivers[] = {
+ {
+ .ident_flags = 0,
+ .cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_NEEDMINOR,
+ .d_open = tunopen,
+ .d_close = tunclose,
+ .d_read = tunread,
+ .d_write = tunwrite,
+ .d_ioctl = tunioctl,
+ .d_poll = tunpoll,
+ .d_kqfilter = tunkqfilter,
+ .d_name = tunname,
+ },
+ .clone_match_fn = tun_clone_match,
+ .clone_create_fn = tun_clone_create,
+ .clone_destroy_fn = tun_clone_destroy,
+ },
+ {
+ .ident_flags = TUN_L2,
+ .cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_NEEDMINOR,
+ .d_open = tunopen,
+ .d_close = tunclose,
+ .d_read = tunread,
+ .d_write = tunwrite,
+ .d_ioctl = tunioctl,
+ .d_poll = tunpoll,
+ .d_kqfilter = tunkqfilter,
+ .d_name = tapname,
+ },
+ .clone_match_fn = tap_clone_match,
+ .clone_create_fn = tun_clone_create,
+ .clone_destroy_fn = tun_clone_destroy,
+ },
+ {
+ .ident_flags = TUN_L2 | TUN_VMNET,
+ .cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_NEEDMINOR,
+ .d_open = tunopen,
+ .d_close = tunclose,
+ .d_read = tunread,
+ .d_write = tunwrite,
+ .d_ioctl = tunioctl,
+ .d_poll = tunpoll,
+ .d_kqfilter = tunkqfilter,
+ .d_name = vmnetname,
+ },
+ .clone_match_fn = vmnet_clone_match,
+ .clone_create_fn = tun_clone_create,
+ .clone_destroy_fn = tun_clone_destroy,
+ },
+};
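+
+/*
+ * Editor's note (not part of the original source): the three personalities
+ * above (tun, tap, vmnet) share a single cdevsw implementation; ident_flags
+ * is what tells them apart, and tuncreate() copies it into tun_flags.
+ */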
+
+struct tuntap_driver_cloner {
+ SLIST_ENTRY(tuntap_driver_cloner) link;
+ struct tuntap_driver *drv;
+ struct if_clone *cloner;
+};
+
+VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) =
+ SLIST_HEAD_INITIALIZER(tuntap_driver_cloners);
+
+#define V_tuntap_driver_cloners VNET(tuntap_driver_cloners)
+
+/*
+ * Sets unit and/or flags given the device name. Must be called with correct
+ * vnet context.
+ */
+static int
+tuntap_name2info(const char *name, int *outunit, int *outflags)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_driver_cloner *drvc;
+ char *dname;
+ int flags, unit;
+ bool found;
+
+ if (name == NULL)
+ return (EINVAL);
+
+ /*
+	 * dev_stdclone() wants a char * so it can hand a pointer back through
+	 * its second parameter, but it never modifies the name.  Since we
+	 * always pass NULL for that parameter, casting away const is safe.
+ */
+ dname = __DECONST(char *, name);
+ found = false;
+
+ KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
+ ("tuntap_driver_cloners failed to initialize"));
+ SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
+ KASSERT(drvc->drv != NULL,
+ ("tuntap_driver_cloners entry not properly initialized"));
+ drv = drvc->drv;
+
+ if (strcmp(name, drv->cdevsw.d_name) == 0) {
+ found = true;
+ unit = -1;
+ flags = drv->ident_flags;
+ break;
+ }
+
+ if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) {
+ found = true;
+ flags = drv->ident_flags;
+ break;
+ }
+ }
+
+ if (!found)
+ return (ENXIO);
+
+ if (outunit != NULL)
+ *outunit = unit;
+ if (outflags != NULL)
+ *outflags = flags;
+ return (0);
+}
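+
+/*
+ * Editor's sketch (not in the original source) of how the lookup above
+ * behaves, assuming only the three stock drivers are registered:
+ *
+ *	tuntap_name2info("tun", &u, &f);     // u = -1, f = 0
+ *	tuntap_name2info("tap3", &u, &f);    // u = 3,  f = TUN_L2
+ *	tuntap_name2info("vmnet0", &u, &f);  // u = 0,  f = TUN_L2 | TUN_VMNET
+ *	tuntap_name2info("gif0", &u, &f);    // ENXIO; u and f untouched
+ */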
+
+/*
+ * Get driver information from a set of flags specified. Masks the identifying
+ * part of the flags and compares it against all of the available
+ * tuntap_drivers. Must be called with correct vnet context.
+ */
+static struct tuntap_driver *
+tuntap_driver_from_flags(int tun_flags)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_driver_cloner *drvc;
+
+ KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
+ ("tuntap_driver_cloners failed to initialize"));
+ SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
+ KASSERT(drvc->drv != NULL,
+ ("tuntap_driver_cloners entry not properly initialized"));
+ drv = drvc->drv;
+ if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags)
+ return (drv);
+ }
+
+ return (NULL);
+}
+
+
+static int
+tun_clone_match(struct if_clone *ifc, const char *name)
+{
+ int tunflags;
+
+ if (tuntap_name2info(name, NULL, &tunflags) == 0) {
+ if ((tunflags & TUN_L2) == 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+tap_clone_match(struct if_clone *ifc, const char *name)
+{
+ int tunflags;
+
+ if (tuntap_name2info(name, NULL, &tunflags) == 0) {
+ if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2)
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+vmnet_clone_match(struct if_clone *ifc, const char *name)
+{
+ int tunflags;
+
+ if (tuntap_name2info(name, NULL, &tunflags) == 0) {
+ if ((tunflags & TUN_VMNET) != 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
+{
+ struct tuntap_driver *drv;
+ struct cdev *dev;
+ int err, i, tunflags, unit;
+
+ tunflags = 0;
+ /* The name here tells us exactly what we're creating */
+ err = tuntap_name2info(name, &unit, &tunflags);
+ if (err != 0)
+ return (err);
+
+ drv = tuntap_driver_from_flags(tunflags);
+ if (drv == NULL)
+ return (ENXIO);
+
+ if (unit != -1) {
+ /* If this unit number is still available that's okay. */
+ if (alloc_unr_specific(drv->unrhdr, unit) == -1)
+ return (EEXIST);
+ } else {
+ unit = alloc_unr(drv->unrhdr);
+ }
+
+ snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit);
+
+ /* find any existing device, or allocate new unit number */
+ i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0);
+ if (i) {
+ /* No preexisting struct cdev *, create one */
+ dev = make_dev(&drv->cdevsw, unit, UID_UUCP, GID_DIALER, 0600,
+ "%s%d", drv->cdevsw.d_name, unit);
+ }
+
+ tuncreate(dev, drv);
+
+ return (0);
+}
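+
+/*
+ * Editor's note (assumption, not from the diff): this is the if_clone(4)
+ * path, reached through e.g. "ifconfig tap5 create"; opening a device
+ * node directly goes through tunclone() below instead.
+ */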
+
+static void
+tunclone(void *arg, struct ucred *cred, char *name, int namelen,
+ struct cdev **dev)
+{
+ char devname[SPECNAMELEN + 1];
+ struct tuntap_driver *drv;
+ int append_unit, i, u, tunflags;
+ bool mayclone;
+
+ if (*dev != NULL)
+ return;
+
+ tunflags = 0;
+ CURVNET_SET(CRED_TO_VNET(cred));
+ if (tuntap_name2info(name, &u, &tunflags) != 0)
+ goto out; /* Not recognized */
+
+ if (u != -1 && u > IF_MAXUNIT)
+ goto out; /* Unit number too high */
+
+ mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0;
+ if ((tunflags & TUN_L2) != 0) {
+ /* tap/vmnet allow user open with a sysctl */
+ mayclone = (mayclone || tap_allow_uopen) && tapdclone;
+ } else {
+ mayclone = mayclone && tundclone;
+ }
+
+ /*
+	 * Cloning via the device node requires the matching devfs_cloning
+	 * sysctl to be enabled, and the caller must either be privileged or,
+	 * for tap/vmnet, be allowed in by the user_open sysctl.
+ */
+ if (!mayclone)
+ goto out;
+
+ if (u == -1)
+ append_unit = 1;
+ else
+ append_unit = 0;
+
+ drv = tuntap_driver_from_flags(tunflags);
+ if (drv == NULL)
+ goto out;
+
+ /* find any existing device, or allocate new unit number */
+ i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0);
+ if (i) {
+ if (append_unit) {
+ namelen = snprintf(devname, sizeof(devname), "%s%d",
+ name, u);
+ name = devname;
+ }
+ /* No preexisting struct cdev *, create one */
+ *dev = make_dev_credf(MAKEDEV_REF, &drv->cdevsw, u, cred,
+ UID_UUCP, GID_DIALER, 0600, "%s", name);
+ }
+
+ if_clone_create(name, namelen, NULL);
+out:
+ CURVNET_RESTORE();
+}
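+
+/*
+ * Editor's sketch (hypothetical userspace fragment): with the matching
+ * devfs_cloning sysctl enabled, opening a not-yet-existing node is enough
+ * to create both the device and the interface:
+ *
+ *	int fd = open("/dev/tun", O_RDWR);    // next free tun unit
+ *	int fd3 = open("/dev/tap3", O_RDWR);  // specifically tap3
+ */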
+
+static void
+tun_destroy(struct tuntap_softc *tp)
+{
+
+ TUN_LOCK(tp);
+ tp->tun_flags |= TUN_DYING;
+ if ((tp->tun_flags & TUN_OPEN) != 0)
+ cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
+ else
+ TUN_UNLOCK(tp);
+
+ CURVNET_SET(TUN2IFP(tp)->if_vnet);
+
+ destroy_dev(tp->tun_dev);
+ seldrain(&tp->tun_rsel);
+ knlist_clear(&tp->tun_rsel.si_note, 0);
+ knlist_destroy(&tp->tun_rsel.si_note);
+ if ((tp->tun_flags & TUN_L2) != 0) {
+ ether_ifdetach(TUN2IFP(tp));
+ } else {
+ bpfdetach(TUN2IFP(tp));
+ if_detach(TUN2IFP(tp));
+ }
+ sx_xlock(&tun_ioctl_sx);
+ TUN2IFP(tp)->if_softc = NULL;
+ sx_xunlock(&tun_ioctl_sx);
+ free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
+ if_free(TUN2IFP(tp));
+ mtx_destroy(&tp->tun_mtx);
+ cv_destroy(&tp->tun_cv);
+ free(tp, M_TUN);
+ CURVNET_RESTORE();
+}
+
+static int
+tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+
+ mtx_lock(&tunmtx);
+ TAILQ_REMOVE(&tunhead, tp, tun_list);
+ mtx_unlock(&tunmtx);
+ tun_destroy(tp);
+
+ return (0);
+}
+
+static void
+vnet_tun_init(const void *unused __unused)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_driver_cloner *drvc;
+ int i;
+
+ for (i = 0; i < nitems(tuntap_drivers); ++i) {
+ drv = &tuntap_drivers[i];
+ drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO);
+
+ drvc->drv = drv;
+ drvc->cloner = if_clone_advanced(drv->cdevsw.d_name, 0,
+ drv->clone_match_fn, drv->clone_create_fn,
+ drv->clone_destroy_fn);
+ SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link);
+	}
+}
+VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_tun_init, NULL);
+
+#ifndef __rtems__
+static void
+vnet_tun_uninit(const void *unused __unused)
+{
+ struct tuntap_driver_cloner *drvc;
+
+ while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) {
+ drvc = SLIST_FIRST(&V_tuntap_driver_cloners);
+ SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link);
+
+ if_clone_detach(drvc->cloner);
+ free(drvc, M_TUN);
+ }
+}
+VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_tun_uninit, NULL);
+
+static void
+tun_uninit(const void *unused __unused)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_softc *tp;
+ int i;
+
+ EVENTHANDLER_DEREGISTER(dev_clone, tag);
+ drain_dev_clone_events();
+
+ mtx_lock(&tunmtx);
+ while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
+ TAILQ_REMOVE(&tunhead, tp, tun_list);
+ mtx_unlock(&tunmtx);
+ tun_destroy(tp);
+ mtx_lock(&tunmtx);
+ }
+ mtx_unlock(&tunmtx);
+ for (i = 0; i < nitems(tuntap_drivers); ++i) {
+ drv = &tuntap_drivers[i];
+ delete_unrhdr(drv->unrhdr);
+ clone_cleanup(&drv->clones);
+ }
+ mtx_destroy(&tunmtx);
+}
+SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);
+#endif /* __rtems__ */
+
+static int
+tuntapmodevent(module_t mod, int type, void *data)
+{
+ struct tuntap_driver *drv;
+ int i;
+
+ switch (type) {
+ case MOD_LOAD:
+ mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
+ for (i = 0; i < nitems(tuntap_drivers); ++i) {
+ drv = &tuntap_drivers[i];
+ clone_setup(&drv->clones);
+ drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
+ }
+ tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
+ if (tag == NULL)
+ return (ENOMEM);
+ break;
+ case MOD_UNLOAD:
+ /* See tun_uninit, so it's done after the vnet_sysuninit() */
+ break;
+ default:
+ return EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static moduledata_t tuntap_mod = {
+ "if_tuntap",
+ tuntapmodevent,
+ 0
+};
+
+DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_tuntap, 1);
+MODULE_VERSION(if_tun, 1);
+MODULE_VERSION(if_tap, 1);
+
+static void
+tunstart(struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+ struct mbuf *m;
+
+ TUNDEBUG(ifp, "starting\n");
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, m);
+ if (m == NULL) {
+ IFQ_UNLOCK(&ifp->if_snd);
+ return;
+ }
+ IFQ_UNLOCK(&ifp->if_snd);
+ }
+
+ TUN_LOCK(tp);
+ if (tp->tun_flags & TUN_RWAIT) {
+ tp->tun_flags &= ~TUN_RWAIT;
+ wakeup(tp);
+ }
+ selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
+ TUN_UNLOCK(tp);
+ pgsigio(&tp->tun_sigio, SIGIO, 0);
+ } else
+ TUN_UNLOCK(tp);
+}
+
+/*
+ * tunstart_l2
+ *
+ * Hand packets queued by the higher level to the reader (layer 2 flavour).
+ */
+static void
+tunstart_l2(struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+
+ TUNDEBUG(ifp, "starting\n");
+
+ /*
+ * do not junk pending output if we are in VMnet mode.
+ * XXX: can this do any harm because of queue overflow?
+ */
+
+ TUN_LOCK(tp);
+ if (((tp->tun_flags & TUN_VMNET) == 0) &&
+ ((tp->tun_flags & TUN_READY) != TUN_READY)) {
+ struct mbuf *m;
+
+ /* Unlocked read. */
+ TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags);
+
+ for (;;) {
+ IF_DEQUEUE(&ifp->if_snd, m);
+ if (m != NULL) {
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ } else
+ break;
+ }
+ TUN_UNLOCK(tp);
+
+ return;
+ }
+
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ if (tp->tun_flags & TUN_RWAIT) {
+ tp->tun_flags &= ~TUN_RWAIT;
+ wakeup(tp);
+ }
+
+ if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) {
+ TUN_UNLOCK(tp);
+ pgsigio(&tp->tun_sigio, SIGIO, 0);
+ TUN_LOCK(tp);
+ }
+
+ selwakeuppri(&tp->tun_rsel, PZERO+1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
+ }
+
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ TUN_UNLOCK(tp);
+} /* tunstart_l2 */
+
+/* XXX: should return an error code so it can fail. */
+static void
+tuncreate(struct cdev *dev, struct tuntap_driver *drv)
+{
+ struct tuntap_softc *sc;
+ struct ifnet *ifp;
+ struct ether_addr eaddr;
+ int iflags;
+ u_char type;
+
+ sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
+ mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
+ cv_init(&sc->tun_cv, "tun_condvar");
+ sc->tun_flags = drv->ident_flags;
+ sc->tun_dev = dev;
+ sc->tun_drv = drv;
+ mtx_lock(&tunmtx);
+ TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
+ mtx_unlock(&tunmtx);
+
+ iflags = IFF_MULTICAST;
+ if ((sc->tun_flags & TUN_L2) != 0) {
+ type = IFT_ETHER;
+ iflags |= IFF_BROADCAST | IFF_SIMPLEX;
+ } else {
+ type = IFT_PPP;
+ iflags |= IFF_POINTOPOINT;
+ }
+ ifp = sc->tun_ifp = if_alloc(type);
+ if (ifp == NULL)
+ panic("%s%d: failed to if_alloc() interface.\n",
+ drv->cdevsw.d_name, dev2unit(dev));
+ ifp->if_softc = sc;
+ if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev));
+ ifp->if_ioctl = tunifioctl;
+ ifp->if_flags = iflags;
+ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
+ knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
+ ifp->if_capabilities |= IFCAP_LINKSTATE;
+ ifp->if_capenable |= IFCAP_LINKSTATE;
+
+ if ((sc->tun_flags & TUN_L2) != 0) {
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_init = tunifinit;
+ ifp->if_start = tunstart_l2;
+
+ ether_gen_addr(ifp, &eaddr);
+ ether_ifattach(ifp, eaddr.octet);
+ } else {
+ ifp->if_mtu = TUNMTU;
+ ifp->if_start = tunstart;
+ ifp->if_output = tunoutput;
+
+ ifp->if_snd.ifq_drv_maxlen = 0;
+ IFQ_SET_READY(&ifp->if_snd);
+
+ if_attach(ifp);
+ bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
+ }
+ dev->si_drv1 = sc;
+
+ TUN_LOCK(sc);
+ sc->tun_flags |= TUN_INITED;
+ TUN_UNLOCK(sc);
+
+ TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+}
+
+static int
+tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+ struct ifnet *ifp;
+ struct tuntap_driver *drv;
+ struct tuntap_softc *tp;
+ int error, tunflags;
+
+ tunflags = 0;
+ CURVNET_SET(TD_TO_VNET(td));
+ error = tuntap_name2info(dev->si_name, NULL, &tunflags);
+ if (error != 0) {
+ CURVNET_RESTORE();
+ return (error); /* Shouldn't happen */
+ }
+
+ if ((tunflags & TUN_L2) != 0) {
+		/* Require privilege unless net.link.tap.user_open is set. */
+ if (tap_allow_uopen == 0) {
+ error = priv_check(td, PRIV_NET_TAP);
+ if (error != 0) {
+ CURVNET_RESTORE();
+ return (error);
+ }
+ }
+ }
+
+ /*
+ * XXXRW: Non-atomic test and set of dev->si_drv1 requires
+ * synchronization.
+ */
+ tp = dev->si_drv1;
+ if (!tp) {
+ drv = tuntap_driver_from_flags(tunflags);
+ if (drv == NULL) {
+ CURVNET_RESTORE();
+ return (ENXIO);
+ }
+ tuncreate(dev, drv);
+ tp = dev->si_drv1;
+ }
+
+ TUN_LOCK(tp);
+ if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
+ TUN_UNLOCK(tp);
+ CURVNET_RESTORE();
+ return (EBUSY);
+ }
+
+ ifp = TUN2IFP(tp);
+
+ if ((tp->tun_flags & TUN_L2) != 0) {
+ bcopy(IF_LLADDR(ifp), tp->tun_ether.octet,
+ sizeof(tp->tun_ether.octet));
+
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+
+ if (tapuponopen)
+ ifp->if_flags |= IFF_UP;
+ }
+
+#ifndef __rtems__
+ tp->tun_pid = td->td_proc->p_pid;
+#endif /* __rtems__ */
+ tp->tun_flags |= TUN_OPEN;
+
+ if_link_state_change(ifp, LINK_STATE_UP);
+ TUNDEBUG(ifp, "open\n");
+ TUN_UNLOCK(tp);
+ CURVNET_RESTORE();
+ return (0);
+}
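+
+/*
+ * Editor's sketch (assumption): a unit can be held open by only one
+ * descriptor at a time, so a second open of the same device fails:
+ *
+ *	int a = open("/dev/tun0", O_RDWR);   // succeeds; link state goes UP
+ *	int b = open("/dev/tun0", O_RDWR);   // fails with errno == EBUSY
+ */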
+
+/*
+ * tunclose - close the device - mark i/f down & delete
+ * routing info
+ */
+static int
+tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
+{
+ struct tuntap_softc *tp;
+ struct ifnet *ifp;
+ bool l2tun;
+
+ tp = dev->si_drv1;
+ ifp = TUN2IFP(tp);
+
+ TUN_LOCK(tp);
+#ifndef __rtems__
+ /*
+ * Simply close the device if this isn't the controlling process. This
+ * may happen if, for instance, the tunnel has been handed off to
+ * another process. The original controller should be able to close it
+ * without putting us into an inconsistent state.
+ */
+ if (td->td_proc->p_pid != tp->tun_pid) {
+ TUN_UNLOCK(tp);
+ return (0);
+ }
+#endif /* __rtems__ */
+
+ /*
+ * junk all pending output
+ */
+ CURVNET_SET(ifp->if_vnet);
+
+ l2tun = false;
+ if ((tp->tun_flags & TUN_L2) != 0) {
+ l2tun = true;
+ IF_DRAIN(&ifp->if_snd);
+ } else {
+ IFQ_PURGE(&ifp->if_snd);
+ }
+
+ /* For vmnet, we won't do most of the address/route bits */
+ if ((tp->tun_flags & TUN_VMNET) != 0 ||
+ (l2tun && (ifp->if_flags & IFF_LINK0) != 0))
+ goto out;
+
+ if (ifp->if_flags & IFF_UP) {
+ TUN_UNLOCK(tp);
+ if_down(ifp);
+ TUN_LOCK(tp);
+ }
+
+ /* Delete all addresses and routes which reference this interface. */
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ struct ifaddr *ifa;
+
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ TUN_UNLOCK(tp);
+ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ /* deal w/IPv4 PtP destination; unlocked read */
+ if (!l2tun && ifa->ifa_addr->sa_family == AF_INET) {
+ rtinit(ifa, (int)RTM_DELETE,
+ tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
+ } else {
+ rtinit(ifa, (int)RTM_DELETE, 0);
+ }
+ }
+ if_purgeaddrs(ifp);
+ TUN_LOCK(tp);
+ }
+
+out:
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ CURVNET_RESTORE();
+
+ funsetown(&tp->tun_sigio);
+ selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ TUNDEBUG (ifp, "closed\n");
+ tp->tun_flags &= ~TUN_OPEN;
+#ifndef __rtems__
+ tp->tun_pid = 0;
+#endif /* __rtems__ */
+
+ cv_broadcast(&tp->tun_cv);
+ TUN_UNLOCK(tp);
+ return (0);
+}
+
+static void
+tuninit(struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+#ifdef INET
+ struct ifaddr *ifa;
+#endif
+
+ TUNDEBUG(ifp, "tuninit\n");
+
+ TUN_LOCK(tp);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if ((tp->tun_flags & TUN_L2) == 0) {
+ ifp->if_flags |= IFF_UP;
+ getmicrotime(&ifp->if_lastchange);
+#ifdef INET
+ if_addr_rlock(ifp);
+ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ struct sockaddr_in *si;
+
+ si = (struct sockaddr_in *)ifa->ifa_addr;
+ if (si->sin_addr.s_addr)
+ tp->tun_flags |= TUN_IASET;
+
+ si = (struct sockaddr_in *)ifa->ifa_dstaddr;
+ if (si && si->sin_addr.s_addr)
+ tp->tun_flags |= TUN_DSTADDR;
+ }
+ }
+ if_addr_runlock(ifp);
+#endif
+ TUN_UNLOCK(tp);
+ } else {
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ TUN_UNLOCK(tp);
+ /* attempt to start output */
+ tunstart_l2(ifp);
+ }
+}
+
+/*
+ * Used only for l2 tunnel.
+ */
+static void
+tunifinit(void *xtp)
+{
+ struct tuntap_softc *tp;
+
+ tp = (struct tuntap_softc *)xtp;
+ tuninit(tp->tun_ifp);
+}
+
+/*
+ * Process an ioctl request.
+ */
+static int
+tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct tuntap_softc *tp;
+ struct ifstat *ifs;
+ struct ifmediareq *ifmr;
+ int dummy, error = 0;
+ bool l2tun;
+
+ ifmr = NULL;
+ sx_xlock(&tun_ioctl_sx);
+ tp = ifp->if_softc;
+ if (tp == NULL) {
+ error = ENXIO;
+ goto bad;
+ }
+ l2tun = (tp->tun_flags & TUN_L2) != 0;
+ switch(cmd) {
+ case SIOCGIFSTATUS:
+ ifs = (struct ifstat *)data;
+ TUN_LOCK(tp);
+#ifndef __rtems__
+ if (tp->tun_pid)
+ snprintf(ifs->ascii, sizeof(ifs->ascii),
+ "\tOpened by PID %d\n", tp->tun_pid);
+ else
+#endif /* __rtems__ */
+ ifs->ascii[0] = '\0';
+ TUN_UNLOCK(tp);
+ break;
+ case SIOCSIFADDR:
+ if (l2tun)
+ error = ether_ioctl(ifp, cmd, data);
+ else
+ tuninit(ifp);
+ if (error == 0)
+ TUNDEBUG(ifp, "address set\n");
+ break;
+ case SIOCSIFMTU:
+ ifp->if_mtu = ifr->ifr_mtu;
+ TUNDEBUG(ifp, "mtu set\n");
+ break;
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ break;
+ case SIOCGIFMEDIA:
+ if (!l2tun) {
+ error = EINVAL;
+ break;
+ }
+
+ ifmr = (struct ifmediareq *)data;
+ dummy = ifmr->ifm_count;
+ ifmr->ifm_count = 1;
+ ifmr->ifm_status = IFM_AVALID;
+ ifmr->ifm_active = IFM_ETHER;
+ if (tp->tun_flags & TUN_OPEN)
+ ifmr->ifm_status |= IFM_ACTIVE;
+ ifmr->ifm_current = ifmr->ifm_active;
+ if (dummy >= 1) {
+ int media = IFM_ETHER;
+ error = copyout(&media, ifmr->ifm_ulist, sizeof(int));
+ }
+ break;
+ default:
+ if (l2tun) {
+ error = ether_ioctl(ifp, cmd, data);
+ } else {
+ error = EINVAL;
+ }
+ }
+bad:
+ sx_xunlock(&tun_ioctl_sx);
+ return (error);
+}
+
+/*
+ * tunoutput - queue packets from higher level ready to put out.
+ */
+static int
+tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
+ struct route *ro)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+ u_short cached_tun_flags;
+ int error;
+ u_int32_t af;
+
+ TUNDEBUG (ifp, "tunoutput\n");
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m0);
+ if (error) {
+ m_freem(m0);
+ return (error);
+ }
+#endif
+
+ /* Could be unlocked read? */
+ TUN_LOCK(tp);
+ cached_tun_flags = tp->tun_flags;
+ TUN_UNLOCK(tp);
+ if ((cached_tun_flags & TUN_READY) != TUN_READY) {
+ TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
+ m_freem (m0);
+ return (EHOSTDOWN);
+ }
+
+ if ((ifp->if_flags & IFF_UP) != IFF_UP) {
+ m_freem (m0);
+ return (EHOSTDOWN);
+ }
+
+ /* BPF writes need to be handled specially. */
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+
+ if (bpf_peers_present(ifp->if_bpf))
+ bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
+
+ /* prepend sockaddr? this may abort if the mbuf allocation fails */
+ if (cached_tun_flags & TUN_LMODE) {
+ /* allocate space for sockaddr */
+ M_PREPEND(m0, dst->sa_len, M_NOWAIT);
+
+ /* if allocation failed drop packet */
+ if (m0 == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (ENOBUFS);
+ } else {
+ bcopy(dst, m0->m_data, dst->sa_len);
+ }
+ }
+
+ if (cached_tun_flags & TUN_IFHEAD) {
+ /* Prepend the address family */
+ M_PREPEND(m0, 4, M_NOWAIT);
+
+ /* if allocation failed drop packet */
+ if (m0 == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (ENOBUFS);
+ } else
+ *(u_int32_t *)m0->m_data = htonl(af);
+ } else {
+#ifdef INET
+ if (af != AF_INET)
+#endif
+ {
+ m_freem(m0);
+ return (EAFNOSUPPORT);
+ }
+ }
+
+ error = (ifp->if_transmit)(ifp, m0);
+ if (error)
+ return (ENOBUFS);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ return (0);
+}
+
+/*
+ * the cdevsw interface is now pretty minimal.
+ */
+static int
+tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
+ struct thread *td)
+{
+ struct ifreq ifr, *ifrp;
+ struct tuntap_softc *tp = dev->si_drv1;
+ struct tuninfo *tunp;
+ int error, iflags;
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+ defined(COMPAT_FREEBSD4)
+ int ival;
+#endif
+ bool l2tun;
+
+ l2tun = (tp->tun_flags & TUN_L2) != 0;
+ if (l2tun) {
+ /* tap specific ioctls */
+ switch(cmd) {
+ /* VMware/VMnet port ioctl's */
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+ defined(COMPAT_FREEBSD4)
+ case _IO('V', 0):
+ ival = IOCPARM_IVAL(data);
+ data = (caddr_t)&ival;
+ /* FALLTHROUGH */
+#endif
+ case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
+ iflags = *(int *)data;
+ iflags &= TUN_VMIO_FLAG_MASK;
+ iflags &= ~IFF_CANTCHANGE;
+ iflags |= IFF_UP;
+
+ TUN_LOCK(tp);
+ TUN2IFP(tp)->if_flags = iflags |
+ (TUN2IFP(tp)->if_flags & IFF_CANTCHANGE);
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case SIOCGIFADDR: /* get MAC address of the remote side */
+ TUN_LOCK(tp);
+ bcopy(&tp->tun_ether.octet, data,
+ sizeof(tp->tun_ether.octet));
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case SIOCSIFADDR: /* set MAC address of the remote side */
+ TUN_LOCK(tp);
+ bcopy(data, &tp->tun_ether.octet,
+ sizeof(tp->tun_ether.octet));
+ TUN_UNLOCK(tp);
+
+ return (0);
+ }
+
+ /* Fall through to the common ioctls if unhandled */
+ } else {
+ switch (cmd) {
+ case TUNSLMODE:
+ TUN_LOCK(tp);
+ if (*(int *)data) {
+ tp->tun_flags |= TUN_LMODE;
+ tp->tun_flags &= ~TUN_IFHEAD;
+ } else
+ tp->tun_flags &= ~TUN_LMODE;
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case TUNSIFHEAD:
+ TUN_LOCK(tp);
+ if (*(int *)data) {
+ tp->tun_flags |= TUN_IFHEAD;
+ tp->tun_flags &= ~TUN_LMODE;
+ } else
+ tp->tun_flags &= ~TUN_IFHEAD;
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case TUNGIFHEAD:
+ TUN_LOCK(tp);
+ *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case TUNSIFMODE:
+ /* deny this if UP */
+ if (TUN2IFP(tp)->if_flags & IFF_UP)
+ return (EBUSY);
+
+ switch (*(int *)data & ~IFF_MULTICAST) {
+ case IFF_POINTOPOINT:
+ case IFF_BROADCAST:
+ TUN_LOCK(tp);
+ TUN2IFP(tp)->if_flags &=
+ ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
+ TUN2IFP(tp)->if_flags |= *(int *)data;
+ TUN_UNLOCK(tp);
+
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+ case TUNSIFPID:
+#ifndef __rtems__
+ TUN_LOCK(tp);
+ tp->tun_pid = curthread->td_proc->p_pid;
+ TUN_UNLOCK(tp);
+#endif /* __rtems__ */
+
+ return (0);
+ }
+ /* Fall through to the common ioctls if unhandled */
+ }
+
+ switch (cmd) {
+ case TUNGIFNAME:
+ ifrp = (struct ifreq *)data;
+ strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);
+
+ return (0);
+ case TUNSIFINFO:
+ tunp = (struct tuninfo *)data;
+ if (TUN2IFP(tp)->if_type != tunp->type)
+ return (EPROTOTYPE);
+ TUN_LOCK(tp);
+ if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
+ strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
+ ifr.ifr_mtu = tunp->mtu;
+ CURVNET_SET(TUN2IFP(tp)->if_vnet);
+ error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
+ (caddr_t)&ifr, td);
+ CURVNET_RESTORE();
+ if (error) {
+ TUN_UNLOCK(tp);
+ return (error);
+ }
+ }
+ TUN2IFP(tp)->if_baudrate = tunp->baudrate;
+ TUN_UNLOCK(tp);
+ break;
+ case TUNGIFINFO:
+ tunp = (struct tuninfo *)data;
+ TUN_LOCK(tp);
+ tunp->mtu = TUN2IFP(tp)->if_mtu;
+ tunp->type = TUN2IFP(tp)->if_type;
+ tunp->baudrate = TUN2IFP(tp)->if_baudrate;
+ TUN_UNLOCK(tp);
+ break;
+ case TUNSDEBUG:
+ tundebug = *(int *)data;
+ break;
+ case TUNGDEBUG:
+ *(int *)data = tundebug;
+ break;
+ case FIONBIO:
+ break;
+ case FIOASYNC:
+ TUN_LOCK(tp);
+ if (*(int *)data)
+ tp->tun_flags |= TUN_ASYNC;
+ else
+ tp->tun_flags &= ~TUN_ASYNC;
+ TUN_UNLOCK(tp);
+ break;
+ case FIONREAD:
+ if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
+ struct mbuf *mb;
+ IFQ_LOCK(&TUN2IFP(tp)->if_snd);
+ IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
+ for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
+ *(int *)data += mb->m_len;
+ IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
+ } else
+ *(int *)data = 0;
+ break;
+ case FIOSETOWN:
+ return (fsetown(*(int *)data, &tp->tun_sigio));
+
+ case FIOGETOWN:
+ *(int *)data = fgetown(&tp->tun_sigio);
+ return (0);
+
+ /* This is deprecated, FIOSETOWN should be used instead. */
+ case TIOCSPGRP:
+ return (fsetown(-(*(int *)data), &tp->tun_sigio));
+
+ /* This is deprecated, FIOGETOWN should be used instead. */
+ case TIOCGPGRP:
+ *(int *)data = -fgetown(&tp->tun_sigio);
+ return (0);
+
+ default:
+ return (ENOTTY);
+ }
+ return (0);
+}
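+
+/*
+ * Editor's sketch of the ioctl interface above (hypothetical userspace
+ * fragment; assumes <net/if_tun.h> and an open tun descriptor fd):
+ *
+ *	struct ifreq ifr;
+ *	int one = 1;
+ *
+ *	ioctl(fd, TUNSIFHEAD, &one);  // prefix packets with a 4-byte AF
+ *	ioctl(fd, TUNGIFNAME, &ifr);  // ifr.ifr_name now holds e.g. "tun0"
+ */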
+
+/*
+ * The cdevsw read interface - reads a packet at a time, or at
+ * least as much of a packet as can be read.
+ */
+static int
+tunread(struct cdev *dev, struct uio *uio, int flag)
+{
+ struct tuntap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+ struct mbuf *m;
+ int error=0, len;
+
+ TUNDEBUG (ifp, "read\n");
+ TUN_LOCK(tp);
+ if ((tp->tun_flags & TUN_READY) != TUN_READY) {
+ TUN_UNLOCK(tp);
+ TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
+ return (EHOSTDOWN);
+ }
+
+ tp->tun_flags &= ~TUN_RWAIT;
+
+ for (;;) {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ if (m != NULL)
+ break;
+ if (flag & O_NONBLOCK) {
+ TUN_UNLOCK(tp);
+ return (EWOULDBLOCK);
+ }
+ tp->tun_flags |= TUN_RWAIT;
+ error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
+ "tunread", 0);
+ if (error != 0) {
+ TUN_UNLOCK(tp);
+ return (error);
+ }
+ }
+ TUN_UNLOCK(tp);
+
+ if ((tp->tun_flags & TUN_L2) != 0)
+ BPF_MTAP(ifp, m);
+
+ while (m && uio->uio_resid > 0 && error == 0) {
+ len = min(uio->uio_resid, m->m_len);
+ if (len != 0)
+ error = uiomove(mtod(m, void *), len, uio);
+ m = m_free(m);
+ }
+
+ if (m) {
+ TUNDEBUG(ifp, "Dropping mbuf\n");
+ m_freem(m);
+ }
+ return (error);
+}
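+
+/*
+ * Editor's sketch (assumed userspace fragment): with TUNSIFHEAD set, each
+ * read() returns exactly one packet preceded by its address family in
+ * network byte order:
+ *
+ *	char buf[TUNMRU + sizeof(uint32_t)];
+ *	ssize_t n = read(fd, buf, sizeof(buf));
+ *	uint32_t af = ntohl(*(uint32_t *)buf);  // AF_INET or AF_INET6
+ *	// the payload is buf + 4, of length n - 4
+ */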
+
+static int
+tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m)
+{
+ struct ether_header *eh;
+ struct ifnet *ifp;
+
+ ifp = TUN2IFP(tp);
+
+ /*
+ * Only pass a unicast frame to ether_input(), if it would
+ * actually have been received by non-virtual hardware.
+ */
+ if (m->m_len < sizeof(struct ether_header)) {
+ m_freem(m);
+ return (0);
+ }
+
+ eh = mtod(m, struct ether_header *);
+
+ if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
+ !ETHER_IS_MULTICAST(eh->ether_dhost) &&
+ bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
+ m_freem(m);
+ return (0);
+ }
+
+ /* Pass packet up to parent. */
+ CURVNET_SET(ifp->if_vnet);
+ (*ifp->if_input)(ifp, m);
+ CURVNET_RESTORE();
+ /* ibytes are counted in parent */
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ return (0);
+}
+
+static int
+tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m)
+{
+ struct ifnet *ifp;
+ int family, isr;
+
+ ifp = TUN2IFP(tp);
+ /* Could be unlocked read? */
+ TUN_LOCK(tp);
+ if (tp->tun_flags & TUN_IFHEAD) {
+ TUN_UNLOCK(tp);
+ if (m->m_len < sizeof(family) &&
+ (m = m_pullup(m, sizeof(family))) == NULL)
+ return (ENOBUFS);
+ family = ntohl(*mtod(m, u_int32_t *));
+ m_adj(m, sizeof(family));
+ } else {
+ TUN_UNLOCK(tp);
+ family = AF_INET;
+ }
+
+ BPF_MTAP2(ifp, &family, sizeof(family), m);
+
+ switch (family) {
+#ifdef INET
+ case AF_INET:
+ isr = NETISR_IP;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ isr = NETISR_IPV6;
+ break;
+#endif
+ default:
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+ random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ CURVNET_SET(ifp->if_vnet);
+ M_SETFIB(m, ifp->if_fib);
+ netisr_dispatch(isr, m);
+ CURVNET_RESTORE();
+ return (0);
+}
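+
+/*
+ * Editor's sketch of the matching write side (assumed userspace fragment;
+ * pkt/pktlen are hypothetical): in TUN_IFHEAD mode the caller prepends the
+ * family; otherwise the whole write is treated as one AF_INET packet:
+ *
+ *	uint32_t af = htonl(AF_INET);
+ *	struct iovec iov[2] = {
+ *		{ &af, sizeof(af) },
+ *		{ pkt, pktlen },      // one complete IPv4 datagram
+ *	};
+ *	writev(fd, iov, 2);           // an atomic write is a packet
+ */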
+
+/*
+ * the cdevsw write interface - an atomic write is a packet - or else!
+ */
+static int
+tunwrite(struct cdev *dev, struct uio *uio, int flag)
+{
+ struct tuntap_softc *tp;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ uint32_t mru;
+ int align;
+ bool l2tun;
+
+ tp = dev->si_drv1;
+ ifp = TUN2IFP(tp);
+ TUNDEBUG(ifp, "tunwrite\n");
+ if ((ifp->if_flags & IFF_UP) != IFF_UP)
+ /* ignore silently */
+ return (0);
+
+ if (uio->uio_resid == 0)
+ return (0);
+
+ l2tun = (tp->tun_flags & TUN_L2) != 0;
+ align = 0;
+ mru = l2tun ? TAPMRU : TUNMRU;
+ if (l2tun)
+ align = ETHER_ALIGN;
+ else if ((tp->tun_flags & TUN_IFHEAD) != 0)
+ mru += sizeof(uint32_t); /* family */
+ if (uio->uio_resid < 0 || uio->uio_resid > mru) {
+ TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
+ return (EIO);
+ }
+
+ if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ return (ENOBUFS);
+ }
+
+ m->m_pkthdr.rcvif = ifp;
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+ if (l2tun)
+ return (tunwrite_l2(tp, m));
+
+ return (tunwrite_l3(tp, m));
+}
+
+/*
+ * tunpoll - the poll interface; it is really only useful for reads.
+ * Write detection always returns true: a write never blocks, the
+ * packet is either accepted or dropped.
+ */
+static int
+tunpoll(struct cdev *dev, int events, struct thread *td)
+{
+ struct tuntap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+ int revents = 0;
+
+ TUNDEBUG(ifp, "tunpoll\n");
+
+ if (events & (POLLIN | POLLRDNORM)) {
+ IFQ_LOCK(&ifp->if_snd);
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
+ revents |= events & (POLLIN | POLLRDNORM);
+ } else {
+ TUNDEBUG(ifp, "tunpoll waiting\n");
+ selrecord(td, &tp->tun_rsel);
+ }
+ IFQ_UNLOCK(&ifp->if_snd);
+ }
+ revents |= events & (POLLOUT | POLLWRNORM);
+
+ return (revents);
+}
+
+/*
+ * tunkqfilter - support for the kevent() system call.
+ */
+static int
+tunkqfilter(struct cdev *dev, struct knote *kn)
+{
+ struct tuntap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+
+ switch(kn->kn_filter) {
+ case EVFILT_READ:
+ TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ kn->kn_fop = &tun_read_filterops;
+ break;
+
+ case EVFILT_WRITE:
+ TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ kn->kn_fop = &tun_write_filterops;
+ break;
+
+ default:
+ TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ return(EINVAL);
+ }
+
+ kn->kn_hook = tp;
+ knlist_add(&tp->tun_rsel.si_note, kn, 0);
+
+ return (0);
+}
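+
+/*
+ * Editor's sketch (assumption): registering for read events with kevent():
+ *
+ *	int kq = kqueue();
+ *	struct kevent kev;
+ *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ *	kevent(kq, &kev, 1, NULL, 0, NULL);  // tunkqread() supplies kn_data
+ */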
+
+/*
+ * Return true if there is data in the interface queue.
+ */
+static int
+tunkqread(struct knote *kn, long hint)
+{
+ int ret;
+ struct tuntap_softc *tp = kn->kn_hook;
+ struct cdev *dev = tp->tun_dev;
+ struct ifnet *ifp = TUN2IFP(tp);
+
+ if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
+ TUNDEBUG(ifp,
+ "%s have data in the queue. Len = %d, minor = %#x\n",
+ ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
+ ret = 1;
+ } else {
+ TUNDEBUG(ifp,
+ "%s waiting for data, minor = %#x\n", ifp->if_xname,
+ dev2unit(dev));
+ ret = 0;
+ }
+
+ return (ret);
+}
+
+/*
+ * Always can write, always return MTU in kn->data.
+ */
+static int
+tunkqwrite(struct knote *kn, long hint)
+{
+ struct tuntap_softc *tp = kn->kn_hook;
+ struct ifnet *ifp = TUN2IFP(tp);
+
+ kn->kn_data = ifp->if_mtu;
+
+ return (1);
+}
+
+static void
+tunkqdetach(struct knote *kn)
+{
+ struct tuntap_softc *tp = kn->kn_hook;
+
+ knlist_remove(&tp->tun_rsel.si_note, kn, 0);
+}
diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h
index d23928e5..700296fa 100644
--- a/freebsd/sys/net/if_var.h
+++ b/freebsd/sys/net/if_var.h
@@ -73,6 +73,7 @@ struct netmap_adapter;
struct netdump_methods;
#ifdef _KERNEL
+#include <sys/_eventhandler.h>
#include <sys/mbuf.h> /* ifqueue only? */
#include <sys/buf_ring.h>
#include <net/vnet.h>
@@ -95,8 +96,9 @@ CK_STAILQ_HEAD(ifmultihead, ifmultiaddr);
CK_STAILQ_HEAD(ifgrouphead, ifg_group);
#ifdef _KERNEL
-VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */
-#define V_link_pfil_hook VNET(link_pfil_hook)
+VNET_DECLARE(struct pfil_head *, link_pfil_head);
+#define V_link_pfil_head VNET(link_pfil_head)
+#define PFIL_ETHER_NAME "ethernet"
#define HHOOK_IPSEC_INET 0
#define HHOOK_IPSEC_INET6 1
@@ -193,11 +195,13 @@ struct if_encap_req {
* m_snd_tag" comes from the network driver and it is free to allocate
* as much additional space as it wants for its own use.
*/
+struct ktls_session;
struct m_snd_tag;
#define IF_SND_TAG_TYPE_RATE_LIMIT 0
#define IF_SND_TAG_TYPE_UNLIMITED 1
-#define IF_SND_TAG_TYPE_MAX 2
+#define IF_SND_TAG_TYPE_TLS 2
+#define IF_SND_TAG_TYPE_MAX 3
struct if_snd_tag_alloc_header {
uint32_t type; /* send tag type, see IF_SND_TAG_XXX */
@@ -208,6 +212,14 @@ struct if_snd_tag_alloc_header {
struct if_snd_tag_alloc_rate_limit {
struct if_snd_tag_alloc_header hdr;
uint64_t max_rate; /* in bytes/s */
+ uint32_t flags; /* M_NOWAIT or M_WAITOK */
+ uint32_t reserved; /* alignment */
+};
+
+struct if_snd_tag_alloc_tls {
+ struct if_snd_tag_alloc_header hdr;
+ struct inpcb *inp;
+ const struct ktls_session *tls;
};
struct if_snd_tag_rate_limit_params {
@@ -215,13 +227,14 @@ struct if_snd_tag_rate_limit_params {
uint32_t queue_level; /* 0 (empty) .. 65535 (full) */
#define IF_SND_QUEUE_LEVEL_MIN 0
#define IF_SND_QUEUE_LEVEL_MAX 65535
- uint32_t reserved; /* padding */
+ uint32_t flags; /* M_NOWAIT or M_WAITOK */
};
union if_snd_tag_alloc_params {
struct if_snd_tag_alloc_header hdr;
struct if_snd_tag_alloc_rate_limit rate_limit;
struct if_snd_tag_alloc_rate_limit unlimited;
+ struct if_snd_tag_alloc_tls tls;
};
union if_snd_tag_modify_params {
@@ -234,11 +247,37 @@ union if_snd_tag_query_params {
struct if_snd_tag_rate_limit_params unlimited;
};
+/* Query return flags */
+#define RT_NOSUPPORT 0x00000000 /* Not supported */
+#define RT_IS_INDIRECT 0x00000001 /*
+ * Indirect interface (e.g. a lagg);
+ * query the underlying interface
+ * for its capabilities.
+ */
+#define RT_IS_SELECTABLE 0x00000002 /*
+ * No rate table; the caller selects
+ * rates and the first
+ * number_of_rates are created.
+ */
+#define RT_IS_FIXED_TABLE 0x00000004 /* A fixed table is attached */
+#define RT_IS_UNUSABLE 0x00000008 /* Not usable on this interface */
+
+struct if_ratelimit_query_results {
+ const uint64_t *rate_table; /* Pointer to table if present */
+ uint32_t flags; /* Flags indicating results */
+ uint32_t max_flows; /* Max flows that may use this, 0 = unlimited */
+ uint32_t number_of_rates; /* How many unique rates can be created */
+ uint32_t min_segment_burst; /* The amount the adapter bursts at each send */
+};
+
typedef int (if_snd_tag_alloc_t)(struct ifnet *, union if_snd_tag_alloc_params *,
struct m_snd_tag **);
typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *);
typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *);
typedef void (if_snd_tag_free_t)(struct m_snd_tag *);
+typedef void (if_ratelimit_query_t)(struct ifnet *,
+ struct if_ratelimit_query_results *);
+
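As a sketch of how a driver might implement the new query callback by
filling in struct if_ratelimit_query_results (the foo_ prefix and the rate
values are hypothetical, not part of this change):

static const uint64_t foo_rate_table[] = {
	1000000, 10000000, 100000000,	/* bytes/s, illustrative only */
};

static void
foo_ratelimit_query(struct ifnet *ifp __unused,
    struct if_ratelimit_query_results *q)
{
	q->rate_table = foo_rate_table;
	q->flags = RT_IS_FIXED_TABLE;		/* fixed table attached */
	q->max_flows = 0;			/* unlimited flows */
	q->number_of_rates = nitems(foo_rate_table);
	q->min_segment_burst = 4;		/* bursts 4 segments/send */
}
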
/*
* Structure defining a network interface.
@@ -250,7 +289,9 @@ struct ifnet {
CK_STAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if (CK_) */
/* protected by if_addr_lock */
u_char if_alloctype; /* if_type at time of allocation */
-
+#ifndef __rtems__
+ uint8_t if_numa_domain; /* NUMA domain of device */
+#endif /* __rtems__ */
/* Driver and protocol specific information that remains stable. */
void *if_softc; /* pointer to driver state */
void *if_llsoftc; /* link layer softc */
@@ -379,6 +420,7 @@ struct ifnet {
if_snd_tag_modify_t *if_snd_tag_modify;
if_snd_tag_query_t *if_snd_tag_query;
if_snd_tag_free_t *if_snd_tag_free;
+ if_ratelimit_query_t *if_ratelimit_query;
/* Ethernet PCP */
uint8_t if_pcp;
@@ -416,24 +458,21 @@ struct rtems_ifinputreq {
/* for compatibility with other BSDs */
#define if_name(ifp) ((ifp)->if_xname)
+#define IF_NODOM 255
/*
* Locks for address lists on the network interface.
*/
#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF)
#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock)
-#define IF_ADDR_RLOCK(if) struct epoch_tracker if_addr_et; epoch_enter_preempt(net_epoch_preempt, &if_addr_et);
-#define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt, &if_addr_et);
#define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock)
#define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock)
#define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock))
#define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED)
-#define NET_EPOCH_ENTER() struct epoch_tracker nep_et; epoch_enter_preempt(net_epoch_preempt, &nep_et)
-#define NET_EPOCH_ENTER_ET(et) epoch_enter_preempt(net_epoch_preempt, &(et))
-#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt, &nep_et)
-#define NET_EPOCH_EXIT_ET(et) epoch_exit_preempt(net_epoch_preempt, &(et))
-#define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt)
-
+#define NET_EPOCH_ENTER(et) epoch_enter_preempt(net_epoch_preempt, &(et))
+#define NET_EPOCH_EXIT(et) epoch_exit_preempt(net_epoch_preempt, &(et))
+#define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt)
+#define NET_EPOCH_ASSERT() MPASS(in_epoch(net_epoch_preempt))
/*
* Function variations on locking macros intended to be used by loadable
@@ -446,7 +485,6 @@ void if_maddr_rlock(if_t ifp); /* if_multiaddrs */
void if_maddr_runlock(if_t ifp); /* if_multiaddrs */
#ifdef _KERNEL
-#ifdef _SYS_EVENTHANDLER_H_
/* interface link layer address change event */
typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t);
@@ -474,7 +512,6 @@ EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t);
typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event);
EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn);
-#endif /* _SYS_EVENTHANDLER_H_ */
/*
* interface groups
@@ -513,16 +550,13 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
mtx_init(&(ifp)->if_afdata_lock, "if_afdata", NULL, MTX_DEF)
#define IF_AFDATA_WLOCK(ifp) mtx_lock(&(ifp)->if_afdata_lock)
-#define IF_AFDATA_RLOCK(ifp) struct epoch_tracker if_afdata_et; epoch_enter_preempt(net_epoch_preempt, &if_afdata_et)
#define IF_AFDATA_WUNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_lock)
-#define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt, &if_afdata_et)
#define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp)
#define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp)
#define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_lock)
#define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ifp)->if_afdata_lock))
-#define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt));
#define IF_AFDATA_WLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_OWNED)
#define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_NOTOWNED)
@@ -606,16 +640,13 @@ extern struct sx ifnet_sxlock;
* write, but also whether it was acquired with sleep support or not.
*/
#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
-#define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch(net_epoch_preempt))
#define IFNET_WLOCK_ASSERT() do { \
sx_assert(&ifnet_sxlock, SA_XLOCKED); \
rw_assert(&ifnet_rwlock, RA_WLOCKED); \
} while (0)
#define IFNET_RLOCK() sx_slock(&ifnet_sxlock)
-#define IFNET_RLOCK_NOSLEEP() struct epoch_tracker ifnet_rlock_et; epoch_enter_preempt(net_epoch_preempt, &ifnet_rlock_et)
#define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock)
-#define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt, &ifnet_rlock_et)
/*
* Look up an ifnet given its index; the _ref variant also acquires a
@@ -654,6 +685,8 @@ int if_delgroup(struct ifnet *, const char *);
int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **);
int if_allmulti(struct ifnet *, int);
struct ifnet* if_alloc(u_char);
+struct ifnet* if_alloc_dev(u_char, device_t dev);
+struct ifnet* if_alloc_domain(u_char, int numa_domain);
void if_attach(struct ifnet *);
void if_dead(struct ifnet *);
int if_delmulti(struct ifnet *, struct sockaddr *);
diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c
index 893bb2cf..10a8a3bf 100644
--- a/freebsd/sys/net/if_vlan.c
+++ b/freebsd/sys/net/if_vlan.c
@@ -48,6 +48,7 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_vlan.h>
#include <rtems/bsd/local/opt_ratelimit.h>
@@ -105,6 +106,20 @@ struct ifvlantrunk {
int refcnt;
};
+#if defined(KERN_TLS) || defined(RATELIMIT)
+struct vlan_snd_tag {
+ struct m_snd_tag com;
+ struct m_snd_tag *tag;
+};
+
+static inline struct vlan_snd_tag *
+mst_to_vst(struct m_snd_tag *mst)
+{
+
+ return (__containerof(mst, struct vlan_snd_tag, com));
+}
+#endif
+
/*
* This macro provides a facility to iterate over every vlan on a trunk with
* the assumption that none will be added/removed during iteration.
@@ -158,7 +173,7 @@ struct vlan_mc_entry {
struct epoch_context mc_epoch_ctx;
};
-struct ifvlan {
+struct ifvlan {
struct ifvlantrunk *ifv_trunk;
struct ifnet *ifv_ifp;
#define TRUNK(ifv) ((ifv)->ifv_trunk)
@@ -166,28 +181,19 @@ struct ifvlan {
void *ifv_cookie;
int ifv_pflags; /* special flags we have set on parent */
int ifv_capenable;
- struct ifv_linkmib {
- int ifvm_encaplen; /* encapsulation length */
- int ifvm_mtufudge; /* MTU fudged by this much */
- int ifvm_mintu; /* min transmission unit */
- uint16_t ifvm_proto; /* encapsulation ethertype */
- uint16_t ifvm_tag; /* tag to apply on packets leaving if */
- uint16_t ifvm_vid; /* VLAN ID */
- uint8_t ifvm_pcp; /* Priority Code Point (PCP). */
- } ifv_mib;
+ int ifv_encaplen; /* encapsulation length */
+ int ifv_mtufudge; /* MTU fudged by this much */
+ int ifv_mintu; /* min transmission unit */
+ uint16_t ifv_proto; /* encapsulation ethertype */
+ uint16_t ifv_tag; /* tag to apply on packets leaving if */
+ uint16_t ifv_vid; /* VLAN ID */
+ uint8_t ifv_pcp; /* Priority Code Point (PCP). */
struct task lladdr_task;
CK_SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
#ifndef VLAN_ARRAY
CK_SLIST_ENTRY(ifvlan) ifv_list;
#endif
};
-#define ifv_proto ifv_mib.ifvm_proto
-#define ifv_tag ifv_mib.ifvm_tag
-#define ifv_vid ifv_mib.ifvm_vid
-#define ifv_pcp ifv_mib.ifvm_pcp
-#define ifv_encaplen ifv_mib.ifvm_encaplen
-#define ifv_mtufudge ifv_mib.ifvm_mtufudge
-#define ifv_mintu ifv_mib.ifvm_mintu
/* Special flags we should propagate to parent. */
static struct {
@@ -235,10 +241,6 @@ static struct sx _VLAN_SX_ID;
#define VLAN_LOCKING_DESTROY() \
sx_destroy(&_VLAN_SX_ID)
-#define VLAN_RLOCK() NET_EPOCH_ENTER();
-#define VLAN_RUNLOCK() NET_EPOCH_EXIT();
-#define VLAN_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt))
-
#define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID)
#define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID)
#define VLAN_XLOCK() sx_xlock(&_VLAN_SX_ID)
@@ -254,11 +256,8 @@ static struct sx _VLAN_SX_ID;
*/
#define TRUNK_LOCK_INIT(trunk) mtx_init(&(trunk)->lock, vlanname, NULL, MTX_DEF)
#define TRUNK_LOCK_DESTROY(trunk) mtx_destroy(&(trunk)->lock)
-#define TRUNK_RLOCK(trunk) NET_EPOCH_ENTER()
#define TRUNK_WLOCK(trunk) mtx_lock(&(trunk)->lock)
-#define TRUNK_RUNLOCK(trunk) NET_EPOCH_EXIT();
#define TRUNK_WUNLOCK(trunk) mtx_unlock(&(trunk)->lock)
-#define TRUNK_RLOCK_ASSERT(trunk) MPASS(in_epoch(net_epoch_preempt))
#define TRUNK_LOCK_ASSERT(trunk) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(trunk)->lock))
#define TRUNK_WLOCK_ASSERT(trunk) mtx_assert(&(trunk)->lock, MA_OWNED);
@@ -282,9 +281,14 @@ static void trunk_destroy(struct ifvlantrunk *trunk);
static void vlan_init(void *foo);
static void vlan_input(struct ifnet *ifp, struct mbuf *m);
static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
static int vlan_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *, struct m_snd_tag **);
+static int vlan_snd_tag_modify(struct m_snd_tag *,
+ union if_snd_tag_modify_params *);
+static int vlan_snd_tag_query(struct m_snd_tag *,
+ union if_snd_tag_query_params *);
+static void vlan_snd_tag_free(struct m_snd_tag *);
#endif
static void vlan_qflush(struct ifnet *ifp);
static int vlan_setflag(struct ifnet *ifp, int flag, int status,
@@ -292,6 +296,8 @@ static int vlan_setflag(struct ifnet *ifp, int flag, int status,
static int vlan_setflags(struct ifnet *ifp, int status);
static int vlan_setmulti(struct ifnet *ifp);
static int vlan_transmit(struct ifnet *ifp, struct mbuf *m);
+static int vlan_output(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro);
static void vlan_unconfig(struct ifnet *ifp);
static void vlan_unconfig_locked(struct ifnet *ifp, int departing);
static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
@@ -474,7 +480,7 @@ vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
{
struct ifvlan *ifv;
- TRUNK_RLOCK_ASSERT(trunk);
+ NET_EPOCH_ASSERT();
CK_SLIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
if (ifv->ifv_vid == vid)
@@ -619,16 +625,17 @@ vlan_setmulti(struct ifnet *ifp)
static void
vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifvlan *ifv;
struct ifnet *ifv_ifp;
struct ifvlantrunk *trunk;
struct sockaddr_dl *sdl;
- /* Need the rmlock since this is run on taskqueue_swi. */
- VLAN_RLOCK();
+ /* Need the epoch since this is run on taskqueue_swi. */
+ NET_EPOCH_ENTER(et);
trunk = ifp->if_vlantrunk;
if (trunk == NULL) {
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return;
}
@@ -654,7 +661,7 @@ vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task);
}
TRUNK_WUNLOCK(trunk);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
}
/*
@@ -700,17 +707,18 @@ vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
static struct ifnet *
vlan_trunkdev(struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifvlan *ifv;
if (ifp->if_type != IFT_L2VLAN)
return (NULL);
- VLAN_RLOCK();
+ NET_EPOCH_ENTER(et);
ifv = ifp->if_softc;
ifp = NULL;
if (ifv->ifv_trunk)
ifp = PARENT(ifv);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return (ifp);
}
@@ -782,20 +790,21 @@ vlan_setcookie(struct ifnet *ifp, void *cookie)
static struct ifnet *
vlan_devat(struct ifnet *ifp, uint16_t vid)
{
+ struct epoch_tracker et;
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
- VLAN_RLOCK();
+ NET_EPOCH_ENTER(et);
trunk = ifp->if_vlantrunk;
if (trunk == NULL) {
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return (NULL);
}
ifp = NULL;
ifv = vlan_gethash(trunk, vid);
if (ifv)
ifp = ifv->ifv_ifp;
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return (ifp);
}
@@ -1055,17 +1064,16 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
strlcpy(ifp->if_xname, name, IFNAMSIZ);
ifp->if_dname = vlanname;
ifp->if_dunit = unit;
- /* NB: flags are not set here */
- ifp->if_linkmib = &ifv->ifv_mib;
- ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
- /* NB: mtu is not set here */
ifp->if_init = vlan_init;
ifp->if_transmit = vlan_transmit;
ifp->if_qflush = vlan_qflush;
ifp->if_ioctl = vlan_ioctl;
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
ifp->if_snd_tag_alloc = vlan_snd_tag_alloc;
+ ifp->if_snd_tag_modify = vlan_snd_tag_modify;
+ ifp->if_snd_tag_query = vlan_snd_tag_query;
+ ifp->if_snd_tag_free = vlan_snd_tag_free;
#endif
ifp->if_flags = VLAN_IFFLAGS;
ether_ifattach(ifp, eaddr);
@@ -1135,15 +1143,16 @@ vlan_init(void *foo __unused)
static int
vlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
+ struct epoch_tracker et;
struct ifvlan *ifv;
struct ifnet *p;
int error, len, mcast;
- VLAN_RLOCK();
+ NET_EPOCH_ENTER(et);
ifv = ifp->if_softc;
if (TRUNK(ifv) == NULL) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return (ENETDOWN);
}
@@ -1153,20 +1162,40 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
BPF_MTAP(ifp, m);
+#if defined(KERN_TLS) || defined(RATELIMIT)
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ struct vlan_snd_tag *vst;
+ struct m_snd_tag *mst;
+
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+ mst = m->m_pkthdr.snd_tag;
+ vst = mst_to_vst(mst);
+ if (vst->tag->ifp != p) {
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ NET_EPOCH_EXIT(et);
+ m_freem(m);
+ return (EAGAIN);
+ }
+
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(vst->tag);
+ m_snd_tag_rele(mst);
+ }
+#endif
+
/*
* Do not run parent's if_transmit() if the parent is not up,
* or parent's driver will cause a system crash.
*/
if (!UP_AND_RUNNING(p)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return (ENETDOWN);
}
if (!ether_8021q_frame(&m, ifp, p, ifv->ifv_vid, ifv->ifv_pcp)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return (0);
}
@@ -1180,10 +1209,31 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast);
} else
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return (error);
}
+static int
+vlan_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ struct epoch_tracker et;
+ struct ifvlan *ifv;
+ struct ifnet *p;
+
+ NET_EPOCH_ENTER(et);
+ ifv = ifp->if_softc;
+ if (TRUNK(ifv) == NULL) {
+ NET_EPOCH_EXIT(et);
+ m_freem(m);
+ return (ENETDOWN);
+ }
+ p = PARENT(ifv);
+ NET_EPOCH_EXIT(et);
+ return p->if_output(ifp, m, dst, ro);
+}
+
/*
* The ifp->if_qflush entry point for vlan(4) is a no-op.
*/
@@ -1195,15 +1245,16 @@ vlan_qflush(struct ifnet *ifp __unused)
static void
vlan_input(struct ifnet *ifp, struct mbuf *m)
{
+ struct epoch_tracker et;
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
struct m_tag *mtag;
uint16_t vid, tag;
- VLAN_RLOCK();
+ NET_EPOCH_ENTER(et);
trunk = ifp->if_vlantrunk;
if (trunk == NULL) {
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return;
}
@@ -1226,7 +1277,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
if (m->m_len < sizeof(*evl) &&
(m = m_pullup(m, sizeof(*evl))) == NULL) {
if_printf(ifp, "cannot pullup VLAN header\n");
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return;
}
evl = mtod(m, struct ether_vlan_header *);
@@ -1249,7 +1300,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
__func__, ifp->if_xname, ifp->if_type);
#endif
if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return;
}
@@ -1259,7 +1310,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
ifv = vlan_gethash(trunk, vid);
if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
m_freem(m);
return;
@@ -1279,7 +1330,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
sizeof(uint8_t), M_NOWAIT);
if (mtag == NULL) {
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return;
}
@@ -1290,7 +1341,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
m->m_pkthdr.rcvif = ifv->ifv_ifp;
if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
/* Pass it back through the parent's input routine. */
(*ifv->ifv_ifp->if_input)(ifv->ifv_ifp, m);
@@ -1316,6 +1367,7 @@ vlan_lladdr_fn(void *arg, int pending __unused)
static int
vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
{
+ struct epoch_tracker et;
struct ifvlantrunk *trunk;
struct ifnet *ifp;
int error = 0;
@@ -1397,7 +1449,6 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
*/
ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
ifp->if_baudrate = p->if_baudrate;
- ifp->if_output = p->if_output;
ifp->if_input = p->if_input;
ifp->if_resolvemulti = p->if_resolvemulti;
ifp->if_addrlen = p->if_addrlen;
@@ -1405,6 +1456,12 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
ifp->if_pcp = ifv->ifv_pcp;
/*
+ * We wrap the parent's if_output using vlan_output to ensure that it
+ * can't become stale.
+ */
+ ifp->if_output = vlan_output;
+
+ /*
* Copy only a selected subset of flags from the parent.
* Other flags are none of our business.
*/
@@ -1415,9 +1472,9 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
ifp->if_link_state = p->if_link_state;
- TRUNK_RLOCK(TRUNK(ifv));
+ NET_EPOCH_ENTER(et);
vlan_capabilities(ifv);
- TRUNK_RUNLOCK(TRUNK(ifv));
+ NET_EPOCH_EXIT(et);
/*
* Set up our interface address to reflect the underlying
@@ -1589,14 +1646,15 @@ vlan_setflags(struct ifnet *ifp, int status)
static void
vlan_link_state(struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
/* Called from a taskqueue_swi task, so we cannot sleep. */
- VLAN_RLOCK();
+ NET_EPOCH_ENTER(et);
trunk = ifp->if_vlantrunk;
if (trunk == NULL) {
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return;
}
@@ -1607,7 +1665,7 @@ vlan_link_state(struct ifnet *ifp)
trunk->parent->if_link_state);
}
TRUNK_WUNLOCK(trunk);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
}
static void
@@ -1620,7 +1678,7 @@ vlan_capabilities(struct ifvlan *ifv)
u_long hwa = 0;
VLAN_SXLOCK_ASSERT();
- TRUNK_RLOCK_ASSERT(TRUNK(ifv));
+ NET_EPOCH_ASSERT();
p = PARENT(ifv);
ifp = ifv->ifv_ifp;
@@ -1704,6 +1762,30 @@ vlan_capabilities(struct ifvlan *ifv)
ena |= (mena & IFCAP_TXRTLMT);
#endif
+ /*
+ * If the parent interface supports unmapped mbufs, so does
+ * the VLAN interface. Note that this should be fine even for
+ * interfaces that don't support hardware tagging as headers
+ * are prepended in normal mbufs to unmapped mbufs holding
+ * payload data.
+ */
+ cap |= (p->if_capabilities & IFCAP_NOMAP);
+ ena |= (mena & IFCAP_NOMAP);
+
+ /*
+ * If the parent interface can offload encryption and segmentation
+ * of TLS records over TCP, propagate its capability to the VLAN
+ * interface.
+ *
+ * All TLS drivers in the tree today can deal with VLANs. If
+ * this ever changes, then a new IFCAP_VLAN_TXTLS can be
+ * defined.
+ */
+ if (p->if_capabilities & IFCAP_TXTLS)
+ cap |= p->if_capabilities & IFCAP_TXTLS;
+ if (p->if_capenable & IFCAP_TXTLS)
+ ena |= mena & IFCAP_TXTLS;
+
ifp->if_capabilities = cap;
ifp->if_capenable = ena;
ifp->if_hwassist = hwa;
@@ -1712,6 +1794,7 @@ vlan_capabilities(struct ifvlan *ifv)
static void
vlan_trunk_capabilities(struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
@@ -1721,11 +1804,11 @@ vlan_trunk_capabilities(struct ifnet *ifp)
VLAN_SUNLOCK();
return;
}
- TRUNK_RLOCK(trunk);
+ NET_EPOCH_ENTER(et);
VLAN_FOREACH(ifv, trunk) {
vlan_capabilities(ifv);
}
- TRUNK_RUNLOCK(trunk);
+ NET_EPOCH_EXIT(et);
VLAN_SUNLOCK();
}
@@ -1917,9 +2000,11 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
ifv->ifv_capenable = ifr->ifr_reqcap;
trunk = TRUNK(ifv);
if (trunk != NULL) {
- TRUNK_RLOCK(trunk);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
vlan_capabilities(ifv);
- TRUNK_RUNLOCK(trunk);
+ NET_EPOCH_EXIT(et);
}
VLAN_SUNLOCK();
break;
@@ -1932,18 +2017,77 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
return (error);
}
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
static int
vlan_snd_tag_alloc(struct ifnet *ifp,
union if_snd_tag_alloc_params *params,
struct m_snd_tag **ppmt)
{
+ struct epoch_tracker et;
+ struct vlan_snd_tag *vst;
+ struct ifvlan *ifv;
+ struct ifnet *parent;
+ int error;
- /* get trunk device */
- ifp = vlan_trunkdev(ifp);
- if (ifp == NULL || (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
+ NET_EPOCH_ENTER(et);
+ ifv = ifp->if_softc;
+ if (ifv->ifv_trunk != NULL)
+ parent = PARENT(ifv);
+ else
+ parent = NULL;
+ if (parent == NULL || parent->if_snd_tag_alloc == NULL) {
+ NET_EPOCH_EXIT(et);
return (EOPNOTSUPP);
- /* forward allocation request */
- return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+ }
+ if_ref(parent);
+ NET_EPOCH_EXIT(et);
+
+ vst = malloc(sizeof(*vst), M_VLAN, M_NOWAIT);
+ if (vst == NULL) {
+ if_rele(parent);
+ return (ENOMEM);
+ }
+
+ error = parent->if_snd_tag_alloc(parent, params, &vst->tag);
+ if_rele(parent);
+ if (error) {
+ free(vst, M_VLAN);
+ return (error);
+ }
+
+ m_snd_tag_init(&vst->com, ifp);
+
+ *ppmt = &vst->com;
+ return (0);
+}
+
+static int
+vlan_snd_tag_modify(struct m_snd_tag *mst,
+ union if_snd_tag_modify_params *params)
+{
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ return (vst->tag->ifp->if_snd_tag_modify(vst->tag, params));
+}
+
+static int
+vlan_snd_tag_query(struct m_snd_tag *mst,
+ union if_snd_tag_query_params *params)
+{
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ return (vst->tag->ifp->if_snd_tag_query(vst->tag, params));
+}
+
+static void
+vlan_snd_tag_free(struct m_snd_tag *mst)
+{
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ m_snd_tag_rele(vst->tag);
+ free(vst, M_VLAN);
}
#endif
diff --git a/freebsd/sys/net/if_vlan_var.h b/freebsd/sys/net/if_vlan_var.h
index 0b66ec0a..28b0fa73 100644
--- a/freebsd/sys/net/if_vlan_var.h
+++ b/freebsd/sys/net/if_vlan_var.h
@@ -150,13 +150,13 @@ extern int (*vlan_pcp_p)(struct ifnet *, uint16_t *);
extern int (*vlan_setcookie_p)(struct ifnet *, void *);
extern void *(*vlan_cookie_p)(struct ifnet *);
-#ifdef _SYS_EVENTHANDLER_H_
+#include <sys/_eventhandler.h>
+
/* VLAN state change events */
typedef void (*vlan_config_fn)(void *, struct ifnet *, uint16_t);
typedef void (*vlan_unconfig_fn)(void *, struct ifnet *, uint16_t);
EVENTHANDLER_DECLARE(vlan_config, vlan_config_fn);
EVENTHANDLER_DECLARE(vlan_unconfig, vlan_unconfig_fn);
-#endif /* _SYS_EVENTHANDLER_H_ */
#endif /* _KERNEL */
diff --git a/freebsd/sys/net/iflib.h b/freebsd/sys/net/iflib.h
index 8c2be41b..cda00c4c 100644
--- a/freebsd/sys/net/iflib.h
+++ b/freebsd/sys/net/iflib.h
@@ -69,6 +69,9 @@ typedef struct if_rxd_frag {
uint16_t irf_len;
} *if_rxd_frag_t;
+/* bnxt supports 64 with hardware LRO enabled */
+#define IFLIB_MAX_RX_SEGS 64
+
typedef struct if_rxd_info {
/* set by iflib */
uint16_t iri_qsidx; /* qset index */
@@ -76,7 +79,7 @@ typedef struct if_rxd_info {
/* XXX redundant with the new irf_len field */
uint16_t iri_len; /* packet length */
qidx_t iri_cidx; /* consumer index of cq */
- struct ifnet *iri_ifp; /* some drivers >1 interface per softc */
+ if_t iri_ifp; /* driver may have >1 iface per softc */
/* updated by driver */
if_rxd_frag_t iri_frags;
@@ -129,12 +132,12 @@ typedef struct if_pkt_info {
uint8_t ipi_mflags; /* packet mbuf flags */
uint32_t ipi_tcp_seq; /* tcp seqno */
- uint32_t ipi_tcp_sum; /* tcp csum */
+ uint32_t __spare0__;
} *if_pkt_info_t;
typedef struct if_irq {
struct resource *ii_res;
- int ii_rid;
+ int __spare0__;
void *ii_tag;
} *if_irq_t;
@@ -163,7 +166,7 @@ typedef struct pci_vendor_info {
uint32_t pvi_subdevice_id;
uint32_t pvi_rev_id;
uint32_t pvi_class_mask;
- caddr_t pvi_name;
+ const char *pvi_name;
} pci_vendor_info_t;
#define PVID(vendor, devid, name) {vendor, devid, 0, 0, 0, 0, name}
@@ -191,9 +194,8 @@ typedef struct if_softc_ctx {
int isc_vectors;
int isc_nrxqsets;
int isc_ntxqsets;
- uint8_t isc_min_tx_latency; /* disable doorbell update batching */
- uint8_t isc_rx_mvec_enable; /* generate mvecs on rx */
- uint32_t isc_txrx_budget_bytes_max;
+ uint16_t __spare0__;
+ uint32_t __spare1__;
int isc_msix_bar; /* can be model specific - initialize in attach_pre */
int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */
int isc_ntxd[8];
@@ -215,16 +217,23 @@ typedef struct if_softc_ctx {
int isc_rss_table_mask;
int isc_nrxqsets_max;
int isc_ntxqsets_max;
- uint32_t isc_tx_qdepth;
+ uint32_t __spare2__;
iflib_intr_mode_t isc_intr;
uint16_t isc_max_frame_size; /* set at init time by driver */
uint16_t isc_min_frame_size; /* set at init time by driver, only used if
IFLIB_NEED_ETHER_PAD is set. */
uint32_t isc_pause_frames; /* set by driver for iflib_timer to detect */
- pci_vendor_info_t isc_vendor_info; /* set by iflib prior to attach_pre */
+ uint32_t __spare3__;
+ uint32_t __spare4__;
+ uint32_t __spare5__;
+ uint32_t __spare6__;
+ uint32_t __spare7__;
+ uint32_t __spare8__;
+ caddr_t __spare9__;
int isc_disable_msix;
if_txrx_t isc_txrx;
+ struct ifmedia *isc_media;
} *if_softc_ctx_t;
/*
@@ -244,8 +253,8 @@ struct if_shared_ctx {
int isc_admin_intrcnt; /* # of admin/link interrupts */
/* fields necessary for probe */
- pci_vendor_info_t *isc_vendor_info;
- char *isc_driver_version;
+ const pci_vendor_info_t *isc_vendor_info;
+ const char *isc_driver_version;
/* optional function to transform the read values to match the table*/
void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id,
uint16_t *subdevice_id, uint16_t *rev_id);
@@ -260,7 +269,7 @@ struct if_shared_ctx {
int isc_nfl __aligned(CACHE_LINE_SIZE);
int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */
int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */
- int isc_rx_process_limit;
+ int __spare0__;
int isc_tx_reclaim_thresh;
int isc_flags;
const char *isc_name;
@@ -284,11 +293,6 @@ typedef enum {
IFLIB_INTR_IOV,
} iflib_intr_type_t;
-#ifndef ETH_ADDR_LEN
-#define ETH_ADDR_LEN 6
-#endif
-
-
/*
* Interface has a separate command queue for RX
*/
@@ -358,7 +362,10 @@ typedef enum {
* Interface needs admin task to ignore interface up/down status
*/
#define IFLIB_ADMIN_ALWAYS_RUN 0x10000
-
+/*
+ * Driver will supply its own media (see isc_media)
+ */
+#define IFLIB_DRIVER_MEDIA 0x20000
/*
* field accessors
@@ -378,6 +385,8 @@ void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]);
void iflib_request_reset(if_ctx_t ctx);
uint8_t iflib_in_detach(if_ctx_t ctx);
+uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx);
+
/*
* If the driver can plug cleanly in to newbus use these
*/
@@ -388,6 +397,12 @@ int iflib_device_suspend(device_t);
int iflib_device_resume(device_t);
int iflib_device_shutdown(device_t);
+/*
+ * Use this instead of iflib_device_probe if the driver should report
+ * BUS_PROBE_VENDOR instead of BUS_PROBE_DEFAULT. (For example, an out-of-tree
+ * driver based on iflib).
+ */
+int iflib_device_probe_vendor(device_t);
int iflib_device_iov_init(device_t, uint16_t, const nvlist_t *);
void iflib_device_iov_uninit(device_t);
@@ -400,8 +415,6 @@ int iflib_device_iov_add_vf(device_t, uint16_t, const nvlist_t *);
int iflib_device_register(device_t dev, void *softc, if_shared_ctx_t sctx, if_ctx_t *ctxp);
int iflib_device_deregister(if_ctx_t);
-
-
int iflib_irq_alloc(if_ctx_t, if_irq_t, int, driver_filter_t, void *filter_arg, driver_intr_t, void *arg, const char *name);
int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
iflib_intr_type_t type, driver_filter_t *filter,
@@ -410,33 +423,28 @@ void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t t
void iflib_irq_free(if_ctx_t ctx, if_irq_t irq);
-void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name);
+void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu,
+ const char *name);
void iflib_config_gtask_init(void *ctx, struct grouptask *gtask,
gtask_fn_t *fn, const char *name);
-
void iflib_config_gtask_deinit(struct grouptask *gtask);
-
-
void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid);
void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid);
void iflib_admin_intr_deferred(if_ctx_t ctx);
void iflib_iov_intr_deferred(if_ctx_t ctx);
-
void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate);
int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags);
+int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags);
void iflib_dma_free(iflib_dma_info_t dma);
-
int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count);
void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count);
-
struct sx *iflib_ctx_lock_get(if_ctx_t);
-struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t);
void iflib_led_create(if_ctx_t ctx);
@@ -448,4 +456,5 @@ void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *,
*/
if_pseudo_t iflib_clone_register(if_shared_ctx_t);
void iflib_clone_deregister(if_pseudo_t);
+
#endif /* __IFLIB_H_ */
diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c
index a3da964b..0f7c4800 100644
--- a/freebsd/sys/net/netisr.c
+++ b/freebsd/sys/net/netisr.c
@@ -868,6 +868,7 @@ netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
("%s: invalid policy %u for %s", __func__, npp->np_policy,
npp->np_name));
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
ifp = m->m_pkthdr.rcvif;
if (ifp != NULL)
*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
diff --git a/freebsd/sys/net/pfil.c b/freebsd/sys/net/pfil.c
index 65af515f..1dea915d 100644
--- a/freebsd/sys/net/pfil.c
+++ b/freebsd/sys/net/pfil.c
@@ -6,6 +6,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
+ * Copyright (c) 2019 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 1996 Matthew R. Green
* All rights reserved.
*
@@ -34,445 +35,650 @@
*/
#include <sys/param.h>
+#include <sys/conf.h>
#include <sys/kernel.h>
+#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/systm.h>
-#include <sys/condvar.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/ucred.h>
+#include <sys/jail.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/pfil.h>
-static struct mtx pfil_global_lock;
-
-MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock",
- MTX_DEF);
-
-static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *);
-static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int);
-static int pfil_chain_remove(pfil_chain_t *, void *, void *);
-static int pfil_add_hook_priv(void *, void *, int, struct pfil_head *, bool);
+static MALLOC_DEFINE(M_PFIL, "pfil", "pfil(9) packet filter hooks");
+
+static int pfil_ioctl(struct cdev *, u_long, caddr_t, int, struct thread *);
+static struct cdevsw pfil_cdevsw = {
+ .d_ioctl = pfil_ioctl,
+ .d_name = PFILDEV,
+ .d_version = D_VERSION,
+};
+static struct cdev *pfil_dev;
+
+static struct mtx pfil_lock;
+MTX_SYSINIT(pfil_mtxinit, &pfil_lock, "pfil(9) lock", MTX_DEF);
+#define PFIL_LOCK() mtx_lock(&pfil_lock)
+#define PFIL_UNLOCK() mtx_unlock(&pfil_lock)
+#define PFIL_LOCK_ASSERT() mtx_assert(&pfil_lock, MA_OWNED)
+
+#define PFIL_EPOCH net_epoch_preempt
+#define PFIL_EPOCH_ENTER(et) epoch_enter_preempt(net_epoch_preempt, &(et))
+#define PFIL_EPOCH_EXIT(et) epoch_exit_preempt(net_epoch_preempt, &(et))
+
+struct pfil_hook {
+ pfil_func_t hook_func;
+ void *hook_ruleset;
+ int hook_flags;
+ int hook_links;
+ enum pfil_types hook_type;
+ const char *hook_modname;
+ const char *hook_rulname;
+ LIST_ENTRY(pfil_hook) hook_list;
+};
+
+struct pfil_link {
+ CK_STAILQ_ENTRY(pfil_link) link_chain;
+ pfil_func_t link_func;
+ void *link_ruleset;
+ int link_flags;
+ struct pfil_hook *link_hook;
+ struct epoch_context link_epoch_ctx;
+};
+
+typedef CK_STAILQ_HEAD(pfil_chain, pfil_link) pfil_chain_t;
+struct pfil_head {
+ int head_nhooksin;
+ int head_nhooksout;
+ pfil_chain_t head_in;
+ pfil_chain_t head_out;
+ int head_flags;
+ enum pfil_types head_type;
+ LIST_ENTRY(pfil_head) head_list;
+ const char *head_name;
+};
LIST_HEAD(pfilheadhead, pfil_head);
-VNET_DEFINE(struct pfilheadhead, pfil_head_list);
+VNET_DEFINE_STATIC(struct pfilheadhead, pfil_head_list) =
+ LIST_HEAD_INITIALIZER(pfil_head_list);
#define V_pfil_head_list VNET(pfil_head_list)
-VNET_DEFINE(struct rmlock, pfil_lock);
-
-#define PFIL_LOCK_INIT_REAL(l, t) \
- rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE)
-#define PFIL_LOCK_DESTROY_REAL(l) \
- rm_destroy(l)
-#define PFIL_LOCK_INIT(p) do { \
- if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) { \
- PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private"); \
- (p)->ph_plock = &(p)->ph_lock; \
- } else \
- (p)->ph_plock = &V_pfil_lock; \
-} while (0)
-#define PFIL_LOCK_DESTROY(p) do { \
- if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) \
- PFIL_LOCK_DESTROY_REAL((p)->ph_plock); \
-} while (0)
-
-#define PFIL_TRY_RLOCK(p, t) rm_try_rlock((p)->ph_plock, (t))
-#define PFIL_RLOCK(p, t) rm_rlock((p)->ph_plock, (t))
-#define PFIL_WLOCK(p) rm_wlock((p)->ph_plock)
-#define PFIL_RUNLOCK(p, t) rm_runlock((p)->ph_plock, (t))
-#define PFIL_WUNLOCK(p) rm_wunlock((p)->ph_plock)
-#define PFIL_WOWNED(p) rm_wowned((p)->ph_plock)
-
-#define PFIL_HEADLIST_LOCK() mtx_lock(&pfil_global_lock)
-#define PFIL_HEADLIST_UNLOCK() mtx_unlock(&pfil_global_lock)
-/*
- * pfil_run_hooks() runs the specified packet filter hook chain.
- */
+LIST_HEAD(pfilhookhead, pfil_hook);
+VNET_DEFINE_STATIC(struct pfilhookhead, pfil_hook_list) =
+ LIST_HEAD_INITIALIZER(pfil_hook_list);
+#define V_pfil_hook_list VNET(pfil_hook_list)
+
+static struct pfil_link *pfil_link_remove(pfil_chain_t *, pfil_hook_t);
+static void pfil_link_free(epoch_context_t);
+
int
-pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
- int dir, int flags, struct inpcb *inp)
+pfil_realloc(pfil_packet_t *p, int flags, struct ifnet *ifp)
{
- struct rm_priotracker rmpt;
- struct packet_filter_hook *pfh;
- struct mbuf *m = *mp;
- int rv = 0;
-
- PFIL_RLOCK(ph, &rmpt);
- KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0"));
- for (pfh = pfil_chain_get(dir, ph); pfh != NULL;
- pfh = TAILQ_NEXT(pfh, pfil_chain)) {
- if (pfh->pfil_func_flags != NULL) {
- rv = (*pfh->pfil_func_flags)(pfh->pfil_arg, &m, ifp,
- dir, flags, inp);
- if (rv != 0 || m == NULL)
- break;
- }
- if (pfh->pfil_func != NULL) {
- rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir,
- inp);
- if (rv != 0 || m == NULL)
- break;
- }
- }
- PFIL_RUNLOCK(ph, &rmpt);
- *mp = m;
- return (rv);
+ struct mbuf *m;
+
+ MPASS(flags & PFIL_MEMPTR);
+
+ if ((m = m_devget(p->mem, PFIL_LENGTH(flags), 0, ifp, NULL)) == NULL)
+ return (ENOMEM);
+ *p = pfil_packet_align(*p);
+ *p->m = m;
+
+ return (0);
}
-static struct packet_filter_hook *
-pfil_chain_get(int dir, struct pfil_head *ph)
+static __noinline int
+pfil_fake_mbuf(pfil_func_t func, pfil_packet_t *p, struct ifnet *ifp, int flags,
+ void *ruleset, struct inpcb *inp)
{
+ struct mbuf m, *mp;
+ pfil_return_t rv;
+
+ (void)m_init(&m, M_NOWAIT, MT_DATA, M_NOFREE | M_PKTHDR);
+ m_extadd(&m, p->mem, PFIL_LENGTH(flags), NULL, NULL, NULL, 0,
+ EXT_RXRING);
+ m.m_len = m.m_pkthdr.len = PFIL_LENGTH(flags);
+ mp = &m;
+ flags &= ~(PFIL_MEMPTR | PFIL_LENMASK);
+
+ rv = func(&mp, ifp, flags, ruleset, inp);
+ if (rv == PFIL_PASS && mp != &m) {
+ /*
+ * Firewalls that need pfil_fake_mbuf() most likely don't
+ * know they need return PFIL_REALLOCED.
+ */
+ rv = PFIL_REALLOCED;
+ *p = pfil_packet_align(*p);
+ *p->m = mp;
+ }
- if (dir == PFIL_IN)
- return (TAILQ_FIRST(&ph->ph_in));
- else if (dir == PFIL_OUT)
- return (TAILQ_FIRST(&ph->ph_out));
- else
- return (NULL);
+ return (rv);
}
-#ifndef __rtems__
/*
- * pfil_try_rlock() acquires rm reader lock for specified head
- * if this is immediately possible.
+ * pfil_run_hooks() runs the specified packet filter hook chain.
*/
int
-pfil_try_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+pfil_run_hooks(struct pfil_head *head, pfil_packet_t p, struct ifnet *ifp,
+ int flags, struct inpcb *inp)
{
-
- return (PFIL_TRY_RLOCK(ph, tracker));
+ struct epoch_tracker et;
+ pfil_chain_t *pch;
+ struct pfil_link *link;
+ pfil_return_t rv;
+ bool realloc = false;
+
+ if (PFIL_DIR(flags) == PFIL_IN)
+ pch = &head->head_in;
+ else if (__predict_true(PFIL_DIR(flags) == PFIL_OUT))
+ pch = &head->head_out;
+ else
+ panic("%s: bogus flags %d", __func__, flags);
+
+ rv = PFIL_PASS;
+ PFIL_EPOCH_ENTER(et);
+ CK_STAILQ_FOREACH(link, pch, link_chain) {
+ if ((flags & PFIL_MEMPTR) && !(link->link_flags & PFIL_MEMPTR))
+ rv = pfil_fake_mbuf(link->link_func, &p, ifp, flags,
+ link->link_ruleset, inp);
+ else
+ rv = (*link->link_func)(p, ifp, flags,
+ link->link_ruleset, inp);
+ if (rv == PFIL_DROPPED || rv == PFIL_CONSUMED)
+ break;
+ else if (rv == PFIL_REALLOCED) {
+ flags &= ~(PFIL_MEMPTR | PFIL_LENMASK);
+ realloc = true;
+ }
+ }
+ PFIL_EPOCH_EXIT(et);
+ if (realloc && rv == PFIL_PASS)
+ rv = PFIL_REALLOCED;
+ return (rv);
}
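
A hedged caller sketch for the reworked KPI shown above; the head pointer,
function name, and error code are assumptions (the Ethernet layer uses
V_link_pfil_head in a similar way), and pfil_packet_t is a transparent
union, so a struct mbuf ** can be passed directly:

static int
example_filter_in(struct ifnet *ifp, struct mbuf **mp)
{

	switch (pfil_run_hooks(V_link_pfil_head, mp, ifp, PFIL_IN, NULL)) {
	case PFIL_PASS:
		return (0);
	default:
		/* The packet was dropped or consumed by a hook. */
		return (EACCES);
	}
}
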
-#endif /* __rtems__ */
/*
- * pfil_rlock() acquires rm reader lock for specified head.
+ * pfil_head_register() registers a pfil_head with the packet filter hook
+ * mechanism.
*/
-void
-pfil_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+pfil_head_t
+pfil_head_register(struct pfil_head_args *pa)
{
+ struct pfil_head *head, *list;
- PFIL_RLOCK(ph, tracker);
-}
+ MPASS(pa->pa_version == PFIL_VERSION);
-/*
- * pfil_runlock() releases reader lock for specified head.
- */
-void
-pfil_runlock(struct pfil_head *ph, struct rm_priotracker *tracker)
-{
+ head = malloc(sizeof(struct pfil_head), M_PFIL, M_WAITOK);
+
+ head->head_nhooksin = head->head_nhooksout = 0;
+ head->head_flags = pa->pa_flags;
+ head->head_type = pa->pa_type;
+ head->head_name = pa->pa_headname;
+ CK_STAILQ_INIT(&head->head_in);
+ CK_STAILQ_INIT(&head->head_out);
+
+ PFIL_LOCK();
+ LIST_FOREACH(list, &V_pfil_head_list, head_list)
+ if (strcmp(pa->pa_headname, list->head_name) == 0) {
+ printf("pfil: duplicate head \"%s\"\n",
+ pa->pa_headname);
+ }
+ LIST_INSERT_HEAD(&V_pfil_head_list, head, head_list);
+ PFIL_UNLOCK();
- PFIL_RUNLOCK(ph, tracker);
+ return (head);
}
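
Registration now takes a versioned argument structure; a minimal sketch of
registering a link-layer head, where the field values follow the Ethernet
head as an assumption:

struct pfil_head_args pa = {
	.pa_version = PFIL_VERSION,
	.pa_flags = PFIL_IN | PFIL_OUT,
	.pa_type = PFIL_TYPE_ETHERNET,
	.pa_headname = PFIL_ETHER_NAME,		/* "ethernet" */
};

V_link_pfil_head = pfil_head_register(&pa);
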
/*
- * pfil_wlock() acquires writer lock for specified head.
+ * pfil_head_unregister() removes a pfil_head from the packet filter hook
+ * mechanism. The producer of the hook promises that all outstanding
+ * invocations of the hook have completed before it unregisters the hook.
*/
void
-pfil_wlock(struct pfil_head *ph)
+pfil_head_unregister(pfil_head_t ph)
{
+ struct pfil_link *link, *next;
+
+ PFIL_LOCK();
+ LIST_REMOVE(ph, head_list);
- PFIL_WLOCK(ph);
+ CK_STAILQ_FOREACH_SAFE(link, &ph->head_in, link_chain, next) {
+ link->link_hook->hook_links--;
+ free(link, M_PFIL);
+ }
+ CK_STAILQ_FOREACH_SAFE(link, &ph->head_out, link_chain, next) {
+ link->link_hook->hook_links--;
+ free(link, M_PFIL);
+ }
+ PFIL_UNLOCK();
}
-/*
- * pfil_wunlock() releases writer lock for specified head.
- */
-void
-pfil_wunlock(struct pfil_head *ph)
+pfil_hook_t
+pfil_add_hook(struct pfil_hook_args *pa)
{
+ struct pfil_hook *hook, *list;
+
+ MPASS(pa->pa_version == PFIL_VERSION);
+
+ hook = malloc(sizeof(struct pfil_hook), M_PFIL, M_WAITOK | M_ZERO);
+ hook->hook_func = pa->pa_func;
+ hook->hook_ruleset = pa->pa_ruleset;
+ hook->hook_flags = pa->pa_flags;
+ hook->hook_type = pa->pa_type;
+ hook->hook_modname = pa->pa_modname;
+ hook->hook_rulname = pa->pa_rulname;
+
+ PFIL_LOCK();
+ LIST_FOREACH(list, &V_pfil_hook_list, hook_list)
+ if (strcmp(pa->pa_modname, list->hook_modname) == 0 &&
+ strcmp(pa->pa_rulname, list->hook_rulname) == 0) {
+ printf("pfil: duplicate hook \"%s:%s\"\n",
+ pa->pa_modname, pa->pa_rulname);
+ }
+ LIST_INSERT_HEAD(&V_pfil_hook_list, hook, hook_list);
+ PFIL_UNLOCK();
- PFIL_WUNLOCK(ph);
+ return (hook);
}
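
A filter registers itself with a pfil_func_t and a pfil_hook_args block; a
minimal sketch in which the module and ruleset names are hypothetical:

static pfil_return_t
example_hook_func(pfil_packet_t p, struct ifnet *ifp, int flags,
    void *ruleset, struct inpcb *inp)
{

	return (PFIL_PASS);	/* pass every packet through unchanged */
}

static pfil_hook_t example_hook;

static void
example_register(void)
{
	struct pfil_hook_args pha = {
		.pa_version = PFIL_VERSION,
		.pa_flags = PFIL_IN | PFIL_OUT,
		.pa_type = PFIL_TYPE_ETHERNET,
		.pa_func = example_hook_func,
		.pa_ruleset = NULL,
		.pa_modname = "example",
		.pa_rulname = "default",
	};

	example_hook = pfil_add_hook(&pha);
}
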
-/*
- * pfil_wowned() returns a non-zero value if the current thread owns
- * an exclusive lock.
- */
-int
-pfil_wowned(struct pfil_head *ph)
+static int
+pfil_unlink(struct pfil_link_args *pa, pfil_head_t head, pfil_hook_t hook)
{
+ struct pfil_link *in, *out;
+
+ PFIL_LOCK_ASSERT();
- return (PFIL_WOWNED(ph));
+ if (pa->pa_flags & PFIL_IN) {
+ in = pfil_link_remove(&head->head_in, hook);
+ if (in != NULL) {
+ head->head_nhooksin--;
+ hook->hook_links--;
+ }
+ } else
+ in = NULL;
+ if (pa->pa_flags & PFIL_OUT) {
+ out = pfil_link_remove(&head->head_out, hook);
+ if (out != NULL) {
+ head->head_nhooksout--;
+ hook->hook_links--;
+ }
+ } else
+ out = NULL;
+ PFIL_UNLOCK();
+
+ if (in != NULL)
+ epoch_call(PFIL_EPOCH, &in->link_epoch_ctx, pfil_link_free);
+ if (out != NULL)
+ epoch_call(PFIL_EPOCH, &out->link_epoch_ctx, pfil_link_free);
+
+ if (in == NULL && out == NULL)
+ return (ENOENT);
+ else
+ return (0);
}
-/*
- * pfil_head_register() registers a pfil_head with the packet filter hook
- * mechanism.
- */
int
-pfil_head_register(struct pfil_head *ph)
+pfil_link(struct pfil_link_args *pa)
{
- struct pfil_head *lph;
-
- PFIL_HEADLIST_LOCK();
- LIST_FOREACH(lph, &V_pfil_head_list, ph_list) {
- if (ph->ph_type == lph->ph_type &&
- ph->ph_un.phu_val == lph->ph_un.phu_val) {
- PFIL_HEADLIST_UNLOCK();
- return (EEXIST);
- }
+ struct pfil_link *in, *out, *link;
+ struct pfil_head *head;
+ struct pfil_hook *hook;
+ int error;
+
+ MPASS(pa->pa_version == PFIL_VERSION);
+
+ if ((pa->pa_flags & (PFIL_IN | PFIL_UNLINK)) == PFIL_IN)
+ in = malloc(sizeof(*in), M_PFIL, M_WAITOK | M_ZERO);
+ else
+ in = NULL;
+ if ((pa->pa_flags & (PFIL_OUT | PFIL_UNLINK)) == PFIL_OUT)
+ out = malloc(sizeof(*out), M_PFIL, M_WAITOK | M_ZERO);
+ else
+ out = NULL;
+
+ PFIL_LOCK();
+ if (pa->pa_flags & PFIL_HEADPTR)
+ head = pa->pa_head;
+ else
+ LIST_FOREACH(head, &V_pfil_head_list, head_list)
+ if (strcmp(pa->pa_headname, head->head_name) == 0)
+ break;
+ if (pa->pa_flags & PFIL_HOOKPTR)
+ hook = pa->pa_hook;
+ else
+ LIST_FOREACH(hook, &V_pfil_hook_list, hook_list)
+ if (strcmp(pa->pa_modname, hook->hook_modname) == 0 &&
+ strcmp(pa->pa_rulname, hook->hook_rulname) == 0)
+ break;
+ if (head == NULL || hook == NULL) {
+ error = ENOENT;
+ goto fail;
+ }
+
+ if (pa->pa_flags & PFIL_UNLINK)
+ return (pfil_unlink(pa, head, hook));
+
+ if (head->head_type != hook->hook_type ||
+ ((hook->hook_flags & pa->pa_flags) & ~head->head_flags)) {
+ error = EINVAL;
+ goto fail;
+ }
+
+ if (pa->pa_flags & PFIL_IN)
+ CK_STAILQ_FOREACH(link, &head->head_in, link_chain)
+ if (link->link_hook == hook) {
+ error = EEXIST;
+ goto fail;
+ }
+ if (pa->pa_flags & PFIL_OUT)
+ CK_STAILQ_FOREACH(link, &head->head_out, link_chain)
+ if (link->link_hook == hook) {
+ error = EEXIST;
+ goto fail;
+ }
+
+ if (pa->pa_flags & PFIL_IN) {
+ in->link_hook = hook;
+ in->link_func = hook->hook_func;
+ in->link_flags = hook->hook_flags;
+ in->link_ruleset = hook->hook_ruleset;
+ if (pa->pa_flags & PFIL_APPEND)
+ CK_STAILQ_INSERT_TAIL(&head->head_in, in, link_chain);
+ else
+ CK_STAILQ_INSERT_HEAD(&head->head_in, in, link_chain);
+ hook->hook_links++;
+ head->head_nhooksin++;
+ }
+ if (pa->pa_flags & PFIL_OUT) {
+ out->link_hook = hook;
+ out->link_func = hook->hook_func;
+ out->link_flags = hook->hook_flags;
+ out->link_ruleset = hook->hook_ruleset;
+ if (pa->pa_flags & PFIL_APPEND)
+ CK_STAILQ_INSERT_HEAD(&head->head_out, out, link_chain);
+ else
+ CK_STAILQ_INSERT_TAIL(&head->head_out, out, link_chain);
+ hook->hook_links++;
+ head->head_nhooksout++;
}
- PFIL_LOCK_INIT(ph);
- ph->ph_nhooks = 0;
- TAILQ_INIT(&ph->ph_in);
- TAILQ_INIT(&ph->ph_out);
- LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list);
- PFIL_HEADLIST_UNLOCK();
+ PFIL_UNLOCK();
+
return (0);
+
+fail:
+ PFIL_UNLOCK();
+ free(in, M_PFIL);
+ free(out, M_PFIL);
+ return (error);
+}
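
Once head and hook both exist, pfil_link() connects them, by name or by
pointer; a sketch using names that match the hypothetical hook above:

struct pfil_link_args la = {
	.pa_version = PFIL_VERSION,
	.pa_flags = PFIL_IN | PFIL_OUT,
	.pa_headname = PFIL_ETHER_NAME,
	.pa_modname = "example",
	.pa_rulname = "default",
};

error = pfil_link(&la);	/* 0 on success, else ENOENT/EEXIST/EINVAL */
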
+
+static void
+pfil_link_free(epoch_context_t ctx)
+{
+ struct pfil_link *link;
+
+ link = __containerof(ctx, struct pfil_link, link_epoch_ctx);
+ free(link, M_PFIL);
}
/*
- * pfil_head_unregister() removes a pfil_head from the packet filter hook
- * mechanism. The producer of the hook promises that all outstanding
- * invocations of the hook have completed before it unregisters the hook.
+ * pfil_remove_hook removes a filter from all filtering points.
*/
-int
-pfil_head_unregister(struct pfil_head *ph)
+void
+pfil_remove_hook(pfil_hook_t hook)
{
- struct packet_filter_hook *pfh, *pfnext;
-
- PFIL_HEADLIST_LOCK();
- LIST_REMOVE(ph, ph_list);
- PFIL_HEADLIST_UNLOCK();
- TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_chain, pfnext)
- free(pfh, M_IFADDR);
- TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_chain, pfnext)
- free(pfh, M_IFADDR);
- PFIL_LOCK_DESTROY(ph);
- return (0);
+ struct pfil_head *head;
+ struct pfil_link *in, *out;
+
+ PFIL_LOCK();
+ LIST_FOREACH(head, &V_pfil_head_list, head_list) {
+retry:
+ in = pfil_link_remove(&head->head_in, hook);
+ if (in != NULL) {
+ head->head_nhooksin--;
+ hook->hook_links--;
+ epoch_call(PFIL_EPOCH, &in->link_epoch_ctx,
+ pfil_link_free);
+ }
+ out = pfil_link_remove(&head->head_out, hook);
+ if (out != NULL) {
+ head->head_nhooksout--;
+ hook->hook_links--;
+ epoch_call(PFIL_EPOCH, &out->link_epoch_ctx,
+ pfil_link_free);
+ }
+ if (in != NULL || out != NULL)
+ /* What if some stupid admin put same filter twice? */
+ goto retry;
+ }
+ LIST_REMOVE(hook, hook_list);
+ PFIL_UNLOCK();
+ MPASS(hook->hook_links == 0);
+ free(hook, M_PFIL);
}
/*
- * pfil_head_get() returns the pfil_head for a given key/dlt.
+ * Internal: Remove a pfil hook from a hook chain.
*/
-struct pfil_head *
-pfil_head_get(int type, u_long val)
+static struct pfil_link *
+pfil_link_remove(pfil_chain_t *chain, pfil_hook_t hook)
{
- struct pfil_head *ph;
+ struct pfil_link *link;
- PFIL_HEADLIST_LOCK();
- LIST_FOREACH(ph, &V_pfil_head_list, ph_list)
- if (ph->ph_type == type && ph->ph_un.phu_val == val)
- break;
- PFIL_HEADLIST_UNLOCK();
- return (ph);
+ PFIL_LOCK_ASSERT();
+
+ CK_STAILQ_FOREACH(link, chain, link_chain)
+ if (link->link_hook == hook) {
+ CK_STAILQ_REMOVE(chain, link, pfil_link, link_chain);
+ return (link);
+ }
+
+ return (NULL);
}
-/*
- * pfil_add_hook_flags() adds a function to the packet filter hook. the
- * flags are:
- * PFIL_IN call me on incoming packets
- * PFIL_OUT call me on outgoing packets
- * PFIL_ALL call me on all of the above
- * PFIL_WAITOK OK to call malloc with M_WAITOK.
- */
-int
-pfil_add_hook_flags(pfil_func_flags_t func, void *arg, int flags,
- struct pfil_head *ph)
+static void
+pfil_init(const void *unused __unused)
{
- return (pfil_add_hook_priv(func, arg, flags, ph, true));
+ struct make_dev_args args;
+ int error;
+
+ make_dev_args_init(&args);
+ args.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME;
+ args.mda_devsw = &pfil_cdevsw;
+ args.mda_uid = UID_ROOT;
+ args.mda_gid = GID_WHEEL;
+ args.mda_mode = 0600;
+ error = make_dev_s(&args, &pfil_dev, PFILDEV);
+ KASSERT(error == 0, ("%s: failed to create dev: %d", __func__, error));
}
+/*
+ * Make sure the pfil bits are initialized first, before any subsystem
+ * that might piggyback on SI_SUB_PROTO_PFIL.
+ */
+SYSINIT(pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, pfil_init, NULL);
/*
- * pfil_add_hook() adds a function to the packet filter hook. the
- * flags are:
- * PFIL_IN call me on incoming packets
- * PFIL_OUT call me on outgoing packets
- * PFIL_ALL call me on all of the above
- * PFIL_WAITOK OK to call malloc with M_WAITOK.
+ * User control interface.
*/
-int
-pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
+static int pfilioc_listheads(struct pfilioc_list *);
+static int pfilioc_listhooks(struct pfilioc_list *);
+static int pfilioc_link(struct pfilioc_link *);
+
+static int
+pfil_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
+ struct thread *td)
{
- return (pfil_add_hook_priv(func, arg, flags, ph, false));
+ int error;
+
+ CURVNET_SET(TD_TO_VNET(td));
+ error = 0;
+ switch (cmd) {
+ case PFILIOC_LISTHEADS:
+ error = pfilioc_listheads((struct pfilioc_list *)addr);
+ break;
+ case PFILIOC_LISTHOOKS:
+ error = pfilioc_listhooks((struct pfilioc_list *)addr);
+ break;
+ case PFILIOC_LINK:
+ error = pfilioc_link((struct pfilioc_link *)addr);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ CURVNET_RESTORE();
+ return (error);
}
static int
-pfil_add_hook_priv(void *func, void *arg, int flags,
- struct pfil_head *ph, bool hasflags)
+pfilioc_listheads(struct pfilioc_list *req)
{
- struct packet_filter_hook *pfh1 = NULL;
- struct packet_filter_hook *pfh2 = NULL;
- int err;
-
- if (flags & PFIL_IN) {
- pfh1 = (struct packet_filter_hook *)malloc(sizeof(*pfh1),
- M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT);
- if (pfh1 == NULL) {
- err = ENOMEM;
- goto error;
- }
- }
- if (flags & PFIL_OUT) {
- pfh2 = (struct packet_filter_hook *)malloc(sizeof(*pfh1),
- M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT);
- if (pfh2 == NULL) {
- err = ENOMEM;
- goto error;
- }
+ struct pfil_head *head;
+ struct pfil_link *link;
+ struct pfilioc_head *iohead;
+ struct pfilioc_hook *iohook;
+ u_int nheads, nhooks, hd, hk;
+ int error;
+
+ PFIL_LOCK();
+restart:
+ nheads = nhooks = 0;
+ LIST_FOREACH(head, &V_pfil_head_list, head_list) {
+ nheads++;
+ nhooks += head->head_nhooksin + head->head_nhooksout;
}
- PFIL_WLOCK(ph);
- if (flags & PFIL_IN) {
- pfh1->pfil_func_flags = hasflags ? func : NULL;
- pfh1->pfil_func = hasflags ? NULL : func;
- pfh1->pfil_arg = arg;
- err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
- if (err)
- goto locked_error;
- ph->ph_nhooks++;
+ PFIL_UNLOCK();
+
+ if (req->pio_nheads < nheads || req->pio_nhooks < nhooks) {
+ req->pio_nheads = nheads;
+ req->pio_nhooks = nhooks;
+ return (0);
}
- if (flags & PFIL_OUT) {
- pfh2->pfil_func_flags = hasflags ? func : NULL;
- pfh2->pfil_func = hasflags ? NULL : func;
- pfh2->pfil_arg = arg;
- err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
- if (err) {
- if (flags & PFIL_IN)
- pfil_chain_remove(&ph->ph_in, func, arg);
- goto locked_error;
+
+ iohead = malloc(sizeof(*iohead) * nheads, M_TEMP, M_WAITOK);
+ iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK);
+
+ hd = hk = 0;
+ PFIL_LOCK();
+ LIST_FOREACH(head, &V_pfil_head_list, head_list) {
+ if (hd + 1 > nheads ||
+ hk + head->head_nhooksin + head->head_nhooksout > nhooks) {
+ /* Configuration changed during malloc(). */
+ free(iohead, M_TEMP);
+ free(iohook, M_TEMP);
+ goto restart;
+ }
+ strlcpy(iohead[hd].pio_name, head->head_name,
+ sizeof(iohead[0].pio_name));
+ iohead[hd].pio_nhooksin = head->head_nhooksin;
+ iohead[hd].pio_nhooksout = head->head_nhooksout;
+ iohead[hd].pio_type = head->head_type;
+ CK_STAILQ_FOREACH(link, &head->head_in, link_chain) {
+ strlcpy(iohook[hk].pio_module,
+ link->link_hook->hook_modname,
+ sizeof(iohook[0].pio_module));
+ strlcpy(iohook[hk].pio_ruleset,
+ link->link_hook->hook_rulname,
+ sizeof(iohook[0].pio_ruleset));
+ hk++;
}
- ph->ph_nhooks++;
+ CK_STAILQ_FOREACH(link, &head->head_out, link_chain) {
+ strlcpy(iohook[hk].pio_module,
+ link->link_hook->hook_modname,
+ sizeof(iohook[0].pio_module));
+ strlcpy(iohook[hk].pio_ruleset,
+ link->link_hook->hook_rulname,
+ sizeof(iohook[0].pio_ruleset));
+ hk++;
+ }
+ hd++;
}
- PFIL_WUNLOCK(ph);
- return (0);
-locked_error:
- PFIL_WUNLOCK(ph);
-error:
- if (pfh1 != NULL)
- free(pfh1, M_IFADDR);
- if (pfh2 != NULL)
- free(pfh2, M_IFADDR);
- return (err);
-}
+ PFIL_UNLOCK();
-/*
- * pfil_remove_hook_flags removes a specific function from the packet filter hook
- * chain.
- */
-int
-pfil_remove_hook_flags(pfil_func_flags_t func, void *arg, int flags,
- struct pfil_head *ph)
-{
- return (pfil_remove_hook((pfil_func_t)func, arg, flags, ph));
-}
+ error = copyout(iohead, req->pio_heads,
+ sizeof(*iohead) * min(hd, req->pio_nheads));
+ if (error == 0)
+ error = copyout(iohook, req->pio_hooks,
+ sizeof(*iohook) * min(req->pio_nhooks, hk));
-/*
- * pfil_remove_hook removes a specific function from the packet filter hook
- * chain.
- */
-int
-pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
-{
- int err = 0;
+ req->pio_nheads = hd;
+ req->pio_nhooks = hk;
- PFIL_WLOCK(ph);
- if (flags & PFIL_IN) {
- err = pfil_chain_remove(&ph->ph_in, func, arg);
- if (err == 0)
- ph->ph_nhooks--;
- }
- if ((err == 0) && (flags & PFIL_OUT)) {
- err = pfil_chain_remove(&ph->ph_out, func, arg);
- if (err == 0)
- ph->ph_nhooks--;
- }
- PFIL_WUNLOCK(ph);
- return (err);
-}
+ free(iohead, M_TEMP);
+ free(iohook, M_TEMP);
-/*
- * Internal: Add a new pfil hook into a hook chain.
- */
-static int
-pfil_chain_add(pfil_chain_t *chain, struct packet_filter_hook *pfh1, int flags)
-{
- struct packet_filter_hook *pfh;
-
- /*
- * First make sure the hook is not already there.
- */
- TAILQ_FOREACH(pfh, chain, pfil_chain)
- if (((pfh->pfil_func != NULL && pfh->pfil_func == pfh1->pfil_func) ||
- (pfh->pfil_func_flags != NULL &&
- pfh->pfil_func_flags == pfh1->pfil_func_flags)) &&
- pfh->pfil_arg == pfh1->pfil_arg)
- return (EEXIST);
-
- /*
- * Insert the input list in reverse order of the output list so that
- * the same path is followed in or out of the kernel.
- */
- if (flags & PFIL_IN)
- TAILQ_INSERT_HEAD(chain, pfh1, pfil_chain);
- else
- TAILQ_INSERT_TAIL(chain, pfh1, pfil_chain);
- return (0);
+ return (error);
}
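
The function above implements a two-pass sizing protocol: a request whose pio_nheads/pio_nhooks are too small is answered with the required counts and success, so userland can allocate and retry. A minimal userland sketch of that protocol (assuming the control device is /dev/pfil per the PFILDEV define in pfil.h; error handling abbreviated):

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <net/pfil.h>
    #include <err.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int
    main(void)
    {
        struct pfilioc_list plist;
        int fd;

        if ((fd = open("/dev/" PFILDEV, O_RDWR)) == -1)
            err(1, "open");
        /* Pass 1: zero-sized request; the kernel fills in the counts. */
        memset(&plist, 0, sizeof(plist));
        if (ioctl(fd, PFILIOC_LISTHEADS, &plist) == -1)
            err(1, "PFILIOC_LISTHEADS");
        /*
         * Pass 2: allocate and fetch.  Real code would loop, since the
         * configuration may change between the two calls.
         */
        plist.pio_heads = calloc(plist.pio_nheads, sizeof(*plist.pio_heads));
        plist.pio_hooks = calloc(plist.pio_nhooks, sizeof(*plist.pio_hooks));
        if (plist.pio_heads == NULL || plist.pio_hooks == NULL)
            err(1, "calloc");
        if (ioctl(fd, PFILIOC_LISTHEADS, &plist) == -1)
            err(1, "PFILIOC_LISTHEADS");
        for (u_int i = 0; i < plist.pio_nheads; i++)
            printf("%s: %d in, %d out\n", plist.pio_heads[i].pio_name,
                plist.pio_heads[i].pio_nhooksin,
                plist.pio_heads[i].pio_nhooksout);
        return (0);
    }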
-/*
- * Internal: Remove a pfil hook from a hook chain.
- */
static int
-pfil_chain_remove(pfil_chain_t *chain, void *func, void *arg)
+pfilioc_listhooks(struct pfilioc_list *req)
{
- struct packet_filter_hook *pfh;
-
- TAILQ_FOREACH(pfh, chain, pfil_chain)
- if ((pfh->pfil_func == func || pfh->pfil_func_flags == func) &&
- pfh->pfil_arg == arg) {
- TAILQ_REMOVE(chain, pfh, pfil_chain);
- free(pfh, M_IFADDR);
- return (0);
+ struct pfil_hook *hook;
+ struct pfilioc_hook *iohook;
+ u_int nhooks, hk;
+ int error;
+
+ PFIL_LOCK();
+restart:
+ nhooks = 0;
+ LIST_FOREACH(hook, &V_pfil_hook_list, hook_list)
+ nhooks++;
+ PFIL_UNLOCK();
+
+ if (req->pio_nhooks < nhooks) {
+ req->pio_nhooks = nhooks;
+ return (0);
+ }
+
+ iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK);
+
+ hk = 0;
+ PFIL_LOCK();
+ LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) {
+ if (hk + 1 > nhooks) {
+ /* Configuration changed during malloc(). */
+ free(iohook, M_TEMP);
+ goto restart;
}
- return (ENOENT);
-}
+ strlcpy(iohook[hk].pio_module, hook->hook_modname,
+ sizeof(iohook[0].pio_module));
+ strlcpy(iohook[hk].pio_ruleset, hook->hook_rulname,
+ sizeof(iohook[0].pio_ruleset));
+ iohook[hk].pio_type = hook->hook_type;
+ iohook[hk].pio_flags = hook->hook_flags;
+ hk++;
+ }
+ PFIL_UNLOCK();
-/*
- * Stuff that must be initialized for every instance (including the first of
- * course).
- */
-static void
-vnet_pfil_init(const void *unused __unused)
-{
+ error = copyout(iohook, req->pio_hooks,
+ sizeof(*iohook) * min(req->pio_nhooks, hk));
+ req->pio_nhooks = hk;
+ free(iohook, M_TEMP);
- LIST_INIT(&V_pfil_head_list);
- PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared");
+ return (error);
}
-/*
- * Called for the removal of each instance.
- */
-static void
-vnet_pfil_uninit(const void *unused __unused)
+static int
+pfilioc_link(struct pfilioc_link *req)
{
+ struct pfil_link_args args;
- KASSERT(LIST_EMPTY(&V_pfil_head_list),
- ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list));
- PFIL_LOCK_DESTROY_REAL(&V_pfil_lock);
-}
+ if (req->pio_flags & ~(PFIL_IN | PFIL_OUT | PFIL_UNLINK | PFIL_APPEND))
+ return (EINVAL);
-/*
- * Starting up.
- *
- * VNET_SYSINIT is called for each existing vnet and each new vnet.
- * Make sure the pfil bits are first before any possible subsystem which
- * might piggyback on the SI_SUB_PROTO_PFIL.
- */
-VNET_SYSINIT(vnet_pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST,
- vnet_pfil_init, NULL);
-
-/*
- * Closing up shop. These are done in REVERSE ORDER. Not called on reboot.
- *
- * VNET_SYSUNINIT is called for each exiting vnet as it exits.
- */
-VNET_SYSUNINIT(vnet_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST,
- vnet_pfil_uninit, NULL);
+ args.pa_version = PFIL_VERSION;
+ args.pa_flags = req->pio_flags;
+ args.pa_headname = req->pio_name;
+ args.pa_modname = req->pio_module;
+ args.pa_rulname = req->pio_ruleset;
+
+ return (pfil_link(&args));
+}
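
pfilioc_link() is a thin shim from the ioctl request to pfil_link(). A hedged userland sketch of linking a filter to a head by name (the "pf"/"default"/"inet" names are illustrative; a real caller would take them from the listing ioctls above):

    static void
    link_filter(int fd)
    {
        struct pfilioc_link req;

        memset(&req, 0, sizeof(req));
        strlcpy(req.pio_name, "inet", sizeof(req.pio_name));
        strlcpy(req.pio_module, "pf", sizeof(req.pio_module));
        strlcpy(req.pio_ruleset, "default", sizeof(req.pio_ruleset));
        req.pio_flags = PFIL_IN | PFIL_OUT | PFIL_APPEND;
        if (ioctl(fd, PFILIOC_LINK, &req) == -1)
            err(1, "PFILIOC_LINK");
    }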
diff --git a/freebsd/sys/net/pfil.h b/freebsd/sys/net/pfil.h
index 8fdaf5a6..da045b30 100644
--- a/freebsd/sys/net/pfil.h
+++ b/freebsd/sys/net/pfil.h
@@ -4,6 +4,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
+ * Copyright (c) 2019 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 1996 Matthew R. Green
* All rights reserved.
*
@@ -34,98 +35,180 @@
#ifndef _NET_PFIL_H_
#define _NET_PFIL_H_
-#include <sys/systm.h>
-#include <sys/queue.h>
-#include <sys/_lock.h>
-#include <sys/_mutex.h>
-#include <sys/lock.h>
-#include <sys/rmlock.h>
-#include <net/vnet.h>
+#include <sys/ioccom.h>
+enum pfil_types {
+ PFIL_TYPE_IP4,
+ PFIL_TYPE_IP6,
+ PFIL_TYPE_ETHERNET,
+};
+
+#define MAXPFILNAME 64
+
+struct pfilioc_head {
+ char pio_name[MAXPFILNAME];
+ int pio_nhooksin;
+ int pio_nhooksout;
+ enum pfil_types pio_type;
+};
+
+struct pfilioc_hook {
+ char pio_module[MAXPFILNAME];
+ char pio_ruleset[MAXPFILNAME];
+ int pio_flags;
+ enum pfil_types pio_type;
+};
+
+struct pfilioc_list {
+ u_int pio_nheads;
+ u_int pio_nhooks;
+ struct pfilioc_head *pio_heads;
+ struct pfilioc_hook *pio_hooks;
+};
+
+struct pfilioc_link {
+ char pio_name[MAXPFILNAME];
+ char pio_module[MAXPFILNAME];
+ char pio_ruleset[MAXPFILNAME];
+ int pio_flags;
+};
+
+#define PFILDEV "pfil"
+#define PFILIOC_LISTHEADS _IOWR('P', 1, struct pfilioc_list)
+#define PFILIOC_LISTHOOKS _IOWR('P', 2, struct pfilioc_list)
+#define PFILIOC_LINK _IOW('P', 3, struct pfilioc_link)
+
+#define PFIL_IN 0x00010000
+#define PFIL_OUT 0x00020000
+#define PFIL_FWD 0x00040000
+#define PFIL_DIR(f) ((f) & (PFIL_IN|PFIL_OUT))
+#define PFIL_MEMPTR 0x00080000
+#define PFIL_HEADPTR 0x00100000
+#define PFIL_HOOKPTR 0x00200000
+#define PFIL_APPEND 0x00400000
+#define PFIL_UNLINK 0x00800000
+#define PFIL_LENMASK 0x0000ffff
+#define PFIL_LENGTH(f) ((f) & PFIL_LENMASK)
+
+#ifdef _KERNEL
struct mbuf;
struct ifnet;
struct inpcb;
-typedef int (*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *);
-typedef int (*pfil_func_flags_t)(void *, struct mbuf **, struct ifnet *,
- int, int, struct inpcb *);
+typedef union {
+ struct mbuf **m;
+ void *mem;
+ uintptr_t __ui;
+} pfil_packet_t __attribute__((__transparent_union__));
+
+static inline pfil_packet_t
+pfil_packet_align(pfil_packet_t p)
+{
+
+ return ((pfil_packet_t ) (((uintptr_t)(p).mem +
+ (_Alignof(void *) - 1)) & - _Alignof(void *)));
+}
+
+static inline struct mbuf *
+pfil_mem2mbuf(void *v)
+{
+
+ return (*(struct mbuf **) (((uintptr_t)(v) +
+ (_Alignof(void *) - 1)) & - _Alignof(void *)));
+}
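
Both helpers above round an address up to _Alignof(void *) with the usual power-of-two trick: add (align - 1), then mask with -align, whose two's-complement representation clears the low bits. A standalone sketch of just that arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uintptr_t p = 0x1001;
        uintptr_t align = _Alignof(void *);        /* 8 on LP64 */
        uintptr_t rounded = (p + (align - 1)) & -align;

        /* Prints 0x1001 -> 0x1008 on an LP64 system. */
        printf("%#jx -> %#jx\n", (uintmax_t)p, (uintmax_t)rounded);
        return (0);
    }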
+
+typedef enum {
+ PFIL_PASS = 0,
+ PFIL_DROPPED,
+ PFIL_CONSUMED,
+ PFIL_REALLOCED,
+} pfil_return_t;
+
+typedef pfil_return_t (*pfil_func_t)(pfil_packet_t, struct ifnet *, int,
+ void *, struct inpcb *);
+/*
+ * A pfil head is created by a packet intercept point.
+ *
+ * A pfil hook is created by a packet filter.
+ *
+ * Hooks are chained on heads. Historically some hooking happens
+ * automatically, e.g. ipfw(4), pf(4) and ipfilter(4) would register
+ * themselves on IPv4 and IPv6 input/output.
+ */
+
+typedef struct pfil_hook * pfil_hook_t;
+typedef struct pfil_head * pfil_head_t;
/*
- * The packet filter hooks are designed for anything to call them to
- * possibly intercept the packet. Multiple filter hooks are chained
- * together and after each other in the specified order.
+ * Give us a chance to modify pfil_xxx_args structures in the future.
*/
-struct packet_filter_hook {
- TAILQ_ENTRY(packet_filter_hook) pfil_chain;
- pfil_func_t pfil_func;
- pfil_func_flags_t pfil_func_flags;
- void *pfil_arg;
+#define PFIL_VERSION 1
+
+/* Argument structure used by packet filters to register themselves. */
+struct pfil_hook_args {
+ int pa_version;
+ int pa_flags;
+ enum pfil_types pa_type;
+ pfil_func_t pa_func;
+ void *pa_ruleset;
+ const char *pa_modname;
+ const char *pa_rulname;
};
-#define PFIL_IN 0x00000001
-#define PFIL_OUT 0x00000002
-#define PFIL_WAITOK 0x00000004
-#define PFIL_FWD 0x00000008
-#define PFIL_ALL (PFIL_IN|PFIL_OUT)
+/* Public functions for pfil hook management by packet filters. */
+pfil_hook_t pfil_add_hook(struct pfil_hook_args *);
+void pfil_remove_hook(pfil_hook_t);
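
A hedged sketch of a filter registering itself through this interface; myfilter_chk and the module/ruleset names are hypothetical, but the fields follow struct pfil_hook_args as declared above:

    static pfil_return_t
    myfilter_chk(pfil_packet_t p, struct ifnet *ifp, int flags, void *ruleset,
        struct inpcb *inp)
    {

        /* Inspect *p.m here (an mbuf unless PFIL_MEMPTR is set). */
        return (PFIL_PASS);
    }

    static pfil_hook_t myfilter_hook;

    static void
    myfilter_init(void)
    {
        struct pfil_hook_args pha;

        pha.pa_version = PFIL_VERSION;
        pha.pa_flags = PFIL_IN | PFIL_OUT;
        pha.pa_type = PFIL_TYPE_IP4;
        pha.pa_func = myfilter_chk;
        pha.pa_ruleset = NULL;
        pha.pa_modname = "myfilter";
        pha.pa_rulname = "default";
        myfilter_hook = pfil_add_hook(&pha);
    }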
-typedef TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t;
+/* Argument structure used by ioctl() and packet filters to set filters. */
+struct pfil_link_args {
+ int pa_version;
+ int pa_flags;
+ union {
+ const char *pa_headname;
+ pfil_head_t pa_head;
+ };
+ union {
+ struct {
+ const char *pa_modname;
+ const char *pa_rulname;
+ };
+ pfil_hook_t pa_hook;
+ };
+};
-#define PFIL_TYPE_AF 1 /* key is AF_* type */
-#define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */
+/* Public function to configure filter chains. Used by ioctl() and filters. */
+int pfil_link(struct pfil_link_args *);
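
Registering a hook does not yet place it on any head; that is pfil_link()'s job. Continuing the sketch above, a kernel caller holding a hook pointer would pass PFIL_HOOKPTR and name the head (an assumption drawn from the flag names and the union layout; the ioctl path above instead supplies modname/rulname strings):

    struct pfil_link_args pla;

    pla.pa_version = PFIL_VERSION;
    pla.pa_flags = PFIL_IN | PFIL_OUT | PFIL_HOOKPTR;
    pla.pa_headname = "inet";
    pla.pa_hook = myfilter_hook;
    (void)pfil_link(&pla);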
-#define PFIL_FLAG_PRIVATE_LOCK 0x01 /* Personal lock instead of global */
+/* Argument structure used by inspection points to register themselves. */
+struct pfil_head_args {
+ int pa_version;
+ int pa_flags;
+ enum pfil_types pa_type;
+ const char *pa_headname;
+};
+/* Public functions for pfil head management by inspection points. */
+pfil_head_t pfil_head_register(struct pfil_head_args *);
+void pfil_head_unregister(pfil_head_t);
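
The mirror image for an intercept point: it describes itself with struct pfil_head_args and keeps the returned handle for pfil_run_hooks(). A sketch along the lines of what an IPv4 input path might do (the "inet" head name is an assumption):

    static pfil_head_t inet_pfil_head;

    static void
    inet_pfil_init(void)
    {
        struct pfil_head_args args;

        args.pa_version = PFIL_VERSION;
        args.pa_flags = PFIL_IN | PFIL_OUT;
        args.pa_type = PFIL_TYPE_IP4;
        args.pa_headname = "inet";
        inet_pfil_head = pfil_head_register(&args);
    }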
+
+/* Public functions to run the packet inspection by inspection points. */
+int pfil_run_hooks(struct pfil_head *, pfil_packet_t, struct ifnet *, int,
+ struct inpcb *inp);
/*
- * A pfil head is created by each protocol or packet intercept point.
- * For packet is then run through the hook chain for inspection.
+ * Minimally exposed structure that lets protocols avoid a function
+ * call when no filters are present, plus macros to do the check.
*/
-struct pfil_head {
- pfil_chain_t ph_in;
- pfil_chain_t ph_out;
- int ph_type;
- int ph_nhooks;
-#if defined( __linux__ ) || defined( _WIN32 )
- rwlock_t ph_mtx;
-#else
- struct rmlock *ph_plock; /* Pointer to the used lock */
- struct rmlock ph_lock; /* Private lock storage */
- int flags;
-#endif
- union {
- u_long phu_val;
- void *phu_ptr;
- } ph_un;
-#define ph_af ph_un.phu_val
-#define ph_ifnet ph_un.phu_ptr
- LIST_ENTRY(pfil_head) ph_list;
+struct _pfil_head {
+ int head_nhooksin;
+ int head_nhooksout;
};
+#define PFIL_HOOKED_IN(p) (((struct _pfil_head *)(p))->head_nhooksin > 0)
+#define PFIL_HOOKED_OUT(p) (((struct _pfil_head *)(p))->head_nhooksout > 0)
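
These macros read _pfil_head directly, so a protocol pays no function-call cost when no hooks are linked. The intended fast path at an inspection point looks roughly like this (a sketch; inet_pfil_head is the handle from the registration example above):

    if (PFIL_HOOKED_IN(inet_pfil_head)) {
        if (pfil_run_hooks(inet_pfil_head, &m, ifp, PFIL_IN, NULL) !=
            PFIL_PASS)
            return;        /* mbuf consumed or dropped */
        /* Otherwise m may have been modified or replaced. */
    }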
-VNET_DECLARE(struct rmlock, pfil_lock);
-#define V_pfil_lock VNET(pfil_lock)
-
-/* Public functions for pfil hook management by packet filters. */
-struct pfil_head *pfil_head_get(int, u_long);
-int pfil_add_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *);
-int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *);
-int pfil_remove_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *);
-int pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *);
-#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
-
-/* Public functions to run the packet inspection by protocols. */
-int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *, int,
- int, struct inpcb *inp);
-
-/* Public functions for pfil head management by protocols. */
-int pfil_head_register(struct pfil_head *);
-int pfil_head_unregister(struct pfil_head *);
-
-/* Public pfil locking functions for self managed locks by packet filters. */
-int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *);
-void pfil_rlock(struct pfil_head *, struct rm_priotracker *);
-void pfil_runlock(struct pfil_head *, struct rm_priotracker *);
-void pfil_wlock(struct pfil_head *);
-void pfil_wunlock(struct pfil_head *);
-int pfil_wowned(struct pfil_head *ph);
+/*
+ * Allocate an mbuf to be used instead of a memory pointer.
+ */
+int pfil_realloc(pfil_packet_t *, int, struct ifnet *);
+#endif /* _KERNEL */
#endif /* _NET_PFIL_H_ */
diff --git a/freebsd/sys/net/pfvar.h b/freebsd/sys/net/pfvar.h
index 2924c06d..bfa7e773 100644
--- a/freebsd/sys/net/pfvar.h
+++ b/freebsd/sys/net/pfvar.h
@@ -41,6 +41,7 @@
#include <sys/cpuset.h>
#include <sys/malloc.h>
#include <sys/refcount.h>
+#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/tree.h>
@@ -95,6 +96,9 @@ struct pf_addr_wrap {
#ifdef _KERNEL
+SYSCTL_DECL(_net_pf);
+MALLOC_DECLARE(M_PFHASH);
+
struct pfi_dynaddr {
TAILQ_ENTRY(pfi_dynaddr) entry;
struct pf_addr pfid_addr4;
@@ -1017,6 +1021,17 @@ struct pfr_tstats {
int pfrts_cnt;
int pfrts_refcnt[PFR_REFCNT_MAX];
};
+
+struct pfr_ktstats {
+ struct pfr_table pfrts_t;
+ counter_u64_t pfrkts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ counter_u64_t pfrkts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ counter_u64_t pfrkts_match;
+ counter_u64_t pfrkts_nomatch;
+ long pfrkts_tzero;
+ int pfrkts_cnt;
+ int pfrkts_refcnt[PFR_REFCNT_MAX];
+};
#define pfrts_name pfrts_t.pfrt_name
#define pfrts_flags pfrts_t.pfrt_flags
@@ -1030,8 +1045,9 @@ union sockaddr_union {
#endif /* _SOCKADDR_UNION_DEFINED */
struct pfr_kcounters {
- u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
- u_int64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ counter_u64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ counter_u64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ long pfrkc_tzero;
};
SLIST_HEAD(pfr_kentryworkq, pfr_kentry);
@@ -1039,8 +1055,7 @@ struct pfr_kentry {
struct radix_node pfrke_node[2];
union sockaddr_union pfrke_sa;
SLIST_ENTRY(pfr_kentry) pfrke_workq;
- struct pfr_kcounters *pfrke_counters;
- long pfrke_tzero;
+ struct pfr_kcounters pfrke_counters;
u_int8_t pfrke_af;
u_int8_t pfrke_net;
u_int8_t pfrke_not;
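
The counter fields in pfr_ktstats and pfr_kcounters switch from plain u_int64_t to counter_u64_t, i.e. the per-CPU counter(9) facility: updates become lock-free per-CPU increments and reads sum across CPUs, at the cost of explicit allocation. A minimal sketch of that idiom:

    #include <sys/param.h>
    #include <sys/malloc.h>
    #include <sys/counter.h>

    static counter_u64_t pkts;

    static void
    stats_init(void)
    {
        pkts = counter_u64_alloc(M_WAITOK);
    }

    static void
    stats_update(void)
    {
        counter_u64_add(pkts, 1);          /* per-CPU, lock-free */
    }

    static uint64_t
    stats_read(void)
    {
        return (counter_u64_fetch(pkts));  /* sums across all CPUs */
    }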
@@ -1050,7 +1065,7 @@ struct pfr_kentry {
SLIST_HEAD(pfr_ktableworkq, pfr_ktable);
RB_HEAD(pfr_ktablehead, pfr_ktable);
struct pfr_ktable {
- struct pfr_tstats pfrkt_ts;
+ struct pfr_ktstats pfrkt_kts;
RB_ENTRY(pfr_ktable) pfrkt_tree;
SLIST_ENTRY(pfr_ktable) pfrkt_workq;
struct radix_node_head *pfrkt_ip4;
@@ -1061,18 +1076,18 @@ struct pfr_ktable {
long pfrkt_larg;
int pfrkt_nflags;
};
-#define pfrkt_t pfrkt_ts.pfrts_t
+#define pfrkt_t pfrkt_kts.pfrts_t
#define pfrkt_name pfrkt_t.pfrt_name
#define pfrkt_anchor pfrkt_t.pfrt_anchor
#define pfrkt_ruleset pfrkt_t.pfrt_ruleset
#define pfrkt_flags pfrkt_t.pfrt_flags
-#define pfrkt_cnt pfrkt_ts.pfrts_cnt
-#define pfrkt_refcnt pfrkt_ts.pfrts_refcnt
-#define pfrkt_packets pfrkt_ts.pfrts_packets
-#define pfrkt_bytes pfrkt_ts.pfrts_bytes
-#define pfrkt_match pfrkt_ts.pfrts_match
-#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch
-#define pfrkt_tzero pfrkt_ts.pfrts_tzero
+#define pfrkt_cnt pfrkt_kts.pfrkts_cnt
+#define pfrkt_refcnt pfrkt_kts.pfrkts_refcnt
+#define pfrkt_packets pfrkt_kts.pfrkts_packets
+#define pfrkt_bytes pfrkt_kts.pfrkts_bytes
+#define pfrkt_match pfrkt_kts.pfrkts_match
+#define pfrkt_nomatch pfrkt_kts.pfrkts_nomatch
+#define pfrkt_tzero pfrkt_kts.pfrkts_tzero
/* keep synced with pfi_kif, used in RB_FIND */
struct pfi_kif_cmp {
@@ -1601,7 +1616,7 @@ VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]);
#define V_pf_stateid VNET(pf_stateid)
TAILQ_HEAD(pf_altqqueue, pf_altq);
-VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]);
+VNET_DECLARE(struct pf_altqqueue, pf_altqs[4]);
#define V_pf_altqs VNET(pf_altqs)
VNET_DECLARE(struct pf_palist, pf_pabuf);
#define V_pf_pabuf VNET(pf_pabuf)
@@ -1616,8 +1631,12 @@ VNET_DECLARE(u_int32_t, ticket_pabuf);
#define V_ticket_pabuf VNET(ticket_pabuf)
VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active);
#define V_pf_altqs_active VNET(pf_altqs_active)
+VNET_DECLARE(struct pf_altqqueue *, pf_altq_ifs_active);
+#define V_pf_altq_ifs_active VNET(pf_altq_ifs_active)
VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive);
#define V_pf_altqs_inactive VNET(pf_altqs_inactive)
+VNET_DECLARE(struct pf_altqqueue *, pf_altq_ifs_inactive);
+#define V_pf_altq_ifs_inactive VNET(pf_altq_ifs_inactive)
VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules);
#define V_pf_unlinked_rules VNET(pf_unlinked_rules)
diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c
index 3cd909c1..36f3bf41 100644
--- a/freebsd/sys/net/route.c
+++ b/freebsd/sys/net/route.c
@@ -625,11 +625,12 @@ rtredirect_fib(struct sockaddr *dst,
int error = 0;
short *stat = NULL;
struct rt_addrinfo info;
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct rib_head *rnh;
ifa = NULL;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
if (rnh == NULL) {
error = EAFNOSUPPORT;
@@ -724,7 +725,7 @@ done:
if (rt)
RTFREE_LOCKED(rt);
out:
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
if (error)
V_rtstat.rts_badredirect++;
else if (stat != NULL)
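
The change from the argument-less NET_EPOCH_ENTER() to NET_EPOCH_ENTER(et) in this hunk reflects the new epoch(9) calling convention: the caller declares the tracker. The recurring pattern throughout this update:

    struct epoch_tracker et;

    NET_EPOCH_ENTER(et);
    /* ... read epoch-protected ifnet/route state ... */
    NET_EPOCH_EXIT(et);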
@@ -1307,11 +1308,14 @@ rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info)
/*
* Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined,
* it will be referenced so the caller must free it.
+ *
+ * Assume basic consistency checks are executed by callers:
+ * RTAX_DST exists; if RTF_GATEWAY is set, RTAX_GATEWAY exists as well.
*/
int
rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
{
- struct ifaddr *ifa;
+ struct epoch_tracker et;
int needref, error;
/*
@@ -1320,22 +1324,55 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
*/
error = 0;
needref = (info->rti_ifa == NULL);
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
+
+ /* If we have an interface specified by the ifindex in the address, use it. */
if (info->rti_ifp == NULL && ifpaddr != NULL &&
- ifpaddr->sa_family == AF_LINK &&
- (ifa = ifa_ifwithnet(ifpaddr, 0, fibnum)) != NULL) {
- info->rti_ifp = ifa->ifa_ifp;
+ ifpaddr->sa_family == AF_LINK) {
+ const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)ifpaddr;
+ if (sdl->sdl_index != 0)
+ info->rti_ifp = ifnet_byindex_locked(sdl->sdl_index);
}
+ /*
+ * If we have source address specified, try to find it
+ * TODO: avoid enumerating all ifas on all interfaces.
+ */
if (info->rti_ifa == NULL && ifaaddr != NULL)
info->rti_ifa = ifa_ifwithaddr(ifaaddr);
if (info->rti_ifa == NULL) {
struct sockaddr *sa;
- sa = ifaaddr != NULL ? ifaaddr :
- (gateway != NULL ? gateway : dst);
- if (sa != NULL && info->rti_ifp != NULL)
+ /*
+ * Most common use case for the userland-supplied routes.
+ *
+ * Choose sockaddr to select ifa.
+ * -- if ifp is set --
+ * Order of preference:
+ * 1) IFA address
+ * 2) gateway address
+ * Note: for interface routes the link-level gateway address
+ * is specified to indicate the interface index without
+ * specifying RTF_GATEWAY. In this case, ignore the gateway.
+ * Note: the gateway AF may be different from the dst AF. In this
+ * case, ignore the gateway as well.
+ * 3) final destination.
+ * 4) if all of these fail, try to get at least a link-level ifa.
+ * -- else --
+ * try to lookup gateway or dst in the routing table to get ifa
+ */
+ if (info->rti_info[RTAX_IFA] != NULL)
+ sa = info->rti_info[RTAX_IFA];
+ else if ((info->rti_flags & RTF_GATEWAY) != 0 &&
+ gateway->sa_family == dst->sa_family)
+ sa = gateway;
+ else
+ sa = dst;
+ if (info->rti_ifp != NULL) {
info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
- else if (dst != NULL && gateway != NULL)
+ /* Case 4 */
+ if (info->rti_ifa == NULL && gateway != NULL)
+ info->rti_ifa = ifaof_ifpforaddr(gateway, info->rti_ifp);
+ } else if (dst != NULL && gateway != NULL)
info->rti_ifa = ifa_ifwithroute(flags, dst, gateway,
fibnum);
else if (sa != NULL)
@@ -1348,7 +1385,7 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
ifa_ref(info->rti_ifa);
} else
error = ENETUNREACH;
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -1585,6 +1622,8 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
switch (req) {
case RTM_DELETE:
if (netmask) {
+ if (dst->sa_len > sizeof(mdst))
+ return (EINVAL);
rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
dst = (struct sockaddr *)&mdst;
}
@@ -1990,7 +2029,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
char tempbuf[_SOCKADDR_TMPSIZE];
int didwork = 0;
int a_failure = 0;
- static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+ struct sockaddr_dl *sdl = NULL;
struct rib_head *rnh;
if (flags & RTF_HOST) {
@@ -2045,7 +2084,14 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
dst = (struct sockaddr *)tempbuf;
}
- }
+ } else if (cmd == RTM_ADD) {
+ sdl = (struct sockaddr_dl *)tempbuf;
+ bzero(sdl, sizeof(struct sockaddr_dl));
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_len = sizeof(struct sockaddr_dl);
+ sdl->sdl_type = ifa->ifa_ifp->if_type;
+ sdl->sdl_index = ifa->ifa_ifp->if_index;
+ }
/*
* Now go through all the requested tables (fibs) and do the
* requested action. Realistically, this will either be fib 0
@@ -2108,8 +2154,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
* doing this for compatibility reasons
*/
if (cmd == RTM_ADD)
- info.rti_info[RTAX_GATEWAY] =
- (struct sockaddr *)&null_sdl;
+ info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sdl;
else
info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
info.rti_info[RTAX_NETMASK] = netmask;
@@ -2136,15 +2181,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
rt->rt_ifa = ifa;
}
#endif
- /*
- * doing this for compatibility reasons
- */
- if (cmd == RTM_ADD) {
- ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
- rt->rt_ifp->if_type;
- ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
- rt->rt_ifp->if_index;
- }
RT_ADDREF(rt);
RT_UNLOCK(rt);
rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum);
diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h
index c4333838..bdeb9869 100644
--- a/freebsd/sys/net/route.h
+++ b/freebsd/sys/net/route.h
@@ -210,6 +210,7 @@ struct rtentry {
#define NHF_DEFAULT 0x0080 /* Default route */
#define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */
#define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */
+#define NHF_HOST 0x0400 /* RTF_HOST */
/* Nexthop request flags */
#define NHR_IFAIF 0x01 /* Return ifa_ifp interface */
diff --git a/freebsd/sys/net/route_var.h b/freebsd/sys/net/route_var.h
index 9d0d1931..db3db4e3 100644
--- a/freebsd/sys/net/route_var.h
+++ b/freebsd/sys/net/route_var.h
@@ -67,6 +67,7 @@ fib_rte_to_nh_flags(int rt_flags)
uint16_t res;
res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0;
+ res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0;
res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0;
res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0;
res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0;
diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c
index e1b87095..6c457a9d 100644
--- a/freebsd/sys/net/rtsock.c
+++ b/freebsd/sys/net/rtsock.c
@@ -33,6 +33,7 @@
* @(#)rtsock.c 8.7 (Berkeley) 10/12/95
* $FreeBSD$
*/
+#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_mpath.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
@@ -55,6 +56,11 @@
#include <sys/sysctl.h>
#include <sys/systm.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_lex.h>
+#endif
+
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
@@ -448,6 +454,9 @@ static int
rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
{
+#if defined(INET) || defined(INET6)
+ struct epoch_tracker et;
+#endif
/* First, see if the returned address is part of the jail. */
if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
@@ -468,7 +477,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
* Try to find an address on the given outgoing interface
* that belongs to the jail.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct sockaddr *sa;
sa = ifa->ifa_addr;
@@ -480,7 +489,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (!found) {
/*
* As a last resort return the 'default' jail address.
@@ -510,7 +519,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
* Try to find an address on the given outgoing interface
* that belongs to the jail.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct sockaddr *sa;
sa = ifa->ifa_addr;
@@ -523,7 +532,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (!found) {
/*
* As a last resort return the 'default' jail address.
@@ -627,6 +636,8 @@ route_output(struct mbuf *m, struct socket *so, ...)
if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info))
senderr(EINVAL);
+ if (rtm->rtm_flags & RTF_RNH_LOCKED)
+ senderr(EINVAL);
info.rti_flags = rtm->rtm_flags;
if (info.rti_info[RTAX_DST] == NULL ||
info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
@@ -798,16 +809,17 @@ route_output(struct mbuf *m, struct socket *so, ...)
if (rt->rt_ifp != NULL &&
rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
+ struct epoch_tracker et;
struct ifaddr *ifa;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1,
RT_ALL_FIBS);
if (ifa != NULL)
rt_maskedcopy(ifa->ifa_addr,
&laddr,
ifa->ifa_netmask);
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
} else
rt_maskedcopy(rt->rt_ifa->ifa_addr,
&laddr,
@@ -1571,7 +1583,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
struct rt_addrinfo info;
struct sockaddr_storage ss;
- IFNET_RLOCK_NOSLEEP_ASSERT();
+ NET_EPOCH_ASSERT();
if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
return 0;
@@ -1765,7 +1777,7 @@ sysctl_iflist(int af, struct walkarg *w)
bzero((caddr_t)&info, sizeof(info));
bzero(&ifd, sizeof(ifd));
- NET_EPOCH_ENTER_ET(et);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
@@ -1815,7 +1827,7 @@ sysctl_iflist(int af, struct walkarg *w)
info.rti_info[RTAX_BRD] = NULL;
}
done:
- NET_EPOCH_EXIT_ET(et);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -1823,6 +1835,7 @@ static int
sysctl_ifmalist(int af, struct walkarg *w)
{
struct rt_addrinfo info;
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct ifmultiaddr *ifma;
struct ifnet *ifp;
@@ -1831,13 +1844,12 @@ sysctl_ifmalist(int af, struct walkarg *w)
error = 0;
bzero((caddr_t)&info, sizeof(info));
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
ifa = ifp->if_addr;
info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
- IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (af && af != ifma->ifma_addr->sa_family)
continue;
@@ -1864,11 +1876,10 @@ sysctl_ifmalist(int af, struct walkarg *w)
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
if (error != 0)
break;
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -1955,11 +1966,13 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
for (error = 0; error == 0 && i <= lim; i++) {
rnh = rt_tables_get_rnh(fib, i);
if (rnh != NULL) {
+ struct epoch_tracker et;
+
RIB_RLOCK(rnh);
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
error = rnh->rnh_walktree(&rnh->head,
sysctl_dumpentry, &w);
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
RIB_RUNLOCK(rnh);
} else if (af != 0)
error = EAFNOSUPPORT;
@@ -2008,3 +2021,408 @@ static struct domain routedomain = {
};
VNET_DOMAIN_SET(route);
+
+#ifdef DDB
+/*
+ * Unfortunately, RTF_ values are expressed as raw masks rather than powers of
+ * 2, so we cannot use them as nice C99 initializer indices below.
+ */
+static const char * const rtf_flag_strings[] = {
+ "UP",
+ "GATEWAY",
+ "HOST",
+ "REJECT",
+ "DYNAMIC",
+ "MODIFIED",
+ "DONE",
+ "UNUSED_0x80",
+ "UNUSED_0x100",
+ "XRESOLVE",
+ "LLDATA",
+ "STATIC",
+ "BLACKHOLE",
+ "UNUSED_0x2000",
+ "PROTO2",
+ "PROTO1",
+ "UNUSED_0x10000",
+ "UNUSED_0x20000",
+ "PROTO3",
+ "FIXEDMTU",
+ "PINNED",
+ "LOCAL",
+ "BROADCAST",
+ "MULTICAST",
+ /* Big gap. */
+ [28] = "STICKY",
+ [30] = "RNH_LOCKED",
+ [31] = "GWFLAG_COMPAT",
+};
+
+static const char * __pure
+rt_flag_name(unsigned idx)
+{
+ if (idx >= nitems(rtf_flag_strings))
+ return ("INVALID_FLAG");
+ if (rtf_flag_strings[idx] == NULL)
+ return ("UNKNOWN");
+ return (rtf_flag_strings[idx]);
+}
+
+static void
+rt_dumpaddr_ddb(const char *name, const struct sockaddr *sa)
+{
+ char buf[INET6_ADDRSTRLEN], *res;
+
+ res = NULL;
+ if (sa == NULL)
+ res = "NULL";
+ else if (sa->sa_family == AF_INET) {
+ res = inet_ntop(AF_INET,
+ &((const struct sockaddr_in *)sa)->sin_addr,
+ buf, sizeof(buf));
+ } else if (sa->sa_family == AF_INET6) {
+ res = inet_ntop(AF_INET6,
+ &((const struct sockaddr_in6 *)sa)->sin6_addr,
+ buf, sizeof(buf));
+ } else if (sa->sa_family == AF_LINK) {
+ res = "on link";
+ }
+
+ if (res != NULL) {
+ db_printf("%s <%s> ", name, res);
+ return;
+ }
+
+ db_printf("%s <af:%d> ", name, sa->sa_family);
+}
+
+static int
+rt_dumpentry_ddb(struct radix_node *rn, void *arg __unused)
+{
+ struct sockaddr_storage ss;
+ struct rtentry *rt;
+ int flags, idx;
+
+ /* If RNTORT is important, put it in a header. */
+ rt = (void *)rn;
+
+ rt_dumpaddr_ddb("dst", rt_key(rt));
+ rt_dumpaddr_ddb("gateway", rt->rt_gateway);
+ rt_dumpaddr_ddb("netmask", rtsock_fix_netmask(rt_key(rt), rt_mask(rt),
+ &ss));
+ if (rt->rt_ifp != NULL && (rt->rt_ifp->if_flags & IFF_DYING) == 0) {
+ rt_dumpaddr_ddb("ifp", rt->rt_ifp->if_addr->ifa_addr);
+ rt_dumpaddr_ddb("ifa", rt->rt_ifa->ifa_addr);
+ }
+
+ db_printf("flags ");
+ flags = rt->rt_flags;
+ if (flags == 0)
+ db_printf("none");
+
+ while ((idx = ffs(flags)) > 0) {
+ idx--;
+
+ if (flags != rt->rt_flags)
+ db_printf(",");
+ db_printf("%s", rt_flag_name(idx));
+
+ flags &= ~(1ul << idx);
+ }
+
+ db_printf("\n");
+ return (0);
+}
+
+DB_SHOW_COMMAND(routetable, db_show_routetable_cmd)
+{
+ struct rib_head *rnh;
+ int error, i, lim;
+
+ if (have_addr)
+ i = lim = addr;
+ else {
+ i = 1;
+ lim = AF_MAX;
+ }
+
+ for (; i <= lim; i++) {
+ rnh = rt_tables_get_rnh(0, i);
+ if (rnh == NULL) {
+ if (have_addr) {
+ db_printf("%s: AF %d not supported?\n",
+ __func__, i);
+ break;
+ }
+ continue;
+ }
+
+ if (!have_addr && i > 1)
+ db_printf("\n");
+
+ db_printf("Route table for AF %d%s%s%s:\n", i,
+ (i == AF_INET || i == AF_INET6) ? " (" : "",
+ (i == AF_INET) ? "INET" : (i == AF_INET6) ? "INET6" : "",
+ (i == AF_INET || i == AF_INET6) ? ")" : "");
+
+ error = rnh->rnh_walktree(&rnh->head, rt_dumpentry_ddb, NULL);
+ if (error != 0)
+ db_printf("%s: walktree(%d): %d\n", __func__, i,
+ error);
+ }
+}
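
From the ddb(4) prompt, the new command walks all address families, or just one when given its AF number as an argument, e.g.:

    db> show routetable
    db> show routetable 2        (AF_INET is 2)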
+
+_DB_FUNC(_show, route, db_show_route_cmd, db_show_table, CS_OWN, NULL)
+{
+ char buf[INET6_ADDRSTRLEN], *bp;
+ const void *dst_addrp;
+ struct sockaddr *dstp;
+ struct rtentry *rt;
+ union {
+ struct sockaddr_in dest_sin;
+ struct sockaddr_in6 dest_sin6;
+ } u;
+ uint16_t hextets[8];
+ unsigned i, tets;
+ int t, af, exp, tokflags;
+
+ /*
+ * Undecoded address family. No double-colon expansion seen yet.
+ */
+ af = -1;
+ exp = -1;
+ /* Assume INET6 to start; we can work back if guess was wrong. */
+ tokflags = DRT_WSPACE | DRT_HEX | DRT_HEXADECIMAL;
+
+ /*
+ * db_command has lexed 'show route' for us.
+ */
+ t = db_read_token_flags(tokflags);
+ if (t == tWSPACE)
+ t = db_read_token_flags(tokflags);
+
+ /*
+ * tEOL: Just 'show route' isn't a valid mode.
+ * tMINUS: It's either '-h' or some invalid option. Regardless, usage.
+ */
+ if (t == tEOL || t == tMINUS)
+ goto usage;
+
+ db_unread_token(t);
+
+ tets = nitems(hextets);
+
+ /*
+ * Each loop iteration, we expect to read one octet (v4) or hextet
+ * (v6), followed by an appropriate field separator ('.' or ':' or
+ * '::').
+ *
+ * At the start of each loop, we're looking for a number (octet or
+ * hextet).
+ *
+ * INET6 addresses have a special case where they may begin with '::'.
+ */
+ for (i = 0; i < tets; i++) {
+ t = db_read_token_flags(tokflags);
+
+ if (t == tCOLONCOLON) {
+ /* INET6 with leading '::' or invalid. */
+ if (i != 0) {
+ db_printf("Parse error: unexpected extra "
+ "colons.\n");
+ goto exit;
+ }
+
+ af = AF_INET6;
+ exp = i;
+ hextets[i] = 0;
+ continue;
+ } else if (t == tNUMBER) {
+ /*
+ * Lexer separates out '-' as tMINUS, but make the
+ * assumption explicit here.
+ */
+ MPASS(db_tok_number >= 0);
+
+ if (af == AF_INET && db_tok_number > UINT8_MAX) {
+ db_printf("Not a valid v4 octet: %ld\n",
+ (long)db_tok_number);
+ goto exit;
+ }
+ hextets[i] = db_tok_number;
+ } else if (t == tEOL) {
+ /*
+ * We can only detect the end of an IPv6 address in
+ * compact representation with EOL.
+ */
+ if (af != AF_INET6 || exp < 0) {
+ db_printf("Parse failed. Got unexpected EOF "
+ "when the address is not a compact-"
+ "representation IPv6 address.\n");
+ goto exit;
+ }
+ break;
+ } else {
+ db_printf("Parse failed. Unexpected token %d.\n", t);
+ goto exit;
+ }
+
+ /* Next, look for a separator, if appropriate. */
+ if (i == tets - 1)
+ continue;
+
+ t = db_read_token_flags(tokflags);
+ if (af < 0) {
+ if (t == tCOLON) {
+ af = AF_INET6;
+ continue;
+ }
+ if (t == tCOLONCOLON) {
+ af = AF_INET6;
+ i++;
+ hextets[i] = 0;
+ exp = i;
+ continue;
+ }
+ if (t == tDOT) {
+ unsigned hn, dn;
+
+ af = AF_INET;
+ /* Need to fixup the first parsed number. */
+ if (hextets[0] > 0x255 ||
+ (hextets[0] & 0xf0) > 0x90 ||
+ (hextets[0] & 0xf) > 9) {
+ db_printf("Not a valid v4 octet: %x\n",
+ hextets[0]);
+ goto exit;
+ }
+
+ hn = hextets[0];
+ dn = (hn >> 8) * 100 +
+ ((hn >> 4) & 0xf) * 10 +
+ (hn & 0xf);
+
+ hextets[0] = dn;
+
+ /* Switch to decimal for remaining octets. */
+ tokflags &= ~DRT_RADIX_MASK;
+ tokflags |= DRT_DECIMAL;
+
+ tets = 4;
+ continue;
+ }
+
+ db_printf("Parse error. Unexpected token %d.\n", t);
+ goto exit;
+ } else if (af == AF_INET) {
+ if (t == tDOT)
+ continue;
+ db_printf("Expected '.' (%d) between octets but got "
+ "(%d).\n", tDOT, t);
+ goto exit;
+
+ } else if (af == AF_INET6) {
+ if (t == tCOLON)
+ continue;
+ if (t == tCOLONCOLON) {
+ if (exp < 0) {
+ i++;
+ hextets[i] = 0;
+ exp = i;
+ continue;
+ }
+ db_printf("Got bogus second '::' in v6 "
+ "address.\n");
+ goto exit;
+ }
+ if (t == tEOL) {
+ /*
+ * Handle in the earlier part of the loop
+ * because we need to handle trailing :: too.
+ */
+ db_unread_token(t);
+ continue;
+ }
+
+ db_printf("Expected ':' (%d) or '::' (%d) between "
+ "hextets but got (%d).\n", tCOLON, tCOLONCOLON, t);
+ goto exit;
+ }
+ }
+
+ /* Check for trailing garbage. */
+ if (i == tets) {
+ t = db_read_token_flags(tokflags);
+ if (t != tEOL) {
+ db_printf("Got unexpected garbage after address "
+ "(%d).\n", t);
+ goto exit;
+ }
+ }
+
+ /*
+ * Need to expand compact INET6 addresses.
+ *
+ * Technically, '::' MUST NOT be used to shorten a single zero
+ * hextet, but just in case, don't bother expanding that form
+ * (the exp >= 0 && i == tets case).
+ */
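+	/*
+	 * Worked example (illustrative): parsing "fe80::1" leaves
+	 * hextets = { fe80, 0, 1 } with exp = 1 and i = 3, so the
+	 * memmove below shifts the trailing 1 into hextets[7] and the
+	 * memset zeroes hextets[2..6], giving fe80:0:0:0:0:0:0:1.
+	 */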
+ if (af == AF_INET6 && exp >= 0 && i < tets) {
+ if (exp + 1 < i) {
+ memmove(&hextets[exp + 1 + (nitems(hextets) - i)],
+ &hextets[exp + 1],
+ (i - (exp + 1)) * sizeof(hextets[0]));
+ }
+ memset(&hextets[exp + 1], 0, (nitems(hextets) - i) *
+ sizeof(hextets[0]));
+ }
+
+ memset(&u, 0, sizeof(u));
+ if (af == AF_INET) {
+ u.dest_sin.sin_family = AF_INET;
+ u.dest_sin.sin_len = sizeof(u.dest_sin);
+ u.dest_sin.sin_addr.s_addr = htonl(
+ ((uint32_t)hextets[0] << 24) |
+ ((uint32_t)hextets[1] << 16) |
+ ((uint32_t)hextets[2] << 8) |
+ (uint32_t)hextets[3]);
+ dstp = (void *)&u.dest_sin;
+ dst_addrp = &u.dest_sin.sin_addr;
+ } else if (af == AF_INET6) {
+ u.dest_sin6.sin6_family = AF_INET6;
+ u.dest_sin6.sin6_len = sizeof(u.dest_sin6);
+ for (i = 0; i < nitems(hextets); i++)
+ u.dest_sin6.sin6_addr.s6_addr16[i] = htons(hextets[i]);
+ dstp = (void *)&u.dest_sin6;
+ dst_addrp = &u.dest_sin6.sin6_addr;
+ } else {
+ MPASS(false);
+ /* UNREACHABLE */
+ /* Appease Clang false positive: */
+ dstp = NULL;
+ }
+
+ bp = inet_ntop(af, dst_addrp, buf, sizeof(buf));
+ if (bp != NULL)
+ db_printf("Looking up route to destination '%s'\n", bp);
+
+ CURVNET_SET(vnet0);
+ rt = rtalloc1(dstp, 0, RTF_RNH_LOCKED);
+ CURVNET_RESTORE();
+
+ if (rt == NULL) {
+ db_printf("Could not get route for that server.\n");
+ return;
+ }
+
+ rt_dumpentry_ddb((void *)rt, NULL);
+ RTFREE_LOCKED(rt);
+
+ return;
+usage:
+ db_printf("Usage: 'show route <address>'\n"
+ " Currently accepts only dotted-decimal INET or colon-separated\n"
+ " hextet INET6 addresses.\n");
+exit:
+ db_skip_to_eol();
+}
+#endif
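
Matching the usage string above, typical invocations from the ddb(4) prompt would be:

    db> show route 10.0.0.1
    db> show route fe80::1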
diff --git a/freebsd/sys/net/sff8472.h b/freebsd/sys/net/sff8472.h
index d38fcfc0..9fa465a1 100644
--- a/freebsd/sys/net/sff8472.h
+++ b/freebsd/sys/net/sff8472.h
@@ -379,7 +379,7 @@ enum {
/*
* Table 3.2 Identifier values.
- * Identifier constants has taken from SFF-8024 rev 4.2 table 4.1
+ * Identifier constants are taken from SFF-8024 rev 4.6 table 4.1
* (as referenced by table 3.2 footer)
* */
enum {
@@ -396,10 +396,10 @@ enum {
SFF_8024_ID_X2 = 0xA, /* X2 */
SFF_8024_ID_DWDM_SFP = 0xB, /* DWDM-SFP */
SFF_8024_ID_QSFP = 0xC, /* QSFP */
- SFF_8024_ID_QSFPPLUS = 0xD, /* QSFP+ */
+ SFF_8024_ID_QSFPPLUS = 0xD, /* QSFP+ or later */
SFF_8024_ID_CXP = 0xE, /* CXP */
- SFF_8024_ID_HD4X = 0xF, /* Shielded Mini Multilane HD 4X */
- SFF_8024_ID_HD8X = 0x10, /* Shielded Mini Multilane HD 8X */
+ SFF_8024_ID_HD4X = 0xF, /* Shielded Mini Multilane HD 4X */
+ SFF_8024_ID_HD8X = 0x10, /* Shielded Mini Multilane HD 8X */
SFF_8024_ID_QSFP28 = 0x11, /* QSFP28 or later */
SFF_8024_ID_CXP2 = 0x12, /* CXP2 (aka CXP28) */
SFF_8024_ID_CDFP = 0x13, /* CDFP (Style 1/Style 2) */
@@ -408,34 +408,49 @@ enum {
SFF_8024_ID_CDFP3 = 0x16, /* CDFP (Style3) */
SFF_8024_ID_MICROQSFP = 0x17, /* microQSFP */
SFF_8024_ID_QSFP_DD = 0x18, /* QSFP-DD 8X Pluggable Transceiver */
- SFF_8024_ID_LAST = SFF_8024_ID_QSFP_DD
- };
-
-static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = {"Unknown",
- "GBIC",
- "SFF",
- "SFP/SFP+/SFP28",
- "XBI",
- "Xenpak",
- "XFP",
- "XFF",
- "XFP-E",
- "XPAK",
- "X2",
- "DWDM-SFP/SFP+",
- "QSFP",
- "QSFP+",
- "CXP",
- "HD4X",
- "HD8X",
- "QSFP28",
- "CXP2",
- "CDFP",
- "SMM4",
- "SMM8",
- "CDFP3",
- "microQSFP",
- "QSFP-DD"};
+ SFF_8024_ID_OSFP8X = 0x19, /* OSFP 8X Pluggable Transceiver */
+ SFF_8024_ID_SFP_DD = 0x1A, /* SFP-DD 2X Pluggable Transceiver */
+ SFF_8024_ID_DSFP = 0x1B, /* DSFP Dual SFF Pluggable Transceiver */
+ SFF_8024_ID_X4ML = 0x1C, /* x4 MiniLink/OcuLink */
+ SFF_8024_ID_X8ML = 0x1D, /* x8 MiniLink */
+ SFF_8024_ID_QSFP_CMIS = 0x1E, /* QSFP+ or later w/ Common Management
+ Interface Specification */
+ SFF_8024_ID_LAST = SFF_8024_ID_QSFP_CMIS
+};
+
+static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = {
+ "Unknown",
+ "GBIC",
+ "SFF",
+ "SFP/SFP+/SFP28",
+ "XBI",
+ "Xenpak",
+ "XFP",
+ "XFF",
+ "XFP-E",
+ "XPAK",
+ "X2",
+ "DWDM-SFP/SFP+",
+ "QSFP",
+ "QSFP+",
+ "CXP",
+ "HD4X",
+ "HD8X",
+ "QSFP28",
+ "CXP2",
+ "CDFP",
+ "SMM4",
+ "SMM8",
+ "CDFP3",
+ "microQSFP",
+ "QSFP-DD",
+ "QSFP8X",
+ "SFP-DD",
+ "DSFP",
+ "x4MiniLink/OcuLink",
+ "x8MiniLink",
+ "QSFP+(CIMS)"
+};
/* Keep compatibility with old definitions */
#define SFF_8472_ID_UNKNOWN SFF_8024_ID_UNKNOWN
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h
index b4168750..a8c9887e 100644
--- a/freebsd/sys/net/vnet.h
+++ b/freebsd/sys/net/vnet.h
@@ -273,7 +273,8 @@ extern struct sx vnet_sxlock;
/* struct _hack is to stop this from being used with static data */
#define VNET_DEFINE(t, n) \
struct _hack; t VNET_NAME(n) __section(VNET_SETNAME) __used
-#if defined(KLD_MODULE) && (defined(__aarch64__) || defined(__riscv))
+#if defined(KLD_MODULE) && (defined(__aarch64__) || defined(__riscv) \
+ || defined(__powerpc64__))
/*
* As with DPCPU_DEFINE_STATIC we are unable to mark this data as static
* in modules on some architectures.