author		Sebastian Huber <sebastian.huber@embedded-brains.de>	2013-11-04 11:33:00 +0100
committer	Sebastian Huber <sebastian.huber@embedded-brains.de>	2013-11-04 15:28:21 +0100
commit		af5333e0a02b2295304d4e029b15ee15a4fe2b3a (patch)
tree		c5c43680d374f58b487eeeaf18fb7ec6b84ba074 /freebsd/sys/net/bpf.c
parent		BUS_SPACE(9): Use simple memory model for ARM (diff)
download	rtems-libbsd-af5333e0a02b2295304d4e029b15ee15a4fe2b3a.tar.bz2
Update to FreeBSD 8.4
Diffstat (limited to 'freebsd/sys/net/bpf.c')
-rw-r--r--	freebsd/sys/net/bpf.c	633
1 file changed, 470 insertions(+), 163 deletions(-)
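Editor's note: the core of this update is the new BPF locking model described in the large comment the patch adds below: a global BPF_LOCK(), a per-interface rwlock taken for reading on the hot bpf_tap()/bpf_mtap() paths, and a per-descriptor mutex taken only once a filter actually matches. A minimal userspace sketch of that ordering discipline, modeled with pthreads (all names here are illustrative stand-ins for the kernel primitives, not code from the patch):

	/*
	 * Sketch of the three-level lock order the patch enforces.
	 * In the kernel these are BPF_LOCK() (global), the per-interface
	 * rwlock (BPFIF_RLOCK/BPFIF_WLOCK) and the per-descriptor mutex
	 * (BPFD_LOCK). Hypothetical names, userland model only.
	 */
	#include <pthread.h>

	static pthread_mutex_t bpf_global_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_rwlock_t iflock = PTHREAD_RWLOCK_INITIALIZER;
	static pthread_mutex_t dlock = PTHREAD_MUTEX_INITIALIZER;

	/* Control path (attach/detach/setf): global -> interface(w) -> descriptor. */
	static void
	control_path(void)
	{
		pthread_mutex_lock(&bpf_global_lock);
		pthread_rwlock_wrlock(&iflock);
		pthread_mutex_lock(&dlock);
		/* ... change bd_bif, filters, reader/writer lists ... */
		pthread_mutex_unlock(&dlock);
		pthread_rwlock_unlock(&iflock);
		pthread_mutex_unlock(&bpf_global_lock);
	}

	/* Data path (bpf_tap/bpf_mtap): shared interface lock, descriptor lock
	 * only for descriptors whose filter matched the packet. */
	static void
	data_path(int filter_matched)
	{
		pthread_rwlock_rdlock(&iflock);
		if (filter_matched) {
			pthread_mutex_lock(&dlock);
			/* ... catchpacket() fills buffers under the descriptor lock ... */
			pthread_mutex_unlock(&dlock);
		}
		pthread_rwlock_unlock(&iflock);
	}

	int
	main(void)
	{
		control_path();
		data_path(1);
		return (0);
	}

The payoff is visible in the bpf_tap()/bpf_mtap() hunks of the patch below: concurrent taps share the interface lock, and packets rejected by a filter never touch the descriptor mutex at all.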
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c
index d9223313..179d5f0a 100644
--- a/freebsd/sys/net/bpf.c
+++ b/freebsd/sys/net/bpf.c
@@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$");
 
 #include <rtems/bsd/sys/types.h>
 #include <rtems/bsd/sys/param.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
@@ -68,6 +70,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/socket.h>
 
 #include <net/if.h>
+#define	BPF_INTERNAL
 #include <net/bpf.h>
 #include <net/bpf_buffer.h>
 #ifdef BPF_JITTER
@@ -141,6 +144,7 @@ static int	bpf_bpfd_cnt;
 
 static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
 static void	bpf_detachd(struct bpf_d *);
+static void	bpf_detachd_locked(struct bpf_d *);
 static void	bpf_freed(struct bpf_d *);
 static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
 		    struct sockaddr *, int *, struct bpf_insn *);
@@ -152,7 +156,7 @@ static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
 		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
 		    struct timeval *);
 static void	reset_d(struct bpf_d *);
-static int	 bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
+static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
 static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
 static int	bpf_setdlt(struct bpf_d *, u_int);
 static void	filt_bpfdetach(struct knote *);
@@ -170,6 +174,12 @@ SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
 SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
     bpf_stats_sysctl, "bpf statistics portal");
 
+static VNET_DEFINE(int, bpf_optimize_writers) = 0;
+#define	V_bpf_optimize_writers VNET(bpf_optimize_writers)
+SYSCTL_VNET_INT(_net_bpf, OID_AUTO, optimize_writers,
+    CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0,
+    "Do not send packets until BPF program is set");
+
 static	d_open_t	bpfopen;
 static	d_read_t	bpfread;
 static	d_write_t	bpfwrite;
@@ -191,6 +201,37 @@ static struct cdevsw bpf_cdevsw = {
 static struct filterops bpfread_filtops =
 	{ 1, NULL, filt_bpfdetach, filt_bpfread };
 
+eventhandler_tag	bpf_ifdetach_cookie = NULL;
+
+/*
+ * LOCKING MODEL USED BY BPF:
+ * Locks:
+ * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
+ * some global counters and every bpf_if reference.
+ * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
+ * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
+ * used by bpf_mtap code.
+ *
+ * Lock order:
+ *
+ * Global lock, interface lock, descriptor lock
+ *
+ * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
+ * working model. In many places (like bpf_detachd) we start with BPF descriptor
+ * (and we need to at least rlock it to get reliable interface pointer). This
+ * gives us potential LOR. As a result, we use global lock to protect from bpf_if
+ * change in every such place.
+ *
+ * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
+ * 3) descriptor main wlock.
+ * Reading bd_bif can be protected by any of these locks, typically global lock.
+ *
+ * Changing read/write BPF filter is protected by the same three locks,
+ * the same applies for reading.
+ *
+ * Sleeping in global lock is not allowed due to bpfdetach() using it.
+ */
+
 /*
  * Wrapper functions for various buffering methods.  If the set of buffer
  * modes expands, we will probably want to introduce a switch data structure
@@ -284,7 +325,6 @@ bpf_canfreebuf(struct bpf_d *d)
 static int
 bpf_canwritebuf(struct bpf_d *d)
 {
-
 	BPFD_LOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
@@ -563,17 +603,92 @@ bad:
 static void
 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 {
+	int op_w;
+
+	BPF_LOCK_ASSERT();
+
+	/*
+	 * Save sysctl value to protect from sysctl change
+	 * between reads
+	 */
+	op_w = V_bpf_optimize_writers;
+
+	if (d->bd_bif != NULL)
+		bpf_detachd_locked(d);
 	/*
-	 * Point d at bp, and add d to the interface's list of listeners.
-	 * Finally, point the driver's bpf cookie at the interface so
-	 * it will divert packets to bpf.
+	 * Point d at bp, and add d to the interface's list.
+	 * Since there are many applicaiotns using BPF for
+	 * sending raw packets only (dhcpd, cdpd are good examples)
+	 * we can delay adding d to the list of active listeners until
+	 * some filter is configured.
 	 */
-	BPFIF_LOCK(bp);
+
+	BPFIF_WLOCK(bp);
+	BPFD_LOCK(d);
+
 	d->bd_bif = bp;
-	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+	if (op_w != 0) {
+		/* Add to writers-only list */
+		LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
+		/*
+		 * We decrement bd_writer on every filter set operation.
+		 * First BIOCSETF is done by pcap_open_live() to set up
+		 * snap length. After that appliation usually sets its own filter
+		 */
+		d->bd_writer = 2;
+	} else
+		LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+	BPFD_UNLOCK(d);
+	BPFIF_WUNLOCK(bp);
 
 	bpf_bpfd_cnt++;
-	BPFIF_UNLOCK(bp);
+
+	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
+	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
+
+	if (op_w == 0)
+		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
+}
+
+/*
+ * Add d to the list of active bp filters.
+ * Reuqires bpf_attachd() to be called before
+ */
+static void
+bpf_upgraded(struct bpf_d *d)
+{
+	struct bpf_if *bp;
+
+	BPF_LOCK_ASSERT();
+
+	bp = d->bd_bif;
+
+	/*
+	 * Filter can be set several times without specifying interface.
+	 * Mark d as reader and exit.
+	 */
+	if (bp == NULL) {
+		BPFD_LOCK(d);
+		d->bd_writer = 0;
+		BPFD_UNLOCK(d);
+		return;
+	}
+
+	BPFIF_WLOCK(bp);
+	BPFD_LOCK(d);
+
+	/* Remove from writers-only list */
+	LIST_REMOVE(d, bd_next);
+	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+	/* Mark d as reader */
+	d->bd_writer = 0;
+
+	BPFD_UNLOCK(d);
+	BPFIF_WUNLOCK(bp);
+
+	CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
 
 	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
 }
@@ -584,26 +699,47 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 static void
 bpf_detachd(struct bpf_d *d)
 {
+	BPF_LOCK();
+	bpf_detachd_locked(d);
+	BPF_UNLOCK();
+}
+
+static void
+bpf_detachd_locked(struct bpf_d *d)
+{
 	int error;
 	struct bpf_if *bp;
 	struct ifnet *ifp;
 
-	bp = d->bd_bif;
-	BPFIF_LOCK(bp);
+	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
+
+	BPF_LOCK_ASSERT();
+
+	/* Check if descriptor is attached */
+	if ((bp = d->bd_bif) == NULL)
+		return;
+
+	BPFIF_WLOCK(bp);
 	BPFD_LOCK(d);
-	ifp = d->bd_bif->bif_ifp;
+
+	/* Save bd_writer value */
+	error = d->bd_writer;
 
 	/*
 	 * Remove d from the interface's descriptor list.
 	 */
 	LIST_REMOVE(d, bd_next);
-	bpf_bpfd_cnt--;
+
+	ifp = bp->bif_ifp;
 	d->bd_bif = NULL;
 	BPFD_UNLOCK(d);
-	BPFIF_UNLOCK(bp);
+	BPFIF_WUNLOCK(bp);
+
+	bpf_bpfd_cnt--;
 
-	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
+	/* Call event handler iff d is attached */
+	if (error == 0)
+		EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
 
 	/*
 	 * Check if this descriptor had requested promiscuous mode.
@@ -642,14 +778,11 @@ bpf_dtor(void *data)
 	d->bd_state = BPF_IDLE;
 	BPFD_UNLOCK(d);
 	funsetown(&d->bd_sigio);
-	mtx_lock(&bpf_mtx);
-	if (d->bd_bif)
-		bpf_detachd(d);
-	mtx_unlock(&bpf_mtx);
-	selwakeuppri(&d->bd_sel, PRINET);
+	bpf_detachd(d);
 #ifdef MAC
 	mac_bpfdesc_destroy(d);
 #endif /* MAC */
+	seldrain(&d->bd_sel);
 	knlist_destroy(&d->bd_sel.si_note);
 	callout_drain(&d->bd_callout);
 	bpf_freed(d);
@@ -665,7 +798,7 @@ static	int
 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	struct bpf_d *d;
-	int error;
+	int error, size;
 
 	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
 	error = devfs_set_cdevpriv(d, bpf_dtor);
@@ -683,14 +816,18 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 	d->bd_bufmode = BPF_BUFMODE_BUFFER;
 	d->bd_sig = SIGIO;
 	d->bd_direction = BPF_D_INOUT;
-	d->bd_pid = td->td_proc->p_pid;
+	BPF_PID_REFRESH(d, td);
 #ifdef MAC
 	mac_bpfdesc_init(d);
 	mac_bpfdesc_create(td->td_ucred, d);
 #endif
-	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
-	callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
-	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
+	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
+	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
+	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
+
+	/* Allocate default buffers */
+	size = d->bd_bufsize;
+	bpf_buffer_ioctl_sblen(d, &size);
 
 	return (0);
 }
@@ -720,7 +857,7 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag)
 	non_block = ((ioflag & O_NONBLOCK) != 0);
 
 	BPFD_LOCK(d);
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPF_PID_REFRESH_CUR(d);
 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
 		BPFD_UNLOCK(d);
 		return (EOPNOTSUPP);
@@ -766,7 +903,7 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag)
 			BPFD_UNLOCK(d);
 			return (EWOULDBLOCK);
 		}
-		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
+		error = msleep(d, &d->bd_lock, PRINET|PCATCH,
 		     "bpf", d->bd_rtout);
 		if (error == EINTR || error == ERESTART) {
 			BPFD_UNLOCK(d);
@@ -883,8 +1020,9 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 	if (error != 0)
 		return (error);
 
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPF_PID_REFRESH_CUR(d);
 	d->bd_wcount++;
+	/* XXX: locking required */
 	if (d->bd_bif == NULL) {
 		d->bd_wdcount++;
 		return (ENXIO);
@@ -905,6 +1043,7 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 	bzero(&dst, sizeof(dst));
 	m = NULL;
 	hlen = 0;
+	/* XXX: bpf_movein() can sleep */
 	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
 	    &m, &dst, &hlen, d->bd_wfilter);
 	if (error) {
@@ -964,7 +1103,7 @@ static void
 reset_d(struct bpf_d *d)
 {
 
-	mtx_assert(&d->bd_mtx, MA_OWNED);
+	BPFD_LOCK_ASSERT(d);
 
 	if ((d->bd_hbuf != NULL) &&
 	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
@@ -1030,7 +1169,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Refresh PID associated with this descriptor.
 	 */
 	BPFD_LOCK(d);
-	d->bd_pid = td->td_proc->p_pid;
+	BPF_PID_REFRESH(d, td);
 	if (d->bd_state == BPF_WAITING)
 		callout_stop(&d->bd_callout);
 	d->bd_state = BPF_IDLE;
@@ -1081,7 +1220,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	case BIOCGDLTLIST32:
 	case BIOCGRTIMEOUT32:
 	case BIOCSRTIMEOUT32:
+		BPFD_LOCK(d);
 		d->bd_compat32 = 1;
+		BPFD_UNLOCK(d);
 	}
 #endif
 
@@ -1126,7 +1267,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Get buffer len [for read()].
 	 */
 	case BIOCGBLEN:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_bufsize;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
@@ -1181,10 +1324,12 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Get current data link type.
 	 */
 	case BIOCGDLT:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			*(u_int *)addr = d->bd_bif->bif_dlt;
+		BPF_UNLOCK();
 		break;
 
 	/*
@@ -1199,6 +1344,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 		list32 = (struct bpf_dltlist32 *)addr;
 		dltlist.bfl_len = list32->bfl_len;
 		dltlist.bfl_list = PTRIN(list32->bfl_list);
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else {
@@ -1206,31 +1352,37 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 			if (error == 0)
 				list32->bfl_len = dltlist.bfl_len;
 		}
+		BPF_UNLOCK();
 		break;
 	}
 #endif
 
 	case BIOCGDLTLIST:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Set data link type.
 	 */
 	case BIOCSDLT:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			error = bpf_setdlt(d, *(u_int *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Get interface name.
 	 */
 	case BIOCGETIF:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else {
@@ -1240,13 +1392,16 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 			strlcpy(ifr->ifr_name, ifp->if_xname,
 			    sizeof(ifr->ifr_name));
 		}
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Set interface.
 	 */
 	case BIOCSETIF:
+		BPF_LOCK();
 		error = bpf_setif(d, (struct ifreq *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
@@ -1329,7 +1484,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Set immediate mode.
 	 */
 	case BIOCIMMEDIATE:
+		BPFD_LOCK(d);
 		d->bd_immediate = *(u_int *)addr;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCVERSION:
@@ -1345,21 +1502,27 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Get "header already complete" flag
 	 */
 	case BIOCGHDRCMPLT:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_hdrcmplt;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Set "header already complete" flag
 	 */
 	case BIOCSHDRCMPLT:
+		BPFD_LOCK(d);
 		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Get packet direction flag
 	 */
 	case BIOCGDIRECTION:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_direction;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
@@ -1374,7 +1537,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 		case BPF_D_IN:
 		case BPF_D_INOUT:
 		case BPF_D_OUT:
+			BPFD_LOCK(d);
 			d->bd_direction = direction;
+			BPFD_UNLOCK(d);
 			break;
 		default:
 			error = EINVAL;
@@ -1383,26 +1548,38 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 		break;
 
 	case BIOCFEEDBACK:
+		BPFD_LOCK(d);
 		d->bd_feedback = *(u_int *)addr;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCLOCK:
+		BPFD_LOCK(d);
 		d->bd_locked = 1;
+		BPFD_UNLOCK(d);
 		break;
 
 	case FIONBIO:		/* Non-blocking I/O */
 		break;
 
 	case FIOASYNC:		/* Send signal on receive packets */
+		BPFD_LOCK(d);
 		d->bd_async = *(int *)addr;
+		BPFD_UNLOCK(d);
 		break;
 
 	case FIOSETOWN:
+		/*
+		 * XXX: Add some sort of locking here?
+		 * fsetown() can sleep.
+		 */
 		error = fsetown(*(int *)addr, &d->bd_sigio);
 		break;
 
 	case FIOGETOWN:
+		BPFD_LOCK(d);
 		*(int *)addr = fgetown(&d->bd_sigio);
+		BPFD_UNLOCK(d);
 		break;
 
 	/* This is deprecated, FIOSETOWN should be used instead. */
@@ -1423,16 +1600,23 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 		if (sig >= NSIG)
 			error = EINVAL;
-		else
+		else {
+			BPFD_LOCK(d);
 			d->bd_sig = sig;
+			BPFD_UNLOCK(d);
+		}
 		break;
 	}
 
 	case BIOCGRSIG:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_sig;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCGETBUFMODE:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_bufmode;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCSETBUFMODE:
@@ -1487,95 +1671,130 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 /*
  * Set d's packet filter program to fp.  If this file already has a filter,
  * free it and replace it.  Returns EINVAL for bogus requests.
+ *
+ * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
+ * since reading d->bd_bif can't be protected by d or interface lock due to
+ * lock order.
+ *
+ * Additionally, we have to acquire interface write lock due to bpf_mtap() uses
+ * interface read lock to read all filers.
+ *
  */
 static int
 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 {
+#ifdef COMPAT_FREEBSD32
+	struct bpf_program fp_swab;
+	struct bpf_program32 *fp32;
+#endif
 	struct bpf_insn *fcode, *old;
-	u_int wfilter, flen, size;
 #ifdef BPF_JITTER
-	bpf_jit_filter *ofunc;
+	bpf_jit_filter *jfunc, *ofunc;
 #endif
-#ifdef COMPAT_FREEBSD32
-	struct bpf_program32 *fp32;
-	struct bpf_program fp_swab;
+	size_t size;
+	u_int flen;
+	int need_upgrade;
 
-	if (cmd == BIOCSETWF32 || cmd == BIOCSETF32 || cmd == BIOCSETFNR32) {
+#ifdef COMPAT_FREEBSD32
+	switch (cmd) {
+	case BIOCSETF32:
+	case BIOCSETWF32:
+	case BIOCSETFNR32:
 		fp32 = (struct bpf_program32 *)fp;
 		fp_swab.bf_len = fp32->bf_len;
 		fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
 		fp = &fp_swab;
-		if (cmd == BIOCSETWF32)
+		switch (cmd) {
+		case BIOCSETF32:
+			cmd = BIOCSETF;
+			break;
+		case BIOCSETWF32:
 			cmd = BIOCSETWF;
+			break;
+		}
+		break;
 	}
 #endif
-	if (cmd == BIOCSETWF) {
-		old = d->bd_wfilter;
-		wfilter = 1;
-#ifdef BPF_JITTER
-		ofunc = NULL;
-#endif
-	} else {
-		wfilter = 0;
-		old = d->bd_rfilter;
+
+	fcode = NULL;
 #ifdef BPF_JITTER
-		ofunc = d->bd_bfilter;
+	jfunc = ofunc = NULL;
 #endif
-	}
-	if (fp->bf_insns == NULL) {
-		if (fp->bf_len != 0)
+	need_upgrade = 0;
+
+	/*
+	 * Check new filter validness before acquiring any locks.
+	 * Allocate memory for new filter, if needed.
+	 */
+	flen = fp->bf_len;
+	if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
+		return (EINVAL);
+	size = flen * sizeof(*fp->bf_insns);
+	if (size > 0) {
+		/* We're setting up new filter. Copy and check actual data. */
+		fcode = malloc(size, M_BPF, M_WAITOK);
+		if (copyin(fp->bf_insns, fcode, size) != 0 ||
+		    !bpf_validate(fcode, flen)) {
+			free(fcode, M_BPF);
 			return (EINVAL);
-		BPFD_LOCK(d);
-		if (wfilter)
-			d->bd_wfilter = NULL;
-		else {
-			d->bd_rfilter = NULL;
-#ifdef BPF_JITTER
-			d->bd_bfilter = NULL;
-#endif
-			if (cmd == BIOCSETF)
-				reset_d(d);
 		}
-		BPFD_UNLOCK(d);
-		if (old != NULL)
-			free((caddr_t)old, M_BPF);
 #ifdef BPF_JITTER
-		if (ofunc != NULL)
-			bpf_destroy_jit_filter(ofunc);
+		/* Filter is copied inside fcode and is perfectly valid. */
+		jfunc = bpf_jitter(fcode, flen);
 #endif
-		return (0);
 	}
-	flen = fp->bf_len;
-	if (flen > bpf_maxinsns)
-		return (EINVAL);
 
-	size = flen * sizeof(*fp->bf_insns);
-	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
-	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
-	    bpf_validate(fcode, (int)flen)) {
-		BPFD_LOCK(d);
-		if (wfilter)
-			d->bd_wfilter = fcode;
-		else {
-			d->bd_rfilter = fcode;
+	BPF_LOCK();
+
+	/*
+	 * Set up new filter.
+	 * Protect filter change by interface lock.
+	 * Additionally, we are protected by global lock here.
+	 */
+	if (d->bd_bif != NULL)
+		BPFIF_WLOCK(d->bd_bif);
+	BPFD_LOCK(d);
+	if (cmd == BIOCSETWF) {
+		old = d->bd_wfilter;
+		d->bd_wfilter = fcode;
+	} else {
+		old = d->bd_rfilter;
+		d->bd_rfilter = fcode;
 #ifdef BPF_JITTER
-			d->bd_bfilter = bpf_jitter(fcode, flen);
+		ofunc = d->bd_bfilter;
+		d->bd_bfilter = jfunc;
 #endif
-			if (cmd == BIOCSETF)
-				reset_d(d);
+		if (cmd == BIOCSETF)
+			reset_d(d);
+
+		if (fcode != NULL) {
+			/*
+			 * Do not require upgrade by first BIOCSETF
+			 * (used to set snaplen) by pcap_open_live().
+			 */
+			if (d->bd_writer != 0 && --d->bd_writer == 0)
+				need_upgrade = 1;
+			CTR4(KTR_NET, "%s: filter function set by pid %d, "
+			    "bd_writer counter %d, need_upgrade %d",
+			    __func__, d->bd_pid, d->bd_writer, need_upgrade);
 		}
-		BPFD_UNLOCK(d);
-		if (old != NULL)
-			free((caddr_t)old, M_BPF);
+	}
+	BPFD_UNLOCK(d);
+	if (d->bd_bif != NULL)
+		BPFIF_WUNLOCK(d->bd_bif);
+	if (old != NULL)
+		free(old, M_BPF);
 #ifdef BPF_JITTER
-		if (ofunc != NULL)
-			bpf_destroy_jit_filter(ofunc);
+	if (ofunc != NULL)
+		bpf_destroy_jit_filter(ofunc);
 #endif
-		return (0);
-	}
-	free((caddr_t)fcode, M_BPF);
-	return (EINVAL);
+
+	/* Move d to active readers list. */
+	if (need_upgrade)
+		bpf_upgraded(d);
+
+	BPF_UNLOCK();
+	return (0);
 }
 
 /*
@@ -1589,28 +1808,30 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 	struct bpf_if *bp;
 	struct ifnet *theywant;
 
+	BPF_LOCK_ASSERT();
+
 	theywant = ifunit(ifr->ifr_name);
 	if (theywant == NULL || theywant->if_bpf == NULL)
 		return (ENXIO);
 
 	bp = theywant->if_bpf;
 
+	/* Check if interface is not being detached from BPF */
+	BPFIF_RLOCK(bp);
+	if (bp->flags & BPFIF_FLAG_DYING) {
+		BPFIF_RUNLOCK(bp);
+		return (ENXIO);
+	}
+	BPFIF_RUNLOCK(bp);
+
 	/*
 	 * Behavior here depends on the buffering model.  If we're using
 	 * kernel memory buffers, then we can allocate them here.  If we're
 	 * using zero-copy, then the user process must have registered
 	 * buffers by the time we get here.  If not, return an error.
-	 *
-	 * XXXRW: There are locking issues here with multi-threaded use: what
-	 * if two threads try to set the interface at once?
 	 */
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
-		if (d->bd_sbuf == NULL)
-			bpf_buffer_alloc(d);
-		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
-		break;
-
 	case BPF_BUFMODE_ZBUF:
 		if (d->bd_sbuf == NULL)
 			return (EINVAL);
@@ -1619,15 +1840,8 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 	default:
 		panic("bpf_setif: bufmode %d", d->bd_bufmode);
 	}
-	if (bp != d->bd_bif) {
-		if (d->bd_bif)
-			/*
-			 * Detach if attached to something else.
-			 */
-			bpf_detachd(d);
-
+	if (bp != d->bd_bif)
 		bpf_attachd(d, bp);
-	}
 	BPFD_LOCK(d);
 	reset_d(d);
 	BPFD_UNLOCK(d);
@@ -1655,7 +1869,7 @@ bpfpoll(struct cdev *dev, int events, struct thread *td)
 	 */
 	revents = events & (POLLOUT | POLLWRNORM);
 	BPFD_LOCK(d);
-	d->bd_pid = td->td_proc->p_pid;
+	BPF_PID_REFRESH(d, td);
 	if (events & (POLLIN | POLLRDNORM)) {
 		if (bpf_ready(d))
 			revents |= events & (POLLIN | POLLRDNORM);
@@ -1690,7 +1904,7 @@ bpfkqfilter(struct cdev *dev, struct knote *kn)
 	 * Refresh PID associated with this descriptor.
 	 */
 	BPFD_LOCK(d);
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPF_PID_REFRESH_CUR(d);
 	kn->kn_fop = &bpfread_filtops;
 	kn->kn_hook = d;
 	knlist_add(&d->bd_sel.si_note, kn, 1);
@@ -1746,9 +1960,19 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 	struct timeval tv;
 
 	gottime = 0;
-	BPFIF_LOCK(bp);
+
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-		BPFD_LOCK(d);
+		/*
+		 * We are not using any locks for d here because:
+		 * 1) any filter change is protected by interface
+		 * write lock
+		 * 2) destroying/detaching d is protected by interface
+		 * write lock, too
+		 */
+
+		/* XXX: Do not protect counter for the sake of performance. */
 		++d->bd_rcount;
 		/*
 		 * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
@@ -1764,6 +1988,11 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 #endif
 		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
 		if (slen != 0) {
+			/*
+			 * Filter matches. Let's to acquire write lock.
+			 */
+			BPFD_LOCK(d);
+
 			d->bd_fcount++;
 			if (!gottime) {
 				microtime(&tv);
@@ -1774,10 +2003,10 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 #endif
 			catchpacket(d, pkt, pktlen, slen,
 			    bpf_append_bytes, &tv);
+			BPFD_UNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 #define	BPF_CHECK_DIRECTION(d, r, i)				\
@@ -1786,6 +2015,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 
 /*
  * Incoming linkage from device drivers, when packet is in an mbuf chain.
+ * Locking model is explained in bpf_tap().
  */
 void
 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
@@ -1808,11 +2038,11 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 
 	pktlen = m_length(m, NULL);
 
-	BPFIF_LOCK(bp);
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
 			continue;
-		BPFD_LOCK(d);
 		++d->bd_rcount;
 #ifdef BPF_JITTER
 		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
@@ -1823,6 +2053,8 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 #endif
 		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
 		if (slen != 0) {
+			BPFD_LOCK(d);
+
 			d->bd_fcount++;
 			if (!gottime) {
 				microtime(&tv);
@@ -1833,10 +2065,10 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 #endif
 			catchpacket(d, (u_char *)m, pktlen, slen,
 			    bpf_append_mbuf, &tv);
+			BPFD_UNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 /*
@@ -1871,14 +2103,17 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
 	mb.m_len = dlen;
 	pktlen += dlen;
 
-	BPFIF_LOCK(bp);
+
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
 			continue;
-		BPFD_LOCK(d);
 		++d->bd_rcount;
 		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
 		if (slen != 0) {
+			BPFD_LOCK(d);
+
 			d->bd_fcount++;
 			if (!gottime) {
 				microtime(&tv);
@@ -1889,10 +2124,10 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
 #endif
 			catchpacket(d, (u_char *)&mb, pktlen, slen,
 			    bpf_append_mbuf, &tv);
+			BPFD_UNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 #undef	BPF_CHECK_DIRECTION
@@ -2042,7 +2277,7 @@ bpf_freed(struct bpf_d *d)
 	}
 	if (d->bd_wfilter != NULL)
 		free((caddr_t)d->bd_wfilter, M_BPF);
-	mtx_destroy(&d->bd_mtx);
+	mtx_destroy(&d->bd_lock);
 }
 
 /*
@@ -2072,15 +2307,16 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
 		panic("bpfattach");
 
 	LIST_INIT(&bp->bif_dlist);
+	LIST_INIT(&bp->bif_wlist);
 	bp->bif_ifp = ifp;
 	bp->bif_dlt = dlt;
-	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
+	rw_init(&bp->bif_lock, "bpf interface lock");
 	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
 	*driverp = bp;
 
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 
 	/*
 	 * Compute the length of the bpf header.  This is not necessarily
@@ -2095,42 +2331,95 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
 }
 
 /*
- * Detach bpf from an interface. This involves detaching each descriptor
- * associated with the interface, and leaving bd_bif NULL. Notify each
- * descriptor as it's detached so that any sleepers wake up and get
- * ENXIO.
+ * Detach bpf from an interface. This involves detaching each descriptor
+ * associated with the interface. Notify each descriptor as it's detached
+ * so that any sleepers wake up and get ENXIO.
  */
 void
 bpfdetach(struct ifnet *ifp)
 {
 	struct bpf_if	*bp;
 	struct bpf_d	*d;
+#ifdef INVARIANTS
+	int ndetached;
 
-	/* Locate BPF interface information */
-	mtx_lock(&bpf_mtx);
-	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		if (ifp == bp->bif_ifp)
-			break;
-	}
+	ndetached = 0;
+#endif
+
+	BPF_LOCK();
+	/* Find all bpf_if struct's which reference ifp and detach them. */
+	do {
+		LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+			if (ifp == bp->bif_ifp)
+				break;
+		}
+		if (bp != NULL)
+			LIST_REMOVE(bp, bif_next);
+
+		if (bp != NULL) {
+#ifdef INVARIANTS
+			ndetached++;
+#endif
+			while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
+				bpf_detachd_locked(d);
+				BPFD_LOCK(d);
+				bpf_wakeup(d);
+				BPFD_UNLOCK(d);
+			}
+			/* Free writer-only descriptors */
+			while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
+				bpf_detachd_locked(d);
+				BPFD_LOCK(d);
+				bpf_wakeup(d);
+				BPFD_UNLOCK(d);
+			}
+
+			/*
+			 * Delay freing bp till interface is detached
+			 * and all routes through this interface are removed.
+			 * Mark bp as detached to restrict new consumers.
+			 */
+			BPFIF_WLOCK(bp);
+			bp->flags |= BPFIF_FLAG_DYING;
+			BPFIF_WUNLOCK(bp);
+		}
+	} while (bp != NULL);
+	BPF_UNLOCK();
 
-	/* Interface wasn't attached */
-	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
-		mtx_unlock(&bpf_mtx);
+#ifdef INVARIANTS
+	if (ndetached == 0)
 		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
+#endif
+}
+
+/*
+ * Interface departure handler.
+ * Note departure event does not guarantee interface is going down.
+ */
+static void
+bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+	struct bpf_if *bp;
+
+	BPF_LOCK();
+	if ((bp = ifp->if_bpf) == NULL) {
+		BPF_UNLOCK();
 		return;
 	}
 
-	LIST_REMOVE(bp, bif_next);
-	mtx_unlock(&bpf_mtx);
-
-	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
-		bpf_detachd(d);
-		BPFD_LOCK(d);
-		bpf_wakeup(d);
-		BPFD_UNLOCK(d);
+	/* Check if bpfdetach() was called previously */
+	if ((bp->flags & BPFIF_FLAG_DYING) == 0) {
+		BPF_UNLOCK();
+		return;
 	}
 
-	mtx_destroy(&bp->bif_mtx);
+	CTR3(KTR_NET, "%s: freing BPF instance %p for interface %p",
+	    __func__, bp, ifp);
+
+	ifp->if_bpf = NULL;
+	BPF_UNLOCK();
+
+	rw_destroy(&bp->bif_lock);
 	free(bp, M_BPF);
 }
 
@@ -2144,24 +2433,22 @@ bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 	struct ifnet *ifp;
 	struct bpf_if *bp;
 
+	BPF_LOCK_ASSERT();
+
 	ifp = d->bd_bif->bif_ifp;
 	n = 0;
 	error = 0;
-	mtx_lock(&bpf_mtx);
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp != ifp)
 			continue;
 		if (bfl->bfl_list != NULL) {
-			if (n >= bfl->bfl_len) {
-				mtx_unlock(&bpf_mtx);
+			if (n >= bfl->bfl_len)
 				return (ENOMEM);
-			}
 			error = copyout(&bp->bif_dlt,
 			    bfl->bfl_list + n, sizeof(u_int));
 		}
 		n++;
 	}
-	mtx_unlock(&bpf_mtx);
 	bfl->bfl_len = n;
 	return (error);
 }
@@ -2176,18 +2463,19 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
 	struct ifnet *ifp;
 	struct bpf_if *bp;
 
+	BPF_LOCK_ASSERT();
+
 	if (d->bd_bif->bif_dlt == dlt)
 		return (0);
 	ifp = d->bd_bif->bif_ifp;
-	mtx_lock(&bpf_mtx);
+
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
 			break;
 	}
-	mtx_unlock(&bpf_mtx);
+
 	if (bp != NULL) {
 		opromisc = d->bd_promisc;
-		bpf_detachd(d);
 		bpf_attachd(d, bp);
 		BPFD_LOCK(d);
 		reset_d(d);
@@ -2216,6 +2504,11 @@ bpf_drvinit(void *unused)
 	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
 	/* For compatibility */
 	make_dev_alias(dev, "bpf0");
+
+	/* Register interface departure handler */
+	bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
+	    ifnet_departure_event, bpf_ifdetach, NULL,
+	    EVENTHANDLER_PRI_ANY);
 }
 
 /*
@@ -2229,9 +2522,9 @@ bpf_zero_counters(void)
 	struct bpf_if *bp;
 	struct bpf_d *bd;
 
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		BPFIF_LOCK(bp);
+		BPFIF_RLOCK(bp);
 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
 			BPFD_LOCK(bd);
 			bd->bd_rcount = 0;
@@ -2242,11 +2535,14 @@ bpf_zero_counters(void)
 			bd->bd_zcopy = 0;
 			BPFD_UNLOCK(bd);
 		}
-		BPFIF_UNLOCK(bp);
+		BPFIF_RUNLOCK(bp);
 	}
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 }
 
+/*
+ * Fill filter statistics
+ */
 static void
 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 {
@@ -2254,6 +2550,7 @@ bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 	bzero(d, sizeof(*d));
 	BPFD_LOCK_ASSERT(bd);
 	d->bd_structsize = sizeof(*d);
+	/* XXX: reading should be protected by global lock */
 	d->bd_immediate = bd->bd_immediate;
 	d->bd_promisc = bd->bd_promisc;
 	d->bd_hdrcmplt = bd->bd_hdrcmplt;
@@ -2278,6 +2575,9 @@ bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 	d->bd_bufmode = bd->bd_bufmode;
 }
 
+/*
+ * Handle `netstat -B' stats request
+ */
 static int
 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
 {
@@ -2315,24 +2615,31 @@ bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
 	if (bpf_bpfd_cnt == 0)
 		return (SYSCTL_OUT(req, 0, 0));
 	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
-		mtx_unlock(&bpf_mtx);
+		BPF_UNLOCK();
 		free(xbdbuf, M_BPF);
 		return (ENOMEM);
 	}
 	index = 0;
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		BPFIF_LOCK(bp);
+		BPFIF_RLOCK(bp);
+		/* Send writers-only first */
+		LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
+			xbd = &xbdbuf[index++];
+			BPFD_LOCK(bd);
+			bpfstats_fill_xbpf(xbd, bd);
+			BPFD_UNLOCK(bd);
+		}
 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
 			xbd = &xbdbuf[index++];
 			BPFD_LOCK(bd);
 			bpfstats_fill_xbpf(xbd, bd);
 			BPFD_UNLOCK(bd);
 		}
-		BPFIF_UNLOCK(bp);
+		BPFIF_RUNLOCK(bp);
 	}
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
 	free(xbdbuf, M_BPF);
 	return (error);
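Editor's note on the writers-only optimization the patch brings in: with the new net.bpf.optimize_writers sysctl set to 1, a freshly attached descriptor is parked on the interface's writers-only list (bif_wlist) until a filter is configured, so send-only consumers such as dhcpd never cost anything on the bpf_tap()/bpf_mtap() capture path. A hypothetical send-only consumer could look like the sketch below (the interface name "em0" and the frame contents are placeholders, and error handling is abbreviated):

	/*
	 * Sketch of a dhcpd-style write-only BPF consumer on FreeBSD.
	 * Build: cc -o bpfsend bpfsend.c
	 */
	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <net/bpf.h>
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	int
	main(void)
	{
		struct ifreq ifr;
		unsigned char frame[64] = { 0xff };	/* placeholder Ethernet frame */
		int fd;

		fd = open("/dev/bpf", O_WRONLY);	/* clonable BPF device */
		if (fd < 0)
			return (1);

		memset(&ifr, 0, sizeof(ifr));
		strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
		if (ioctl(fd, BIOCSETIF, &ifr) < 0)	/* attach; no BIOCSETF follows */
			return (1);

		/*
		 * Because this program never issues BIOCSETF, bd_writer stays
		 * nonzero, bpf_upgraded() is never called, and the descriptor
		 * remains on bif_wlist, invisible to the capture loops.
		 */
		write(fd, frame, sizeof(frame));
		close(fd);
		return (0);
	}

Two details of the patch make this flow work without extra setup: bpfopen() now pre-allocates default buffers via bpf_buffer_ioctl_sblen(), so BIOCSETIF succeeds without a prior BIOCSBLEN, and bd_writer is initialized to 2 so that the single snaplen-setting BIOCSETF issued by pcap_open_live() does not by itself promote a descriptor to the active reader list.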