diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2013-11-06 16:20:21 +0100 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2013-11-11 10:08:08 +0100 |
commit | 66659ff1ad6831b0ea7425fa6ecd8a8687523658 (patch) | |
tree | 48e22b475fa8854128e0861a33fed6f78c8094b5 /freebsd/sys/net | |
parent | Define __GLOBL1() and __GLOBL() (diff) | |
download | rtems-libbsd-66659ff1ad6831b0ea7425fa6ecd8a8687523658.tar.bz2 |
Update to FreeBSD 9.2
Diffstat (limited to 'freebsd/sys/net')
50 files changed, 1889 insertions, 651 deletions
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c index 6b1e4b8a..55e60e9e 100644 --- a/freebsd/sys/net/bpf.c +++ b/freebsd/sys/net/bpf.c @@ -95,12 +95,16 @@ MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); #define PRINET 26 /* interruptible */ +#define SIZEOF_BPF_HDR(type) \ + (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen)) + #ifdef COMPAT_FREEBSD32 #include <sys/mount.h> #include <compat/freebsd32/freebsd32.h> #define BPF_ALIGNMENT32 sizeof(int32_t) #define BPF_WORDALIGN32(x) (((x)+(BPF_ALIGNMENT32-1))&~(BPF_ALIGNMENT32-1)) +#ifndef BURN_BRIDGES /* * 32-bit version of structure prepended to each packet. We use this header * instead of the standard one for 32-bit streams. We mark the a stream as @@ -113,6 +117,7 @@ struct bpf_hdr32 { uint16_t bh_hdrlen; /* length of bpf header (this struct plus alignment padding) */ }; +#endif struct bpf_program32 { u_int bf_len; @@ -125,11 +130,11 @@ struct bpf_dltlist32 { }; #define BIOCSETF32 _IOW('B', 103, struct bpf_program32) -#define BIOCSRTIMEOUT32 _IOW('B',109, struct timeval32) -#define BIOCGRTIMEOUT32 _IOR('B',110, struct timeval32) -#define BIOCGDLTLIST32 _IOWR('B',121, struct bpf_dltlist32) -#define BIOCSETWF32 _IOW('B',123, struct bpf_program32) -#define BIOCSETFNR32 _IOW('B',130, struct bpf_program32) +#define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32) +#define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32) +#define BIOCGDLTLIST32 _IOWR('B', 121, struct bpf_dltlist32) +#define BIOCSETWF32 _IOW('B', 123, struct bpf_program32) +#define BIOCSETFNR32 _IOW('B', 130, struct bpf_program32) #endif /* @@ -154,7 +159,7 @@ static __inline void bpf_wakeup(struct bpf_d *); static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int), - struct timeval *); + struct bintime *); static void reset_d(struct bpf_d *); static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd); static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); @@ 
-171,7 +176,7 @@ SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW, static int bpf_zerocopy_enable = 0; SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW, &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions"); -SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW, +static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW, bpf_stats_sysctl, "bpf statistics portal"); static VNET_DEFINE(int, bpf_optimize_writers) = 0; @@ -198,8 +203,11 @@ static struct cdevsw bpf_cdevsw = { .d_kqfilter = bpfkqfilter, }; -static struct filterops bpfread_filtops = - { 1, NULL, filt_bpfdetach, filt_bpfread }; +static struct filterops bpfread_filtops = { + .f_isfd = 1, + .f_detach = filt_bpfdetach, + .f_event = filt_bpfread, +}; eventhandler_tag bpf_ifdetach_cookie = NULL; @@ -813,6 +821,7 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) * particular buffer method. */ bpf_buffer_init(d); + d->bd_hbuf_in_use = 0; d->bd_bufmode = BPF_BUFMODE_BUFFER; d->bd_sig = SIGIO; d->bd_direction = BPF_D_INOUT; @@ -866,6 +875,14 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag) callout_stop(&d->bd_callout); timed_out = (d->bd_state == BPF_TIMED_OUT); d->bd_state = BPF_IDLE; + while (d->bd_hbuf_in_use) { + error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, + PRINET|PCATCH, "bd_hbuf", 0); + if (error != 0) { + BPFD_UNLOCK(d); + return (error); + } + } /* * If the hold buffer is empty, then do a timed sleep, which * ends when the timeout expires or when enough packets @@ -934,24 +951,27 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag) /* * At this point, we know we have something in the hold slot. */ + d->bd_hbuf_in_use = 1; BPFD_UNLOCK(d); /* * Move data from hold buffer into user space. * We know the entire buffer is transferred since * we checked above that the read buffer is bpf_bufsize bytes. 
- * - * XXXRW: More synchronization needed here: what if a second thread - * issues a read on the same fd at the same time? Don't want this - * getting invalidated. + * + * We do not have to worry about simultaneous reads because + * we waited for sole access to the hold buffer above. */ error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio); BPFD_LOCK(d); + KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf")); d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; bpf_buf_reclaimed(d); + d->bd_hbuf_in_use = 0; + wakeup(&d->bd_hbuf_in_use); BPFD_UNLOCK(d); return (error); @@ -1105,6 +1125,9 @@ reset_d(struct bpf_d *d) BPFD_LOCK_ASSERT(d); + while (d->bd_hbuf_in_use) + mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET, + "bd_hbuf", 0); if ((d->bd_hbuf != NULL) && (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) { /* Free the hold buffer. */ @@ -1145,6 +1168,8 @@ reset_d(struct bpf_d *d) * BIOCSHDRCMPLT Set "header already complete" flag * BIOCGDIRECTION Get packet direction flag * BIOCSDIRECTION Set packet direction flag + * BIOCGTSTAMP Get time stamp format and resolution. + * BIOCSTSTAMP Set time stamp format and resolution. * BIOCLOCK Set "locked" flag * BIOCFEEDBACK Set packet feedback mode. * BIOCSETZBUF Set current zero-copy buffer locations. @@ -1193,6 +1218,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, case BIOCVERSION: case BIOCGRSIG: case BIOCGHDRCMPLT: + case BIOCSTSTAMP: case BIOCFEEDBACK: case FIONREAD: case BIOCLOCK: @@ -1242,6 +1268,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, BPFD_LOCK(d); n = d->bd_slen; + while (d->bd_hbuf_in_use) + mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, + PRINET, "bd_hbuf", 0); if (d->bd_hbuf) n += d->bd_hlen; BPFD_UNLOCK(d); @@ -1547,6 +1576,30 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, } break; + /* + * Get packet timestamp format and resolution. 
+ */ + case BIOCGTSTAMP: + BPFD_LOCK(d); + *(u_int *)addr = d->bd_tstamp; + BPFD_UNLOCK(d); + break; + + /* + * Set packet timestamp format and resolution. + */ + case BIOCSTSTAMP: + { + u_int func; + + func = *(u_int *)addr; + if (BPF_T_VALID(func)) + d->bd_tstamp = func; + else + error = EINVAL; + } + break; + case BIOCFEEDBACK: BPFD_LOCK(d); d->bd_feedback = *(u_int *)addr; @@ -1931,6 +1984,9 @@ filt_bpfread(struct knote *kn, long hint) ready = bpf_ready(d); if (ready) { kn->kn_data = d->bd_slen; + while (d->bd_hbuf_in_use) + mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, + PRINET, "bd_hbuf", 0); if (d->bd_hbuf) kn->kn_data += d->bd_hlen; } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { @@ -1942,6 +1998,48 @@ filt_bpfread(struct knote *kn, long hint) return (ready); } +#define BPF_TSTAMP_NONE 0 +#define BPF_TSTAMP_FAST 1 +#define BPF_TSTAMP_NORMAL 2 +#define BPF_TSTAMP_EXTERN 3 + +static int +bpf_ts_quality(int tstype) +{ + + if (tstype == BPF_T_NONE) + return (BPF_TSTAMP_NONE); + if ((tstype & BPF_T_FAST) != 0) + return (BPF_TSTAMP_FAST); + + return (BPF_TSTAMP_NORMAL); +} + +static int +bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m) +{ + struct m_tag *tag; + int quality; + + quality = bpf_ts_quality(tstype); + if (quality == BPF_TSTAMP_NONE) + return (quality); + + if (m != NULL) { + tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL); + if (tag != NULL) { + *bt = *(struct bintime *)(tag + 1); + return (BPF_TSTAMP_EXTERN); + } + } + if (quality == BPF_TSTAMP_NORMAL) + binuptime(bt); + else + getbinuptime(bt); + + return (quality); +} + /* * Incoming linkage from device drivers. Process the packet pkt, of length * pktlen, which is stored in a contiguous buffer. 
The packet is parsed @@ -1951,15 +2049,15 @@ filt_bpfread(struct knote *kn, long hint) void bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) { + struct bintime bt; struct bpf_d *d; #ifdef BPF_JITTER bpf_jit_filter *bf; #endif u_int slen; int gottime; - struct timeval tv; - gottime = 0; + gottime = BPF_TSTAMP_NONE; BPFIF_RLOCK(bp); @@ -1994,15 +2092,13 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) BPFD_LOCK(d); d->bd_fcount++; - if (!gottime) { - microtime(&tv); - gottime = 1; - } + if (gottime < bpf_ts_quality(d->bd_tstamp)) + gottime = bpf_gettime(&bt, d->bd_tstamp, NULL); #ifdef MAC if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) #endif catchpacket(d, pkt, pktlen, slen, - bpf_append_bytes, &tv); + bpf_append_bytes, &bt); BPFD_UNLOCK(d); } } @@ -2020,13 +2116,13 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) void bpf_mtap(struct bpf_if *bp, struct mbuf *m) { + struct bintime bt; struct bpf_d *d; #ifdef BPF_JITTER bpf_jit_filter *bf; #endif u_int pktlen, slen; int gottime; - struct timeval tv; /* Skip outgoing duplicate packets. 
*/ if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { @@ -2034,9 +2130,8 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m) return; } - gottime = 0; - pktlen = m_length(m, NULL); + gottime = BPF_TSTAMP_NONE; BPFIF_RLOCK(bp); @@ -2056,15 +2151,13 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m) BPFD_LOCK(d); d->bd_fcount++; - if (!gottime) { - microtime(&tv); - gottime = 1; - } + if (gottime < bpf_ts_quality(d->bd_tstamp)) + gottime = bpf_gettime(&bt, d->bd_tstamp, m); #ifdef MAC if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) #endif catchpacket(d, (u_char *)m, pktlen, slen, - bpf_append_mbuf, &tv); + bpf_append_mbuf, &bt); BPFD_UNLOCK(d); } } @@ -2078,11 +2171,11 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m) void bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) { + struct bintime bt; struct mbuf mb; struct bpf_d *d; u_int pktlen, slen; int gottime; - struct timeval tv; /* Skip outgoing duplicate packets. */ if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { @@ -2090,8 +2183,6 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) return; } - gottime = 0; - pktlen = m_length(m, NULL); /* * Craft on-stack mbuf suitable for passing to bpf_filter. 
@@ -2103,6 +2194,7 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) mb.m_len = dlen; pktlen += dlen; + gottime = BPF_TSTAMP_NONE; BPFIF_RLOCK(bp); @@ -2115,15 +2207,13 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) BPFD_LOCK(d); d->bd_fcount++; - if (!gottime) { - microtime(&tv); - gottime = 1; - } + if (gottime < bpf_ts_quality(d->bd_tstamp)) + gottime = bpf_gettime(&bt, d->bd_tstamp, m); #ifdef MAC if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) #endif catchpacket(d, (u_char *)&mb, pktlen, slen, - bpf_append_mbuf, &tv); + bpf_append_mbuf, &bt); BPFD_UNLOCK(d); } } @@ -2132,6 +2222,69 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) #undef BPF_CHECK_DIRECTION +#undef BPF_TSTAMP_NONE +#undef BPF_TSTAMP_FAST +#undef BPF_TSTAMP_NORMAL +#undef BPF_TSTAMP_EXTERN + +static int +bpf_hdrlen(struct bpf_d *d) +{ + int hdrlen; + + hdrlen = d->bd_bif->bif_hdrlen; +#ifndef BURN_BRIDGES + if (d->bd_tstamp == BPF_T_NONE || + BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME) +#ifdef COMPAT_FREEBSD32 + if (d->bd_compat32) + hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32); + else +#endif + hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr); + else +#endif + hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr); +#ifdef COMPAT_FREEBSD32 + if (d->bd_compat32) + hdrlen = BPF_WORDALIGN32(hdrlen); + else +#endif + hdrlen = BPF_WORDALIGN(hdrlen); + + return (hdrlen - d->bd_bif->bif_hdrlen); +} + +static void +bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype) +{ + struct bintime bt2; + struct timeval tsm; + struct timespec tsn; + + if ((tstype & BPF_T_MONOTONIC) == 0) { + bt2 = *bt; + bintime_add(&bt2, &boottimebin); + bt = &bt2; + } + switch (BPF_T_FORMAT(tstype)) { + case BPF_T_MICROTIME: + bintime2timeval(bt, &tsm); + ts->bt_sec = tsm.tv_sec; + ts->bt_frac = tsm.tv_usec; + break; + case BPF_T_NANOTIME: + bintime2timespec(bt, &tsn); + ts->bt_sec = tsn.tv_sec; + ts->bt_frac = tsn.tv_nsec; + break; + case BPF_T_BINTIME: + 
ts->bt_sec = bt->sec; + ts->bt_frac = bt->frac; + break; + } +} + /* * Move the packet data from interface memory (pkt) into the * store buffer. "cpfn" is the routine called to do the actual data @@ -2142,15 +2295,19 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) static void catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int), - struct timeval *tv) + struct bintime *bt) { - struct bpf_hdr hdr; + struct bpf_xhdr hdr; +#ifndef BURN_BRIDGES + struct bpf_hdr hdr_old; #ifdef COMPAT_FREEBSD32 - struct bpf_hdr32 hdr32; + struct bpf_hdr32 hdr32_old; +#endif #endif - int totlen, curlen; - int hdrlen = d->bd_bif->bif_hdrlen; + int caplen, curlen, hdrlen, totlen; int do_wakeup = 0; + int do_timestamp; + int tstype; BPFD_LOCK_ASSERT(d); @@ -2162,6 +2319,9 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, * spot to do it. */ if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) { + while (d->bd_hbuf_in_use) + mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, + PRINET, "bd_hbuf", 0); d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; @@ -2174,6 +2334,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, * much. Otherwise, transfer the whole packet (unless * we hit the buffer size limit). */ + hdrlen = bpf_hdrlen(d); totlen = hdrlen + min(snaplen, pktlen); if (totlen > d->bd_bufsize) totlen = d->bd_bufsize; @@ -2203,6 +2364,9 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, ++d->bd_dcount; return; } + while (d->bd_hbuf_in_use) + mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, + PRINET, "bd_hbuf", 0); ROTATE_BUFFERS(d); do_wakeup = 1; curlen = 0; @@ -2213,19 +2377,39 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, * reader should be woken up. 
*/ do_wakeup = 1; + caplen = totlen - hdrlen; + tstype = d->bd_tstamp; + do_timestamp = tstype != BPF_T_NONE; +#ifndef BURN_BRIDGES + if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) { + struct bpf_ts ts; + if (do_timestamp) + bpf_bintime2ts(bt, &ts, tstype); #ifdef COMPAT_FREEBSD32 - /* - * If this is a 32-bit stream, then stick a 32-bit header at the - * front and copy the data into the buffer. - */ - if (d->bd_compat32) { - bzero(&hdr32, sizeof(hdr32)); - hdr32.bh_tstamp.tv_sec = tv->tv_sec; - hdr32.bh_tstamp.tv_usec = tv->tv_usec; - hdr32.bh_datalen = pktlen; - hdr32.bh_hdrlen = hdrlen; - hdr.bh_caplen = hdr32.bh_caplen = totlen - hdrlen; - bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32, sizeof(hdr32)); + if (d->bd_compat32) { + bzero(&hdr32_old, sizeof(hdr32_old)); + if (do_timestamp) { + hdr32_old.bh_tstamp.tv_sec = ts.bt_sec; + hdr32_old.bh_tstamp.tv_usec = ts.bt_frac; + } + hdr32_old.bh_datalen = pktlen; + hdr32_old.bh_hdrlen = hdrlen; + hdr32_old.bh_caplen = caplen; + bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old, + sizeof(hdr32_old)); + goto copy; + } +#endif + bzero(&hdr_old, sizeof(hdr_old)); + if (do_timestamp) { + hdr_old.bh_tstamp.tv_sec = ts.bt_sec; + hdr_old.bh_tstamp.tv_usec = ts.bt_frac; + } + hdr_old.bh_datalen = pktlen; + hdr_old.bh_hdrlen = hdrlen; + hdr_old.bh_caplen = caplen; + bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old, + sizeof(hdr_old)); goto copy; } #endif @@ -2235,19 +2419,20 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, * move forward the length of the header plus padding. */ bzero(&hdr, sizeof(hdr)); - hdr.bh_tstamp = *tv; + if (do_timestamp) + bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype); hdr.bh_datalen = pktlen; hdr.bh_hdrlen = hdrlen; - hdr.bh_caplen = totlen - hdrlen; + hdr.bh_caplen = caplen; bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr)); /* * Copy the packet data into the store buffer and update its length. 
*/ -#ifdef COMPAT_FREEBSD32 - copy: +#ifndef BURN_BRIDGES +copy: #endif - (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen); + (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen); d->bd_slen = curlen + totlen; if (do_wakeup) @@ -2318,13 +2503,7 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); BPF_UNLOCK(); - /* - * Compute the length of the bpf header. This is not necessarily - * equal to SIZEOF_BPF_HDR because we want to insert spacing such - * that the network layer header begins on a longword boundary (for - * performance reasons and to alleviate alignment restrictions). - */ - bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; + bp->bif_hdrlen = hdrlen; if (bootverbose) if_printf(ifp, "bpf attached\n"); diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h index 004815ad..e362f161 100644 --- a/freebsd/sys/net/bpf.h +++ b/freebsd/sys/net/bpf.h @@ -45,6 +45,8 @@ typedef int32_t bpf_int32; typedef u_int32_t bpf_u_int32; +typedef int64_t bpf_int64; +typedef u_int64_t bpf_u_int64; /* * Alignment macros. BPF_WORDALIGN rounds up to the next @@ -113,36 +115,38 @@ struct bpf_zbuf { size_t bz_buflen; /* Size of zero-copy buffers. 
*/ }; -#define BIOCGBLEN _IOR('B',102, u_int) -#define BIOCSBLEN _IOWR('B',102, u_int) -#define BIOCSETF _IOW('B',103, struct bpf_program) -#define BIOCFLUSH _IO('B',104) -#define BIOCPROMISC _IO('B',105) -#define BIOCGDLT _IOR('B',106, u_int) -#define BIOCGETIF _IOR('B',107, struct ifreq) -#define BIOCSETIF _IOW('B',108, struct ifreq) -#define BIOCSRTIMEOUT _IOW('B',109, struct timeval) -#define BIOCGRTIMEOUT _IOR('B',110, struct timeval) -#define BIOCGSTATS _IOR('B',111, struct bpf_stat) -#define BIOCIMMEDIATE _IOW('B',112, u_int) -#define BIOCVERSION _IOR('B',113, struct bpf_version) -#define BIOCGRSIG _IOR('B',114, u_int) -#define BIOCSRSIG _IOW('B',115, u_int) -#define BIOCGHDRCMPLT _IOR('B',116, u_int) -#define BIOCSHDRCMPLT _IOW('B',117, u_int) -#define BIOCGDIRECTION _IOR('B',118, u_int) -#define BIOCSDIRECTION _IOW('B',119, u_int) -#define BIOCSDLT _IOW('B',120, u_int) -#define BIOCGDLTLIST _IOWR('B',121, struct bpf_dltlist) +#define BIOCGBLEN _IOR('B', 102, u_int) +#define BIOCSBLEN _IOWR('B', 102, u_int) +#define BIOCSETF _IOW('B', 103, struct bpf_program) +#define BIOCFLUSH _IO('B', 104) +#define BIOCPROMISC _IO('B', 105) +#define BIOCGDLT _IOR('B', 106, u_int) +#define BIOCGETIF _IOR('B', 107, struct ifreq) +#define BIOCSETIF _IOW('B', 108, struct ifreq) +#define BIOCSRTIMEOUT _IOW('B', 109, struct timeval) +#define BIOCGRTIMEOUT _IOR('B', 110, struct timeval) +#define BIOCGSTATS _IOR('B', 111, struct bpf_stat) +#define BIOCIMMEDIATE _IOW('B', 112, u_int) +#define BIOCVERSION _IOR('B', 113, struct bpf_version) +#define BIOCGRSIG _IOR('B', 114, u_int) +#define BIOCSRSIG _IOW('B', 115, u_int) +#define BIOCGHDRCMPLT _IOR('B', 116, u_int) +#define BIOCSHDRCMPLT _IOW('B', 117, u_int) +#define BIOCGDIRECTION _IOR('B', 118, u_int) +#define BIOCSDIRECTION _IOW('B', 119, u_int) +#define BIOCSDLT _IOW('B', 120, u_int) +#define BIOCGDLTLIST _IOWR('B', 121, struct bpf_dltlist) #define BIOCLOCK _IO('B', 122) -#define BIOCSETWF _IOW('B',123, struct bpf_program) 
-#define BIOCFEEDBACK _IOW('B',124, u_int) -#define BIOCGETBUFMODE _IOR('B',125, u_int) -#define BIOCSETBUFMODE _IOW('B',126, u_int) -#define BIOCGETZMAX _IOR('B',127, size_t) -#define BIOCROTZBUF _IOR('B',128, struct bpf_zbuf) -#define BIOCSETZBUF _IOW('B',129, struct bpf_zbuf) -#define BIOCSETFNR _IOW('B',130, struct bpf_program) +#define BIOCSETWF _IOW('B', 123, struct bpf_program) +#define BIOCFEEDBACK _IOW('B', 124, u_int) +#define BIOCGETBUFMODE _IOR('B', 125, u_int) +#define BIOCSETBUFMODE _IOW('B', 126, u_int) +#define BIOCGETZMAX _IOR('B', 127, size_t) +#define BIOCROTZBUF _IOR('B', 128, struct bpf_zbuf) +#define BIOCSETZBUF _IOW('B', 129, struct bpf_zbuf) +#define BIOCSETFNR _IOW('B', 130, struct bpf_program) +#define BIOCGTSTAMP _IOR('B', 131, u_int) +#define BIOCSTSTAMP _IOW('B', 132, u_int) /* Obsolete */ #define BIOCGSEESENT BIOCGDIRECTION @@ -155,9 +159,48 @@ enum bpf_direction { BPF_D_OUT /* See outgoing packets */ }; +/* Time stamping functions */ +#define BPF_T_MICROTIME 0x0000 +#define BPF_T_NANOTIME 0x0001 +#define BPF_T_BINTIME 0x0002 +#define BPF_T_NONE 0x0003 +#define BPF_T_FORMAT_MASK 0x0003 +#define BPF_T_NORMAL 0x0000 +#define BPF_T_FAST 0x0100 +#define BPF_T_MONOTONIC 0x0200 +#define BPF_T_MONOTONIC_FAST (BPF_T_FAST | BPF_T_MONOTONIC) +#define BPF_T_FLAG_MASK 0x0300 +#define BPF_T_FORMAT(t) ((t) & BPF_T_FORMAT_MASK) +#define BPF_T_FLAG(t) ((t) & BPF_T_FLAG_MASK) +#define BPF_T_VALID(t) \ + ((t) == BPF_T_NONE || (BPF_T_FORMAT(t) != BPF_T_NONE && \ + ((t) & ~(BPF_T_FORMAT_MASK | BPF_T_FLAG_MASK)) == 0)) + +#define BPF_T_MICROTIME_FAST (BPF_T_MICROTIME | BPF_T_FAST) +#define BPF_T_NANOTIME_FAST (BPF_T_NANOTIME | BPF_T_FAST) +#define BPF_T_BINTIME_FAST (BPF_T_BINTIME | BPF_T_FAST) +#define BPF_T_MICROTIME_MONOTONIC (BPF_T_MICROTIME | BPF_T_MONOTONIC) +#define BPF_T_NANOTIME_MONOTONIC (BPF_T_NANOTIME | BPF_T_MONOTONIC) +#define BPF_T_BINTIME_MONOTONIC (BPF_T_BINTIME | BPF_T_MONOTONIC) +#define BPF_T_MICROTIME_MONOTONIC_FAST (BPF_T_MICROTIME | 
BPF_T_MONOTONIC_FAST) +#define BPF_T_NANOTIME_MONOTONIC_FAST (BPF_T_NANOTIME | BPF_T_MONOTONIC_FAST) +#define BPF_T_BINTIME_MONOTONIC_FAST (BPF_T_BINTIME | BPF_T_MONOTONIC_FAST) + /* * Structure prepended to each packet. */ +struct bpf_ts { + bpf_int64 bt_sec; /* seconds */ + bpf_u_int64 bt_frac; /* fraction */ +}; +struct bpf_xhdr { + struct bpf_ts bh_tstamp; /* time stamp */ + bpf_u_int32 bh_caplen; /* length of captured portion */ + bpf_u_int32 bh_datalen; /* original length of packet */ + u_short bh_hdrlen; /* length of bpf header (this struct + plus alignment padding) */ +}; +/* Obsolete */ struct bpf_hdr { struct timeval bh_tstamp; /* time stamp */ bpf_u_int32 bh_caplen; /* length of captured portion */ @@ -165,14 +208,9 @@ struct bpf_hdr { u_short bh_hdrlen; /* length of bpf header (this struct plus alignment padding) */ }; -/* - * Because the structure above is not a multiple of 4 bytes, some compilers - * will insist on inserting padding; hence, sizeof(struct bpf_hdr) won't work. - * Only the kernel needs to know about it; applications use bh_hdrlen. - */ #ifdef _KERNEL -#define SIZEOF_BPF_HDR (sizeof(struct bpf_hdr) <= 20 ? 18 : \ - sizeof(struct bpf_hdr)) +#define MTAG_BPF 0x627066 +#define MTAG_BPF_TIMESTAMP 0 #endif /* @@ -241,6 +279,24 @@ struct bpf_zbuf_header { */ #define DLT_SYMANTEC_FIREWALL 99 +/* + * Values between 100 and 103 are used in capture file headers as + * link-layer header type LINKTYPE_ values corresponding to DLT_ types + * that differ between platforms; don't use those values for new DLT_ + * new types. + */ + +/* + * Values starting with 104 are used for newly-assigned link-layer + * header type values; for those link-layer header types, the DLT_ + * value returned by pcap_datalink() and passed to pcap_open_dead(), + * and the LINKTYPE_ value that appears in capture files, are the + * same. + * + * DLT_MATCHING_MIN is the lowest such value; DLT_MATCHING_MAX is + * the highest such value. 
+ */ +#define DLT_MATCHING_MIN 104 /* * This value was defined by libpcap 0.5; platforms that have defined @@ -806,6 +862,281 @@ struct bpf_zbuf_header { */ #define DLT_IEEE802_15_4_NONASK_PHY 215 +/* + * David Gibson <david@gibson.dropbear.id.au> requested this for + * captures from the Linux kernel /dev/input/eventN devices. This + * is used to communicate keystrokes and mouse movements from the + * Linux kernel to display systems, such as Xorg. + */ +#define DLT_LINUX_EVDEV 216 + +/* + * GSM Um and Abis interfaces, preceded by a "gsmtap" header. + * + * Requested by Harald Welte <laforge@gnumonks.org>. + */ +#define DLT_GSMTAP_UM 217 +#define DLT_GSMTAP_ABIS 218 + +/* + * MPLS, with an MPLS label as the link-layer header. + * Requested by Michele Marchetto <michele@openbsd.org> on behalf + * of OpenBSD. + */ +#define DLT_MPLS 219 + +/* + * USB packets, beginning with a Linux USB header, with the USB header + * padded to 64 bytes; required for memory-mapped access. + */ +#define DLT_USB_LINUX_MMAPPED 220 + +/* + * DECT packets, with a pseudo-header; requested by + * Matthias Wenzel <tcpdump@mazzoo.de>. + */ +#define DLT_DECT 221 +/* + * From: "Lidwa, Eric (GSFC-582.0)[SGT INC]" <eric.lidwa-1@nasa.gov> + * Date: Mon, 11 May 2009 11:18:30 -0500 + * + * DLT_AOS. We need it for AOS Space Data Link Protocol. + * I have already written dissectors for but need an OK from + * legal before I can submit a patch. + * + */ +#define DLT_AOS 222 + +/* + * Wireless HART (Highway Addressable Remote Transducer) + * From the HART Communication Foundation + * IES/PAS 62591 + * + * Requested by Sam Roberts <vieuxtech@gmail.com>. + */ +#define DLT_WIHART 223 + +/* + * Fibre Channel FC-2 frames, beginning with a Frame_Header. + * Requested by Kahou Lei <kahou82@gmail.com>. + */ +#define DLT_FC_2 224 + +/* + * Fibre Channel FC-2 frames, beginning with an encoding of the + * SOF, and ending with an encoding of the EOF. 
+ * + * The encodings represent the frame delimiters as 4-byte sequences + * representing the corresponding ordered sets, with K28.5 + * represented as 0xBC, and the D symbols as the corresponding + * byte values; for example, SOFi2, which is K28.5 - D21.5 - D1.2 - D21.2, + * is represented as 0xBC 0xB5 0x55 0x55. + * + * Requested by Kahou Lei <kahou82@gmail.com>. + */ +#define DLT_FC_2_WITH_FRAME_DELIMS 225 +/* + * Solaris ipnet pseudo-header; requested by Darren Reed <Darren.Reed@Sun.COM>. + * + * The pseudo-header starts with a one-byte version number; for version 2, + * the pseudo-header is: + * + * struct dl_ipnetinfo { + * u_int8_t dli_version; + * u_int8_t dli_family; + * u_int16_t dli_htype; + * u_int32_t dli_pktlen; + * u_int32_t dli_ifindex; + * u_int32_t dli_grifindex; + * u_int32_t dli_zsrc; + * u_int32_t dli_zdst; + * }; + * + * dli_version is 2 for the current version of the pseudo-header. + * + * dli_family is a Solaris address family value, so it's 2 for IPv4 + * and 26 for IPv6. + * + * dli_htype is a "hook type" - 0 for incoming packets, 1 for outgoing + * packets, and 2 for packets arriving from another zone on the same + * machine. + * + * dli_pktlen is the length of the packet data following the pseudo-header + * (so the captured length minus dli_pktlen is the length of the + * pseudo-header, assuming the entire pseudo-header was captured). + * + * dli_ifindex is the interface index of the interface on which the + * packet arrived. + * + * dli_grifindex is the group interface index number (for IPMP interfaces). + * + * dli_zsrc is the zone identifier for the source of the packet. + * + * dli_zdst is the zone identifier for the destination of the packet. + * + * A zone number of 0 is the global zone; a zone number of 0xffffffff + * means that the packet arrived from another host on the network, not + * from another zone on the same machine. 
+ * + * An IPv4 or IPv6 datagram follows the pseudo-header; dli_family indicates + * which of those it is. + */ +#define DLT_IPNET 226 + +/* + * CAN (Controller Area Network) frames, with a pseudo-header as supplied + * by Linux SocketCAN. See Documentation/networking/can.txt in the Linux + * source. + * + * Requested by Felix Obenhuber <felix@obenhuber.de>. + */ +#define DLT_CAN_SOCKETCAN 227 + +/* + * Raw IPv4/IPv6; different from DLT_RAW in that the DLT_ value specifies + * whether it's v4 or v6. Requested by Darren Reed <Darren.Reed@Sun.COM>. + */ +#define DLT_IPV4 228 +#define DLT_IPV6 229 + +/* + * IEEE 802.15.4, exactly as it appears in the spec (no padding, no + * nothing), and with no FCS at the end of the frame; requested by + * Jon Smirl <jonsmirl@gmail.com>. + */ +#define DLT_IEEE802_15_4_NOFCS 230 + +/* + * Raw D-Bus: + * + * http://www.freedesktop.org/wiki/Software/dbus + * + * messages: + * + * http://dbus.freedesktop.org/doc/dbus-specification.html#message-protocol-messages + * + * starting with the endianness flag, followed by the message type, etc., + * but without the authentication handshake before the message sequence: + * + * http://dbus.freedesktop.org/doc/dbus-specification.html#auth-protocol + * + * Requested by Martin Vidner <martin@vidner.net>. + */ +#define DLT_DBUS 231 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler <hannes@juniper.net>. + */ +#define DLT_JUNIPER_VS 232 +#define DLT_JUNIPER_SRX_E2E 233 +#define DLT_JUNIPER_FIBRECHANNEL 234 + +/* + * DVB-CI (DVB Common Interface for communication between a PC Card + * module and a DVB receiver). See + * + * http://www.kaiser.cx/pcap-dvbci.html + * + * for the specification. + * + * Requested by Martin Kaiser <martin@kaiser.cx>. + */ +#define DLT_DVB_CI 235 + +/* + * Variant of 3GPP TS 27.010 multiplexing protocol (similar to, but + * *not* the same as, 27.010). Requested by Hans-Christoph Schemmel + * <hans-christoph.schemmel@cinterion.com>. 
+ */ +#define DLT_MUX27010 236 + +/* + * STANAG 5066 D_PDUs. Requested by M. Baris Demiray + * <barisdemiray@gmail.com>. + */ +#define DLT_STANAG_5066_D_PDU 237 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler <hannes@juniper.net>. + */ +#define DLT_JUNIPER_ATM_CEMIC 238 + +/* + * NetFilter LOG messages + * (payload of netlink NFNL_SUBSYS_ULOG/NFULNL_MSG_PACKET packets) + * + * Requested by Jakub Zawadzki <darkjames-ws@darkjames.pl> + */ +#define DLT_NFLOG 239 + +/* + * Hilscher Gesellschaft fuer Systemautomation mbH link-layer type + * for Ethernet packets with a 4-byte pseudo-header and always + * with the payload including the FCS, as supplied by their + * netANALYZER hardware and software. + * + * Requested by Holger P. Frommer <HPfrommer@hilscher.com> + */ +#define DLT_NETANALYZER 240 + +/* + * Hilscher Gesellschaft fuer Systemautomation mbH link-layer type + * for Ethernet packets with a 4-byte pseudo-header and FCS and + * with the Ethernet header preceded by 7 bytes of preamble and + * 1 byte of SFD, as supplied by their netANALYZER hardware and + * software. + * + * Requested by Holger P. Frommer <HPfrommer@hilscher.com> + */ +#define DLT_NETANALYZER_TRANSPARENT 241 + +/* + * IP-over-Infiniband, as specified by RFC 4391. + * + * Requested by Petr Sumbera <petr.sumbera@oracle.com>. + */ +#define DLT_IPOIB 242 + +/* + * MPEG-2 transport stream (ISO 13818-1/ITU-T H.222.0). + * + * Requested by Guy Martin <gmsoft@tuxicoman.be>. + */ +#define DLT_MPEG_2_TS 243 + +/* + * ng4T GmbH's UMTS Iub/Iur-over-ATM and Iub/Iur-over-IP format as + * used by their ng40 protocol tester. + * + * Requested by Jens Grimmer <jens.grimmer@ng4t.com>. + */ +#define DLT_NG40 244 + +/* + * Pseudo-header giving adapter number and flags, followed by an NFC + * (Near-Field Communications) Logical Link Control Protocol (LLCP) PDU, + * as specified by NFC Forum Logical Link Control Protocol Technical + * Specification LLCP 1.1. 
+ * + * Requested by Mike Wakerly <mikey@google.com>. + */ +#define DLT_NFC_LLCP 245 + +/* + * 245 is used as LINKTYPE_PFSYNC; do not use it for any other purpose. + * + * DLT_PFSYNC has different values on different platforms, and all of + * them collide with something used elsewhere. On platforms that + * don't already define it, define it as 245. + */ +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) && !defined(__APPLE__) +#define DLT_PFSYNC 246 +#endif + +#define DLT_MATCHING_MAX 246 /* highest value in the "matching" range */ + /* * DLT and savefile link type values are split into a class and * a member of that class. A class value of 0 indicates a regular @@ -904,7 +1235,8 @@ SYSCTL_DECL(_net_bpf); /* * Rotate the packet buffers in descriptor d. Move the store buffer into the * hold slot, and the free buffer ino the store slot. Zero the length of the - * new store buffer. Descriptor lock should be held. + * new store buffer. Descriptor lock should be held. Hold buffer must + * not be marked "in use". */ #define ROTATE_BUFFERS(d) do { \ (d)->bd_hbuf = (d)->bd_sbuf; \ @@ -926,7 +1258,7 @@ struct bpf_if { LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */ #ifdef BPF_INTERNAL u_int bif_dlt; /* link layer type */ - u_int bif_hdrlen; /* length of header (with padding) */ + u_int bif_hdrlen; /* length of link header */ struct ifnet *bif_ifp; /* corresponding interface */ struct rwlock bif_lock; /* interface lock */ LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */ diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c index 910bcd0c..ec6aed74 100644 --- a/freebsd/sys/net/bpf_buffer.c +++ b/freebsd/sys/net/bpf_buffer.c @@ -81,6 +81,8 @@ __FBSDID("$FreeBSD$"); #include <net/bpf_buffer.h> #include <net/bpfdesc.h> +#define PRINET 26 /* interruptible */ + /* * Implement historical kernel memory buffering model for BPF: two malloc(9) * kernel buffers are hung off of the descriptor. 
The size is fixed prior to @@ -90,10 +92,10 @@ __FBSDID("$FreeBSD$"); static int bpf_bufsize = 4096; SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW, - &bpf_bufsize, 0, "Maximum capture buffer size in bytes"); + &bpf_bufsize, 0, "Default capture buffer size in bytes"); static int bpf_maxbufsize = BPF_MAXBUFSIZE; SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW, - &bpf_maxbufsize, 0, "Default capture buffer in bytes"); + &bpf_maxbufsize, 0, "Maximum capture buffer in bytes"); /* * Simple data copy to the current kernel buffer. @@ -191,6 +193,9 @@ bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i) return (EINVAL); } + while (d->bd_hbuf_in_use) + mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, + PRINET, "bd_hbuf", 0); /* Free old buffers if set */ if (d->bd_fbuf != NULL) free(d->bd_fbuf, M_BPF); diff --git a/freebsd/sys/net/bpf_filter.c b/freebsd/sys/net/bpf_filter.c index 3e310000..a313f4bd 100644 --- a/freebsd/sys/net/bpf_filter.c +++ b/freebsd/sys/net/bpf_filter.c @@ -179,6 +179,8 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) bpf_u_int32 k; u_int32_t mem[BPF_MEMWORDS]; + bzero(mem, sizeof(mem)); + if (pc == NULL) /* * No filter means accept all. diff --git a/freebsd/sys/net/bpf_jitter.c b/freebsd/sys/net/bpf_jitter.c index cd4d7d2b..e3d91e85 100644 --- a/freebsd/sys/net/bpf_jitter.c +++ b/freebsd/sys/net/bpf_jitter.c @@ -2,7 +2,7 @@ /*- * Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy) - * Copyright (C) 2005-2008 Jung-uk Kim <jkim@FreeBSD.org> + * Copyright (C) 2005-2009 Jung-uk Kim <jkim@FreeBSD.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -44,14 +44,15 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #else #include <stdlib.h> -#include <string.h> +#include <sys/mman.h> +#include <rtems/bsd/sys/param.h> #include <rtems/bsd/sys/types.h> #endif #include <net/bpf.h> #include <net/bpf_jitter.h> -bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, int *); +bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, size_t *); static u_int bpf_jit_accept_all(u_char *, u_int, u_int); @@ -62,27 +63,36 @@ SYSCTL_NODE(_net, OID_AUTO, bpf_jitter, CTLFLAG_RW, 0, "BPF JIT compiler"); int bpf_jitter_enable = 1; SYSCTL_INT(_net_bpf_jitter, OID_AUTO, enable, CTLFLAG_RW, &bpf_jitter_enable, 0, "enable BPF JIT compiler"); +#endif bpf_jit_filter * bpf_jitter(struct bpf_insn *fp, int nins) { bpf_jit_filter *filter; - /* Allocate the filter structure */ + /* Allocate the filter structure. */ +#ifdef _KERNEL filter = (struct bpf_jit_filter *)malloc(sizeof(*filter), - M_BPFJIT, M_NOWAIT | M_ZERO); + M_BPFJIT, M_NOWAIT); +#else + filter = (struct bpf_jit_filter *)malloc(sizeof(*filter)); +#endif if (filter == NULL) return (NULL); - /* No filter means accept all */ + /* No filter means accept all. */ if (fp == NULL || nins == 0) { filter->func = bpf_jit_accept_all; return (filter); } - /* Create the binary */ - if ((filter->func = bpf_jit_compile(fp, nins, filter->mem)) == NULL) { + /* Create the binary. 
*/ + if ((filter->func = bpf_jit_compile(fp, nins, &filter->size)) == NULL) { +#ifdef _KERNEL free(filter, M_BPFJIT); +#else + free(filter); +#endif return (NULL); } @@ -93,46 +103,16 @@ void bpf_destroy_jit_filter(bpf_jit_filter *filter) { +#ifdef _KERNEL if (filter->func != bpf_jit_accept_all) free(filter->func, M_BPFJIT); free(filter, M_BPFJIT); -} #else -bpf_jit_filter * -bpf_jitter(struct bpf_insn *fp, int nins) -{ - bpf_jit_filter *filter; - - /* Allocate the filter structure */ - filter = (struct bpf_jit_filter *)malloc(sizeof(*filter)); - if (filter == NULL) - return (NULL); - memset(filter, 0, sizeof(*filter)); - - /* No filter means accept all */ - if (fp == NULL || nins == 0) { - filter->func = bpf_jit_accept_all; - return (filter); - } - - /* Create the binary */ - if ((filter->func = bpf_jit_compile(fp, nins, filter->mem)) == NULL) { - free(filter); - return (NULL); - } - - return (filter); -} - -void -bpf_destroy_jit_filter(bpf_jit_filter *filter) -{ - if (filter->func != bpf_jit_accept_all) - free(filter->func); + munmap(filter->func, filter->size); free(filter); -} #endif +} static u_int bpf_jit_accept_all(__unused u_char *p, __unused u_int wirelen, diff --git a/freebsd/sys/net/bpf_jitter.h b/freebsd/sys/net/bpf_jitter.h index 04491b01..90a1ff5f 100644 --- a/freebsd/sys/net/bpf_jitter.h +++ b/freebsd/sys/net/bpf_jitter.h @@ -1,6 +1,6 @@ /*- * Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy) - * Copyright (C) 2005-2008 Jung-uk Kim <jkim@FreeBSD.org> + * Copyright (C) 2005-2009 Jung-uk Kim <jkim@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -53,8 +53,7 @@ typedef u_int (*bpf_filter_func)(u_char *, u_int, u_int); typedef struct bpf_jit_filter { /* The native filtering binary, in the form of a bpf_filter_func. 
*/ bpf_filter_func func; - - int mem[BPF_MEMWORDS]; /* Scratch memory */ + size_t size; } bpf_jit_filter; /* diff --git a/freebsd/sys/net/bpfdesc.h b/freebsd/sys/net/bpfdesc.h index c3265ce1..496f0b36 100644 --- a/freebsd/sys/net/bpfdesc.h +++ b/freebsd/sys/net/bpfdesc.h @@ -63,6 +63,7 @@ struct bpf_d { caddr_t bd_sbuf; /* store slot */ caddr_t bd_hbuf; /* hold slot */ caddr_t bd_fbuf; /* free slot */ + int bd_hbuf_in_use; /* don't rotate buffers */ int bd_slen; /* current length of store buffer */ int bd_hlen; /* current length of hold buffer */ @@ -82,6 +83,7 @@ struct bpf_d { u_char bd_writer; /* non-zero if d is writer-only */ int bd_hdrcmplt; /* false to fill in src lladdr automatically */ int bd_direction; /* select packet direction */ + int bd_tstamp; /* select time stamping function */ int bd_feedback; /* true to feed back sent packets */ int bd_async; /* non-zero if packet reception should generate signal */ int bd_sig; /* signal to send upon packet reception */ diff --git a/freebsd/sys/net/flowtable.h b/freebsd/sys/net/flowtable.h index 6e79a3cf..d810fa33 100644 --- a/freebsd/sys/net/flowtable.h +++ b/freebsd/sys/net/flowtable.h @@ -37,6 +37,7 @@ $FreeBSD$ #define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */ #define FL_PCPU (1<<1) /* pcpu cache */ #define FL_NOAUTO (1<<2) /* don't automatically add flentry on miss */ +#define FL_IPV6 (1<<9) #define FL_TCP (1<<11) #define FL_SCTP (1<<12) diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c index 92e705b0..a1c1e49e 100644 --- a/freebsd/sys/net/ieee8023ad_lacp.c +++ b/freebsd/sys/net/ieee8023ad_lacp.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> /* hz */ #include <sys/socket.h> /* for net/if.h */ #include <sys/sockio.h> +#include <sys/sysctl.h> #include <machine/stdarg.h> #include <rtems/bsd/sys/lock.h> #include <sys/rwlock.h> @@ -170,7 +171,8 @@ static void lacp_enable_distributing(struct lacp_port *); static int lacp_xmit_lacpdu(struct lacp_port *); 
static int lacp_xmit_marker(struct lacp_port *); -#if defined(LACP_DEBUG) +/* Debugging */ + static void lacp_dump_lacpdu(const struct lacpdu *); static const char *lacp_format_partner(const struct lacp_peerinfo *, char *, size_t); @@ -186,10 +188,14 @@ static const char *lacp_format_portid(const struct lacp_portid *, char *, size_t); static void lacp_dprintf(const struct lacp_port *, const char *, ...) __attribute__((__format__(__printf__, 2, 3))); -#define LACP_DPRINTF(a) lacp_dprintf a -#else -#define LACP_DPRINTF(a) /* nothing */ -#endif + +static int lacp_debug = 0; +SYSCTL_INT(_net, OID_AUTO, lacp_debug, CTLFLAG_RW | CTLFLAG_TUN, + &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)"); +TUNABLE_INT("net.lacp_debug", &lacp_debug); + +#define LACP_DPRINTF(a) if (lacp_debug > 0) { lacp_dprintf a ; } +#define LACP_TRACE(a) if (lacp_debug > 1) { lacp_dprintf(a,"%s\n",__func__); } /* * partner administration variables. @@ -292,10 +298,10 @@ lacp_pdu_input(struct lacp_port *lp, struct mbuf *m) goto bad; } -#if defined(LACP_DEBUG) - LACP_DPRINTF((lp, "lacpdu receive\n")); - lacp_dump_lacpdu(du); -#endif /* defined(LACP_DEBUG) */ + if (lacp_debug > 0) { + lacp_dprintf(lp, "lacpdu receive\n"); + lacp_dump_lacpdu(du); + } LACP_LOCK(lsc); lacp_sm_rx(lp, du); @@ -372,10 +378,10 @@ lacp_xmit_lacpdu(struct lacp_port *lp) sizeof(du->ldu_collector)); du->ldu_collector.lci_maxdelay = 0; -#if defined(LACP_DEBUG) - LACP_DPRINTF((lp, "lacpdu transmit\n")); - lacp_dump_lacpdu(du); -#endif /* defined(LACP_DEBUG) */ + if (lacp_debug > 0) { + lacp_dprintf(lp, "lacpdu transmit\n"); + lacp_dump_lacpdu(du); + } m->m_flags |= M_MCAST; @@ -649,9 +655,7 @@ lacp_disable_distributing(struct lacp_port *lp) { struct lacp_aggregator *la = lp->lp_aggregator; struct lacp_softc *lsc = lp->lp_lsc; -#if defined(LACP_DEBUG) char buf[LACP_LAGIDSTR_MAX+1]; -#endif /* defined(LACP_DEBUG) */ LACP_LOCK_ASSERT(lsc); @@ -686,9 +690,7 @@ lacp_enable_distributing(struct lacp_port *lp) { struct 
lacp_aggregator *la = lp->lp_aggregator; struct lacp_softc *lsc = lp->lp_lsc; -#if defined(LACP_DEBUG) char buf[LACP_LAGIDSTR_MAX+1]; -#endif /* defined(LACP_DEBUG) */ LACP_LOCK_ASSERT(lsc); @@ -722,7 +724,8 @@ lacp_transit_expire(void *vp) LACP_LOCK_ASSERT(lsc); - LACP_DPRINTF((NULL, "%s\n", __func__)); + LACP_TRACE(NULL); + lsc->lsc_suppress_distributing = FALSE; } @@ -840,7 +843,8 @@ lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la) return; } - LACP_DPRINTF((NULL, "%s\n", __func__)); + LACP_TRACE(NULL); + lsc->lsc_suppress_distributing = TRUE; /* send a marker frame down each port to verify the queues are empty */ @@ -910,11 +914,9 @@ lacp_select_active_aggregator(struct lacp_softc *lsc) struct lacp_aggregator *la; struct lacp_aggregator *best_la = NULL; uint64_t best_speed = 0; -#if defined(LACP_DEBUG) char buf[LACP_LAGIDSTR_MAX+1]; -#endif /* defined(LACP_DEBUG) */ - LACP_DPRINTF((NULL, "%s:\n", __func__)); + LACP_TRACE(NULL); TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) { uint64_t speed; @@ -948,7 +950,6 @@ lacp_select_active_aggregator(struct lacp_softc *lsc) KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports), ("invalid aggregator list")); -#if defined(LACP_DEBUG) if (lsc->lsc_active_aggregator != best_la) { LACP_DPRINTF((NULL, "active aggregator changed\n")); LACP_DPRINTF((NULL, "old %s\n", @@ -959,7 +960,6 @@ lacp_select_active_aggregator(struct lacp_softc *lsc) } LACP_DPRINTF((NULL, "new %s\n", lacp_format_lagid_aggregator(best_la, buf, sizeof(buf)))); -#endif /* defined(LACP_DEBUG) */ if (lsc->lsc_active_aggregator != best_la) { sc->sc_ifp->if_baudrate = best_speed; @@ -1042,9 +1042,7 @@ lacp_compose_key(struct lacp_port *lp) static void lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la) { -#if defined(LACP_DEBUG) char buf[LACP_LAGIDSTR_MAX+1]; -#endif LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", __func__, @@ -1060,9 +1058,7 @@ lacp_aggregator_addref(struct lacp_softc *lsc, struct 
lacp_aggregator *la) static void lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la) { -#if defined(LACP_DEBUG) char buf[LACP_LAGIDSTR_MAX+1]; -#endif LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", __func__, @@ -1197,9 +1193,7 @@ lacp_select(struct lacp_port *lp) { struct lacp_softc *lsc = lp->lp_lsc; struct lacp_aggregator *la; -#if defined(LACP_DEBUG) char buf[LACP_LAGIDSTR_MAX+1]; -#endif if (lp->lp_aggregator) { return; @@ -1280,7 +1274,8 @@ lacp_sm_mux(struct lacp_port *lp) enum lacp_selected selected = lp->lp_selected; struct lacp_aggregator *la; - /* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */ + if (lacp_debug > 1) + lacp_dprintf(lp, "%s: state %d\n", __func__, lp->lp_mux_state); re_eval: la = lp->lp_aggregator; @@ -1389,9 +1384,7 @@ static void lacp_sm_mux_timer(struct lacp_port *lp) { struct lacp_aggregator *la = lp->lp_aggregator; -#if defined(LACP_DEBUG) char buf[LACP_LAGIDSTR_MAX+1]; -#endif KASSERT(la->la_pending > 0, ("no pending event")); @@ -1539,11 +1532,9 @@ lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du) { boolean_t active; uint8_t oldpstate; -#if defined(LACP_DEBUG) char buf[LACP_STATESTR_MAX+1]; -#endif - /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + LACP_TRACE(lp); oldpstate = lp->lp_partner.lip_state; @@ -1578,7 +1569,8 @@ lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du) static void lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du) { - /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + + LACP_TRACE(lp); if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) || !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, @@ -1593,7 +1585,7 @@ lacp_sm_rx_record_default(struct lacp_port *lp) { uint8_t oldpstate; - /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + LACP_TRACE(lp); oldpstate = lp->lp_partner.lip_state; lp->lp_partner = lacp_partner_admin; @@ -1605,7 +1597,8 @@ static void lacp_sm_rx_update_selected_from_peerinfo(struct 
lacp_port *lp, const struct lacp_peerinfo *info) { - /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + + LACP_TRACE(lp); if (lacp_compare_peerinfo(&lp->lp_partner, info) || !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state, @@ -1618,7 +1611,8 @@ lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp, static void lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du) { - /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + + LACP_TRACE(lp); lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor); } @@ -1626,7 +1620,8 @@ lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du) static void lacp_sm_rx_update_default_selected(struct lacp_port *lp) { - /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + + LACP_TRACE(lp); lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin); } @@ -1814,7 +1809,7 @@ tlv_check(const void *p, size_t size, const struct tlvhdr *tlv, return (0); } -#if defined(LACP_DEBUG) +/* Debugging */ const char * lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen) { @@ -1944,4 +1939,3 @@ lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...) 
vprintf(fmt, va); va_end(va); } -#endif diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c index 7d7869a3..ea4a8a46 100644 --- a/freebsd/sys/net/if.c +++ b/freebsd/sys/net/if.c @@ -76,18 +76,18 @@ #include <net/vnet.h> #if defined(INET) || defined(INET6) -/*XXX*/ #include <netinet/in.h> #include <netinet/in_var.h> +#include <netinet/ip.h> #include <netinet/ip_carp.h> +#ifdef INET +#include <netinet/if_ether.h> +#endif /* INET */ #ifdef INET6 #include <netinet6/in6_var.h> #include <netinet6/in6_ifattach.h> -#endif -#endif -#ifdef INET -#include <netinet/if_ether.h> -#endif +#endif /* INET6 */ +#endif /* INET || INET6 */ #include <security/mac/mac_framework.h> @@ -100,8 +100,6 @@ struct ifindex_entry { struct ifnet *ife_ifnet; }; -static int slowtimo_started; - SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); @@ -122,7 +120,7 @@ SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, &ifdescr_maxlen, 0, "administrative maximum length for interface description"); -MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); +static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); /* global sx for non-critical path ifdescr */ static struct sx ifdescr_sx; @@ -161,10 +159,8 @@ static int ifconf(u_long, caddr_t); static void if_freemulti(struct ifmultiaddr *); static void if_init(void *); static void if_grow(void); -static void if_check(void *); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); -static void if_slowtimo(void *); static int if_transmit(struct ifnet *ifp, struct mbuf *m); static void if_unroute(struct ifnet *, int flag, int fam); static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *); @@ -221,12 +217,7 @@ struct sx ifnet_sxlock; static if_com_alloc_t *if_com_alloc[256]; static if_com_free_t *if_com_free[256]; -/* - * System initialization - */ -SYSINIT(interface_check, 
SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL); - -MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); +static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); @@ -427,18 +418,6 @@ if_grow(void) V_ifindex_table = e; } -static void -if_check(void *dummy __unused) -{ - - /* - * If at least one interface added during boot uses - * if_watchdog then start the timer. - */ - if (slowtimo_started) - if_slowtimo(0); -} - /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is @@ -532,6 +511,7 @@ if_free_type(struct ifnet *ifp, u_char type) ifp->if_flags |= IFF_DYING; /* XXX: Locking */ + CURVNET_SET_QUIET(ifp->if_vnet); IFNET_WLOCK(); KASSERT(ifp == ifnet_byindex_locked(ifp->if_index), ("%s: freeing unallocated ifnet", ifp->if_xname)); @@ -539,9 +519,9 @@ if_free_type(struct ifnet *ifp, u_char type) ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); - if (!refcount_release(&ifp->if_refcount)) - return; - if_free_internal(ifp); + if (refcount_release(&ifp->if_refcount)) + if_free_internal(ifp); + CURVNET_RESTORE(); } /* @@ -692,6 +672,15 @@ if_attach_internal(struct ifnet *ifp, int vmove) TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp->if_broadcastaddr = NULL; + +#if defined(INET) || defined(INET6) + /* Initialize to max value. */ + if (ifp->if_hw_tsomax == 0) + ifp->if_hw_tsomax = IP_MAXPACKET; + KASSERT(ifp->if_hw_tsomax <= IP_MAXPACKET && + ifp->if_hw_tsomax >= IP_MAXPACKET / 8, + ("%s: tsomax outside of range", __func__)); +#endif } #ifdef VIMAGE else { @@ -725,18 +714,6 @@ if_attach_internal(struct ifnet *ifp, int vmove) /* Announce the interface. 
*/ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); - - if (!vmove && ifp->if_watchdog != NULL) { - if_printf(ifp, - "WARNING: using obsoleted if_watchdog interface\n"); - - /* - * Note that we need if_slowtimo(). If this happens after - * boot, then call if_slowtimo() directly. - */ - if (atomic_cmpset_int(&slowtimo_started, 0, 1) && !cold) - if_slowtimo(0); - } } static void @@ -856,7 +833,9 @@ void if_detach(struct ifnet *ifp) { + CURVNET_SET_QUIET(ifp->if_vnet); if_detach_internal(ifp, 0); + CURVNET_RESTORE(); } static void @@ -1414,7 +1393,8 @@ if_rtdel(struct radix_node *rn, void *arg) return (0); err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway, - rt_mask(rt), rt->rt_flags|RTF_RNH_LOCKED, + rt_mask(rt), + rt->rt_flags|RTF_RNH_LOCKED|RTF_PINNED, (struct rtentry **) NULL, rt->rt_fibnum); if (err) { log(LOG_WARNING, "if_rtdel: error %d\n", err); @@ -1917,8 +1897,13 @@ if_route(struct ifnet *ifp, int flag, int fam) #endif } -void (*vlan_link_state_p)(struct ifnet *, int); /* XXX: private from if_vlan */ +void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ +struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); +struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); +int (*vlan_tag_p)(struct ifnet *, uint16_t *); +int (*vlan_setcookie_p)(struct ifnet *, void *); +void *(*vlan_cookie_p)(struct ifnet *); /* * Handle a change in the interface link state. To avoid LORs @@ -1948,7 +1933,7 @@ do_link_state_change(void *arg, int pending) /* Notify that the link state has changed. */ rt_ifmsg(ifp); if (ifp->if_vlantrunk != NULL) - (*vlan_link_state_p)(ifp, 0); + (*vlan_link_state_p)(ifp); if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && IFP2AC(ifp)->ac_netgraph != NULL) @@ -1969,6 +1954,7 @@ do_link_state_change(void *arg, int pending) if (log_link_state_change) log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname, (link_state == LINK_STATE_UP) ? 
"UP" : "DOWN" ); + EVENTHANDLER_INVOKE(ifnet_link_event, ifp, ifp->if_link_state); CURVNET_RESTORE(); } @@ -2023,39 +2009,6 @@ if_qflush(struct ifnet *ifp) } /* - * Handle interface watchdog timer routines. Called - * from softclock, we decrement timers (if set) and - * call the appropriate interface routine on expiration. - * - * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called - * holding Giant. - */ -static void -if_slowtimo(void *arg) -{ - VNET_ITERATOR_DECL(vnet_iter); - struct ifnet *ifp; - int s = splimp(); - - VNET_LIST_RLOCK_NOSLEEP(); - IFNET_RLOCK_NOSLEEP(); - VNET_FOREACH(vnet_iter) { - CURVNET_SET(vnet_iter); - TAILQ_FOREACH(ifp, &V_ifnet, if_link) { - if (ifp->if_timer == 0 || --ifp->if_timer) - continue; - if (ifp->if_watchdog) - (*ifp->if_watchdog)(ifp); - } - CURVNET_RESTORE(); - } - IFNET_RUNLOCK_NOSLEEP(); - VNET_LIST_RUNLOCK_NOSLEEP(); - splx(s); - timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ); -} - -/* * Map interface name to interface structure pointer, with or without * returning a reference. */ @@ -2869,7 +2822,7 @@ again: max_len += sa->sa_len; } - if (!sbuf_overflowed(sb)) + if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } IF_ADDR_RUNLOCK(ifp); @@ -2878,7 +2831,7 @@ again: sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); - if (!sbuf_overflowed(sb)) + if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } diff --git a/freebsd/sys/net/if.h b/freebsd/sys/net/if.h index 25d43ac3..5795baac 100644 --- a/freebsd/sys/net/if.h +++ b/freebsd/sys/net/if.h @@ -43,9 +43,11 @@ /* * <net/if.h> does not depend on <sys/time.h> on most other systems. This * helps userland compatibility. (struct timeval ifi_lastchange) + * The same holds for <sys/socket.h>. (struct sockaddr ifru_addr) */ #ifndef _KERNEL #include <rtems/bsd/sys/time.h> +#include <sys/socket.h> #endif struct ifnet; @@ -199,6 +201,13 @@ struct if_data { * field. 
IFCAP_* and CSUM_* do not match one to one and CSUM_* may be * more detailed or differentiated than IFCAP_*. * Hwassist features are defined CSUM_* in sys/mbuf.h + * + * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl + * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE. + * This is not strictly necessary because the common code never + * changes capabilities, and it is left to the individual driver + * to do the right thing. However, having the filter here + * avoids replication of the same code in all individual drivers. */ #define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ @@ -221,12 +230,18 @@ struct if_data { #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ +#define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ +#define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ + +#define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) +#define IFCAP_CANTCHANGE (IFCAP_NETMAP) + #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ diff --git a/freebsd/sys/net/if_arcsubr.c b/freebsd/sys/net/if_arcsubr.c index 8ed35f77..fae432ad 100644 --- a/freebsd/sys/net/if_arcsubr.c +++ b/freebsd/sys/net/if_arcsubr.c @@ -839,7 +839,7 @@ arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, } } -MALLOC_DEFINE(M_ARCCOM, "arccom", "ARCNET interface internals"); +static MALLOC_DEFINE(M_ARCCOM, "arccom", "ARCNET interface internals"); static void* arc_alloc(u_char type, struct ifnet *ifp) diff --git a/freebsd/sys/net/if_arp.h b/freebsd/sys/net/if_arp.h index 
2bb63582..38c64020 100644 --- a/freebsd/sys/net/if_arp.h +++ b/freebsd/sys/net/if_arp.h @@ -50,6 +50,7 @@ struct arphdr { #define ARPHRD_ARCNET 7 /* arcnet hardware format */ #define ARPHRD_FRELAY 15 /* frame relay hardware format */ #define ARPHRD_IEEE1394 24 /* firewire hardware format */ +#define ARPHRD_INFINIBAND 32 /* infiniband hardware format */ u_short ar_pro; /* format of protocol address */ u_char ar_hln; /* length of hardware address */ u_char ar_pln; /* length of protocol address */ diff --git a/freebsd/sys/net/if_atmsubr.c b/freebsd/sys/net/if_atmsubr.c index 3d6295e6..a4cbeb09 100644 --- a/freebsd/sys/net/if_atmsubr.c +++ b/freebsd/sys/net/if_atmsubr.c @@ -100,7 +100,7 @@ void (*atm_harp_event_p)(struct ifnet *, uint32_t, void *); SYSCTL_NODE(_hw, OID_AUTO, atm, CTLFLAG_RW, 0, "ATM hardware"); -MALLOC_DEFINE(M_IFATM, "ifatm", "atm interface internals"); +static MALLOC_DEFINE(M_IFATM, "ifatm", "atm interface internals"); #ifndef ETHERTYPE_IPV6 #define ETHERTYPE_IPV6 0x86dd diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c index 266926a5..c8533f05 100644 --- a/freebsd/sys/net/if_bridge.c +++ b/freebsd/sys/net/if_bridge.c @@ -121,6 +121,7 @@ __FBSDID("$FreeBSD$"); #ifdef INET6 #include <netinet/ip6.h> #include <netinet6/ip6_var.h> +#include <netinet6/in6_ifattach.h> #endif #if defined(INET) || defined(INET6) #include <netinet/ip_carp.h> @@ -134,7 +135,7 @@ __FBSDID("$FreeBSD$"); #include <net/route.h> #include <netinet/ip_fw.h> -#include <netinet/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_fw_private.h> /* * Size of the route hash table. Must be a power of two. 
@@ -350,7 +351,7 @@ static struct bstp_cb_ops bridge_ops = { }; SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge"); +static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge"); static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */ static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */ @@ -385,6 +386,12 @@ SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RW, &bridge_inherit_mac, 0, "Inherit MAC address from the first bridge member"); +static VNET_DEFINE(int, allow_llz_overlap) = 0; +#define V_allow_llz_overlap VNET(allow_llz_overlap) +SYSCTL_VNET_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW, + &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope " + "zones of a bridge interface and the member interfaces"); + struct bridge_control { int (*bc_func)(struct bridge_softc *, void *); int bc_argsize; @@ -1043,14 +1050,6 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) if (ifs->if_bridge != NULL) return (EBUSY); - bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); - if (bif == NULL) - return (ENOMEM); - - bif->bif_ifp = ifs; - bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; - bif->bif_savedcaps = ifs->if_capenable; - switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: @@ -1058,20 +1057,70 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) /* permitted interface types */ break; default: - error = EINVAL; - goto out; + return (EINVAL); } +#ifdef INET6 + /* + * Two valid inet6 addresses with link-local scope must not be + * on the parent interface and the member interfaces at the + * same time. This restriction is needed to prevent violation + * of link-local scope zone. Attempts to add a member + * interface which has inet6 addresses when the parent has + * inet6 triggers removal of all inet6 addresses on the member + * interface. + */ + + /* Check if the parent interface has a link-local scope addr. 
*/ + if (V_allow_llz_overlap == 0 && + in6ifa_llaonifp(sc->sc_ifp) != NULL) { + /* + * If any, remove all inet6 addresses from the member + * interfaces. + */ + BRIDGE_XLOCK(sc); + LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { + if (in6ifa_llaonifp(bif->bif_ifp)) { + BRIDGE_UNLOCK(sc); + in6_ifdetach(bif->bif_ifp); + BRIDGE_LOCK(sc); + if_printf(sc->sc_ifp, + "IPv6 addresses on %s have been removed " + "before adding it as a member to prevent " + "IPv6 address scope violation.\n", + bif->bif_ifp->if_xname); + } + } + BRIDGE_XDROP(sc); + if (in6ifa_llaonifp(ifs)) { + BRIDGE_UNLOCK(sc); + in6_ifdetach(ifs); + BRIDGE_LOCK(sc); + if_printf(sc->sc_ifp, + "IPv6 addresses on %s have been removed " + "before adding it as a member to prevent " + "IPv6 address scope violation.\n", + ifs->if_xname); + } + } +#endif /* Allow the first Ethernet member to define the MTU */ if (LIST_EMPTY(&sc->sc_iflist)) sc->sc_ifp->if_mtu = ifs->if_mtu; else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { if_printf(sc->sc_ifp, "invalid MTU: %lu(%s) != %lu\n", ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu); - error = EINVAL; - goto out; + return (EINVAL); } + bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); + if (bif == NULL) + return (ENOMEM); + + bif->bif_ifp = ifs; + bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; + bif->bif_savedcaps = ifs->if_capenable; + /* * Assign the interface's MAC address to the bridge if it's the first * member and the MAC address of the bridge has not been changed from @@ -1106,12 +1155,10 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) BRIDGE_LOCK(sc); break; } - if (error) - bridge_delete_member(sc, bif, 0); -out: + if (error) { - if (bif != NULL) - free(bif, M_DEVBUF); + bridge_delete_member(sc, bif, 0); + free(bif, M_DEVBUF); } return (error); } @@ -1702,6 +1749,9 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif; + if (ifp->if_flags & IFF_RENAMING) + return; + /* Check if the 
interface is a bridge member */ if (sc != NULL) { BRIDGE_LOCK(sc); @@ -1813,8 +1863,10 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) m->m_flags &= ~M_VLANTAG; } - if (err == 0) - dst_ifp->if_transmit(dst_ifp, m); + if ((err = dst_ifp->if_transmit(dst_ifp, m))) { + m_freem(m0); + break; + } } if (err == 0) { @@ -2315,6 +2367,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) BRIDGE_UNLOCK(sc); \ return (NULL); \ } \ + eh = mtod(m, struct ether_header *); \ } \ } \ if (bif->bif_flags & IFBIF_LEARNING) { \ @@ -3111,6 +3164,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) args.m = *mp; args.oif = ifp; args.next_hop = NULL; + args.next_hop6 = NULL; args.eh = &eh2; args.inp = NULL; /* used by ipfw uid/gid/jail rules */ i = V_ip_fw_chk_ptr(&args); @@ -3396,14 +3450,14 @@ bridge_ip6_checkbasic(struct mbuf **mp) if ((m = m_copyup(m, sizeof(struct ip6_hdr), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ - V_ip6stat.ip6s_toosmall++; + IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { - V_ip6stat.ip6s_toosmall++; + IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } @@ -3412,7 +3466,7 @@ bridge_ip6_checkbasic(struct mbuf **mp) ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { - V_ip6stat.ip6s_badvers++; + IP6STAT_INC(ip6s_badvers); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); goto bad; } @@ -3468,7 +3522,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh, continue; } bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN); - } else + } else m_freem(m); } diff --git a/freebsd/sys/net/if_dead.c b/freebsd/sys/net/if_dead.c index b0231cc9..b85793f8 100644 --- a/freebsd/sys/net/if_dead.c +++ 
b/freebsd/sys/net/if_dead.c @@ -72,12 +72,6 @@ ifdead_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (ENXIO); } -static void -ifdead_watchdog(struct ifnet *ifp) -{ - -} - static int ifdead_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) @@ -109,7 +103,6 @@ if_dead(struct ifnet *ifp) ifp->if_input = ifdead_input; ifp->if_start = ifdead_start; ifp->if_ioctl = ifdead_ioctl; - ifp->if_watchdog = ifdead_watchdog; ifp->if_resolvemulti = ifdead_resolvemulti; ifp->if_qflush = ifdead_qflush; ifp->if_transmit = ifdead_transmit; diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c index fdbcb9b8..91d34722 100644 --- a/freebsd/sys/net/if_enc.c +++ b/freebsd/sys/net/if_enc.c @@ -29,6 +29,10 @@ * $FreeBSD$ */ +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#include <rtems/bsd/local/opt_enc.h> + #include <rtems/bsd/sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> @@ -55,14 +59,12 @@ #include <netinet/ip.h> #include <netinet/ip_var.h> #include <netinet/in_var.h> -#include <rtems/bsd/local/opt_inet6.h> #ifdef INET6 #include <netinet/ip6.h> #include <netinet6/ip6_var.h> #endif -#include <rtems/bsd/local/opt_enc.h> #include <netipsec/ipsec.h> #include <netipsec/xform.h> @@ -102,22 +104,22 @@ IFC_SIMPLE_DECLARE(enc, 1); * Before and after are relative to when we are stripping the * outer IP header. 
*/ -SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl"); +static SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl"); -SYSCTL_NODE(_net_enc, OID_AUTO, in, CTLFLAG_RW, 0, "enc input sysctl"); +static SYSCTL_NODE(_net_enc, OID_AUTO, in, CTLFLAG_RW, 0, "enc input sysctl"); static int ipsec_filter_mask_in = ENC_BEFORE; -SYSCTL_XINT(_net_enc_in, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW, +SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW, &ipsec_filter_mask_in, 0, "IPsec input firewall filter mask"); static int ipsec_bpf_mask_in = ENC_BEFORE; -SYSCTL_XINT(_net_enc_in, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW, +SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW, &ipsec_bpf_mask_in, 0, "IPsec input bpf mask"); -SYSCTL_NODE(_net_enc, OID_AUTO, out, CTLFLAG_RW, 0, "enc output sysctl"); +static SYSCTL_NODE(_net_enc, OID_AUTO, out, CTLFLAG_RW, 0, "enc output sysctl"); static int ipsec_filter_mask_out = ENC_BEFORE; -SYSCTL_XINT(_net_enc_out, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW, +SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW, &ipsec_filter_mask_out, 0, "IPsec output firewall filter mask"); static int ipsec_bpf_mask_out = ENC_BEFORE|ENC_AFTER; -SYSCTL_XINT(_net_enc_out, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW, +SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW, &ipsec_bpf_mask_out, 0, "IPsec output bpf mask"); static void @@ -179,12 +181,12 @@ enc_modevent(module_t mod, int type, void *data) } static moduledata_t enc_mod = { - "enc", + "if_enc", enc_modevent, 0 }; -DECLARE_MODULE(enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); +DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); static int enc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, @@ -245,11 +247,14 @@ ipsec_filter(struct mbuf **mp, int dir, int flags) } /* Skip pfil(9) if no filters are loaded */ - if (!(PFIL_HOOKED(&V_inet_pfil_hook) + if (1 +#ifdef INET + && !PFIL_HOOKED(&V_inet_pfil_hook) +#endif 
#ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + && !PFIL_HOOKED(&V_inet6_pfil_hook) #endif - )) { + ) { return (0); } @@ -265,6 +270,7 @@ ipsec_filter(struct mbuf **mp, int dir, int flags) error = 0; ip = mtod(*mp, struct ip *); switch (ip->ip_v) { +#ifdef INET case 4: /* * before calling the firewall, swap fields the same as @@ -284,7 +290,7 @@ ipsec_filter(struct mbuf **mp, int dir, int flags) ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); break; - +#endif #ifdef INET6 case 6: error = pfil_run_hooks(&V_inet6_pfil_hook, mp, diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c index a79a8341..00ab65bd 100644 --- a/freebsd/sys/net/if_epair.c +++ b/freebsd/sys/net/if_epair.c @@ -77,7 +77,7 @@ __FBSDID("$FreeBSD$"); #define EPAIRNAME "epair" SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); +static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); #ifdef EPAIR_DEBUG static int epair_debug = 0; diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c index 63013b63..5ee2606e 100644 --- a/freebsd/sys/net/if_ethersubr.c +++ b/freebsd/sys/net/if_ethersubr.c @@ -74,7 +74,7 @@ #include <netinet/ip_carp.h> #include <netinet/ip_var.h> #include <netinet/ip_fw.h> -#include <netinet/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_fw_private.h> #endif #ifdef INET6 #include <netinet6/nd6.h> @@ -136,7 +136,7 @@ static void ether_reassign(struct ifnet *, struct vnet *, char *); #endif /* XXX: should be in an arp support file, not here */ -MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals"); +static MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals"); #define ETHER_IS_BROADCAST(addr) \ (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0) @@ -470,7 +470,7 @@ ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared) if (mtag == NULL) { args.rule.slot = 0; } else { - /* dummynet packet, already partially processed */ + /* dummynet 
packet, already partially processed */ struct ipfw_rule_ref *r; /* XXX can we free it after use ? */ @@ -501,6 +501,7 @@ ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared) args.m = m; /* the packet we are looking at */ args.oif = dst; /* destination, if any */ args.next_hop = NULL; /* we do not support forward yet */ + args.next_hop6 = NULL; /* we do not support forward yet */ args.eh = &save_eh; /* MAC header for bridged/MAC packets */ args.inp = NULL; /* used by ipfw uid/gid/jail rules */ i = V_ip_fw_chk_ptr(&args); @@ -563,7 +564,7 @@ ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared) * mbuf chain m with the ethernet header at the front. */ static void -ether_input(struct ifnet *ifp, struct mbuf *m) +ether_input_internal(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; @@ -695,6 +696,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m) bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); + eh = mtod(m, struct ether_header *); } M_SETFIB(m, ifp->if_fib); @@ -709,6 +711,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m) CURVNET_RESTORE(); return; } + eh = mtod(m, struct ether_header *); } /* @@ -723,6 +726,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m) CURVNET_RESTORE(); return; } + eh = mtod(m, struct ether_header *); } #if defined(INET) || defined(INET6) @@ -762,6 +766,46 @@ ether_input(struct ifnet *ifp, struct mbuf *m) } /* + * Ethernet input dispatch; by default, direct dispatch here regardless of + * global configuration. 
+ */ +static void +ether_nh_input(struct mbuf *m) +{ + + ether_input_internal(m->m_pkthdr.rcvif, m); +} + +static struct netisr_handler ether_nh = { + .nh_name = "ether", + .nh_handler = ether_nh_input, + .nh_proto = NETISR_ETHER, + .nh_policy = NETISR_POLICY_SOURCE, + .nh_dispatch = NETISR_DISPATCH_DIRECT, +}; + +static void +ether_init(__unused void *arg) +{ + + netisr_register(&ether_nh); +} +SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); + +static void +ether_input(struct ifnet *ifp, struct mbuf *m) +{ + + /* + * We will rely on rcvif being set properly in the deferred context, + * so assert it is correct here. + */ + KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__)); + + netisr_dispatch(NETISR_ETHER, m); +} + +/* * Upper layer processing for a received Ethernet packet. */ void diff --git a/freebsd/sys/net/if_fwsubr.c b/freebsd/sys/net/if_fwsubr.c index 61cc7039..b022ecae 100644 --- a/freebsd/sys/net/if_fwsubr.c +++ b/freebsd/sys/net/if_fwsubr.c @@ -65,7 +65,7 @@ #include <security/mac/mac_framework.h> -MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals"); +static MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals"); struct fw_hwaddr firewire_broadcastaddr = { 0xffffffff, diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c index e309d73a..316a3958 100644 --- a/freebsd/sys/net/if_gif.c +++ b/freebsd/sys/net/if_gif.c @@ -114,7 +114,7 @@ IFC_SIMPLE_DECLARE(gif, 0); static int gifmodevent(module_t, int, void *); SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0, +static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0, "Generic Tunnel Interface"); #ifndef MAX_GIF_NEST /* diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c index 2eb4433f..b7e0bd15 100644 --- a/freebsd/sys/net/if_gre.c +++ b/freebsd/sys/net/if_gre.c @@ -20,13 +20,6 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED @@ -157,7 +150,7 @@ static const struct protosw in_mobile_protosw = { #endif SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, +static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, "Generic Routing Encapsulation"); #ifndef MAX_GRE_NEST /* @@ -356,6 +349,12 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); } + if ((ifp->if_flags & IFF_MONITOR) != 0) { + m_freem(m); + error = ENETDOWN; + goto end; + } + m->m_flags &= ~(M_BCAST|M_MCAST); if (sc->g_proto == IPPROTO_MOBILE) { diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h index 13b882c8..74d16b1c 100644 --- a/freebsd/sys/net/if_gre.h +++ b/freebsd/sys/net/if_gre.h @@ -16,13 +16,6 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. 
Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED diff --git a/freebsd/sys/net/if_iso88025subr.c b/freebsd/sys/net/if_iso88025subr.c index d59e2b2a..660dc7dd 100644 --- a/freebsd/sys/net/if_iso88025subr.c +++ b/freebsd/sys/net/if_iso88025subr.c @@ -783,7 +783,7 @@ iso88025_resolvemulti (ifp, llsa, sa) return (0); } -MALLOC_DEFINE(M_ISO88025, "arpcom", "802.5 interface internals"); +static MALLOC_DEFINE(M_ISO88025, "arpcom", "802.5 interface internals"); static void* iso88025_alloc(u_char type, struct ifnet *ifp) diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c index f836f3ae..4dee2afe 100644 --- a/freebsd/sys/net/if_lagg.c +++ b/freebsd/sys/net/if_lagg.c @@ -54,8 +54,10 @@ __FBSDID("$FreeBSD$"); #include <net/if_var.h> #include <net/bpf.h> -#ifdef INET +#if defined(INET) || defined(INET6) #include <netinet/in.h> +#endif +#ifdef INET #include <netinet/in_systm.h> #include <netinet/if_ether.h> #include <netinet/ip.h> @@ -63,6 +65,8 @@ __FBSDID("$FreeBSD$"); #ifdef INET6 #include <netinet/ip6.h> +#include <netinet6/in6_var.h> +#include <netinet6/in6_ifattach.h> #endif #include <net/if_vlan_var.h> @@ -98,7 +102,9 @@ static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t); static int lagg_port_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct route *); static void lagg_port_ifdetach(void *arg __unused, struct ifnet *); +#ifdef LAGG_PORT_STACKING static int lagg_port_checkstacking(struct lagg_softc *); +#endif static void lagg_port2req(struct lagg_port *, struct lagg_reqport *); static void lagg_init(void *); static void lagg_stop(struct lagg_softc *); @@ -108,7 +114,8 @@ static int lagg_ether_cmdmulti(struct lagg_port *, int); static int 
lagg_setflag(struct lagg_port *, int, int, int (*func)(struct ifnet *, int)); static int lagg_setflags(struct lagg_port *, int status); -static void lagg_start(struct ifnet *); +static int lagg_transmit(struct ifnet *, struct mbuf *); +static void lagg_qflush(struct ifnet *); static int lagg_media_change(struct ifnet *); static void lagg_media_status(struct ifnet *, struct ifmediareq *); static struct lagg_port *lagg_link_active(struct lagg_softc *, @@ -163,7 +170,8 @@ static const struct { }; SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation"); +static SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, + "Link Aggregation"); static int lagg_failover_rx_all = 0; /* Allow input on any failover links */ SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW, @@ -282,6 +290,9 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid, "Use flow id for load sharing"); + SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, + "count", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_count, sc->sc_count, + "Total number of ports"); /* Hash all layers by default */ sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4; @@ -310,15 +321,12 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) if_initname(ifp, ifc->ifc_name, unit); ifp->if_type = IFT_ETHER; ifp->if_softc = sc; - ifp->if_start = lagg_start; + ifp->if_transmit = lagg_transmit; + ifp->if_qflush = lagg_qflush; ifp->if_init = lagg_init; ifp->if_ioctl = lagg_ioctl; ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; - IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); - ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; - IFQ_SET_READY(&ifp->if_snd); - /* * Attach as an ordinary ethernet device, childs will be attached * as special device IFT_IEEE8023ADLAG. 
@@ -360,7 +368,8 @@ lagg_clone_destroy(struct ifnet *ifp) while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL) lagg_port_destroy(lp, 1); /* Unhook the aggregation protocol */ - (*sc->sc_detach)(sc); + if (sc->sc_detach != NULL) + (*sc->sc_detach)(sc); LAGG_WUNLOCK(sc); @@ -489,7 +498,9 @@ lagg_port_setlladdr(void *arg, int pending) ifp = llq->llq_ifp; /* Set the link layer address */ + CURVNET_SET(ifp->if_vnet); error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN); + CURVNET_RESTORE(); if (error) printf("%s: setlladdr failed on %s\n", __func__, ifp->if_xname); @@ -513,13 +524,46 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) return (ENOSPC); /* Check if port has already been associated to a lagg */ - if (ifp->if_lagg != NULL) + if (ifp->if_lagg != NULL) { + /* Port is already in the current lagg? */ + lp = (struct lagg_port *)ifp->if_lagg; + if (lp->lp_softc == sc) + return (EEXIST); return (EBUSY); + } /* XXX Disallow non-ethernet interfaces (this should be any of 802) */ if (ifp->if_type != IFT_ETHER) return (EPROTONOSUPPORT); +#ifdef INET6 + /* + * The member interface should not have inet6 address because + * two interfaces with a valid link-local scope zone must not be + * merged in any form. This restriction is needed to + * prevent violation of link-local scope zone. Attempts to + * add a member interface which has inet6 addresses triggers + * removal of all inet6 addresses on the member interface. 
+ */ + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { + if (in6ifa_llaonifp(lp->lp_ifp)) { + in6_ifdetach(lp->lp_ifp); + if_printf(sc->sc_ifp, + "IPv6 addresses on %s have been removed " + "before adding it as a member to prevent " + "IPv6 address scope violation.\n", + lp->lp_ifp->if_xname); + } + } + if (in6ifa_llaonifp(ifp)) { + in6_ifdetach(ifp); + if_printf(sc->sc_ifp, + "IPv6 addresses on %s have been removed " + "before adding it as a member to prevent " + "IPv6 address scope violation.\n", + ifp->if_xname); + } +#endif /* Allow the first Ethernet member to define the MTU */ if (SLIST_EMPTY(&sc->sc_ports)) sc->sc_ifp->if_mtu = ifp->if_mtu; @@ -540,7 +584,8 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) mtx_unlock(&lagg_list_mtx); free(lp, M_DEVBUF); return (EINVAL); - /* XXX disable stacking for the moment, its untested + /* XXX disable stacking for the moment, its untested */ +#ifdef LAGG_PORT_STACKING lp->lp_flags |= LAGG_PORT_STACK; if (lagg_port_checkstacking(sc_ptr) >= LAGG_MAX_STACKING) { @@ -548,7 +593,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) free(lp, M_DEVBUF); return (E2BIG); } - */ +#endif } } mtx_unlock(&lagg_list_mtx); @@ -599,6 +644,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) return (error); } +#ifdef LAGG_PORT_STACKING static int lagg_port_checkstacking(struct lagg_softc *sc) { @@ -617,6 +663,7 @@ lagg_port_checkstacking(struct lagg_softc *sc) return (m + 1); } +#endif static int lagg_port_destroy(struct lagg_port *lp, int runpd) @@ -1211,35 +1258,45 @@ lagg_setflags(struct lagg_port *lp, int status) return (0); } -static void -lagg_start(struct ifnet *ifp) +static int +lagg_transmit(struct ifnet *ifp, struct mbuf *m) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; - struct mbuf *m; - int error = 0; + int error, len, mcast; + + len = m->m_pkthdr.len; + mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 
1 : 0; LAGG_RLOCK(sc); /* We need a Tx algorithm and at least one port */ if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) { - IF_DRAIN(&ifp->if_snd); LAGG_RUNLOCK(sc); - return; + m_freem(m); + ifp->if_oerrors++; + return (ENXIO); } - for (;; error = 0) { - IFQ_DEQUEUE(&ifp->if_snd, m); - if (m == NULL) - break; - - ETHER_BPF_MTAP(ifp, m); + ETHER_BPF_MTAP(ifp, m); - error = (*sc->sc_start)(sc, m); - if (error == 0) - ifp->if_opackets++; - else - ifp->if_oerrors++; - } + error = (*sc->sc_start)(sc, m); LAGG_RUNLOCK(sc); + + if (error == 0) { + ifp->if_opackets++; + ifp->if_omcasts += mcast; + ifp->if_obytes += len; + } else + ifp->if_oerrors++; + + return (error); +} + +/* + * The ifp->if_qflush entry point for lagg(4) is no-op. + */ +static void +lagg_qflush(struct ifnet *ifp __unused) +{ } static struct mbuf * @@ -1572,7 +1629,7 @@ lagg_rr_start(struct lagg_softc *sc, struct mbuf *m) */ if ((lp = lagg_link_active(sc, lp)) == NULL) { m_freem(m); - return (ENOENT); + return (ENETDOWN); } /* Send mbuf */ @@ -1620,7 +1677,7 @@ lagg_fail_start(struct lagg_softc *sc, struct mbuf *m) /* Use the master port if active or the next available port */ if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) { m_freem(m); - return (ENOENT); + return (ENETDOWN); } /* Send mbuf */ @@ -1749,7 +1806,7 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m) */ if ((lp = lagg_link_active(sc, lp)) == NULL) { m_freem(m); - return (ENOENT); + return (ENETDOWN); } /* Send mbuf */ diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c index 53077589..55b816a7 100644 --- a/freebsd/sys/net/if_llatbl.c +++ b/freebsd/sys/net/if_llatbl.c @@ -111,10 +111,13 @@ llentry_free(struct llentry *lle) size_t pkts_dropped; struct mbuf *next; - pkts_dropped = 0; + IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp); LLE_WLOCK_ASSERT(lle); + LIST_REMOVE(lle, lle_next); + lle->la_flags &= ~(LLE_VALID | LLE_LINKED); + pkts_dropped = 0; while ((lle->la_numheld > 0) && (lle->la_hold != 
NULL)) { next = lle->la_hold->m_nextpkt; m_freem(lle->la_hold); @@ -123,53 +126,43 @@ llentry_free(struct llentry *lle) pkts_dropped++; } - KASSERT(lle->la_numheld == 0, - ("%s: la_numheld %d > 0, pkts_droped %zd", __func__, + KASSERT(lle->la_numheld == 0, + ("%s: la_numheld %d > 0, pkts_droped %zd", __func__, lle->la_numheld, pkts_dropped)); - lle->la_flags &= ~LLE_VALID; LLE_FREE_LOCKED(lle); return (pkts_dropped); } /* - * Update an llentry for address dst (equivalent to rtalloc for new-arp) - * Caller must pass in a valid struct llentry * (or NULL) + * (al)locate an llentry for address dst (equivalent to rtalloc for new-arp). * - * if found the llentry * is returned referenced and unlocked + * If found the llentry * is returned referenced and unlocked. */ -int -llentry_update(struct llentry **llep, struct lltable *lt, - struct sockaddr_storage *dst, struct ifnet *ifp) +struct llentry * +llentry_alloc(struct ifnet *ifp, struct lltable *lt, + struct sockaddr_storage *dst) { struct llentry *la; - IF_AFDATA_RLOCK(ifp); - la = lla_lookup(lt, LLE_EXCLUSIVE, - (struct sockaddr *)dst); + IF_AFDATA_RLOCK(ifp); + la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst); IF_AFDATA_RUNLOCK(ifp); - if ((la == NULL) && + if ((la == NULL) && (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) { IF_AFDATA_WLOCK(ifp); - la = lla_lookup(lt, - (LLE_CREATE | LLE_EXCLUSIVE), + la = lla_lookup(lt, (LLE_CREATE | LLE_EXCLUSIVE), (struct sockaddr *)dst); - IF_AFDATA_WUNLOCK(ifp); + IF_AFDATA_WUNLOCK(ifp); } - if (la != NULL && (*llep != la)) { - if (*llep != NULL) - LLE_FREE(*llep); + + if (la != NULL) { LLE_ADDREF(la); LLE_WUNLOCK(la); - *llep = la; - } else if (la != NULL) - LLE_WUNLOCK(la); - - if (la == NULL) - return (ENOENT); + } - return (0); + return (la); } /* @@ -187,17 +180,16 @@ lltable_free(struct lltable *llt) SLIST_REMOVE(&V_lltables, llt, lltable, llt_link); LLTABLE_WUNLOCK(); - for (i=0; i < LLTBL_HASHTBL_SIZE; i++) { + IF_AFDATA_WLOCK(llt->llt_ifp); + for (i = 0; i 
< LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { - int canceled; - - canceled = callout_drain(&lle->la_timer); LLE_WLOCK(lle); - if (canceled) + if (callout_stop(&lle->la_timer)) LLE_REMREF(lle); llentry_free(lle); } } + IF_AFDATA_WUNLOCK(llt->llt_ifp); free(llt, M_LLTABLE); } @@ -232,7 +224,7 @@ lltable_drain(int af) void lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask, - u_int flags) + u_int flags) { struct lltable *llt; @@ -302,7 +294,7 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info) if (rtm->rtm_flags & RTF_ANNOUNCE) { flags |= LLE_PUB; #ifdef INET - if (dst->sa_family == AF_INET && + if (dst->sa_family == AF_INET && ((struct sockaddr_inarp *)dst)->sin_other != 0) { struct rtentry *rt; ((struct sockaddr_inarp *)dst)->sin_other = 0; @@ -347,7 +339,7 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info) if (flags & LLE_CREATE) flags |= LLE_EXCLUSIVE; - + IF_AFDATA_LOCK(ifp); lle = lla_lookup(llt, flags, dst); IF_AFDATA_UNLOCK(ifp); @@ -383,7 +375,7 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info) #ifdef INET /* gratuitous ARP */ if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) { - arprequest(ifp, + arprequest(ifp, &((struct sockaddr_in *)dst)->sin_addr, &((struct sockaddr_in *)dst)->sin_addr, ((laflags & LLE_PROXY) ? 
@@ -456,7 +448,7 @@ llatbl_lle_show(struct llentry_sa *la) sin = (struct sockaddr_in *)&la->l3_addr; inet_ntoa_r(sin->sin_addr, l3s); - db_printf(" l3_addr=%s\n", l3s); + db_printf(" l3_addr=%s\n", l3s); break; } #endif @@ -468,7 +460,7 @@ llatbl_lle_show(struct llentry_sa *la) sin6 = (struct sockaddr_in6 *)&la->l3_addr; ip6_sprintf(l3s, &sin6->sin6_addr); - db_printf(" l3_addr=%s\n", l3s); + db_printf(" l3_addr=%s\n", l3s); break; } #endif diff --git a/freebsd/sys/net/if_llatbl.h b/freebsd/sys/net/if_llatbl.h index 8b15e5c8..8ac72c4f 100644 --- a/freebsd/sys/net/if_llatbl.h +++ b/freebsd/sys/net/if_llatbl.h @@ -30,6 +30,8 @@ __FBSDID("$FreeBSD$"); #ifndef _NET_IF_LLATBL_H_ #define _NET_IF_LLATBL_H_ +#include <rtems/bsd/local/opt_ofed.h> + #include <sys/_rwlock.h> #include <netinet/in.h> @@ -57,21 +59,25 @@ struct llentry { struct rwlock lle_lock; struct lltable *lle_tbl; struct llentries *lle_head; + void (*lle_free)(struct lltable *, struct llentry *); struct mbuf *la_hold; - int la_numheld; /* # of packets currently held */ + int la_numheld; /* # of packets currently held */ time_t la_expire; - uint16_t la_flags; + uint16_t la_flags; uint16_t la_asked; uint16_t la_preempt; uint16_t ln_byhint; int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */ - uint16_t ln_router; + uint16_t ln_router; time_t ln_ntick; int lle_refcnt; - + union { uint64_t mac_aligned; uint16_t mac16[3]; +#ifdef OFED + uint8_t mac8[20]; /* IB needs 20 bytes. */ +#endif } ll_addr; /* XXX af-private? 
*/ @@ -97,26 +103,28 @@ struct llentry { #define LLE_ADDREF(lle) do { \ LLE_WLOCK_ASSERT(lle); \ KASSERT((lle)->lle_refcnt >= 0, \ - ("negative refcnt %d", (lle)->lle_refcnt)); \ + ("negative refcnt %d on lle %p", \ + (lle)->lle_refcnt, (lle))); \ (lle)->lle_refcnt++; \ } while (0) #define LLE_REMREF(lle) do { \ LLE_WLOCK_ASSERT(lle); \ - KASSERT((lle)->lle_refcnt > 1, \ - ("bogus refcnt %d", (lle)->lle_refcnt)); \ + KASSERT((lle)->lle_refcnt > 0, \ + ("bogus refcnt %d on lle %p", \ + (lle)->lle_refcnt, (lle))); \ (lle)->lle_refcnt--; \ } while (0) #define LLE_FREE_LOCKED(lle) do { \ - if ((lle)->lle_refcnt <= 1) \ - (lle)->lle_tbl->llt_free((lle)->lle_tbl, (lle));\ + if ((lle)->lle_refcnt == 1) \ + (lle)->lle_free((lle)->lle_tbl, (lle)); \ else { \ - (lle)->lle_refcnt--; \ + LLE_REMREF(lle); \ LLE_WUNLOCK(lle); \ } \ /* guard against invalid refs */ \ - lle = NULL; \ + (lle) = NULL; \ } while (0) #define LLE_FREE(lle) do { \ @@ -146,7 +154,6 @@ struct lltable { int llt_af; struct ifnet *llt_ifp; - void (*llt_free)(struct lltable *, struct llentry *); void (*llt_prefix_free)(struct lltable *, const struct sockaddr *prefix, const struct sockaddr *mask, @@ -154,7 +161,7 @@ struct lltable { struct llentry * (*llt_lookup)(struct lltable *, u_int flags, const struct sockaddr *l3addr); int (*llt_dump)(struct lltable *, - struct sysctl_req *); + struct sysctl_req *); }; MALLOC_DECLARE(M_LLTABLE); @@ -167,25 +174,26 @@ MALLOC_DECLARE(M_LLTABLE); #define LLE_VALID 0x0008 /* ll_addr is valid */ #define LLE_PROXY 0x0010 /* proxy entry ??? */ #define LLE_PUB 0x0020 /* publish entry ??? 
*/ +#define LLE_LINKED 0x0040 /* linked to lookup structure */ +#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */ #define LLE_DELETE 0x4000 /* delete on a lookup - match LLE_IFADDR */ #define LLE_CREATE 0x8000 /* create on a lookup miss */ -#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */ #define LLATBL_HASH(key, mask) \ (((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask) struct lltable *lltable_init(struct ifnet *, int); void lltable_free(struct lltable *); -void lltable_prefix_free(int, struct sockaddr *, - struct sockaddr *, u_int); +void lltable_prefix_free(int, struct sockaddr *, + struct sockaddr *, u_int); #if 0 void lltable_drain(int); #endif int lltable_sysctl_dumparp(int, struct sysctl_req *); size_t llentry_free(struct llentry *); -int llentry_update(struct llentry **, struct lltable *, - struct sockaddr_storage *, struct ifnet *); +struct llentry *llentry_alloc(struct ifnet *, struct lltable *, + struct sockaddr_storage *); /* * Generic link layer address lookup function. 
@@ -197,4 +205,14 @@ lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) } int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *); + +#include <sys/eventhandler.h> +enum { + LLENTRY_RESOLVED, + LLENTRY_TIMEDOUT, + LLENTRY_DELETED, + LLENTRY_EXPIRED, +}; +typedef void (*lle_event_fn)(void *, struct llentry *, int); +EVENTHANDLER_DECLARE(lle_event, lle_event_fn); #endif /* _NET_IF_LLATBL_H_ */ diff --git a/freebsd/sys/net/if_loop.c b/freebsd/sys/net/if_loop.c index 958019d9..b40dec8e 100644 --- a/freebsd/sys/net/if_loop.c +++ b/freebsd/sys/net/if_loop.c @@ -94,7 +94,9 @@ #endif #define LO_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP) -#define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_PSEUDO_HDR | \ +#define LO_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) +#define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \ + CSUM_PSEUDO_HDR | \ CSUM_IP_CHECKED | CSUM_IP_VALID | \ CSUM_SCTP_VALID) @@ -145,8 +147,9 @@ lo_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_ioctl = loioctl; ifp->if_output = looutput; ifp->if_snd.ifq_maxlen = ifqmaxlen; - ifp->if_capabilities = ifp->if_capenable = IFCAP_HWCSUM; - ifp->if_hwassist = LO_CSUM_FEATURES; + ifp->if_capabilities = ifp->if_capenable = + IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6; + ifp->if_hwassist = LO_CSUM_FEATURES | LO_CSUM_FEATURES6; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); if (V_loif == NULL) @@ -254,7 +257,24 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, m->m_pkthdr.csum_flags = LO_CSUM_SET; } m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES; + break; case AF_INET6: +#if 0 + /* + * XXX-BZ for now always claim the checksum is good despite + * any interface flags. This is a workaround for 9.1-R and + * a proper solution ought to be sought later. 
+ */ + if (ifp->if_capenable & IFCAP_RXCSUM_IPV6) { + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags = LO_CSUM_SET; + } +#else + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags = LO_CSUM_SET; +#endif + m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6; + break; case AF_IPX: case AF_APPLETALK: break; @@ -438,10 +458,29 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ifp->if_capenable ^= IFCAP_RXCSUM; if ((mask & IFCAP_TXCSUM) != 0) ifp->if_capenable ^= IFCAP_TXCSUM; + if ((mask & IFCAP_RXCSUM_IPV6) != 0) { +#if 0 + ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; +#else + error = EOPNOTSUPP; + break; +#endif + } + if ((mask & IFCAP_TXCSUM_IPV6) != 0) { +#if 0 + ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; +#else + error = EOPNOTSUPP; + break; +#endif + } + ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = LO_CSUM_FEATURES; - else - ifp->if_hwassist = 0; +#if 0 + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) + ifp->if_hwassist |= LO_CSUM_FEATURES6; +#endif break; default: diff --git a/freebsd/sys/net/if_media.h b/freebsd/sys/net/if_media.h index 2c833228..6424d662 100644 --- a/freebsd/sys/net/if_media.h +++ b/freebsd/sys/net/if_media.h @@ -150,6 +150,9 @@ uint64_t ifmedia_baudrate(int); #define IFM_10G_LRM 24 /* 10GBase-LRM 850nm Multi-mode */ #define IFM_UNKNOWN 25 /* media types not defined yet */ #define IFM_10G_T 26 /* 10GBase-T - RJ45 */ +#define IFM_40G_CR4 27 /* 40GBase-CR4 */ +#define IFM_40G_SR4 28 /* 40GBase-SR4 */ +#define IFM_40G_LR4 29 /* 40GBase-LR4 */ /* note 31 is the max! 
*/ @@ -366,6 +369,9 @@ struct ifmedia_description { { IFM_10G_TWINAX_LONG, "10Gbase-Twinax-Long" }, \ { IFM_UNKNOWN, "Unknown" }, \ { IFM_10G_T, "10Gbase-T" }, \ + { IFM_40G_CR4, "40Gbase-CR4" }, \ + { IFM_40G_SR4, "40Gbase-SR4" }, \ + { IFM_40G_LR4, "40Gbase-LR4" }, \ { 0, NULL }, \ } @@ -488,6 +494,7 @@ struct ifmedia_description { { IFM_IEEE80211_OFDM3, "OFDM/3Mbps" }, \ { IFM_IEEE80211_OFDM4, "OFDM/4.5Mbps" }, \ { IFM_IEEE80211_OFDM27, "OFDM/27Mbps" }, \ + { IFM_IEEE80211_MCS, "MCS" }, \ { 0, NULL }, \ } @@ -526,6 +533,7 @@ struct ifmedia_description { { IFM_IEEE80211_OFDM3, "OFDM3" }, \ { IFM_IEEE80211_OFDM4, "OFDM4.5" }, \ { IFM_IEEE80211_OFDM27, "OFDM27" }, \ + { IFM_IEEE80211_MCS, "MCS" }, \ { 0, NULL }, \ } @@ -662,6 +670,9 @@ struct ifmedia_baudrate { { IFM_ETHER | IFM_10G_TWINAX_LONG, IF_Gbps(10ULL) }, \ { IFM_ETHER | IFM_10G_LRM, IF_Gbps(10ULL) }, \ { IFM_ETHER | IFM_10G_T, IF_Gbps(10ULL) }, \ + { IFM_ETHER | IFM_40G_CR4, IF_Gbps(40ULL) }, \ + { IFM_ETHER | IFM_40G_SR4, IF_Gbps(40ULL) }, \ + { IFM_ETHER | IFM_40G_LR4, IF_Gbps(40ULL) }, \ \ { IFM_TOKEN | IFM_TOK_STP4, IF_Mbps(4) }, \ { IFM_TOKEN | IFM_TOK_STP16, IF_Mbps(16) }, \ diff --git a/freebsd/sys/net/if_mib.c b/freebsd/sys/net/if_mib.c index a7d436ad..ec7a6984 100644 --- a/freebsd/sys/net/if_mib.c +++ b/freebsd/sys/net/if_mib.c @@ -65,7 +65,7 @@ */ SYSCTL_DECL(_net_link_generic); -SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0, +static SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0, "Variables global to all interfaces"); SYSCTL_VNET_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD, @@ -166,6 +166,6 @@ out: return error; } -SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RW, +static SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RW, sysctl_ifdata, "Interface table"); diff --git a/freebsd/sys/net/if_spppsubr.c b/freebsd/sys/net/if_spppsubr.c index 5619554a..4f2f6d05 100644 --- a/freebsd/sys/net/if_spppsubr.c +++ 
b/freebsd/sys/net/if_spppsubr.c @@ -176,7 +176,7 @@ #define STATE_ACK_SENT 8 #define STATE_OPENED 9 -MALLOC_DEFINE(M_SPPP, "sppp", "synchronous PPP interface internals"); +static MALLOC_DEFINE(M_SPPP, "sppp", "synchronous PPP interface internals"); struct ppp_header { u_char address; diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c index da09de7c..985c5651 100644 --- a/freebsd/sys/net/if_stf.c +++ b/freebsd/sys/net/if_stf.c @@ -123,12 +123,17 @@ #include <security/mac/mac_framework.h> SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface"); +static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface"); static int stf_route_cache = 1; SYSCTL_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW, &stf_route_cache, 0, "Caching of IPv4 routes for 6to4 Output"); +static int stf_permit_rfc1918 = 0; +TUNABLE_INT("net.link.stf.permit_rfc1918", &stf_permit_rfc1918); +SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN, + &stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses"); + #define STFNAME "stf" #define STFUNIT 0 @@ -586,9 +591,10 @@ isrfc1918addr(in) * returns 1 if private address range: * 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 */ - if ((ntohl(in->s_addr) & 0xff000000) >> 24 == 10 || + if (stf_permit_rfc1918 == 0 && ( + (ntohl(in->s_addr) & 0xff000000) >> 24 == 10 || (ntohl(in->s_addr) & 0xfff00000) >> 16 == 172 * 256 + 16 || - (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168) + (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168)) return 1; return 0; @@ -799,7 +805,7 @@ stf_rtrequest(cmd, rt, info) struct rt_addrinfo *info; { RT_LOCK_ASSERT(rt); - rt->rt_rmx.rmx_mtu = IPV6_MMTU; + rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; } static int @@ -812,7 +818,7 @@ stf_ioctl(ifp, cmd, data) struct ifreq *ifr; struct sockaddr_in6 *sin6; struct in_addr addr; - int error; + int error, mtu; error = 0; switch (cmd) { @@ -846,6 +852,18 @@ stf_ioctl(ifp, cmd, data) 
error = EAFNOSUPPORT; break; + case SIOCGIFMTU: + break; + + case SIOCSIFMTU: + ifr = (struct ifreq *)data; + mtu = ifr->ifr_mtu; + /* RFC 4213 3.2 ideal world MTU */ + if (mtu < IPV6_MINMTU || mtu > IF_MAXMTU - 20) + return (EINVAL); + ifp->if_mtu = mtu; + break; + default: error = EINVAL; break; diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c index b34b77de..9c501f16 100644 --- a/freebsd/sys/net/if_tap.c +++ b/freebsd/sys/net/if_tap.c @@ -67,6 +67,7 @@ #include <net/if.h> #include <net/if_clone.h> #include <net/if_dl.h> +#include <net/if_media.h> #include <net/if_types.h> #include <net/route.h> #include <net/vnet.h> @@ -165,7 +166,7 @@ MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface"); SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, ""); SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, +static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, "Ethernet tunnel software network interface"); SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0, "Allow user to open /dev/tap (based on node permissions)"); @@ -608,7 +609,8 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct tap_softc *tp = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct ifstat *ifs = NULL; - int dummy; + struct ifmediareq *ifmr = NULL; + int dummy, error = 0; switch (cmd) { case SIOCSIFFLAGS: /* XXX -- just like vmnet does */ @@ -616,6 +618,22 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCDELMULTI: break; + case SIOCGIFMEDIA: + ifmr = (struct ifmediareq *)data; + dummy = ifmr->ifm_count; + ifmr->ifm_count = 1; + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER; + if (tp->tap_flags & TAP_OPEN) + ifmr->ifm_status |= IFM_ACTIVE; + ifmr->ifm_current = ifmr->ifm_active; + if (dummy >= 1) { + int media = IFM_ETHER; + error = copyout(&media, ifmr->ifm_ulist, + sizeof(int)); + } + break; + case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; @@ 
-632,11 +650,11 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; default: - return (ether_ioctl(ifp, cmd, data)); - /* NOT REACHED */ + error = ether_ioctl(ifp, cmd, data); + break; } - return (0); + return (error); } /* tapifioctl */ @@ -921,7 +939,7 @@ tapwrite(struct cdev *dev, struct uio *uio, int flag) struct ifnet *ifp = tp->tap_ifp; struct mbuf *m; - TAPDEBUG("%s writting, minor = %#x\n", + TAPDEBUG("%s writing, minor = %#x\n", ifp->if_xname, dev2unit(dev)); if (uio->uio_resid == 0) diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c index 669c0b1b..25b73294 100644 --- a/freebsd/sys/net/if_tun.c +++ b/freebsd/sys/net/if_tun.c @@ -117,7 +117,7 @@ static TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead); SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0, +static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0, "IP tunnel software network interface."); SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0, "Enable legacy devfs interface creation."); diff --git a/freebsd/sys/net/if_types.h b/freebsd/sys/net/if_types.h index b2d3a159..c2effacd 100644 --- a/freebsd/sys/net/if_types.h +++ b/freebsd/sys/net/if_types.h @@ -238,6 +238,7 @@ #define IFT_ATMVCIENDPT 0xc2 /* ATM VCI End Point */ #define IFT_OPTICALCHANNEL 0xc3 /* Optical Channel */ #define IFT_OPTICALTRANSPORT 0xc4 /* Optical Transport */ +#define IFT_INFINIBAND 0xc7 /* Infiniband */ #define IFT_BRIDGE 0xd1 /* Transparent bridge interface */ #define IFT_STF 0xd7 /* 6to4 interface */ diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h index c5c489fb..5992fa0b 100644 --- a/freebsd/sys/net/if_var.h +++ b/freebsd/sys/net/if_var.h @@ -141,7 +141,7 @@ struct ifnet { struct carp_if *if_carp; /* carp interface structure */ struct bpf_if *if_bpf; /* packet filter structure */ u_short if_index; /* numeric abbreviation for this if */ 
- short if_timer; /* time 'til if_watchdog called */ + short if_index_reserved; /* spare space to grow if_index */ struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */ int if_flags; /* up/down, broadcast, etc. */ int if_capabilities; /* interface features & capabilities */ @@ -161,8 +161,6 @@ struct ifnet { (struct ifnet *); int (*if_ioctl) /* ioctl routine */ (struct ifnet *, u_long, caddr_t); - void (*if_watchdog) /* timer routine */ - (struct ifnet *); void (*if_init) /* Init routine */ (void *); int (*if_resolvemulti) /* validate/resolve multicast */ @@ -197,6 +195,8 @@ struct ifnet { /* protected by if_addr_mtx */ void *if_pf_kif; void *if_lagg; /* lagg glue */ + char *if_description; /* interface description */ + u_int if_fib; /* interface FIB */ u_char if_alloctype; /* if_type at time of allocation */ /* @@ -205,10 +205,12 @@ struct ifnet { * be used with care where binary compatibility is required. */ char if_cspare[3]; - char *if_description; /* interface description */ - void *if_pspare[7]; /* 1 netmap, 6 TBD */ + u_int if_hw_tsomax; /* tso burst length limit, the minmum + * is (IP_MAXPACKET / 8). + * XXXAO: Have to find a better place + * for it eventually. 
*/ int if_ispare[3]; - u_int if_fib; /* interface FIB */ + void *if_pspare[8]; /* 1 netmap, 7 TDB */ }; typedef void if_init_f_t(void *); @@ -329,6 +331,18 @@ void if_maddr_runlock(struct ifnet *ifp); /* if_multiaddrs */ IF_UNLOCK(ifq); \ } while (0) +#define _IF_DEQUEUE_ALL(ifq, m) do { \ + (m) = (ifq)->ifq_head; \ + (ifq)->ifq_head = (ifq)->ifq_tail = NULL; \ + (ifq)->ifq_len = 0; \ +} while (0) + +#define IF_DEQUEUE_ALL(ifq, m) do { \ + IF_LOCK(ifq); \ + _IF_DEQUEUE_ALL(ifq, m); \ + IF_UNLOCK(ifq); \ +} while (0) + #define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head) #define IF_POLL(ifq, m) _IF_POLL(ifq, m) @@ -361,6 +375,9 @@ EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t); /* interface departure event */ typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t); +/* Interface link state change event */ +typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int); +EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t); /* * interface groups @@ -406,6 +423,8 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); #define IF_AFDATA_DESTROY(ifp) rw_destroy(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_LOCKED) +#define IF_AFDATA_RLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_RLOCKED) +#define IF_AFDATA_WLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_WLOCKED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED) int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, @@ -574,22 +593,10 @@ do { \ } while (0) #ifdef _KERNEL -static __inline void -drbr_stats_update(struct ifnet *ifp, int len, int mflags) -{ -#ifndef NO_SLOW_STATS - ifp->if_obytes += len; - if (mflags & M_MCAST) - ifp->if_omcasts++; -#endif -} - static __inline int drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m) { int 
error = 0; - int len = m->m_pkthdr.len; - int mflags = m->m_flags; #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) { @@ -597,16 +604,53 @@ drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m) return (error); } #endif - if ((error = buf_ring_enqueue_bytes(br, m, len)) == ENOBUFS) { - br->br_drops++; + error = buf_ring_enqueue(br, m); + if (error) m_freem(m); - } else - drbr_stats_update(ifp, len, mflags); - + return (error); } static __inline void +drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new) +{ + /* + * The top of the list needs to be swapped + * for this one. + */ +#ifdef ALTQ + if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { + /* + * Peek in altq case dequeued it + * so put it back. + */ + IFQ_DRV_PREPEND(&ifp->if_snd, new); + return; + } +#endif + buf_ring_putback_sc(br, new); +} + +static __inline struct mbuf * +drbr_peek(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + struct mbuf *m; + if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { + /* + * Pull it off like a dequeue + * since drbr_advance() does nothing + * for altq and drbr_putback() will + * use the old prepend function. 
+ */ + IFQ_DEQUEUE(&ifp->if_snd, m); + return (m); + } +#endif + return(buf_ring_peek(br)); +} + +static __inline void drbr_flush(struct ifnet *ifp, struct buf_ring *br) { struct mbuf *m; @@ -633,7 +677,7 @@ drbr_dequeue(struct ifnet *ifp, struct buf_ring *br) #ifdef ALTQ struct mbuf *m; - if (ALTQ_IS_ENABLED(&ifp->if_snd)) { + if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { IFQ_DEQUEUE(&ifp->if_snd, m); return (m); } @@ -641,6 +685,18 @@ drbr_dequeue(struct ifnet *ifp, struct buf_ring *br) return (buf_ring_dequeue_sc(br)); } +static __inline void +drbr_advance(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + /* Nothing to do here since peek dequeues in altq case */ + if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) + return; +#endif + return (buf_ring_advance_sc(br)); +} + + static __inline struct mbuf * drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br, int (*func) (struct mbuf *, void *), void *arg) @@ -703,6 +759,8 @@ drbr_inuse(struct ifnet *ifp, struct buf_ring *br) #define IF_MINMTU 72 #define IF_MAXMTU 65535 +#define TOEDEV(ifp) ((ifp)->if_llsoftc) + #endif /* _KERNEL */ /* diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c index 95ea1a9e..f31b9be2 100644 --- a/freebsd/sys/net/if_vlan.c +++ b/freebsd/sys/net/if_vlan.c @@ -43,6 +43,7 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_vlan.h> #include <rtems/bsd/sys/param.h> @@ -57,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sockio.h> #include <sys/sysctl.h> #include <sys/systm.h> +#include <sys/sx.h> #include <net/bpf.h> #include <net/ethernet.h> @@ -67,6 +69,11 @@ __FBSDID("$FreeBSD$"); #include <net/if_vlan_var.h> #include <net/vnet.h> +#ifdef INET +#include <netinet/in.h> +#include <netinet/if_ether.h> +#endif + #define VLANNAME "vlan" #define VLAN_DEF_HWIDTH 4 #define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST) @@ -91,13 +98,14 @@ struct ifvlantrunk { }; struct vlan_mc_entry { - struct ether_addr 
mc_addr; + struct sockaddr_dl mc_addr; SLIST_ENTRY(vlan_mc_entry) mc_entries; }; struct ifvlan { struct ifvlantrunk *ifv_trunk; struct ifnet *ifv_ifp; + void *ifv_cookie; #define TRUNK(ifv) ((ifv)->ifv_trunk) #define PARENT(ifv) ((ifv)->ifv_trunk->parent) int ifv_pflags; /* special flags we have set on parent */ @@ -130,8 +138,10 @@ static struct { }; SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN"); -SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency"); +static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, + "IEEE 802.1Q VLAN"); +static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, + "for consistency"); static int soft_pad = 0; SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0, @@ -154,12 +164,12 @@ static eventhandler_tag iflladdr_tag; * however on practice it does not. Probably this is because array * is too big to fit into CPU cache. */ -static struct mtx ifv_mtx; -#define VLAN_LOCK_INIT() mtx_init(&ifv_mtx, "vlan_global", NULL, MTX_DEF) -#define VLAN_LOCK_DESTROY() mtx_destroy(&ifv_mtx) -#define VLAN_LOCK_ASSERT() mtx_assert(&ifv_mtx, MA_OWNED) -#define VLAN_LOCK() mtx_lock(&ifv_mtx) -#define VLAN_UNLOCK() mtx_unlock(&ifv_mtx) +static struct sx ifv_lock; +#define VLAN_LOCK_INIT() sx_init(&ifv_lock, "vlan_global") +#define VLAN_LOCK_DESTROY() sx_destroy(&ifv_lock) +#define VLAN_LOCK_ASSERT() sx_assert(&ifv_lock, SA_LOCKED) +#define VLAN_LOCK() sx_xlock(&ifv_lock) +#define VLAN_UNLOCK() sx_xunlock(&ifv_lock) #define TRUNK_LOCK_INIT(trunk) rw_init(&(trunk)->rw, VLANNAME) #define TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw) #define TRUNK_LOCK(trunk) rw_wlock(&(trunk)->rw) @@ -192,7 +202,7 @@ static int vlan_transmit(struct ifnet *ifp, struct mbuf *m); static void vlan_unconfig(struct ifnet *ifp); static void vlan_unconfig_locked(struct ifnet *ifp, int departing); static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag); 
-static void vlan_link_state(struct ifnet *ifp, int link); +static void vlan_link_state(struct ifnet *ifp); static void vlan_capabilities(struct ifvlan *ifv); static void vlan_trunk_capabilities(struct ifnet *ifp); @@ -388,6 +398,47 @@ vlan_dumphash(struct ifvlantrunk *trunk) } } #endif /* 0 */ +#else + +static __inline struct ifvlan * +vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag) +{ + + return trunk->vlans[tag]; +} + +static __inline int +vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) +{ + + if (trunk->vlans[ifv->ifv_tag] != NULL) + return EEXIST; + trunk->vlans[ifv->ifv_tag] = ifv; + trunk->refcnt++; + + return (0); +} + +static __inline int +vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) +{ + + trunk->vlans[ifv->ifv_tag] = NULL; + trunk->refcnt--; + + return (0); +} + +static __inline void +vlan_freehash(struct ifvlantrunk *trunk) +{ +} + +static __inline void +vlan_inithash(struct ifvlantrunk *trunk) +{ +} + #endif /* !VLAN_ARRAY */ static void @@ -396,9 +447,7 @@ trunk_destroy(struct ifvlantrunk *trunk) VLAN_LOCK_ASSERT(); TRUNK_LOCK(trunk); -#ifndef VLAN_ARRAY vlan_freehash(trunk); -#endif trunk->parent->if_vlantrunk = NULL; TRUNK_UNLOCK(trunk); TRUNK_LOCK_DESTROY(trunk); @@ -423,7 +472,6 @@ vlan_setmulti(struct ifnet *ifp) struct ifmultiaddr *ifma, *rifma = NULL; struct ifvlan *sc; struct vlan_mc_entry *mc; - struct sockaddr_dl sdl; int error; /*VLAN_LOCK_ASSERT();*/ @@ -434,17 +482,9 @@ vlan_setmulti(struct ifnet *ifp) CURVNET_SET_QUIET(ifp_p->if_vnet); - bzero((char *)&sdl, sizeof(sdl)); - sdl.sdl_len = sizeof(sdl); - sdl.sdl_family = AF_LINK; - sdl.sdl_index = ifp_p->if_index; - sdl.sdl_type = IFT_ETHER; - sdl.sdl_alen = ETHER_ADDR_LEN; - /* First, remove any existing filter entries. 
*/ while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { - bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN); - error = if_delmulti(ifp_p, (struct sockaddr *)&sdl); + error = if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr); if (error) return (error); SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); @@ -458,12 +498,11 @@ vlan_setmulti(struct ifnet *ifp) mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT); if (mc == NULL) return (ENOMEM); - bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), - (char *)&mc->mc_addr, ETHER_ADDR_LEN); + bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len); + mc->mc_addr.sdl_index = ifp_p->if_index; SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); - bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), - LLADDR(&sdl), ETHER_ADDR_LEN); - error = if_addmulti(ifp_p, (struct sockaddr *)&sdl, &rifma); + error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr, + &rifma); if (error) return (error); } @@ -505,7 +544,8 @@ vlan_iflladdr(void *arg __unused, struct ifnet *ifp) LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) { #endif /* VLAN_ARRAY */ VLAN_UNLOCK(); - if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN); + if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp), + ifp->if_addrlen); VLAN_LOCK(); } VLAN_UNLOCK(); @@ -566,6 +606,92 @@ restart: } /* + * Return the trunk device for a virtual interface. + */ +static struct ifnet * +vlan_trunkdev(struct ifnet *ifp) +{ + struct ifvlan *ifv; + + if (ifp->if_type != IFT_L2VLAN) + return (NULL); + ifv = ifp->if_softc; + ifp = NULL; + VLAN_LOCK(); + if (ifv->ifv_trunk) + ifp = PARENT(ifv); + VLAN_UNLOCK(); + return (ifp); +} + +/* + * Return the 16bit vlan tag for this interface. 
+ */ +static int +vlan_tag(struct ifnet *ifp, uint16_t *tagp) +{ + struct ifvlan *ifv; + + if (ifp->if_type != IFT_L2VLAN) + return (EINVAL); + ifv = ifp->if_softc; + *tagp = ifv->ifv_tag; + return (0); +} + +/* + * Return a driver specific cookie for this interface. Synchronization + * with setcookie must be provided by the driver. + */ +static void * +vlan_cookie(struct ifnet *ifp) +{ + struct ifvlan *ifv; + + if (ifp->if_type != IFT_L2VLAN) + return (NULL); + ifv = ifp->if_softc; + return (ifv->ifv_cookie); +} + +/* + * Store a cookie in our softc that drivers can use to store driver + * private per-instance data in. + */ +static int +vlan_setcookie(struct ifnet *ifp, void *cookie) +{ + struct ifvlan *ifv; + + if (ifp->if_type != IFT_L2VLAN) + return (EINVAL); + ifv = ifp->if_softc; + ifv->ifv_cookie = cookie; + return (0); +} + +/* + * Return the vlan device present at the specific tag. + */ +static struct ifnet * +vlan_devat(struct ifnet *ifp, uint16_t tag) +{ + struct ifvlantrunk *trunk; + struct ifvlan *ifv; + + trunk = ifp->if_vlantrunk; + if (trunk == NULL) + return (NULL); + ifp = NULL; + TRUNK_RLOCK(trunk); + ifv = vlan_gethash(trunk, tag); + if (ifv) + ifp = ifv->ifv_ifp; + TRUNK_RUNLOCK(trunk); + return (ifp); +} + +/* * VLAN support can be loaded as a module. The only place in the * system that's intimately aware of this is ether_input. We hook * into this code through vlan_input_p which is defined there and @@ -575,7 +701,7 @@ restart: extern void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* For if_link_state_change() eyes only... 
*/ -extern void (*vlan_link_state_p)(struct ifnet *, int); +extern void (*vlan_link_state_p)(struct ifnet *); static int vlan_modevent(module_t mod, int type, void *data) @@ -595,6 +721,11 @@ vlan_modevent(module_t mod, int type, void *data) vlan_input_p = vlan_input; vlan_link_state_p = vlan_link_state; vlan_trunk_cap_p = vlan_trunk_capabilities; + vlan_trunkdev_p = vlan_trunkdev; + vlan_cookie_p = vlan_cookie; + vlan_setcookie_p = vlan_setcookie; + vlan_tag_p = vlan_tag; + vlan_devat_p = vlan_devat; #ifndef VIMAGE if_clone_attach(&vlan_cloner); #endif @@ -617,6 +748,11 @@ vlan_modevent(module_t mod, int type, void *data) vlan_input_p = NULL; vlan_link_state_p = NULL; vlan_trunk_cap_p = NULL; + vlan_trunkdev_p = NULL; + vlan_tag_p = NULL; + vlan_cookie_p = NULL; + vlan_setcookie_p = NULL; + vlan_devat_p = NULL; VLAN_LOCK_DESTROY(); if (bootverbose) printf("vlan: unloaded\n"); @@ -667,7 +803,12 @@ vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag) /* Check for <etherif>.<vlan> style interface names. */ IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { - if (ifp->if_type != IFT_ETHER) + /* + * We can handle non-ethernet hardware types as long as + * they handle the tagging and headers themselves. 
+ */ + if (ifp->if_type != IFT_ETHER && + (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) continue; if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0) continue; @@ -720,6 +861,8 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) struct ifvlan *ifv; struct ifnet *ifp; struct ifnet *p; + struct ifaddr *ifa; + struct sockaddr_dl *sdl; struct vlanreq vlr; static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ @@ -818,6 +961,9 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_baudrate = 0; ifp->if_type = IFT_L2VLAN; ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; + ifa = ifp->if_addr; + sdl = (struct sockaddr_dl *)ifa->ifa_addr; + sdl->sdl_type = IFT_L2VLAN; if (ethertag) { error = vlan_config(ifv, p, tag); @@ -890,7 +1036,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m) if (!UP_AND_RUNNING(p)) { m_freem(m); ifp->if_oerrors++; - return (0); + return (ENETDOWN); } /* @@ -905,7 +1051,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m) * devices that just discard such runts instead or mishandle * them somehow. 
*/ - if (soft_pad) { + if (soft_pad && p->if_type == IFT_ETHER) { static char pad[8]; /* just zeros */ int n; @@ -1017,11 +1163,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) } TRUNK_RLOCK(trunk); -#ifdef VLAN_ARRAY - ifv = trunk->vlans[tag]; -#else ifv = vlan_gethash(trunk, tag); -#endif if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { TRUNK_RUNLOCK(trunk); m_freem(m); @@ -1047,7 +1189,8 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag) /* VID numbers 0x0 and 0xFFF are reserved */ if (tag == 0 || tag == 0xFFF) return (EINVAL); - if (p->if_type != IFT_ETHER) + if (p->if_type != IFT_ETHER && + (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) return (EPROTONOSUPPORT); @@ -1057,15 +1200,11 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag) if (p->if_vlantrunk == NULL) { trunk = malloc(sizeof(struct ifvlantrunk), M_VLAN, M_WAITOK | M_ZERO); -#ifndef VLAN_ARRAY vlan_inithash(trunk); -#endif VLAN_LOCK(); if (p->if_vlantrunk != NULL) { /* A race that that is very unlikely to be hit. */ -#ifndef VLAN_ARRAY vlan_freehash(trunk); -#endif free(trunk, M_VLAN); goto exists; } @@ -1081,18 +1220,9 @@ exists: } ifv->ifv_tag = tag; /* must set this before vlan_inshash() */ -#ifdef VLAN_ARRAY - if (trunk->vlans[tag] != NULL) { - error = EEXIST; - goto done; - } - trunk->vlans[tag] = ifv; - trunk->refcnt++; -#else error = vlan_inshash(trunk, ifv); if (error) goto done; -#endif ifv->ifv_proto = ETHERTYPE_VLAN; ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; ifv->ifv_mintu = ETHERMIN; @@ -1122,8 +1252,19 @@ exists: ifv->ifv_trunk = trunk; ifp = ifv->ifv_ifp; + /* + * Initialize fields from our parent. This duplicates some + * work with ether_ifattach() but allows for non-ethernet + * interfaces to also work. 
+ */ ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; ifp->if_baudrate = p->if_baudrate; + ifp->if_output = p->if_output; + ifp->if_input = p->if_input; + ifp->if_resolvemulti = p->if_resolvemulti; + ifp->if_addrlen = p->if_addrlen; + ifp->if_broadcastaddr = p->if_broadcastaddr; + /* * Copy only a selected subset of flags from the parent. * Other flags are none of our business. @@ -1138,10 +1279,12 @@ exists: vlan_capabilities(ifv); /* - * Set up our ``Ethernet address'' to reflect the underlying + * Set up our interface address to reflect the underlying * physical interface's. */ - bcopy(IF_LLADDR(p), IF_LLADDR(ifp), ETHER_ADDR_LEN); + bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen); + ((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen = + p->if_addrlen; /* * Configure multicast addresses that may already be @@ -1185,7 +1328,6 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) parent = NULL; if (trunk != NULL) { - struct sockaddr_dl sdl; TRUNK_LOCK(trunk); parent = trunk->parent; @@ -1195,17 +1337,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) * empty the list of multicast groups that we may have joined * while we were alive from the parent's list. 
*/ - bzero((char *)&sdl, sizeof(sdl)); - sdl.sdl_len = sizeof(sdl); - sdl.sdl_family = AF_LINK; - sdl.sdl_index = parent->if_index; - sdl.sdl_type = IFT_ETHER; - sdl.sdl_alen = ETHER_ADDR_LEN; - while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { - bcopy((char *)&mc->mc_addr, LLADDR(&sdl), - ETHER_ADDR_LEN); - /* * If the parent interface is being detached, * all its multicast addresses have already @@ -1216,7 +1348,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) */ if (!departing) { error = if_delmulti(parent, - (struct sockaddr *)&sdl); + (struct sockaddr *)&mc->mc_addr); if (error) if_printf(ifp, "Failed to delete multicast address from parent: %d\n", @@ -1227,12 +1359,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) } vlan_setflags(ifp, 0); /* clear special flags on parent */ -#ifdef VLAN_ARRAY - trunk->vlans[ifv->ifv_tag] = NULL; - trunk->refcnt--; -#else vlan_remhash(trunk, ifv); -#endif ifv->ifv_trunk = NULL; /* @@ -1323,7 +1450,7 @@ vlan_setflags(struct ifnet *ifp, int status) /* Inform all vlans that their parent has changed link state */ static void -vlan_link_state(struct ifnet *ifp, int link) +vlan_link_state(struct ifnet *ifp) { struct ifvlantrunk *trunk = ifp->if_vlantrunk; struct ifvlan *ifv; @@ -1366,7 +1493,7 @@ vlan_capabilities(struct ifvlan *ifv) p->if_capenable & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM; ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP | - CSUM_UDP | CSUM_SCTP | CSUM_IP_FRAGS | CSUM_FRAGMENT); + CSUM_UDP | CSUM_SCTP | CSUM_FRAGMENT); } else { ifp->if_capenable = 0; ifp->if_hwassist = 0; @@ -1385,6 +1512,22 @@ vlan_capabilities(struct ifvlan *ifv) ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO); ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO); } + + /* + * If the parent interface can offload TCP connections over VLANs then + * propagate its TOE capability to the VLAN interface. + * + * All TOE drivers in the tree today can deal with VLANs. 
If this + * changes then IFCAP_VLAN_TOE should be promoted to a full capability + * with its own bit. + */ +#define IFCAP_VLAN_TOE IFCAP_TOE + if (p->if_capabilities & IFCAP_VLAN_TOE) + ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE; + if (p->if_capenable & IFCAP_VLAN_TOE) { + TOEDEV(ifp) = TOEDEV(p); + ifp->if_capenable |= p->if_capenable & IFCAP_TOE; + } } static void @@ -1413,14 +1556,31 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifnet *p; struct ifreq *ifr; + struct ifaddr *ifa; struct ifvlan *ifv; struct vlanreq vlr; int error = 0; ifr = (struct ifreq *)data; + ifa = (struct ifaddr *) data; ifv = ifp->if_softc; switch (cmd) { + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; +#ifdef INET + if (ifa->ifa_addr->sa_family == AF_INET) + arp_ifinit(ifp, ifa); +#endif + break; + case SIOCGIFADDR: + { + struct sockaddr *sa; + + sa = (struct sockaddr *)&ifr->ifr_data; + bcopy(IF_LLADDR(ifp), sa->sa_data, ifp->if_addrlen); + } + break; case SIOCGIFMEDIA: VLAN_LOCK(); if (TRUNK(ifv) != NULL) { @@ -1482,7 +1642,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; } p = ifunit(vlr.vlr_parent); - if (p == 0) { + if (p == NULL) { error = ENOENT; break; } @@ -1540,7 +1700,8 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; default: - error = ether_ioctl(ifp, cmd, data); + error = EINVAL; + break; } return (error); diff --git a/freebsd/sys/net/if_vlan_var.h b/freebsd/sys/net/if_vlan_var.h index ec71df1a..fd3fc4f3 100644 --- a/freebsd/sys/net/if_vlan_var.h +++ b/freebsd/sys/net/if_vlan_var.h @@ -131,7 +131,25 @@ struct vlanreq { (*vlan_trunk_cap_p)(_ifp); \ } while (0) +#define VLAN_TRUNKDEV(_ifp) \ + (_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL +#define VLAN_TAG(_ifp, _tag) \ + (_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_tag)) : EINVAL +#define VLAN_COOKIE(_ifp) \ + (_ifp)->if_type == IFT_L2VLAN ? 
(*vlan_cookie_p)((_ifp)) : NULL +#define VLAN_SETCOOKIE(_ifp, _cookie) \ + (_ifp)->if_type == IFT_L2VLAN ? \ + (*vlan_setcookie_p)((_ifp), (_cookie)) : EINVAL +#define VLAN_DEVAT(_ifp, _tag) \ + (_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_tag)) : NULL + extern void (*vlan_trunk_cap_p)(struct ifnet *); +extern struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); +extern struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); +extern int (*vlan_tag_p)(struct ifnet *, uint16_t *); +extern int (*vlan_setcookie_p)(struct ifnet *, void *); +extern void *(*vlan_cookie_p)(struct ifnet *); + #endif /* _KERNEL */ #endif /* _NET_IF_VLAN_VAR_H_ */ diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c index 13e12147..c8a4d7b5 100644 --- a/freebsd/sys/net/netisr.c +++ b/freebsd/sys/net/netisr.c @@ -2,7 +2,7 @@ /*- * Copyright (c) 2007-2009 Robert N. M. Watson - * Copyright (c) 2010 Juniper Networks, Inc. + * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * This software was developed by Robert N. M. Watson under contract @@ -126,35 +126,47 @@ static struct rmlock netisr_rmlock; #define NETISR_WUNLOCK() rm_wunlock(&netisr_rmlock) /* #define NETISR_LOCKING */ -SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr"); +static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr"); /*- - * Three direct dispatch policies are supported: + * Three global direct dispatch policies are supported: * - * - Always defer: all work is scheduled for a netisr, regardless of context. - * (!direct) + * NETISR_DISPATCH_QUEUED: All work is deferred for a netisr, regardless of + * context (may be overriden by protocols). * - * - Hybrid: if the executing context allows direct dispatch, and we're - * running on the CPU the work would be done on, then direct dispatch if it - * wouldn't violate ordering constraints on the workstream. 
- * (direct && !direct_force) + * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch, + * and we're running on the CPU the work would be performed on, then direct + * dispatch it if it wouldn't violate ordering constraints on the workstream. * - * - Always direct: if the executing context allows direct dispatch, always - * direct dispatch. (direct && direct_force) + * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch, + * always direct dispatch. (The default.) * * Notice that changing the global policy could lead to short periods of * misordered processing, but this is considered acceptable as compared to - * the complexity of enforcing ordering during policy changes. + * the complexity of enforcing ordering during policy changes. Protocols can + * override the global policy (when they're not doing that, they select + * NETISR_DISPATCH_DEFAULT). */ -static int netisr_direct_force = 1; /* Always direct dispatch. */ -TUNABLE_INT("net.isr.direct_force", &netisr_direct_force); -SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RW, - &netisr_direct_force, 0, "Force direct dispatch"); +#define NETISR_DISPATCH_POLICY_DEFAULT NETISR_DISPATCH_DIRECT +#define NETISR_DISPATCH_POLICY_MAXSTR 20 /* Used for temporary buffers. */ +static u_int netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT; +static int sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RW | + CTLFLAG_TUN, 0, 0, sysctl_netisr_dispatch_policy, "A", + "netisr dispatch policy"); -static int netisr_direct = 1; /* Enable direct dispatch. */ -TUNABLE_INT("net.isr.direct", &netisr_direct); -SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW, - &netisr_direct, 0, "Enable direct dispatch"); +/* + * These sysctls were used in previous versions to control and export + * dispatch policy state. Now, we provide read-only export via them so that + * older netstat binaries work. 
At some point they can be garbage collected. + */ +static int netisr_direct_force; +SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RD, + &netisr_direct_force, 0, "compat: force direct dispatch"); + +static int netisr_direct; +SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RD, &netisr_direct, 0, + "compat: enable direct dispatch"); /* * Allow the administrator to limit the number of threads (CPUs) to use for @@ -284,6 +296,106 @@ netisr_default_flow2cpu(u_int flowid) #endif /* __rtems__ */ /* + * Dispatch tunable and sysctl configuration. + */ +struct netisr_dispatch_table_entry { + u_int ndte_policy; + const char *ndte_policy_str; +}; +static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = { + { NETISR_DISPATCH_DEFAULT, "default" }, + { NETISR_DISPATCH_DEFERRED, "deferred" }, + { NETISR_DISPATCH_HYBRID, "hybrid" }, + { NETISR_DISPATCH_DIRECT, "direct" }, +}; +static const u_int netisr_dispatch_table_len = + (sizeof(netisr_dispatch_table) / sizeof(netisr_dispatch_table[0])); + +static void +netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer, + u_int buflen) +{ + const struct netisr_dispatch_table_entry *ndtep; + const char *str; + u_int i; + + str = "unknown"; + for (i = 0; i < netisr_dispatch_table_len; i++) { + ndtep = &netisr_dispatch_table[i]; + if (ndtep->ndte_policy == dispatch_policy) { + str = ndtep->ndte_policy_str; + break; + } + } + snprintf(buffer, buflen, "%s", str); +} + +static int +netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp) +{ + const struct netisr_dispatch_table_entry *ndtep; + u_int i; + + for (i = 0; i < netisr_dispatch_table_len; i++) { + ndtep = &netisr_dispatch_table[i]; + if (strcmp(ndtep->ndte_policy_str, str) == 0) { + *dispatch_policyp = ndtep->ndte_policy; + return (0); + } + } + return (EINVAL); +} + +static void +netisr_dispatch_policy_compat(void) +{ + + switch (netisr_dispatch_policy) { + case NETISR_DISPATCH_DEFERRED: + netisr_direct_force = 0; + netisr_direct = 0; 
+ break; + + case NETISR_DISPATCH_HYBRID: + netisr_direct_force = 0; + netisr_direct = 1; + break; + + case NETISR_DISPATCH_DIRECT: + netisr_direct_force = 1; + netisr_direct = 1; + break; + + default: + panic("%s: unknown policy %u", __func__, + netisr_dispatch_policy); + } +} + +static int +sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS) +{ + char tmp[NETISR_DISPATCH_POLICY_MAXSTR]; + u_int dispatch_policy; + int error; + + netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp, + sizeof(tmp)); + error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req); + if (error == 0 && req->newptr != NULL) { + error = netisr_dispatch_policy_from_str(tmp, + &dispatch_policy); + if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT) + error = EINVAL; + if (error == 0) { + netisr_dispatch_policy = dispatch_policy; + netisr_dispatch_policy_compat(); + } + } + return (error); +} + +/* * Register a new netisr handler, which requires initializing per-protocol * fields for each workstream. All netisr work is briefly suspended while * the protocol is installed. 
@@ -320,6 +432,12 @@ netisr_register(const struct netisr_handler *nhp) KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL, ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__, name)); + KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT || + nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED || + nhp->nh_dispatch == NETISR_DISPATCH_HYBRID || + nhp->nh_dispatch == NETISR_DISPATCH_DIRECT, + ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch)); + KASSERT(proto < NETISR_MAXPROT, ("%s(%u, %s): protocol too big", __func__, proto, name)); @@ -347,6 +465,7 @@ netisr_register(const struct netisr_handler *nhp) } else netisr_proto[proto].np_qlimit = nhp->nh_qlimit; netisr_proto[proto].np_policy = nhp->nh_policy; + netisr_proto[proto].np_dispatch = nhp->nh_dispatch; CPU_FOREACH(i) { #ifndef __rtems__ npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; @@ -569,15 +688,32 @@ netisr_unregister(const struct netisr_handler *nhp) } /* + * Compose the global and per-protocol policies on dispatch, and return the + * dispatch policy to use. + */ +static u_int +netisr_get_dispatch(struct netisr_proto *npp) +{ + + /* + * Protocol-specific configuration overrides the global default. + */ + if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT) + return (npp->np_dispatch); + return (netisr_dispatch_policy); +} + +/* * Look up the workstream given a packet and source identifier. Do this by * checking the protocol's policy, and optionally call out to the protocol * for assistance if required. */ static struct mbuf * -netisr_select_cpuid(struct netisr_proto *npp, uintptr_t source, - struct mbuf *m, u_int *cpuidp) +netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy, + uintptr_t source, struct mbuf *m, u_int *cpuidp) { struct ifnet *ifp; + u_int policy; NETISR_LOCK_ASSERT(); @@ -596,11 +732,30 @@ netisr_select_cpuid(struct netisr_proto *npp, uintptr_t source, * If we want to support per-interface policies, we should do that * here first. 
*/ - switch (npp->np_policy) { - case NETISR_POLICY_CPU: - return (npp->np_m2cpuid(m, source, cpuidp)); + policy = npp->np_policy; + if (policy == NETISR_POLICY_CPU) { + m = npp->np_m2cpuid(m, source, cpuidp); + if (m == NULL) + return (NULL); + + /* + * It's possible for a protocol not to have a good idea about + * where to process a packet, in which case we fall back on + * the netisr code to decide. In the hybrid case, return the + * current CPU ID, which will force an immediate direct + * dispatch. In the queued case, fall back on the SOURCE + * policy. + */ + if (*cpuidp != NETISR_CPUID_NONE) + return (m); + if (dispatch_policy == NETISR_DISPATCH_HYBRID) { + *cpuidp = curcpu; + return (m); + } + policy = NETISR_POLICY_SOURCE; + } - case NETISR_POLICY_FLOW: + if (policy == NETISR_POLICY_FLOW) { if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) { m = npp->np_m2flow(m, source); if (m == NULL) @@ -611,25 +766,22 @@ netisr_select_cpuid(struct netisr_proto *npp, uintptr_t source, netisr_default_flow2cpu(m->m_pkthdr.flowid); return (m); } - /* FALLTHROUGH */ - - case NETISR_POLICY_SOURCE: - ifp = m->m_pkthdr.rcvif; - if (ifp != NULL) - *cpuidp = nws_array[(ifp->if_index + source) % - nws_count]; - else - *cpuidp = nws_array[source % nws_count]; - return (m); - - default: - panic("%s: invalid policy %u for %s", __func__, - npp->np_policy, npp->np_name); + policy = NETISR_POLICY_SOURCE; } + + KASSERT(policy == NETISR_POLICY_SOURCE, + ("%s: invalid policy %u for %s", __func__, npp->np_policy, + npp->np_name)); + + ifp = m->m_pkthdr.rcvif; + if (ifp != NULL) + *cpuidp = nws_array[(ifp->if_index + source) % nws_count]; + else + *cpuidp = nws_array[source % nws_count]; #else /* __rtems__ */ *cpuidp = 0; - return (m); #endif /* __rtems__ */ + return (m); } /* @@ -832,7 +984,8 @@ netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m) KASSERT(netisr_proto[proto].np_handler != NULL, ("%s: invalid proto %u", __func__, proto)); - m = 
netisr_select_cpuid(&netisr_proto[proto], source, m, &cpuid); + m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED, + source, m, &cpuid); if (m != NULL) { KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); @@ -863,23 +1016,23 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m) struct rm_priotracker tracker; #endif struct netisr_workstream *nwsp; + struct netisr_proto *npp; struct netisr_work *npwp; int dosignal, error; - u_int cpuid; - - /* - * If direct dispatch is entirely disabled, fall back on queueing. - */ - if (!netisr_direct) - return (netisr_queue_src(proto, source, m)); + u_int cpuid, dispatch_policy; KASSERT(proto < NETISR_MAXPROT, ("%s: invalid proto %u", __func__, proto)); #ifdef NETISR_LOCKING NETISR_RLOCK(&tracker); #endif - KASSERT(netisr_proto[proto].np_handler != NULL, - ("%s: invalid proto %u", __func__, proto)); + npp = &netisr_proto[proto]; + KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__, + proto)); + + dispatch_policy = netisr_get_dispatch(npp); + if (dispatch_policy == NETISR_DISPATCH_DEFERRED) + return (netisr_queue_src(proto, source, m)); /* * If direct dispatch is forced, then unconditionally dispatch @@ -888,7 +1041,7 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m) * nws_flags because all netisr processing will be source ordered due * to always being forced to directly dispatch. */ - if (netisr_direct_force) { + if (dispatch_policy == NETISR_DISPATCH_DIRECT) { nwsp = DPCPU_PTR(nws); npwp = &nwsp->nws_work[proto]; npwp->nw_dispatched++; @@ -898,20 +1051,24 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m) goto out_unlock; } + KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID, + ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy)); + /* * Otherwise, we execute in a hybrid mode where we will try to direct * dispatch if we're on the right CPU and the netisr worker isn't * already running. 
*/ - m = netisr_select_cpuid(&netisr_proto[proto], source, m, &cpuid); +#ifndef __rtems__ + sched_pin(); +#endif /* __rtems__ */ + m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID, + source, m, &cpuid); if (m == NULL) { error = ENOBUFS; - goto out_unlock; + goto out_unpin; } KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); -#ifndef __rtems__ - sched_pin(); -#endif /* __rtems__ */ if (cpuid != curcpu) goto queue_fallback; nwsp = DPCPU_PTR(nws); @@ -1050,6 +1207,9 @@ netisr_start_swi(u_int cpuid, struct pcpu *pc) static void netisr_init(void *arg) { + char tmp[NETISR_DISPATCH_POLICY_MAXSTR]; + u_int dispatch_policy; + int error; KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__)); @@ -1081,6 +1241,22 @@ netisr_init(void *arg) #endif #ifndef __rtems__ + if (TUNABLE_STR_FETCH("net.isr.dispatch", tmp, sizeof(tmp))) { + error = netisr_dispatch_policy_from_str(tmp, + &dispatch_policy); + if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT) + error = EINVAL; + if (error == 0) { + netisr_dispatch_policy = dispatch_policy; + netisr_dispatch_policy_compat(); + } else + printf( + "%s: invalid dispatch policy %s, using default\n", + __func__, tmp); + } +#endif /* __rtems__ */ + +#ifndef __rtems__ netisr_start_swi(curcpu, pcpu_find(curcpu)); #else /* __rtems__ */ netisr_start_swi(0, NULL); @@ -1098,7 +1274,7 @@ netisr_start(void *arg) { struct pcpu *pc; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (nws_count >= netisr_maxthreads) break; /* XXXRW: Is skipping absent CPUs still required here? 
*/ @@ -1141,6 +1317,7 @@ sysctl_netisr_proto(SYSCTL_HANDLER_ARGS) snpp->snp_proto = proto; snpp->snp_qlimit = npp->np_qlimit; snpp->snp_policy = npp->np_policy; + snpp->snp_dispatch = npp->np_dispatch; if (npp->np_m2flow != NULL) snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW; if (npp->np_m2cpuid != NULL) diff --git a/freebsd/sys/net/netisr.h b/freebsd/sys/net/netisr.h index cd692f6d..83bf9ce5 100644 --- a/freebsd/sys/net/netisr.h +++ b/freebsd/sys/net/netisr.h @@ -1,6 +1,6 @@ /*- * Copyright (c) 2007-2009 Robert N. M. Watson - * Copyright (c) 2010 Juniper Networks, Inc. + * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * This software was developed by Robert N. M. Watson under contract @@ -71,6 +71,15 @@ #define NETISR_POLICY_CPU 3 /* Protocol determines CPU placement. */ /* + * Protocol dispatch policy constants; selects whether and when direct + * dispatch is permitted. + */ +#define NETISR_DISPATCH_DEFAULT 0 /* Use global default. */ +#define NETISR_DISPATCH_DEFERRED 1 /* Always defer dispatch. */ +#define NETISR_DISPATCH_HYBRID 2 /* Allow hybrid dispatch. */ +#define NETISR_DISPATCH_DIRECT 3 /* Always direct dispatch. */ + +/* * Monitoring data structures, exported by sysctl(2). * * Three sysctls are defined. First, a per-protocol structure exported by @@ -84,7 +93,8 @@ struct sysctl_netisr_proto { u_int snp_qlimit; /* nh_qlimit */ u_int snp_policy; /* nh_policy */ u_int snp_flags; /* Various flags. */ - u_int _snp_ispare[7]; + u_int snp_dispatch; /* Dispatch policy. */ + u_int _snp_ispare[6]; }; /* @@ -173,6 +183,8 @@ typedef struct mbuf *netisr_m2cpuid_t(struct mbuf *m, uintptr_t source, typedef struct mbuf *netisr_m2flow_t(struct mbuf *m, uintptr_t source); typedef void netisr_drainedcpu_t(u_int cpuid); +#define NETISR_CPUID_NONE ((u_int)-1) /* No affinity returned. */ + /* * Data structure describing a protocol handler. */ @@ -185,7 +197,8 @@ struct netisr_handler { u_int nh_proto; /* Integer protocol ID. 
*/ u_int nh_qlimit; /* Maximum per-CPU queue depth. */ u_int nh_policy; /* Work placement policy. */ - u_int nh_ispare[5]; /* For future use. */ + u_int nh_dispatch; /* Dispatch policy. */ + u_int nh_ispare[4]; /* For future use. */ void *nh_pspare[4]; /* For future use. */ }; diff --git a/freebsd/sys/net/netisr_internal.h b/freebsd/sys/net/netisr_internal.h index 40afaf16..ac3ed0f2 100644 --- a/freebsd/sys/net/netisr_internal.h +++ b/freebsd/sys/net/netisr_internal.h @@ -1,6 +1,6 @@ /*- * Copyright (c) 2007-2009 Robert N. M. Watson - * Copyright (c) 2010 Juniper Networks, Inc. + * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * This software was developed by Robert N. M. Watson under contract @@ -64,6 +64,7 @@ struct netisr_proto { netisr_drainedcpu_t *np_drainedcpu; /* Callback when drained a queue. */ u_int np_qlimit; /* Maximum per-CPU queue depth. */ u_int np_policy; /* Work placement policy. */ + u_int np_dispatch; /* Work dispatch policy. */ }; #define NETISR_MAXPROT 16 /* Compile-time limit. */ diff --git a/freebsd/sys/net/pfil.c b/freebsd/sys/net/pfil.c index 7fceea35..123d03c4 100644 --- a/freebsd/sys/net/pfil.c +++ b/freebsd/sys/net/pfil.c @@ -288,25 +288,27 @@ pfil_list_remove(pfil_list_t *list, return (ENOENT); } -/**************** - * Stuff that must be initialized for every instance - * (including the first of course). +/* + * Stuff that must be initialized for every instance (including the first of + * course). */ static int vnet_pfil_init(const void *unused) { + LIST_INIT(&V_pfil_head_list); return (0); } -/*********************** +/* * Called for the removal of each instance. */ static int vnet_pfil_uninit(const void *unused) { + /* XXX should panic if list is not empty */ - return 0; + return (0); } /* Define startup order. */ @@ -315,17 +317,17 @@ vnet_pfil_uninit(const void *unused) #define PFIL_VNET_ORDER (PFIL_MODEVENT_ORDER + 2) /* Later still. */ /* - * Starting up. + * Starting up. 
+ * * VNET_SYSINIT is called for each existing vnet and each new vnet. */ VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER, - vnet_pfil_init, NULL); + vnet_pfil_init, NULL); /* - * Closing up shop. These are done in REVERSE ORDER, - * Not called on reboot. + * Closing up shop. These are done in REVERSE ORDER. Not called on reboot. + * * VNET_SYSUNINIT is called for each exiting vnet as it exits. */ VNET_SYSUNINIT(vnet_pfil_uninit, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER, - vnet_pfil_uninit, NULL); - + vnet_pfil_uninit, NULL); diff --git a/freebsd/sys/net/pfil.h b/freebsd/sys/net/pfil.h index 142da67d..da06dedf 100644 --- a/freebsd/sys/net/pfil.h +++ b/freebsd/sys/net/pfil.h @@ -69,7 +69,11 @@ struct pfil_head { pfil_list_t ph_out; int ph_type; int ph_nhooks; +#if defined( __linux__ ) || defined( _WIN32 ) + rwlock_t ph_mtx; +#else struct rmlock ph_lock; +#endif union { u_long phu_val; void *phu_ptr; diff --git a/freebsd/sys/net/radix.c b/freebsd/sys/net/radix.c index d31a5b3c..875a482c 100644 --- a/freebsd/sys/net/radix.c +++ b/freebsd/sys/net/radix.c @@ -52,8 +52,8 @@ #include <stdio.h> #include <strings.h> #include <stdlib.h> -#define log(x, arg...) fprintf(stderr, ## arg) -#define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1) +#define log(x, arg...) fprintf(stderr, ## arg) +#define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1) #define min(a, b) ((a) < (b) ? (a) : (b) ) #include <net/radix.h> #endif /* !_KERNEL */ diff --git a/freebsd/sys/net/radix.h b/freebsd/sys/net/radix.h index 29659b54..5bacaa3a 100644 --- a/freebsd/sys/net/radix.h +++ b/freebsd/sys/net/radix.h @@ -105,6 +105,8 @@ typedef int walktree_f_t(struct radix_node *, void *); struct radix_node_head { struct radix_node *rnh_treetop; + u_int rnh_gen; /* generation counter */ + int rnh_multipath; /* multipath capable ? 
*/ int rnh_addrsize; /* permit, but not require fixed keys */ int rnh_pktsize; /* permit, but not require fixed keys */ struct radix_node *(*rnh_addaddr) /* add based on sockaddr */ @@ -131,7 +133,6 @@ struct radix_node_head { void (*rnh_close) /* do something when the last ref drops */ (struct radix_node *rn, struct radix_node_head *head); struct radix_node rnh_nodes[3]; /* empty tree for common case */ - int rnh_multipath; /* multipath capable ? */ #ifdef _KERNEL struct rwlock rnh_lock; /* locks entire radix tree */ #endif diff --git a/freebsd/sys/net/raw_cb.c b/freebsd/sys/net/raw_cb.c index 68b8bd26..10db8bba 100644 --- a/freebsd/sys/net/raw_cb.c +++ b/freebsd/sys/net/raw_cb.c @@ -61,7 +61,8 @@ struct mtx rawcb_mtx; VNET_DEFINE(struct rawcb_list_head, rawcb_list); -SYSCTL_NODE(_net, OID_AUTO, raw, CTLFLAG_RW, 0, "Raw socket infrastructure"); +static SYSCTL_NODE(_net, OID_AUTO, raw, CTLFLAG_RW, 0, + "Raw socket infrastructure"); static u_long raw_sendspace = RAWSNDQ; SYSCTL_ULONG(_net_raw, OID_AUTO, sendspace, CTLFLAG_RW, &raw_sendspace, 0, diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c index a0589947..fdd8a12c 100644 --- a/freebsd/sys/net/route.c +++ b/freebsd/sys/net/route.c @@ -69,6 +69,10 @@ #include <netinet/ip_mroute.h> #include <vm/uma.h> +#ifdef __rtems__ +#include <machine/rtems-bsd-syscall-api.h> +#include <sys/file.h> +#endif /* __rtems__ */ /* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBS. */ #define RT_MAXFIBS 16 @@ -144,7 +148,6 @@ VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. 
*/ #define V_rtzone VNET(rtzone) -#ifndef __rtems__ /* * handler for net.my_fibnum */ @@ -154,14 +157,17 @@ sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) int fibnum; int error; +#ifndef __rtems__ fibnum = curthread->td_proc->p_fibnum; +#else /* __rtems__ */ + fibnum = BSD_DEFAULT_FIB; +#endif /* __rtems__ */ error = sysctl_handle_int(oidp, &fibnum, 0, req); return (error); } SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); -#endif /* __rtems__ */ static __inline struct radix_node_head ** rt_tables_get_rnh_ptr(int table, int fam) @@ -279,20 +285,40 @@ VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, vnet_route_uninit, 0); #endif -#ifndef __rtems__ #ifndef _SYS_SYSPROTO_H_ struct setfib_args { int fibnum; }; #endif +#ifdef __rtems__ +static +#endif /* __rtems__ */ int -setfib(struct thread *td, struct setfib_args *uap) +sys_setfib(struct thread *td, struct setfib_args *uap) { if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) return EINVAL; +#ifndef __rtems__ td->td_proc->p_fibnum = uap->fibnum; +#else /* __rtems__ */ + if (uap->fibnum != BSD_DEFAULT_FIB) + return EINVAL; +#endif /* __rtems__ */ return (0); } +#ifdef __rtems__ +int +setfib(int fibnum) +{ + struct setfib_args ua = { + .fibnum = fibnum + }; + int error; + + error = sys_setfib(NULL, &ua); + + return rtems_bsd_error_to_status_and_errno(error); +} #endif /* __rtems__ */ /* @@ -1118,6 +1144,14 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, error = 0; } #endif + if ((flags & RTF_PINNED) == 0) { + /* Check if target route can be deleted */ + rt = (struct rtentry *)rnh->rnh_lookup(dst, + netmask, rnh); + if ((rt != NULL) && (rt->rt_flags & RTF_PINNED)) + senderr(EADDRINUSE); + } + /* * Remove the item from the tree and return it. * Complain if it is not there and do no more processing. 
@@ -1237,11 +1271,9 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, rt0 = NULL; /* "flow-table" only supports IPv6 and IPv4 at the moment. */ switch (dst->sa_family) { -#ifdef notyet #ifdef INET6 case AF_INET6: #endif -#endif #ifdef INET case AF_INET: #endif @@ -1309,13 +1341,11 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, #ifdef FLOWTABLE else if (rt0 != NULL) { switch (dst->sa_family) { -#ifdef notyet #ifdef INET6 case AF_INET6: flowtable_route_flush(V_ip6_ft, rt0); break; #endif -#endif #ifdef INET case AF_INET: flowtable_route_flush(V_ip_ft, rt0); @@ -1445,6 +1475,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) int didwork = 0; int a_failure = 0; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; + struct radix_node_head *rnh; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; @@ -1507,7 +1538,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) */ for ( fibnum = startfib; fibnum <= endfib; fibnum++) { if (cmd == RTM_DELETE) { - struct radix_node_head *rnh; struct radix_node *rn; /* * Look up an rtentry that is in the routing tree and @@ -1517,7 +1547,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) if (rnh == NULL) /* this table doesn't exist but others might */ continue; - RADIX_NODE_HEAD_LOCK(rnh); + RADIX_NODE_HEAD_RLOCK(rnh); #ifdef RADIX_MPATH if (rn_mpath_capable(rnh)) { @@ -1546,7 +1576,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) (rn->rn_flags & RNF_ROOT) || RNTORT(rn)->rt_ifa != ifa || !sa_equal((struct sockaddr *)rn->rn_key, dst)); - RADIX_NODE_HEAD_UNLOCK(rnh); + RADIX_NODE_HEAD_RUNLOCK(rnh); if (error) { /* this is only an error if bad on ALL tables */ continue; @@ -1557,7 +1587,8 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) */ bzero((caddr_t)&info, sizeof(info)); info.rti_ifa = ifa; - info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF); + info.rti_flags = flags | + (ifa->ifa_flags & 
~IFA_RTSELF) | RTF_PINNED; info.rti_info[RTAX_DST] = dst; /* * doing this for compatibility reasons @@ -1569,6 +1600,33 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; error = rtrequest1_fib(cmd, &info, &rt, fibnum); + + if ((error == EEXIST) && (cmd == RTM_ADD)) { + /* + * Interface route addition failed. + * Atomically delete current prefix generating + * RTM_DELETE message, and retry adding + * interface prefix. + */ + rnh = rt_tables_get_rnh(fibnum, dst->sa_family); + RADIX_NODE_HEAD_LOCK(rnh); + + /* Delete old prefix */ + info.rti_ifa = NULL; + info.rti_flags = RTF_RNH_LOCKED; + + error = rtrequest1_fib(RTM_DELETE, &info, &rt, fibnum); + if (error == 0) { + info.rti_ifa = ifa; + info.rti_flags = flags | RTF_RNH_LOCKED | + (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; + error = rtrequest1_fib(cmd, &info, &rt, fibnum); + } + + RADIX_NODE_HEAD_UNLOCK(rnh); + } + + if (error == 0 && rt != NULL) { /* * notify any listening routing agents of the change diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h index b26ac441..997f3cd6 100644 --- a/freebsd/sys/net/route.h +++ b/freebsd/sys/net/route.h @@ -49,9 +49,14 @@ struct route { struct rtentry *ro_rt; struct llentry *ro_lle; + struct in_ifaddr *ro_ia; + int ro_flags; struct sockaddr ro_dst; }; +#define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */ +#define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */ + /* * These numbers are used by reliable protocols for determining * retransmission behavior and are included in the routing structure. 
@@ -171,7 +176,7 @@ struct ortentry { /* 0x20000 unused, was RTF_WASCLONED */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ /* 0x80000 unused */ -#define RTF_PINNED 0x100000 /* future use */ +#define RTF_PINNED 0x100000 /* route is immutable */ #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ @@ -337,6 +342,18 @@ struct rt_addrinfo { RTFREE_LOCKED(_rt); \ } while (0) +#define RO_RTFREE(_ro) do { \ + if ((_ro)->ro_rt) { \ + if ((_ro)->ro_flags & RT_NORTREF) { \ + (_ro)->ro_flags &= ~RT_NORTREF; \ + (_ro)->ro_rt = NULL; \ + } else { \ + RT_LOCK((_ro)->ro_rt); \ + RTFREE_LOCKED((_ro)->ro_rt); \ + } \ + } \ +} while (0) + struct radix_node_head *rt_tables_get_rnh(int, int); struct ifmultiaddr; @@ -404,6 +421,7 @@ int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int); #include <sys/eventhandler.h> typedef void (*rtevent_arp_update_fn)(void *, struct rtentry *, uint8_t *, struct sockaddr *); typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *); +/* route_arp_update_event is no longer generated; see arp_update_event */ EVENTHANDLER_DECLARE(route_arp_update_event, rtevent_arp_update_fn); EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn); #endif diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c index a421fd61..1eebe095 100644 --- a/freebsd/sys/net/rtsock.c +++ b/freebsd/sys/net/rtsock.c @@ -172,7 +172,7 @@ MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) -SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, ""); +static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, ""); struct walkarg { int w_tmemsize; @@ -956,7 +956,6 @@ flush: m = NULL; } else if (m->m_pkthdr.len > rtm->rtm_msglen) 
m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); - Free(rtm); } if (m) { M_SETFIB(m, so->so_fibnum); @@ -973,6 +972,9 @@ flush: } else rt_dispatch(m, saf); } + /* info.rti_info[RTAX_DST] (used above) can point inside of rtm */ + if (rtm) + Free(rtm); } return (error); #undef sa_equal @@ -1821,6 +1823,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) u_int namelen = arg2; struct radix_node_head *rnh = NULL; /* silence compiler. */ int i, lim, error = EINVAL; + int fib = 0; u_char af; struct walkarg w; @@ -1828,7 +1831,25 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) namelen--; if (req->newptr) return (EPERM); - if (namelen != 3) + if (name[1] == NET_RT_DUMP) { + if (namelen == 3) +#ifndef __rtems__ + fib = req->td->td_proc->p_fibnum; +#else /* __rtems__ */ + fib = BSD_DEFAULT_FIB; +#endif /* __rtems__ */ + else if (namelen == 4) + fib = (name[3] == -1) ? +#ifndef __rtems__ + req->td->td_proc->p_fibnum : name[3]; +#else /* __rtems__ */ + BSD_DEFAULT_FIB : name[3]; +#endif /* __rtems__ */ + else + return ((namelen < 3) ? EISDIR : ENOTDIR); + if (fib < 0 || fib >= rt_numfibs) + return (EINVAL); + } else if (namelen != 3) return ((namelen < 3) ? EISDIR : ENOTDIR); af = name[0]; if (af > AF_MAX) @@ -1867,11 +1888,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) * take care of routing entries */ for (error = 0; error == 0 && i <= lim; i++) { -#ifndef __rtems__ - rnh = rt_tables_get_rnh(req->td->td_proc->p_fibnum, i); -#else /* __rtems__ */ - rnh = rt_tables_get_rnh(BSD_DEFAULT_FIB, i); -#endif /* __rtems__ */ + rnh = rt_tables_get_rnh(fib, i); if (rnh != NULL) { RADIX_NODE_HEAD_RLOCK(rnh); error = rnh->rnh_walktree(rnh, @@ -1896,7 +1913,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) return (error); } -SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); +static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); /* * Definitions of protocols supported in the ROUTE domain. 
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h index 8ef1c00d..01e26cdb 100644 --- a/freebsd/sys/net/vnet.h +++ b/freebsd/sys/net/vnet.h @@ -95,7 +95,9 @@ struct vnet { * Location of the kernel's 'set_vnet' linker set. */ extern uintptr_t *__start_set_vnet; +__GLOBL(__start_set_vnet); extern uintptr_t *__stop_set_vnet; +__GLOBL(__stop_set_vnet); #define VNET_START (uintptr_t)&__start_set_vnet #define VNET_STOP (uintptr_t)&__stop_set_vnet @@ -249,6 +251,7 @@ int vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS); ptr, val, vnet_sysctl_handle_int, "I", descr) #define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \ fmt, descr) \ + CTASSERT(((access) & CTLTYPE) != 0); \ SYSCTL_OID(parent, nbr, name, CTLFLAG_VNET|(access), ptr, arg, \ handler, fmt, descr) #define SYSCTL_VNET_OPAQUE(parent, nbr, name, access, ptr, len, fmt, \ |