author     Sebastian Huber <sebastian.huber@embedded-brains.de>  2013-10-09 22:42:09 +0200
committer  Sebastian Huber <sebastian.huber@embedded-brains.de>  2013-10-10 09:06:58 +0200
commit     bceabc95c1c85d793200446fa85f1ddc6313ea29 (patch)
tree       973c8bd8deca9fd69913f2895cc91e0e6114d46c /freebsd/sys/net
parent     Add FreeBSD sources as a submodule (diff)
Move files to match FreeBSD layout
Diffstat (limited to 'freebsd/sys/net')
-rw-r--r--  freebsd/sys/net/bpf.c  2398
-rw-r--r--  freebsd/sys/net/bpf.h  974
-rw-r--r--  freebsd/sys/net/bpf_buffer.c  212
-rw-r--r--  freebsd/sys/net/bpf_buffer.h  50
-rw-r--r--  freebsd/sys/net/bpf_filter.c  582
-rw-r--r--  freebsd/sys/net/bpf_jitter.c  143
-rw-r--r--  freebsd/sys/net/bpf_jitter.h  84
-rw-r--r--  freebsd/sys/net/bpf_zerocopy.h  56
-rw-r--r--  freebsd/sys/net/bpfdesc.h  149
-rw-r--r--  freebsd/sys/net/bridgestp.c  2250
-rw-r--r--  freebsd/sys/net/bridgestp.h  396
-rw-r--r--  freebsd/sys/net/ethernet.h  2
-rw-r--r--  freebsd/sys/net/fddi.h  105
-rw-r--r--  freebsd/sys/net/firewire.h  142
-rw-r--r--  freebsd/sys/net/flowtable.h  82
-rw-r--r--  freebsd/sys/net/ieee8023ad_lacp.c  1947
-rw-r--r--  freebsd/sys/net/ieee8023ad_lacp.h  333
-rw-r--r--  freebsd/sys/net/if.c  3431
-rw-r--r--  freebsd/sys/net/if.h  2
-rw-r--r--  freebsd/sys/net/if_arc.h  143
-rw-r--r--  freebsd/sys/net/if_arcsubr.c  886
-rw-r--r--  freebsd/sys/net/if_arp.h  2
-rw-r--r--  freebsd/sys/net/if_atm.h  337
-rw-r--r--  freebsd/sys/net/if_atmsubr.c  504
-rw-r--r--  freebsd/sys/net/if_bridge.c  3458
-rw-r--r--  freebsd/sys/net/if_bridgevar.h  328
-rw-r--r--  freebsd/sys/net/if_clone.c  617
-rw-r--r--  freebsd/sys/net/if_clone.h  116
-rw-r--r--  freebsd/sys/net/if_dead.c  116
-rw-r--r--  freebsd/sys/net/if_disc.c  247
-rw-r--r--  freebsd/sys/net/if_dl.h  2
-rw-r--r--  freebsd/sys/net/if_edsc.c  356
-rw-r--r--  freebsd/sys/net/if_ef.c  610
-rw-r--r--  freebsd/sys/net/if_enc.c  375
-rw-r--r--  freebsd/sys/net/if_enc.h  35
-rw-r--r--  freebsd/sys/net/if_epair.c  955
-rw-r--r--  freebsd/sys/net/if_ethersubr.c  1364
-rw-r--r--  freebsd/sys/net/if_faith.c  353
-rw-r--r--  freebsd/sys/net/if_fddisubr.c  800
-rw-r--r--  freebsd/sys/net/if_fwsubr.c  853
-rw-r--r--  freebsd/sys/net/if_gif.c  1025
-rw-r--r--  freebsd/sys/net/if_gif.h  130
-rw-r--r--  freebsd/sys/net/if_gre.c  909
-rw-r--r--  freebsd/sys/net/if_gre.h  194
-rw-r--r--  freebsd/sys/net/if_iso88025subr.c  831
-rw-r--r--  freebsd/sys/net/if_lagg.c  1808
-rw-r--r--  freebsd/sys/net/if_lagg.h  247
-rw-r--r--  freebsd/sys/net/if_llatbl.c  528
-rw-r--r--  freebsd/sys/net/if_llatbl.h  208
-rw-r--r--  freebsd/sys/net/if_llc.h  161
-rw-r--r--  freebsd/sys/net/if_loop.c  451
-rw-r--r--  freebsd/sys/net/if_media.c  566
-rw-r--r--  freebsd/sys/net/if_media.h  692
-rw-r--r--  freebsd/sys/net/if_mib.c  171
-rw-r--r--  freebsd/sys/net/if_mib.h  171
-rw-r--r--  freebsd/sys/net/if_sppp.h  234
-rw-r--r--  freebsd/sys/net/if_spppfr.c  636
-rw-r--r--  freebsd/sys/net/if_spppsubr.c  5492
-rw-r--r--  freebsd/sys/net/if_stf.c  850
-rw-r--r--  freebsd/sys/net/if_stf.h  38
-rw-r--r--  freebsd/sys/net/if_tap.c  1086
-rw-r--r--  freebsd/sys/net/if_tap.h  74
-rw-r--r--  freebsd/sys/net/if_tapvar.h  69
-rw-r--r--  freebsd/sys/net/if_tun.c  1059
-rw-r--r--  freebsd/sys/net/if_tun.h  48
-rw-r--r--  freebsd/sys/net/if_types.h  2
-rw-r--r--  freebsd/sys/net/if_var.h  904
-rw-r--r--  freebsd/sys/net/if_vlan.c  1538
-rw-r--r--  freebsd/sys/net/if_vlan_var.h  137
-rw-r--r--  freebsd/sys/net/iso88025.h  172
-rw-r--r--  freebsd/sys/net/netisr.c  1172
-rw-r--r--  freebsd/sys/net/netisr.h  156
-rw-r--r--  freebsd/sys/net/pfil.c  331
-rw-r--r--  freebsd/sys/net/pfil.h  117
-rw-r--r--  freebsd/sys/net/pfkeyv2.h  432
-rw-r--r--  freebsd/sys/net/ppp_defs.h  158
-rw-r--r--  freebsd/sys/net/radix.c  1205
-rw-r--r--  freebsd/sys/net/radix.h  176
-rw-r--r--  freebsd/sys/net/radix_mpath.c  365
-rw-r--r--  freebsd/sys/net/radix_mpath.h  63
-rw-r--r--  freebsd/sys/net/raw_cb.c  119
-rw-r--r--  freebsd/sys/net/raw_cb.h  84
-rw-r--r--  freebsd/sys/net/raw_usrreq.c  266
-rw-r--r--  freebsd/sys/net/route.c  1601
-rw-r--r--  freebsd/sys/net/route.h  2
-rw-r--r--  freebsd/sys/net/rtsock.c  1702
-rw-r--r--  freebsd/sys/net/slcompress.c  609
-rw-r--r--  freebsd/sys/net/slcompress.h  158
-rw-r--r--  freebsd/sys/net/vnet.h  437
-rw-r--r--  freebsd/sys/net/zlib.c  5409
-rw-r--r--  freebsd/sys/net/zlib.h  1018
91 files changed, 61806 insertions, 0 deletions
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c
new file mode 100644
index 00000000..684c7343
--- /dev/null
+++ b/freebsd/sys/net/bpf.c
@@ -0,0 +1,2398 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf.c 8.4 (Berkeley) 1/9/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_bpf.h>
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_netgraph.h>
+
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/conf.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/ttycom.h>
+#include <freebsd/sys/uio.h>
+
+#include <freebsd/sys/event.h>
+#include <freebsd/sys/file.h>
+#include <freebsd/sys/poll.h>
+#include <freebsd/sys/proc.h>
+
+#include <freebsd/sys/socket.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/bpf_buffer.h>
+#ifdef BPF_JITTER
+#include <freebsd/net/bpf_jitter.h>
+#endif
+#include <freebsd/net/bpf_zerocopy.h>
+#include <freebsd/net/bpfdesc.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/if_ether.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net80211/ieee80211_freebsd.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
+
+#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
+
+#define PRINET 26 /* interruptible */
+
+#ifdef COMPAT_FREEBSD32
+#include <freebsd/sys/mount.h>
+#include <freebsd/compat/freebsd32/freebsd32.h>
+#define BPF_ALIGNMENT32 sizeof(int32_t)
+#define BPF_WORDALIGN32(x) (((x)+(BPF_ALIGNMENT32-1))&~(BPF_ALIGNMENT32-1))
+
+/*
+ * 32-bit version of structure prepended to each packet. We use this header
+ * instead of the standard one for 32-bit streams. We mark a stream as
+ * 32-bit the first time we see a 32-bit compat ioctl request.
+ */
+struct bpf_hdr32 {
+ struct timeval32 bh_tstamp; /* time stamp */
+ uint32_t bh_caplen; /* length of captured portion */
+ uint32_t bh_datalen; /* original length of packet */
+ uint16_t bh_hdrlen; /* length of bpf header (this struct
+ plus alignment padding) */
+};
+
+struct bpf_program32 {
+ u_int bf_len;
+ uint32_t bf_insns;
+};
+
+struct bpf_dltlist32 {
+ u_int bfl_len;
+ u_int bfl_list;
+};
+
+#define BIOCSETF32 _IOW('B', 103, struct bpf_program32)
+#define BIOCSRTIMEOUT32 _IOW('B',109, struct timeval32)
+#define BIOCGRTIMEOUT32 _IOR('B',110, struct timeval32)
+#define BIOCGDLTLIST32 _IOWR('B',121, struct bpf_dltlist32)
+#define BIOCSETWF32 _IOW('B',123, struct bpf_program32)
+#define BIOCSETFNR32 _IOW('B',130, struct bpf_program32)
+#endif
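
An editor's illustration (not part of this commit): the compat32 machinery
above changes the record layout in the capture buffer, so a consumer has to
use the alignment rule that matches its stream. A minimal sketch in terms of
the structures and macros defined above; bpf_next_record is an invented name:

	/*
	 * Advance from one capture record to the next. Records written
	 * for a 32-bit stream were padded with BPF_WORDALIGN32() (4-byte
	 * alignment); native records use BPF_WORDALIGN() (sizeof(long)).
	 */
	static u_char *
	bpf_next_record(u_char *p, int compat32)
	{
		if (compat32) {
			struct bpf_hdr32 *h32 = (struct bpf_hdr32 *)p;

			return (p + BPF_WORDALIGN32(h32->bh_hdrlen +
			    h32->bh_caplen));
		} else {
			struct bpf_hdr *h = (struct bpf_hdr *)p;

			return (p + BPF_WORDALIGN(h->bh_hdrlen +
			    h->bh_caplen));
		}
	}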
+
+/*
+ * bpf_iflist is a list of BPF interface structures, each corresponding to a
+ * specific DLT. The same network interface might have several BPF interface
+ * structures registered by different layers in the stack (e.g., 802.11
+ * frames, Ethernet frames, etc.).
+ */
+static LIST_HEAD(, bpf_if) bpf_iflist;
+static struct mtx bpf_mtx; /* bpf global lock */
+static int bpf_bpfd_cnt;
+
+static void bpf_attachd(struct bpf_d *, struct bpf_if *);
+static void bpf_detachd(struct bpf_d *);
+static void bpf_freed(struct bpf_d *);
+static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
+ struct sockaddr *, int *, struct bpf_insn *);
+static int bpf_setif(struct bpf_d *, struct ifreq *);
+static void bpf_timed_out(void *);
+static __inline void
+ bpf_wakeup(struct bpf_d *);
+static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
+ void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
+ struct timeval *);
+static void reset_d(struct bpf_d *);
+static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
+static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
+static int bpf_setdlt(struct bpf_d *, u_int);
+static void filt_bpfdetach(struct knote *);
+static int filt_bpfread(struct knote *, long);
+static void bpf_drvinit(void *);
+static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
+
+SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
+int bpf_maxinsns = BPF_MAXINSNS;
+SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
+ &bpf_maxinsns, 0, "Maximum bpf program instructions");
+static int bpf_zerocopy_enable = 0;
+SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
+ &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
+SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
+ bpf_stats_sysctl, "bpf statistics portal");
+
+static d_open_t bpfopen;
+static d_read_t bpfread;
+static d_write_t bpfwrite;
+static d_ioctl_t bpfioctl;
+static d_poll_t bpfpoll;
+static d_kqfilter_t bpfkqfilter;
+
+static struct cdevsw bpf_cdevsw = {
+ .d_version = D_VERSION,
+ .d_open = bpfopen,
+ .d_read = bpfread,
+ .d_write = bpfwrite,
+ .d_ioctl = bpfioctl,
+ .d_poll = bpfpoll,
+ .d_name = "bpf",
+ .d_kqfilter = bpfkqfilter,
+};
+
+static struct filterops bpfread_filtops =
+ { 1, NULL, filt_bpfdetach, filt_bpfread };
+
+/*
+ * Wrapper functions for various buffering methods. If the set of buffer
+ * modes expands, we will probably want to introduce a switch data structure
+ * similar to protosw, etc.
+ */
+static void
+bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
+ u_int len)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_BUFFER:
+ return (bpf_buffer_append_bytes(d, buf, offset, src, len));
+
+ case BPF_BUFMODE_ZBUF:
+ d->bd_zcopy++;
+ return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
+
+ default:
+ panic("bpf_buf_append_bytes");
+ }
+}
+
+static void
+bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
+ u_int len)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_BUFFER:
+ return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
+
+ case BPF_BUFMODE_ZBUF:
+ d->bd_zcopy++;
+ return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
+
+ default:
+ panic("bpf_buf_append_mbuf");
+ }
+}
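
The comment above anticipates replacing these per-call switches with a
protosw-like operations table. A hypothetical sketch of what that switch
structure could look like (the bpf_bufops name and layout are invented here,
not part of this commit or of FreeBSD):

	struct bpf_bufops {
		void	(*bops_append_bytes)(struct bpf_d *, caddr_t, u_int,
			    void *, u_int);
		void	(*bops_append_mbuf)(struct bpf_d *, caddr_t, u_int,
			    void *, u_int);
		void	(*bops_free)(struct bpf_d *);
	};

	/*
	 * One table per buffer mode; a bd_bufops pointer would then
	 * replace the bd_bufmode switches in the wrappers above.
	 */
	static const struct bpf_bufops bpf_buffer_ops = {
		.bops_append_bytes	= bpf_buffer_append_bytes,
		.bops_append_mbuf	= bpf_buffer_append_mbuf,
		.bops_free		= bpf_buffer_free,
	};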
+
+/*
+ * This function gets called when the free buffer is re-assigned.
+ */
+static void
+bpf_buf_reclaimed(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_BUFFER:
+ return;
+
+ case BPF_BUFMODE_ZBUF:
+ bpf_zerocopy_buf_reclaimed(d);
+ return;
+
+ default:
+ panic("bpf_buf_reclaimed");
+ }
+}
+
+/*
+ * If the buffer mechanism has a way to decide that a held buffer can be made
+ * free, then it is exposed via the bpf_canfreebuf() interface. (1) is
+ * returned if the buffer can be discarded, (0) is returned if it cannot.
+ */
+static int
+bpf_canfreebuf(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_ZBUF:
+ return (bpf_zerocopy_canfreebuf(d));
+ }
+ return (0);
+}
+
+/*
+ * Allow the buffer model to indicate that the current store buffer is
+ * immutable, regardless of the appearance of space. Return (1) if the
+ * buffer is writable, and (0) if not.
+ */
+static int
+bpf_canwritebuf(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_ZBUF:
+ return (bpf_zerocopy_canwritebuf(d));
+ }
+ return (1);
+}
+
+/*
+ * Notify buffer model that an attempt to write to the store buffer has
+ * resulted in a dropped packet, in which case the buffer may be considered
+ * full.
+ */
+static void
+bpf_buffull(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_ZBUF:
+ bpf_zerocopy_buffull(d);
+ break;
+ }
+}
+
+/*
+ * Notify the buffer model that a buffer has moved into the hold position.
+ */
+void
+bpf_bufheld(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_ZBUF:
+ bpf_zerocopy_bufheld(d);
+ break;
+ }
+}
+
+static void
+bpf_free(struct bpf_d *d)
+{
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_BUFFER:
+ return (bpf_buffer_free(d));
+
+ case BPF_BUFMODE_ZBUF:
+ return (bpf_zerocopy_free(d));
+
+ default:
+ panic("bpf_buf_free");
+ }
+}
+
+static int
+bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
+ return (EOPNOTSUPP);
+ return (bpf_buffer_uiomove(d, buf, len, uio));
+}
+
+static int
+bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
+ return (EOPNOTSUPP);
+ return (bpf_buffer_ioctl_sblen(d, i));
+}
+
+static int
+bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
+ return (EOPNOTSUPP);
+ return (bpf_zerocopy_ioctl_getzmax(td, d, i));
+}
+
+static int
+bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
+ return (EOPNOTSUPP);
+ return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
+}
+
+static int
+bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
+ return (EOPNOTSUPP);
+ return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
+}
+
+/*
+ * General BPF functions.
+ */
+static int
+bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
+ struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
+{
+ const struct ieee80211_bpf_params *p;
+ struct ether_header *eh;
+ struct mbuf *m;
+ int error;
+ int len;
+ int hlen;
+ int slen;
+
+ /*
+ * Build a sockaddr based on the data link layer type.
+ * We do this at this level because the ethernet header
+ * is copied directly into the data field of the sockaddr.
+ * In the case of SLIP, there is no header and the packet
+ * is forwarded as is.
+ * Also, we are careful to leave room at the front of the mbuf
+ * for the link level header.
+ */
+ switch (linktype) {
+
+ case DLT_SLIP:
+ sockp->sa_family = AF_INET;
+ hlen = 0;
+ break;
+
+ case DLT_EN10MB:
+ sockp->sa_family = AF_UNSPEC;
+ /* XXX Would MAXLINKHDR be better? */
+ hlen = ETHER_HDR_LEN;
+ break;
+
+ case DLT_FDDI:
+ sockp->sa_family = AF_IMPLINK;
+ hlen = 0;
+ break;
+
+ case DLT_RAW:
+ sockp->sa_family = AF_UNSPEC;
+ hlen = 0;
+ break;
+
+ case DLT_NULL:
+ /*
+		 * null interface types require a 4-byte pseudo header which
+ * corresponds to the address family of the packet.
+ */
+ sockp->sa_family = AF_UNSPEC;
+ hlen = 4;
+ break;
+
+ case DLT_ATM_RFC1483:
+ /*
+		 * The en(4) ATM driver requires a 4-byte ATM pseudo header.
+		 * Though it isn't standard, the vpi:vci needs to be
+		 * specified anyway.
+ */
+ sockp->sa_family = AF_UNSPEC;
+ hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
+ break;
+
+ case DLT_PPP:
+ sockp->sa_family = AF_UNSPEC;
+ hlen = 4; /* This should match PPP_HDRLEN */
+ break;
+
+ case DLT_IEEE802_11: /* IEEE 802.11 wireless */
+ sockp->sa_family = AF_IEEE80211;
+ hlen = 0;
+ break;
+
+ case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */
+ sockp->sa_family = AF_IEEE80211;
+ sockp->sa_len = 12; /* XXX != 0 */
+ hlen = sizeof(struct ieee80211_bpf_params);
+ break;
+
+ default:
+ return (EIO);
+ }
+
+ len = uio->uio_resid;
+
+ if (len - hlen > ifp->if_mtu)
+ return (EMSGSIZE);
+
+ if ((unsigned)len > MJUM16BYTES)
+ return (EIO);
+
+ if (len <= MHLEN)
+ MGETHDR(m, M_WAIT, MT_DATA);
+ else if (len <= MCLBYTES)
+ m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
+ else
+ m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
+#if (MJUMPAGESIZE > MCLBYTES)
+ len <= MJUMPAGESIZE ? MJUMPAGESIZE :
+#endif
+ (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
+ m->m_pkthdr.len = m->m_len = len;
+ m->m_pkthdr.rcvif = NULL;
+ *mp = m;
+
+ if (m->m_len < hlen) {
+ error = EPERM;
+ goto bad;
+ }
+
+ error = uiomove(mtod(m, u_char *), len, uio);
+ if (error)
+ goto bad;
+
+ slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
+ if (slen == 0) {
+ error = EPERM;
+ goto bad;
+ }
+
+ /* Check for multicast destination */
+ switch (linktype) {
+ case DLT_EN10MB:
+ eh = mtod(m, struct ether_header *);
+ if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
+ if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
+ ETHER_ADDR_LEN) == 0)
+ m->m_flags |= M_BCAST;
+ else
+ m->m_flags |= M_MCAST;
+ }
+ break;
+ }
+
+ /*
+ * Make room for link header, and copy it to sockaddr
+ */
+ if (hlen != 0) {
+ if (sockp->sa_family == AF_IEEE80211) {
+ /*
+ * Collect true length from the parameter header
+ * NB: sockp is known to be zero'd so if we do a
+ * short copy unspecified parameters will be
+ * zero.
+ * NB: packet may not be aligned after stripping
+ * bpf params
+ * XXX check ibp_vers
+ */
+ p = mtod(m, const struct ieee80211_bpf_params *);
+ hlen = p->ibp_len;
+ if (hlen > sizeof(sockp->sa_data)) {
+ error = EINVAL;
+ goto bad;
+ }
+ }
+ bcopy(m->m_data, sockp->sa_data, hlen);
+ }
+ *hdrlen = hlen;
+
+ return (0);
+bad:
+ m_freem(m);
+ return (error);
+}
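
Seen from userland, bpf_movein() means a process injects raw frames by simply
writing them to an open BPF descriptor; for DLT_EN10MB the data must begin
with a complete Ethernet header. A hedged sketch (bpf_inject is an invented
helper; error handling is abbreviated):

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <net/if.h>
	#include <net/bpf.h>
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	static int
	bpf_inject(const char *ifname, const void *frame, size_t len)
	{
		struct ifreq ifr;
		int fd;

		if ((fd = open("/dev/bpf", O_RDWR)) < 0)
			return (-1);
		memset(&ifr, 0, sizeof(ifr));
		strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
		/* Bind to an interface, then hand the frame to bpfwrite(). */
		if (ioctl(fd, BIOCSETIF, &ifr) == -1 ||
		    write(fd, frame, len) != (ssize_t)len) {
			close(fd);
			return (-1);
		}
		close(fd);
		return (0);
	}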
+
+/*
+ * Attach file to the bpf interface, i.e. make d listen on bp.
+ */
+static void
+bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
+{
+ /*
+ * Point d at bp, and add d to the interface's list of listeners.
+ * Finally, point the driver's bpf cookie at the interface so
+ * it will divert packets to bpf.
+ */
+ BPFIF_LOCK(bp);
+ d->bd_bif = bp;
+ LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+ bpf_bpfd_cnt++;
+ BPFIF_UNLOCK(bp);
+
+ EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
+}
+
+/*
+ * Detach a file from its interface.
+ */
+static void
+bpf_detachd(struct bpf_d *d)
+{
+ int error;
+ struct bpf_if *bp;
+ struct ifnet *ifp;
+
+ bp = d->bd_bif;
+ BPFIF_LOCK(bp);
+ BPFD_LOCK(d);
+ ifp = d->bd_bif->bif_ifp;
+
+ /*
+ * Remove d from the interface's descriptor list.
+ */
+ LIST_REMOVE(d, bd_next);
+
+ bpf_bpfd_cnt--;
+ d->bd_bif = NULL;
+ BPFD_UNLOCK(d);
+ BPFIF_UNLOCK(bp);
+
+ EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
+
+ /*
+ * Check if this descriptor had requested promiscuous mode.
+ * If so, turn it off.
+ */
+ if (d->bd_promisc) {
+ d->bd_promisc = 0;
+ CURVNET_SET(ifp->if_vnet);
+ error = ifpromisc(ifp, 0);
+ CURVNET_RESTORE();
+ if (error != 0 && error != ENXIO) {
+ /*
+ * ENXIO can happen if a pccard is unplugged
+ * Something is really wrong if we were able to put
+ * the driver into promiscuous mode, but can't
+ * take it out.
+ */
+ if_printf(bp->bif_ifp,
+ "bpf_detach: ifpromisc failed (%d)\n", error);
+ }
+ }
+}
+
+/*
+ * Close the descriptor by detaching it from its interface,
+ * deallocating its buffers, and marking it free.
+ */
+static void
+bpf_dtor(void *data)
+{
+ struct bpf_d *d = data;
+
+ BPFD_LOCK(d);
+ if (d->bd_state == BPF_WAITING)
+ callout_stop(&d->bd_callout);
+ d->bd_state = BPF_IDLE;
+ BPFD_UNLOCK(d);
+ funsetown(&d->bd_sigio);
+ mtx_lock(&bpf_mtx);
+ if (d->bd_bif)
+ bpf_detachd(d);
+ mtx_unlock(&bpf_mtx);
+ selwakeuppri(&d->bd_sel, PRINET);
+#ifdef MAC
+ mac_bpfdesc_destroy(d);
+#endif /* MAC */
+ knlist_destroy(&d->bd_sel.si_note);
+ callout_drain(&d->bd_callout);
+ bpf_freed(d);
+ free(d, M_BPF);
+}
+
+/*
+ * Open ethernet device. Returns ENXIO for illegal minor device number,
+ * EBUSY if file is open by another process.
+ */
+/* ARGSUSED */
+static int
+bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
+{
+ struct bpf_d *d;
+ int error;
+
+ d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
+ error = devfs_set_cdevpriv(d, bpf_dtor);
+ if (error != 0) {
+ free(d, M_BPF);
+ return (error);
+ }
+
+ /*
+ * For historical reasons, perform a one-time initialization call to
+ * the buffer routines, even though we're not yet committed to a
+ * particular buffer method.
+ */
+ bpf_buffer_init(d);
+ d->bd_bufmode = BPF_BUFMODE_BUFFER;
+ d->bd_sig = SIGIO;
+ d->bd_direction = BPF_D_INOUT;
+ d->bd_pid = td->td_proc->p_pid;
+#ifdef MAC
+ mac_bpfdesc_init(d);
+ mac_bpfdesc_create(td->td_ucred, d);
+#endif
+ mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
+ callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
+ knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
+
+ return (0);
+}
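
The cdevpriv scheme above lets every open of /dev/bpf yield an independent
descriptor. A typical capture setup from userland, sketched under the same
assumptions as the previous example (bpf_open_capture is an invented helper):

	static int
	bpf_open_capture(const char *ifname, u_int *buflen)
	{
		struct ifreq ifr;
		u_int immediate = 1;
		int fd;

		if ((fd = open("/dev/bpf", O_RDONLY)) < 0)
			return (-1);
		memset(&ifr, 0, sizeof(ifr));
		strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
		if (ioctl(fd, BIOCSETIF, &ifr) == -1 ||	/* bind interface */
		    ioctl(fd, BIOCGBLEN, buflen) == -1 ||	/* kernel bufsize */
		    ioctl(fd, BIOCIMMEDIATE, &immediate) == -1) {
			close(fd);
			return (-1);
		}
		return (fd);
	}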
+
+/*
+ * bpfread - read next chunk of packets from buffers
+ */
+static int
+bpfread(struct cdev *dev, struct uio *uio, int ioflag)
+{
+ struct bpf_d *d;
+ int error;
+ int non_block;
+ int timed_out;
+
+ error = devfs_get_cdevpriv((void **)&d);
+ if (error != 0)
+ return (error);
+
+ /*
+	 * Restrict the application to use a buffer the same size as
+	 * the kernel buffers.
+ */
+ if (uio->uio_resid != d->bd_bufsize)
+ return (EINVAL);
+
+ non_block = ((ioflag & O_NONBLOCK) != 0);
+
+ BPFD_LOCK(d);
+ d->bd_pid = curthread->td_proc->p_pid;
+ if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
+ BPFD_UNLOCK(d);
+ return (EOPNOTSUPP);
+ }
+ if (d->bd_state == BPF_WAITING)
+ callout_stop(&d->bd_callout);
+ timed_out = (d->bd_state == BPF_TIMED_OUT);
+ d->bd_state = BPF_IDLE;
+ /*
+ * If the hold buffer is empty, then do a timed sleep, which
+ * ends when the timeout expires or when enough packets
+ * have arrived to fill the store buffer.
+ */
+ while (d->bd_hbuf == NULL) {
+ if (d->bd_slen != 0) {
+ /*
+			 * One or more packets arrived since the previous
+			 * read, or while we were asleep.
+ */
+ if (d->bd_immediate || non_block || timed_out) {
+ /*
+ * Rotate the buffers and return what's here
+ * if we are in immediate mode, non-blocking
+ * flag is set, or this descriptor timed out.
+ */
+ ROTATE_BUFFERS(d);
+ break;
+ }
+ }
+
+ /*
+ * No data is available, check to see if the bpf device
+ * is still pointed at a real interface. If not, return
+ * ENXIO so that the userland process knows to rebind
+ * it before using it again.
+ */
+ if (d->bd_bif == NULL) {
+ BPFD_UNLOCK(d);
+ return (ENXIO);
+ }
+
+ if (non_block) {
+ BPFD_UNLOCK(d);
+ return (EWOULDBLOCK);
+ }
+ error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
+ "bpf", d->bd_rtout);
+ if (error == EINTR || error == ERESTART) {
+ BPFD_UNLOCK(d);
+ return (error);
+ }
+ if (error == EWOULDBLOCK) {
+ /*
+ * On a timeout, return what's in the buffer,
+ * which may be nothing. If there is something
+ * in the store buffer, we can rotate the buffers.
+ */
+ if (d->bd_hbuf)
+ /*
+ * We filled up the buffer in between
+ * getting the timeout and arriving
+ * here, so we don't need to rotate.
+ */
+ break;
+
+ if (d->bd_slen == 0) {
+ BPFD_UNLOCK(d);
+ return (0);
+ }
+ ROTATE_BUFFERS(d);
+ break;
+ }
+ }
+ /*
+ * At this point, we know we have something in the hold slot.
+ */
+ BPFD_UNLOCK(d);
+
+ /*
+ * Move data from hold buffer into user space.
+ * We know the entire buffer is transferred since
+	 * we checked above that the read buffer is bd_bufsize bytes.
+ *
+ * XXXRW: More synchronization needed here: what if a second thread
+ * issues a read on the same fd at the same time? Don't want this
+ * getting invalidated.
+ */
+ error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
+
+ BPFD_LOCK(d);
+ d->bd_fbuf = d->bd_hbuf;
+ d->bd_hbuf = NULL;
+ d->bd_hlen = 0;
+ bpf_buf_reclaimed(d);
+ BPFD_UNLOCK(d);
+
+ return (error);
+}
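
Note the hard requirement at the top of bpfread(): the read(2) must request
exactly the kernel buffer size, and one read can return many packets, each
prefixed by a bpf_hdr and padded out to BPF_WORDALIGN(). A sketch of the
matching consumer loop (includes and error handling trimmed; handle_packet is
a placeholder callback; fd and buflen come from the previous sketch):

	u_char *buf, *p;
	ssize_t n;

	if ((buf = malloc(buflen)) == NULL)
		err(1, "malloc");
	while ((n = read(fd, buf, buflen)) > 0) {
		for (p = buf; p < buf + n; ) {
			struct bpf_hdr *bh = (struct bpf_hdr *)p;

			handle_packet(p + bh->bh_hdrlen, bh->bh_caplen);
			/* Records are padded to longword boundaries. */
			p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
		}
	}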
+
+/*
+ * If there are processes sleeping on this descriptor, wake them up.
+ */
+static __inline void
+bpf_wakeup(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+ if (d->bd_state == BPF_WAITING) {
+ callout_stop(&d->bd_callout);
+ d->bd_state = BPF_IDLE;
+ }
+ wakeup(d);
+ if (d->bd_async && d->bd_sig && d->bd_sigio)
+ pgsigio(&d->bd_sigio, d->bd_sig, 0);
+
+ selwakeuppri(&d->bd_sel, PRINET);
+ KNOTE_LOCKED(&d->bd_sel.si_note, 0);
+}
+
+static void
+bpf_timed_out(void *arg)
+{
+ struct bpf_d *d = (struct bpf_d *)arg;
+
+ BPFD_LOCK_ASSERT(d);
+
+ if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
+ return;
+ if (d->bd_state == BPF_WAITING) {
+ d->bd_state = BPF_TIMED_OUT;
+ if (d->bd_slen != 0)
+ bpf_wakeup(d);
+ }
+}
+
+static int
+bpf_ready(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
+ return (1);
+ if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
+ d->bd_slen != 0)
+ return (1);
+ return (0);
+}
+
+static int
+bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
+{
+ struct bpf_d *d;
+ struct ifnet *ifp;
+ struct mbuf *m, *mc;
+ struct sockaddr dst;
+ int error, hlen;
+
+ error = devfs_get_cdevpriv((void **)&d);
+ if (error != 0)
+ return (error);
+
+ d->bd_pid = curthread->td_proc->p_pid;
+ d->bd_wcount++;
+ if (d->bd_bif == NULL) {
+ d->bd_wdcount++;
+ return (ENXIO);
+ }
+
+ ifp = d->bd_bif->bif_ifp;
+
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ d->bd_wdcount++;
+ return (ENETDOWN);
+ }
+
+ if (uio->uio_resid == 0) {
+ d->bd_wdcount++;
+ return (0);
+ }
+
+ bzero(&dst, sizeof(dst));
+ m = NULL;
+ hlen = 0;
+ error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
+ &m, &dst, &hlen, d->bd_wfilter);
+ if (error) {
+ d->bd_wdcount++;
+ return (error);
+ }
+ d->bd_wfcount++;
+ if (d->bd_hdrcmplt)
+ dst.sa_family = pseudo_AF_HDRCMPLT;
+
+ if (d->bd_feedback) {
+ mc = m_dup(m, M_DONTWAIT);
+ if (mc != NULL)
+ mc->m_pkthdr.rcvif = ifp;
+ /* Set M_PROMISC for outgoing packets to be discarded. */
+ if (d->bd_direction == BPF_D_INOUT)
+ m->m_flags |= M_PROMISC;
+ } else
+ mc = NULL;
+
+ m->m_pkthdr.len -= hlen;
+ m->m_len -= hlen;
+ m->m_data += hlen; /* XXX */
+
+ CURVNET_SET(ifp->if_vnet);
+#ifdef MAC
+ BPFD_LOCK(d);
+ mac_bpfdesc_create_mbuf(d, m);
+ if (mc != NULL)
+ mac_bpfdesc_create_mbuf(d, mc);
+ BPFD_UNLOCK(d);
+#endif
+
+ error = (*ifp->if_output)(ifp, m, &dst, NULL);
+ if (error)
+ d->bd_wdcount++;
+
+ if (mc != NULL) {
+ if (error == 0)
+ (*ifp->if_input)(ifp, mc);
+ else
+ m_freem(mc);
+ }
+ CURVNET_RESTORE();
+
+ return (error);
+}
+
+/*
+ * Reset a descriptor by flushing its packet buffer and clearing the receive
+ * and drop counts. This is doable for kernel-only buffers, but with
+ * zero-copy buffers, we can't write to (or rotate) buffers that are
+ * currently owned by userspace. It would be nice if we could encapsulate
+ * this logic in the buffer code rather than here.
+ */
+static void
+reset_d(struct bpf_d *d)
+{
+
+ mtx_assert(&d->bd_mtx, MA_OWNED);
+
+ if ((d->bd_hbuf != NULL) &&
+ (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
+ /* Free the hold buffer. */
+ d->bd_fbuf = d->bd_hbuf;
+ d->bd_hbuf = NULL;
+ d->bd_hlen = 0;
+ bpf_buf_reclaimed(d);
+ }
+ if (bpf_canwritebuf(d))
+ d->bd_slen = 0;
+ d->bd_rcount = 0;
+ d->bd_dcount = 0;
+ d->bd_fcount = 0;
+ d->bd_wcount = 0;
+ d->bd_wfcount = 0;
+ d->bd_wdcount = 0;
+ d->bd_zcopy = 0;
+}
+
+/*
+ * FIONREAD Check for read packet available.
+ * SIOCGIFADDR Get interface address - convenient hook to driver.
+ * BIOCGBLEN Get buffer len [for read()].
+ * BIOCSETF Set read filter.
+ * BIOCSETFNR Set read filter without resetting descriptor.
+ * BIOCSETWF Set write filter.
+ * BIOCFLUSH Flush read packet buffer.
+ * BIOCPROMISC Put interface into promiscuous mode.
+ * BIOCGDLT Get link layer type.
+ * BIOCGETIF Get interface name.
+ * BIOCSETIF Set interface.
+ * BIOCSRTIMEOUT Set read timeout.
+ * BIOCGRTIMEOUT Get read timeout.
+ * BIOCGSTATS Get packet stats.
+ * BIOCIMMEDIATE Set immediate mode.
+ * BIOCVERSION Get filter language version.
+ * BIOCGHDRCMPLT Get "header already complete" flag
+ * BIOCSHDRCMPLT Set "header already complete" flag
+ * BIOCGDIRECTION Get packet direction flag
+ * BIOCSDIRECTION Set packet direction flag
+ * BIOCLOCK Set "locked" flag
+ * BIOCFEEDBACK Set packet feedback mode.
+ * BIOCSETZBUF Set current zero-copy buffer locations.
+ * BIOCGETZMAX Get maximum zero-copy buffer size.
+ * BIOCROTZBUF Force rotation of zero-copy buffer
+ * BIOCSETBUFMODE Set buffer mode.
+ * BIOCGETBUFMODE Get current buffer mode.
+ */
+/* ARGSUSED */
+static int
+bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
+ struct thread *td)
+{
+ struct bpf_d *d;
+ int error;
+
+ error = devfs_get_cdevpriv((void **)&d);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Refresh PID associated with this descriptor.
+ */
+ BPFD_LOCK(d);
+ d->bd_pid = td->td_proc->p_pid;
+ if (d->bd_state == BPF_WAITING)
+ callout_stop(&d->bd_callout);
+ d->bd_state = BPF_IDLE;
+ BPFD_UNLOCK(d);
+
+ if (d->bd_locked == 1) {
+ switch (cmd) {
+ case BIOCGBLEN:
+ case BIOCFLUSH:
+ case BIOCGDLT:
+ case BIOCGDLTLIST:
+#ifdef COMPAT_FREEBSD32
+ case BIOCGDLTLIST32:
+#endif
+ case BIOCGETIF:
+ case BIOCGRTIMEOUT:
+#ifdef COMPAT_FREEBSD32
+ case BIOCGRTIMEOUT32:
+#endif
+ case BIOCGSTATS:
+ case BIOCVERSION:
+ case BIOCGRSIG:
+ case BIOCGHDRCMPLT:
+ case BIOCFEEDBACK:
+ case FIONREAD:
+ case BIOCLOCK:
+ case BIOCSRTIMEOUT:
+#ifdef COMPAT_FREEBSD32
+ case BIOCSRTIMEOUT32:
+#endif
+ case BIOCIMMEDIATE:
+ case TIOCGPGRP:
+ case BIOCROTZBUF:
+ break;
+ default:
+ return (EPERM);
+ }
+ }
+#ifdef COMPAT_FREEBSD32
+ /*
+ * If we see a 32-bit compat ioctl, mark the stream as 32-bit so
+ * that it will get 32-bit packet headers.
+ */
+ switch (cmd) {
+ case BIOCSETF32:
+ case BIOCSETFNR32:
+ case BIOCSETWF32:
+ case BIOCGDLTLIST32:
+ case BIOCGRTIMEOUT32:
+ case BIOCSRTIMEOUT32:
+ d->bd_compat32 = 1;
+ }
+#endif
+
+ CURVNET_SET(TD_TO_VNET(td));
+ switch (cmd) {
+
+ default:
+ error = EINVAL;
+ break;
+
+ /*
+ * Check for read packet available.
+ */
+ case FIONREAD:
+ {
+ int n;
+
+ BPFD_LOCK(d);
+ n = d->bd_slen;
+ if (d->bd_hbuf)
+ n += d->bd_hlen;
+ BPFD_UNLOCK(d);
+
+ *(int *)addr = n;
+ break;
+ }
+
+ case SIOCGIFADDR:
+ {
+ struct ifnet *ifp;
+
+ if (d->bd_bif == NULL)
+ error = EINVAL;
+ else {
+ ifp = d->bd_bif->bif_ifp;
+ error = (*ifp->if_ioctl)(ifp, cmd, addr);
+ }
+ break;
+ }
+
+ /*
+ * Get buffer len [for read()].
+ */
+ case BIOCGBLEN:
+ *(u_int *)addr = d->bd_bufsize;
+ break;
+
+ /*
+ * Set buffer length.
+ */
+ case BIOCSBLEN:
+ error = bpf_ioctl_sblen(d, (u_int *)addr);
+ break;
+
+ /*
+ * Set link layer read filter.
+ */
+ case BIOCSETF:
+ case BIOCSETFNR:
+ case BIOCSETWF:
+#ifdef COMPAT_FREEBSD32
+ case BIOCSETF32:
+ case BIOCSETFNR32:
+ case BIOCSETWF32:
+#endif
+ error = bpf_setf(d, (struct bpf_program *)addr, cmd);
+ break;
+
+ /*
+ * Flush read packet buffer.
+ */
+ case BIOCFLUSH:
+ BPFD_LOCK(d);
+ reset_d(d);
+ BPFD_UNLOCK(d);
+ break;
+
+ /*
+ * Put interface into promiscuous mode.
+ */
+ case BIOCPROMISC:
+ if (d->bd_bif == NULL) {
+ /*
+ * No interface attached yet.
+ */
+ error = EINVAL;
+ break;
+ }
+ if (d->bd_promisc == 0) {
+ error = ifpromisc(d->bd_bif->bif_ifp, 1);
+ if (error == 0)
+ d->bd_promisc = 1;
+ }
+ break;
+
+ /*
+ * Get current data link type.
+ */
+ case BIOCGDLT:
+ if (d->bd_bif == NULL)
+ error = EINVAL;
+ else
+ *(u_int *)addr = d->bd_bif->bif_dlt;
+ break;
+
+ /*
+ * Get a list of supported data link types.
+ */
+#ifdef COMPAT_FREEBSD32
+ case BIOCGDLTLIST32:
+ {
+ struct bpf_dltlist32 *list32;
+ struct bpf_dltlist dltlist;
+
+ list32 = (struct bpf_dltlist32 *)addr;
+ dltlist.bfl_len = list32->bfl_len;
+ dltlist.bfl_list = PTRIN(list32->bfl_list);
+ if (d->bd_bif == NULL)
+ error = EINVAL;
+ else {
+ error = bpf_getdltlist(d, &dltlist);
+ if (error == 0)
+ list32->bfl_len = dltlist.bfl_len;
+ }
+ break;
+ }
+#endif
+
+ case BIOCGDLTLIST:
+ if (d->bd_bif == NULL)
+ error = EINVAL;
+ else
+ error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
+ break;
+
+ /*
+ * Set data link type.
+ */
+ case BIOCSDLT:
+ if (d->bd_bif == NULL)
+ error = EINVAL;
+ else
+ error = bpf_setdlt(d, *(u_int *)addr);
+ break;
+
+ /*
+ * Get interface name.
+ */
+ case BIOCGETIF:
+ if (d->bd_bif == NULL)
+ error = EINVAL;
+ else {
+ struct ifnet *const ifp = d->bd_bif->bif_ifp;
+ struct ifreq *const ifr = (struct ifreq *)addr;
+
+ strlcpy(ifr->ifr_name, ifp->if_xname,
+ sizeof(ifr->ifr_name));
+ }
+ break;
+
+ /*
+ * Set interface.
+ */
+ case BIOCSETIF:
+ error = bpf_setif(d, (struct ifreq *)addr);
+ break;
+
+ /*
+ * Set read timeout.
+ */
+ case BIOCSRTIMEOUT:
+#ifdef COMPAT_FREEBSD32
+ case BIOCSRTIMEOUT32:
+#endif
+ {
+		struct timeval *tv;
+#ifdef COMPAT_FREEBSD32
+ struct timeval32 *tv32;
+ struct timeval tv64;
+
+ if (cmd == BIOCSRTIMEOUT32) {
+ tv32 = (struct timeval32 *)addr;
+ tv = &tv64;
+ tv->tv_sec = tv32->tv_sec;
+ tv->tv_usec = tv32->tv_usec;
+ } else
+#endif
+ tv = (struct timeval *)addr;
+
+ /*
+ * Subtract 1 tick from tvtohz() since this isn't
+ * a one-shot timer.
+ */
+ if ((error = itimerfix(tv)) == 0)
+ d->bd_rtout = tvtohz(tv) - 1;
+ break;
+ }
+
+ /*
+ * Get read timeout.
+ */
+ case BIOCGRTIMEOUT:
+#ifdef COMPAT_FREEBSD32
+ case BIOCGRTIMEOUT32:
+#endif
+ {
+ struct timeval *tv;
+#ifdef COMPAT_FREEBSD32
+ struct timeval32 *tv32;
+ struct timeval tv64;
+
+ if (cmd == BIOCGRTIMEOUT32)
+ tv = &tv64;
+ else
+#endif
+ tv = (struct timeval *)addr;
+
+ tv->tv_sec = d->bd_rtout / hz;
+ tv->tv_usec = (d->bd_rtout % hz) * tick;
+#ifdef COMPAT_FREEBSD32
+ if (cmd == BIOCGRTIMEOUT32) {
+ tv32 = (struct timeval32 *)addr;
+ tv32->tv_sec = tv->tv_sec;
+ tv32->tv_usec = tv->tv_usec;
+ }
+#endif
+
+ break;
+ }
+
+ /*
+ * Get packet stats.
+ */
+ case BIOCGSTATS:
+ {
+ struct bpf_stat *bs = (struct bpf_stat *)addr;
+
+ /* XXXCSJP overflow */
+ bs->bs_recv = d->bd_rcount;
+ bs->bs_drop = d->bd_dcount;
+ break;
+ }
+
+ /*
+ * Set immediate mode.
+ */
+ case BIOCIMMEDIATE:
+ d->bd_immediate = *(u_int *)addr;
+ break;
+
+ case BIOCVERSION:
+ {
+ struct bpf_version *bv = (struct bpf_version *)addr;
+
+ bv->bv_major = BPF_MAJOR_VERSION;
+ bv->bv_minor = BPF_MINOR_VERSION;
+ break;
+ }
+
+ /*
+ * Get "header already complete" flag
+ */
+ case BIOCGHDRCMPLT:
+ *(u_int *)addr = d->bd_hdrcmplt;
+ break;
+
+ /*
+ * Set "header already complete" flag
+ */
+ case BIOCSHDRCMPLT:
+ d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
+ break;
+
+ /*
+ * Get packet direction flag
+ */
+ case BIOCGDIRECTION:
+ *(u_int *)addr = d->bd_direction;
+ break;
+
+ /*
+ * Set packet direction flag
+ */
+ case BIOCSDIRECTION:
+ {
+ u_int direction;
+
+ direction = *(u_int *)addr;
+ switch (direction) {
+ case BPF_D_IN:
+ case BPF_D_INOUT:
+ case BPF_D_OUT:
+ d->bd_direction = direction;
+ break;
+ default:
+ error = EINVAL;
+ }
+ }
+ break;
+
+ case BIOCFEEDBACK:
+ d->bd_feedback = *(u_int *)addr;
+ break;
+
+ case BIOCLOCK:
+ d->bd_locked = 1;
+ break;
+
+ case FIONBIO: /* Non-blocking I/O */
+ break;
+
+ case FIOASYNC: /* Send signal on receive packets */
+ d->bd_async = *(int *)addr;
+ break;
+
+ case FIOSETOWN:
+ error = fsetown(*(int *)addr, &d->bd_sigio);
+ break;
+
+ case FIOGETOWN:
+ *(int *)addr = fgetown(&d->bd_sigio);
+ break;
+
+ /* This is deprecated, FIOSETOWN should be used instead. */
+ case TIOCSPGRP:
+ error = fsetown(-(*(int *)addr), &d->bd_sigio);
+ break;
+
+ /* This is deprecated, FIOGETOWN should be used instead. */
+ case TIOCGPGRP:
+ *(int *)addr = -fgetown(&d->bd_sigio);
+ break;
+
+ case BIOCSRSIG: /* Set receive signal */
+ {
+ u_int sig;
+
+ sig = *(u_int *)addr;
+
+ if (sig >= NSIG)
+ error = EINVAL;
+ else
+ d->bd_sig = sig;
+ break;
+ }
+ case BIOCGRSIG:
+ *(u_int *)addr = d->bd_sig;
+ break;
+
+ case BIOCGETBUFMODE:
+ *(u_int *)addr = d->bd_bufmode;
+ break;
+
+ case BIOCSETBUFMODE:
+ /*
+ * Allow the buffering mode to be changed as long as we
+ * haven't yet committed to a particular mode. Our
+ * definition of commitment, for now, is whether or not a
+ * buffer has been allocated or an interface attached, since
+ * that's the point where things get tricky.
+ */
+ switch (*(u_int *)addr) {
+ case BPF_BUFMODE_BUFFER:
+ break;
+
+ case BPF_BUFMODE_ZBUF:
+ if (bpf_zerocopy_enable)
+ break;
+			/* FALLTHROUGH */
+
+ default:
+ CURVNET_RESTORE();
+ return (EINVAL);
+ }
+
+ BPFD_LOCK(d);
+ if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
+ d->bd_fbuf != NULL || d->bd_bif != NULL) {
+ BPFD_UNLOCK(d);
+ CURVNET_RESTORE();
+ return (EBUSY);
+ }
+ d->bd_bufmode = *(u_int *)addr;
+ BPFD_UNLOCK(d);
+ break;
+
+ case BIOCGETZMAX:
+ error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
+ break;
+
+ case BIOCSETZBUF:
+ error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
+ break;
+
+ case BIOCROTZBUF:
+ error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
+ break;
+ }
+ CURVNET_RESTORE();
+ return (error);
+}
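
The BIOCSETBUFMODE handling above imposes an ordering on zero-copy users:
switch modes before any buffer or interface is attached, register the two
shared buffers, and only then bind the interface. A hedged sketch, assuming
net.bpf.zerocopy_enable is set and the buffers are page-aligned; desired_len
and ifr are set up by the caller:

	#include <sys/mman.h>

	struct bpf_zbuf bz;
	u_int mode = BPF_BUFMODE_ZBUF;
	size_t zmax;

	if (ioctl(fd, BIOCSETBUFMODE, &mode) == -1 ||
	    ioctl(fd, BIOCGETZMAX, &zmax) == -1)
		err(1, "zero-copy unavailable");
	memset(&bz, 0, sizeof(bz));
	bz.bz_buflen = desired_len < zmax ? desired_len : zmax;
	bz.bz_bufa = mmap(NULL, bz.bz_buflen, PROT_READ | PROT_WRITE,
	    MAP_ANON, -1, 0);
	bz.bz_bufb = mmap(NULL, bz.bz_buflen, PROT_READ | PROT_WRITE,
	    MAP_ANON, -1, 0);
	if (ioctl(fd, BIOCSETZBUF, &bz) == -1 ||
	    ioctl(fd, BIOCSETIF, &ifr) == -1)
		err(1, "zero-copy setup");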
+
+/*
+ * Set d's packet filter program to fp. If this file already has a filter,
+ * free it and replace it. Returns EINVAL for bogus requests.
+ */
+static int
+bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
+{
+ struct bpf_insn *fcode, *old;
+ u_int wfilter, flen, size;
+#ifdef BPF_JITTER
+ bpf_jit_filter *ofunc;
+#endif
+#ifdef COMPAT_FREEBSD32
+ struct bpf_program32 *fp32;
+ struct bpf_program fp_swab;
+
+ if (cmd == BIOCSETWF32 || cmd == BIOCSETF32 || cmd == BIOCSETFNR32) {
+ fp32 = (struct bpf_program32 *)fp;
+ fp_swab.bf_len = fp32->bf_len;
+ fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
+ fp = &fp_swab;
+ if (cmd == BIOCSETWF32)
+ cmd = BIOCSETWF;
+ }
+#endif
+ if (cmd == BIOCSETWF) {
+ old = d->bd_wfilter;
+ wfilter = 1;
+#ifdef BPF_JITTER
+ ofunc = NULL;
+#endif
+ } else {
+ wfilter = 0;
+ old = d->bd_rfilter;
+#ifdef BPF_JITTER
+ ofunc = d->bd_bfilter;
+#endif
+ }
+ if (fp->bf_insns == NULL) {
+ if (fp->bf_len != 0)
+ return (EINVAL);
+ BPFD_LOCK(d);
+ if (wfilter)
+ d->bd_wfilter = NULL;
+ else {
+ d->bd_rfilter = NULL;
+#ifdef BPF_JITTER
+ d->bd_bfilter = NULL;
+#endif
+ if (cmd == BIOCSETF)
+ reset_d(d);
+ }
+ BPFD_UNLOCK(d);
+ if (old != NULL)
+ free((caddr_t)old, M_BPF);
+#ifdef BPF_JITTER
+ if (ofunc != NULL)
+ bpf_destroy_jit_filter(ofunc);
+#endif
+ return (0);
+ }
+ flen = fp->bf_len;
+ if (flen > bpf_maxinsns)
+ return (EINVAL);
+
+ size = flen * sizeof(*fp->bf_insns);
+ fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
+ if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
+ bpf_validate(fcode, (int)flen)) {
+ BPFD_LOCK(d);
+ if (wfilter)
+ d->bd_wfilter = fcode;
+ else {
+ d->bd_rfilter = fcode;
+#ifdef BPF_JITTER
+ d->bd_bfilter = bpf_jitter(fcode, flen);
+#endif
+ if (cmd == BIOCSETF)
+ reset_d(d);
+ }
+ BPFD_UNLOCK(d);
+ if (old != NULL)
+ free((caddr_t)old, M_BPF);
+#ifdef BPF_JITTER
+ if (ofunc != NULL)
+ bpf_destroy_jit_filter(ofunc);
+#endif
+
+ return (0);
+ }
+ free((caddr_t)fcode, M_BPF);
+ return (EINVAL);
+}
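
On the user side of bpf_setf() sits a hand-built instruction array. For
example, the classic four-instruction program that accepts IPv4-over-Ethernet
packets in full and rejects everything else could be installed like this
(a sketch; fd as in the earlier examples):

	#include <net/ethernet.h>	/* ETHERTYPE_IP */

	struct bpf_insn insns[] = {
		/* Load the 16-bit ethertype at offset 12. */
		BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),
		/* If it is IPv4, fall through; otherwise skip to reject. */
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 1),
		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),	/* accept whole pkt */
		BPF_STMT(BPF_RET + BPF_K, 0),		/* reject */
	};
	struct bpf_program prog = {
		.bf_len = sizeof(insns) / sizeof(insns[0]),
		.bf_insns = insns,
	};

	if (ioctl(fd, BIOCSETF, &prog) == -1)
		err(1, "BIOCSETF");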
+
+/*
+ * Detach a file from its current interface (if attached at all) and attach
+ * to the interface indicated by the name stored in ifr.
+ * Return an errno or 0.
+ */
+static int
+bpf_setif(struct bpf_d *d, struct ifreq *ifr)
+{
+ struct bpf_if *bp;
+ struct ifnet *theywant;
+
+ theywant = ifunit(ifr->ifr_name);
+ if (theywant == NULL || theywant->if_bpf == NULL)
+ return (ENXIO);
+
+ bp = theywant->if_bpf;
+
+ /*
+ * Behavior here depends on the buffering model. If we're using
+ * kernel memory buffers, then we can allocate them here. If we're
+ * using zero-copy, then the user process must have registered
+ * buffers by the time we get here. If not, return an error.
+ *
+ * XXXRW: There are locking issues here with multi-threaded use: what
+ * if two threads try to set the interface at once?
+ */
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_BUFFER:
+ if (d->bd_sbuf == NULL)
+ bpf_buffer_alloc(d);
+ KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
+ break;
+
+ case BPF_BUFMODE_ZBUF:
+ if (d->bd_sbuf == NULL)
+ return (EINVAL);
+ break;
+
+ default:
+ panic("bpf_setif: bufmode %d", d->bd_bufmode);
+ }
+ if (bp != d->bd_bif) {
+ if (d->bd_bif)
+ /*
+ * Detach if attached to something else.
+ */
+ bpf_detachd(d);
+
+ bpf_attachd(d, bp);
+ }
+ BPFD_LOCK(d);
+ reset_d(d);
+ BPFD_UNLOCK(d);
+ return (0);
+}
+
+/*
+ * Support for select() and poll() system calls
+ *
+ * Return true iff the specific operation will not block indefinitely.
+ * Otherwise, return false but make a note that a selwakeup() must be done.
+ */
+static int
+bpfpoll(struct cdev *dev, int events, struct thread *td)
+{
+ struct bpf_d *d;
+ int revents;
+
+ if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
+ return (events &
+ (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
+
+ /*
+ * Refresh PID associated with this descriptor.
+ */
+ revents = events & (POLLOUT | POLLWRNORM);
+ BPFD_LOCK(d);
+ d->bd_pid = td->td_proc->p_pid;
+ if (events & (POLLIN | POLLRDNORM)) {
+ if (bpf_ready(d))
+ revents |= events & (POLLIN | POLLRDNORM);
+ else {
+ selrecord(td, &d->bd_sel);
+ /* Start the read timeout if necessary. */
+ if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
+ callout_reset(&d->bd_callout, d->bd_rtout,
+ bpf_timed_out, d);
+ d->bd_state = BPF_WAITING;
+ }
+ }
+ }
+ BPFD_UNLOCK(d);
+ return (revents);
+}
+
+/*
+ * Support for kevent() system call. Register EVFILT_READ filters and
+ * reject all others.
+ */
+int
+bpfkqfilter(struct cdev *dev, struct knote *kn)
+{
+ struct bpf_d *d;
+
+ if (devfs_get_cdevpriv((void **)&d) != 0 ||
+ kn->kn_filter != EVFILT_READ)
+ return (1);
+
+ /*
+ * Refresh PID associated with this descriptor.
+ */
+ BPFD_LOCK(d);
+ d->bd_pid = curthread->td_proc->p_pid;
+ kn->kn_fop = &bpfread_filtops;
+ kn->kn_hook = d;
+ knlist_add(&d->bd_sel.si_note, kn, 1);
+ BPFD_UNLOCK(d);
+
+ return (0);
+}
+
+static void
+filt_bpfdetach(struct knote *kn)
+{
+ struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
+
+ knlist_remove(&d->bd_sel.si_note, kn, 0);
+}
+
+static int
+filt_bpfread(struct knote *kn, long hint)
+{
+ struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
+ int ready;
+
+ BPFD_LOCK_ASSERT(d);
+ ready = bpf_ready(d);
+ if (ready) {
+ kn->kn_data = d->bd_slen;
+ if (d->bd_hbuf)
+ kn->kn_data += d->bd_hlen;
+ } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
+ callout_reset(&d->bd_callout, d->bd_rtout,
+ bpf_timed_out, d);
+ d->bd_state = BPF_WAITING;
+ }
+
+ return (ready);
+}
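
Because the driver registers EVFILT_READ through bpfread_filtops above, a BPF
descriptor multiplexes with kqueue like any other file descriptor. A brief
sketch:

	#include <sys/event.h>

	struct kevent kev, ev;
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent");
	/* Blocks until filt_bpfread() reports a readable buffer. */
	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
		(void)read(fd, buf, buflen);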
+
+/*
+ * Incoming linkage from device drivers. Process the packet pkt, of length
+ * pktlen, which is stored in a contiguous buffer. The packet is parsed
+ * by each process' filter, and if accepted, stashed into the corresponding
+ * buffer.
+ */
+void
+bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
+{
+ struct bpf_d *d;
+#ifdef BPF_JITTER
+ bpf_jit_filter *bf;
+#endif
+ u_int slen;
+ int gottime;
+ struct timeval tv;
+
+ gottime = 0;
+ BPFIF_LOCK(bp);
+ LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
+ BPFD_LOCK(d);
+ ++d->bd_rcount;
+ /*
+		 * NB: We don't call BPF_CHECK_DIRECTION() here since there is no
+		 * way for the caller to indicate to us whether this packet
+ * is inbound or outbound. In the bpf_mtap() routines, we use
+ * the interface pointers on the mbuf to figure it out.
+ */
+#ifdef BPF_JITTER
+ bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
+ if (bf != NULL)
+ slen = (*(bf->func))(pkt, pktlen, pktlen);
+ else
+#endif
+ slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
+ if (slen != 0) {
+ d->bd_fcount++;
+ if (!gottime) {
+ microtime(&tv);
+ gottime = 1;
+ }
+#ifdef MAC
+ if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+#endif
+ catchpacket(d, pkt, pktlen, slen,
+ bpf_append_bytes, &tv);
+ }
+ BPFD_UNLOCK(d);
+ }
+ BPFIF_UNLOCK(bp);
+}
+
+#define BPF_CHECK_DIRECTION(d, r, i) \
+ (((d)->bd_direction == BPF_D_IN && (r) != (i)) || \
+ ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
+
+/*
+ * Incoming linkage from device drivers, when packet is in an mbuf chain.
+ */
+void
+bpf_mtap(struct bpf_if *bp, struct mbuf *m)
+{
+ struct bpf_d *d;
+#ifdef BPF_JITTER
+ bpf_jit_filter *bf;
+#endif
+ u_int pktlen, slen;
+ int gottime;
+ struct timeval tv;
+
+ /* Skip outgoing duplicate packets. */
+ if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
+ m->m_flags &= ~M_PROMISC;
+ return;
+ }
+
+ gottime = 0;
+
+ pktlen = m_length(m, NULL);
+
+ BPFIF_LOCK(bp);
+ LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
+ if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
+ continue;
+ BPFD_LOCK(d);
+ ++d->bd_rcount;
+#ifdef BPF_JITTER
+ bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
+ /* XXX We cannot handle multiple mbufs. */
+ if (bf != NULL && m->m_next == NULL)
+ slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
+ else
+#endif
+ slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
+ if (slen != 0) {
+ d->bd_fcount++;
+ if (!gottime) {
+ microtime(&tv);
+ gottime = 1;
+ }
+#ifdef MAC
+ if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+#endif
+ catchpacket(d, (u_char *)m, pktlen, slen,
+ bpf_append_mbuf, &tv);
+ }
+ BPFD_UNLOCK(d);
+ }
+ BPFIF_UNLOCK(bp);
+}
+
+/*
+ * Incoming linkage from device drivers, when packet is in
+ * an mbuf chain and to be prepended by a contiguous header.
+ */
+void
+bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
+{
+ struct mbuf mb;
+ struct bpf_d *d;
+ u_int pktlen, slen;
+ int gottime;
+ struct timeval tv;
+
+ /* Skip outgoing duplicate packets. */
+ if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
+ m->m_flags &= ~M_PROMISC;
+ return;
+ }
+
+ gottime = 0;
+
+ pktlen = m_length(m, NULL);
+ /*
+ * Craft on-stack mbuf suitable for passing to bpf_filter.
+ * Note that we cut corners here; we only setup what's
+ * absolutely needed--this mbuf should never go anywhere else.
+ */
+ mb.m_next = m;
+ mb.m_data = data;
+ mb.m_len = dlen;
+ pktlen += dlen;
+
+ BPFIF_LOCK(bp);
+ LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
+ if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
+ continue;
+ BPFD_LOCK(d);
+ ++d->bd_rcount;
+ slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
+ if (slen != 0) {
+ d->bd_fcount++;
+ if (!gottime) {
+ microtime(&tv);
+ gottime = 1;
+ }
+#ifdef MAC
+ if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+#endif
+ catchpacket(d, (u_char *)&mb, pktlen, slen,
+ bpf_append_mbuf, &tv);
+ }
+ BPFD_UNLOCK(d);
+ }
+ BPFIF_UNLOCK(bp);
+}
+
+#undef BPF_CHECK_DIRECTION
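
Drivers normally reach these tap routines through the BPF_MTAP() macro from
net/bpf.h, which tests bpf_peers_present() first so the common no-listener
case costs a single branch. A hypothetical transmit path (all foo_* names
are invented for illustration):

	static void
	foo_start_locked(struct foo_softc *sc)
	{
		struct ifnet *ifp = sc->foo_ifp;
		struct mbuf *m;

		for (;;) {
			IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
			if (m == NULL)
				break;
			/* Feed a copy to attached BPF listeners, if any. */
			BPF_MTAP(ifp, m);
			foo_encap_and_transmit(sc, m);
		}
	}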
+
+/*
+ * Move the packet data from interface memory (pkt) into the
+ * store buffer. "cpfn" is the routine called to do the actual data
+ * transfer. bpf_append_bytes is passed in to copy contiguous chunks, while
+ * bpf_append_mbuf is passed in to copy mbuf chains. In the latter case,
+ * pkt is really an mbuf.
+ */
+static void
+catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
+ void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
+ struct timeval *tv)
+{
+ struct bpf_hdr hdr;
+#ifdef COMPAT_FREEBSD32
+ struct bpf_hdr32 hdr32;
+#endif
+ int totlen, curlen;
+ int hdrlen = d->bd_bif->bif_hdrlen;
+ int do_wakeup = 0;
+
+ BPFD_LOCK_ASSERT(d);
+
+ /*
+ * Detect whether user space has released a buffer back to us, and if
+ * so, move it from being a hold buffer to a free buffer. This may
+ * not be the best place to do it (for example, we might only want to
+ * run this check if we need the space), but for now it's a reliable
+ * spot to do it.
+ */
+ if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
+ d->bd_fbuf = d->bd_hbuf;
+ d->bd_hbuf = NULL;
+ d->bd_hlen = 0;
+ bpf_buf_reclaimed(d);
+ }
+
+ /*
+ * Figure out how many bytes to move. If the packet is
+ * greater or equal to the snapshot length, transfer that
+ * much. Otherwise, transfer the whole packet (unless
+ * we hit the buffer size limit).
+ */
+ totlen = hdrlen + min(snaplen, pktlen);
+ if (totlen > d->bd_bufsize)
+ totlen = d->bd_bufsize;
+
+ /*
+ * Round up the end of the previous packet to the next longword.
+ *
+	 * Drop the packet if there's no room and no hope of room.
+ * If the packet would overflow the storage buffer or the storage
+ * buffer is considered immutable by the buffer model, try to rotate
+ * the buffer and wakeup pending processes.
+ */
+#ifdef COMPAT_FREEBSD32
+ if (d->bd_compat32)
+ curlen = BPF_WORDALIGN32(d->bd_slen);
+ else
+#endif
+ curlen = BPF_WORDALIGN(d->bd_slen);
+ if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
+ if (d->bd_fbuf == NULL) {
+ /*
+ * There's no room in the store buffer, and no
+ * prospect of room, so drop the packet. Notify the
+ * buffer model.
+ */
+ bpf_buffull(d);
+ ++d->bd_dcount;
+ return;
+ }
+ ROTATE_BUFFERS(d);
+ do_wakeup = 1;
+ curlen = 0;
+ } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
+ /*
+ * Immediate mode is set, or the read timeout has already
+ * expired during a select call. A packet arrived, so the
+ * reader should be woken up.
+ */
+ do_wakeup = 1;
+#ifdef COMPAT_FREEBSD32
+ /*
+ * If this is a 32-bit stream, then stick a 32-bit header at the
+ * front and copy the data into the buffer.
+ */
+ if (d->bd_compat32) {
+ bzero(&hdr32, sizeof(hdr32));
+ hdr32.bh_tstamp.tv_sec = tv->tv_sec;
+ hdr32.bh_tstamp.tv_usec = tv->tv_usec;
+ hdr32.bh_datalen = pktlen;
+ hdr32.bh_hdrlen = hdrlen;
+ hdr.bh_caplen = hdr32.bh_caplen = totlen - hdrlen;
+ bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32, sizeof(hdr32));
+ goto copy;
+ }
+#endif
+
+ /*
+ * Append the bpf header. Note we append the actual header size, but
+ * move forward the length of the header plus padding.
+ */
+ bzero(&hdr, sizeof(hdr));
+ hdr.bh_tstamp = *tv;
+ hdr.bh_datalen = pktlen;
+ hdr.bh_hdrlen = hdrlen;
+ hdr.bh_caplen = totlen - hdrlen;
+ bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
+
+ /*
+ * Copy the packet data into the store buffer and update its length.
+ */
+#ifdef COMPAT_FREEBSD32
+ copy:
+#endif
+ (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
+ d->bd_slen = curlen + totlen;
+
+ if (do_wakeup)
+ bpf_wakeup(d);
+}
+
+/*
+ * Free buffers currently in use by a descriptor.
+ * Called on close.
+ */
+static void
+bpf_freed(struct bpf_d *d)
+{
+
+ /*
+ * We don't need to lock out interrupts since this descriptor has
+	 * been detached from its interface and has not yet been marked
+ * free.
+ */
+ bpf_free(d);
+ if (d->bd_rfilter != NULL) {
+ free((caddr_t)d->bd_rfilter, M_BPF);
+#ifdef BPF_JITTER
+ if (d->bd_bfilter != NULL)
+ bpf_destroy_jit_filter(d->bd_bfilter);
+#endif
+ }
+ if (d->bd_wfilter != NULL)
+ free((caddr_t)d->bd_wfilter, M_BPF);
+ mtx_destroy(&d->bd_mtx);
+}
+
+/*
+ * Attach an interface to bpf. dlt is the link layer type; hdrlen is the
+ * fixed size of the link header (variable length headers not yet supported).
+ */
+void
+bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
+{
+
+ bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
+}
+
+/*
+ * Attach an interface to bpf. ifp is a pointer to the structure
+ * defining the interface to be attached, dlt is the link layer type,
+ * and hdrlen is the fixed size of the link header (variable length
+ * headers are not yet supported).
+ */
+void
+bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
+{
+ struct bpf_if *bp;
+
+ bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
+ if (bp == NULL)
+ panic("bpfattach");
+
+ LIST_INIT(&bp->bif_dlist);
+ bp->bif_ifp = ifp;
+ bp->bif_dlt = dlt;
+ mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
+ KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
+ *driverp = bp;
+
+ mtx_lock(&bpf_mtx);
+ LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
+ mtx_unlock(&bpf_mtx);
+
+ /*
+ * Compute the length of the bpf header. This is not necessarily
+ * equal to SIZEOF_BPF_HDR because we want to insert spacing such
+ * that the network layer header begins on a longword boundary (for
+ * performance reasons and to alleviate alignment restrictions).
+ */
+ bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
+
+ if (bootverbose)
+ if_printf(ifp, "bpf attached\n");
+}
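
A worked instance of the bif_hdrlen computation, assuming an ILP32 target
where BPF_WORDALIGN() rounds to 4 bytes and SIZEOF_BPF_HDR is 18: for
DLT_EN10MB the link header is ETHER_HDR_LEN (14 bytes), so

	bif_hdrlen = BPF_WORDALIGN(14 + 18) - 14;	/* = 32 - 14 = 18 */

Each record then carries an 18-byte BPF header followed directly by the
14-byte Ethernet header, which places the IP header at offset 32, a longword
boundary.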
+
+/*
+ * Detach bpf from an interface. This involves detaching each descriptor
+ * associated with the interface, and leaving bd_bif NULL. Notify each
+ * descriptor as it's detached so that any sleepers wake up and get
+ * ENXIO.
+ */
+void
+bpfdetach(struct ifnet *ifp)
+{
+ struct bpf_if *bp;
+ struct bpf_d *d;
+
+ /* Locate BPF interface information */
+ mtx_lock(&bpf_mtx);
+ LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ if (ifp == bp->bif_ifp)
+ break;
+ }
+
+ /* Interface wasn't attached */
+ if ((bp == NULL) || (bp->bif_ifp == NULL)) {
+ mtx_unlock(&bpf_mtx);
+ printf("bpfdetach: %s was not attached\n", ifp->if_xname);
+ return;
+ }
+
+ LIST_REMOVE(bp, bif_next);
+ mtx_unlock(&bpf_mtx);
+
+ while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
+ bpf_detachd(d);
+ BPFD_LOCK(d);
+ bpf_wakeup(d);
+ BPFD_UNLOCK(d);
+ }
+
+ mtx_destroy(&bp->bif_mtx);
+ free(bp, M_BPF);
+}
+
+/*
+ * Get a list of available data link type of the interface.
+ */
+static int
+bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
+{
+ int n, error;
+ struct ifnet *ifp;
+ struct bpf_if *bp;
+
+ ifp = d->bd_bif->bif_ifp;
+ n = 0;
+ error = 0;
+ mtx_lock(&bpf_mtx);
+ LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ if (bp->bif_ifp != ifp)
+ continue;
+ if (bfl->bfl_list != NULL) {
+ if (n >= bfl->bfl_len) {
+ mtx_unlock(&bpf_mtx);
+ return (ENOMEM);
+ }
+ error = copyout(&bp->bif_dlt,
+ bfl->bfl_list + n, sizeof(u_int));
+ }
+ n++;
+ }
+ mtx_unlock(&bpf_mtx);
+ bfl->bfl_len = n;
+ return (error);
+}
+
+/*
+ * Set the data link type of a BPF instance.
+ */
+static int
+bpf_setdlt(struct bpf_d *d, u_int dlt)
+{
+ int error, opromisc;
+ struct ifnet *ifp;
+ struct bpf_if *bp;
+
+ if (d->bd_bif->bif_dlt == dlt)
+ return (0);
+ ifp = d->bd_bif->bif_ifp;
+ mtx_lock(&bpf_mtx);
+ LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
+ break;
+ }
+ mtx_unlock(&bpf_mtx);
+ if (bp != NULL) {
+ opromisc = d->bd_promisc;
+ bpf_detachd(d);
+ bpf_attachd(d, bp);
+ BPFD_LOCK(d);
+ reset_d(d);
+ BPFD_UNLOCK(d);
+ if (opromisc) {
+ error = ifpromisc(bp->bif_ifp, 1);
+ if (error)
+ if_printf(bp->bif_ifp,
+ "bpf_setdlt: ifpromisc failed (%d)\n",
+ error);
+ else
+ d->bd_promisc = 1;
+ }
+ }
+ return (bp == NULL ? EINVAL : 0);
+}
+
+static void
+bpf_drvinit(void *unused)
+{
+ struct cdev *dev;
+
+ mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
+ LIST_INIT(&bpf_iflist);
+
+ dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
+ /* For compatibility */
+ make_dev_alias(dev, "bpf0");
+}
+
+/*
+ * Zero out the various packet counters associated with all of the bpf
+ * descriptors. At some point, we will probably want to get a bit more
+ * granular and allow the user to specify descriptors to be zeroed.
+ */
+static void
+bpf_zero_counters(void)
+{
+ struct bpf_if *bp;
+ struct bpf_d *bd;
+
+ mtx_lock(&bpf_mtx);
+ LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ BPFIF_LOCK(bp);
+ LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
+ BPFD_LOCK(bd);
+ bd->bd_rcount = 0;
+ bd->bd_dcount = 0;
+ bd->bd_fcount = 0;
+ bd->bd_wcount = 0;
+ bd->bd_wfcount = 0;
+ bd->bd_zcopy = 0;
+ BPFD_UNLOCK(bd);
+ }
+ BPFIF_UNLOCK(bp);
+ }
+ mtx_unlock(&bpf_mtx);
+}
+
+static void
+bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
+{
+
+ bzero(d, sizeof(*d));
+ BPFD_LOCK_ASSERT(bd);
+ d->bd_structsize = sizeof(*d);
+ d->bd_immediate = bd->bd_immediate;
+ d->bd_promisc = bd->bd_promisc;
+ d->bd_hdrcmplt = bd->bd_hdrcmplt;
+ d->bd_direction = bd->bd_direction;
+ d->bd_feedback = bd->bd_feedback;
+ d->bd_async = bd->bd_async;
+ d->bd_rcount = bd->bd_rcount;
+ d->bd_dcount = bd->bd_dcount;
+ d->bd_fcount = bd->bd_fcount;
+ d->bd_sig = bd->bd_sig;
+ d->bd_slen = bd->bd_slen;
+ d->bd_hlen = bd->bd_hlen;
+ d->bd_bufsize = bd->bd_bufsize;
+ d->bd_pid = bd->bd_pid;
+ strlcpy(d->bd_ifname,
+ bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
+ d->bd_locked = bd->bd_locked;
+ d->bd_wcount = bd->bd_wcount;
+ d->bd_wdcount = bd->bd_wdcount;
+ d->bd_wfcount = bd->bd_wfcount;
+ d->bd_zcopy = bd->bd_zcopy;
+ d->bd_bufmode = bd->bd_bufmode;
+}
+
+static int
+bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct xbpf_d *xbdbuf, *xbd, zerostats;
+ int index, error;
+ struct bpf_if *bp;
+ struct bpf_d *bd;
+
+ /*
+	 * XXX This is not technically correct. It is possible for
+	 * non-privileged users to open bpf devices. It would make sense
+ * if the users who opened the devices were able to retrieve
+ * the statistics for them, too.
+ */
+ error = priv_check(req->td, PRIV_NET_BPF);
+ if (error)
+ return (error);
+ /*
+ * Check to see if the user is requesting that the counters be
+ * zeroed out. Explicitly check that the supplied data is zeroed,
+ * as we aren't allowing the user to set the counters currently.
+ */
+ if (req->newptr != NULL) {
+ if (req->newlen != sizeof(zerostats))
+ return (EINVAL);
+ bzero(&zerostats, sizeof(zerostats));
+ xbd = req->newptr;
+ if (bcmp(xbd, &zerostats, sizeof(*xbd)) != 0)
+ return (EINVAL);
+ bpf_zero_counters();
+ return (0);
+ }
+ if (req->oldptr == NULL)
+ return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
+ if (bpf_bpfd_cnt == 0)
+ return (SYSCTL_OUT(req, 0, 0));
+ xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
+ mtx_lock(&bpf_mtx);
+ if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
+ mtx_unlock(&bpf_mtx);
+ free(xbdbuf, M_BPF);
+ return (ENOMEM);
+ }
+ index = 0;
+ LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ BPFIF_LOCK(bp);
+ LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
+ xbd = &xbdbuf[index++];
+ BPFD_LOCK(bd);
+ bpfstats_fill_xbpf(xbd, bd);
+ BPFD_UNLOCK(bd);
+ }
+ BPFIF_UNLOCK(bp);
+ }
+ mtx_unlock(&bpf_mtx);
+ error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
+ free(xbdbuf, M_BPF);
+ return (error);
+}
+
+SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);
+
+#else /* !DEV_BPF && !NETGRAPH_BPF */
+/*
+ * NOP stubs to allow bpf-using drivers to load and function.
+ *
+ * A 'better' implementation would allow the core bpf functionality
+ * to be loaded at runtime.
+ */
+static struct bpf_if bp_null;
+
+void
+bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
+{
+}
+
+void
+bpf_mtap(struct bpf_if *bp, struct mbuf *m)
+{
+}
+
+void
+bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
+{
+}
+
+void
+bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
+{
+
+ bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
+}
+
+void
+bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
+{
+
+ *driverp = &bp_null;
+}
+
+void
+bpfdetach(struct ifnet *ifp)
+{
+}
+
+u_int
+bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
+{
+	return ((u_int)-1);	/* "no filter" behaviour */
+}
+
+int
+bpf_validate(const struct bpf_insn *f, int len)
+{
+ return 0; /* false */
+}
+
+#endif /* !DEV_BPF && !NETGRAPH_BPF */
diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h
new file mode 100644
index 00000000..d9dd4289
--- /dev/null
+++ b/freebsd/sys/net/bpf.h
@@ -0,0 +1,974 @@
+/*-
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf.h 8.1 (Berkeley) 6/10/93
+ * @(#)bpf.h 1.34 (LBL) 6/16/96
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_BPF_HH_
+#define _NET_BPF_HH_
+
+/* BSD style release date */
+#define BPF_RELEASE 199606
+
+typedef int32_t bpf_int32;
+typedef u_int32_t bpf_u_int32;
+
+/*
+ * Alignment macros. BPF_WORDALIGN rounds up to the next
+ * even multiple of BPF_ALIGNMENT.
+ */
+#define BPF_ALIGNMENT sizeof(long)
+#define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
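+
+/*
+ * For example, on a machine where sizeof(long) == 8, BPF_WORDALIGN(18)
+ * yields ((18 + 7) & ~7) == 24, so the packet data following an 18-byte
+ * bpf_hdr starts on a long-word boundary.
+ */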
+
+#define BPF_MAXINSNS 512
+#define BPF_MAXBUFSIZE 0x80000
+#define BPF_MINBUFSIZE 32
+
+/*
+ * Structure for BIOCSETF.
+ */
+struct bpf_program {
+ u_int bf_len;
+ struct bpf_insn *bf_insns;
+};
+
+/*
+ * Struct returned by BIOCGSTATS.
+ */
+struct bpf_stat {
+ u_int bs_recv; /* number of packets received */
+ u_int bs_drop; /* number of packets dropped */
+};
+
+/*
+ * Struct returned by BIOCVERSION. This represents the version number of
+ * the filter language described by the instruction encodings below.
+ * bpf understands a program iff kernel_major == filter_major &&
+ * kernel_minor >= filter_minor, that is, if the value returned by the
+ * running kernel has the same major number and a minor number equal
+ * to or less than that of the filter being downloaded. Otherwise, the
+ * results are undefined, meaning an error may be returned or packets
+ * may be accepted haphazardly.
+ * It has nothing to do with the source code version.
+ */
+struct bpf_version {
+ u_short bv_major;
+ u_short bv_minor;
+};
+/* Current version number of filter architecture. */
+#define BPF_MAJOR_VERSION 1
+#define BPF_MINOR_VERSION 1
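+
+/*
+ * A userland sketch of the check described above ("fd" is assumed to be an
+ * open bpf device):
+ *
+ *	struct bpf_version bv;
+ *
+ *	if (ioctl(fd, BIOCVERSION, &bv) < 0 ||
+ *	    bv.bv_major != BPF_MAJOR_VERSION ||
+ *	    bv.bv_minor < BPF_MINOR_VERSION)
+ *		... refuse to use the descriptor ...
+ */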
+
+/*
+ * Historically, BPF has supported a single buffering model, first using mbuf
+ * clusters in kernel, and later using malloc(9) buffers in kernel. We now
+ * support multiple buffering modes, which may be queried and set using
+ * BIOCGETBUFMODE and BIOCSETBUFMODE. So as to avoid handling the complexity
+ * of changing modes while sniffing packets, the mode becomes fixed once an
+ * interface has been attached to the BPF descriptor.
+ */
+#define BPF_BUFMODE_BUFFER 1 /* Kernel buffers with read(). */
+#define BPF_BUFMODE_ZBUF 2 /* Zero-copy buffers. */
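+
+/*
+ * For example, a process selecting zero-copy buffers does so before binding
+ * the descriptor to an interface with BIOCSETIF (sketch; "fd" is assumed to
+ * be an open bpf device, error handling omitted):
+ *
+ *	u_int bufmode = BPF_BUFMODE_ZBUF;
+ *
+ *	(void)ioctl(fd, BIOCSETBUFMODE, &bufmode);
+ */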
+
+/*-
+ * Struct used by BIOCSETZBUF, BIOCROTZBUF: describes up to two zero-copy
+ * buffers as used by BPF.
+ */
+struct bpf_zbuf {
+ void *bz_bufa; /* Location of 'a' zero-copy buffer. */
+ void *bz_bufb; /* Location of 'b' zero-copy buffer. */
+ size_t bz_buflen; /* Size of zero-copy buffers. */
+};
+
+#define BIOCGBLEN _IOR('B',102, u_int)
+#define BIOCSBLEN _IOWR('B',102, u_int)
+#define BIOCSETF _IOW('B',103, struct bpf_program)
+#define BIOCFLUSH _IO('B',104)
+#define BIOCPROMISC _IO('B',105)
+#define BIOCGDLT _IOR('B',106, u_int)
+#define BIOCGETIF _IOR('B',107, struct ifreq)
+#define BIOCSETIF _IOW('B',108, struct ifreq)
+#define BIOCSRTIMEOUT _IOW('B',109, struct timeval)
+#define BIOCGRTIMEOUT _IOR('B',110, struct timeval)
+#define BIOCGSTATS _IOR('B',111, struct bpf_stat)
+#define BIOCIMMEDIATE _IOW('B',112, u_int)
+#define BIOCVERSION _IOR('B',113, struct bpf_version)
+#define BIOCGRSIG _IOR('B',114, u_int)
+#define BIOCSRSIG _IOW('B',115, u_int)
+#define BIOCGHDRCMPLT _IOR('B',116, u_int)
+#define BIOCSHDRCMPLT _IOW('B',117, u_int)
+#define BIOCGDIRECTION _IOR('B',118, u_int)
+#define BIOCSDIRECTION _IOW('B',119, u_int)
+#define BIOCSDLT _IOW('B',120, u_int)
+#define BIOCGDLTLIST _IOWR('B',121, struct bpf_dltlist)
+#define BIOCLOCK _IO('B', 122)
+#define BIOCSETWF _IOW('B',123, struct bpf_program)
+#define BIOCFEEDBACK _IOW('B',124, u_int)
+#define BIOCGETBUFMODE _IOR('B',125, u_int)
+#define BIOCSETBUFMODE _IOW('B',126, u_int)
+#define BIOCGETZMAX _IOR('B',127, size_t)
+#define BIOCROTZBUF _IOR('B',128, struct bpf_zbuf)
+#define BIOCSETZBUF _IOW('B',129, struct bpf_zbuf)
+#define BIOCSETFNR _IOW('B',130, struct bpf_program)
+
+/* Obsolete */
+#define BIOCGSEESENT BIOCGDIRECTION
+#define BIOCSSEESENT BIOCSDIRECTION
+
+/* Packet directions */
+enum bpf_direction {
+ BPF_D_IN, /* See incoming packets */
+ BPF_D_INOUT, /* See incoming and outgoing packets */
+ BPF_D_OUT /* See outgoing packets */
+};
+
+/*
+ * Structure prepended to each packet.
+ */
+struct bpf_hdr {
+ struct timeval bh_tstamp; /* time stamp */
+ bpf_u_int32 bh_caplen; /* length of captured portion */
+ bpf_u_int32 bh_datalen; /* original length of packet */
+ u_short bh_hdrlen; /* length of bpf header (this struct
+ plus alignment padding) */
+};
+/*
+ * Because the structure above is not a multiple of 4 bytes, some compilers
+ * will insist on inserting padding; hence, sizeof(struct bpf_hdr) won't work.
+ * Only the kernel needs to know about it; applications use bh_hdrlen.
+ */
+#ifdef _KERNEL
+#define SIZEOF_BPF_HDR (sizeof(struct bpf_hdr) <= 20 ? 18 : \
+ sizeof(struct bpf_hdr))
+#endif
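+
+/*
+ * Applications therefore walk a read(2) buffer using bh_hdrlen and
+ * bh_caplen, not sizeof(struct bpf_hdr). A sketch, where "buf" and "cc"
+ * are the buffer and the byte count returned by read(2):
+ *
+ *	u_char *bp = buf;
+ *
+ *	while (bp < buf + cc) {
+ *		struct bpf_hdr *hp = (struct bpf_hdr *)bp;
+ *
+ *		... process hp->bh_caplen bytes at bp + hp->bh_hdrlen ...
+ *		bp += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
+ *	}
+ */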
+
+/*
+ * When using zero-copy BPF buffers, a shared memory header is present
+ * allowing the kernel BPF implementation and user process to synchronize
+ * without using system calls. This structure defines that header. When
+ * accessing these fields, appropriate atomic operations and memory barriers
+ * are required in order not to see stale or out-of-order data; see bpf(4)
+ * for reference code to access these fields from userspace.
+ *
+ * The layout of this structure is critical, and must not be changed; it must
+ * fit in a single page on all architectures.
+ */
+struct bpf_zbuf_header {
+ volatile u_int bzh_kernel_gen; /* Kernel generation number. */
+ volatile u_int bzh_kernel_len; /* Length of data in the buffer. */
+ volatile u_int bzh_user_gen; /* User generation number. */
+ u_int _bzh_pad[5];
+};
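+
+/*
+ * A minimal sketch of the hand-off implied by the generation numbers,
+ * assuming the header is mapped at "bzh" (a real consumer must use the
+ * atomic operations and memory barriers mentioned above; see bpf(4)):
+ *
+ *	if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) {
+ *		... read bzh_kernel_len bytes of packet data ...
+ *		bzh->bzh_user_gen = bzh->bzh_kernel_gen;
+ *	}
+ */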
+
+/*
+ * Data-link level type codes.
+ */
+#define DLT_NULL 0 /* BSD loopback encapsulation */
+#define DLT_EN10MB 1 /* Ethernet (10Mb) */
+#define DLT_EN3MB 2 /* Experimental Ethernet (3Mb) */
+#define DLT_AX25 3 /* Amateur Radio AX.25 */
+#define DLT_PRONET 4 /* Proteon ProNET Token Ring */
+#define DLT_CHAOS 5 /* Chaos */
+#define DLT_IEEE802 6 /* IEEE 802 Networks */
+#define DLT_ARCNET 7 /* ARCNET */
+#define DLT_SLIP 8 /* Serial Line IP */
+#define DLT_PPP 9 /* Point-to-point Protocol */
+#define DLT_FDDI 10 /* FDDI */
+#define DLT_ATM_RFC1483 11 /* LLC/SNAP encapsulated atm */
+#define DLT_RAW 12 /* raw IP */
+
+/*
+ * These are values from BSD/OS's "bpf.h".
+ * These are not the same as the values from the traditional libpcap
+ * "bpf.h"; however, these values shouldn't be generated by any
+ * OS other than BSD/OS, so the correct values to use here are the
+ * BSD/OS values.
+ *
+ * Platforms that have already assigned these values to other
+ * DLT_ codes, however, should give these codes the values
+ * from that platform, so that programs that use these codes will
+ * continue to compile - even though they won't correctly read
+ * files of these types.
+ */
+#define DLT_SLIP_BSDOS 15 /* BSD/OS Serial Line IP */
+#define DLT_PPP_BSDOS 16 /* BSD/OS Point-to-point Protocol */
+
+#define DLT_ATM_CLIP 19 /* Linux Classical-IP over ATM */
+
+/*
+ * These values are defined by NetBSD; other platforms should refrain from
+ * using them for other purposes, so that NetBSD savefiles with link
+ * types of 50 or 51 can be read as this type on all platforms.
+ */
+#define DLT_PPP_SERIAL 50 /* PPP over serial with HDLC encapsulation */
+#define DLT_PPP_ETHER 51 /* PPP over Ethernet */
+
+/*
+ * Reserved for the Symantec Enterprise Firewall.
+ */
+#define DLT_SYMANTEC_FIREWALL 99
+
+
+/*
+ * This value was defined by libpcap 0.5; platforms that have defined
+ * it with a different value should define it here with that value -
+ * a link type of 104 in a save file will be mapped to DLT_C_HDLC,
+ * whatever value that happens to be, so programs will correctly
+ * handle files with that link type regardless of the value of
+ * DLT_C_HDLC.
+ *
+ * The name DLT_C_HDLC was used by BSD/OS; we use that name for source
+ * compatibility with programs written for BSD/OS.
+ *
+ * libpcap 0.5 defined it as DLT_CHDLC; we define DLT_CHDLC as well,
+ * for source compatibility with programs written for libpcap 0.5.
+ */
+#define DLT_C_HDLC 104 /* Cisco HDLC */
+#define DLT_CHDLC DLT_C_HDLC
+
+#define DLT_IEEE802_11 105 /* IEEE 802.11 wireless */
+
+/*
+ * Values between 106 and 107 are used in capture file headers as
+ * link-layer types corresponding to DLT_ types that might differ
+ * between platforms; don't use those values for new DLT_ types.
+ */
+
+/*
+ * Frame Relay; BSD/OS has a DLT_FR with a value of 11, but that collides
+ * with other values.
+ * DLT_FR and DLT_FRELAY packets start with the Q.922 Frame Relay header
+ * (DLCI, etc.).
+ */
+#define DLT_FRELAY 107
+
+/*
+ * OpenBSD DLT_LOOP, for loopback devices; it's like DLT_NULL, except
+ * that the AF_ type in the link-layer header is in network byte order.
+ *
+ * OpenBSD defines it as 12, but that collides with DLT_RAW, so we
+ * define it as 108 here. If OpenBSD picks up this file, it should
+ * define DLT_LOOP as 12 in its version, as per the comment above -
+ * and should not use 108 as a DLT_ value.
+ */
+#define DLT_LOOP 108
+
+/*
+ * Values between 109 and 112 are used in capture file headers as
+ * link-layer types corresponding to DLT_ types that might differ
+ * between platforms; don't use those values for new DLT_ types.
+ */
+
+/*
+ * Encapsulated packets for IPsec; DLT_ENC is 13 in OpenBSD, but that's
+ * DLT_SLIP_BSDOS in NetBSD, so we don't use 13 for it in OSes other
+ * than OpenBSD.
+ */
+#define DLT_ENC 109
+
+/*
+ * This is for Linux cooked sockets.
+ */
+#define DLT_LINUX_SLL 113
+
+/*
+ * Apple LocalTalk hardware.
+ */
+#define DLT_LTALK 114
+
+/*
+ * Acorn Econet.
+ */
+#define DLT_ECONET 115
+
+/*
+ * Reserved for use with OpenBSD ipfilter.
+ */
+#define DLT_IPFILTER 116
+
+/*
+ * Reserved for use in capture-file headers as a link-layer type
+ * corresponding to OpenBSD DLT_PFLOG; DLT_PFLOG is 17 in OpenBSD,
+ * but that's DLT_LANE8023 in SuSE 6.3, so we can't use 17 for it
+ * in capture-file headers.
+ */
+#define DLT_PFLOG 117
+
+/*
+ * Registered for Cisco-internal use.
+ */
+#define DLT_CISCO_IOS 118
+
+/*
+ * Reserved for 802.11 cards using the Prism II chips, with a link-layer
+ * header including Prism monitor mode information plus an 802.11
+ * header.
+ */
+#define DLT_PRISM_HEADER 119
+
+/*
+ * Reserved for Aironet 802.11 cards, with an Aironet link-layer header
+ * (see Doug Ambrisko's FreeBSD patches).
+ */
+#define DLT_AIRONET_HEADER 120
+
+/*
+ * Reserved for use by OpenBSD's pfsync device.
+ */
+#define DLT_PFSYNC 121
+
+/*
+ * Reserved for Siemens HiPath HDLC. XXX
+ */
+#define DLT_HHDLC 121
+
+/*
+ * Reserved for RFC 2625 IP-over-Fibre Channel.
+ */
+#define DLT_IP_OVER_FC 122
+
+/*
+ * Reserved for Full Frontal ATM on Solaris.
+ */
+#define DLT_SUNATM 123
+
+/*
+ * Reserved as per request from Kent Dahlgren <kent@praesum.com>
+ * for private use.
+ */
+#define DLT_RIO 124 /* RapidIO */
+#define DLT_PCI_EXP 125 /* PCI Express */
+#define DLT_AURORA 126 /* Xilinx Aurora link layer */
+
+/*
+ * BSD header for 802.11 plus a number of bits of link-layer information
+ * including radio information.
+ */
+#ifndef DLT_IEEE802_11_RADIO
+#define DLT_IEEE802_11_RADIO 127
+#endif
+
+/*
+ * Reserved for TZSP encapsulation.
+ */
+#define DLT_TZSP 128 /* Tazmen Sniffer Protocol */
+
+/*
+ * Reserved for Linux ARCNET.
+ */
+#define DLT_ARCNET_LINUX 129
+
+/*
+ * Juniper-private data link types.
+ */
+#define DLT_JUNIPER_MLPPP 130
+#define DLT_JUNIPER_MLFR 131
+#define DLT_JUNIPER_ES 132
+#define DLT_JUNIPER_GGSN 133
+#define DLT_JUNIPER_MFR 134
+#define DLT_JUNIPER_ATM2 135
+#define DLT_JUNIPER_SERVICES 136
+#define DLT_JUNIPER_ATM1 137
+
+/*
+ * Apple IP-over-IEEE 1394, as per a request from Dieter Siegmund
+ * <dieter@apple.com>. The header that's presented is an Ethernet-like
+ * header:
+ *
+ * #define FIREWIRE_EUI64_LEN 8
+ * struct firewire_header {
+ * u_char firewire_dhost[FIREWIRE_EUI64_LEN];
+ * u_char firewire_shost[FIREWIRE_EUI64_LEN];
+ * u_short firewire_type;
+ * };
+ *
+ * with "firewire_type" being an Ethernet type value, rather than,
+ * for example, raw GASP frames being handed up.
+ */
+#define DLT_APPLE_IP_OVER_IEEE1394 138
+
+/*
+ * Various SS7 encapsulations, as per a request from Jeff Morriss
+ * <jeff.morriss[AT]ulticom.com> and subsequent discussions.
+ */
+#define DLT_MTP2_WITH_PHDR 139 /* pseudo-header with various info, followed by MTP2 */
+#define DLT_MTP2 140 /* MTP2, without pseudo-header */
+#define DLT_MTP3 141 /* MTP3, without pseudo-header or MTP2 */
+#define DLT_SCCP 142 /* SCCP, without pseudo-header or MTP2 or MTP3 */
+
+/*
+ * Reserved for DOCSIS.
+ */
+#define DLT_DOCSIS 143
+
+/*
+ * Reserved for Linux IrDA.
+ */
+#define DLT_LINUX_IRDA 144
+
+/*
+ * Reserved for IBM SP switch and IBM Next Federation switch.
+ */
+#define DLT_IBM_SP 145
+#define DLT_IBM_SN 146
+
+/*
+ * Reserved for private use. If you have some link-layer header type
+ * that you want to use within your organization, with capture files
+ * using that link-layer header type never sent outside your
+ * organization, you can use these values.
+ *
+ * No libpcap release will use these for any purpose, nor will any
+ * tcpdump release use them, either.
+ *
+ * Do *NOT* use these in capture files that you expect anybody not using
+ * your private versions of capture-file-reading tools to read; in
+ * particular, do *NOT* use them in products, otherwise you may find that
+ * people won't be able to use tcpdump, or snort, or Ethereal, or... to
+ * read capture files from your firewall/intrusion detection/traffic
+ * monitoring/etc. appliance, or whatever product uses that DLT_ value,
+ * and you may also find that the developers of those applications will
+ * not accept patches to let them read those files.
+ *
+ * Also, do not use them if somebody might send you a capture using them
+ * for *their* private type, and tools using them for *your* private
+ * type would then have to read it.
+ *
+ * Instead, ask "tcpdump-workers@tcpdump.org" for a new DLT_ value,
+ * as per the comment above, and use the type you're given.
+ */
+#define DLT_USER0 147
+#define DLT_USER1 148
+#define DLT_USER2 149
+#define DLT_USER3 150
+#define DLT_USER4 151
+#define DLT_USER5 152
+#define DLT_USER6 153
+#define DLT_USER7 154
+#define DLT_USER8 155
+#define DLT_USER9 156
+#define DLT_USER10 157
+#define DLT_USER11 158
+#define DLT_USER12 159
+#define DLT_USER13 160
+#define DLT_USER14 161
+#define DLT_USER15 162
+
+/*
+ * For future use with 802.11 captures - defined by AbsoluteValue
+ * Systems to store a number of bits of link-layer information
+ * including radio information:
+ *
+ * http://www.shaftnet.org/~pizza/software/capturefrm.txt
+ *
+ * but it might be used by some non-AVS drivers now or in the
+ * future.
+ */
+#define DLT_IEEE802_11_RADIO_AVS 163 /* 802.11 plus AVS radio header */
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes@juniper.net>. The DLT_s are used
+ * for passing on chassis-internal metainformation such as
+ * QOS profiles, etc..
+ */
+#define DLT_JUNIPER_MONITOR 164
+
+/*
+ * Reserved for BACnet MS/TP.
+ */
+#define DLT_BACNET_MS_TP 165
+
+/*
+ * Another PPP variant as per request from Karsten Keil <kkeil@suse.de>.
+ *
+ * This is used in some OSes to allow a kernel socket filter to distinguish
+ * between incoming and outgoing packets, on a socket intended to
+ * supply pppd with outgoing packets so it can do dial-on-demand and
+ * hangup-on-lack-of-demand; incoming packets are filtered out so they
+ * don't cause pppd to hold the connection up (you don't want random
+ * input packets such as port scans, packets from old lost connections,
+ * etc. to force the connection to stay up).
+ *
+ * The first byte of the PPP header (0xff03) is modified to accommodate
+ * the direction - 0x00 = IN, 0x01 = OUT.
+ */
+#define DLT_PPP_PPPD 166
+
+/*
+ * Names for backwards compatibility with older versions of some PPP
+ * software; new software should use DLT_PPP_PPPD.
+ */
+#define DLT_PPP_WITH_DIRECTION DLT_PPP_PPPD
+#define DLT_LINUX_PPP_WITHDIRECTION DLT_PPP_PPPD
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes@juniper.net>. The DLT_s are used
+ * for passing on chassis-internal metainformation such as
+ * QOS profiles, cookies, etc..
+ */
+#define DLT_JUNIPER_PPPOE 167
+#define DLT_JUNIPER_PPPOE_ATM 168
+
+#define DLT_GPRS_LLC 169 /* GPRS LLC */
+#define DLT_GPF_T 170 /* GPF-T (ITU-T G.7041/Y.1303) */
+#define DLT_GPF_F 171 /* GPF-F (ITU-T G.7041/Y.1303) */
+
+/*
+ * Requested by Oolan Zimmer <oz@gcom.com> for use in Gcom's T1/E1 line
+ * monitoring equipment.
+ */
+#define DLT_GCOM_T1E1 172
+#define DLT_GCOM_SERIAL 173
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes@juniper.net>. The DLT_ is used
+ * for internal communication to Physical Interface Cards (PIC)
+ */
+#define DLT_JUNIPER_PIC_PEER 174
+
+/*
+ * Link types requested by Gregor Maier <gregor@endace.com> of Endace
+ * Measurement Systems. They add an ERF header (see
+ * http://www.endace.com/support/EndaceRecordFormat.pdf) in front of
+ * the link-layer header.
+ */
+#define DLT_ERF_ETH 175 /* Ethernet */
+#define DLT_ERF_POS 176 /* Packet-over-SONET */
+
+/*
+ * Requested by Daniele Orlandi <daniele@orlandi.com> for raw LAPD
+ * for vISDN (http://www.orlandi.com/visdn/). Its link-layer header
+ * includes additional information before the LAPD header, so it's
+ * not necessarily a generic LAPD header.
+ */
+#define DLT_LINUX_LAPD 177
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes@juniper.net>.
+ * The DLT_s are used for prepending meta-information
+ * like interface index and interface name
+ * before standard Ethernet, PPP, Frame Relay & C-HDLC frames.
+ */
+#define DLT_JUNIPER_ETHER 178
+#define DLT_JUNIPER_PPP 179
+#define DLT_JUNIPER_FRELAY 180
+#define DLT_JUNIPER_CHDLC 181
+
+/*
+ * Multi Link Frame Relay (FRF.16)
+ */
+#define DLT_MFR 182
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes@juniper.net>.
+ * The DLT_ is used for internal communication with a
+ * voice Adapter Card (PIC)
+ */
+#define DLT_JUNIPER_VP 183
+
+/*
+ * Arinc 429 frames.
+ * DLT_ requested by Gianluca Varenni <gianluca.varenni@cacetech.com>.
+ * Every frame contains a 32bit A429 label.
+ * More documentation on Arinc 429 can be found at
+ * http://www.condoreng.com/support/downloads/tutorials/ARINCTutorial.pdf
+ */
+#define DLT_A429 184
+
+/*
+ * Arinc 653 Interpartition Communication messages.
+ * DLT_ requested by Gianluca Varenni <gianluca.varenni@cacetech.com>.
+ * Please refer to the A653-1 standard for more information.
+ */
+#define DLT_A653_ICM 185
+
+/*
+ * USB packets, beginning with a USB setup header; requested by
+ * Paolo Abeni <paolo.abeni@email.it>.
+ */
+#define DLT_USB 186
+
+/*
+ * Bluetooth HCI UART transport layer (part H:4); requested by
+ * Paolo Abeni.
+ */
+#define DLT_BLUETOOTH_HCI_H4 187
+
+/*
+ * IEEE 802.16 MAC Common Part Sublayer; requested by Maria Cruz
+ * <cruz_petagay@bah.com>.
+ */
+#define DLT_IEEE802_16_MAC_CPS 188
+
+/*
+ * USB packets, beginning with a Linux USB header; requested by
+ * Paolo Abeni <paolo.abeni@email.it>.
+ */
+#define DLT_USB_LINUX 189
+
+/*
+ * Controller Area Network (CAN) v. 2.0B packets.
+ * DLT_ requested by Gianluca Varenni <gianluca.varenni@cacetech.com>.
+ * Used to dump CAN packets coming from a CAN Vector board.
+ * More documentation on the CAN v2.0B frames can be found at
+ * http://www.can-cia.org/downloads/?269
+ */
+#define DLT_CAN20B 190
+
+/*
+ * IEEE 802.15.4, with address fields padded, as is done by Linux
+ * drivers; requested by Juergen Schimmer.
+ */
+#define DLT_IEEE802_15_4_LINUX 191
+
+/*
+ * Per Packet Information encapsulated packets.
+ * DLT_ requested by Gianluca Varenni <gianluca.varenni@cacetech.com>.
+ */
+#define DLT_PPI 192
+
+/*
+ * Header for 802.16 MAC Common Part Sublayer plus a radiotap radio header;
+ * requested by Charles Clancy.
+ */
+#define DLT_IEEE802_16_MAC_CPS_RADIO 193
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes@juniper.net>.
+ * The DLT_ is used for internal communication with an
+ * integrated service module (ISM).
+ */
+#define DLT_JUNIPER_ISM 194
+
+/*
+ * IEEE 802.15.4, exactly as it appears in the spec (no padding, no
+ * nothing); requested by Mikko Saarnivala <mikko.saarnivala@sensinode.com>.
+ */
+#define DLT_IEEE802_15_4 195
+
+/*
+ * Various link-layer types, with a pseudo-header, for SITA
+ * (http://www.sita.aero/); requested by Fulko Hew (fulko.hew@gmail.com).
+ */
+#define DLT_SITA 196
+
+/*
+ * Various link-layer types, with a pseudo-header, for Endace DAG cards;
+ * encapsulates Endace ERF records. Requested by Stephen Donnelly
+ * <stephen@endace.com>.
+ */
+#define DLT_ERF 197
+
+/*
+ * Special header prepended to Ethernet packets when capturing from a
+ * u10 Networks board. Requested by Phil Mulholland
+ * <phil@u10networks.com>.
+ */
+#define DLT_RAIF1 198
+
+/*
+ * IPMB packet for IPMI, beginning with the I2C slave address, followed
+ * by the netFn and LUN, etc.. Requested by Chanthy Toeung
+ * <chanthy.toeung@ca.kontron.com>.
+ */
+#define DLT_IPMB 199
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes@juniper.net>.
+ * The DLT_ is used for capturing data on a secure tunnel interface.
+ */
+#define DLT_JUNIPER_ST 200
+
+/*
+ * Bluetooth HCI UART transport layer (part H:4), with pseudo-header
+ * that includes direction information; requested by Paolo Abeni.
+ */
+#define DLT_BLUETOOTH_HCI_H4_WITH_PHDR 201
+
+/*
+ * AX.25 packet with a 1-byte KISS header; see
+ *
+ * http://www.ax25.net/kiss.htm
+ *
+ * as per Richard Stearn <richard@rns-stearn.demon.co.uk>.
+ */
+#define DLT_AX25_KISS 202
+
+/*
+ * LAPD packets from an ISDN channel, starting with the address field,
+ * with no pseudo-header.
+ * Requested by Varuna De Silva <varunax@gmail.com>.
+ */
+#define DLT_LAPD 203
+
+/*
+ * Variants of various link-layer headers, with a one-byte direction
+ * pseudo-header prepended - zero means "received by this host",
+ * non-zero (any non-zero value) means "sent by this host" - as per
+ * Will Barker <w.barker@zen.co.uk>.
+ */
+#define DLT_PPP_WITH_DIR 204 /* PPP - don't confuse with DLT_PPP_WITH_DIRECTION */
+#define DLT_C_HDLC_WITH_DIR 205 /* Cisco HDLC */
+#define DLT_FRELAY_WITH_DIR 206 /* Frame Relay */
+#define DLT_LAPB_WITH_DIR 207 /* LAPB */
+
+/*
+ * 208 is reserved for an as-yet-unspecified proprietary link-layer
+ * type, as requested by Will Barker.
+ */
+
+/*
+ * IPMB with a Linux-specific pseudo-header; as requested by Alexey Neyman
+ * <avn@pigeonpoint.com>.
+ */
+#define DLT_IPMB_LINUX 209
+
+/*
+ * FlexRay automotive bus - http://www.flexray.com/ - as requested
+ * by Hannes Kaelber <hannes.kaelber@x2e.de>.
+ */
+#define DLT_FLEXRAY 210
+
+/*
+ * Media Oriented Systems Transport (MOST) bus for multimedia
+ * transport - http://www.mostcooperation.com/ - as requested
+ * by Hannes Kaelber <hannes.kaelber@x2e.de>.
+ */
+#define DLT_MOST 211
+
+/*
+ * Local Interconnect Network (LIN) bus for vehicle networks -
+ * http://www.lin-subbus.org/ - as requested by Hannes Kaelber
+ * <hannes.kaelber@x2e.de>.
+ */
+#define DLT_LIN 212
+
+/*
+ * X2E-private data link type used for serial line capture,
+ * as requested by Hannes Kaelber <hannes.kaelber@x2e.de>.
+ */
+#define DLT_X2E_SERIAL 213
+
+/*
+ * X2E-private data link type used for the Xoraya data logger
+ * family, as requested by Hannes Kaelber <hannes.kaelber@x2e.de>.
+ */
+#define DLT_X2E_XORAYA 214
+
+/*
+ * IEEE 802.15.4, exactly as it appears in the spec (no padding, no
+ * nothing), but with the PHY-level data for non-ASK PHYs (4 octets
+ * of 0 as preamble, one octet of SFD, one octet of frame length+
+ * reserved bit, and then the MAC-layer data, starting with the
+ * frame control field).
+ *
+ * Requested by Max Filippov <jcmvbkbc@gmail.com>.
+ */
+#define DLT_IEEE802_15_4_NONASK_PHY 215
+
+/*
+ * DLT and savefile link type values are split into a class and
+ * a member of that class. A class value of 0 indicates a regular
+ * DLT_/LINKTYPE_ value.
+ */
+#define DLT_CLASS(x) ((x) & 0x03ff0000)
+
+/*
+ * The instruction encodings.
+ */
+/* instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00
+#define BPF_H 0x08
+#define BPF_B 0x10
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+/* ret - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code) ((code) & 0x18)
+#define BPF_A 0x10
+
+/* misc */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define BPF_TAX 0x00
+#define BPF_TXA 0x80
+
+/*
+ * The instruction data structure.
+ */
+struct bpf_insn {
+ u_short code;
+ u_char jt;
+ u_char jf;
+ bpf_u_int32 k;
+};
+
+/*
+ * Macros for insn array initializers.
+ */
+#define BPF_STMT(code, k) { (u_short)(code), 0, 0, k }
+#define BPF_JUMP(code, k, jt, jf) { (u_short)(code), jt, jf, k }
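+
+/*
+ * For example, using the initializers above, a filter that accepts only
+ * IPv4 packets on a DLT_EN10MB interface (0x0800 is the IPv4 Ethernet
+ * type; the halfword at offset 12 is the type field):
+ *
+ *	struct bpf_insn insns[] = {
+ *		BPF_STMT(BPF_LD|BPF_H|BPF_ABS, 12),
+ *		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0800, 0, 1),
+ *		BPF_STMT(BPF_RET|BPF_K, (u_int)-1),
+ *		BPF_STMT(BPF_RET|BPF_K, 0),
+ *	};
+ */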
+
+/*
+ * Structure to retrieve available DLTs for the interface.
+ */
+struct bpf_dltlist {
+	u_int bfl_len;		/* number of entries in bfl_list array */
+ u_int *bfl_list; /* array of DLTs */
+};
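+
+/*
+ * Userland typically issues BIOCGDLTLIST twice: once with bfl_list == NULL,
+ * which only fills in bfl_len, and again with an array of that many u_ints
+ * (sketch; "fd" is assumed to be a bpf device bound to an interface, error
+ * handling omitted):
+ *
+ *	struct bpf_dltlist bfl;
+ *
+ *	memset(&bfl, 0, sizeof(bfl));
+ *	(void)ioctl(fd, BIOCGDLTLIST, &bfl);
+ *	bfl.bfl_list = malloc(bfl.bfl_len * sizeof(u_int));
+ *	(void)ioctl(fd, BIOCGDLTLIST, &bfl);
+ */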
+
+#ifdef _KERNEL
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_BPF);
+#endif
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_bpf);
+#endif
+
+/*
+ * Rotate the packet buffers in descriptor d. Move the store buffer into the
+ * hold slot, and the free buffer into the store slot. Zero the length of the
+ * new store buffer. Descriptor lock should be held.
+ */
+#define ROTATE_BUFFERS(d) do { \
+ (d)->bd_hbuf = (d)->bd_sbuf; \
+ (d)->bd_hlen = (d)->bd_slen; \
+ (d)->bd_sbuf = (d)->bd_fbuf; \
+ (d)->bd_slen = 0; \
+ (d)->bd_fbuf = NULL; \
+ bpf_bufheld(d); \
+} while (0)
+
+/*
+ * Descriptor associated with each attached hardware interface.
+ */
+struct bpf_if {
+ LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
+ LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */
+ u_int bif_dlt; /* link layer type */
+ u_int bif_hdrlen; /* length of header (with padding) */
+ struct ifnet *bif_ifp; /* corresponding interface */
+ struct mtx bif_mtx; /* mutex for interface */
+};
+
+void bpf_bufheld(struct bpf_d *d);
+int bpf_validate(const struct bpf_insn *, int);
+void bpf_tap(struct bpf_if *, u_char *, u_int);
+void bpf_mtap(struct bpf_if *, struct mbuf *);
+void bpf_mtap2(struct bpf_if *, void *, u_int, struct mbuf *);
+void bpfattach(struct ifnet *, u_int, u_int);
+void bpfattach2(struct ifnet *, u_int, u_int, struct bpf_if **);
+void bpfdetach(struct ifnet *);
+
+void bpfilterattach(int);
+u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);
+
+static __inline int
+bpf_peers_present(struct bpf_if *bpf)
+{
+
+ if (!LIST_EMPTY(&bpf->bif_dlist))
+ return (1);
+ return (0);
+}
+
+#define BPF_TAP(_ifp,_pkt,_pktlen) do { \
+ if (bpf_peers_present((_ifp)->if_bpf)) \
+ bpf_tap((_ifp)->if_bpf, (_pkt), (_pktlen)); \
+} while (0)
+#define BPF_MTAP(_ifp,_m) do { \
+ if (bpf_peers_present((_ifp)->if_bpf)) { \
+ M_ASSERTVALID(_m); \
+ bpf_mtap((_ifp)->if_bpf, (_m)); \
+ } \
+} while (0)
+#define BPF_MTAP2(_ifp,_data,_dlen,_m) do { \
+ if (bpf_peers_present((_ifp)->if_bpf)) { \
+ M_ASSERTVALID(_m); \
+ bpf_mtap2((_ifp)->if_bpf,(_data),(_dlen),(_m)); \
+ } \
+} while (0)
+#endif
+
+/*
+ * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
+ */
+#define BPF_MEMWORDS 16
+
+#endif /* _NET_BPF_HH_ */
diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c
new file mode 100644
index 00000000..623b4f8a
--- /dev/null
+++ b/freebsd/sys/net/bpf_buffer.c
@@ -0,0 +1,212 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2007 Seccuris Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract to
+ * Seccuris Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf.c 8.4 (Berkeley) 1/9/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_bpf.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/bpf_buffer.h>
+#include <freebsd/net/bpfdesc.h>
+
+/*
+ * Implement historical kernel memory buffering model for BPF: two malloc(9)
+ * kernel buffers are hung off of the descriptor. The size is fixed prior to
+ * attaching to an ifnet, and cannot be changed after that. read(2) simply
+ * copies the data to user space using uiomove(9).
+ */
+
+static int bpf_bufsize = 4096;
+SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
+    &bpf_bufsize, 0, "Default capture buffer size in bytes");
+static int bpf_maxbufsize = BPF_MAXBUFSIZE;
+SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
+    &bpf_maxbufsize, 0, "Maximum capture buffer size in bytes");
+
+void
+bpf_buffer_alloc(struct bpf_d *d)
+{
+
+ KASSERT(d->bd_fbuf == NULL, ("bpf_buffer_alloc: bd_fbuf != NULL"));
+ KASSERT(d->bd_sbuf == NULL, ("bpf_buffer_alloc: bd_sbuf != NULL"));
+ KASSERT(d->bd_hbuf == NULL, ("bpf_buffer_alloc: bd_hbuf != NULL"));
+
+ d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
+ d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
+ d->bd_hbuf = NULL;
+ d->bd_slen = 0;
+ d->bd_hlen = 0;
+}
+
+/*
+ * Simple data copy to the current kernel buffer.
+ */
+void
+bpf_buffer_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
+ void *src, u_int len)
+{
+ u_char *src_bytes;
+
+ src_bytes = (u_char *)src;
+ bcopy(src_bytes, buf + offset, len);
+}
+
+/*
+ * Scatter-gather data copy from an mbuf chain to the current kernel buffer.
+ */
+void
+bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
+ u_int len)
+{
+ const struct mbuf *m;
+ u_char *dst;
+ u_int count;
+
+ m = (struct mbuf *)src;
+ dst = (u_char *)buf + offset;
+ while (len > 0) {
+ if (m == NULL)
+ panic("bpf_mcopy");
+ count = min(m->m_len, len);
+ bcopy(mtod(m, void *), dst, count);
+ m = m->m_next;
+ dst += count;
+ len -= count;
+ }
+}
+
+/*
+ * Free BPF kernel buffers on device close.
+ */
+void
+bpf_buffer_free(struct bpf_d *d)
+{
+
+ if (d->bd_sbuf != NULL)
+ free(d->bd_sbuf, M_BPF);
+ if (d->bd_hbuf != NULL)
+ free(d->bd_hbuf, M_BPF);
+ if (d->bd_fbuf != NULL)
+ free(d->bd_fbuf, M_BPF);
+
+#ifdef INVARIANTS
+ d->bd_sbuf = d->bd_hbuf = d->bd_fbuf = (caddr_t)~0;
+#endif
+}
+
+/*
+ * This is a historical initialization that occurs when the BPF descriptor is
+ * first opened. It does not imply selection of a buffer mode, so we don't
+ * allocate buffers here.
+ */
+void
+bpf_buffer_init(struct bpf_d *d)
+{
+
+ d->bd_bufsize = bpf_bufsize;
+}
+
+/*
+ * Allocate or resize buffers.
+ */
+int
+bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i)
+{
+ u_int size;
+
+ BPFD_LOCK(d);
+ if (d->bd_bif != NULL) {
+ BPFD_UNLOCK(d);
+ return (EINVAL);
+ }
+ size = *i;
+ if (size > bpf_maxbufsize)
+ *i = size = bpf_maxbufsize;
+ else if (size < BPF_MINBUFSIZE)
+ *i = size = BPF_MINBUFSIZE;
+ d->bd_bufsize = size;
+ BPFD_UNLOCK(d);
+ return (0);
+}
+
+/*
+ * Copy buffer storage to user space in read().
+ */
+int
+bpf_buffer_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
+{
+
+ return (uiomove(buf, len, uio));
+}
diff --git a/freebsd/sys/net/bpf_buffer.h b/freebsd/sys/net/bpf_buffer.h
new file mode 100644
index 00000000..545ddb22
--- /dev/null
+++ b/freebsd/sys/net/bpf_buffer.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2007 Seccuris Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract to
+ * Seccuris Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_BPF_BUFFER_HH_
+#define _NET_BPF_BUFFER_HH_
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+void bpf_buffer_alloc(struct bpf_d *d);
+void bpf_buffer_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
+ void *src, u_int len);
+void bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
+ void *src, u_int len);
+void bpf_buffer_free(struct bpf_d *d);
+void bpf_buffer_init(struct bpf_d *d);
+int bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i);
+int bpf_buffer_uiomove(struct bpf_d *d, caddr_t buf, u_int len,
+ struct uio *uio);
+
+#endif /* !_NET_BPF_BUFFER_HH_ */
diff --git a/freebsd/sys/net/bpf_filter.c b/freebsd/sys/net/bpf_filter.c
new file mode 100644
index 00000000..3452cc4a
--- /dev/null
+++ b/freebsd/sys/net/bpf_filter.c
@@ -0,0 +1,582 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf_filter.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+
+#if !defined(_KERNEL) || defined(sun)
+#include <freebsd/netinet/in.h>
+#endif
+
+#ifndef __i386__
+#define BPF_ALIGN
+#endif
+
+#ifndef BPF_ALIGN
+#define EXTRACT_SHORT(p) ((u_int16_t)ntohs(*(u_int16_t *)p))
+#define EXTRACT_LONG(p) (ntohl(*(u_int32_t *)p))
+#else
+#define EXTRACT_SHORT(p)\
+ ((u_int16_t)\
+ ((u_int16_t)*((u_char *)p+0)<<8|\
+ (u_int16_t)*((u_char *)p+1)<<0))
+#define EXTRACT_LONG(p)\
+ ((u_int32_t)*((u_char *)p+0)<<24|\
+ (u_int32_t)*((u_char *)p+1)<<16|\
+ (u_int32_t)*((u_char *)p+2)<<8|\
+ (u_int32_t)*((u_char *)p+3)<<0)
+#endif
+
+#ifdef _KERNEL
+#include <freebsd/sys/mbuf.h>
+#else
+#include <freebsd/stdlib.h>
+#endif
+#include <freebsd/net/bpf.h>
+#ifdef _KERNEL
+#define MINDEX(m, k) \
+{ \
+ register int len = m->m_len; \
+ \
+ while (k >= len) { \
+ k -= len; \
+ m = m->m_next; \
+ if (m == 0) \
+ return (0); \
+ len = m->m_len; \
+ } \
+}
+
+static u_int16_t m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err);
+static u_int32_t m_xword(struct mbuf *m, bpf_u_int32 k, int *err);
+
+static u_int32_t
+m_xword(struct mbuf *m, bpf_u_int32 k, int *err)
+{
+ size_t len;
+ u_char *cp, *np;
+ struct mbuf *m0;
+
+ len = m->m_len;
+ while (k >= len) {
+ k -= len;
+ m = m->m_next;
+ if (m == 0)
+ goto bad;
+ len = m->m_len;
+ }
+ cp = mtod(m, u_char *) + k;
+ if (len - k >= 4) {
+ *err = 0;
+ return (EXTRACT_LONG(cp));
+ }
+ m0 = m->m_next;
+ if (m0 == 0 || m0->m_len + len - k < 4)
+ goto bad;
+ *err = 0;
+ np = mtod(m0, u_char *);
+ switch (len - k) {
+ case 1:
+ return (((u_int32_t)cp[0] << 24) |
+ ((u_int32_t)np[0] << 16) |
+ ((u_int32_t)np[1] << 8) |
+ (u_int32_t)np[2]);
+
+ case 2:
+ return (((u_int32_t)cp[0] << 24) |
+ ((u_int32_t)cp[1] << 16) |
+ ((u_int32_t)np[0] << 8) |
+ (u_int32_t)np[1]);
+
+ default:
+ return (((u_int32_t)cp[0] << 24) |
+ ((u_int32_t)cp[1] << 16) |
+ ((u_int32_t)cp[2] << 8) |
+ (u_int32_t)np[0]);
+ }
+ bad:
+ *err = 1;
+ return (0);
+}
+
+static u_int16_t
+m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err)
+{
+ size_t len;
+ u_char *cp;
+ struct mbuf *m0;
+
+ len = m->m_len;
+ while (k >= len) {
+ k -= len;
+ m = m->m_next;
+ if (m == 0)
+ goto bad;
+ len = m->m_len;
+ }
+ cp = mtod(m, u_char *) + k;
+ if (len - k >= 2) {
+ *err = 0;
+ return (EXTRACT_SHORT(cp));
+ }
+ m0 = m->m_next;
+ if (m0 == 0)
+ goto bad;
+ *err = 0;
+ return ((cp[0] << 8) | mtod(m0, u_char *)[0]);
+ bad:
+ *err = 1;
+ return (0);
+}
+#endif
+
+/*
+ * Execute the filter program starting at pc on the packet p.
+ * wirelen is the length of the original packet; buflen is the amount
+ * of data present.
+ */
+u_int
+bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
+{
+ u_int32_t A = 0, X = 0;
+ bpf_u_int32 k;
+ u_int32_t mem[BPF_MEMWORDS];
+
+ if (pc == NULL)
+ /*
+ * No filter means accept all.
+ */
+ return ((u_int)-1);
+
+ --pc;
+ while (1) {
+ ++pc;
+ switch (pc->code) {
+ default:
+#ifdef _KERNEL
+ return (0);
+#else
+ abort();
+#endif
+
+ case BPF_RET|BPF_K:
+ return ((u_int)pc->k);
+
+ case BPF_RET|BPF_A:
+ return ((u_int)A);
+
+ case BPF_LD|BPF_W|BPF_ABS:
+ k = pc->k;
+ if (k > buflen || sizeof(int32_t) > buflen - k) {
+#ifdef _KERNEL
+ int merr;
+
+ if (buflen != 0)
+ return (0);
+ A = m_xword((struct mbuf *)p, k, &merr);
+ if (merr != 0)
+ return (0);
+ continue;
+#else
+ return (0);
+#endif
+ }
+#ifdef BPF_ALIGN
+ if (((intptr_t)(p + k) & 3) != 0)
+ A = EXTRACT_LONG(&p[k]);
+ else
+#endif
+ A = ntohl(*(int32_t *)(p + k));
+ continue;
+
+ case BPF_LD|BPF_H|BPF_ABS:
+ k = pc->k;
+ if (k > buflen || sizeof(int16_t) > buflen - k) {
+#ifdef _KERNEL
+ int merr;
+
+ if (buflen != 0)
+ return (0);
+				A = m_xhalf((struct mbuf *)p, k, &merr);
+				if (merr != 0)
+					return (0);
+				continue;
+#else
+ return (0);
+#endif
+ }
+ A = EXTRACT_SHORT(&p[k]);
+ continue;
+
+ case BPF_LD|BPF_B|BPF_ABS:
+ k = pc->k;
+ if (k >= buflen) {
+#ifdef _KERNEL
+ struct mbuf *m;
+
+ if (buflen != 0)
+ return (0);
+ m = (struct mbuf *)p;
+ MINDEX(m, k);
+ A = mtod(m, u_char *)[k];
+ continue;
+#else
+ return (0);
+#endif
+ }
+ A = p[k];
+ continue;
+
+ case BPF_LD|BPF_W|BPF_LEN:
+ A = wirelen;
+ continue;
+
+ case BPF_LDX|BPF_W|BPF_LEN:
+ X = wirelen;
+ continue;
+
+ case BPF_LD|BPF_W|BPF_IND:
+ k = X + pc->k;
+ if (pc->k > buflen || X > buflen - pc->k ||
+ sizeof(int32_t) > buflen - k) {
+#ifdef _KERNEL
+ int merr;
+
+ if (buflen != 0)
+ return (0);
+ A = m_xword((struct mbuf *)p, k, &merr);
+ if (merr != 0)
+ return (0);
+ continue;
+#else
+ return (0);
+#endif
+ }
+#ifdef BPF_ALIGN
+ if (((intptr_t)(p + k) & 3) != 0)
+ A = EXTRACT_LONG(&p[k]);
+ else
+#endif
+ A = ntohl(*(int32_t *)(p + k));
+ continue;
+
+ case BPF_LD|BPF_H|BPF_IND:
+ k = X + pc->k;
+ if (X > buflen || pc->k > buflen - X ||
+ sizeof(int16_t) > buflen - k) {
+#ifdef _KERNEL
+ int merr;
+
+ if (buflen != 0)
+ return (0);
+ A = m_xhalf((struct mbuf *)p, k, &merr);
+ if (merr != 0)
+ return (0);
+ continue;
+#else
+ return (0);
+#endif
+ }
+ A = EXTRACT_SHORT(&p[k]);
+ continue;
+
+ case BPF_LD|BPF_B|BPF_IND:
+ k = X + pc->k;
+ if (pc->k >= buflen || X >= buflen - pc->k) {
+#ifdef _KERNEL
+ struct mbuf *m;
+
+ if (buflen != 0)
+ return (0);
+ m = (struct mbuf *)p;
+ MINDEX(m, k);
+ A = mtod(m, u_char *)[k];
+ continue;
+#else
+ return (0);
+#endif
+ }
+ A = p[k];
+ continue;
+
+ case BPF_LDX|BPF_MSH|BPF_B:
+ k = pc->k;
+ if (k >= buflen) {
+#ifdef _KERNEL
+ register struct mbuf *m;
+
+ if (buflen != 0)
+ return (0);
+ m = (struct mbuf *)p;
+ MINDEX(m, k);
+ X = (mtod(m, u_char *)[k] & 0xf) << 2;
+ continue;
+#else
+ return (0);
+#endif
+ }
+ X = (p[pc->k] & 0xf) << 2;
+ continue;
+
+ case BPF_LD|BPF_IMM:
+ A = pc->k;
+ continue;
+
+ case BPF_LDX|BPF_IMM:
+ X = pc->k;
+ continue;
+
+ case BPF_LD|BPF_MEM:
+ A = mem[pc->k];
+ continue;
+
+ case BPF_LDX|BPF_MEM:
+ X = mem[pc->k];
+ continue;
+
+ case BPF_ST:
+ mem[pc->k] = A;
+ continue;
+
+ case BPF_STX:
+ mem[pc->k] = X;
+ continue;
+
+ case BPF_JMP|BPF_JA:
+ pc += pc->k;
+ continue;
+
+ case BPF_JMP|BPF_JGT|BPF_K:
+ pc += (A > pc->k) ? pc->jt : pc->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGE|BPF_K:
+ pc += (A >= pc->k) ? pc->jt : pc->jf;
+ continue;
+
+ case BPF_JMP|BPF_JEQ|BPF_K:
+ pc += (A == pc->k) ? pc->jt : pc->jf;
+ continue;
+
+ case BPF_JMP|BPF_JSET|BPF_K:
+ pc += (A & pc->k) ? pc->jt : pc->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGT|BPF_X:
+ pc += (A > X) ? pc->jt : pc->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGE|BPF_X:
+ pc += (A >= X) ? pc->jt : pc->jf;
+ continue;
+
+ case BPF_JMP|BPF_JEQ|BPF_X:
+ pc += (A == X) ? pc->jt : pc->jf;
+ continue;
+
+ case BPF_JMP|BPF_JSET|BPF_X:
+ pc += (A & X) ? pc->jt : pc->jf;
+ continue;
+
+ case BPF_ALU|BPF_ADD|BPF_X:
+ A += X;
+ continue;
+
+ case BPF_ALU|BPF_SUB|BPF_X:
+ A -= X;
+ continue;
+
+ case BPF_ALU|BPF_MUL|BPF_X:
+ A *= X;
+ continue;
+
+ case BPF_ALU|BPF_DIV|BPF_X:
+ if (X == 0)
+ return (0);
+ A /= X;
+ continue;
+
+ case BPF_ALU|BPF_AND|BPF_X:
+ A &= X;
+ continue;
+
+ case BPF_ALU|BPF_OR|BPF_X:
+ A |= X;
+ continue;
+
+ case BPF_ALU|BPF_LSH|BPF_X:
+ A <<= X;
+ continue;
+
+ case BPF_ALU|BPF_RSH|BPF_X:
+ A >>= X;
+ continue;
+
+ case BPF_ALU|BPF_ADD|BPF_K:
+ A += pc->k;
+ continue;
+
+ case BPF_ALU|BPF_SUB|BPF_K:
+ A -= pc->k;
+ continue;
+
+ case BPF_ALU|BPF_MUL|BPF_K:
+ A *= pc->k;
+ continue;
+
+ case BPF_ALU|BPF_DIV|BPF_K:
+ A /= pc->k;
+ continue;
+
+ case BPF_ALU|BPF_AND|BPF_K:
+ A &= pc->k;
+ continue;
+
+ case BPF_ALU|BPF_OR|BPF_K:
+ A |= pc->k;
+ continue;
+
+ case BPF_ALU|BPF_LSH|BPF_K:
+ A <<= pc->k;
+ continue;
+
+ case BPF_ALU|BPF_RSH|BPF_K:
+ A >>= pc->k;
+ continue;
+
+ case BPF_ALU|BPF_NEG:
+ A = -A;
+ continue;
+
+ case BPF_MISC|BPF_TAX:
+ X = A;
+ continue;
+
+ case BPF_MISC|BPF_TXA:
+ A = X;
+ continue;
+ }
+ }
+}
+
+#ifdef _KERNEL
+static const u_short bpf_code_map[] = {
+ 0x10ff, /* 0x00-0x0f: 1111111100001000 */
+ 0x3070, /* 0x10-0x1f: 0000111000001100 */
+ 0x3131, /* 0x20-0x2f: 1000110010001100 */
+ 0x3031, /* 0x30-0x3f: 1000110000001100 */
+ 0x3131, /* 0x40-0x4f: 1000110010001100 */
+ 0x1011, /* 0x50-0x5f: 1000100000001000 */
+ 0x1013, /* 0x60-0x6f: 1100100000001000 */
+ 0x1010, /* 0x70-0x7f: 0000100000001000 */
+ 0x0093, /* 0x80-0x8f: 1100100100000000 */
+ 0x0000, /* 0x90-0x9f: 0000000000000000 */
+ 0x0000, /* 0xa0-0xaf: 0000000000000000 */
+ 0x0002, /* 0xb0-0xbf: 0100000000000000 */
+ 0x0000, /* 0xc0-0xcf: 0000000000000000 */
+ 0x0000, /* 0xd0-0xdf: 0000000000000000 */
+ 0x0000, /* 0xe0-0xef: 0000000000000000 */
+ 0x0000 /* 0xf0-0xff: 0000000000000000 */
+};
+
+#define BPF_VALIDATE_CODE(c) \
+ ((c) <= 0xff && (bpf_code_map[(c) >> 4] & (1 << ((c) & 0xf))) != 0)
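+
+/*
+ * For example, BPF_LD|BPF_W|BPF_ABS is 0x20, so the macro tests bit 0 of
+ * bpf_code_map[2] (0x3131), which is set; an undefined code such as 0xff
+ * indexes bpf_code_map[15] (0x0000) and is rejected.
+ */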
+
+/*
+ * Return true if the 'fcode' is a valid filter program.
+ * The constraints are that each jump be forward and to a valid
+ * code. The code must terminate with either an accept or reject.
+ *
+ * The kernel needs to be able to verify an application's filter code.
+ * Otherwise, a bogus program could easily crash the system.
+ */
+int
+bpf_validate(const struct bpf_insn *f, int len)
+{
+ register int i;
+ register const struct bpf_insn *p;
+
+	/* Do not accept a negative-length filter. */
+ if (len < 0)
+ return (0);
+
+ /* An empty filter means accept all. */
+ if (len == 0)
+ return (1);
+
+ for (i = 0; i < len; ++i) {
+ p = &f[i];
+ /*
+ * Check that the code is valid.
+ */
+ if (!BPF_VALIDATE_CODE(p->code))
+ return (0);
+ /*
+		 * Check that jumps are forward, and within
+ * the code block.
+ */
+ if (BPF_CLASS(p->code) == BPF_JMP) {
+ register u_int offset;
+
+ if (p->code == (BPF_JMP|BPF_JA))
+ offset = p->k;
+ else
+ offset = p->jt > p->jf ? p->jt : p->jf;
+ if (offset >= (u_int)(len - i) - 1)
+ return (0);
+ continue;
+ }
+ /*
+ * Check that memory operations use valid addresses.
+ */
+ if (p->code == BPF_ST || p->code == BPF_STX ||
+ p->code == (BPF_LD|BPF_MEM) ||
+ p->code == (BPF_LDX|BPF_MEM)) {
+ if (p->k >= BPF_MEMWORDS)
+ return (0);
+ continue;
+ }
+ /*
+ * Check for constant division by 0.
+ */
+ if (p->code == (BPF_ALU|BPF_DIV|BPF_K) && p->k == 0)
+ return (0);
+ }
+ return (BPF_CLASS(f[len - 1].code) == BPF_RET);
+}
+#endif
diff --git a/freebsd/sys/net/bpf_jitter.c b/freebsd/sys/net/bpf_jitter.c
new file mode 100644
index 00000000..bb373725
--- /dev/null
+++ b/freebsd/sys/net/bpf_jitter.c
@@ -0,0 +1,143 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy)
+ * Copyright (C) 2005-2008 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Politecnico di Torino nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef _KERNEL
+#include <freebsd/local/opt_bpf.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/sysctl.h>
+#else
+#include <freebsd/stdlib.h>
+#include <freebsd/string.h>
+#include <freebsd/sys/types.h>
+#endif
+
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/bpf_jitter.h>
+
+bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, int *);
+
+static u_int bpf_jit_accept_all(u_char *, u_int, u_int);
+
+#ifdef _KERNEL
+MALLOC_DEFINE(M_BPFJIT, "BPF_JIT", "BPF JIT compiler");
+
+SYSCTL_NODE(_net, OID_AUTO, bpf_jitter, CTLFLAG_RW, 0, "BPF JIT compiler");
+int bpf_jitter_enable = 1;
+SYSCTL_INT(_net_bpf_jitter, OID_AUTO, enable, CTLFLAG_RW,
+ &bpf_jitter_enable, 0, "enable BPF JIT compiler");
+
+bpf_jit_filter *
+bpf_jitter(struct bpf_insn *fp, int nins)
+{
+ bpf_jit_filter *filter;
+
+ /* Allocate the filter structure */
+ filter = (struct bpf_jit_filter *)malloc(sizeof(*filter),
+ M_BPFJIT, M_NOWAIT | M_ZERO);
+ if (filter == NULL)
+ return (NULL);
+
+ /* No filter means accept all */
+ if (fp == NULL || nins == 0) {
+ filter->func = bpf_jit_accept_all;
+ return (filter);
+ }
+
+ /* Create the binary */
+ if ((filter->func = bpf_jit_compile(fp, nins, filter->mem)) == NULL) {
+ free(filter, M_BPFJIT);
+ return (NULL);
+ }
+
+ return (filter);
+}
+
+void
+bpf_destroy_jit_filter(bpf_jit_filter *filter)
+{
+
+ if (filter->func != bpf_jit_accept_all)
+ free(filter->func, M_BPFJIT);
+ free(filter, M_BPFJIT);
+}
+#else
+bpf_jit_filter *
+bpf_jitter(struct bpf_insn *fp, int nins)
+{
+ bpf_jit_filter *filter;
+
+ /* Allocate the filter structure */
+ filter = (struct bpf_jit_filter *)malloc(sizeof(*filter));
+ if (filter == NULL)
+ return (NULL);
+ memset(filter, 0, sizeof(*filter));
+
+ /* No filter means accept all */
+ if (fp == NULL || nins == 0) {
+ filter->func = bpf_jit_accept_all;
+ return (filter);
+ }
+
+ /* Create the binary */
+ if ((filter->func = bpf_jit_compile(fp, nins, filter->mem)) == NULL) {
+ free(filter);
+ return (NULL);
+ }
+
+ return (filter);
+}
+
+void
+bpf_destroy_jit_filter(bpf_jit_filter *filter)
+{
+
+ if (filter->func != bpf_jit_accept_all)
+ free(filter->func);
+ free(filter);
+}
+#endif
+
+static u_int
+bpf_jit_accept_all(__unused u_char *p, __unused u_int wirelen,
+ __unused u_int buflen)
+{
+
+ return ((u_int)-1);
+}
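
A sketch of how a consumer might drive this API; it assumes an architecture that actually provides bpf_jit_compile() (e.g. i386/amd64 in FreeBSD), and in real use the compiled filter would be cached rather than rebuilt per packet:

#include <net/bpf.h>
#include <net/bpf_jitter.h>

static u_int
run_filter(struct bpf_insn *insns, int nins, u_char *pkt, u_int wirelen,
    u_int buflen)
{
	bpf_jit_filter *jf;
	u_int slen;

	jf = bpf_jitter(insns, nins);	/* NULL if compilation failed */
	if (jf == NULL)			/* fall back to the interpreter */
		return (bpf_filter(insns, pkt, wirelen, buflen));
	/* Same contract as bpf_filter(): 0 drops, else snapshot length. */
	slen = jf->func(pkt, wirelen, buflen);
	bpf_destroy_jit_filter(jf);	/* frees the code and the struct */
	return (slen);
}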
diff --git a/freebsd/sys/net/bpf_jitter.h b/freebsd/sys/net/bpf_jitter.h
new file mode 100644
index 00000000..c0dd7e04
--- /dev/null
+++ b/freebsd/sys/net/bpf_jitter.h
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy)
+ * Copyright (C) 2005-2008 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Politecnico di Torino nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_BPF_JITTER_HH_
+#define _NET_BPF_JITTER_HH_
+
+#ifdef _KERNEL
+MALLOC_DECLARE(M_BPFJIT);
+#endif
+
+extern int bpf_jitter_enable;
+
+/*
+ * Prototype of a filtering function created by the jitter.
+ *
+ * The syntax and meaning of the parameters are analogous to those of
+ * bpf_filter(). Notice that the filter is not among the parameters because
+ * it is hardwired in the function.
+ */
+typedef u_int (*bpf_filter_func)(u_char *, u_int, u_int);
+
+/* Structure describing a native filtering program created by the jitter. */
+typedef struct bpf_jit_filter {
+ /* The native filtering binary, in the form of a bpf_filter_func. */
+ bpf_filter_func func;
+
+ int mem[BPF_MEMWORDS]; /* Scratch memory */
+} bpf_jit_filter;
+
+/*
+ * BPF jitter: builds a machine-code function from a BPF program.
+ *
+ * param fp The BPF pseudo-assembly filter that will be translated
+ * into native code.
+ * param nins Number of instructions of the input filter.
+ * return The bpf_jit_filter structure containing the native filtering
+ * binary.
+ *
+ * bpf_jitter allocates the buffers for the new native filter and
+ * then translates the program pointed to by fp by calling bpf_jit_compile().
+ */
+bpf_jit_filter *bpf_jitter(struct bpf_insn *fp, int nins);
+
+/*
+ * Deletes a filtering function that was previously created by bpf_jitter().
+ *
+ * param filter The filter to destroy.
+ *
+ * This function frees the various buffers (code, memory, etc.) associated
+ * with a filtering function.
+ */
+void bpf_destroy_jit_filter(bpf_jit_filter *filter);
+
+#endif /* _NET_BPF_JITTER_HH_ */
diff --git a/freebsd/sys/net/bpf_zerocopy.h b/freebsd/sys/net/bpf_zerocopy.h
new file mode 100644
index 00000000..455bd41c
--- /dev/null
+++ b/freebsd/sys/net/bpf_zerocopy.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 2007 Seccuris Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract to
+ * Seccuris Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_BPF_ZEROCOPY_HH_
+#define _NET_BPF_ZEROCOPY_HH_
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+void bpf_zerocopy_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
+ void *src, u_int len);
+void bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
+ void *src, u_int len);
+void bpf_zerocopy_buffull(struct bpf_d *);
+void bpf_zerocopy_bufheld(struct bpf_d *);
+void bpf_zerocopy_buf_reclaimed(struct bpf_d *);
+int bpf_zerocopy_canfreebuf(struct bpf_d *);
+int bpf_zerocopy_canwritebuf(struct bpf_d *);
+void bpf_zerocopy_free(struct bpf_d *d);
+int bpf_zerocopy_ioctl_getzmax(struct thread *td, struct bpf_d *d,
+ size_t *i);
+int bpf_zerocopy_ioctl_rotzbuf(struct thread *td, struct bpf_d *d,
+ struct bpf_zbuf *bz);
+int bpf_zerocopy_ioctl_setzbuf(struct thread *td, struct bpf_d *d,
+ struct bpf_zbuf *bz);
+
+#endif /* !_NET_BPF_ZEROCOPY_HH_ */
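
These kernel hooks implement the zero-copy buffer mode described in bpf(4); userland drives it through ioctls. A rough sketch of the setup sequence follows (error paths are shortened, mmap failures unchecked, and the device path is a caller-supplied placeholder):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <net/bpf.h>
#include <fcntl.h>

static int
bpf_open_zbuf(const char *dev)		/* e.g. "/dev/bpf0" */
{
	struct bpf_zbuf bz;
	u_int mode = BPF_BUFMODE_ZBUF;
	size_t zmax;
	int fd;

	if ((fd = open(dev, O_RDWR)) < 0)
		return (-1);
	if (ioctl(fd, BIOCSETBUFMODE, &mode) < 0 ||	/* select zero-copy */
	    ioctl(fd, BIOCGETZMAX, &zmax) < 0)		/* max buffer size */
		return (-1);
	/* The process owns both buffers; BIOCSETZBUF shares them with bpf. */
	bz.bz_buflen = zmax;
	bz.bz_bufa = mmap(NULL, zmax, PROT_READ | PROT_WRITE, MAP_ANON,
	    -1, 0);
	bz.bz_bufb = mmap(NULL, zmax, PROT_READ | PROT_WRITE, MAP_ANON,
	    -1, 0);
	if (ioctl(fd, BIOCSETZBUF, &bz) < 0)		/* install the pair */
		return (-1);
	return (fd);
}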
diff --git a/freebsd/sys/net/bpfdesc.h b/freebsd/sys/net/bpfdesc.h
new file mode 100644
index 00000000..d28ecca2
--- /dev/null
+++ b/freebsd/sys/net/bpfdesc.h
@@ -0,0 +1,149 @@
+/*-
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpfdesc.h 8.1 (Berkeley) 6/10/93
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_BPFDESC_HH_
+#define _NET_BPFDESC_HH_
+
+#include <freebsd/sys/callout.h>
+#include <freebsd/sys/selinfo.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/conf.h>
+#include <freebsd/net/if.h>
+
+/*
+ * Descriptor associated with each open bpf file.
+ */
+struct zbuf;
+struct bpf_d {
+ LIST_ENTRY(bpf_d) bd_next; /* Linked list of descriptors */
+ /*
+ * Buffer slots: two memory buffers store the incoming packets.
+ * The model has three slots. Sbuf is always occupied.
+ * sbuf (store) - Receive interrupt puts packets here.
+ * hbuf (hold) - When sbuf is full, put buffer here and
+ * wakeup read (replace sbuf with fbuf).
+ * fbuf (free) - When read is done, put buffer here.
+ * On receive, if sbuf is full and fbuf is NULL, the packet is dropped.
+ */
+ caddr_t bd_sbuf; /* store slot */
+ caddr_t bd_hbuf; /* hold slot */
+ caddr_t bd_fbuf; /* free slot */
+ int bd_slen; /* current length of store buffer */
+ int bd_hlen; /* current length of hold buffer */
+
+ int bd_bufsize; /* absolute length of buffers */
+
+ struct bpf_if * bd_bif; /* interface descriptor */
+ u_long bd_rtout; /* Read timeout in 'ticks' */
+ struct bpf_insn *bd_rfilter; /* read filter code */
+ struct bpf_insn *bd_wfilter; /* write filter code */
+ void *bd_bfilter; /* binary filter code */
+ u_int64_t bd_rcount; /* number of packets received */
+ u_int64_t bd_dcount; /* number of packets dropped */
+
+ u_char bd_promisc; /* true if listening promiscuously */
+ u_char bd_state; /* idle, waiting, or timed out */
+ u_char bd_immediate; /* true to return on packet arrival */
+ int bd_hdrcmplt; /* false to fill in src lladdr automatically */
+ int bd_direction; /* select packet direction */
+ int bd_feedback; /* true to feed back sent packets */
+ int bd_async; /* non-zero if packet reception should generate signal */
+ int bd_sig; /* signal to send upon packet reception */
+ struct sigio * bd_sigio; /* information for async I/O */
+ struct selinfo bd_sel; /* bsd select info */
+ struct mtx bd_mtx; /* mutex for this descriptor */
+ struct callout bd_callout; /* for BPF timeouts with select */
+ struct label *bd_label; /* MAC label for descriptor */
+ u_int64_t bd_fcount; /* number of packets which matched filter */
+ pid_t bd_pid; /* PID which created descriptor */
+ int bd_locked; /* true if descriptor is locked */
+ u_int bd_bufmode; /* Current buffer mode. */
+ u_int64_t bd_wcount; /* number of packets written */
+ u_int64_t bd_wfcount; /* number of packets that matched write filter */
+ u_int64_t bd_wdcount; /* number of packets dropped during a write */
+ u_int64_t bd_zcopy; /* number of zero copy operations */
+ u_char bd_compat32; /* 32-bit stream on LP64 system */
+};
+
+/* Values for bd_state */
+#define BPF_IDLE 0 /* no select in progress */
+#define BPF_WAITING 1 /* waiting for read timeout in select */
+#define BPF_TIMED_OUT 2 /* read timeout has expired in select */
+
+#define BPFD_LOCK(bd) mtx_lock(&(bd)->bd_mtx)
+#define BPFD_UNLOCK(bd) mtx_unlock(&(bd)->bd_mtx)
+#define BPFD_LOCK_ASSERT(bd) mtx_assert(&(bd)->bd_mtx, MA_OWNED)
+
+/*
+ * External representation of the bpf descriptor
+ */
+struct xbpf_d {
+ u_int bd_structsize; /* Size of this structure. */
+ u_char bd_promisc;
+ u_char bd_immediate;
+ u_char __bd_pad[6];
+ int bd_hdrcmplt;
+ int bd_direction;
+ int bd_feedback;
+ int bd_async;
+ u_int64_t bd_rcount;
+ u_int64_t bd_dcount;
+ u_int64_t bd_fcount;
+ int bd_sig;
+ int bd_slen;
+ int bd_hlen;
+ int bd_bufsize;
+ pid_t bd_pid;
+ char bd_ifname[IFNAMSIZ];
+ int bd_locked;
+ u_int64_t bd_wcount;
+ u_int64_t bd_wfcount;
+ u_int64_t bd_wdcount;
+ u_int64_t bd_zcopy;
+ int bd_bufmode;
+ /*
+ * Allocate 4 64 bit unsigned integers for future expansion so we do
+ * not have to worry about breaking the ABI.
+ */
+ u_int64_t bd_spare[4];
+};
+
+#define BPFIF_LOCK(bif) mtx_lock(&(bif)->bif_mtx)
+#define BPFIF_UNLOCK(bif) mtx_unlock(&(bif)->bif_mtx)
+
+#endif
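
The three-slot scheme documented in struct bpf_d above is rotated by bpf.c roughly as follows (a paraphrase of its ROTATE_BUFFERS() macro, shown only to illustrate the slot invariants):

static void
rotate_buffers(struct bpf_d *d)
{
	d->bd_hbuf = d->bd_sbuf;	/* the full store becomes the hold */
	d->bd_hlen = d->bd_slen;
	d->bd_sbuf = d->bd_fbuf;	/* the free slot is the new store */
	d->bd_slen = 0;
	d->bd_fbuf = NULL;		/* no free slot until read(2) is done */
}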
diff --git a/freebsd/sys/net/bridgestp.c b/freebsd/sys/net/bridgestp.c
new file mode 100644
index 00000000..2191be26
--- /dev/null
+++ b/freebsd/sys/net/bridgestp.c
@@ -0,0 +1,2250 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $NetBSD: bridgestp.c,v 1.5 2003/11/28 08:56:48 keihan Exp $ */
+
+/*
+ * Copyright (c) 2000 Jason L. Wright (jason@thought.net)
+ * Copyright (c) 2006 Andrew Thompson (thompsa@FreeBSD.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: bridgestp.c,v 1.5 2001/03/22 03:48:29 jason Exp
+ */
+
+/*
+ * Implementation of the spanning tree protocol as defined in
+ * ISO/IEC 802.1D-2004, June 9, 2004.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/callout.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/taskqueue.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_llc.h>
+#include <freebsd/net/if_media.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/if_ether.h>
+#include <freebsd/net/bridgestp.h>
+
+#ifdef BRIDGESTP_DEBUG
+#define DPRINTF(fmt, arg...) printf("bstp: " fmt, ##arg)
+#else
+#define DPRINTF(fmt, arg...) (void)0
+#endif
+
+#define PV2ADDR(pv, eaddr) do { \
+ eaddr[0] = pv >> 40; \
+ eaddr[1] = pv >> 32; \
+ eaddr[2] = pv >> 24; \
+ eaddr[3] = pv >> 16; \
+ eaddr[4] = pv >> 8; \
+ eaddr[5] = pv >> 0; \
+} while (0)
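+/*
+ * For example, bridge priority 0x8000 with MAC 00:11:22:33:44:55 packs
+ * into the 64-bit bridge id 0x8000001122334455; PV2ADDR() unpacks the
+ * low 48 bits back into eaddr[0..5].
+ */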
+
+#define INFO_BETTER 1
+#define INFO_SAME 0
+#define INFO_WORSE -1
+
+const uint8_t bstp_etheraddr[] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+
+LIST_HEAD(, bstp_state) bstp_list;
+static struct mtx bstp_list_mtx;
+
+static void bstp_transmit(struct bstp_state *, struct bstp_port *);
+static void bstp_transmit_bpdu(struct bstp_state *, struct bstp_port *);
+static void bstp_transmit_tcn(struct bstp_state *, struct bstp_port *);
+static void bstp_decode_bpdu(struct bstp_port *, struct bstp_cbpdu *,
+ struct bstp_config_unit *);
+static void bstp_send_bpdu(struct bstp_state *, struct bstp_port *,
+ struct bstp_cbpdu *);
+static int bstp_pdu_flags(struct bstp_port *);
+static void bstp_received_stp(struct bstp_state *, struct bstp_port *,
+ struct mbuf **, struct bstp_tbpdu *);
+static void bstp_received_rstp(struct bstp_state *, struct bstp_port *,
+ struct mbuf **, struct bstp_tbpdu *);
+static void bstp_received_tcn(struct bstp_state *, struct bstp_port *,
+ struct bstp_tcn_unit *);
+static void bstp_received_bpdu(struct bstp_state *, struct bstp_port *,
+ struct bstp_config_unit *);
+static int bstp_pdu_rcvtype(struct bstp_port *, struct bstp_config_unit *);
+static int bstp_pdu_bettersame(struct bstp_port *, int);
+static int bstp_info_cmp(struct bstp_pri_vector *,
+ struct bstp_pri_vector *);
+static int bstp_info_superior(struct bstp_pri_vector *,
+ struct bstp_pri_vector *);
+static void bstp_assign_roles(struct bstp_state *);
+static void bstp_update_roles(struct bstp_state *, struct bstp_port *);
+static void bstp_update_state(struct bstp_state *, struct bstp_port *);
+static void bstp_update_tc(struct bstp_port *);
+static void bstp_update_info(struct bstp_port *);
+static void bstp_set_other_tcprop(struct bstp_port *);
+static void bstp_set_all_reroot(struct bstp_state *);
+static void bstp_set_all_sync(struct bstp_state *);
+static void bstp_set_port_state(struct bstp_port *, int);
+static void bstp_set_port_role(struct bstp_port *, int);
+static void bstp_set_port_proto(struct bstp_port *, int);
+static void bstp_set_port_tc(struct bstp_port *, int);
+static void bstp_set_timer_tc(struct bstp_port *);
+static void bstp_set_timer_msgage(struct bstp_port *);
+static int bstp_rerooted(struct bstp_state *, struct bstp_port *);
+static uint32_t bstp_calc_path_cost(struct bstp_port *);
+static void bstp_notify_state(void *, int);
+static void bstp_notify_rtage(void *, int);
+static void bstp_ifupdstatus(struct bstp_state *, struct bstp_port *);
+static void bstp_enable_port(struct bstp_state *, struct bstp_port *);
+static void bstp_disable_port(struct bstp_state *, struct bstp_port *);
+static void bstp_tick(void *);
+static void bstp_timer_start(struct bstp_timer *, uint16_t);
+static void bstp_timer_stop(struct bstp_timer *);
+static void bstp_timer_latch(struct bstp_timer *);
+static int bstp_timer_expired(struct bstp_timer *);
+static void bstp_hello_timer_expiry(struct bstp_state *,
+ struct bstp_port *);
+static void bstp_message_age_expiry(struct bstp_state *,
+ struct bstp_port *);
+static void bstp_migrate_delay_expiry(struct bstp_state *,
+ struct bstp_port *);
+static void bstp_edge_delay_expiry(struct bstp_state *,
+ struct bstp_port *);
+static int bstp_addr_cmp(const uint8_t *, const uint8_t *);
+static int bstp_same_bridgeid(uint64_t, uint64_t);
+static void bstp_reinit(struct bstp_state *);
+
+static void
+bstp_transmit(struct bstp_state *bs, struct bstp_port *bp)
+{
+ if (bs->bs_running == 0)
+ return;
+
+ /*
+ * a PDU can only be sent if we have tx quota left and the
+ * hello timer is running.
+ */
+ if (bp->bp_hello_timer.active == 0) {
+ /* Test if it needs to be reset */
+ bstp_hello_timer_expiry(bs, bp);
+ return;
+ }
+ if (bp->bp_txcount > bs->bs_txholdcount)
+ /* Ran out of karma */
+ return;
+
+ if (bp->bp_protover == BSTP_PROTO_RSTP) {
+ bstp_transmit_bpdu(bs, bp);
+ bp->bp_tc_ack = 0;
+ } else { /* STP */
+ switch (bp->bp_role) {
+ case BSTP_ROLE_DESIGNATED:
+ bstp_transmit_bpdu(bs, bp);
+ bp->bp_tc_ack = 0;
+ break;
+
+ case BSTP_ROLE_ROOT:
+ bstp_transmit_tcn(bs, bp);
+ break;
+ }
+ }
+ bstp_timer_start(&bp->bp_hello_timer, bp->bp_desg_htime);
+ bp->bp_flags &= ~BSTP_PORT_NEWINFO;
+}
+
+static void
+bstp_transmit_bpdu(struct bstp_state *bs, struct bstp_port *bp)
+{
+ struct bstp_cbpdu bpdu;
+
+ BSTP_LOCK_ASSERT(bs);
+
+ bpdu.cbu_rootpri = htons(bp->bp_desg_pv.pv_root_id >> 48);
+ PV2ADDR(bp->bp_desg_pv.pv_root_id, bpdu.cbu_rootaddr);
+
+ bpdu.cbu_rootpathcost = htonl(bp->bp_desg_pv.pv_cost);
+
+ bpdu.cbu_bridgepri = htons(bp->bp_desg_pv.pv_dbridge_id >> 48);
+ PV2ADDR(bp->bp_desg_pv.pv_dbridge_id, bpdu.cbu_bridgeaddr);
+
+ bpdu.cbu_portid = htons(bp->bp_port_id);
+ bpdu.cbu_messageage = htons(bp->bp_desg_msg_age);
+ bpdu.cbu_maxage = htons(bp->bp_desg_max_age);
+ bpdu.cbu_hellotime = htons(bp->bp_desg_htime);
+ bpdu.cbu_forwarddelay = htons(bp->bp_desg_fdelay);
+
+ bpdu.cbu_flags = bstp_pdu_flags(bp);
+
+ switch (bp->bp_protover) {
+ case BSTP_PROTO_STP:
+ bpdu.cbu_bpdutype = BSTP_MSGTYPE_CFG;
+ break;
+
+ case BSTP_PROTO_RSTP:
+ bpdu.cbu_bpdutype = BSTP_MSGTYPE_RSTP;
+ break;
+ }
+
+ bstp_send_bpdu(bs, bp, &bpdu);
+}
+
+static void
+bstp_transmit_tcn(struct bstp_state *bs, struct bstp_port *bp)
+{
+ struct bstp_tbpdu bpdu;
+ struct ifnet *ifp = bp->bp_ifp;
+ struct ether_header *eh;
+ struct mbuf *m;
+
+ KASSERT(bp == bs->bs_root_port, ("%s: bad root port\n", __func__));
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return;
+
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return;
+
+ m->m_pkthdr.rcvif = ifp;
+ m->m_pkthdr.len = sizeof(*eh) + sizeof(bpdu);
+ m->m_len = m->m_pkthdr.len;
+
+ eh = mtod(m, struct ether_header *);
+
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
+ eh->ether_type = htons(sizeof(bpdu));
+
+ bpdu.tbu_ssap = bpdu.tbu_dsap = LLC_8021D_LSAP;
+ bpdu.tbu_ctl = LLC_UI;
+ bpdu.tbu_protoid = 0;
+ bpdu.tbu_protover = 0;
+ bpdu.tbu_bpdutype = BSTP_MSGTYPE_TCN;
+
+ memcpy(mtod(m, caddr_t) + sizeof(*eh), &bpdu, sizeof(bpdu));
+
+ bp->bp_txcount++;
+ ifp->if_transmit(ifp, m);
+}
+
+static void
+bstp_decode_bpdu(struct bstp_port *bp, struct bstp_cbpdu *cpdu,
+ struct bstp_config_unit *cu)
+{
+ int flags;
+
+ cu->cu_pv.pv_root_id =
+ (((uint64_t)ntohs(cpdu->cbu_rootpri)) << 48) |
+ (((uint64_t)cpdu->cbu_rootaddr[0]) << 40) |
+ (((uint64_t)cpdu->cbu_rootaddr[1]) << 32) |
+ (((uint64_t)cpdu->cbu_rootaddr[2]) << 24) |
+ (((uint64_t)cpdu->cbu_rootaddr[3]) << 16) |
+ (((uint64_t)cpdu->cbu_rootaddr[4]) << 8) |
+ (((uint64_t)cpdu->cbu_rootaddr[5]) << 0);
+
+ cu->cu_pv.pv_dbridge_id =
+ (((uint64_t)ntohs(cpdu->cbu_bridgepri)) << 48) |
+ (((uint64_t)cpdu->cbu_bridgeaddr[0]) << 40) |
+ (((uint64_t)cpdu->cbu_bridgeaddr[1]) << 32) |
+ (((uint64_t)cpdu->cbu_bridgeaddr[2]) << 24) |
+ (((uint64_t)cpdu->cbu_bridgeaddr[3]) << 16) |
+ (((uint64_t)cpdu->cbu_bridgeaddr[4]) << 8) |
+ (((uint64_t)cpdu->cbu_bridgeaddr[5]) << 0);
+
+ cu->cu_pv.pv_cost = ntohl(cpdu->cbu_rootpathcost);
+ cu->cu_message_age = ntohs(cpdu->cbu_messageage);
+ cu->cu_max_age = ntohs(cpdu->cbu_maxage);
+ cu->cu_hello_time = ntohs(cpdu->cbu_hellotime);
+ cu->cu_forward_delay = ntohs(cpdu->cbu_forwarddelay);
+ cu->cu_pv.pv_dport_id = ntohs(cpdu->cbu_portid);
+ cu->cu_pv.pv_port_id = bp->bp_port_id;
+ cu->cu_message_type = cpdu->cbu_bpdutype;
+
+ /* Strip off unused flags in STP mode */
+ flags = cpdu->cbu_flags;
+ switch (cpdu->cbu_protover) {
+ case BSTP_PROTO_STP:
+ flags &= BSTP_PDU_STPMASK;
+ /* An STP BPDU explicitly conveys a Designated Port */
+ cu->cu_role = BSTP_ROLE_DESIGNATED;
+ break;
+
+ case BSTP_PROTO_RSTP:
+ flags &= BSTP_PDU_RSTPMASK;
+ break;
+ }
+
+ cu->cu_topology_change_ack =
+ (flags & BSTP_PDU_F_TCA) ? 1 : 0;
+ cu->cu_proposal =
+ (flags & BSTP_PDU_F_P) ? 1 : 0;
+ cu->cu_agree =
+ (flags & BSTP_PDU_F_A) ? 1 : 0;
+ cu->cu_learning =
+ (flags & BSTP_PDU_F_L) ? 1 : 0;
+ cu->cu_forwarding =
+ (flags & BSTP_PDU_F_F) ? 1 : 0;
+ cu->cu_topology_change =
+ (flags & BSTP_PDU_F_TC) ? 1 : 0;
+
+ switch ((flags & BSTP_PDU_PRMASK) >> BSTP_PDU_PRSHIFT) {
+ case BSTP_PDU_F_ROOT:
+ cu->cu_role = BSTP_ROLE_ROOT;
+ break;
+ case BSTP_PDU_F_ALT:
+ cu->cu_role = BSTP_ROLE_ALTERNATE;
+ break;
+ case BSTP_PDU_F_DESG:
+ cu->cu_role = BSTP_ROLE_DESIGNATED;
+ break;
+ }
+}
+
+static void
+bstp_send_bpdu(struct bstp_state *bs, struct bstp_port *bp,
+ struct bstp_cbpdu *bpdu)
+{
+ struct ifnet *ifp;
+ struct mbuf *m;
+ struct ether_header *eh;
+
+ BSTP_LOCK_ASSERT(bs);
+
+ ifp = bp->bp_ifp;
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return;
+
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return;
+
+ eh = mtod(m, struct ether_header *);
+
+ bpdu->cbu_ssap = bpdu->cbu_dsap = LLC_8021D_LSAP;
+ bpdu->cbu_ctl = LLC_UI;
+ bpdu->cbu_protoid = htons(BSTP_PROTO_ID);
+
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
+
+ switch (bpdu->cbu_bpdutype) {
+ case BSTP_MSGTYPE_CFG:
+ bpdu->cbu_protover = BSTP_PROTO_STP;
+ m->m_pkthdr.len = sizeof(*eh) + BSTP_BPDU_STP_LEN;
+ eh->ether_type = htons(BSTP_BPDU_STP_LEN);
+ memcpy(mtod(m, caddr_t) + sizeof(*eh), bpdu,
+ BSTP_BPDU_STP_LEN);
+ break;
+
+ case BSTP_MSGTYPE_RSTP:
+ bpdu->cbu_protover = BSTP_PROTO_RSTP;
+ bpdu->cbu_versionlen = htons(0);
+ m->m_pkthdr.len = sizeof(*eh) + BSTP_BPDU_RSTP_LEN;
+ eh->ether_type = htons(BSTP_BPDU_RSTP_LEN);
+ memcpy(mtod(m, caddr_t) + sizeof(*eh), bpdu,
+ BSTP_BPDU_RSTP_LEN);
+ break;
+
+ default:
+ panic("not implemented");
+ }
+ m->m_pkthdr.rcvif = ifp;
+ m->m_len = m->m_pkthdr.len;
+
+ bp->bp_txcount++;
+ ifp->if_transmit(ifp, m);
+}
+
+static int
+bstp_pdu_flags(struct bstp_port *bp)
+{
+ int flags = 0;
+
+ if (bp->bp_proposing && bp->bp_state != BSTP_IFSTATE_FORWARDING)
+ flags |= BSTP_PDU_F_P;
+
+ if (bp->bp_agree)
+ flags |= BSTP_PDU_F_A;
+
+ if (bp->bp_tc_timer.active)
+ flags |= BSTP_PDU_F_TC;
+
+ if (bp->bp_tc_ack)
+ flags |= BSTP_PDU_F_TCA;
+
+ switch (bp->bp_state) {
+ case BSTP_IFSTATE_LEARNING:
+ flags |= BSTP_PDU_F_L;
+ break;
+
+ case BSTP_IFSTATE_FORWARDING:
+ flags |= (BSTP_PDU_F_L | BSTP_PDU_F_F);
+ break;
+ }
+
+ switch (bp->bp_role) {
+ case BSTP_ROLE_ROOT:
+ flags |=
+ (BSTP_PDU_F_ROOT << BSTP_PDU_PRSHIFT);
+ break;
+
+ case BSTP_ROLE_ALTERNATE:
+ case BSTP_ROLE_BACKUP: /* fall through */
+ flags |=
+ (BSTP_PDU_F_ALT << BSTP_PDU_PRSHIFT);
+ break;
+
+ case BSTP_ROLE_DESIGNATED:
+ flags |=
+ (BSTP_PDU_F_DESG << BSTP_PDU_PRSHIFT);
+ break;
+ }
+
+ /* Strip off unused flags in either mode */
+ switch (bp->bp_protover) {
+ case BSTP_PROTO_STP:
+ flags &= BSTP_PDU_STPMASK;
+ break;
+ case BSTP_PROTO_RSTP:
+ flags &= BSTP_PDU_RSTPMASK;
+ break;
+ }
+ return (flags);
+}
+
+struct mbuf *
+bstp_input(struct bstp_port *bp, struct ifnet *ifp, struct mbuf *m)
+{
+ struct bstp_state *bs = bp->bp_bs;
+ struct ether_header *eh;
+ struct bstp_tbpdu tpdu;
+ uint16_t len;
+
+ if (bp->bp_active == 0) {
+ m_freem(m);
+ return (NULL);
+ }
+
+ BSTP_LOCK(bs);
+
+ eh = mtod(m, struct ether_header *);
+
+ len = ntohs(eh->ether_type);
+ if (len < sizeof(tpdu))
+ goto out;
+
+ m_adj(m, ETHER_HDR_LEN);
+
+ if (m->m_pkthdr.len > len)
+ m_adj(m, len - m->m_pkthdr.len);
+ if (m->m_len < sizeof(tpdu) &&
+ (m = m_pullup(m, sizeof(tpdu))) == NULL)
+ goto out;
+
+ memcpy(&tpdu, mtod(m, caddr_t), sizeof(tpdu));
+
+ /* basic packet checks */
+ if (tpdu.tbu_dsap != LLC_8021D_LSAP ||
+ tpdu.tbu_ssap != LLC_8021D_LSAP ||
+ tpdu.tbu_ctl != LLC_UI)
+ goto out;
+ if (tpdu.tbu_protoid != BSTP_PROTO_ID)
+ goto out;
+
+ /*
+ * We can treat later versions of the PDU as the same as the maximum
+ * version we implement. All additional parameters/flags are ignored.
+ */
+ if (tpdu.tbu_protover > BSTP_PROTO_MAX)
+ tpdu.tbu_protover = BSTP_PROTO_MAX;
+
+ if (tpdu.tbu_protover != bp->bp_protover) {
+ /*
+ * Wait for the migration delay timer to expire before changing
+ * protocol version to avoid flip-flops.
+ */
+ if (bp->bp_flags & BSTP_PORT_CANMIGRATE)
+ bstp_set_port_proto(bp, tpdu.tbu_protover);
+ else
+ goto out;
+ }
+
+ /* Clear operedge upon receiving a PDU on the port */
+ bp->bp_operedge = 0;
+ bstp_timer_start(&bp->bp_edge_delay_timer,
+ BSTP_DEFAULT_MIGRATE_DELAY);
+
+ switch (tpdu.tbu_protover) {
+ case BSTP_PROTO_STP:
+ bstp_received_stp(bs, bp, &m, &tpdu);
+ break;
+
+ case BSTP_PROTO_RSTP:
+ bstp_received_rstp(bs, bp, &m, &tpdu);
+ break;
+ }
+out:
+ BSTP_UNLOCK(bs);
+ if (m)
+ m_freem(m);
+ return (NULL);
+}
+
+static void
+bstp_received_stp(struct bstp_state *bs, struct bstp_port *bp,
+ struct mbuf **mp, struct bstp_tbpdu *tpdu)
+{
+ struct bstp_cbpdu cpdu;
+ struct bstp_config_unit *cu = &bp->bp_msg_cu;
+ struct bstp_tcn_unit tu;
+
+ switch (tpdu->tbu_bpdutype) {
+ case BSTP_MSGTYPE_TCN:
+ tu.tu_message_type = tpdu->tbu_bpdutype;
+ bstp_received_tcn(bs, bp, &tu);
+ break;
+ case BSTP_MSGTYPE_CFG:
+ if ((*mp)->m_len < BSTP_BPDU_STP_LEN &&
+ (*mp = m_pullup(*mp, BSTP_BPDU_STP_LEN)) == NULL)
+ return;
+ memcpy(&cpdu, mtod(*mp, caddr_t), BSTP_BPDU_STP_LEN);
+
+ bstp_decode_bpdu(bp, &cpdu, cu);
+ bstp_received_bpdu(bs, bp, cu);
+ break;
+ }
+}
+
+static void
+bstp_received_rstp(struct bstp_state *bs, struct bstp_port *bp,
+ struct mbuf **mp, struct bstp_tbpdu *tpdu)
+{
+ struct bstp_cbpdu cpdu;
+ struct bstp_config_unit *cu = &bp->bp_msg_cu;
+
+ if (tpdu->tbu_bpdutype != BSTP_MSGTYPE_RSTP)
+ return;
+
+ if ((*mp)->m_len < BSTP_BPDU_RSTP_LEN &&
+ (*mp = m_pullup(*mp, BSTP_BPDU_RSTP_LEN)) == NULL)
+ return;
+ memcpy(&cpdu, mtod(*mp, caddr_t), BSTP_BPDU_RSTP_LEN);
+
+ bstp_decode_bpdu(bp, &cpdu, cu);
+ bstp_received_bpdu(bs, bp, cu);
+}
+
+static void
+bstp_received_tcn(struct bstp_state *bs, struct bstp_port *bp,
+ struct bstp_tcn_unit *tcn)
+{
+ bp->bp_rcvdtcn = 1;
+ bstp_update_tc(bp);
+}
+
+static void
+bstp_received_bpdu(struct bstp_state *bs, struct bstp_port *bp,
+ struct bstp_config_unit *cu)
+{
+ int type;
+
+ BSTP_LOCK_ASSERT(bs);
+
+ /* We need to have transitioned to INFO_MINE before proceeding */
+ switch (bp->bp_infois) {
+ case BSTP_INFO_DISABLED:
+ case BSTP_INFO_AGED:
+ return;
+ }
+
+ type = bstp_pdu_rcvtype(bp, cu);
+
+ switch (type) {
+ case BSTP_PDU_SUPERIOR:
+ bs->bs_allsynced = 0;
+ bp->bp_agreed = 0;
+ bp->bp_proposing = 0;
+
+ if (cu->cu_proposal && cu->cu_forwarding == 0)
+ bp->bp_proposed = 1;
+ if (cu->cu_topology_change)
+ bp->bp_rcvdtc = 1;
+ if (cu->cu_topology_change_ack)
+ bp->bp_rcvdtca = 1;
+
+ if (bp->bp_agree &&
+ !bstp_pdu_bettersame(bp, BSTP_INFO_RECEIVED))
+ bp->bp_agree = 0;
+
+ /* copy the received priority and timers to the port */
+ bp->bp_port_pv = cu->cu_pv;
+ bp->bp_port_msg_age = cu->cu_message_age;
+ bp->bp_port_max_age = cu->cu_max_age;
+ bp->bp_port_fdelay = cu->cu_forward_delay;
+ bp->bp_port_htime =
+ (cu->cu_hello_time > BSTP_MIN_HELLO_TIME ?
+ cu->cu_hello_time : BSTP_MIN_HELLO_TIME);
+
+ /* set expiry for the new info */
+ bstp_set_timer_msgage(bp);
+
+ bp->bp_infois = BSTP_INFO_RECEIVED;
+ bstp_assign_roles(bs);
+ break;
+
+ case BSTP_PDU_REPEATED:
+ if (cu->cu_proposal && cu->cu_forwarding == 0)
+ bp->bp_proposed = 1;
+ if (cu->cu_topology_change)
+ bp->bp_rcvdtc = 1;
+ if (cu->cu_topology_change_ack)
+ bp->bp_rcvdtca = 1;
+
+ /* rearm the age timer */
+ bstp_set_timer_msgage(bp);
+ break;
+
+ case BSTP_PDU_INFERIOR:
+ if (cu->cu_learning) {
+ bp->bp_agreed = 1;
+ bp->bp_proposing = 0;
+ }
+ break;
+
+ case BSTP_PDU_INFERIORALT:
+ /*
+ * Only point-to-point links allow fast
+ * transitions to forwarding.
+ */
+ if (cu->cu_agree && bp->bp_ptp_link) {
+ bp->bp_agreed = 1;
+ bp->bp_proposing = 0;
+ } else
+ bp->bp_agreed = 0;
+
+ if (cu->cu_topology_change)
+ bp->bp_rcvdtc = 1;
+ if (cu->cu_topology_change_ack)
+ bp->bp_rcvdtca = 1;
+ break;
+
+ case BSTP_PDU_OTHER:
+ return; /* do nothing */
+ }
+ /* update the state machines with the new data */
+ bstp_update_state(bs, bp);
+}
+
+static int
+bstp_pdu_rcvtype(struct bstp_port *bp, struct bstp_config_unit *cu)
+{
+ int type;
+
+ /* default return type */
+ type = BSTP_PDU_OTHER;
+
+ switch (cu->cu_role) {
+ case BSTP_ROLE_DESIGNATED:
+ if (bstp_info_superior(&bp->bp_port_pv, &cu->cu_pv))
+ /* bpdu priority is superior */
+ type = BSTP_PDU_SUPERIOR;
+ else if (bstp_info_cmp(&bp->bp_port_pv, &cu->cu_pv) ==
+ INFO_SAME) {
+ if (bp->bp_port_msg_age != cu->cu_message_age ||
+ bp->bp_port_max_age != cu->cu_max_age ||
+ bp->bp_port_fdelay != cu->cu_forward_delay ||
+ bp->bp_port_htime != cu->cu_hello_time)
+ /* bpdu priority is equal and timers differ */
+ type = BSTP_PDU_SUPERIOR;
+ else
+ /* bpdu is equal */
+ type = BSTP_PDU_REPEATED;
+ } else
+ /* bpdu priority is worse */
+ type = BSTP_PDU_INFERIOR;
+
+ break;
+
+ case BSTP_ROLE_ROOT:
+ case BSTP_ROLE_ALTERNATE:
+ case BSTP_ROLE_BACKUP:
+ if (bstp_info_cmp(&bp->bp_port_pv, &cu->cu_pv) <= INFO_SAME)
+ /*
+ * not a designated port and priority is the same or
+ * worse
+ */
+ type = BSTP_PDU_INFERIORALT;
+ break;
+ }
+
+ return (type);
+}
+
+static int
+bstp_pdu_bettersame(struct bstp_port *bp, int newinfo)
+{
+ if (newinfo == BSTP_INFO_RECEIVED &&
+ bp->bp_infois == BSTP_INFO_RECEIVED &&
+ bstp_info_cmp(&bp->bp_port_pv, &bp->bp_msg_cu.cu_pv) >= INFO_SAME)
+ return (1);
+
+ if (newinfo == BSTP_INFO_MINE &&
+ bp->bp_infois == BSTP_INFO_MINE &&
+ bstp_info_cmp(&bp->bp_port_pv, &bp->bp_desg_pv) >= INFO_SAME)
+ return (1);
+
+ return (0);
+}
+
+static int
+bstp_info_cmp(struct bstp_pri_vector *pv,
+ struct bstp_pri_vector *cpv)
+{
+ if (cpv->pv_root_id < pv->pv_root_id)
+ return (INFO_BETTER);
+ if (cpv->pv_root_id > pv->pv_root_id)
+ return (INFO_WORSE);
+
+ if (cpv->pv_cost < pv->pv_cost)
+ return (INFO_BETTER);
+ if (cpv->pv_cost > pv->pv_cost)
+ return (INFO_WORSE);
+
+ if (cpv->pv_dbridge_id < pv->pv_dbridge_id)
+ return (INFO_BETTER);
+ if (cpv->pv_dbridge_id > pv->pv_dbridge_id)
+ return (INFO_WORSE);
+
+ if (cpv->pv_dport_id < pv->pv_dport_id)
+ return (INFO_BETTER);
+ if (cpv->pv_dport_id > pv->pv_dport_id)
+ return (INFO_WORSE);
+
+ return (INFO_SAME);
+}
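+/*
+ * That is, priority vectors compare lexicographically on (root id, root
+ * path cost, designated bridge id, designated port id) and numerically
+ * lower values win, per 802.1D-2004 clause 17.6. For example, two
+ * vectors with equal root ids but costs 8 and 12 make the cost-8 vector
+ * INFO_BETTER regardless of the remaining fields.
+ */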
+
+/*
+ * This message priority vector is superior to the port priority vector and
+ * will replace it if, and only if, the message priority vector is better than
+ * the port priority vector, or the message has been transmitted from the same
+ * designated bridge and designated port as the port priority vector.
+ */
+static int
+bstp_info_superior(struct bstp_pri_vector *pv,
+ struct bstp_pri_vector *cpv)
+{
+ if (bstp_info_cmp(pv, cpv) == INFO_BETTER ||
+ (bstp_same_bridgeid(pv->pv_dbridge_id, cpv->pv_dbridge_id) &&
+ (cpv->pv_dport_id & 0xfff) == (pv->pv_dport_id & 0xfff)))
+ return (1);
+ return (0);
+}
+
+static void
+bstp_assign_roles(struct bstp_state *bs)
+{
+ struct bstp_port *bp, *rbp = NULL;
+ struct bstp_pri_vector pv;
+
+ /* default to our priority vector */
+ bs->bs_root_pv = bs->bs_bridge_pv;
+ bs->bs_root_msg_age = 0;
+ bs->bs_root_max_age = bs->bs_bridge_max_age;
+ bs->bs_root_fdelay = bs->bs_bridge_fdelay;
+ bs->bs_root_htime = bs->bs_bridge_htime;
+ bs->bs_root_port = NULL;
+
+ /* check if any received info supersedes us */
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+ if (bp->bp_infois != BSTP_INFO_RECEIVED)
+ continue;
+
+ pv = bp->bp_port_pv;
+ pv.pv_cost += bp->bp_path_cost;
+
+ /*
+ * The root priority vector is the best of the set comprising
+ * the bridge priority vector plus all root path priority
+ * vectors whose bridge address is not equal to us.
+ */
+ if (bstp_same_bridgeid(pv.pv_dbridge_id,
+ bs->bs_bridge_pv.pv_dbridge_id) == 0 &&
+ bstp_info_cmp(&bs->bs_root_pv, &pv) == INFO_BETTER) {
+ /* the port vector replaces the root */
+ bs->bs_root_pv = pv;
+ bs->bs_root_msg_age = bp->bp_port_msg_age +
+ BSTP_MESSAGE_AGE_INCR;
+ bs->bs_root_max_age = bp->bp_port_max_age;
+ bs->bs_root_fdelay = bp->bp_port_fdelay;
+ bs->bs_root_htime = bp->bp_port_htime;
+ rbp = bp;
+ }
+ }
+
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+ /* calculate the port designated vector */
+ bp->bp_desg_pv.pv_root_id = bs->bs_root_pv.pv_root_id;
+ bp->bp_desg_pv.pv_cost = bs->bs_root_pv.pv_cost;
+ bp->bp_desg_pv.pv_dbridge_id = bs->bs_bridge_pv.pv_dbridge_id;
+ bp->bp_desg_pv.pv_dport_id = bp->bp_port_id;
+ bp->bp_desg_pv.pv_port_id = bp->bp_port_id;
+
+ /* calculate designated times */
+ bp->bp_desg_msg_age = bs->bs_root_msg_age;
+ bp->bp_desg_max_age = bs->bs_root_max_age;
+ bp->bp_desg_fdelay = bs->bs_root_fdelay;
+ bp->bp_desg_htime = bs->bs_bridge_htime;
+
+ switch (bp->bp_infois) {
+ case BSTP_INFO_DISABLED:
+ bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+ break;
+
+ case BSTP_INFO_AGED:
+ bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED);
+ bstp_update_info(bp);
+ break;
+
+ case BSTP_INFO_MINE:
+ bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED);
+ /* update the port info if stale */
+ if (bstp_info_cmp(&bp->bp_port_pv,
+ &bp->bp_desg_pv) != INFO_SAME ||
+ (rbp != NULL &&
+ (bp->bp_port_msg_age != rbp->bp_port_msg_age ||
+ bp->bp_port_max_age != rbp->bp_port_max_age ||
+ bp->bp_port_fdelay != rbp->bp_port_fdelay ||
+ bp->bp_port_htime != rbp->bp_port_htime)))
+ bstp_update_info(bp);
+ break;
+
+ case BSTP_INFO_RECEIVED:
+ if (bp == rbp) {
+ /*
+ * root priority is derived from this
+ * port, make it the root port.
+ */
+ bstp_set_port_role(bp, BSTP_ROLE_ROOT);
+ bs->bs_root_port = bp;
+ } else if (bstp_info_cmp(&bp->bp_port_pv,
+ &bp->bp_desg_pv) == INFO_BETTER) {
+ /*
+ * the port priority is lower than the root
+ * port.
+ */
+ bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED);
+ bstp_update_info(bp);
+ } else {
+ if (bstp_same_bridgeid(
+ bp->bp_port_pv.pv_dbridge_id,
+ bs->bs_bridge_pv.pv_dbridge_id)) {
+ /*
+ * the designated bridge refers to
+ * another port on this bridge.
+ */
+ bstp_set_port_role(bp,
+ BSTP_ROLE_BACKUP);
+ } else {
+ /*
+ * the port is an inferior path to the
+ * root bridge.
+ */
+ bstp_set_port_role(bp,
+ BSTP_ROLE_ALTERNATE);
+ }
+ }
+ break;
+ }
+ }
+}
+
+static void
+bstp_update_state(struct bstp_state *bs, struct bstp_port *bp)
+{
+ struct bstp_port *bp2;
+ int synced;
+
+ BSTP_LOCK_ASSERT(bs);
+
+ /* check if all the ports have synchronised again */
+ if (!bs->bs_allsynced) {
+ synced = 1;
+ LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) {
+ if (!(bp2->bp_synced ||
+ bp2->bp_role == BSTP_ROLE_ROOT)) {
+ synced = 0;
+ break;
+ }
+ }
+ bs->bs_allsynced = synced;
+ }
+
+ bstp_update_roles(bs, bp);
+ bstp_update_tc(bp);
+}
+
+static void
+bstp_update_roles(struct bstp_state *bs, struct bstp_port *bp)
+{
+ switch (bp->bp_role) {
+ case BSTP_ROLE_DISABLED:
+ /* Clear any flags if set */
+ if (bp->bp_sync || !bp->bp_synced || bp->bp_reroot) {
+ bp->bp_sync = 0;
+ bp->bp_synced = 1;
+ bp->bp_reroot = 0;
+ }
+ break;
+
+ case BSTP_ROLE_ALTERNATE:
+ case BSTP_ROLE_BACKUP:
+ if ((bs->bs_allsynced && !bp->bp_agree) ||
+ (bp->bp_proposed && bp->bp_agree)) {
+ bp->bp_proposed = 0;
+ bp->bp_agree = 1;
+ bp->bp_flags |= BSTP_PORT_NEWINFO;
+ DPRINTF("%s -> ALTERNATE_AGREED\n",
+ bp->bp_ifp->if_xname);
+ }
+
+ if (bp->bp_proposed && !bp->bp_agree) {
+ bstp_set_all_sync(bs);
+ bp->bp_proposed = 0;
+ DPRINTF("%s -> ALTERNATE_PROPOSED\n",
+ bp->bp_ifp->if_xname);
+ }
+
+ /* Clear any flags if set */
+ if (bp->bp_sync || !bp->bp_synced || bp->bp_reroot) {
+ bp->bp_sync = 0;
+ bp->bp_synced = 1;
+ bp->bp_reroot = 0;
+ DPRINTF("%s -> ALTERNATE_PORT\n", bp->bp_ifp->if_xname);
+ }
+ break;
+
+ case BSTP_ROLE_ROOT:
+ if (bp->bp_state != BSTP_IFSTATE_FORWARDING && !bp->bp_reroot) {
+ bstp_set_all_reroot(bs);
+ DPRINTF("%s -> ROOT_REROOT\n", bp->bp_ifp->if_xname);
+ }
+
+ if ((bs->bs_allsynced && !bp->bp_agree) ||
+ (bp->bp_proposed && bp->bp_agree)) {
+ bp->bp_proposed = 0;
+ bp->bp_sync = 0;
+ bp->bp_agree = 1;
+ bp->bp_flags |= BSTP_PORT_NEWINFO;
+ DPRINTF("%s -> ROOT_AGREED\n", bp->bp_ifp->if_xname);
+ }
+
+ if (bp->bp_proposed && !bp->bp_agree) {
+ bstp_set_all_sync(bs);
+ bp->bp_proposed = 0;
+ DPRINTF("%s -> ROOT_PROPOSED\n", bp->bp_ifp->if_xname);
+ }
+
+ if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
+ (bp->bp_forward_delay_timer.active == 0 ||
+ (bstp_rerooted(bs, bp) &&
+ bp->bp_recent_backup_timer.active == 0 &&
+ bp->bp_protover == BSTP_PROTO_RSTP))) {
+ switch (bp->bp_state) {
+ case BSTP_IFSTATE_DISCARDING:
+ bstp_set_port_state(bp, BSTP_IFSTATE_LEARNING);
+ break;
+ case BSTP_IFSTATE_LEARNING:
+ bstp_set_port_state(bp,
+ BSTP_IFSTATE_FORWARDING);
+ break;
+ }
+ }
+
+ if (bp->bp_state == BSTP_IFSTATE_FORWARDING && bp->bp_reroot) {
+ bp->bp_reroot = 0;
+ DPRINTF("%s -> ROOT_REROOTED\n", bp->bp_ifp->if_xname);
+ }
+ break;
+
+ case BSTP_ROLE_DESIGNATED:
+ if (bp->bp_recent_root_timer.active == 0 && bp->bp_reroot) {
+ bp->bp_reroot = 0;
+ DPRINTF("%s -> DESIGNATED_RETIRED\n",
+ bp->bp_ifp->if_xname);
+ }
+
+ if ((bp->bp_state == BSTP_IFSTATE_DISCARDING &&
+ !bp->bp_synced) || (bp->bp_agreed && !bp->bp_synced) ||
+ (bp->bp_operedge && !bp->bp_synced) ||
+ (bp->bp_sync && bp->bp_synced)) {
+ bstp_timer_stop(&bp->bp_recent_root_timer);
+ bp->bp_synced = 1;
+ bp->bp_sync = 0;
+ DPRINTF("%s -> DESIGNATED_SYNCED\n",
+ bp->bp_ifp->if_xname);
+ }
+
+ if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
+ !bp->bp_agreed && !bp->bp_proposing &&
+ !bp->bp_operedge) {
+ bp->bp_proposing = 1;
+ bp->bp_flags |= BSTP_PORT_NEWINFO;
+ bstp_timer_start(&bp->bp_edge_delay_timer,
+ (bp->bp_ptp_link ? BSTP_DEFAULT_MIGRATE_DELAY :
+ bp->bp_desg_max_age));
+ DPRINTF("%s -> DESIGNATED_PROPOSE\n",
+ bp->bp_ifp->if_xname);
+ }
+
+ if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
+ (bp->bp_forward_delay_timer.active == 0 || bp->bp_agreed ||
+ bp->bp_operedge) &&
+ (bp->bp_recent_root_timer.active == 0 || !bp->bp_reroot) &&
+ !bp->bp_sync) {
+ if (bp->bp_agreed)
+ DPRINTF("%s -> AGREED\n", bp->bp_ifp->if_xname);
+ /*
+ * If agreed|operedge then go straight to forwarding,
+ * otherwise follow discard -> learn -> forward.
+ */
+ if (bp->bp_agreed || bp->bp_operedge ||
+ bp->bp_state == BSTP_IFSTATE_LEARNING) {
+ bstp_set_port_state(bp,
+ BSTP_IFSTATE_FORWARDING);
+ bp->bp_agreed = bp->bp_protover;
+ } else if (bp->bp_state == BSTP_IFSTATE_DISCARDING)
+ bstp_set_port_state(bp, BSTP_IFSTATE_LEARNING);
+ }
+
+ if (((bp->bp_sync && !bp->bp_synced) ||
+ (bp->bp_reroot && bp->bp_recent_root_timer.active) ||
+ (bp->bp_flags & BSTP_PORT_DISPUTED)) && !bp->bp_operedge &&
+ bp->bp_state != BSTP_IFSTATE_DISCARDING) {
+ bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+ bp->bp_flags &= ~BSTP_PORT_DISPUTED;
+ bstp_timer_start(&bp->bp_forward_delay_timer,
+ bp->bp_protover == BSTP_PROTO_RSTP ?
+ bp->bp_desg_htime : bp->bp_desg_fdelay);
+ DPRINTF("%s -> DESIGNATED_DISCARD\n",
+ bp->bp_ifp->if_xname);
+ }
+ break;
+ }
+
+ if (bp->bp_flags & BSTP_PORT_NEWINFO)
+ bstp_transmit(bs, bp);
+}
+
+static void
+bstp_update_tc(struct bstp_port *bp)
+{
+ switch (bp->bp_tcstate) {
+ case BSTP_TCSTATE_ACTIVE:
+ if ((bp->bp_role != BSTP_ROLE_DESIGNATED &&
+ bp->bp_role != BSTP_ROLE_ROOT) || bp->bp_operedge)
+ bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
+
+ if (bp->bp_rcvdtcn)
+ bstp_set_port_tc(bp, BSTP_TCSTATE_TCN);
+ if (bp->bp_rcvdtc)
+ bstp_set_port_tc(bp, BSTP_TCSTATE_TC);
+
+ if (bp->bp_tc_prop && !bp->bp_operedge)
+ bstp_set_port_tc(bp, BSTP_TCSTATE_PROPAG);
+
+ if (bp->bp_rcvdtca)
+ bstp_set_port_tc(bp, BSTP_TCSTATE_ACK);
+ break;
+
+ case BSTP_TCSTATE_INACTIVE:
+ if ((bp->bp_state == BSTP_IFSTATE_LEARNING ||
+ bp->bp_state == BSTP_IFSTATE_FORWARDING) &&
+ bp->bp_fdbflush == 0)
+ bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
+ break;
+
+ case BSTP_TCSTATE_LEARNING:
+ if (bp->bp_rcvdtc || bp->bp_rcvdtcn || bp->bp_rcvdtca ||
+ bp->bp_tc_prop)
+ bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
+ else if (bp->bp_role != BSTP_ROLE_DESIGNATED &&
+ bp->bp_role != BSTP_ROLE_ROOT &&
+ bp->bp_state == BSTP_IFSTATE_DISCARDING)
+ bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE);
+
+ if ((bp->bp_role == BSTP_ROLE_DESIGNATED ||
+ bp->bp_role == BSTP_ROLE_ROOT) &&
+ bp->bp_state == BSTP_IFSTATE_FORWARDING &&
+ !bp->bp_operedge)
+ bstp_set_port_tc(bp, BSTP_TCSTATE_DETECTED);
+ break;
+
+ /* these are transient states and go straight back to ACTIVE */
+ case BSTP_TCSTATE_DETECTED:
+ case BSTP_TCSTATE_TCN:
+ case BSTP_TCSTATE_TC:
+ case BSTP_TCSTATE_PROPAG:
+ case BSTP_TCSTATE_ACK:
+ DPRINTF("Invalid TC state for %s\n",
+ bp->bp_ifp->if_xname);
+ break;
+ }
+
+}
+
+static void
+bstp_update_info(struct bstp_port *bp)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ bp->bp_proposing = 0;
+ bp->bp_proposed = 0;
+
+ if (bp->bp_agreed && !bstp_pdu_bettersame(bp, BSTP_INFO_MINE))
+ bp->bp_agreed = 0;
+
+ if (bp->bp_synced && !bp->bp_agreed) {
+ bp->bp_synced = 0;
+ bs->bs_allsynced = 0;
+ }
+
+ /* copy the designated pv to the port */
+ bp->bp_port_pv = bp->bp_desg_pv;
+ bp->bp_port_msg_age = bp->bp_desg_msg_age;
+ bp->bp_port_max_age = bp->bp_desg_max_age;
+ bp->bp_port_fdelay = bp->bp_desg_fdelay;
+ bp->bp_port_htime = bp->bp_desg_htime;
+ bp->bp_infois = BSTP_INFO_MINE;
+
+ /* Set transmit flag but do not immediately send */
+ bp->bp_flags |= BSTP_PORT_NEWINFO;
+}
+
+/* set tcprop on every port other than the caller */
+static void
+bstp_set_other_tcprop(struct bstp_port *bp)
+{
+ struct bstp_state *bs = bp->bp_bs;
+ struct bstp_port *bp2;
+
+ BSTP_LOCK_ASSERT(bs);
+
+ LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) {
+ if (bp2 == bp)
+ continue;
+ bp2->bp_tc_prop = 1;
+ }
+}
+
+static void
+bstp_set_all_reroot(struct bstp_state *bs)
+{
+ struct bstp_port *bp;
+
+ BSTP_LOCK_ASSERT(bs);
+
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+ bp->bp_reroot = 1;
+}
+
+static void
+bstp_set_all_sync(struct bstp_state *bs)
+{
+ struct bstp_port *bp;
+
+ BSTP_LOCK_ASSERT(bs);
+
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+ bp->bp_sync = 1;
+ bp->bp_synced = 0; /* Not explicit in spec */
+ }
+
+ bs->bs_allsynced = 0;
+}
+
+static void
+bstp_set_port_state(struct bstp_port *bp, int state)
+{
+ if (bp->bp_state == state)
+ return;
+
+ bp->bp_state = state;
+
+ switch (bp->bp_state) {
+ case BSTP_IFSTATE_DISCARDING:
+ DPRINTF("state changed to DISCARDING on %s\n",
+ bp->bp_ifp->if_xname);
+ break;
+
+ case BSTP_IFSTATE_LEARNING:
+ DPRINTF("state changed to LEARNING on %s\n",
+ bp->bp_ifp->if_xname);
+
+ bstp_timer_start(&bp->bp_forward_delay_timer,
+ bp->bp_protover == BSTP_PROTO_RSTP ?
+ bp->bp_desg_htime : bp->bp_desg_fdelay);
+ break;
+
+ case BSTP_IFSTATE_FORWARDING:
+ DPRINTF("state changed to FORWARDING on %s\n",
+ bp->bp_ifp->if_xname);
+
+ bstp_timer_stop(&bp->bp_forward_delay_timer);
+ /* Record that we enabled forwarding */
+ bp->bp_forward_transitions++;
+ break;
+ }
+
+ /* notify the parent bridge */
+ taskqueue_enqueue(taskqueue_swi, &bp->bp_statetask);
+}
+
+static void
+bstp_set_port_role(struct bstp_port *bp, int role)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ if (bp->bp_role == role)
+ return;
+
+ /* perform pre-change tasks */
+ switch (bp->bp_role) {
+ case BSTP_ROLE_DISABLED:
+ bstp_timer_start(&bp->bp_forward_delay_timer,
+ bp->bp_desg_max_age);
+ break;
+
+ case BSTP_ROLE_BACKUP:
+ bstp_timer_start(&bp->bp_recent_backup_timer,
+ bp->bp_desg_htime * 2);
+ /* fall through */
+ case BSTP_ROLE_ALTERNATE:
+ bstp_timer_start(&bp->bp_forward_delay_timer,
+ bp->bp_desg_fdelay);
+ bp->bp_sync = 0;
+ bp->bp_synced = 1;
+ bp->bp_reroot = 0;
+ break;
+
+ case BSTP_ROLE_ROOT:
+ bstp_timer_start(&bp->bp_recent_root_timer,
+ BSTP_DEFAULT_FORWARD_DELAY);
+ break;
+ }
+
+ bp->bp_role = role;
+ /* clear values not carried between roles */
+ bp->bp_proposing = 0;
+ bs->bs_allsynced = 0;
+
+ /* initialise the new role */
+ switch (bp->bp_role) {
+ case BSTP_ROLE_DISABLED:
+ case BSTP_ROLE_ALTERNATE:
+ case BSTP_ROLE_BACKUP:
+ DPRINTF("%s role -> ALT/BACK/DISABLED\n",
+ bp->bp_ifp->if_xname);
+ bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+ bstp_timer_stop(&bp->bp_recent_root_timer);
+ bstp_timer_latch(&bp->bp_forward_delay_timer);
+ bp->bp_sync = 0;
+ bp->bp_synced = 1;
+ bp->bp_reroot = 0;
+ break;
+
+ case BSTP_ROLE_ROOT:
+ DPRINTF("%s role -> ROOT\n",
+ bp->bp_ifp->if_xname);
+ bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+ bstp_timer_latch(&bp->bp_recent_root_timer);
+ bp->bp_proposing = 0;
+ break;
+
+ case BSTP_ROLE_DESIGNATED:
+ DPRINTF("%s role -> DESIGNATED\n",
+ bp->bp_ifp->if_xname);
+ bstp_timer_start(&bp->bp_hello_timer,
+ bp->bp_desg_htime);
+ bp->bp_agree = 0;
+ break;
+ }
+
+ /* let the TC state know that the role changed */
+ bstp_update_tc(bp);
+}
+
+static void
+bstp_set_port_proto(struct bstp_port *bp, int proto)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ /* supported protocol versions */
+ switch (proto) {
+ case BSTP_PROTO_STP:
+ /* we can downgrade protocols only */
+ bstp_timer_stop(&bp->bp_migrate_delay_timer);
+ /* clear unsupported features */
+ bp->bp_operedge = 0;
+ /* STP compat mode only uses 16 bits of the 32 */
+ if (bp->bp_path_cost > 65535)
+ bp->bp_path_cost = 65535;
+ break;
+
+ case BSTP_PROTO_RSTP:
+ bstp_timer_start(&bp->bp_migrate_delay_timer,
+ bs->bs_migration_delay);
+ break;
+
+ default:
+ DPRINTF("Unsupported STP version %d\n", proto);
+ return;
+ }
+
+ bp->bp_protover = proto;
+ bp->bp_flags &= ~BSTP_PORT_CANMIGRATE;
+}
+
+static void
+bstp_set_port_tc(struct bstp_port *bp, int state)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ bp->bp_tcstate = state;
+
+ /* initialise the new state */
+ switch (bp->bp_tcstate) {
+ case BSTP_TCSTATE_ACTIVE:
+ DPRINTF("%s -> TC_ACTIVE\n", bp->bp_ifp->if_xname);
+ /* nothing to do */
+ break;
+
+ case BSTP_TCSTATE_INACTIVE:
+ bstp_timer_stop(&bp->bp_tc_timer);
+ /* flush routes on the parent bridge */
+ bp->bp_fdbflush = 1;
+ taskqueue_enqueue(taskqueue_swi, &bp->bp_rtagetask);
+ bp->bp_tc_ack = 0;
+ DPRINTF("%s -> TC_INACTIVE\n", bp->bp_ifp->if_xname);
+ break;
+
+ case BSTP_TCSTATE_LEARNING:
+ bp->bp_rcvdtc = 0;
+ bp->bp_rcvdtcn = 0;
+ bp->bp_rcvdtca = 0;
+ bp->bp_tc_prop = 0;
+ DPRINTF("%s -> TC_LEARNING\n", bp->bp_ifp->if_xname);
+ break;
+
+ case BSTP_TCSTATE_DETECTED:
+ bstp_set_timer_tc(bp);
+ bstp_set_other_tcprop(bp);
+ /* send out notification */
+ bp->bp_flags |= BSTP_PORT_NEWINFO;
+ bstp_transmit(bs, bp);
+ getmicrotime(&bs->bs_last_tc_time);
+ DPRINTF("%s -> TC_DETECTED\n", bp->bp_ifp->if_xname);
+ bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+ break;
+
+ case BSTP_TCSTATE_TCN:
+ bstp_set_timer_tc(bp);
+ DPRINTF("%s -> TC_TCN\n", bp->bp_ifp->if_xname);
+ /* fall through */
+ case BSTP_TCSTATE_TC:
+ bp->bp_rcvdtc = 0;
+ bp->bp_rcvdtcn = 0;
+ if (bp->bp_role == BSTP_ROLE_DESIGNATED)
+ bp->bp_tc_ack = 1;
+
+ bstp_set_other_tcprop(bp);
+ DPRINTF("%s -> TC_TC\n", bp->bp_ifp->if_xname);
+ bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+ break;
+
+ case BSTP_TCSTATE_PROPAG:
+ /* flush routes on the parent bridge */
+ bp->bp_fdbflush = 1;
+ taskqueue_enqueue(taskqueue_swi, &bp->bp_rtagetask);
+ bp->bp_tc_prop = 0;
+ bstp_set_timer_tc(bp);
+ DPRINTF("%s -> TC_PROPAG\n", bp->bp_ifp->if_xname);
+ bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+ break;
+
+ case BSTP_TCSTATE_ACK:
+ bstp_timer_stop(&bp->bp_tc_timer);
+ bp->bp_rcvdtca = 0;
+ DPRINTF("%s -> TC_ACK\n", bp->bp_ifp->if_xname);
+ bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+ break;
+ }
+}
+
+static void
+bstp_set_timer_tc(struct bstp_port *bp)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ if (bp->bp_tc_timer.active)
+ return;
+
+ switch (bp->bp_protover) {
+ case BSTP_PROTO_RSTP:
+ bstp_timer_start(&bp->bp_tc_timer,
+ bp->bp_desg_htime + BSTP_TICK_VAL);
+ bp->bp_flags |= BSTP_PORT_NEWINFO;
+ break;
+
+ case BSTP_PROTO_STP:
+ bstp_timer_start(&bp->bp_tc_timer,
+ bs->bs_root_max_age + bs->bs_root_fdelay);
+ break;
+ }
+}
+
+static void
+bstp_set_timer_msgage(struct bstp_port *bp)
+{
+ if (bp->bp_port_msg_age + BSTP_MESSAGE_AGE_INCR <=
+ bp->bp_port_max_age) {
+ bstp_timer_start(&bp->bp_message_age_timer,
+ bp->bp_port_htime * 3);
+ } else
+ /* expires immediately */
+ bstp_timer_start(&bp->bp_message_age_timer, 0);
+}
+
+static int
+bstp_rerooted(struct bstp_state *bs, struct bstp_port *bp)
+{
+ struct bstp_port *bp2;
+ int rr_set = 0;
+
+ LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) {
+ if (bp2 == bp)
+ continue;
+ if (bp2->bp_recent_root_timer.active) {
+ rr_set = 1;
+ break;
+ }
+ }
+ return (!rr_set);
+}
+
+int
+bstp_set_htime(struct bstp_state *bs, int t)
+{
+ /* convert seconds to ticks */
+ t *= BSTP_TICK_VAL;
+
+ /* value can only be changed in legacy STP mode */
+ if (bs->bs_protover != BSTP_PROTO_STP)
+ return (EPERM);
+
+ if (t < BSTP_MIN_HELLO_TIME || t > BSTP_MAX_HELLO_TIME)
+ return (EINVAL);
+
+ BSTP_LOCK(bs);
+ bs->bs_bridge_htime = t;
+ bstp_reinit(bs);
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_fdelay(struct bstp_state *bs, int t)
+{
+ /* convert seconds to ticks */
+ t *= BSTP_TICK_VAL;
+
+ if (t < BSTP_MIN_FORWARD_DELAY || t > BSTP_MAX_FORWARD_DELAY)
+ return (EINVAL);
+
+ BSTP_LOCK(bs);
+ bs->bs_bridge_fdelay = t;
+ bstp_reinit(bs);
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_maxage(struct bstp_state *bs, int t)
+{
+ /* convert seconds to ticks */
+ t *= BSTP_TICK_VAL;
+
+ if (t < BSTP_MIN_MAX_AGE || t > BSTP_MAX_MAX_AGE)
+ return (EINVAL);
+
+ BSTP_LOCK(bs);
+ bs->bs_bridge_max_age = t;
+ bstp_reinit(bs);
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_holdcount(struct bstp_state *bs, int count)
+{
+ struct bstp_port *bp;
+
+ if (count < BSTP_MIN_HOLD_COUNT ||
+ count > BSTP_MAX_HOLD_COUNT)
+ return (EINVAL);
+
+ BSTP_LOCK(bs);
+ bs->bs_txholdcount = count;
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+ bp->bp_txcount = 0;
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_protocol(struct bstp_state *bs, int proto)
+{
+ struct bstp_port *bp;
+
+ switch (proto) {
+ /* Supported protocol versions */
+ case BSTP_PROTO_STP:
+ case BSTP_PROTO_RSTP:
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ BSTP_LOCK(bs);
+ bs->bs_protover = proto;
+ bs->bs_bridge_htime = BSTP_DEFAULT_HELLO_TIME;
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+ /* reinit state */
+ bp->bp_infois = BSTP_INFO_DISABLED;
+ bp->bp_txcount = 0;
+ bstp_set_port_proto(bp, bs->bs_protover);
+ bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+ bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE);
+ bstp_timer_stop(&bp->bp_recent_backup_timer);
+ }
+ bstp_reinit(bs);
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_priority(struct bstp_state *bs, int pri)
+{
+ if (pri < 0 || pri > BSTP_MAX_PRIORITY)
+ return (EINVAL);
+
+ /* Limit to steps of 4096 */
+ pri -= pri % 4096;
+
+ BSTP_LOCK(bs);
+ bs->bs_bridge_priority = pri;
+ bstp_reinit(bs);
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_port_priority(struct bstp_port *bp, int pri)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ if (pri < 0 || pri > BSTP_MAX_PORT_PRIORITY)
+ return (EINVAL);
+
+ /* Limit to steps of 16 */
+ pri -= pri % 16;
+
+ BSTP_LOCK(bs);
+ bp->bp_priority = pri;
+ bstp_reinit(bs);
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_path_cost(struct bstp_port *bp, uint32_t path_cost)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ if (path_cost > BSTP_MAX_PATH_COST)
+ return (EINVAL);
+
+ /* STP compat mode only uses 16 bits of the 32 */
+ if (bp->bp_protover == BSTP_PROTO_STP && path_cost > 65535)
+ path_cost = 65535;
+
+ BSTP_LOCK(bs);
+
+ if (path_cost == 0) { /* use auto */
+ bp->bp_flags &= ~BSTP_PORT_ADMCOST;
+ bp->bp_path_cost = bstp_calc_path_cost(bp);
+ } else {
+ bp->bp_path_cost = path_cost;
+ bp->bp_flags |= BSTP_PORT_ADMCOST;
+ }
+ bstp_reinit(bs);
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_edge(struct bstp_port *bp, int set)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ BSTP_LOCK(bs);
+ if ((bp->bp_operedge = set) == 0)
+ bp->bp_flags &= ~BSTP_PORT_ADMEDGE;
+ else
+ bp->bp_flags |= BSTP_PORT_ADMEDGE;
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_autoedge(struct bstp_port *bp, int set)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ BSTP_LOCK(bs);
+ if (set) {
+ bp->bp_flags |= BSTP_PORT_AUTOEDGE;
+ /* we may be able to transition straight to edge */
+ if (bp->bp_edge_delay_timer.active == 0)
+ bstp_edge_delay_expiry(bs, bp);
+ } else
+ bp->bp_flags &= ~BSTP_PORT_AUTOEDGE;
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_ptp(struct bstp_port *bp, int set)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ BSTP_LOCK(bs);
+ bp->bp_ptp_link = set;
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_set_autoptp(struct bstp_port *bp, int set)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ BSTP_LOCK(bs);
+ if (set) {
+ bp->bp_flags |= BSTP_PORT_AUTOPTP;
+ if (bp->bp_role != BSTP_ROLE_DISABLED)
+ bstp_ifupdstatus(bs, bp);
+ } else
+ bp->bp_flags &= ~BSTP_PORT_AUTOPTP;
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+/*
+ * Calculate the path cost according to the link speed.
+ */
+static uint32_t
+bstp_calc_path_cost(struct bstp_port *bp)
+{
+ struct ifnet *ifp = bp->bp_ifp;
+ uint32_t path_cost;
+
+	/* If the path cost has been manually set then retain the value */
+ if (bp->bp_flags & BSTP_PORT_ADMCOST)
+ return bp->bp_path_cost;
+
+ if (ifp->if_link_state == LINK_STATE_DOWN) {
+ /* Recalc when the link comes up again */
+ bp->bp_flags |= BSTP_PORT_PNDCOST;
+ return (BSTP_DEFAULT_PATH_COST);
+ }
+
+ if (ifp->if_baudrate < 1000)
+ return (BSTP_DEFAULT_PATH_COST);
+
+ /* formula from section 17.14, IEEE Std 802.1D-2004 */
+ path_cost = 20000000000ULL / (ifp->if_baudrate / 1000);
+
+ if (path_cost > BSTP_MAX_PATH_COST)
+ path_cost = BSTP_MAX_PATH_COST;
+
+ /* STP compat mode only uses 16 bits of the 32 */
+ if (bp->bp_protover == BSTP_PROTO_STP && path_cost > 65535)
+ path_cost = 65535;
+
+ return (path_cost);
+}
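+
+/*
+ * Example: for a 1 Gb/s link, if_baudrate is 1000000000, so the cost is
+ * 20000000000 / (1000000000 / 1000) == 20000, matching the value
+ * recommended by IEEE Std 802.1D-2004 for 1 Gb/s links.
+ */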
+
+/*
+ * Notify the bridge that a port state has changed; this is done from a
+ * taskqueue to avoid a LOR (lock order reversal).
+ */
+static void
+bstp_notify_state(void *arg, int pending)
+{
+ struct bstp_port *bp = (struct bstp_port *)arg;
+ struct bstp_state *bs = bp->bp_bs;
+
+ if (bp->bp_active == 1 && bs->bs_state_cb != NULL)
+ (*bs->bs_state_cb)(bp->bp_ifp, bp->bp_state);
+}
+
+/*
+ * Flush the routes on the bridge port; this is done from a taskqueue
+ * to avoid a LOR.
+ */
+static void
+bstp_notify_rtage(void *arg, int pending)
+{
+ struct bstp_port *bp = (struct bstp_port *)arg;
+ struct bstp_state *bs = bp->bp_bs;
+ int age = 0;
+
+ BSTP_LOCK(bs);
+ switch (bp->bp_protover) {
+ case BSTP_PROTO_STP:
+ /* convert to seconds */
+ age = bp->bp_desg_fdelay / BSTP_TICK_VAL;
+ break;
+
+ case BSTP_PROTO_RSTP:
+ age = 0;
+ break;
+ }
+ BSTP_UNLOCK(bs);
+
+ if (bp->bp_active == 1 && bs->bs_rtage_cb != NULL)
+ (*bs->bs_rtage_cb)(bp->bp_ifp, age);
+
+ /* flush is complete */
+ BSTP_LOCK(bs);
+ bp->bp_fdbflush = 0;
+ BSTP_UNLOCK(bs);
+}
+
+void
+bstp_linkstate(struct ifnet *ifp, int state)
+{
+ struct bstp_state *bs;
+ struct bstp_port *bp;
+
+ /* search for the stp port */
+ mtx_lock(&bstp_list_mtx);
+ LIST_FOREACH(bs, &bstp_list, bs_list) {
+ BSTP_LOCK(bs);
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+ if (bp->bp_ifp == ifp) {
+ bstp_ifupdstatus(bs, bp);
+ bstp_update_state(bs, bp);
+ /* it only exists once so return */
+ BSTP_UNLOCK(bs);
+ mtx_unlock(&bstp_list_mtx);
+ return;
+ }
+ }
+ BSTP_UNLOCK(bs);
+ }
+ mtx_unlock(&bstp_list_mtx);
+}
+
+static void
+bstp_ifupdstatus(struct bstp_state *bs, struct bstp_port *bp)
+{
+ struct ifnet *ifp = bp->bp_ifp;
+ struct ifmediareq ifmr;
+ int error = 0;
+
+ BSTP_LOCK_ASSERT(bs);
+
+ bzero((char *)&ifmr, sizeof(ifmr));
+ error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
+
+ if ((error == 0) && (ifp->if_flags & IFF_UP)) {
+ if (ifmr.ifm_status & IFM_ACTIVE) {
+			/* A full-duplex link is assumed to be point-to-point */
+ if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
+ bp->bp_ptp_link =
+ ifmr.ifm_active & IFM_FDX ? 1 : 0;
+ }
+
+ /* Calc the cost if the link was down previously */
+ if (bp->bp_flags & BSTP_PORT_PNDCOST) {
+ bp->bp_path_cost = bstp_calc_path_cost(bp);
+ bp->bp_flags &= ~BSTP_PORT_PNDCOST;
+ }
+
+ if (bp->bp_role == BSTP_ROLE_DISABLED)
+ bstp_enable_port(bs, bp);
+ } else {
+ if (bp->bp_role != BSTP_ROLE_DISABLED) {
+ bstp_disable_port(bs, bp);
+ if ((bp->bp_flags & BSTP_PORT_ADMEDGE) &&
+ bp->bp_protover == BSTP_PROTO_RSTP)
+ bp->bp_operedge = 1;
+ }
+ }
+ return;
+ }
+
+ if (bp->bp_infois != BSTP_INFO_DISABLED)
+ bstp_disable_port(bs, bp);
+}
+
+static void
+bstp_enable_port(struct bstp_state *bs, struct bstp_port *bp)
+{
+ bp->bp_infois = BSTP_INFO_AGED;
+ bstp_assign_roles(bs);
+}
+
+static void
+bstp_disable_port(struct bstp_state *bs, struct bstp_port *bp)
+{
+ bp->bp_infois = BSTP_INFO_DISABLED;
+ bstp_assign_roles(bs);
+}
+
+static void
+bstp_tick(void *arg)
+{
+ struct bstp_state *bs = arg;
+ struct bstp_port *bp;
+
+ BSTP_LOCK_ASSERT(bs);
+
+ if (bs->bs_running == 0)
+ return;
+
+ /* slow timer to catch missed link events */
+ if (bstp_timer_expired(&bs->bs_link_timer)) {
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+ bstp_ifupdstatus(bs, bp);
+ bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER);
+ }
+
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+ /* no events need to happen for these */
+ bstp_timer_expired(&bp->bp_tc_timer);
+ bstp_timer_expired(&bp->bp_recent_root_timer);
+ bstp_timer_expired(&bp->bp_forward_delay_timer);
+ bstp_timer_expired(&bp->bp_recent_backup_timer);
+
+ if (bstp_timer_expired(&bp->bp_hello_timer))
+ bstp_hello_timer_expiry(bs, bp);
+
+ if (bstp_timer_expired(&bp->bp_message_age_timer))
+ bstp_message_age_expiry(bs, bp);
+
+ if (bstp_timer_expired(&bp->bp_migrate_delay_timer))
+ bstp_migrate_delay_expiry(bs, bp);
+
+ if (bstp_timer_expired(&bp->bp_edge_delay_timer))
+ bstp_edge_delay_expiry(bs, bp);
+
+ /* update the various state machines for the port */
+ bstp_update_state(bs, bp);
+
+ if (bp->bp_txcount > 0)
+ bp->bp_txcount--;
+ }
+
+ callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs);
+}
+
+static void
+bstp_timer_start(struct bstp_timer *t, uint16_t v)
+{
+ t->value = v;
+ t->active = 1;
+ t->latched = 0;
+}
+
+static void
+bstp_timer_stop(struct bstp_timer *t)
+{
+ t->value = 0;
+ t->active = 0;
+ t->latched = 0;
+}
+
+static void
+bstp_timer_latch(struct bstp_timer *t)
+{
+ t->latched = 1;
+ t->active = 1;
+}
+
+static int
+bstp_timer_expired(struct bstp_timer *t)
+{
+ if (t->active == 0 || t->latched)
+ return (0);
+ t->value -= BSTP_TICK_VAL;
+ if (t->value <= 0) {
+ bstp_timer_stop(t);
+ return (1);
+ }
+ return (0);
+}
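+
+/*
+ * The helpers above implement simple countdown timers driven by
+ * bstp_tick(), which runs once per second.  A sketch of the lifecycle:
+ * bstp_timer_start(&bp->bp_hello_timer, bp->bp_desg_htime) arms the
+ * timer (e.g. 2 * 256 ticks for the default hello time); each call to
+ * bstp_timer_expired() subtracts BSTP_TICK_VAL, and on the call that
+ * drives the value to zero the timer is stopped and 1 is returned so
+ * the caller can run the expiry handler.  A latched timer stays active
+ * without counting down until it is restarted or stopped.
+ */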
+
+static void
+bstp_hello_timer_expiry(struct bstp_state *bs, struct bstp_port *bp)
+{
+ if ((bp->bp_flags & BSTP_PORT_NEWINFO) ||
+ bp->bp_role == BSTP_ROLE_DESIGNATED ||
+ (bp->bp_role == BSTP_ROLE_ROOT &&
+ bp->bp_tc_timer.active == 1)) {
+ bstp_timer_start(&bp->bp_hello_timer, bp->bp_desg_htime);
+ bp->bp_flags |= BSTP_PORT_NEWINFO;
+ bstp_transmit(bs, bp);
+ }
+}
+
+static void
+bstp_message_age_expiry(struct bstp_state *bs, struct bstp_port *bp)
+{
+ if (bp->bp_infois == BSTP_INFO_RECEIVED) {
+ bp->bp_infois = BSTP_INFO_AGED;
+ bstp_assign_roles(bs);
+ DPRINTF("aged info on %s\n", bp->bp_ifp->if_xname);
+ }
+}
+
+static void
+bstp_migrate_delay_expiry(struct bstp_state *bs, struct bstp_port *bp)
+{
+ bp->bp_flags |= BSTP_PORT_CANMIGRATE;
+}
+
+static void
+bstp_edge_delay_expiry(struct bstp_state *bs, struct bstp_port *bp)
+{
+ if ((bp->bp_flags & BSTP_PORT_AUTOEDGE) &&
+ bp->bp_protover == BSTP_PROTO_RSTP && bp->bp_proposing &&
+ bp->bp_role == BSTP_ROLE_DESIGNATED) {
+ bp->bp_operedge = 1;
+ DPRINTF("%s -> edge port\n", bp->bp_ifp->if_xname);
+ }
+}
+
+static int
+bstp_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+ int i, d;
+
+ for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) {
+ d = ((int)a[i]) - ((int)b[i]);
+ }
+
+ return (d);
+}
+
+/*
+ * compare the bridge address component of the bridgeid
+ */
+static int
+bstp_same_bridgeid(uint64_t id1, uint64_t id2)
+{
+ u_char addr1[ETHER_ADDR_LEN];
+ u_char addr2[ETHER_ADDR_LEN];
+
+ PV2ADDR(id1, addr1);
+ PV2ADDR(id2, addr2);
+
+ if (bstp_addr_cmp(addr1, addr2) == 0)
+ return (1);
+
+ return (0);
+}
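+
+/*
+ * Example: bridge IDs 0x8000001122334455 and 0x9000001122334455 differ
+ * only in the 16-bit priority component, so bstp_same_bridgeid()
+ * considers them the same bridge; only the low 48 address bits are
+ * compared.
+ */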
+
+void
+bstp_reinit(struct bstp_state *bs)
+{
+ struct bstp_port *bp;
+ struct ifnet *ifp, *mif;
+ u_char *e_addr;
+ static const u_char llzero[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
+
+ BSTP_LOCK_ASSERT(bs);
+
+ mif = NULL;
+	/*
+	 * Search through the Ethernet adapters and find the one with the
+	 * lowest MAC address.  The adapter from which we take the MAC
+	 * address does not need to be part of the bridge; it just needs
+	 * to provide a unique value.
+	 */
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (ifp->if_type != IFT_ETHER)
+ continue;
+
+ if (bstp_addr_cmp(IF_LLADDR(ifp), llzero) == 0)
+ continue;
+
+ if (mif == NULL) {
+ mif = ifp;
+ continue;
+ }
+ if (bstp_addr_cmp(IF_LLADDR(ifp), IF_LLADDR(mif)) < 0) {
+ mif = ifp;
+ continue;
+ }
+ }
+ IFNET_RUNLOCK_NOSLEEP();
+
+ if (LIST_EMPTY(&bs->bs_bplist) || mif == NULL) {
+ /* Set the bridge and root id (lower bits) to zero */
+ bs->bs_bridge_pv.pv_dbridge_id =
+ ((uint64_t)bs->bs_bridge_priority) << 48;
+ bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id;
+ bs->bs_root_pv = bs->bs_bridge_pv;
+		/* Disable any remaining ports; they will have no MAC address */
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+ bp->bp_infois = BSTP_INFO_DISABLED;
+ bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+ }
+ callout_stop(&bs->bs_bstpcallout);
+ return;
+ }
+
+ e_addr = IF_LLADDR(mif);
+ bs->bs_bridge_pv.pv_dbridge_id =
+ (((uint64_t)bs->bs_bridge_priority) << 48) |
+ (((uint64_t)e_addr[0]) << 40) |
+ (((uint64_t)e_addr[1]) << 32) |
+ (((uint64_t)e_addr[2]) << 24) |
+ (((uint64_t)e_addr[3]) << 16) |
+ (((uint64_t)e_addr[4]) << 8) |
+ (((uint64_t)e_addr[5]));
+
+ bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id;
+ bs->bs_bridge_pv.pv_cost = 0;
+ bs->bs_bridge_pv.pv_dport_id = 0;
+ bs->bs_bridge_pv.pv_port_id = 0;
+
+ if (bs->bs_running && callout_pending(&bs->bs_bstpcallout) == 0)
+ callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs);
+
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+ bp->bp_port_id = (bp->bp_priority << 8) |
+ (bp->bp_ifp->if_index & 0xfff);
+ bstp_ifupdstatus(bs, bp);
+ }
+
+ bstp_assign_roles(bs);
+ bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER);
+}
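+
+/*
+ * Example of the bridge ID layout built above: with the default bridge
+ * priority 0x8000 and a MAC address of 00:11:22:33:44:55, the 64-bit
+ * identifier becomes 0x8000001122334455 (priority in the top 16 bits,
+ * address in the low 48).
+ */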
+
+static int
+bstp_modevent(module_t mod, int type, void *data)
+{
+ switch (type) {
+ case MOD_LOAD:
+ mtx_init(&bstp_list_mtx, "bridgestp list", NULL, MTX_DEF);
+ LIST_INIT(&bstp_list);
+ bstp_linkstate_p = bstp_linkstate;
+ break;
+ case MOD_UNLOAD:
+ bstp_linkstate_p = NULL;
+ mtx_destroy(&bstp_list_mtx);
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t bstp_mod = {
+ "bridgestp",
+ bstp_modevent,
+ 0
+};
+
+DECLARE_MODULE(bridgestp, bstp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(bridgestp, 1);
+
+void
+bstp_attach(struct bstp_state *bs, struct bstp_cb_ops *cb)
+{
+ BSTP_LOCK_INIT(bs);
+ callout_init_mtx(&bs->bs_bstpcallout, &bs->bs_mtx, 0);
+ LIST_INIT(&bs->bs_bplist);
+
+ bs->bs_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
+ bs->bs_bridge_htime = BSTP_DEFAULT_HELLO_TIME;
+ bs->bs_bridge_fdelay = BSTP_DEFAULT_FORWARD_DELAY;
+ bs->bs_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
+ bs->bs_hold_time = BSTP_DEFAULT_HOLD_TIME;
+ bs->bs_migration_delay = BSTP_DEFAULT_MIGRATE_DELAY;
+ bs->bs_txholdcount = BSTP_DEFAULT_HOLD_COUNT;
+ bs->bs_protover = BSTP_PROTO_RSTP;
+ bs->bs_state_cb = cb->bcb_state;
+ bs->bs_rtage_cb = cb->bcb_rtage;
+
+ getmicrotime(&bs->bs_last_tc_time);
+
+ mtx_lock(&bstp_list_mtx);
+ LIST_INSERT_HEAD(&bstp_list, bs, bs_list);
+ mtx_unlock(&bstp_list_mtx);
+}
+
+void
+bstp_detach(struct bstp_state *bs)
+{
+ KASSERT(LIST_EMPTY(&bs->bs_bplist), ("bstp still active"));
+
+ mtx_lock(&bstp_list_mtx);
+ LIST_REMOVE(bs, bs_list);
+ mtx_unlock(&bstp_list_mtx);
+ callout_drain(&bs->bs_bstpcallout);
+ BSTP_LOCK_DESTROY(bs);
+}
+
+void
+bstp_init(struct bstp_state *bs)
+{
+ BSTP_LOCK(bs);
+ callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs);
+ bs->bs_running = 1;
+ bstp_reinit(bs);
+ BSTP_UNLOCK(bs);
+}
+
+void
+bstp_stop(struct bstp_state *bs)
+{
+ struct bstp_port *bp;
+
+ BSTP_LOCK(bs);
+
+ LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+ bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+
+ bs->bs_running = 0;
+ callout_stop(&bs->bs_bstpcallout);
+ BSTP_UNLOCK(bs);
+}
+
+int
+bstp_create(struct bstp_state *bs, struct bstp_port *bp, struct ifnet *ifp)
+{
+ bzero(bp, sizeof(struct bstp_port));
+
+ BSTP_LOCK(bs);
+ bp->bp_ifp = ifp;
+ bp->bp_bs = bs;
+ bp->bp_priority = BSTP_DEFAULT_PORT_PRIORITY;
+ TASK_INIT(&bp->bp_statetask, 0, bstp_notify_state, bp);
+ TASK_INIT(&bp->bp_rtagetask, 0, bstp_notify_rtage, bp);
+
+ /* Init state */
+ bp->bp_infois = BSTP_INFO_DISABLED;
+ bp->bp_flags = BSTP_PORT_AUTOEDGE|BSTP_PORT_AUTOPTP;
+ bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+ bstp_set_port_proto(bp, bs->bs_protover);
+ bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+ bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE);
+ bp->bp_path_cost = bstp_calc_path_cost(bp);
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+int
+bstp_enable(struct bstp_port *bp)
+{
+ struct bstp_state *bs = bp->bp_bs;
+ struct ifnet *ifp = bp->bp_ifp;
+
+ KASSERT(bp->bp_active == 0, ("already a bstp member"));
+
+ switch (ifp->if_type) {
+ case IFT_ETHER: /* These can do spanning tree. */
+ break;
+ default:
+ /* Nothing else can. */
+ return (EINVAL);
+ }
+
+ BSTP_LOCK(bs);
+ LIST_INSERT_HEAD(&bs->bs_bplist, bp, bp_next);
+ bp->bp_active = 1;
+ bp->bp_flags |= BSTP_PORT_NEWINFO;
+ bstp_reinit(bs);
+ bstp_update_roles(bs, bp);
+ BSTP_UNLOCK(bs);
+ return (0);
+}
+
+void
+bstp_disable(struct bstp_port *bp)
+{
+ struct bstp_state *bs = bp->bp_bs;
+
+ KASSERT(bp->bp_active == 1, ("not a bstp member"));
+
+ BSTP_LOCK(bs);
+ bstp_disable_port(bs, bp);
+ LIST_REMOVE(bp, bp_next);
+ bp->bp_active = 0;
+ bstp_reinit(bs);
+ BSTP_UNLOCK(bs);
+}
+
+/*
+ * The bstp_port structure is about to be freed by the parent bridge.
+ */
+void
+bstp_destroy(struct bstp_port *bp)
+{
+ KASSERT(bp->bp_active == 0, ("port is still attached"));
+ taskqueue_drain(taskqueue_swi, &bp->bp_statetask);
+ taskqueue_drain(taskqueue_swi, &bp->bp_rtagetask);
+}
diff --git a/freebsd/sys/net/bridgestp.h b/freebsd/sys/net/bridgestp.h
new file mode 100644
index 00000000..91328900
--- /dev/null
+++ b/freebsd/sys/net/bridgestp.h
@@ -0,0 +1,396 @@
+/* $NetBSD: if_bridgevar.h,v 1.4 2003/07/08 07:13:50 itojun Exp $ */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Data structure and control definitions for STP interfaces.
+ */
+
+#include <freebsd/sys/callout.h>
+#include <freebsd/sys/queue.h>
+
+/* STP port states */
+#define BSTP_IFSTATE_DISABLED 0
+#define BSTP_IFSTATE_LISTENING 1
+#define BSTP_IFSTATE_LEARNING 2
+#define BSTP_IFSTATE_FORWARDING 3
+#define BSTP_IFSTATE_BLOCKING 4
+#define BSTP_IFSTATE_DISCARDING 5
+
+#define BSTP_TCSTATE_ACTIVE 1
+#define BSTP_TCSTATE_DETECTED 2
+#define BSTP_TCSTATE_INACTIVE 3
+#define BSTP_TCSTATE_LEARNING 4
+#define BSTP_TCSTATE_PROPAG 5
+#define BSTP_TCSTATE_ACK 6
+#define BSTP_TCSTATE_TC 7
+#define BSTP_TCSTATE_TCN 8
+
+#define BSTP_ROLE_DISABLED 0
+#define BSTP_ROLE_ROOT 1
+#define BSTP_ROLE_DESIGNATED 2
+#define BSTP_ROLE_ALTERNATE 3
+#define BSTP_ROLE_BACKUP 4
+
+#ifdef _KERNEL
+
+/* STP port flags */
+#define BSTP_PORT_CANMIGRATE 0x0001
+#define BSTP_PORT_NEWINFO 0x0002
+#define BSTP_PORT_DISPUTED 0x0004
+#define BSTP_PORT_ADMCOST 0x0008
+#define BSTP_PORT_AUTOEDGE 0x0010
+#define BSTP_PORT_AUTOPTP 0x0020
+#define BSTP_PORT_ADMEDGE 0x0040
+#define BSTP_PORT_PNDCOST 0x0080
+
+/* BPDU priority */
+#define BSTP_PDU_SUPERIOR 1
+#define BSTP_PDU_REPEATED 2
+#define BSTP_PDU_INFERIOR 3
+#define BSTP_PDU_INFERIORALT 4
+#define BSTP_PDU_OTHER 5
+
+/* BPDU flags */
+#define BSTP_PDU_PRMASK 0x0c /* Port Role */
+#define BSTP_PDU_PRSHIFT 2 /* Port Role offset */
+#define BSTP_PDU_F_UNKN 0x00 /* Unknown port (00) */
+#define BSTP_PDU_F_ALT 0x01 /* Alt/Backup port (01) */
+#define BSTP_PDU_F_ROOT 0x02 /* Root port (10) */
+#define BSTP_PDU_F_DESG 0x03 /* Designated port (11) */
+
+#define BSTP_PDU_STPMASK 0x81 /* strip unused STP flags */
+#define BSTP_PDU_RSTPMASK 0x7f /* strip unused RSTP flags */
+#define BSTP_PDU_F_TC 0x01 /* Topology change */
+#define BSTP_PDU_F_P 0x02 /* Proposal flag */
+#define BSTP_PDU_F_L 0x10 /* Learning flag */
+#define BSTP_PDU_F_F 0x20 /* Forwarding flag */
+#define BSTP_PDU_F_A 0x40 /* Agreement flag */
+#define BSTP_PDU_F_TCA 0x80 /* Topology change ack */
+
+/*
+ * Spanning tree defaults.
+ */
+#define BSTP_DEFAULT_MAX_AGE (20 * 256)
+#define BSTP_DEFAULT_HELLO_TIME (2 * 256)
+#define BSTP_DEFAULT_FORWARD_DELAY (15 * 256)
+#define BSTP_DEFAULT_HOLD_TIME (1 * 256)
+#define BSTP_DEFAULT_MIGRATE_DELAY (3 * 256)
+#define BSTP_DEFAULT_HOLD_COUNT 6
+#define BSTP_DEFAULT_BRIDGE_PRIORITY 0x8000
+#define BSTP_DEFAULT_PORT_PRIORITY 0x80
+#define BSTP_DEFAULT_PATH_COST 55
+#define BSTP_MIN_HELLO_TIME (1 * 256)
+#define BSTP_MIN_MAX_AGE (6 * 256)
+#define BSTP_MIN_FORWARD_DELAY (4 * 256)
+#define BSTP_MIN_HOLD_COUNT 1
+#define BSTP_MAX_HELLO_TIME (2 * 256)
+#define BSTP_MAX_MAX_AGE (40 * 256)
+#define BSTP_MAX_FORWARD_DELAY (30 * 256)
+#define BSTP_MAX_HOLD_COUNT 10
+#define BSTP_MAX_PRIORITY 61440
+#define BSTP_MAX_PORT_PRIORITY 240
+#define BSTP_MAX_PATH_COST 200000000
+
+/* BPDU message types */
+#define BSTP_MSGTYPE_CFG 0x00 /* Configuration */
+#define BSTP_MSGTYPE_RSTP 0x02 /* Rapid STP */
+#define BSTP_MSGTYPE_TCN 0x80 /* Topology chg notification */
+
+/* Protocol versions */
+#define BSTP_PROTO_ID 0x00
+#define BSTP_PROTO_STP 0x00
+#define BSTP_PROTO_RSTP 0x02
+#define BSTP_PROTO_MAX BSTP_PROTO_RSTP
+
+#define BSTP_INFO_RECEIVED 1
+#define BSTP_INFO_MINE 2
+#define BSTP_INFO_AGED 3
+#define BSTP_INFO_DISABLED 4
+
+
+#define BSTP_MESSAGE_AGE_INCR (1 * 256) /* in 256ths of a second */
+#define BSTP_TICK_VAL (1 * 256) /* in 256ths of a second */
+#define BSTP_LINK_TIMER (BSTP_TICK_VAL * 15)
+
+/*
+ * Driver callbacks for STP state changes
+ */
+typedef void (*bstp_state_cb_t)(struct ifnet *, int);
+typedef void (*bstp_rtage_cb_t)(struct ifnet *, int);
+struct bstp_cb_ops {
+ bstp_state_cb_t bcb_state;
+ bstp_rtage_cb_t bcb_rtage;
+};
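+
+/*
+ * A minimal usage sketch (hypothetical names): a bridge driver fills in
+ * the callback table once and hands it to bstp_attach():
+ *
+ *	static struct bstp_cb_ops mybridge_cb_ops = {
+ *		.bcb_state = mybridge_state_change,
+ *		.bcb_rtage = mybridge_rtable_expire,
+ *	};
+ *
+ *	bstp_attach(&sc->sc_stp, &mybridge_cb_ops);
+ *
+ * bcb_state is invoked (from a taskqueue) when a port changes STP
+ * state; bcb_rtage asks the driver to age out learned routes.
+ */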
+
+/*
+ * Because BPDUs do not make nicely aligned structures, two different
+ * declarations are used: bstp_?bpdu (wire representation, packed) and
+ * bstp_*_unit (internal, nicely aligned version).
+ */
+
+/* configuration bridge protocol data unit */
+struct bstp_cbpdu {
+ uint8_t cbu_dsap; /* LLC: destination sap */
+ uint8_t cbu_ssap; /* LLC: source sap */
+ uint8_t cbu_ctl; /* LLC: control */
+ uint16_t cbu_protoid; /* protocol id */
+ uint8_t cbu_protover; /* protocol version */
+ uint8_t cbu_bpdutype; /* message type */
+ uint8_t cbu_flags; /* flags (below) */
+
+ /* root id */
+ uint16_t cbu_rootpri; /* root priority */
+ uint8_t cbu_rootaddr[6]; /* root address */
+
+ uint32_t cbu_rootpathcost; /* root path cost */
+
+ /* bridge id */
+ uint16_t cbu_bridgepri; /* bridge priority */
+ uint8_t cbu_bridgeaddr[6]; /* bridge address */
+
+ uint16_t cbu_portid; /* port id */
+ uint16_t cbu_messageage; /* current message age */
+ uint16_t cbu_maxage; /* maximum age */
+ uint16_t cbu_hellotime; /* hello time */
+ uint16_t cbu_forwarddelay; /* forwarding delay */
+ uint8_t cbu_versionlen; /* version 1 length */
+} __packed;
+#define BSTP_BPDU_STP_LEN (3 + 35) /* LLC + STP pdu */
+#define BSTP_BPDU_RSTP_LEN (3 + 36) /* LLC + RSTP pdu */
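+
+/*
+ * Sanity check on the lengths above: the STP configuration BPDU body is
+ * 35 octets, and the RSTP variant appends the one-octet Version 1
+ * Length field (cbu_versionlen) for 36; both follow the 3-octet LLC
+ * header, hence (3 + 35) and (3 + 36).
+ */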
+
+/* topology change notification bridge protocol data unit */
+struct bstp_tbpdu {
+ uint8_t tbu_dsap; /* LLC: destination sap */
+ uint8_t tbu_ssap; /* LLC: source sap */
+ uint8_t tbu_ctl; /* LLC: control */
+ uint16_t tbu_protoid; /* protocol id */
+ uint8_t tbu_protover; /* protocol version */
+ uint8_t tbu_bpdutype; /* message type */
+} __packed;
+
+/*
+ * Timekeeping structure used in spanning tree code.
+ */
+struct bstp_timer {
+ int active;
+ int latched;
+ int value;
+};
+
+struct bstp_pri_vector {
+ uint64_t pv_root_id;
+ uint32_t pv_cost;
+ uint64_t pv_dbridge_id;
+ uint16_t pv_dport_id;
+ uint16_t pv_port_id;
+};
+
+struct bstp_config_unit {
+ struct bstp_pri_vector cu_pv;
+ uint16_t cu_message_age;
+ uint16_t cu_max_age;
+ uint16_t cu_forward_delay;
+ uint16_t cu_hello_time;
+ uint8_t cu_message_type;
+ uint8_t cu_topology_change_ack;
+ uint8_t cu_topology_change;
+ uint8_t cu_proposal;
+ uint8_t cu_agree;
+ uint8_t cu_learning;
+ uint8_t cu_forwarding;
+ uint8_t cu_role;
+};
+
+struct bstp_tcn_unit {
+ uint8_t tu_message_type;
+};
+
+struct bstp_port {
+ LIST_ENTRY(bstp_port) bp_next;
+ struct ifnet *bp_ifp; /* parent if */
+ struct bstp_state *bp_bs;
+ uint8_t bp_active;
+ uint8_t bp_protover;
+ uint32_t bp_flags;
+ uint32_t bp_path_cost;
+ uint16_t bp_port_msg_age;
+ uint16_t bp_port_max_age;
+ uint16_t bp_port_fdelay;
+ uint16_t bp_port_htime;
+ uint16_t bp_desg_msg_age;
+ uint16_t bp_desg_max_age;
+ uint16_t bp_desg_fdelay;
+ uint16_t bp_desg_htime;
+ struct bstp_timer bp_edge_delay_timer;
+ struct bstp_timer bp_forward_delay_timer;
+ struct bstp_timer bp_hello_timer;
+ struct bstp_timer bp_message_age_timer;
+ struct bstp_timer bp_migrate_delay_timer;
+ struct bstp_timer bp_recent_backup_timer;
+ struct bstp_timer bp_recent_root_timer;
+ struct bstp_timer bp_tc_timer;
+ struct bstp_config_unit bp_msg_cu;
+ struct bstp_pri_vector bp_desg_pv;
+ struct bstp_pri_vector bp_port_pv;
+ uint16_t bp_port_id;
+ uint8_t bp_state;
+ uint8_t bp_tcstate;
+ uint8_t bp_role;
+ uint8_t bp_infois;
+ uint8_t bp_tc_ack;
+ uint8_t bp_tc_prop;
+ uint8_t bp_fdbflush;
+ uint8_t bp_priority;
+ uint8_t bp_ptp_link;
+ uint8_t bp_agree;
+ uint8_t bp_agreed;
+ uint8_t bp_sync;
+ uint8_t bp_synced;
+ uint8_t bp_proposing;
+ uint8_t bp_proposed;
+ uint8_t bp_operedge;
+ uint8_t bp_reroot;
+ uint8_t bp_rcvdtc;
+ uint8_t bp_rcvdtca;
+ uint8_t bp_rcvdtcn;
+ uint32_t bp_forward_transitions;
+ uint8_t bp_txcount;
+ struct task bp_statetask;
+ struct task bp_rtagetask;
+};
+
+/*
+ * Software state for each bridge's STP instance.
+ */
+struct bstp_state {
+ LIST_ENTRY(bstp_state) bs_list;
+ uint8_t bs_running;
+ struct mtx bs_mtx;
+ struct bstp_pri_vector bs_bridge_pv;
+ struct bstp_pri_vector bs_root_pv;
+ struct bstp_port *bs_root_port;
+ uint8_t bs_protover;
+ uint16_t bs_migration_delay;
+ uint16_t bs_edge_delay;
+ uint16_t bs_bridge_max_age;
+ uint16_t bs_bridge_fdelay;
+ uint16_t bs_bridge_htime;
+ uint16_t bs_root_msg_age;
+ uint16_t bs_root_max_age;
+ uint16_t bs_root_fdelay;
+ uint16_t bs_root_htime;
+ uint16_t bs_hold_time;
+ uint16_t bs_bridge_priority;
+ uint8_t bs_txholdcount;
+ uint8_t bs_allsynced;
+ struct callout bs_bstpcallout; /* STP callout */
+ struct bstp_timer bs_link_timer;
+ struct timeval bs_last_tc_time;
+ LIST_HEAD(, bstp_port) bs_bplist;
+ bstp_state_cb_t bs_state_cb;
+ bstp_rtage_cb_t bs_rtage_cb;
+};
+
+#define BSTP_LOCK_INIT(_bs) mtx_init(&(_bs)->bs_mtx, "bstp", NULL, MTX_DEF)
+#define BSTP_LOCK_DESTROY(_bs) mtx_destroy(&(_bs)->bs_mtx)
+#define BSTP_LOCK(_bs) mtx_lock(&(_bs)->bs_mtx)
+#define BSTP_UNLOCK(_bs) mtx_unlock(&(_bs)->bs_mtx)
+#define BSTP_LOCK_ASSERT(_bs) mtx_assert(&(_bs)->bs_mtx, MA_OWNED)
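+
+/*
+ * Locking: the bstp_state fields are protected by bs_mtx.  The setters
+ * in bridgestp.c take BSTP_LOCK()/BSTP_UNLOCK() around any change, and
+ * internal helpers assert ownership with BSTP_LOCK_ASSERT().
+ */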
+
+extern const uint8_t bstp_etheraddr[];
+
+extern void (*bstp_linkstate_p)(struct ifnet *ifp, int state);
+
+void bstp_attach(struct bstp_state *, struct bstp_cb_ops *);
+void bstp_detach(struct bstp_state *);
+void bstp_init(struct bstp_state *);
+void bstp_stop(struct bstp_state *);
+int bstp_create(struct bstp_state *, struct bstp_port *, struct ifnet *);
+int bstp_enable(struct bstp_port *);
+void bstp_disable(struct bstp_port *);
+void bstp_destroy(struct bstp_port *);
+void bstp_linkstate(struct ifnet *, int);
+int bstp_set_htime(struct bstp_state *, int);
+int bstp_set_fdelay(struct bstp_state *, int);
+int bstp_set_maxage(struct bstp_state *, int);
+int bstp_set_holdcount(struct bstp_state *, int);
+int bstp_set_protocol(struct bstp_state *, int);
+int bstp_set_priority(struct bstp_state *, int);
+int bstp_set_port_priority(struct bstp_port *, int);
+int bstp_set_path_cost(struct bstp_port *, uint32_t);
+int bstp_set_edge(struct bstp_port *, int);
+int bstp_set_autoedge(struct bstp_port *, int);
+int bstp_set_ptp(struct bstp_port *, int);
+int bstp_set_autoptp(struct bstp_port *, int);
+struct mbuf *bstp_input(struct bstp_port *, struct ifnet *, struct mbuf *);
+
+#endif /* _KERNEL */
diff --git a/freebsd/sys/net/ethernet.h b/freebsd/sys/net/ethernet.h
new file mode 100644
index 00000000..17d022b2
--- /dev/null
+++ b/freebsd/sys/net/ethernet.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/net/ethernet.h>
diff --git a/freebsd/sys/net/fddi.h b/freebsd/sys/net/fddi.h
new file mode 100644
index 00000000..03deabff
--- /dev/null
+++ b/freebsd/sys/net/fddi.h
@@ -0,0 +1,105 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1995 Matt Thomas (thomas@lkg.dec.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_fddi.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IF_FDDI_HH_
+#define _NETINET_IF_FDDI_HH_
+
+#define FDDIIPMTU 4352
+#define FDDIMTU 4470
+#define FDDIMIN 3
+
+#define FDDIFC_C 0x80 /* 0b10000000 */
+#define FDDIFC_L 0x40 /* 0b01000000 */
+#define FDDIFC_F 0x30 /* 0b00110000 */
+#define FDDIFC_Z 0x0F /* 0b00001111 */
+#define FDDIFC_CLFF 0xF0 /* Class/Length/Format bits */
+#define FDDIFC_ZZZZ 0x0F /* Control bits */
+
+/*
+ * FDDI Frame Control values. (48-bit addressing only).
+ */
+#define FDDIFC_VOID 0x40 /* Void frame */
+#define FDDIFC_NRT 0x80 /* Nonrestricted token */
+#define FDDIFC_RT 0xc0 /* Restricted token */
+#define FDDIFC_MAC_BEACON 0xc2 /* MAC Beacon frame */
+#define FDDIFC_MAC_CLAIM 0xc3 /* MAC Claim frame */
+#define FDDIFC_LLC_ASYNC 0x50
+#define FDDIFC_LLC_PRIO0 0
+#define FDDIFC_LLC_PRIO1 1
+#define FDDIFC_LLC_PRIO2 2
+#define FDDIFC_LLC_PRIO3 3
+#define FDDIFC_LLC_PRIO4 4
+#define FDDIFC_LLC_PRIO5 5
+#define FDDIFC_LLC_PRIO6 6
+#define FDDIFC_LLC_PRIO7 7
+#define FDDIFC_LLC_SYNC 0xd0
+#define FDDIFC_IMP_ASYNC 0x60 /* Implementor Async. */
+#define FDDIFC_IMP_SYNC 0xe0 /* Implementor Synch. */
+#define FDDIFC_SMT 0x40
+#define FDDIFC_SMT_INFO 0x41 /* SMT Info */
+#define FDDIFC_SMT_NSA 0x4F /* SMT Next station adrs */
+#define FDDIFC_MAC 0xc0 /* MAC frame */
+
+#define FDDI_ADDR_LEN 6
+#define FDDI_HDR_LEN (sizeof(struct fddi_header))
+
+/*
+ * Structure of a 100 Mb/s FDDI header.
+ */
+struct fddi_header {
+ u_char fddi_fc;
+ u_char fddi_dhost[FDDI_ADDR_LEN];
+ u_char fddi_shost[FDDI_ADDR_LEN];
+};
+
+#if defined(_KERNEL)
+#define fddi_ipmulticast_min ether_ipmulticast_min
+#define fddi_ipmulticast_max ether_ipmulticast_max
+#define fddi_addmulti ether_addmulti
+#define fddi_delmulti ether_delmulti
+#define fddi_sprintf ether_sprintf
+
+#define FDDI_BPF_UNSUPPORTED 0
+#define FDDI_BPF_SUPPORTED 1
+
+void fddi_ifattach(struct ifnet *, const u_int8_t *, int);
+void fddi_ifdetach(struct ifnet *, int);
+int fddi_ioctl(struct ifnet *, u_long, caddr_t);
+
+#endif /* _KERNEL */
+#endif /* _NETINET_IF_FDDI_HH_ */
diff --git a/freebsd/sys/net/firewire.h b/freebsd/sys/net/firewire.h
new file mode 100644
index 00000000..5411dbf8
--- /dev/null
+++ b/freebsd/sys/net/firewire.h
@@ -0,0 +1,142 @@
+/*-
+ * Copyright (c) 2004 Doug Rabson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_FIREWIRE_HH_
+#define _NET_FIREWIRE_HH_
+
+#define FW_ENCAP_UNFRAG 0
+#define FW_ENCAP_FIRST 1
+#define FW_ENCAP_LAST 2
+#define FW_ENCAP_NEXT 3
+
+union fw_encap {
+ uint32_t ul[2];
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ uint32_t lf :2;
+ uint32_t reserved :14;
+ uint32_t ether_type :16;
+#else
+ uint32_t ether_type :16;
+ uint32_t reserved :14;
+ uint32_t lf :2;
+#endif
+ } unfrag;
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ uint32_t lf :2;
+ uint32_t reserved1 :2;
+ uint32_t datagram_size :12;
+ uint32_t ether_type :16;
+ uint32_t dgl :16;
+ uint32_t reserved2 :16;
+#else
+ uint32_t ether_type :16;
+ uint32_t datagram_size :12;
+ uint32_t reserved1 :2;
+ uint32_t lf :2;
+ uint32_t reserved2 :16;
+ uint32_t dgl :16;
+#endif
+ } firstfrag;
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ uint32_t lf :2;
+ uint32_t reserved1 :2;
+ uint32_t datagram_size :12;
+ uint32_t reserved2 :4;
+ uint32_t fragment_offset :12;
+ uint32_t dgl :16;
+ uint32_t reserved3 :16;
+#else
+ uint32_t fragment_offset :12;
+ uint32_t reserved2 :4;
+ uint32_t datagram_size :12;
+ uint32_t reserved1 :2;
+ uint32_t lf :2;
+ uint32_t reserved3 :16;
+ uint32_t dgl :16;
+#endif
+ } nextfrag;
+};
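+
+/*
+ * Example: an IPv4 datagram that fits in a single FireWire packet is
+ * sent with lf == FW_ENCAP_UNFRAG and ether_type == 0x0800 (RFC 2734
+ * style IP-over-1394 encapsulation); larger datagrams use the
+ * firstfrag/nextfrag headers with a shared dgl (datagram label).
+ */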
+
+#define MTAG_FIREWIRE 1394
+#define MTAG_FIREWIRE_HWADDR 0
+#define MTAG_FIREWIRE_SENDER_EUID 1
+
+struct fw_hwaddr {
+ uint32_t sender_unique_ID_hi;
+ uint32_t sender_unique_ID_lo;
+ uint8_t sender_max_rec;
+ uint8_t sspd;
+ uint16_t sender_unicast_FIFO_hi;
+ uint32_t sender_unicast_FIFO_lo;
+};
+
+/*
+ * BPF wants to see one of these.
+ */
+struct fw_bpfhdr {
+ uint8_t firewire_dhost[8];
+ uint8_t firewire_shost[8];
+ uint16_t firewire_type;
+};
+
+#ifdef _KERNEL
+
+/*
+ * A structure to track the reassembly of a link-level fragmented
+ * datagram.
+ */
+struct fw_reass {
+ STAILQ_ENTRY(fw_reass) fr_link;
+ uint32_t fr_id; /* host+dgl */
+ struct mbuf *fr_frags; /* chain of frags */
+};
+STAILQ_HEAD(fw_reass_list, fw_reass);
+
+struct fw_com {
+ struct ifnet *fc_ifp;
+ struct fw_hwaddr fc_hwaddr;
+ struct firewire_comm *fc_fc;
+ uint8_t fc_broadcast_channel;
+ uint8_t fc_speed; /* our speed */
+ uint16_t fc_node; /* our nodeid */
+ struct fw_reass_list fc_frags; /* partial datagrams */
+};
+#define IFP2FWC(ifp) ((struct fw_com *)(ifp)->if_l2com)
+
+extern void firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src);
+extern void firewire_ifattach(struct ifnet *, struct fw_hwaddr *);
+extern void firewire_ifdetach(struct ifnet *);
+extern void firewire_busreset(struct ifnet *);
+extern int firewire_ioctl(struct ifnet *, u_long, caddr_t);
+
+#endif /* !_KERNEL */
+
+#endif /* !_NET_FIREWIRE_HH_ */
diff --git a/freebsd/sys/net/flowtable.h b/freebsd/sys/net/flowtable.h
new file mode 100644
index 00000000..c4a09659
--- /dev/null
+++ b/freebsd/sys/net/flowtable.h
@@ -0,0 +1,82 @@
+/**************************************************************************
+
+Copyright (c) 2008-2010, BitGravity Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the BitGravity Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+
+#ifndef _NET_FLOWTABLE_HH_
+#define _NET_FLOWTABLE_HH_
+
+#ifdef _KERNEL
+
+#define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */
+#define FL_PCPU (1<<1) /* pcpu cache */
+#define FL_NOAUTO (1<<2) /* don't automatically add flentry on miss */
+
+#define FL_TCP (1<<11)
+#define FL_SCTP (1<<12)
+#define FL_UDP (1<<13)
+#define FL_DEBUG (1<<14)
+#define FL_DEBUG_ALL (1<<15)
+
+struct flowtable;
+struct flentry;
+struct route;
+struct route_in6;
+
+VNET_DECLARE(struct flowtable *, ip_ft);
+#define V_ip_ft VNET(ip_ft)
+
+VNET_DECLARE(struct flowtable *, ip6_ft);
+#define V_ip6_ft VNET(ip6_ft)
+
+struct flowtable *flowtable_alloc(char *name, int nentry, int flags);
+
+/*
+ * Given a flow table, look up the L3 and L2 information and
+ * return it in the route.
+ */
+struct flentry *flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af);
+
+struct flentry *flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
+ struct sockaddr_storage *dsa, uint32_t fibnum, int flags);
+
+int kern_flowtable_insert(struct flowtable *ft, struct sockaddr_storage *ssa,
+ struct sockaddr_storage *dsa, struct route *ro, uint32_t fibnum, int flags);
+
+void flow_invalidate(struct flentry *fl);
+void flowtable_route_flush(struct flowtable *ft, struct rtentry *rt);
+
+void flow_to_route(struct flentry *fl, struct route *ro);
+
+void flow_to_route_in6(struct flentry *fl, struct route_in6 *ro);
+
+
+#endif /* _KERNEL */
+#endif
diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c
new file mode 100644
index 00000000..75c0d9ea
--- /dev/null
+++ b/freebsd/sys/net/ieee8023ad_lacp.c
@@ -0,0 +1,1947 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $ */
+
+/*-
+ * Copyright (c)2005 YAMAMOTO Takashi,
+ * Copyright (c)2008 Andrew Thompson <thompsa@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/callout.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/kernel.h> /* hz */
+#include <freebsd/sys/socket.h> /* for net/if.h */
+#include <freebsd/sys/sockio.h>
+#include <freebsd/machine/stdarg.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/rwlock.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if_media.h>
+#include <freebsd/net/if_types.h>
+
+#include <freebsd/net/if_lagg.h>
+#include <freebsd/net/ieee8023ad_lacp.h>
+
+/*
+ * actor system priority and port priority.
+ * XXX should be configurable.
+ */
+
+#define LACP_SYSTEM_PRIO 0x8000
+#define LACP_PORT_PRIO 0x8000
+
+const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
+ { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
+
+static const struct tlv_template lacp_info_tlv_template[] = {
+ { LACP_TYPE_ACTORINFO,
+ sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
+ { LACP_TYPE_PARTNERINFO,
+ sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
+ { LACP_TYPE_COLLECTORINFO,
+ sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
+ { 0, 0 },
+};
+
+static const struct tlv_template marker_info_tlv_template[] = {
+ { MARKER_TYPE_INFO,
+ sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) },
+ { 0, 0 },
+};
+
+static const struct tlv_template marker_response_tlv_template[] = {
+ { MARKER_TYPE_RESPONSE,
+ sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) },
+ { 0, 0 },
+};
+
+typedef void (*lacp_timer_func_t)(struct lacp_port *);
+
+static void lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *);
+static void lacp_fill_markerinfo(struct lacp_port *,
+ struct lacp_markerinfo *);
+
+static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *);
+static void lacp_suppress_distributing(struct lacp_softc *,
+ struct lacp_aggregator *);
+static void lacp_transit_expire(void *);
+static void lacp_update_portmap(struct lacp_softc *);
+static void lacp_select_active_aggregator(struct lacp_softc *);
+static uint16_t lacp_compose_key(struct lacp_port *);
+static int tlv_check(const void *, size_t, const struct tlvhdr *,
+ const struct tlv_template *, boolean_t);
+static void lacp_tick(void *);
+
+static void lacp_fill_aggregator_id(struct lacp_aggregator *,
+ const struct lacp_port *);
+static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *,
+ const struct lacp_peerinfo *);
+static int lacp_aggregator_is_compatible(const struct lacp_aggregator *,
+ const struct lacp_port *);
+static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *,
+ const struct lacp_peerinfo *);
+
+static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *,
+ struct lacp_port *);
+static void lacp_aggregator_addref(struct lacp_softc *,
+ struct lacp_aggregator *);
+static void lacp_aggregator_delref(struct lacp_softc *,
+ struct lacp_aggregator *);
+
+/* receive machine */
+
+static int lacp_pdu_input(struct lacp_port *, struct mbuf *);
+static int lacp_marker_input(struct lacp_port *, struct mbuf *);
+static void lacp_sm_rx(struct lacp_port *, const struct lacpdu *);
+static void lacp_sm_rx_timer(struct lacp_port *);
+static void lacp_sm_rx_set_expired(struct lacp_port *);
+static void lacp_sm_rx_update_ntt(struct lacp_port *,
+ const struct lacpdu *);
+static void lacp_sm_rx_record_pdu(struct lacp_port *,
+ const struct lacpdu *);
+static void lacp_sm_rx_update_selected(struct lacp_port *,
+ const struct lacpdu *);
+static void lacp_sm_rx_record_default(struct lacp_port *);
+static void lacp_sm_rx_update_default_selected(struct lacp_port *);
+static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *,
+ const struct lacp_peerinfo *);
+
+/* mux machine */
+
+static void lacp_sm_mux(struct lacp_port *);
+static void lacp_set_mux(struct lacp_port *, enum lacp_mux_state);
+static void lacp_sm_mux_timer(struct lacp_port *);
+
+/* periodic transmit machine */
+
+static void lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t);
+static void lacp_sm_ptx_tx_schedule(struct lacp_port *);
+static void lacp_sm_ptx_timer(struct lacp_port *);
+
+/* transmit machine */
+
+static void lacp_sm_tx(struct lacp_port *);
+static void lacp_sm_assert_ntt(struct lacp_port *);
+
+static void lacp_run_timers(struct lacp_port *);
+static int lacp_compare_peerinfo(const struct lacp_peerinfo *,
+ const struct lacp_peerinfo *);
+static int lacp_compare_systemid(const struct lacp_systemid *,
+ const struct lacp_systemid *);
+static void lacp_port_enable(struct lacp_port *);
+static void lacp_port_disable(struct lacp_port *);
+static void lacp_select(struct lacp_port *);
+static void lacp_unselect(struct lacp_port *);
+static void lacp_disable_collecting(struct lacp_port *);
+static void lacp_enable_collecting(struct lacp_port *);
+static void lacp_disable_distributing(struct lacp_port *);
+static void lacp_enable_distributing(struct lacp_port *);
+static int lacp_xmit_lacpdu(struct lacp_port *);
+static int lacp_xmit_marker(struct lacp_port *);
+
+#if defined(LACP_DEBUG)
+static void lacp_dump_lacpdu(const struct lacpdu *);
+static const char *lacp_format_partner(const struct lacp_peerinfo *, char *,
+ size_t);
+static const char *lacp_format_lagid(const struct lacp_peerinfo *,
+ const struct lacp_peerinfo *, char *, size_t);
+static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *,
+ char *, size_t);
+static const char *lacp_format_state(uint8_t, char *, size_t);
+static const char *lacp_format_mac(const uint8_t *, char *, size_t);
+static const char *lacp_format_systemid(const struct lacp_systemid *, char *,
+ size_t);
+static const char *lacp_format_portid(const struct lacp_portid *, char *,
+ size_t);
+static void lacp_dprintf(const struct lacp_port *, const char *, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+#define LACP_DPRINTF(a) lacp_dprintf a
+#else
+#define LACP_DPRINTF(a) /* nothing */
+#endif
+
+/*
+ * partner administration variables.
+ * XXX should be configurable.
+ */
+
+static const struct lacp_peerinfo lacp_partner_admin = {
+ .lip_systemid = { .lsi_prio = 0xffff },
+ .lip_portid = { .lpi_prio = 0xffff },
+#if 1
+ /* optimistic */
+ .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
+ LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
+#else
+ /* pessimistic */
+ .lip_state = 0,
+#endif
+};
+
+static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
+ [LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer,
+ [LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer,
+ [LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer,
+};
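+
+/*
+ * A sketch of how this table is used: lacp_tick() fires once per second
+ * and calls lacp_run_timers(), which counts down each armed timer and
+ * invokes the matching handler here (receive, periodic transmit or mux
+ * machine) when one reaches zero.
+ */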
+
+struct mbuf *
+lacp_input(struct lagg_port *lgp, struct mbuf *m)
+{
+ struct lacp_port *lp = LACP_PORT(lgp);
+ uint8_t subtype;
+
+ if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) {
+ m_freem(m);
+ return (NULL);
+ }
+
+ m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype);
+ switch (subtype) {
+ case SLOWPROTOCOLS_SUBTYPE_LACP:
+ lacp_pdu_input(lp, m);
+ return (NULL);
+
+ case SLOWPROTOCOLS_SUBTYPE_MARKER:
+ lacp_marker_input(lp, m);
+ return (NULL);
+ }
+
+ /* Not a subtype we are interested in */
+ return (m);
+}
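+
+/*
+ * Example flow: in the lagg framework, received slow-protocols
+ * (ETHERTYPE_SLOW) frames are handed to lacp_input(); LACP and marker
+ * PDUs are consumed here (NULL is returned), while frames with any
+ * other subtype are handed back to the caller unchanged.
+ */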
+
+/*
+ * lacp_pdu_input: process lacpdu
+ */
+static int
+lacp_pdu_input(struct lacp_port *lp, struct mbuf *m)
+{
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct lacpdu *du;
+ int error = 0;
+
+ if (m->m_pkthdr.len != sizeof(*du)) {
+ goto bad;
+ }
+
+ if ((m->m_flags & M_MCAST) == 0) {
+ goto bad;
+ }
+
+ if (m->m_len < sizeof(*du)) {
+ m = m_pullup(m, sizeof(*du));
+ if (m == NULL) {
+ return (ENOMEM);
+ }
+ }
+
+ du = mtod(m, struct lacpdu *);
+
+ if (memcmp(&du->ldu_eh.ether_dhost,
+ &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
+ goto bad;
+ }
+
+	/*
+	 * Ignore the version for compatibility with future protocol
+	 * revisions.
+	 */
+#if 0
+ if (du->ldu_sph.sph_version != 1) {
+ goto bad;
+ }
+#endif
+
+	/*
+	 * Ignore TLV types for compatibility with future protocol
+	 * revisions.
+	 */
+ if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
+ lacp_info_tlv_template, FALSE)) {
+ goto bad;
+ }
+
+#if defined(LACP_DEBUG)
+ LACP_DPRINTF((lp, "lacpdu receive\n"));
+ lacp_dump_lacpdu(du);
+#endif /* defined(LACP_DEBUG) */
+
+ LACP_LOCK(lsc);
+ lacp_sm_rx(lp, du);
+ LACP_UNLOCK(lsc);
+
+ m_freem(m);
+ return (error);
+
+bad:
+ m_freem(m);
+ return (EINVAL);
+}
+
+static void
+lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info)
+{
+ struct lagg_port *lgp = lp->lp_lagg;
+ struct lagg_softc *sc = lgp->lp_softc;
+
+ info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO);
+ memcpy(&info->lip_systemid.lsi_mac,
+ IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+ info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO);
+ info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index);
+ info->lip_state = lp->lp_state;
+}
+
+static void
+lacp_fill_markerinfo(struct lacp_port *lp, struct lacp_markerinfo *info)
+{
+ struct ifnet *ifp = lp->lp_ifp;
+
+ /* Fill in the port index and system id (encoded as the MAC) */
+ info->mi_rq_port = htons(ifp->if_index);
+ memcpy(&info->mi_rq_system, lp->lp_systemid.lsi_mac, ETHER_ADDR_LEN);
+ info->mi_rq_xid = htonl(0);
+}
+
+static int
+lacp_xmit_lacpdu(struct lacp_port *lp)
+{
+ struct lagg_port *lgp = lp->lp_lagg;
+ struct mbuf *m;
+ struct lacpdu *du;
+ int error;
+
+ LACP_LOCK_ASSERT(lp->lp_lsc);
+
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ return (ENOMEM);
+ }
+ m->m_len = m->m_pkthdr.len = sizeof(*du);
+
+ du = mtod(m, struct lacpdu *);
+ memset(du, 0, sizeof(*du));
+
+ memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
+ ETHER_ADDR_LEN);
+ memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
+ du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW);
+
+ du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
+ du->ldu_sph.sph_version = 1;
+
+ TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
+ du->ldu_actor = lp->lp_actor;
+
+ TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
+ sizeof(du->ldu_partner));
+ du->ldu_partner = lp->lp_partner;
+
+ TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
+ sizeof(du->ldu_collector));
+ du->ldu_collector.lci_maxdelay = 0;
+
+#if defined(LACP_DEBUG)
+ LACP_DPRINTF((lp, "lacpdu transmit\n"));
+ lacp_dump_lacpdu(du);
+#endif /* defined(LACP_DEBUG) */
+
+ m->m_flags |= M_MCAST;
+
+	/*
+	 * XXX should use a higher priority queue; otherwise network
+	 * congestion can break aggregation.
+	 */
+
+ error = lagg_enqueue(lp->lp_ifp, m);
+ return (error);
+}
+
+static int
+lacp_xmit_marker(struct lacp_port *lp)
+{
+ struct lagg_port *lgp = lp->lp_lagg;
+ struct mbuf *m;
+ struct markerdu *mdu;
+ int error;
+
+ LACP_LOCK_ASSERT(lp->lp_lsc);
+
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ return (ENOMEM);
+ }
+ m->m_len = m->m_pkthdr.len = sizeof(*mdu);
+
+ mdu = mtod(m, struct markerdu *);
+ memset(mdu, 0, sizeof(*mdu));
+
+ memcpy(&mdu->mdu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
+ ETHER_ADDR_LEN);
+ memcpy(&mdu->mdu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
+ mdu->mdu_eh.ether_type = htons(ETHERTYPE_SLOW);
+
+ mdu->mdu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_MARKER;
+ mdu->mdu_sph.sph_version = 1;
+
+ /* Bump the transaction id and copy over the marker info */
+ lp->lp_marker.mi_rq_xid = htonl(ntohl(lp->lp_marker.mi_rq_xid) + 1);
+ TLV_SET(&mdu->mdu_tlv, MARKER_TYPE_INFO, sizeof(mdu->mdu_info));
+ mdu->mdu_info = lp->lp_marker;
+
+ LACP_DPRINTF((lp, "marker transmit, port=%u, sys=%6D, id=%u\n",
+ ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, ":",
+ ntohl(mdu->mdu_info.mi_rq_xid)));
+
+ m->m_flags |= M_MCAST;
+ error = lagg_enqueue(lp->lp_ifp, m);
+ return (error);
+}
+
+void
+lacp_linkstate(struct lagg_port *lgp)
+{
+ struct lacp_port *lp = LACP_PORT(lgp);
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct ifnet *ifp = lgp->lp_ifp;
+ struct ifmediareq ifmr;
+ int error = 0;
+ u_int media;
+ uint8_t old_state;
+ uint16_t old_key;
+
+ bzero((char *)&ifmr, sizeof(ifmr));
+ error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
+ if (error != 0)
+ return;
+
+ LACP_LOCK(lsc);
+ media = ifmr.ifm_active;
+ LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, "
+ "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER,
+ (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP));
+ old_state = lp->lp_state;
+ old_key = lp->lp_key;
+
+ lp->lp_media = media;
+	/*
+	 * If the port is not an active full-duplex Ethernet link then it
+	 * cannot be aggregated.
+	 */
+ if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 ||
+ ifp->if_link_state != LINK_STATE_UP) {
+ lacp_port_disable(lp);
+ } else {
+ lacp_port_enable(lp);
+ }
+ lp->lp_key = lacp_compose_key(lp);
+
+ if (old_state != lp->lp_state || old_key != lp->lp_key) {
+ LACP_DPRINTF((lp, "-> UNSELECTED\n"));
+ lp->lp_selected = LACP_UNSELECTED;
+ }
+ LACP_UNLOCK(lsc);
+}
+
+static void
+lacp_tick(void *arg)
+{
+ struct lacp_softc *lsc = arg;
+ struct lacp_port *lp;
+
+ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
+ if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
+ continue;
+
+ lacp_run_timers(lp);
+
+ lacp_select(lp);
+ lacp_sm_mux(lp);
+ lacp_sm_tx(lp);
+ lacp_sm_ptx_tx_schedule(lp);
+ }
+ callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
+}
+
+int
+lacp_port_create(struct lagg_port *lgp)
+{
+ struct lagg_softc *sc = lgp->lp_softc;
+ struct lacp_softc *lsc = LACP_SOFTC(sc);
+ struct lacp_port *lp;
+ struct ifnet *ifp = lgp->lp_ifp;
+ struct sockaddr_dl sdl;
+ struct ifmultiaddr *rifma = NULL;
+ int error;
+
+ boolean_t active = TRUE; /* XXX should be configurable */
+ boolean_t fast = FALSE; /* XXX should be configurable */
+
+ bzero((char *)&sdl, sizeof(sdl));
+ sdl.sdl_len = sizeof(sdl);
+ sdl.sdl_family = AF_LINK;
+ sdl.sdl_index = ifp->if_index;
+ sdl.sdl_type = IFT_ETHER;
+ sdl.sdl_alen = ETHER_ADDR_LEN;
+
+ bcopy(&ethermulticastaddr_slowprotocols,
+ LLADDR(&sdl), ETHER_ADDR_LEN);
+ error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
+ if (error) {
+ printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname);
+ return (error);
+ }
+
+ lp = malloc(sizeof(struct lacp_port),
+ M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (lp == NULL)
+ return (ENOMEM);
+
+ LACP_LOCK(lsc);
+ lgp->lp_psc = (caddr_t)lp;
+ lp->lp_ifp = ifp;
+ lp->lp_lagg = lgp;
+ lp->lp_lsc = lsc;
+ lp->lp_ifma = rifma;
+
+ LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next);
+
+ lacp_fill_actorinfo(lp, &lp->lp_actor);
+ lacp_fill_markerinfo(lp, &lp->lp_marker);
+ lp->lp_state =
+ (active ? LACP_STATE_ACTIVITY : 0) |
+ (fast ? LACP_STATE_TIMEOUT : 0);
+ lp->lp_aggregator = NULL;
+ lacp_sm_rx_set_expired(lp);
+ LACP_UNLOCK(lsc);
+ lacp_linkstate(lgp);
+
+ return (0);
+}
+
+void
+lacp_port_destroy(struct lagg_port *lgp)
+{
+ struct lacp_port *lp = LACP_PORT(lgp);
+ struct lacp_softc *lsc = lp->lp_lsc;
+ int i;
+
+ LACP_LOCK(lsc);
+ for (i = 0; i < LACP_NTIMER; i++) {
+ LACP_TIMER_DISARM(lp, i);
+ }
+
+ lacp_disable_collecting(lp);
+ lacp_disable_distributing(lp);
+ lacp_unselect(lp);
+
+ /* The address may have already been removed by if_purgemaddrs() */
+ if (!lgp->lp_detaching)
+ if_delmulti_ifma(lp->lp_ifma);
+
+ LIST_REMOVE(lp, lp_next);
+ LACP_UNLOCK(lsc);
+ free(lp, M_DEVBUF);
+}
+
+void
+lacp_req(struct lagg_softc *sc, caddr_t data)
+{
+ struct lacp_opreq *req = (struct lacp_opreq *)data;
+ struct lacp_softc *lsc = LACP_SOFTC(sc);
+ struct lacp_aggregator *la = lsc->lsc_active_aggregator;
+
+ LACP_LOCK(lsc);
+ bzero(req, sizeof(struct lacp_opreq));
+ if (la != NULL) {
+ req->actor_prio = ntohs(la->la_actor.lip_systemid.lsi_prio);
+ memcpy(&req->actor_mac, &la->la_actor.lip_systemid.lsi_mac,
+ ETHER_ADDR_LEN);
+ req->actor_key = ntohs(la->la_actor.lip_key);
+ req->actor_portprio = ntohs(la->la_actor.lip_portid.lpi_prio);
+ req->actor_portno = ntohs(la->la_actor.lip_portid.lpi_portno);
+ req->actor_state = la->la_actor.lip_state;
+
+ req->partner_prio = ntohs(la->la_partner.lip_systemid.lsi_prio);
+ memcpy(&req->partner_mac, &la->la_partner.lip_systemid.lsi_mac,
+ ETHER_ADDR_LEN);
+ req->partner_key = ntohs(la->la_partner.lip_key);
+ req->partner_portprio = ntohs(la->la_partner.lip_portid.lpi_prio);
+ req->partner_portno = ntohs(la->la_partner.lip_portid.lpi_portno);
+ req->partner_state = la->la_partner.lip_state;
+ }
+ LACP_UNLOCK(lsc);
+}
+
+void
+lacp_portreq(struct lagg_port *lgp, caddr_t data)
+{
+ struct lacp_opreq *req = (struct lacp_opreq *)data;
+ struct lacp_port *lp = LACP_PORT(lgp);
+ struct lacp_softc *lsc = lp->lp_lsc;
+
+ LACP_LOCK(lsc);
+ req->actor_prio = ntohs(lp->lp_actor.lip_systemid.lsi_prio);
+ memcpy(&req->actor_mac, &lp->lp_actor.lip_systemid.lsi_mac,
+ ETHER_ADDR_LEN);
+ req->actor_key = ntohs(lp->lp_actor.lip_key);
+ req->actor_portprio = ntohs(lp->lp_actor.lip_portid.lpi_prio);
+ req->actor_portno = ntohs(lp->lp_actor.lip_portid.lpi_portno);
+ req->actor_state = lp->lp_actor.lip_state;
+
+ req->partner_prio = ntohs(lp->lp_partner.lip_systemid.lsi_prio);
+ memcpy(&req->partner_mac, &lp->lp_partner.lip_systemid.lsi_mac,
+ ETHER_ADDR_LEN);
+ req->partner_key = ntohs(lp->lp_partner.lip_key);
+ req->partner_portprio = ntohs(lp->lp_partner.lip_portid.lpi_prio);
+ req->partner_portno = ntohs(lp->lp_partner.lip_portid.lpi_portno);
+ req->partner_state = lp->lp_partner.lip_state;
+ LACP_UNLOCK(lsc);
+}
+
+static void
+lacp_disable_collecting(struct lacp_port *lp)
+{
+ LACP_DPRINTF((lp, "collecting disabled\n"));
+ lp->lp_state &= ~LACP_STATE_COLLECTING;
+}
+
+static void
+lacp_enable_collecting(struct lacp_port *lp)
+{
+ LACP_DPRINTF((lp, "collecting enabled\n"));
+ lp->lp_state |= LACP_STATE_COLLECTING;
+}
+
+static void
+lacp_disable_distributing(struct lacp_port *lp)
+{
+ struct lacp_aggregator *la = lp->lp_aggregator;
+ struct lacp_softc *lsc = lp->lp_lsc;
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif /* defined(LACP_DEBUG) */
+
+ LACP_LOCK_ASSERT(lsc);
+
+ if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
+ return;
+ }
+
+ KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports"));
+ KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports));
+ KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid"));
+
+ LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
+ "nports %d -> %d\n",
+ lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
+ la->la_nports, la->la_nports - 1));
+
+ TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
+ la->la_nports--;
+
+ if (lsc->lsc_active_aggregator == la) {
+ lacp_suppress_distributing(lsc, la);
+ lacp_select_active_aggregator(lsc);
+ /* regenerate the port map, the active aggregator has changed */
+ lacp_update_portmap(lsc);
+ }
+
+ lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
+}
+
+static void
+lacp_enable_distributing(struct lacp_port *lp)
+{
+ struct lacp_aggregator *la = lp->lp_aggregator;
+ struct lacp_softc *lsc = lp->lp_lsc;
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif /* defined(LACP_DEBUG) */
+
+ LACP_LOCK_ASSERT(lsc);
+
+ if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
+ return;
+ }
+
+ LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
+ "nports %d -> %d\n",
+ lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
+ la->la_nports, la->la_nports + 1));
+
+ KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
+ TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
+ la->la_nports++;
+
+ lp->lp_state |= LACP_STATE_DISTRIBUTING;
+
+ if (lsc->lsc_active_aggregator == la) {
+ lacp_suppress_distributing(lsc, la);
+ lacp_update_portmap(lsc);
+ } else
+ /* try to become the active aggregator */
+ lacp_select_active_aggregator(lsc);
+}
+
+static void
+lacp_transit_expire(void *vp)
+{
+ struct lacp_softc *lsc = vp;
+
+ LACP_LOCK_ASSERT(lsc);
+
+ LACP_DPRINTF((NULL, "%s\n", __func__));
+ lsc->lsc_suppress_distributing = FALSE;
+}
+
+int
+lacp_attach(struct lagg_softc *sc)
+{
+ struct lacp_softc *lsc;
+
+ lsc = malloc(sizeof(struct lacp_softc),
+ M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (lsc == NULL)
+ return (ENOMEM);
+
+ sc->sc_psc = (caddr_t)lsc;
+ lsc->lsc_softc = sc;
+
+ lsc->lsc_hashkey = arc4random();
+ lsc->lsc_active_aggregator = NULL;
+ LACP_LOCK_INIT(lsc);
+ TAILQ_INIT(&lsc->lsc_aggregators);
+ LIST_INIT(&lsc->lsc_ports);
+
+ callout_init_mtx(&lsc->lsc_transit_callout, &lsc->lsc_mtx, 0);
+ callout_init_mtx(&lsc->lsc_callout, &lsc->lsc_mtx, 0);
+
+ /* if the lagg is already up then start the LACP machinery now */
+ if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
+ lacp_init(sc);
+
+ return (0);
+}
+
+int
+lacp_detach(struct lagg_softc *sc)
+{
+ struct lacp_softc *lsc = LACP_SOFTC(sc);
+
+ KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
+ ("aggregators still active"));
+ KASSERT(lsc->lsc_active_aggregator == NULL,
+ ("aggregator still attached"));
+
+ sc->sc_psc = NULL;
+ callout_drain(&lsc->lsc_transit_callout);
+ callout_drain(&lsc->lsc_callout);
+
+ LACP_LOCK_DESTROY(lsc);
+ free(lsc, M_DEVBUF);
+ return (0);
+}
+
+void
+lacp_init(struct lagg_softc *sc)
+{
+ struct lacp_softc *lsc = LACP_SOFTC(sc);
+
+ LACP_LOCK(lsc);
+ callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
+ LACP_UNLOCK(lsc);
+}
+
+void
+lacp_stop(struct lagg_softc *sc)
+{
+ struct lacp_softc *lsc = LACP_SOFTC(sc);
+
+ LACP_LOCK(lsc);
+ callout_stop(&lsc->lsc_transit_callout);
+ callout_stop(&lsc->lsc_callout);
+ LACP_UNLOCK(lsc);
+}
+
+struct lagg_port *
+lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
+{
+ struct lacp_softc *lsc = LACP_SOFTC(sc);
+ struct lacp_portmap *pm;
+ struct lacp_port *lp;
+ uint32_t hash;
+
+ if (__predict_false(lsc->lsc_suppress_distributing)) {
+ LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
+ return (NULL);
+ }
+
+ pm = &lsc->lsc_pmap[lsc->lsc_activemap];
+ if (pm->pm_count == 0) {
+ LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
+ return (NULL);
+ }
+
+ if (m->m_flags & M_FLOWID)
+ hash = m->m_pkthdr.flowid;
+ else
+ hash = lagg_hashmbuf(m, lsc->lsc_hashkey);
+ hash %= pm->pm_count;
+ lp = pm->pm_map[hash];
+
+ KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
+ ("aggregated port is not distributing"));
+
+ return (lp->lp_lagg);
+}
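+
+/*
+ * Illustrative example (hypothetical values): with pm_count == 2, a flow
+ * whose hash comes out as 0x12345679 maps to 0x12345679 % 2 == 1, i.e.
+ * pm_map[1].  Every frame of a given flow hashes to the same port, which
+ * preserves per-flow ordering.
+ */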
+/*
+ * lacp_suppress_distributing: drop transmit packets for a while
+ * to preserve packet ordering.
+ */
+
+static void
+lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
+{
+ struct lacp_port *lp;
+
+ if (lsc->lsc_active_aggregator != la) {
+ return;
+ }
+
+ LACP_DPRINTF((NULL, "%s\n", __func__));
+ lsc->lsc_suppress_distributing = TRUE;
+
+ /* send a marker frame down each port to verify the queues are empty */
+ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
+ lp->lp_flags |= LACP_PORT_MARK;
+ lacp_xmit_marker(lp);
+ }
+
+ /* set a timeout for the marker frames */
+ callout_reset(&lsc->lsc_transit_callout,
+ LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc);
+}
+
+static int
+lacp_compare_peerinfo(const struct lacp_peerinfo *a,
+ const struct lacp_peerinfo *b)
+{
+ return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state)));
+}
+
+static int
+lacp_compare_systemid(const struct lacp_systemid *a,
+ const struct lacp_systemid *b)
+{
+ return (memcmp(a, b, sizeof(*a)));
+}
+
+#if 0 /* unused */
+static int
+lacp_compare_portid(const struct lacp_portid *a,
+ const struct lacp_portid *b)
+{
+ return (memcmp(a, b, sizeof(*a)));
+}
+#endif
+
+static uint64_t
+lacp_aggregator_bandwidth(struct lacp_aggregator *la)
+{
+ struct lacp_port *lp;
+ uint64_t speed;
+
+ lp = TAILQ_FIRST(&la->la_ports);
+ if (lp == NULL) {
+ return (0);
+ }
+
+ speed = ifmedia_baudrate(lp->lp_media);
+ speed *= la->la_nports;
+ if (speed == 0) {
+ LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n",
+ lp->lp_media, la->la_nports));
+ }
+
+ return (speed);
+}
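+
+/*
+ * For example, an aggregator of four 1000baseT ports for which
+ * ifmedia_baudrate() reports 1000000000 yields 4 * 10^9 bit/s.  Note that
+ * the estimate assumes every member port runs at the speed of the first
+ * one.
+ */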
+
+/*
+ * lacp_select_active_aggregator: select an aggregator to be used to transmit
+ * packets from the lagg(4) interface.
+ */
+
+static void
+lacp_select_active_aggregator(struct lacp_softc *lsc)
+{
+ struct lagg_softc *sc = lsc->lsc_softc;
+ struct lacp_aggregator *la;
+ struct lacp_aggregator *best_la = NULL;
+ uint64_t best_speed = 0;
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif /* defined(LACP_DEBUG) */
+
+ LACP_DPRINTF((NULL, "%s:\n", __func__));
+
+ TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
+ uint64_t speed;
+
+ if (la->la_nports == 0) {
+ continue;
+ }
+
+ speed = lacp_aggregator_bandwidth(la);
+ LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n",
+ lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
+ speed, la->la_nports));
+
+ /*
+ * This aggregator is chosen if
+ *  - the partner has a better (numerically lower) system priority, or
+ *  - the total aggregated speed is higher, or
+ *  - the speed is tied and it is already the chosen aggregator.
+ */
+ if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) <
+ LACP_SYS_PRI(best_la->la_partner)) ||
+ speed > best_speed ||
+ (speed == best_speed &&
+ la == lsc->lsc_active_aggregator)) {
+ best_la = la;
+ best_speed = speed;
+ }
+ }
+
+ KASSERT(best_la == NULL || best_la->la_nports > 0,
+ ("invalid aggregator refcnt"));
+ KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
+ ("invalid aggregator list"));
+
+#if defined(LACP_DEBUG)
+ if (lsc->lsc_active_aggregator != best_la) {
+ LACP_DPRINTF((NULL, "active aggregator changed\n"));
+ LACP_DPRINTF((NULL, "old %s\n",
+ lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
+ buf, sizeof(buf))));
+ } else {
+ LACP_DPRINTF((NULL, "active aggregator not changed\n"));
+ }
+ LACP_DPRINTF((NULL, "new %s\n",
+ lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
+#endif /* defined(LACP_DEBUG) */
+
+ if (lsc->lsc_active_aggregator != best_la) {
+ sc->sc_ifp->if_baudrate = best_speed;
+ lsc->lsc_active_aggregator = best_la;
+ lacp_update_portmap(lsc);
+ if (best_la) {
+ lacp_suppress_distributing(lsc, best_la);
+ }
+ }
+}
+
+/*
+ * Update the inactive portmap array with the new list of ports and
+ * make it live.
+ */
+static void
+lacp_update_portmap(struct lacp_softc *lsc)
+{
+ struct lacp_aggregator *la;
+ struct lacp_portmap *p;
+ struct lacp_port *lp;
+ u_int newmap;
+ int i;
+
+ newmap = lsc->lsc_activemap == 0 ? 1 : 0;
+ p = &lsc->lsc_pmap[newmap];
+ la = lsc->lsc_active_aggregator;
+ bzero(p, sizeof(struct lacp_portmap));
+
+ if (la != NULL && la->la_nports > 0) {
+ p->pm_count = la->la_nports;
+ i = 0;
+ TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q)
+ p->pm_map[i++] = lp;
+ KASSERT(i == p->pm_count, ("Invalid port count"));
+ }
+
+ /* switch the active portmap over */
+ atomic_store_rel_int(&lsc->lsc_activemap, newmap);
+ LACP_DPRINTF((NULL, "Set table %d with %d ports\n",
+ lsc->lsc_activemap,
+ lsc->lsc_pmap[lsc->lsc_activemap].pm_count));
+}
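+
+/*
+ * The two lsc_pmap[] entries form a double buffer: the new map is built
+ * in the inactive slot while transmitters keep reading the active one.
+ * The atomic_store_rel_int() above publishes the fully built map before
+ * the index flips, so lacp_select_tx_port() never sees a partial map.
+ */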
+
+static uint16_t
+lacp_compose_key(struct lacp_port *lp)
+{
+ struct lagg_port *lgp = lp->lp_lagg;
+ struct lagg_softc *sc = lgp->lp_softc;
+ u_int media = lp->lp_media;
+ uint16_t key;
+
+ if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) {
+
+ /*
+ * non-aggregatable links should have unique keys.
+ *
+ * XXX not truly unique: if_index is 16 bits wide but only the
+ * low 15 bits are used here.
+ */
+
+ /* bit 0..14: (some bits of) if_index of this port */
+ key = lp->lp_ifp->if_index;
+ /* bit 15: 1 */
+ key |= 0x8000;
+ } else {
+ u_int subtype = IFM_SUBTYPE(media);
+
+ KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type"));
+ KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface"));
+
+ /* bit 0..4: IFM_SUBTYPE */
+ key = subtype;
+ /* bit 5..14: (some bits of) if_index of lagg device */
+ key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5);
+ /* bit 15: 0 */
+ }
+ return (htons(key));
+}
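+
+/*
+ * Worked example (hypothetical indices): an aggregatable port whose
+ * IFM_SUBTYPE(media) is 16 (1000baseT) on a lagg with if_index 2 gives
+ * key = 16 | (0x7fe0 & (2 << 5)) = 0x0010 | 0x0040 = 0x0050, which
+ * htons() then converts to network byte order for the wire.
+ */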
+
+static void
+lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
+{
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+ LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
+ __func__,
+ lacp_format_lagid(&la->la_actor, &la->la_partner,
+ buf, sizeof(buf)),
+ la->la_refcnt, la->la_refcnt + 1));
+
+ KASSERT(la->la_refcnt > 0, ("refcount <= 0"));
+ la->la_refcnt++;
+ KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount"));
+}
+
+static void
+lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la)
+{
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+ LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
+ __func__,
+ lacp_format_lagid(&la->la_actor, &la->la_partner,
+ buf, sizeof(buf)),
+ la->la_refcnt, la->la_refcnt - 1));
+
+ KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt"));
+ la->la_refcnt--;
+ if (la->la_refcnt > 0) {
+ return;
+ }
+
+ KASSERT(la->la_refcnt == 0, ("refcount not zero"));
+ KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active"));
+
+ TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q);
+
+ free(la, M_DEVBUF);
+}
+
+/*
+ * lacp_aggregator_get: allocate an aggregator.
+ */
+
+static struct lacp_aggregator *
+lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp)
+{
+ struct lacp_aggregator *la;
+
+ la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT);
+ if (la) {
+ la->la_refcnt = 1;
+ la->la_nports = 0;
+ TAILQ_INIT(&la->la_ports);
+ la->la_pending = 0;
+ TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q);
+ }
+
+ return (la);
+}
+
+/*
+ * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port.
+ */
+
+static void
+lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp)
+{
+ lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner);
+ lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor);
+
+ la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION;
+}
+
+static void
+lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr,
+ const struct lacp_peerinfo *lpi_port)
+{
+ memset(lpi_aggr, 0, sizeof(*lpi_aggr));
+ lpi_aggr->lip_systemid = lpi_port->lip_systemid;
+ lpi_aggr->lip_key = lpi_port->lip_key;
+}
+
+/*
+ * lacp_aggregator_is_compatible: check if a port can join to an aggregator.
+ */
+
+static int
+lacp_aggregator_is_compatible(const struct lacp_aggregator *la,
+ const struct lacp_port *lp)
+{
+ if (!(lp->lp_state & LACP_STATE_AGGREGATION) ||
+ !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) {
+ return (0);
+ }
+
+ if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) {
+ return (0);
+ }
+
+ if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) {
+ return (0);
+ }
+
+ if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) {
+ return (0);
+ }
+
+ return (1);
+}
+
+static int
+lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a,
+ const struct lacp_peerinfo *b)
+{
+ if (memcmp(&a->lip_systemid, &b->lip_systemid,
+ sizeof(a->lip_systemid))) {
+ return (0);
+ }
+
+ if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) {
+ return (0);
+ }
+
+ return (1);
+}
+
+static void
+lacp_port_enable(struct lacp_port *lp)
+{
+ lp->lp_state |= LACP_STATE_AGGREGATION;
+}
+
+static void
+lacp_port_disable(struct lacp_port *lp)
+{
+ lacp_set_mux(lp, LACP_MUX_DETACHED);
+
+ lp->lp_state &= ~LACP_STATE_AGGREGATION;
+ lp->lp_selected = LACP_UNSELECTED;
+ lacp_sm_rx_record_default(lp);
+ lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION;
+ lp->lp_state &= ~LACP_STATE_EXPIRED;
+}
+
+/*
+ * lacp_select: select an aggregator. create one if necessary.
+ */
+static void
+lacp_select(struct lacp_port *lp)
+{
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct lacp_aggregator *la;
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+ if (lp->lp_aggregator) {
+ return;
+ }
+
+ KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
+ ("timer_wait_while still active"));
+
+ LACP_DPRINTF((lp, "port lagid=%s\n",
+ lacp_format_lagid(&lp->lp_actor, &lp->lp_partner,
+ buf, sizeof(buf))));
+
+ TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
+ if (lacp_aggregator_is_compatible(la, lp)) {
+ break;
+ }
+ }
+
+ if (la == NULL) {
+ la = lacp_aggregator_get(lsc, lp);
+ if (la == NULL) {
+ LACP_DPRINTF((lp, "aggregator creation failed\n"));
+
+ /*
+ * will retry on the next tick.
+ */
+
+ return;
+ }
+ lacp_fill_aggregator_id(la, lp);
+ LACP_DPRINTF((lp, "aggregator created\n"));
+ } else {
+ LACP_DPRINTF((lp, "compatible aggregator found\n"));
+ if (la->la_refcnt == LACP_MAX_PORTS)
+ return;
+ lacp_aggregator_addref(lsc, la);
+ }
+
+ LACP_DPRINTF((lp, "aggregator lagid=%s\n",
+ lacp_format_lagid(&la->la_actor, &la->la_partner,
+ buf, sizeof(buf))));
+
+ lp->lp_aggregator = la;
+ lp->lp_selected = LACP_SELECTED;
+}
+
+/*
+ * lacp_unselect: finish unselect/detach process.
+ */
+
+static void
+lacp_unselect(struct lacp_port *lp)
+{
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct lacp_aggregator *la = lp->lp_aggregator;
+
+ KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
+ ("timer_wait_while still active"));
+
+ if (la == NULL) {
+ return;
+ }
+
+ lp->lp_aggregator = NULL;
+ lacp_aggregator_delref(lsc, la);
+}
+
+/* mux machine */
+
+static void
+lacp_sm_mux(struct lacp_port *lp)
+{
+ enum lacp_mux_state new_state;
+ boolean_t p_sync =
+ (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
+ boolean_t p_collecting =
+ (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0;
+ enum lacp_selected selected = lp->lp_selected;
+ struct lacp_aggregator *la;
+
+ /* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */
+
+re_eval:
+ la = lp->lp_aggregator;
+ KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL,
+ ("MUX not detached"));
+ new_state = lp->lp_mux_state;
+ switch (lp->lp_mux_state) {
+ case LACP_MUX_DETACHED:
+ if (selected != LACP_UNSELECTED) {
+ new_state = LACP_MUX_WAITING;
+ }
+ break;
+ case LACP_MUX_WAITING:
+ KASSERT(la->la_pending > 0 ||
+ !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
+ ("timer_wait_while still active"));
+ if (selected == LACP_SELECTED && la->la_pending == 0) {
+ new_state = LACP_MUX_ATTACHED;
+ } else if (selected == LACP_UNSELECTED) {
+ new_state = LACP_MUX_DETACHED;
+ }
+ break;
+ case LACP_MUX_ATTACHED:
+ if (selected == LACP_SELECTED && p_sync) {
+ new_state = LACP_MUX_COLLECTING;
+ } else if (selected != LACP_SELECTED) {
+ new_state = LACP_MUX_DETACHED;
+ }
+ break;
+ case LACP_MUX_COLLECTING:
+ if (selected == LACP_SELECTED && p_sync && p_collecting) {
+ new_state = LACP_MUX_DISTRIBUTING;
+ } else if (selected != LACP_SELECTED || !p_sync) {
+ new_state = LACP_MUX_ATTACHED;
+ }
+ break;
+ case LACP_MUX_DISTRIBUTING:
+ if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
+ new_state = LACP_MUX_COLLECTING;
+ }
+ break;
+ default:
+ panic("%s: unknown state", __func__);
+ }
+
+ if (lp->lp_mux_state == new_state) {
+ return;
+ }
+
+ lacp_set_mux(lp, new_state);
+ goto re_eval;
+}
+
+static void
+lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state)
+{
+ struct lacp_aggregator *la = lp->lp_aggregator;
+
+ if (lp->lp_mux_state == new_state) {
+ return;
+ }
+
+ switch (new_state) {
+ case LACP_MUX_DETACHED:
+ lp->lp_state &= ~LACP_STATE_SYNC;
+ lacp_disable_distributing(lp);
+ lacp_disable_collecting(lp);
+ lacp_sm_assert_ntt(lp);
+ /* cancel timer */
+ if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) {
+ KASSERT(la->la_pending > 0,
+ ("timer_wait_while not active"));
+ la->la_pending--;
+ }
+ LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE);
+ lacp_unselect(lp);
+ break;
+ case LACP_MUX_WAITING:
+ LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE,
+ LACP_AGGREGATE_WAIT_TIME);
+ la->la_pending++;
+ break;
+ case LACP_MUX_ATTACHED:
+ lp->lp_state |= LACP_STATE_SYNC;
+ lacp_disable_collecting(lp);
+ lacp_sm_assert_ntt(lp);
+ break;
+ case LACP_MUX_COLLECTING:
+ lacp_enable_collecting(lp);
+ lacp_disable_distributing(lp);
+ lacp_sm_assert_ntt(lp);
+ break;
+ case LACP_MUX_DISTRIBUTING:
+ lacp_enable_distributing(lp);
+ break;
+ default:
+ panic("%s: unknown state", __func__);
+ }
+
+ LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state));
+
+ lp->lp_mux_state = new_state;
+}
+
+static void
+lacp_sm_mux_timer(struct lacp_port *lp)
+{
+ struct lacp_aggregator *la = lp->lp_aggregator;
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+ KASSERT(la->la_pending > 0, ("no pending event"));
+
+ LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__,
+ lacp_format_lagid(&la->la_actor, &la->la_partner,
+ buf, sizeof(buf)),
+ la->la_pending, la->la_pending - 1));
+
+ la->la_pending--;
+}
+
+/* periodic transmit machine */
+
+static void
+lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate)
+{
+ if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state,
+ LACP_STATE_TIMEOUT)) {
+ return;
+ }
+
+ LACP_DPRINTF((lp, "partner timeout changed\n"));
+
+ /*
+ * FAST_PERIODIC -> SLOW_PERIODIC
+ * or
+ * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC
+ *
+ * let lacp_sm_ptx_tx_schedule() update the timeout.
+ */
+
+ LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
+
+ /*
+ * if the timeout has been shortened, assert NTT.
+ */
+
+ if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) {
+ lacp_sm_assert_ntt(lp);
+ }
+}
+
+static void
+lacp_sm_ptx_tx_schedule(struct lacp_port *lp)
+{
+ int timeout;
+
+ if (!(lp->lp_state & LACP_STATE_ACTIVITY) &&
+ !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) {
+
+ /*
+ * NO_PERIODIC
+ */
+
+ LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
+ return;
+ }
+
+ if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) {
+ return;
+ }
+
+ timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ?
+ LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME;
+
+ LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout);
+}
+
+static void
+lacp_sm_ptx_timer(struct lacp_port *lp)
+{
+ lacp_sm_assert_ntt(lp);
+}
+
+static void
+lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du)
+{
+ int timeout;
+
+ /*
+ * check LACP_DISABLED first
+ */
+
+ if (!(lp->lp_state & LACP_STATE_AGGREGATION)) {
+ return;
+ }
+
+ /*
+ * check loopback condition: ignore LACPDUs carrying our own system ID.
+ */
+
+ if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid,
+ &lp->lp_actor.lip_systemid)) {
+ return;
+ }
+
+ /*
+ * EXPIRED, DEFAULTED, CURRENT -> CURRENT
+ */
+
+ lacp_sm_rx_update_selected(lp, du);
+ lacp_sm_rx_update_ntt(lp, du);
+ lacp_sm_rx_record_pdu(lp, du);
+
+ timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ?
+ LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME;
+ LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout);
+
+ lp->lp_state &= ~LACP_STATE_EXPIRED;
+
+ /*
+ * kick the transmit machine without waiting for the next tick.
+ */
+
+ lacp_sm_tx(lp);
+}
+
+static void
+lacp_sm_rx_set_expired(struct lacp_port *lp)
+{
+ lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
+ lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT;
+ LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME);
+ lp->lp_state |= LACP_STATE_EXPIRED;
+}
+
+static void
+lacp_sm_rx_timer(struct lacp_port *lp)
+{
+ if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) {
+ /* CURRENT -> EXPIRED */
+ LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__));
+ lacp_sm_rx_set_expired(lp);
+ } else {
+ /* EXPIRED -> DEFAULTED */
+ LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__));
+ lacp_sm_rx_update_default_selected(lp);
+ lacp_sm_rx_record_default(lp);
+ lp->lp_state &= ~LACP_STATE_EXPIRED;
+ }
+}
+
+static void
+lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
+{
+ boolean_t active;
+ uint8_t oldpstate;
+#if defined(LACP_DEBUG)
+ char buf[LACP_STATESTR_MAX+1];
+#endif
+
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ oldpstate = lp->lp_partner.lip_state;
+
+ active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY)
+ || ((lp->lp_state & LACP_STATE_ACTIVITY) &&
+ (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY));
+
+ lp->lp_partner = du->ldu_actor;
+ if (active &&
+ ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
+ LACP_STATE_AGGREGATION) &&
+ !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner))
+ || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) {
+ /* XXX nothing? */
+ } else {
+ lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
+ }
+
+ lp->lp_state &= ~LACP_STATE_DEFAULTED;
+
+ if (oldpstate != lp->lp_partner.lip_state) {
+ LACP_DPRINTF((lp, "old pstate %s\n",
+ lacp_format_state(oldpstate, buf, sizeof(buf))));
+ LACP_DPRINTF((lp, "new pstate %s\n",
+ lacp_format_state(lp->lp_partner.lip_state, buf,
+ sizeof(buf))));
+ }
+
+ lacp_sm_ptx_update_timeout(lp, oldpstate);
+}
+
+static void
+lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du)
+{
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) ||
+ !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
+ LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) {
+ LACP_DPRINTF((lp, "%s: assert ntt\n", __func__));
+ lacp_sm_assert_ntt(lp);
+ }
+}
+
+static void
+lacp_sm_rx_record_default(struct lacp_port *lp)
+{
+ uint8_t oldpstate;
+
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ oldpstate = lp->lp_partner.lip_state;
+ lp->lp_partner = lacp_partner_admin;
+ lp->lp_state |= LACP_STATE_DEFAULTED;
+ lacp_sm_ptx_update_timeout(lp, oldpstate);
+}
+
+static void
+lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
+ const struct lacp_peerinfo *info)
+{
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ if (lacp_compare_peerinfo(&lp->lp_partner, info) ||
+ !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state,
+ LACP_STATE_AGGREGATION)) {
+ lp->lp_selected = LACP_UNSELECTED;
+ /* mux machine will clean up lp->lp_aggregator */
+ }
+}
+
+static void
+lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
+{
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor);
+}
+
+static void
+lacp_sm_rx_update_default_selected(struct lacp_port *lp)
+{
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
+}
+
+/* transmit machine */
+
+static void
+lacp_sm_tx(struct lacp_port *lp)
+{
+ int error;
+
+ if (!(lp->lp_state & LACP_STATE_AGGREGATION)
+#if 1
+ || (!(lp->lp_state & LACP_STATE_ACTIVITY)
+ && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY))
+#endif
+ ) {
+ lp->lp_flags &= ~LACP_PORT_NTT;
+ }
+
+ if (!(lp->lp_flags & LACP_PORT_NTT)) {
+ return;
+ }
+
+ /* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */
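+ /* (LACP_FAST_PERIODIC_TIME is 1 second, so this allows at most 3 PDUs/s.) */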
+ if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent,
+ (3 / LACP_FAST_PERIODIC_TIME)) == 0) {
+ LACP_DPRINTF((lp, "rate limited pdu\n"));
+ return;
+ }
+
+ error = lacp_xmit_lacpdu(lp);
+
+ if (error == 0) {
+ lp->lp_flags &= ~LACP_PORT_NTT;
+ } else {
+ LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n",
+ error));
+ }
+}
+
+static void
+lacp_sm_assert_ntt(struct lacp_port *lp)
+{
+
+ lp->lp_flags |= LACP_PORT_NTT;
+}
+
+static void
+lacp_run_timers(struct lacp_port *lp)
+{
+ int i;
+
+ for (i = 0; i < LACP_NTIMER; i++) {
+ KASSERT(lp->lp_timer[i] >= 0,
+ ("invalid timer value %d", lp->lp_timer[i]));
+ if (lp->lp_timer[i] == 0) {
+ continue;
+ } else if (--lp->lp_timer[i] <= 0) {
+ if (lacp_timer_funcs[i]) {
+ (*lacp_timer_funcs[i])(lp);
+ }
+ }
+ }
+}
+
+int
+lacp_marker_input(struct lacp_port *lp, struct mbuf *m)
+{
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct lagg_port *lgp = lp->lp_lagg;
+ struct lacp_port *lp2;
+ struct markerdu *mdu;
+ int error = 0;
+ int pending = 0;
+
+ if (m->m_pkthdr.len != sizeof(*mdu)) {
+ goto bad;
+ }
+
+ if ((m->m_flags & M_MCAST) == 0) {
+ goto bad;
+ }
+
+ if (m->m_len < sizeof(*mdu)) {
+ m = m_pullup(m, sizeof(*mdu));
+ if (m == NULL) {
+ return (ENOMEM);
+ }
+ }
+
+ mdu = mtod(m, struct markerdu *);
+
+ if (memcmp(&mdu->mdu_eh.ether_dhost,
+ &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
+ goto bad;
+ }
+
+ if (mdu->mdu_sph.sph_version != 1) {
+ goto bad;
+ }
+
+ switch (mdu->mdu_tlv.tlv_type) {
+ case MARKER_TYPE_INFO:
+ if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
+ marker_info_tlv_template, TRUE)) {
+ goto bad;
+ }
+ mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE;
+ memcpy(&mdu->mdu_eh.ether_dhost,
+ &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN);
+ memcpy(&mdu->mdu_eh.ether_shost,
+ lgp->lp_lladdr, ETHER_ADDR_LEN);
+ error = lagg_enqueue(lp->lp_ifp, m);
+ break;
+
+ case MARKER_TYPE_RESPONSE:
+ if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
+ marker_response_tlv_template, TRUE)) {
+ goto bad;
+ }
+ LACP_DPRINTF((lp, "marker response, port=%u, sys=%6D, id=%u\n",
+ ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system,
+ ":", ntohl(mdu->mdu_info.mi_rq_xid)));
+
+ /* Verify that it is the last marker we sent out */
+ if (memcmp(&mdu->mdu_info, &lp->lp_marker,
+ sizeof(struct lacp_markerinfo)))
+ goto bad;
+
+ LACP_LOCK(lsc);
+ lp->lp_flags &= ~LACP_PORT_MARK;
+
+ if (lsc->lsc_suppress_distributing) {
+ /* Check if any ports are waiting for a response */
+ LIST_FOREACH(lp2, &lsc->lsc_ports, lp_next) {
+ if (lp2->lp_flags & LACP_PORT_MARK) {
+ pending = 1;
+ break;
+ }
+ }
+
+ if (pending == 0) {
+ /* All interface queues are clear */
+ LACP_DPRINTF((NULL, "queue flush complete\n"));
+ lsc->lsc_suppress_distributing = FALSE;
+ }
+ }
+ LACP_UNLOCK(lsc);
+ m_freem(m);
+ break;
+
+ default:
+ goto bad;
+ }
+
+ return (error);
+
+bad:
+ LACP_DPRINTF((lp, "bad marker frame\n"));
+ m_freem(m);
+ return (EINVAL);
+}
+
+static int
+tlv_check(const void *p, size_t size, const struct tlvhdr *tlv,
+ const struct tlv_template *tmpl, boolean_t check_type)
+{
+ while (/* CONSTCOND */ 1) {
+ if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) {
+ return (EINVAL);
+ }
+ if ((check_type && tlv->tlv_type != tmpl->tmpl_type) ||
+ tlv->tlv_length != tmpl->tmpl_length) {
+ return (EINVAL);
+ }
+ if (tmpl->tmpl_type == 0) {
+ break;
+ }
+ tlv = (const struct tlvhdr *)
+ ((const char *)tlv + tlv->tlv_length);
+ tmpl++;
+ }
+
+ return (0);
+}
+
+#if defined(LACP_DEBUG)
+const char *
+lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
+{
+ snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X",
+ (int)mac[0],
+ (int)mac[1],
+ (int)mac[2],
+ (int)mac[3],
+ (int)mac[4],
+ (int)mac[5]);
+
+ return (buf);
+}
+
+const char *
+lacp_format_systemid(const struct lacp_systemid *sysid,
+ char *buf, size_t buflen)
+{
+ char macbuf[LACP_MACSTR_MAX+1];
+
+ snprintf(buf, buflen, "%04X,%s",
+ ntohs(sysid->lsi_prio),
+ lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf)));
+
+ return (buf);
+}
+
+const char *
+lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen)
+{
+ snprintf(buf, buflen, "%04X,%04X",
+ ntohs(portid->lpi_prio),
+ ntohs(portid->lpi_portno));
+
+ return (buf);
+}
+
+const char *
+lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen)
+{
+ char sysid[LACP_SYSTEMIDSTR_MAX+1];
+ char portid[LACP_PORTIDSTR_MAX+1];
+
+ snprintf(buf, buflen, "(%s,%04X,%s)",
+ lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)),
+ ntohs(peer->lip_key),
+ lacp_format_portid(&peer->lip_portid, portid, sizeof(portid)));
+
+ return (buf);
+}
+
+const char *
+lacp_format_lagid(const struct lacp_peerinfo *a,
+ const struct lacp_peerinfo *b, char *buf, size_t buflen)
+{
+ char astr[LACP_PARTNERSTR_MAX+1];
+ char bstr[LACP_PARTNERSTR_MAX+1];
+
+#if 0
+ /*
+ * there's a convention to display the smaller-numbered peer
+ * on the left.
+ */
+
+ if (lacp_compare_peerinfo(a, b) > 0) {
+ const struct lacp_peerinfo *t;
+
+ t = a;
+ a = b;
+ b = t;
+ }
+#endif
+
+ snprintf(buf, buflen, "[%s,%s]",
+ lacp_format_partner(a, astr, sizeof(astr)),
+ lacp_format_partner(b, bstr, sizeof(bstr)));
+
+ return (buf);
+}
+
+const char *
+lacp_format_lagid_aggregator(const struct lacp_aggregator *la,
+ char *buf, size_t buflen)
+{
+ if (la == NULL) {
+ return ("(none)");
+ }
+
+ return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen));
+}
+
+const char *
+lacp_format_state(uint8_t state, char *buf, size_t buflen)
+{
+ snprintf(buf, buflen, "%b", state, LACP_STATE_BITS);
+ return (buf);
+}
+
+static void
+lacp_dump_lacpdu(const struct lacpdu *du)
+{
+ char buf[LACP_PARTNERSTR_MAX+1];
+ char buf2[LACP_STATESTR_MAX+1];
+
+ printf("actor=%s\n",
+ lacp_format_partner(&du->ldu_actor, buf, sizeof(buf)));
+ printf("actor.state=%s\n",
+ lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2)));
+ printf("partner=%s\n",
+ lacp_format_partner(&du->ldu_partner, buf, sizeof(buf)));
+ printf("partner.state=%s\n",
+ lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2)));
+
+ printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay));
+}
+
+static void
+lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
+{
+ va_list va;
+
+ if (lp) {
+ printf("%s: ", lp->lp_ifp->if_xname);
+ }
+
+ va_start(va, fmt);
+ vprintf(fmt, va);
+ va_end(va);
+}
+#endif
diff --git a/freebsd/sys/net/ieee8023ad_lacp.h b/freebsd/sys/net/ieee8023ad_lacp.h
new file mode 100644
index 00000000..9cebc591
--- /dev/null
+++ b/freebsd/sys/net/ieee8023ad_lacp.h
@@ -0,0 +1,333 @@
+/* $NetBSD: ieee8023ad_impl.h,v 1.2 2005/12/10 23:21:39 elad Exp $ */
+
+/*-
+ * Copyright (c)2005 YAMAMOTO Takashi,
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * IEEE802.3ad LACP
+ *
+ * implementation details.
+ */
+
+#define LACP_TIMER_CURRENT_WHILE 0
+#define LACP_TIMER_PERIODIC 1
+#define LACP_TIMER_WAIT_WHILE 2
+#define LACP_NTIMER 3
+
+#define LACP_TIMER_ARM(port, timer, val) \
+ (port)->lp_timer[(timer)] = (val)
+#define LACP_TIMER_DISARM(port, timer) \
+ (port)->lp_timer[(timer)] = 0
+#define LACP_TIMER_ISARMED(port, timer) \
+ ((port)->lp_timer[(timer)] > 0)
+
+/*
+ * IEEE802.3ad LACP
+ *
+ * protocol definitions.
+ */
+
+#define LACP_STATE_ACTIVITY (1<<0)
+#define LACP_STATE_TIMEOUT (1<<1)
+#define LACP_STATE_AGGREGATION (1<<2)
+#define LACP_STATE_SYNC (1<<3)
+#define LACP_STATE_COLLECTING (1<<4)
+#define LACP_STATE_DISTRIBUTING (1<<5)
+#define LACP_STATE_DEFAULTED (1<<6)
+#define LACP_STATE_EXPIRED (1<<7)
+
+#define LACP_PORT_NTT 0x00000001
+#define LACP_PORT_MARK 0x00000002
+
+#define LACP_STATE_BITS \
+ "\020" \
+ "\001ACTIVITY" \
+ "\002TIMEOUT" \
+ "\003AGGREGATION" \
+ "\004SYNC" \
+ "\005COLLECTING" \
+ "\006DISTRIBUTING" \
+ "\007DEFAULTED" \
+ "\010EXPIRED"
+
+/*
+ * IEEE802.3 slow protocols
+ *
+ * protocol (on-wire) definitions.
+ *
+ * XXX should be elsewhere.
+ */
+
+#define SLOWPROTOCOLS_SUBTYPE_LACP 1
+#define SLOWPROTOCOLS_SUBTYPE_MARKER 2
+
+struct slowprothdr {
+ uint8_t sph_subtype;
+ uint8_t sph_version;
+} __packed;
+
+/*
+ * TLV on-wire structure.
+ */
+
+struct tlvhdr {
+ uint8_t tlv_type;
+ uint8_t tlv_length;
+ /* uint8_t tlv_value[]; */
+} __packed;
+
+/*
+ * ... and our implementation.
+ */
+
+#define TLV_SET(tlv, type, length) \
+ do { \
+ (tlv)->tlv_type = (type); \
+ (tlv)->tlv_length = sizeof(*tlv) + (length); \
+ } while (/*CONSTCOND*/0)
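+
+/*
+ * Example use (illustrative): a sender fills in the actor TLV of a
+ * LACPDU with
+ *
+ * TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
+ *
+ * which sets tlv_length to sizeof(struct tlvhdr) plus the payload size.
+ */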
+
+struct tlv_template {
+ uint8_t tmpl_type;
+ uint8_t tmpl_length;
+};
+
+struct lacp_systemid {
+ uint16_t lsi_prio;
+ uint8_t lsi_mac[6];
+} __packed;
+
+struct lacp_portid {
+ uint16_t lpi_prio;
+ uint16_t lpi_portno;
+} __packed;
+
+struct lacp_peerinfo {
+ struct lacp_systemid lip_systemid;
+ uint16_t lip_key;
+ struct lacp_portid lip_portid;
+ uint8_t lip_state;
+ uint8_t lip_resv[3];
+} __packed;
+
+struct lacp_collectorinfo {
+ uint16_t lci_maxdelay;
+ uint8_t lci_resv[12];
+} __packed;
+
+struct lacpdu {
+ struct ether_header ldu_eh;
+ struct slowprothdr ldu_sph;
+
+ struct tlvhdr ldu_tlv_actor;
+ struct lacp_peerinfo ldu_actor;
+ struct tlvhdr ldu_tlv_partner;
+ struct lacp_peerinfo ldu_partner;
+ struct tlvhdr ldu_tlv_collector;
+ struct lacp_collectorinfo ldu_collector;
+ struct tlvhdr ldu_tlv_term;
+ uint8_t ldu_resv[50];
+} __packed;
+
+/*
+ * IEEE802.3ad marker protocol
+ *
+ * protocol (on-wire) definitions.
+ */
+struct lacp_markerinfo {
+ uint16_t mi_rq_port;
+ uint8_t mi_rq_system[ETHER_ADDR_LEN];
+ uint32_t mi_rq_xid;
+ uint8_t mi_pad[2];
+} __packed;
+
+struct markerdu {
+ struct ether_header mdu_eh;
+ struct slowprothdr mdu_sph;
+
+ struct tlvhdr mdu_tlv;
+ struct lacp_markerinfo mdu_info;
+ struct tlvhdr mdu_tlv_term;
+ uint8_t mdu_resv[90];
+} __packed;
+
+#define MARKER_TYPE_INFO 0x01
+#define MARKER_TYPE_RESPONSE 0x02
+
+enum lacp_selected {
+ LACP_UNSELECTED,
+ LACP_STANDBY, /* not used in this implementation */
+ LACP_SELECTED,
+};
+
+enum lacp_mux_state {
+ LACP_MUX_DETACHED,
+ LACP_MUX_WAITING,
+ LACP_MUX_ATTACHED,
+ LACP_MUX_COLLECTING,
+ LACP_MUX_DISTRIBUTING,
+};
+
+#define LACP_MAX_PORTS 32
+
+struct lacp_portmap {
+ int pm_count;
+ struct lacp_port *pm_map[LACP_MAX_PORTS];
+};
+
+struct lacp_port {
+ TAILQ_ENTRY(lacp_port) lp_dist_q;
+ LIST_ENTRY(lacp_port) lp_next;
+ struct lacp_softc *lp_lsc;
+ struct lagg_port *lp_lagg;
+ struct ifnet *lp_ifp;
+ struct lacp_peerinfo lp_partner;
+ struct lacp_peerinfo lp_actor;
+ struct lacp_markerinfo lp_marker;
+#define lp_state lp_actor.lip_state
+#define lp_key lp_actor.lip_key
+#define lp_systemid lp_actor.lip_systemid
+ struct timeval lp_last_lacpdu;
+ int lp_lacpdu_sent;
+ enum lacp_mux_state lp_mux_state;
+ enum lacp_selected lp_selected;
+ int lp_flags;
+ u_int lp_media; /* XXX redundant */
+ int lp_timer[LACP_NTIMER];
+ struct ifmultiaddr *lp_ifma;
+
+ struct lacp_aggregator *lp_aggregator;
+};
+
+struct lacp_aggregator {
+ TAILQ_ENTRY(lacp_aggregator) la_q;
+ int la_refcnt; /* num of ports which selected us */
+ int la_nports; /* num of distributing ports */
+ TAILQ_HEAD(, lacp_port) la_ports; /* distributing ports */
+ struct lacp_peerinfo la_partner;
+ struct lacp_peerinfo la_actor;
+ int la_pending; /* number of ports in wait_while */
+};
+
+struct lacp_softc {
+ struct lagg_softc *lsc_softc;
+ struct mtx lsc_mtx;
+ struct lacp_aggregator *lsc_active_aggregator;
+ TAILQ_HEAD(, lacp_aggregator) lsc_aggregators;
+ boolean_t lsc_suppress_distributing;
+ struct callout lsc_transit_callout;
+ struct callout lsc_callout;
+ LIST_HEAD(, lacp_port) lsc_ports;
+ struct lacp_portmap lsc_pmap[2];
+ volatile u_int lsc_activemap;
+ u_int32_t lsc_hashkey;
+};
+
+#define LACP_TYPE_ACTORINFO 1
+#define LACP_TYPE_PARTNERINFO 2
+#define LACP_TYPE_COLLECTORINFO 3
+
+/* timeout values (in sec) */
+#define LACP_FAST_PERIODIC_TIME (1)
+#define LACP_SLOW_PERIODIC_TIME (30)
+#define LACP_SHORT_TIMEOUT_TIME (3 * LACP_FAST_PERIODIC_TIME)
+#define LACP_LONG_TIMEOUT_TIME (3 * LACP_SLOW_PERIODIC_TIME)
+#define LACP_CHURN_DETECTION_TIME (60)
+#define LACP_AGGREGATE_WAIT_TIME (2)
+#define LACP_TRANSIT_DELAY 3000 /* in msec */
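+
+/*
+ * The timeout values are three times the partner's periodic rate: a
+ * partner expires after three consecutive missed LACPDUs, i.e. after
+ * 3 s (short timeout) or 90 s (long timeout).
+ */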
+
+#define LACP_STATE_EQ(s1, s2, mask) \
+ ((((s1) ^ (s2)) & (mask)) == 0)
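+
+/*
+ * Only the bits selected by the mask are compared; e.g.
+ * LACP_STATE_EQ(0x05, 0x04, LACP_STATE_ACTIVITY) is false (bit 0
+ * differs) while LACP_STATE_EQ(0x05, 0x07, LACP_STATE_ACTIVITY) is true.
+ */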
+
+#define LACP_SYS_PRI(peer) (peer).lip_systemid.lsi_prio
+
+#define LACP_PORT(_lp) ((struct lacp_port *)(_lp)->lp_psc)
+#define LACP_SOFTC(_sc) ((struct lacp_softc *)(_sc)->sc_psc)
+
+#define LACP_LOCK_INIT(_lsc) mtx_init(&(_lsc)->lsc_mtx, \
+ "lacp mtx", NULL, MTX_DEF)
+#define LACP_LOCK_DESTROY(_lsc) mtx_destroy(&(_lsc)->lsc_mtx)
+#define LACP_LOCK(_lsc) mtx_lock(&(_lsc)->lsc_mtx)
+#define LACP_UNLOCK(_lsc) mtx_unlock(&(_lsc)->lsc_mtx)
+#define LACP_LOCK_ASSERT(_lsc) mtx_assert(&(_lsc)->lsc_mtx, MA_OWNED)
+
+struct mbuf *lacp_input(struct lagg_port *, struct mbuf *);
+struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
+int lacp_attach(struct lagg_softc *);
+int lacp_detach(struct lagg_softc *);
+void lacp_init(struct lagg_softc *);
+void lacp_stop(struct lagg_softc *);
+int lacp_port_create(struct lagg_port *);
+void lacp_port_destroy(struct lagg_port *);
+void lacp_linkstate(struct lagg_port *);
+void lacp_req(struct lagg_softc *, caddr_t);
+void lacp_portreq(struct lagg_port *, caddr_t);
+
+static __inline int
+lacp_isactive(struct lagg_port *lgp)
+{
+ struct lacp_port *lp = LACP_PORT(lgp);
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct lacp_aggregator *la = lp->lp_aggregator;
+
+ /* This port is joined to the active aggregator */
+ if (la != NULL && la == lsc->lsc_active_aggregator)
+ return (1);
+
+ return (0);
+}
+
+static __inline int
+lacp_iscollecting(struct lagg_port *lgp)
+{
+ struct lacp_port *lp = LACP_PORT(lgp);
+
+ return ((lp->lp_state & LACP_STATE_COLLECTING) != 0);
+}
+
+static __inline int
+lacp_isdistributing(struct lagg_port *lgp)
+{
+ struct lacp_port *lp = LACP_PORT(lgp);
+
+ return ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0);
+}
+
+/* following constants don't include terminating NUL */
+#define LACP_MACSTR_MAX (2*6 + 5)
+#define LACP_SYSTEMPRIOSTR_MAX (4)
+#define LACP_SYSTEMIDSTR_MAX (LACP_SYSTEMPRIOSTR_MAX + 1 + LACP_MACSTR_MAX)
+#define LACP_PORTPRIOSTR_MAX (4)
+#define LACP_PORTNOSTR_MAX (4)
+#define LACP_PORTIDSTR_MAX (LACP_PORTPRIOSTR_MAX + 1 + LACP_PORTNOSTR_MAX)
+#define LACP_KEYSTR_MAX (4)
+#define LACP_PARTNERSTR_MAX \
+ (1 + LACP_SYSTEMIDSTR_MAX + 1 + LACP_KEYSTR_MAX + 1 \
+ + LACP_PORTIDSTR_MAX + 1)
+#define LACP_LAGIDSTR_MAX \
+ (1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1)
+#define LACP_STATESTR_MAX (255) /* XXX */
diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c
new file mode 100644
index 00000000..33d9ed9d
--- /dev/null
+++ b/freebsd/sys/net/if.c
@@ -0,0 +1,3431 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1980, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if.c 8.5 (Berkeley) 1/9/95
+ * $FreeBSD$
+ */
+
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_inet.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/conf.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/sbuf.h>
+#include <freebsd/sys/bus.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/refcount.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/taskqueue.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/machine/stdarg.h>
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_arp.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/radix.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#if defined(INET) || defined(INET6)
+/*XXX*/
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip_carp.h>
+#ifdef INET6
+#include <freebsd/netinet6/in6_var.h>
+#include <freebsd/netinet6/in6_ifattach.h>
+#endif
+#endif
+#ifdef INET
+#include <freebsd/netinet/if_ether.h>
+#endif
+
+#include <freebsd/security/mac/mac_framework.h>
+
+#ifdef COMPAT_FREEBSD32
+#include <freebsd/sys/mount.h>
+#include <freebsd/compat/freebsd32/freebsd32.h>
+#endif
+
+struct ifindex_entry {
+ struct ifnet *ife_ifnet;
+};
+
+static int slowtimo_started;
+
+SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
+SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
+
+TUNABLE_INT("net.link.ifqmaxlen", &ifqmaxlen);
+SYSCTL_UINT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
+ &ifqmaxlen, 0, "max send queue size");
+
+/* Log link state change events */
+static int log_link_state_change = 1;
+
+SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
+ &log_link_state_change, 0,
+ "log interface link state change events");
+
+/* Interface description */
+static unsigned int ifdescr_maxlen = 1024;
+SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
+ &ifdescr_maxlen, 0,
+ "administrative maximum length for interface description");
+
+MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
+
+/* global sx for non-critical path ifdescr */
+static struct sx ifdescr_sx;
+SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
+
+void (*bstp_linkstate_p)(struct ifnet *ifp, int state);
+void (*ng_ether_link_state_p)(struct ifnet *ifp, int state);
+void (*lagg_linkstate_p)(struct ifnet *ifp, int state);
+/* These are external hooks for CARP. */
+void (*carp_linkstate_p)(struct ifnet *ifp);
+#if defined(INET) || defined(INET6)
+struct ifnet *(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
+int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *sa, struct rtentry *rt);
+#endif
+#ifdef INET
+int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *, struct in_addr *,
+ u_int8_t **);
+#endif
+#ifdef INET6
+struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
+caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
+ const struct in6_addr *taddr);
+#endif
+
+struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
+
+/*
+ * XXX: Style; these should be sorted alphabetically, and unprototyped
+ * static functions should be prototyped. Currently they are sorted by
+ * declaration order.
+ */
+static void if_attachdomain(void *);
+static void if_attachdomain1(struct ifnet *);
+static int ifconf(u_long, caddr_t);
+static void if_freemulti(struct ifmultiaddr *);
+static void if_init(void *);
+static void if_grow(void);
+static void if_check(void *);
+static void if_route(struct ifnet *, int flag, int fam);
+static int if_setflag(struct ifnet *, int, int, int *, int);
+static void if_slowtimo(void *);
+static int if_transmit(struct ifnet *ifp, struct mbuf *m);
+static void if_unroute(struct ifnet *, int flag, int fam);
+static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
+static int if_rtdel(struct radix_node *, void *);
+static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
+static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
+static void do_link_state_change(void *, int);
+static int if_getgroup(struct ifgroupreq *, struct ifnet *);
+static int if_getgroupmembers(struct ifgroupreq *);
+static void if_delgroups(struct ifnet *);
+static void if_attach_internal(struct ifnet *, int);
+static void if_detach_internal(struct ifnet *, int);
+
+#ifdef INET6
+/*
+ * XXX: declared here to avoid including many inet6-related files;
+ * should this be more generalized?
+ */
+extern void nd6_setmtu(struct ifnet *);
+#endif
+
+VNET_DEFINE(int, if_index);
+int ifqmaxlen = IFQ_MAXLEN;
+VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */
+VNET_DEFINE(struct ifgrouphead, ifg_head);
+
+static VNET_DEFINE(int, if_indexlim) = 8;
+
+/* Table of ifnet by index. */
+VNET_DEFINE(struct ifindex_entry *, ifindex_table);
+
+#define V_if_indexlim VNET(if_indexlim)
+#define V_ifindex_table VNET(ifindex_table)
+
+/*
+ * The global network interface list (V_ifnet) and related state (such as
+ * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
+ * an rwlock.  Either may be acquired shared to stabilize the list, but both
+ * must be acquired writable to modify the list.  This model allows us to
+ * stabilize the interface list both during interrupt thread processing and
+ * over long-running ioctls, without introducing priority inversions and
+ * deadlocks.
+ */
+struct rwlock ifnet_rwlock;
+struct sx ifnet_sxlock;
+
+/*
+ * The allocation of network interfaces is a rather non-atomic affair; we
+ * need to select an index before we are ready to expose the interface for
+ * use, so we use this pointer value to indicate reservation.
+ */
+#define IFNET_HOLD (void *)(uintptr_t)(-1)
+
+static if_com_alloc_t *if_com_alloc[256];
+static if_com_free_t *if_com_free[256];
+
+/*
+ * System initialization
+ */
+SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL);
+
+MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
+MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
+MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
+
+struct ifnet *
+ifnet_byindex_locked(u_short idx)
+{
+
+ if (idx > V_if_index)
+ return (NULL);
+ if (V_ifindex_table[idx].ife_ifnet == IFNET_HOLD)
+ return (NULL);
+ return (V_ifindex_table[idx].ife_ifnet);
+}
+
+struct ifnet *
+ifnet_byindex(u_short idx)
+{
+ struct ifnet *ifp;
+
+ IFNET_RLOCK_NOSLEEP();
+ ifp = ifnet_byindex_locked(idx);
+ IFNET_RUNLOCK_NOSLEEP();
+ return (ifp);
+}
+
+struct ifnet *
+ifnet_byindex_ref(u_short idx)
+{
+ struct ifnet *ifp;
+
+ IFNET_RLOCK_NOSLEEP();
+ ifp = ifnet_byindex_locked(idx);
+ if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
+ IFNET_RUNLOCK_NOSLEEP();
+ return (NULL);
+ }
+ if_ref(ifp);
+ IFNET_RUNLOCK_NOSLEEP();
+ return (ifp);
+}
+
+/*
+ * Allocate an ifindex array entry; return 0 on success or an error on
+ * failure.
+ */
+static int
+ifindex_alloc_locked(u_short *idxp)
+{
+ u_short idx;
+
+ IFNET_WLOCK_ASSERT();
+
+ /*
+ * Try to find an empty slot below V_if_index. If we fail, take the
+ * next slot.
+ */
+ for (idx = 1; idx <= V_if_index; idx++) {
+ if (V_ifindex_table[idx].ife_ifnet == NULL)
+ break;
+ }
+
+ /* Catch if_index overflow. */
+ if (idx < 1)
+ return (ENOSPC);
+ if (idx > V_if_index)
+ V_if_index = idx;
+ if (V_if_index >= V_if_indexlim)
+ if_grow();
+ *idxp = idx;
+ return (0);
+}
+
+static void
+ifindex_free_locked(u_short idx)
+{
+
+ IFNET_WLOCK_ASSERT();
+
+ V_ifindex_table[idx].ife_ifnet = NULL;
+ while (V_if_index > 0 &&
+ V_ifindex_table[V_if_index].ife_ifnet == NULL)
+ V_if_index--;
+}
+
+static void
+ifindex_free(u_short idx)
+{
+
+ IFNET_WLOCK();
+ ifindex_free_locked(idx);
+ IFNET_WUNLOCK();
+}
+
+static void
+ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
+{
+
+ IFNET_WLOCK_ASSERT();
+
+ V_ifindex_table[idx].ife_ifnet = ifp;
+}
+
+static void
+ifnet_setbyindex(u_short idx, struct ifnet *ifp)
+{
+
+ IFNET_WLOCK();
+ ifnet_setbyindex_locked(idx, ifp);
+ IFNET_WUNLOCK();
+}
+
+struct ifaddr *
+ifaddr_byindex(u_short idx)
+{
+ struct ifaddr *ifa;
+
+ IFNET_RLOCK_NOSLEEP();
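+ /* XXX: assumes idx names a live ifnet; ifnet_byindex_locked() may return NULL. */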
+ ifa = ifnet_byindex_locked(idx)->if_addr;
+ if (ifa != NULL)
+ ifa_ref(ifa);
+ IFNET_RUNLOCK_NOSLEEP();
+ return (ifa);
+}
+
+/*
+ * Network interface utility routines.
+ *
+ * Routines with ifa_ifwith* names take sockaddr *'s as
+ * parameters.
+ */
+
+static void
+vnet_if_init(const void *unused __unused)
+{
+
+ TAILQ_INIT(&V_ifnet);
+ TAILQ_INIT(&V_ifg_head);
+ if_grow(); /* create initial table */
+ vnet_if_clone_init();
+}
+VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_init,
+ NULL);
+
+/* ARGSUSED*/
+static void
+if_init(void *dummy __unused)
+{
+
+ IFNET_LOCK_INIT();
+ if_clone_init();
+}
+SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL);
+
+
+#ifdef VIMAGE
+static void
+vnet_if_uninit(const void *unused __unused)
+{
+
+ VNET_ASSERT(TAILQ_EMPTY(&V_ifnet));
+ VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head));
+
+ free((caddr_t)V_ifindex_table, M_IFNET);
+}
+VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
+ vnet_if_uninit, NULL);
+#endif
+
+static void
+if_grow(void)
+{
+ u_int n;
+ struct ifindex_entry *e;
+
+ V_if_indexlim <<= 1;
+ n = V_if_indexlim * sizeof(*e);
+ e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
+ if (V_ifindex_table != NULL) {
+ memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
+ free((caddr_t)V_ifindex_table, M_IFNET);
+ }
+ V_ifindex_table = e;
+}
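+
+/*
+ * Each call doubles the table: with the initial if_indexlim of 8, the
+ * first growth reallocates to 16 entries and copies the old half (n/2
+ * bytes, the previous size) into the new array.
+ */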
+
+static void
+if_check(void *dummy __unused)
+{
+
+ /*
+ * If at least one interface added during boot uses
+ * if_watchdog then start the timer.
+ */
+ if (slowtimo_started)
+ if_slowtimo(0);
+}
+
+/*
+ * Allocate a struct ifnet and an index for an interface. A layer 2
+ * common structure will also be allocated if an allocation routine is
+ * registered for the passed type.
+ */
+struct ifnet *
+if_alloc(u_char type)
+{
+ struct ifnet *ifp;
+ u_short idx;
+
+ ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
+ IFNET_WLOCK();
+ if (ifindex_alloc_locked(&idx) != 0) {
+ IFNET_WUNLOCK();
+ free(ifp, M_IFNET);
+ return (NULL);
+ }
+ ifnet_setbyindex_locked(idx, IFNET_HOLD);
+ IFNET_WUNLOCK();
+ ifp->if_index = idx;
+ ifp->if_type = type;
+ ifp->if_alloctype = type;
+ if (if_com_alloc[type] != NULL) {
+ ifp->if_l2com = if_com_alloc[type](type, ifp);
+ if (ifp->if_l2com == NULL) {
+ free(ifp, M_IFNET);
+ ifindex_free(idx);
+ return (NULL);
+ }
+ }
+
+ IF_ADDR_LOCK_INIT(ifp);
+ TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
+ ifp->if_afdata_initialized = 0;
+ IF_AFDATA_LOCK_INIT(ifp);
+ TAILQ_INIT(&ifp->if_addrhead);
+ TAILQ_INIT(&ifp->if_prefixhead);
+ TAILQ_INIT(&ifp->if_multiaddrs);
+ TAILQ_INIT(&ifp->if_groups);
+#ifdef MAC
+ mac_ifnet_init(ifp);
+#endif
+ ifq_init(&ifp->if_snd, ifp);
+
+ refcount_init(&ifp->if_refcount, 1); /* Index reference. */
+ ifnet_setbyindex(ifp->if_index, ifp);
+ return (ifp);
+}
+
+/*
+ * Do the actual work of freeing a struct ifnet, associated index, and layer
+ * 2 common structure. This call is made when the last reference to an
+ * interface is released.
+ */
+static void
+if_free_internal(struct ifnet *ifp)
+{
+
+ KASSERT((ifp->if_flags & IFF_DYING),
+ ("if_free_internal: interface not dying"));
+
+ IFNET_WLOCK();
+ KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
+ ("%s: freeing unallocated ifnet", ifp->if_xname));
+
+ ifindex_free_locked(ifp->if_index);
+ IFNET_WUNLOCK();
+
+ if (if_com_free[ifp->if_alloctype] != NULL)
+ if_com_free[ifp->if_alloctype](ifp->if_l2com,
+ ifp->if_alloctype);
+
+#ifdef MAC
+ mac_ifnet_destroy(ifp);
+#endif /* MAC */
+ if (ifp->if_description != NULL)
+ free(ifp->if_description, M_IFDESCR);
+ IF_AFDATA_DESTROY(ifp);
+ IF_ADDR_LOCK_DESTROY(ifp);
+ ifq_delete(&ifp->if_snd);
+ free(ifp, M_IFNET);
+}
+
+/*
+ * This version should only be called by interfaces that switch their type
+ * after calling if_alloc(). if_free_type() will go away again now that we
+ * have if_alloctype to cache the original allocation type. For now, assert
+ * that they match, since we require that in practice.
+ */
+void
+if_free_type(struct ifnet *ifp, u_char type)
+{
+
+ KASSERT(ifp->if_alloctype == type,
+ ("if_free_type: type (%d) != alloctype (%d)", type,
+ ifp->if_alloctype));
+
+ ifp->if_flags |= IFF_DYING; /* XXX: Locking */
+ if (!refcount_release(&ifp->if_refcount))
+ return;
+ if_free_internal(ifp);
+}
+
+/*
+ * This is the normal version of if_free(), used by device drivers to free a
+ * detached network interface. The contents of if_free_type() will move into
+ * here when if_free_type() goes away.
+ */
+void
+if_free(struct ifnet *ifp)
+{
+
+ if_free_type(ifp, ifp->if_alloctype);
+}
+
+/*
+ * Interfaces to keep an ifnet type-stable despite the possibility of the
+ * driver calling if_free(). If there are additional references, we defer
+ * freeing the underlying data structure.
+ */
+void
+if_ref(struct ifnet *ifp)
+{
+
+ /* We don't assert the ifnet list lock here, but arguably should. */
+ refcount_acquire(&ifp->if_refcount);
+}
+
+void
+if_rele(struct ifnet *ifp)
+{
+
+ if (!refcount_release(&ifp->if_refcount))
+ return;
+ if_free_internal(ifp);
+}
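+
+/*
+ * A sketch of the reference discipline (hypothetical caller): a thread
+ * that wants to keep using an ifnet after dropping the ifnet list lock
+ * takes its own reference first.
+ *
+ *	IFNET_RLOCK();
+ *	ifp = TAILQ_FIRST(&V_ifnet);
+ *	if (ifp != NULL)
+ *		if_ref(ifp);
+ *	IFNET_RUNLOCK();
+ *	...
+ *	if (ifp != NULL)
+ *		if_rele(ifp);	-- may free ifp if it was dying
+ */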
+
+void
+ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
+{
+
+ mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
+
+ if (ifq->ifq_maxlen == 0)
+ ifq->ifq_maxlen = ifqmaxlen;
+
+ ifq->altq_type = 0;
+ ifq->altq_disc = NULL;
+ ifq->altq_flags &= ALTQF_CANTCHANGE;
+ ifq->altq_tbr = NULL;
+ ifq->altq_ifp = ifp;
+}
+
+void
+ifq_delete(struct ifaltq *ifq)
+{
+ mtx_destroy(&ifq->ifq_mtx);
+}
+
+/*
+ * Perform generic interface initialization tasks and attach the interface
+ * to the list of "active" interfaces.  If the vmove flag is set on entry
+ * to if_attach_internal(), perform only a limited subset of initialization
+ * tasks, given that we are moving an ifnet that has already been fully
+ * initialized from one vnet to another.
+ *
+ * XXX:
+ * - The decision to return void and thus require this function to
+ * succeed is questionable.
+ * - We should probably do more sanity checking. For instance we don't
+ *   do anything to ensure if_xname is unique or non-empty.
+ */
+void
+if_attach(struct ifnet *ifp)
+{
+
+ if_attach_internal(ifp, 0);
+}
+
+static void
+if_attach_internal(struct ifnet *ifp, int vmove)
+{
+ unsigned socksize, ifasize;
+ int namelen, masklen;
+ struct sockaddr_dl *sdl;
+ struct ifaddr *ifa;
+
+ if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
+		panic("%s: BUG: if_attach called without if_alloc()\n",
+ ifp->if_xname);
+
+#ifdef VIMAGE
+ ifp->if_vnet = curvnet;
+ if (ifp->if_home_vnet == NULL)
+ ifp->if_home_vnet = curvnet;
+#endif
+
+ if_addgroup(ifp, IFG_ALL);
+
+ getmicrotime(&ifp->if_lastchange);
+ ifp->if_data.ifi_epoch = time_uptime;
+ ifp->if_data.ifi_datalen = sizeof(struct if_data);
+
+ KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
+ (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
+ ("transmit and qflush must both either be set or both be NULL"));
+ if (ifp->if_transmit == NULL) {
+ ifp->if_transmit = if_transmit;
+ ifp->if_qflush = if_qflush;
+ }
+
+ if (!vmove) {
+#ifdef MAC
+ mac_ifnet_create(ifp);
+#endif
+
+ /*
+ * Create a Link Level name for this device.
+ */
+ namelen = strlen(ifp->if_xname);
+ /*
+		 * Always save enough space for any possible name so we
+ * can do a rename in place later.
+ */
+ masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
+ socksize = masklen + ifp->if_addrlen;
+ if (socksize < sizeof(*sdl))
+ socksize = sizeof(*sdl);
+ socksize = roundup2(socksize, sizeof(long));
+ ifasize = sizeof(*ifa) + 2 * socksize;
+ ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
+ ifa_init(ifa);
+ sdl = (struct sockaddr_dl *)(ifa + 1);
+ sdl->sdl_len = socksize;
+ sdl->sdl_family = AF_LINK;
+ bcopy(ifp->if_xname, sdl->sdl_data, namelen);
+ sdl->sdl_nlen = namelen;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = ifp->if_type;
+ ifp->if_addr = ifa;
+ ifa->ifa_ifp = ifp;
+ ifa->ifa_rtrequest = link_rtrequest;
+ ifa->ifa_addr = (struct sockaddr *)sdl;
+ sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
+ ifa->ifa_netmask = (struct sockaddr *)sdl;
+ sdl->sdl_len = masklen;
+ while (namelen != 0)
+ sdl->sdl_data[--namelen] = 0xff;
+ TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
+ /* Reliably crash if used uninitialized. */
+ ifp->if_broadcastaddr = NULL;
+ }
+#ifdef VIMAGE
+ else {
+ /*
+ * Update the interface index in the link layer address
+ * of the interface.
+ */
+ for (ifa = ifp->if_addr; ifa != NULL;
+ ifa = TAILQ_NEXT(ifa, ifa_link)) {
+ if (ifa->ifa_addr->sa_family == AF_LINK) {
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl->sdl_index = ifp->if_index;
+ }
+ }
+ }
+#endif
+
+ IFNET_WLOCK();
+ TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
+#ifdef VIMAGE
+ curvnet->vnet_ifcnt++;
+#endif
+ IFNET_WUNLOCK();
+
+ if (domain_init_status >= 2)
+ if_attachdomain1(ifp);
+
+ EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
+ if (IS_DEFAULT_VNET(curvnet))
+ devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
+
+ /* Announce the interface. */
+ rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
+
+ if (!vmove && ifp->if_watchdog != NULL) {
+ if_printf(ifp,
+ "WARNING: using obsoleted if_watchdog interface\n");
+
+ /*
+ * Note that we need if_slowtimo(). If this happens after
+ * boot, then call if_slowtimo() directly.
+ */
+ if (atomic_cmpset_int(&slowtimo_started, 0, 1) && !cold)
+ if_slowtimo(0);
+ }
+}
+
+static void
+if_attachdomain(void *dummy)
+{
+ struct ifnet *ifp;
+ int s;
+
+ s = splnet();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link)
+ if_attachdomain1(ifp);
+ splx(s);
+}
+SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
+ if_attachdomain, NULL);
+
+static void
+if_attachdomain1(struct ifnet *ifp)
+{
+ struct domain *dp;
+ int s;
+
+ s = splnet();
+
+ /*
+ * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
+ * cannot lock ifp->if_afdata initialization, entirely.
+ */
+ if (IF_AFDATA_TRYLOCK(ifp) == 0) {
+ splx(s);
+ return;
+ }
+ if (ifp->if_afdata_initialized >= domain_init_status) {
+ IF_AFDATA_UNLOCK(ifp);
+ splx(s);
+ printf("if_attachdomain called more than once on %s\n",
+ ifp->if_xname);
+ return;
+ }
+ ifp->if_afdata_initialized = domain_init_status;
+ IF_AFDATA_UNLOCK(ifp);
+
+ /* address family dependent data region */
+ bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
+ for (dp = domains; dp; dp = dp->dom_next) {
+ if (dp->dom_ifattach)
+ ifp->if_afdata[dp->dom_family] =
+ (*dp->dom_ifattach)(ifp);
+ }
+
+ splx(s);
+}
+
+/*
+ * Remove any unicast or broadcast network addresses from an interface.
+ */
+void
+if_purgeaddrs(struct ifnet *ifp)
+{
+ struct ifaddr *ifa, *next;
+
+ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
+ if (ifa->ifa_addr->sa_family == AF_LINK)
+ continue;
+#ifdef INET
+ /* XXX: Ugly!! ad hoc just for INET */
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ struct ifaliasreq ifr;
+
+ bzero(&ifr, sizeof(ifr));
+ ifr.ifra_addr = *ifa->ifa_addr;
+ if (ifa->ifa_dstaddr)
+ ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
+ if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
+ NULL) == 0)
+ continue;
+ }
+#endif /* INET */
+#ifdef INET6
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ in6_purgeaddr(ifa);
+ /* ifp_addrhead is already updated */
+ continue;
+ }
+#endif /* INET6 */
+ TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
+ ifa_free(ifa);
+ }
+}
+
+/*
+ * Remove any multicast network addresses from an interface when an ifnet
+ * is going away.
+ */
+static void
+if_purgemaddrs(struct ifnet *ifp)
+{
+ struct ifmultiaddr *ifma;
+ struct ifmultiaddr *next;
+
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
+ if_delmulti_locked(ifp, ifma, 1);
+ IF_ADDR_UNLOCK(ifp);
+}
+
+/*
+ * Detach an interface, removing it from the list of "active" interfaces.
+ * If vmove flag is set on entry to if_detach_internal(), perform only a
+ * limited subset of cleanup tasks, given that we are moving an ifnet from
+ * one vnet to another, where it must be fully operational.
+ *
+ * XXXRW: There are some significant questions about event ordering, and
+ * how to prevent things from starting to use the interface during detach.
+ */
+void
+if_detach(struct ifnet *ifp)
+{
+
+ if_detach_internal(ifp, 0);
+}
+
+static void
+if_detach_internal(struct ifnet *ifp, int vmove)
+{
+ struct ifaddr *ifa;
+ struct radix_node_head *rnh;
+ int i, j;
+ struct domain *dp;
+ struct ifnet *iter;
+ int found = 0;
+
+ IFNET_WLOCK();
+ TAILQ_FOREACH(iter, &V_ifnet, if_link)
+ if (iter == ifp) {
+ TAILQ_REMOVE(&V_ifnet, ifp, if_link);
+ found = 1;
+ break;
+ }
+#ifdef VIMAGE
+ if (found)
+ curvnet->vnet_ifcnt--;
+#endif
+ IFNET_WUNLOCK();
+ if (!found) {
+ if (vmove)
+ panic("%s: ifp=%p not on the ifnet tailq %p",
+ __func__, ifp, &V_ifnet);
+ else
+ return; /* XXX this should panic as well? */
+ }
+
+ /*
+ * Remove/wait for pending events.
+ */
+ taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
+
+ /*
+ * Remove routes and flush queues.
+ */
+ if_down(ifp);
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ altq_disable(&ifp->if_snd);
+ if (ALTQ_IS_ATTACHED(&ifp->if_snd))
+ altq_detach(&ifp->if_snd);
+#endif
+
+ if_purgeaddrs(ifp);
+
+#ifdef INET
+ in_ifdetach(ifp);
+#endif
+
+#ifdef INET6
+ /*
+ * Remove all IPv6 kernel structs related to ifp. This should be done
+ * before removing routing entries below, since IPv6 interface direct
+ * routes are expected to be removed by the IPv6-specific kernel API.
+	 * Otherwise, the kernel will detect the inconsistency and complain.
+ */
+ in6_ifdetach(ifp);
+#endif
+ if_purgemaddrs(ifp);
+
+ if (!vmove) {
+ /*
+ * Prevent further calls into the device driver via ifnet.
+ */
+ if_dead(ifp);
+
+ /*
+ * Remove link ifaddr pointer and maybe decrement if_index.
+ * Clean up all addresses.
+ */
+ ifp->if_addr = NULL;
+
+ /* We can now free link ifaddr. */
+ if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
+ ifa = TAILQ_FIRST(&ifp->if_addrhead);
+ TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
+ ifa_free(ifa);
+ }
+ }
+
+ /*
+	 * Delete all remaining routes using this interface.
+	 * Unfortunately, the only way to do this is to slog through
+ * the entire routing table looking for routes which point
+ * to this interface...oh well...
+ */
+ for (i = 1; i <= AF_MAX; i++) {
+ for (j = 0; j < rt_numfibs; j++) {
+ rnh = rt_tables_get_rnh(j, i);
+ if (rnh == NULL)
+ continue;
+ RADIX_NODE_HEAD_LOCK(rnh);
+ (void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ }
+ }
+
+ /* Announce that the interface is gone. */
+ rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
+ EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
+ if (IS_DEFAULT_VNET(curvnet))
+ devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
+ if_delgroups(ifp);
+
+ /*
+ * We cannot hold the lock over dom_ifdetach calls as they might
+ * sleep, for example trying to drain a callout, thus open up the
+ * theoretical race with re-attaching.
+ */
+ IF_AFDATA_LOCK(ifp);
+ i = ifp->if_afdata_initialized;
+ ifp->if_afdata_initialized = 0;
+ IF_AFDATA_UNLOCK(ifp);
+ for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
+ if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
+ (*dp->dom_ifdetach)(ifp,
+ ifp->if_afdata[dp->dom_family]);
+ }
+}
+
+#ifdef VIMAGE
+/*
+ * if_vmove() performs a limited version of if_detach() in current
+ * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
+ * An attempt is made to shrink if_index in the current vnet, to find an
+ * unused if_index in the target vnet, calling if_grow() if necessary,
+ * and finally to find an unused if_xname for the target vnet.
+ */
+void
+if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
+{
+ u_short idx;
+
+ /*
+ * Detach from current vnet, but preserve LLADDR info, do not
+ * mark as dead etc. so that the ifnet can be reattached later.
+ */
+ if_detach_internal(ifp, 1);
+
+ /*
+ * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
+ * the if_index for that vnet if possible.
+ *
+ * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
+ * or we'd lock on one vnet and unlock on another.
+ */
+ IFNET_WLOCK();
+ ifindex_free_locked(ifp->if_index);
+ IFNET_WUNLOCK();
+
+ /*
+ * Perform interface-specific reassignment tasks, if provided by
+ * the driver.
+ */
+ if (ifp->if_reassign != NULL)
+ ifp->if_reassign(ifp, new_vnet, NULL);
+
+ /*
+ * Switch to the context of the target vnet.
+ */
+ CURVNET_SET_QUIET(new_vnet);
+
+ IFNET_WLOCK();
+ if (ifindex_alloc_locked(&idx) != 0) {
+ IFNET_WUNLOCK();
+ panic("if_index overflow");
+ }
+ ifp->if_index = idx;
+ ifnet_setbyindex_locked(ifp->if_index, ifp);
+ IFNET_WUNLOCK();
+
+ if_attach_internal(ifp, 1);
+
+ CURVNET_RESTORE();
+}
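+
+/*
+ * From userland, if_vmove() is normally driven via the SIOCSIFVNET and
+ * SIOCSIFRVNET ioctls handled further below; a sketch of the caller side
+ * (s is any open socket, jid obtained elsewhere, error handling omitted):
+ *
+ *	struct ifreq ifr;
+ *
+ *	memset(&ifr, 0, sizeof(ifr));
+ *	strlcpy(ifr.ifr_name, "epair0b", sizeof(ifr.ifr_name));
+ *	ifr.ifr_jid = jid;
+ *	ioctl(s, SIOCSIFVNET, &ifr);	-- move epair0b into the jail vnet
+ */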
+
+/*
+ * Move an ifnet to or from another child prison/vnet, specified by the jail id.
+ */
+static int
+if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
+{
+ struct prison *pr;
+ struct ifnet *difp;
+
+ /* Try to find the prison within our visibility. */
+ sx_slock(&allprison_lock);
+ pr = prison_find_child(td->td_ucred->cr_prison, jid);
+ sx_sunlock(&allprison_lock);
+ if (pr == NULL)
+ return (ENXIO);
+ prison_hold_locked(pr);
+ mtx_unlock(&pr->pr_mtx);
+
+ /* Do not try to move the iface from and to the same prison. */
+ if (pr->pr_vnet == ifp->if_vnet) {
+ prison_free(pr);
+ return (EEXIST);
+ }
+
+	/* Make sure the named iface does not exist in the dst prison/vnet. */
+ /* XXX Lock interfaces to avoid races. */
+ CURVNET_SET_QUIET(pr->pr_vnet);
+ difp = ifunit(ifname);
+ CURVNET_RESTORE();
+ if (difp != NULL) {
+ prison_free(pr);
+ return (EEXIST);
+ }
+
+ /* Move the interface into the child jail/vnet. */
+ if_vmove(ifp, pr->pr_vnet);
+
+ /* Report the new if_xname back to the userland. */
+ sprintf(ifname, "%s", ifp->if_xname);
+
+ prison_free(pr);
+ return (0);
+}
+
+static int
+if_vmove_reclaim(struct thread *td, char *ifname, int jid)
+{
+ struct prison *pr;
+ struct vnet *vnet_dst;
+ struct ifnet *ifp;
+
+ /* Try to find the prison within our visibility. */
+ sx_slock(&allprison_lock);
+ pr = prison_find_child(td->td_ucred->cr_prison, jid);
+ sx_sunlock(&allprison_lock);
+ if (pr == NULL)
+ return (ENXIO);
+ prison_hold_locked(pr);
+ mtx_unlock(&pr->pr_mtx);
+
+ /* Make sure the named iface exists in the source prison/vnet. */
+ CURVNET_SET(pr->pr_vnet);
+ ifp = ifunit(ifname); /* XXX Lock to avoid races. */
+ if (ifp == NULL) {
+ CURVNET_RESTORE();
+ prison_free(pr);
+ return (ENXIO);
+ }
+
+ /* Do not try to move the iface from and to the same prison. */
+ vnet_dst = TD_TO_VNET(td);
+ if (vnet_dst == ifp->if_vnet) {
+ CURVNET_RESTORE();
+ prison_free(pr);
+ return (EEXIST);
+ }
+
+ /* Get interface back from child jail/vnet. */
+ if_vmove(ifp, vnet_dst);
+ CURVNET_RESTORE();
+
+ /* Report the new if_xname back to the userland. */
+ sprintf(ifname, "%s", ifp->if_xname);
+
+ prison_free(pr);
+ return (0);
+}
+#endif /* VIMAGE */
+
+/*
+ * Add a group to an interface
+ */
+int
+if_addgroup(struct ifnet *ifp, const char *groupname)
+{
+ struct ifg_list *ifgl;
+ struct ifg_group *ifg = NULL;
+ struct ifg_member *ifgm;
+
+ if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
+ groupname[strlen(groupname) - 1] <= '9')
+ return (EINVAL);
+
+ IFNET_WLOCK();
+ TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
+ if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
+ IFNET_WUNLOCK();
+ return (EEXIST);
+ }
+
+ if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
+ M_NOWAIT)) == NULL) {
+ IFNET_WUNLOCK();
+ return (ENOMEM);
+ }
+
+ if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
+ M_TEMP, M_NOWAIT)) == NULL) {
+ free(ifgl, M_TEMP);
+ IFNET_WUNLOCK();
+ return (ENOMEM);
+ }
+
+ TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
+ if (!strcmp(ifg->ifg_group, groupname))
+ break;
+
+ if (ifg == NULL) {
+ if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
+ M_TEMP, M_NOWAIT)) == NULL) {
+ free(ifgl, M_TEMP);
+ free(ifgm, M_TEMP);
+ IFNET_WUNLOCK();
+ return (ENOMEM);
+ }
+ strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
+ ifg->ifg_refcnt = 0;
+ TAILQ_INIT(&ifg->ifg_members);
+ EVENTHANDLER_INVOKE(group_attach_event, ifg);
+ TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
+ }
+
+ ifg->ifg_refcnt++;
+ ifgl->ifgl_group = ifg;
+ ifgm->ifgm_ifp = ifp;
+
+ IF_ADDR_LOCK(ifp);
+ TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
+ TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
+ IF_ADDR_UNLOCK(ifp);
+
+ IFNET_WUNLOCK();
+
+ EVENTHANDLER_INVOKE(group_change_event, groupname);
+
+ return (0);
+}
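+
+/*
+ * A short usage sketch: cloned interfaces typically join a group named
+ * after their driver, e.g.
+ *
+ *	(void)if_addgroup(ifp, "tap");
+ *
+ * The check above rejects names ending in a digit, which keeps a group
+ * name from looking like an interface name (driver name plus unit).
+ */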
+
+/*
+ * Remove a group from an interface
+ */
+int
+if_delgroup(struct ifnet *ifp, const char *groupname)
+{
+ struct ifg_list *ifgl;
+ struct ifg_member *ifgm;
+
+ IFNET_WLOCK();
+ TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
+ if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
+ break;
+ if (ifgl == NULL) {
+ IFNET_WUNLOCK();
+ return (ENOENT);
+ }
+
+ IF_ADDR_LOCK(ifp);
+ TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
+ IF_ADDR_UNLOCK(ifp);
+
+ TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
+ if (ifgm->ifgm_ifp == ifp)
+ break;
+
+ if (ifgm != NULL) {
+ TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
+ free(ifgm, M_TEMP);
+ }
+
+ if (--ifgl->ifgl_group->ifg_refcnt == 0) {
+ TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
+ EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
+ free(ifgl->ifgl_group, M_TEMP);
+ }
+ IFNET_WUNLOCK();
+
+ free(ifgl, M_TEMP);
+
+ EVENTHANDLER_INVOKE(group_change_event, groupname);
+
+ return (0);
+}
+
+/*
+ * Remove an interface from all groups
+ */
+static void
+if_delgroups(struct ifnet *ifp)
+{
+ struct ifg_list *ifgl;
+ struct ifg_member *ifgm;
+ char groupname[IFNAMSIZ];
+
+ IFNET_WLOCK();
+ while (!TAILQ_EMPTY(&ifp->if_groups)) {
+ ifgl = TAILQ_FIRST(&ifp->if_groups);
+
+ strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
+
+ IF_ADDR_LOCK(ifp);
+ TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
+ IF_ADDR_UNLOCK(ifp);
+
+ TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
+ if (ifgm->ifgm_ifp == ifp)
+ break;
+
+ if (ifgm != NULL) {
+ TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
+ ifgm_next);
+ free(ifgm, M_TEMP);
+ }
+
+ if (--ifgl->ifgl_group->ifg_refcnt == 0) {
+ TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
+ EVENTHANDLER_INVOKE(group_detach_event,
+ ifgl->ifgl_group);
+ free(ifgl->ifgl_group, M_TEMP);
+ }
+ IFNET_WUNLOCK();
+
+ free(ifgl, M_TEMP);
+
+ EVENTHANDLER_INVOKE(group_change_event, groupname);
+
+ IFNET_WLOCK();
+ }
+ IFNET_WUNLOCK();
+}
+
+/*
+ * Stores all groups from an interface in memory pointed
+ * to by data
+ */
+static int
+if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
+{
+ int len, error;
+ struct ifg_list *ifgl;
+ struct ifg_req ifgrq, *ifgp;
+ struct ifgroupreq *ifgr = data;
+
+ if (ifgr->ifgr_len == 0) {
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
+ ifgr->ifgr_len += sizeof(struct ifg_req);
+ IF_ADDR_UNLOCK(ifp);
+ return (0);
+ }
+
+ len = ifgr->ifgr_len;
+ ifgp = ifgr->ifgr_groups;
+ /* XXX: wire */
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
+ if (len < sizeof(ifgrq)) {
+ IF_ADDR_UNLOCK(ifp);
+ return (EINVAL);
+ }
+ bzero(&ifgrq, sizeof ifgrq);
+ strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
+ sizeof(ifgrq.ifgrq_group));
+ if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
+ IF_ADDR_UNLOCK(ifp);
+ return (error);
+ }
+ len -= sizeof(ifgrq);
+ ifgp++;
+ }
+ IF_ADDR_UNLOCK(ifp);
+
+ return (0);
+}
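+
+/*
+ * The ifgr_len == 0 case above implements the usual two-call sizing
+ * protocol.  A userland sketch (s is any open socket, error handling
+ * omitted):
+ *
+ *	struct ifgroupreq ifgr;
+ *
+ *	memset(&ifgr, 0, sizeof(ifgr));
+ *	strlcpy(ifgr.ifgr_name, "em0", sizeof(ifgr.ifgr_name));
+ *	ioctl(s, SIOCGIFGROUP, &ifgr);	-- sets ifgr_len to the needed size
+ *	ifgr.ifgr_groups = malloc(ifgr.ifgr_len);
+ *	ioctl(s, SIOCGIFGROUP, &ifgr);	-- fills the ifg_req array
+ */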
+
+/*
+ * Stores all members of a group in memory pointed to by data
+ */
+static int
+if_getgroupmembers(struct ifgroupreq *data)
+{
+ struct ifgroupreq *ifgr = data;
+ struct ifg_group *ifg;
+ struct ifg_member *ifgm;
+ struct ifg_req ifgrq, *ifgp;
+ int len, error;
+
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
+ if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
+ break;
+ if (ifg == NULL) {
+ IFNET_RUNLOCK();
+ return (ENOENT);
+ }
+
+ if (ifgr->ifgr_len == 0) {
+ TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
+ ifgr->ifgr_len += sizeof(ifgrq);
+ IFNET_RUNLOCK();
+ return (0);
+ }
+
+ len = ifgr->ifgr_len;
+ ifgp = ifgr->ifgr_groups;
+ TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
+ if (len < sizeof(ifgrq)) {
+ IFNET_RUNLOCK();
+ return (EINVAL);
+ }
+ bzero(&ifgrq, sizeof ifgrq);
+ strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
+ sizeof(ifgrq.ifgrq_member));
+ if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
+ IFNET_RUNLOCK();
+ return (error);
+ }
+ len -= sizeof(ifgrq);
+ ifgp++;
+ }
+ IFNET_RUNLOCK();
+
+ return (0);
+}
+
+/*
+ * Delete Routes for a Network Interface
+ *
+ * Called for each routing entry via the rnh->rnh_walktree() call above
+ * to delete all route entries referencing a detaching network interface.
+ *
+ * Arguments:
+ * rn pointer to node in the routing table
+ * arg argument passed to rnh->rnh_walktree() - detaching interface
+ *
+ * Returns:
+ * 0 successful
+ * errno failed - reason indicated
+ *
+ */
+static int
+if_rtdel(struct radix_node *rn, void *arg)
+{
+ struct rtentry *rt = (struct rtentry *)rn;
+ struct ifnet *ifp = arg;
+ int err;
+
+ if (rt->rt_ifp == ifp) {
+
+ /*
+ * Protect (sorta) against walktree recursion problems
+ * with cloned routes
+ */
+ if ((rt->rt_flags & RTF_UP) == 0)
+ return (0);
+
+ err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
+ rt_mask(rt), rt->rt_flags|RTF_RNH_LOCKED,
+ (struct rtentry **) NULL, rt->rt_fibnum);
+ if (err) {
+ log(LOG_WARNING, "if_rtdel: error %d\n", err);
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Wrapper functions for struct ifnet address list locking macros. These are
+ * used by kernel modules to avoid encoding programming interface or binary
+ * interface assumptions that may be violated when kernel-internal locking
+ * approaches change.
+ */
+void
+if_addr_rlock(struct ifnet *ifp)
+{
+
+ IF_ADDR_LOCK(ifp);
+}
+
+void
+if_addr_runlock(struct ifnet *ifp)
+{
+
+ IF_ADDR_UNLOCK(ifp);
+}
+
+void
+if_maddr_rlock(struct ifnet *ifp)
+{
+
+ IF_ADDR_LOCK(ifp);
+}
+
+void
+if_maddr_runlock(struct ifnet *ifp)
+{
+
+ IF_ADDR_UNLOCK(ifp);
+}
+
+/*
+ * Reference count functions for ifaddrs.
+ */
+void
+ifa_init(struct ifaddr *ifa)
+{
+
+ mtx_init(&ifa->ifa_mtx, "ifaddr", NULL, MTX_DEF);
+ refcount_init(&ifa->ifa_refcnt, 1);
+}
+
+void
+ifa_ref(struct ifaddr *ifa)
+{
+
+ refcount_acquire(&ifa->ifa_refcnt);
+}
+
+void
+ifa_free(struct ifaddr *ifa)
+{
+
+ if (refcount_release(&ifa->ifa_refcnt)) {
+ mtx_destroy(&ifa->ifa_mtx);
+ free(ifa, M_IFADDR);
+ }
+}
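+
+/*
+ * Sketch of the usual pattern: ifa_init() leaves the refcount at 1 for
+ * the list reference; any code that stores or returns the pointer pairs
+ * ifa_ref() with a later ifa_free(), whose final release destroys the
+ * ifaddr.
+ *
+ *	ifa_ref(ifa);
+ *	...
+ *	ifa_free(ifa);
+ */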
+
+int
+ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
+{
+ int error = 0;
+ struct rtentry *rt = NULL;
+ struct rt_addrinfo info;
+ static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+
+ bzero(&info, sizeof(info));
+ info.rti_ifp = V_loif;
+ info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
+ info.rti_info[RTAX_DST] = ia;
+ info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
+ error = rtrequest1_fib(RTM_ADD, &info, &rt, 0);
+
+ if (error == 0 && rt != NULL) {
+ RT_LOCK(rt);
+ ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
+ ifa->ifa_ifp->if_type;
+ ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
+ ifa->ifa_ifp->if_index;
+ RT_REMREF(rt);
+ RT_UNLOCK(rt);
+ } else if (error != 0)
+ log(LOG_INFO, "ifa_add_loopback_route: insertion failed\n");
+
+ return (error);
+}
+
+int
+ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
+{
+ int error = 0;
+ struct rt_addrinfo info;
+ struct sockaddr_dl null_sdl;
+
+ bzero(&null_sdl, sizeof(null_sdl));
+ null_sdl.sdl_len = sizeof(null_sdl);
+ null_sdl.sdl_family = AF_LINK;
+ null_sdl.sdl_type = ifa->ifa_ifp->if_type;
+ null_sdl.sdl_index = ifa->ifa_ifp->if_index;
+ bzero(&info, sizeof(info));
+ info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
+ info.rti_info[RTAX_DST] = ia;
+ info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
+ error = rtrequest1_fib(RTM_DELETE, &info, NULL, 0);
+
+ if (error != 0)
+ log(LOG_INFO, "ifa_del_loopback_route: deletion failed\n");
+
+ return (error);
+}
+
+/*
+ * XXX: Because sockaddr_dl has deeper structure than the sockaddr
+ * structs used to represent other address families, it is necessary
+ * to perform a different comparison.
+ */
+
+#define sa_equal(a1, a2) \
+ (bcmp((a1), (a2), ((a1))->sa_len) == 0)
+
+#define sa_dl_equal(a1, a2) \
+ ((((struct sockaddr_dl *)(a1))->sdl_len == \
+ ((struct sockaddr_dl *)(a2))->sdl_len) && \
+ (bcmp(LLADDR((struct sockaddr_dl *)(a1)), \
+ LLADDR((struct sockaddr_dl *)(a2)), \
+ ((struct sockaddr_dl *)(a1))->sdl_alen) == 0))
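+
+/*
+ * Note that sa_equal() compares sa_len bytes of the whole sockaddr,
+ * while sa_dl_equal() requires equal sdl_len and then compares only the
+ * sdl_alen bytes of the link-level address itself (via LLADDR()),
+ * ignoring fields such as sdl_index and the embedded interface name.
+ */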
+
+/*
+ * Locate an interface based on a complete address.
+ */
+/*ARGSUSED*/
+static struct ifaddr *
+ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
+{
+ struct ifnet *ifp;
+ struct ifaddr *ifa;
+
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != addr->sa_family)
+ continue;
+ if (sa_equal(addr, ifa->ifa_addr)) {
+ if (getref)
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ goto done;
+ }
+ /* IP6 doesn't have broadcast */
+ if ((ifp->if_flags & IFF_BROADCAST) &&
+ ifa->ifa_broadaddr &&
+ ifa->ifa_broadaddr->sa_len != 0 &&
+ sa_equal(ifa->ifa_broadaddr, addr)) {
+ if (getref)
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ goto done;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+ }
+ ifa = NULL;
+done:
+ IFNET_RUNLOCK_NOSLEEP();
+ return (ifa);
+}
+
+struct ifaddr *
+ifa_ifwithaddr(struct sockaddr *addr)
+{
+
+ return (ifa_ifwithaddr_internal(addr, 1));
+}
+
+int
+ifa_ifwithaddr_check(struct sockaddr *addr)
+{
+
+ return (ifa_ifwithaddr_internal(addr, 0) != NULL);
+}
+
+/*
+ * Locate an interface based on the broadcast address.
+ */
+/* ARGSUSED */
+struct ifaddr *
+ifa_ifwithbroadaddr(struct sockaddr *addr)
+{
+ struct ifnet *ifp;
+ struct ifaddr *ifa;
+
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != addr->sa_family)
+ continue;
+ if ((ifp->if_flags & IFF_BROADCAST) &&
+ ifa->ifa_broadaddr &&
+ ifa->ifa_broadaddr->sa_len != 0 &&
+ sa_equal(ifa->ifa_broadaddr, addr)) {
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ goto done;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+ }
+ ifa = NULL;
+done:
+ IFNET_RUNLOCK_NOSLEEP();
+ return (ifa);
+}
+
+/*
+ * Locate the point to point interface with a given destination address.
+ */
+/*ARGSUSED*/
+struct ifaddr *
+ifa_ifwithdstaddr(struct sockaddr *addr)
+{
+ struct ifnet *ifp;
+ struct ifaddr *ifa;
+
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+ continue;
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != addr->sa_family)
+ continue;
+ if (ifa->ifa_dstaddr != NULL &&
+ sa_equal(addr, ifa->ifa_dstaddr)) {
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ goto done;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+ }
+ ifa = NULL;
+done:
+ IFNET_RUNLOCK_NOSLEEP();
+ return (ifa);
+}
+
+/*
+ * Find an interface on a specific network.  If more than one
+ * interface matches, the most specific one found is returned.
+ */
+struct ifaddr *
+ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp)
+{
+ struct ifnet *ifp;
+ struct ifaddr *ifa;
+ struct ifaddr *ifa_maybe = NULL;
+ u_int af = addr->sa_family;
+ char *addr_data = addr->sa_data, *cplim;
+
+ /*
+ * AF_LINK addresses can be looked up directly by their index number,
+ * so do that if we can.
+ */
+ if (af == AF_LINK) {
+ struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
+ if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
+ return (ifaddr_byindex(sdl->sdl_index));
+ }
+
+ /*
+	 * Scan through each interface, looking for ones that have addresses
+ * in this address family. Maintain a reference on ifa_maybe once
+ * we find one, as we release the IF_ADDR_LOCK() that kept it stable
+ * when we move onto the next interface.
+ */
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ char *cp, *cp2, *cp3;
+
+ if (ifa->ifa_addr->sa_family != af)
+next: continue;
+ if (af == AF_INET &&
+ ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
+ /*
+ * This is a bit broken as it doesn't
+ * take into account that the remote end may
+ * be a single node in the network we are
+ * looking for.
+ * The trouble is that we don't know the
+ * netmask for the remote end.
+ */
+ if (ifa->ifa_dstaddr != NULL &&
+ sa_equal(addr, ifa->ifa_dstaddr)) {
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ goto done;
+ }
+ } else {
+ /*
+ * if we have a special address handler,
+ * then use it instead of the generic one.
+ */
+ if (ifa->ifa_claim_addr) {
+ if ((*ifa->ifa_claim_addr)(ifa, addr)) {
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ goto done;
+ }
+ continue;
+ }
+
+ /*
+ * Scan all the bits in the ifa's address.
+				 * If a bit disagrees with what we are
+ * looking for, mask it with the netmask
+ * to see if it really matters.
+ * (A byte at a time)
+ */
+ if (ifa->ifa_netmask == 0)
+ continue;
+ cp = addr_data;
+ cp2 = ifa->ifa_addr->sa_data;
+ cp3 = ifa->ifa_netmask->sa_data;
+ cplim = ifa->ifa_netmask->sa_len
+ + (char *)ifa->ifa_netmask;
+ while (cp3 < cplim)
+ if ((*cp++ ^ *cp2++) & *cp3++)
+ goto next; /* next address! */
+ /*
+ * If the netmask of what we just found
+ * is more specific than what we had before
+ * (if we had one) then remember the new one
+ * before continuing to search
+ * for an even better one.
+ */
+ if (ifa_maybe == NULL ||
+ rn_refines((caddr_t)ifa->ifa_netmask,
+ (caddr_t)ifa_maybe->ifa_netmask)) {
+ if (ifa_maybe != NULL)
+ ifa_free(ifa_maybe);
+ ifa_maybe = ifa;
+ ifa_ref(ifa_maybe);
+ }
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+ }
+ ifa = ifa_maybe;
+ ifa_maybe = NULL;
+done:
+ IFNET_RUNLOCK_NOSLEEP();
+ if (ifa_maybe != NULL)
+ ifa_free(ifa_maybe);
+ return (ifa);
+}
+
+/*
+ * Find an interface address specific to an interface best matching
+ * a given address.
+ */
+struct ifaddr *
+ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
+{
+ struct ifaddr *ifa;
+ char *cp, *cp2, *cp3;
+ char *cplim;
+ struct ifaddr *ifa_maybe = NULL;
+ u_int af = addr->sa_family;
+
+ if (af >= AF_MAX)
+ return (NULL);
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != af)
+ continue;
+ if (ifa_maybe == NULL)
+ ifa_maybe = ifa;
+ if (ifa->ifa_netmask == 0) {
+ if (sa_equal(addr, ifa->ifa_addr) ||
+ (ifa->ifa_dstaddr &&
+ sa_equal(addr, ifa->ifa_dstaddr)))
+ goto done;
+ continue;
+ }
+ if (ifp->if_flags & IFF_POINTOPOINT) {
+ if (sa_equal(addr, ifa->ifa_dstaddr))
+ goto done;
+ } else {
+ cp = addr->sa_data;
+ cp2 = ifa->ifa_addr->sa_data;
+ cp3 = ifa->ifa_netmask->sa_data;
+ cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
+ for (; cp3 < cplim; cp3++)
+ if ((*cp++ ^ *cp2++) & *cp3)
+ break;
+ if (cp3 == cplim)
+ goto done;
+ }
+ }
+ ifa = ifa_maybe;
+done:
+ if (ifa != NULL)
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ return (ifa);
+}
+
+#include <freebsd/net/if_llatbl.h>
+
+/*
+ * Default action when installing a route with a Link Level gateway.
+ * Look up an appropriate real ifa to point to.
+ * This should be moved to /sys/net/link.c eventually.
+ */
+static void
+link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
+{
+ struct ifaddr *ifa, *oifa;
+ struct sockaddr *dst;
+ struct ifnet *ifp;
+
+ RT_LOCK_ASSERT(rt);
+
+ if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
+ ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
+ return;
+ ifa = ifaof_ifpforaddr(dst, ifp);
+ if (ifa) {
+ oifa = rt->rt_ifa;
+ rt->rt_ifa = ifa;
+ ifa_free(oifa);
+ if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
+ ifa->ifa_rtrequest(cmd, rt, info);
+ }
+}
+
+/*
+ * Mark an interface down and notify protocols of
+ * the transition.
+ * NOTE: must be called at splnet or equivalent.
+ */
+static void
+if_unroute(struct ifnet *ifp, int flag, int fam)
+{
+ struct ifaddr *ifa;
+
+ KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
+
+ ifp->if_flags &= ~flag;
+ getmicrotime(&ifp->if_lastchange);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
+ if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
+ pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
+ ifp->if_qflush(ifp);
+
+ if (ifp->if_carp)
+ (*carp_linkstate_p)(ifp);
+ rt_ifmsg(ifp);
+}
+
+/*
+ * Mark an interface up and notify protocols of
+ * the transition.
+ * NOTE: must be called at splnet or equivalent.
+ */
+static void
+if_route(struct ifnet *ifp, int flag, int fam)
+{
+ struct ifaddr *ifa;
+
+ KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
+
+ ifp->if_flags |= flag;
+ getmicrotime(&ifp->if_lastchange);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
+ if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
+ pfctlinput(PRC_IFUP, ifa->ifa_addr);
+ if (ifp->if_carp)
+ (*carp_linkstate_p)(ifp);
+ rt_ifmsg(ifp);
+#ifdef INET6
+ in6_if_up(ifp);
+#endif
+}
+
+void (*vlan_link_state_p)(struct ifnet *, int); /* XXX: private from if_vlan */
+void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */
+
+/*
+ * Handle a change in the interface link state. To avoid LORs
+ * between driver lock and upper layer locks, as well as possible
+ * recursions, we post the event to a taskqueue, and all of the work
+ * is done in the static do_link_state_change().
+ */
+void
+if_link_state_change(struct ifnet *ifp, int link_state)
+{
+ /* Return if state hasn't changed. */
+ if (ifp->if_link_state == link_state)
+ return;
+
+ ifp->if_link_state = link_state;
+
+ taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
+}
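+
+/*
+ * A minimal sketch (hypothetical driver): link interrupt or media status
+ * handlers simply report the new state and let the taskqueue do the rest.
+ *
+ *	if (link_is_up(sc))	-- hypothetical helper
+ *		if_link_state_change(sc->ifp, LINK_STATE_UP);
+ *	else
+ *		if_link_state_change(sc->ifp, LINK_STATE_DOWN);
+ */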
+
+static void
+do_link_state_change(void *arg, int pending)
+{
+ struct ifnet *ifp = (struct ifnet *)arg;
+ int link_state = ifp->if_link_state;
+ CURVNET_SET(ifp->if_vnet);
+
+ /* Notify that the link state has changed. */
+ rt_ifmsg(ifp);
+ if (ifp->if_vlantrunk != NULL)
+ (*vlan_link_state_p)(ifp, 0);
+
+ if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
+ IFP2AC(ifp)->ac_netgraph != NULL)
+ (*ng_ether_link_state_p)(ifp, link_state);
+ if (ifp->if_carp)
+ (*carp_linkstate_p)(ifp);
+ if (ifp->if_bridge) {
+		KASSERT(bstp_linkstate_p != NULL, ("if_bridge bstp not loaded!"));
+ (*bstp_linkstate_p)(ifp, link_state);
+ }
+ if (ifp->if_lagg) {
+		KASSERT(lagg_linkstate_p != NULL, ("if_lagg not loaded!"));
+ (*lagg_linkstate_p)(ifp, link_state);
+ }
+
+ if (IS_DEFAULT_VNET(curvnet))
+ devctl_notify("IFNET", ifp->if_xname,
+ (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
+ NULL);
+ if (pending > 1)
+ if_printf(ifp, "%d link states coalesced\n", pending);
+ if (log_link_state_change)
+ log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
+ (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
+ CURVNET_RESTORE();
+}
+
+/*
+ * Mark an interface down and notify protocols of
+ * the transition.
+ * NOTE: must be called at splnet or equivalent.
+ */
+void
+if_down(struct ifnet *ifp)
+{
+
+ if_unroute(ifp, IFF_UP, AF_UNSPEC);
+}
+
+/*
+ * Mark an interface up and notify protocols of
+ * the transition.
+ * NOTE: must be called at splnet or equivalent.
+ */
+void
+if_up(struct ifnet *ifp)
+{
+
+ if_route(ifp, IFF_UP, AF_UNSPEC);
+}
+
+/*
+ * Flush an interface queue.
+ */
+void
+if_qflush(struct ifnet *ifp)
+{
+ struct mbuf *m, *n;
+ struct ifaltq *ifq;
+
+ ifq = &ifp->if_snd;
+ IFQ_LOCK(ifq);
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(ifq))
+ ALTQ_PURGE(ifq);
+#endif
+ n = ifq->ifq_head;
+ while ((m = n) != 0) {
+ n = m->m_act;
+ m_freem(m);
+ }
+ ifq->ifq_head = 0;
+ ifq->ifq_tail = 0;
+ ifq->ifq_len = 0;
+ IFQ_UNLOCK(ifq);
+}
+
+/*
+ * Handle interface watchdog timer routines. Called
+ * from softclock, we decrement timers (if set) and
+ * call the appropriate interface routine on expiration.
+ *
+ * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
+ * holding Giant.
+ */
+static void
+if_slowtimo(void *arg)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+ struct ifnet *ifp;
+ int s = splimp();
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ IFNET_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (ifp->if_timer == 0 || --ifp->if_timer)
+ continue;
+ if (ifp->if_watchdog)
+ (*ifp->if_watchdog)(ifp);
+ }
+ CURVNET_RESTORE();
+ }
+ IFNET_RUNLOCK_NOSLEEP();
+ VNET_LIST_RUNLOCK_NOSLEEP();
+ splx(s);
+ timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
+}
+
+/*
+ * Map interface name to interface structure pointer, with or without
+ * returning a reference.
+ */
+struct ifnet *
+ifunit_ref(const char *name)
+{
+ struct ifnet *ifp;
+
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
+ !(ifp->if_flags & IFF_DYING))
+ break;
+ }
+ if (ifp != NULL)
+ if_ref(ifp);
+ IFNET_RUNLOCK_NOSLEEP();
+ return (ifp);
+}
+
+struct ifnet *
+ifunit(const char *name)
+{
+ struct ifnet *ifp;
+
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
+ break;
+ }
+ IFNET_RUNLOCK_NOSLEEP();
+ return (ifp);
+}
+
+/*
+ * Hardware specific interface ioctls.
+ */
+static int
+ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
+{
+ struct ifreq *ifr;
+ struct ifstat *ifs;
+ int error = 0;
+ int new_flags, temp_flags;
+ size_t namelen, onamelen;
+ size_t descrlen;
+ char *descrbuf, *odescrbuf;
+ char new_name[IFNAMSIZ];
+ struct ifaddr *ifa;
+ struct sockaddr_dl *sdl;
+
+ ifr = (struct ifreq *)data;
+ switch (cmd) {
+ case SIOCGIFINDEX:
+ ifr->ifr_index = ifp->if_index;
+ break;
+
+ case SIOCGIFFLAGS:
+ temp_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifr->ifr_flags = temp_flags & 0xffff;
+ ifr->ifr_flagshigh = temp_flags >> 16;
+ break;
+
+ case SIOCGIFCAP:
+ ifr->ifr_reqcap = ifp->if_capabilities;
+ ifr->ifr_curcap = ifp->if_capenable;
+ break;
+
+#ifdef MAC
+ case SIOCGIFMAC:
+ error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
+ break;
+#endif
+
+ case SIOCGIFMETRIC:
+ ifr->ifr_metric = ifp->if_metric;
+ break;
+
+ case SIOCGIFMTU:
+ ifr->ifr_mtu = ifp->if_mtu;
+ break;
+
+ case SIOCGIFPHYS:
+ ifr->ifr_phys = ifp->if_physical;
+ break;
+
+ case SIOCGIFDESCR:
+ error = 0;
+ sx_slock(&ifdescr_sx);
+ if (ifp->if_description == NULL)
+ error = ENOMSG;
+ else {
+ /* space for terminating nul */
+ descrlen = strlen(ifp->if_description) + 1;
+ if (ifr->ifr_buffer.length < descrlen)
+ ifr->ifr_buffer.buffer = NULL;
+ else
+ error = copyout(ifp->if_description,
+ ifr->ifr_buffer.buffer, descrlen);
+ ifr->ifr_buffer.length = descrlen;
+ }
+ sx_sunlock(&ifdescr_sx);
+ break;
+
+ case SIOCSIFDESCR:
+ error = priv_check(td, PRIV_NET_SETIFDESCR);
+ if (error)
+ return (error);
+
+ /*
+ * Copy only (length-1) bytes to make sure that
+ * if_description is always nul terminated. The
+		 * length parameter is supposed to include the
+		 * terminating nul.
+ */
+ if (ifr->ifr_buffer.length > ifdescr_maxlen)
+ return (ENAMETOOLONG);
+ else if (ifr->ifr_buffer.length == 0)
+ descrbuf = NULL;
+ else {
+ descrbuf = malloc(ifr->ifr_buffer.length, M_IFDESCR,
+ M_WAITOK | M_ZERO);
+ error = copyin(ifr->ifr_buffer.buffer, descrbuf,
+ ifr->ifr_buffer.length - 1);
+ if (error) {
+ free(descrbuf, M_IFDESCR);
+ break;
+ }
+ }
+
+ sx_xlock(&ifdescr_sx);
+ odescrbuf = ifp->if_description;
+ ifp->if_description = descrbuf;
+ sx_xunlock(&ifdescr_sx);
+
+ getmicrotime(&ifp->if_lastchange);
+ free(odescrbuf, M_IFDESCR);
+ break;
+
+ case SIOCSIFFLAGS:
+ error = priv_check(td, PRIV_NET_SETIFFLAGS);
+ if (error)
+ return (error);
+ /*
+ * Currently, no driver owned flags pass the IFF_CANTCHANGE
+ * check, so we don't need special handling here yet.
+ */
+ new_flags = (ifr->ifr_flags & 0xffff) |
+ (ifr->ifr_flagshigh << 16);
+ if (ifp->if_flags & IFF_SMART) {
+ /* Smart drivers twiddle their own routes */
+ } else if (ifp->if_flags & IFF_UP &&
+ (new_flags & IFF_UP) == 0) {
+ int s = splimp();
+ if_down(ifp);
+ splx(s);
+ } else if (new_flags & IFF_UP &&
+ (ifp->if_flags & IFF_UP) == 0) {
+ int s = splimp();
+ if_up(ifp);
+ splx(s);
+ }
+ /* See if permanently promiscuous mode bit is about to flip */
+ if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
+ if (new_flags & IFF_PPROMISC)
+ ifp->if_flags |= IFF_PROMISC;
+ else if (ifp->if_pcount == 0)
+ ifp->if_flags &= ~IFF_PROMISC;
+ log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
+ ifp->if_xname,
+ (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
+ }
+ ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
+ (new_flags &~ IFF_CANTCHANGE);
+ if (ifp->if_ioctl) {
+ (void) (*ifp->if_ioctl)(ifp, cmd, data);
+ }
+ getmicrotime(&ifp->if_lastchange);
+ break;
+
+ case SIOCSIFCAP:
+ error = priv_check(td, PRIV_NET_SETIFCAP);
+ if (error)
+ return (error);
+ if (ifp->if_ioctl == NULL)
+ return (EOPNOTSUPP);
+ if (ifr->ifr_reqcap & ~ifp->if_capabilities)
+ return (EINVAL);
+ error = (*ifp->if_ioctl)(ifp, cmd, data);
+ if (error == 0)
+ getmicrotime(&ifp->if_lastchange);
+ break;
+
+#ifdef MAC
+ case SIOCSIFMAC:
+ error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
+ break;
+#endif
+
+ case SIOCSIFNAME:
+ error = priv_check(td, PRIV_NET_SETIFNAME);
+ if (error)
+ return (error);
+ error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
+ if (error != 0)
+ return (error);
+ if (new_name[0] == '\0')
+ return (EINVAL);
+ if (ifunit(new_name) != NULL)
+ return (EEXIST);
+
+ /*
+ * XXX: Locking. Nothing else seems to lock if_flags,
+ * and there are numerous other races with the
+ * ifunit() checks not being atomic with namespace
+ * changes (renames, vmoves, if_attach, etc).
+ */
+ ifp->if_flags |= IFF_RENAMING;
+
+ /* Announce the departure of the interface. */
+ rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
+ EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
+
+ log(LOG_INFO, "%s: changing name to '%s'\n",
+ ifp->if_xname, new_name);
+
+ strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
+ ifa = ifp->if_addr;
+ IFA_LOCK(ifa);
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ namelen = strlen(new_name);
+ onamelen = sdl->sdl_nlen;
+ /*
+ * Move the address if needed. This is safe because we
+ * allocate space for a name of length IFNAMSIZ when we
+ * create this in if_attach().
+ */
+ if (namelen != onamelen) {
+ bcopy(sdl->sdl_data + onamelen,
+ sdl->sdl_data + namelen, sdl->sdl_alen);
+ }
+ bcopy(new_name, sdl->sdl_data, namelen);
+ sdl->sdl_nlen = namelen;
+ sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
+ bzero(sdl->sdl_data, onamelen);
+ while (namelen != 0)
+ sdl->sdl_data[--namelen] = 0xff;
+ IFA_UNLOCK(ifa);
+
+ EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
+ /* Announce the return of the interface. */
+ rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
+
+ ifp->if_flags &= ~IFF_RENAMING;
+ break;
+
+#ifdef VIMAGE
+ case SIOCSIFVNET:
+ error = priv_check(td, PRIV_NET_SETIFVNET);
+ if (error)
+ return (error);
+ error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
+ break;
+#endif
+
+ case SIOCSIFMETRIC:
+ error = priv_check(td, PRIV_NET_SETIFMETRIC);
+ if (error)
+ return (error);
+ ifp->if_metric = ifr->ifr_metric;
+ getmicrotime(&ifp->if_lastchange);
+ break;
+
+ case SIOCSIFPHYS:
+ error = priv_check(td, PRIV_NET_SETIFPHYS);
+ if (error)
+ return (error);
+ if (ifp->if_ioctl == NULL)
+ return (EOPNOTSUPP);
+ error = (*ifp->if_ioctl)(ifp, cmd, data);
+ if (error == 0)
+ getmicrotime(&ifp->if_lastchange);
+ break;
+
+ case SIOCSIFMTU:
+ {
+ u_long oldmtu = ifp->if_mtu;
+
+ error = priv_check(td, PRIV_NET_SETIFMTU);
+ if (error)
+ return (error);
+ if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
+ return (EINVAL);
+ if (ifp->if_ioctl == NULL)
+ return (EOPNOTSUPP);
+ error = (*ifp->if_ioctl)(ifp, cmd, data);
+ if (error == 0) {
+ getmicrotime(&ifp->if_lastchange);
+ rt_ifmsg(ifp);
+ }
+ /*
+		 * If the link MTU changed, run network layer specific procedures.
+ */
+ if (ifp->if_mtu != oldmtu) {
+#ifdef INET6
+ nd6_setmtu(ifp);
+#endif
+ }
+ break;
+ }
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ if (cmd == SIOCADDMULTI)
+ error = priv_check(td, PRIV_NET_ADDMULTI);
+ else
+ error = priv_check(td, PRIV_NET_DELMULTI);
+ if (error)
+ return (error);
+
+ /* Don't allow group membership on non-multicast interfaces. */
+ if ((ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EOPNOTSUPP);
+
+ /* Don't let users screw up protocols' entries. */
+ if (ifr->ifr_addr.sa_family != AF_LINK)
+ return (EINVAL);
+
+ if (cmd == SIOCADDMULTI) {
+ struct ifmultiaddr *ifma;
+
+ /*
+ * Userland is only permitted to join groups once
+ * via the if_addmulti() KPI, because it cannot hold
+ * struct ifmultiaddr * between calls. It may also
+ * lose a race while we check if the membership
+ * already exists.
+ */
+ IF_ADDR_LOCK(ifp);
+ ifma = if_findmulti(ifp, &ifr->ifr_addr);
+ IF_ADDR_UNLOCK(ifp);
+ if (ifma != NULL)
+ error = EADDRINUSE;
+ else
+ error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
+ } else {
+ error = if_delmulti(ifp, &ifr->ifr_addr);
+ }
+ if (error == 0)
+ getmicrotime(&ifp->if_lastchange);
+ break;
+
+ case SIOCSIFPHYADDR:
+ case SIOCDIFPHYADDR:
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+#endif
+ case SIOCSLIFPHYADDR:
+ case SIOCSIFMEDIA:
+ case SIOCSIFGENERIC:
+ error = priv_check(td, PRIV_NET_HWIOCTL);
+ if (error)
+ return (error);
+ if (ifp->if_ioctl == NULL)
+ return (EOPNOTSUPP);
+ error = (*ifp->if_ioctl)(ifp, cmd, data);
+ if (error == 0)
+ getmicrotime(&ifp->if_lastchange);
+ break;
+
+ case SIOCGIFSTATUS:
+ ifs = (struct ifstat *)data;
+ ifs->ascii[0] = '\0';
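+		/* FALLTHROUGH */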
+
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ case SIOCGLIFPHYADDR:
+ case SIOCGIFMEDIA:
+ case SIOCGIFGENERIC:
+ if (ifp->if_ioctl == NULL)
+ return (EOPNOTSUPP);
+ error = (*ifp->if_ioctl)(ifp, cmd, data);
+ break;
+
+ case SIOCSIFLLADDR:
+ error = priv_check(td, PRIV_NET_SETLLADDR);
+ if (error)
+ return (error);
+ error = if_setlladdr(ifp,
+ ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
+ EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+ break;
+
+ case SIOCAIFGROUP:
+ {
+ struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
+
+ error = priv_check(td, PRIV_NET_ADDIFGROUP);
+ if (error)
+ return (error);
+ if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
+ return (error);
+ break;
+ }
+
+ case SIOCGIFGROUP:
+ if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
+ return (error);
+ break;
+
+ case SIOCDIFGROUP:
+ {
+ struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
+
+ error = priv_check(td, PRIV_NET_DELIFGROUP);
+ if (error)
+ return (error);
+ if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
+ return (error);
+ break;
+ }
+
+ default:
+ error = ENOIOCTL;
+ break;
+ }
+ return (error);
+}
+
+#ifdef COMPAT_FREEBSD32
+struct ifconf32 {
+ int32_t ifc_len;
+ union {
+ uint32_t ifcu_buf;
+ uint32_t ifcu_req;
+ } ifc_ifcu;
+};
+#define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32)
+#endif
+
+/*
+ * Interface ioctls.
+ */
+int
+ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
+{
+ struct ifnet *ifp;
+ struct ifreq *ifr;
+ int error;
+ int oif_flags;
+
+ switch (cmd) {
+ case SIOCGIFCONF:
+ case OSIOCGIFCONF:
+ return (ifconf(cmd, data));
+
+#ifdef COMPAT_FREEBSD32
+ case SIOCGIFCONF32:
+ {
+ struct ifconf32 *ifc32;
+ struct ifconf ifc;
+
+ ifc32 = (struct ifconf32 *)data;
+ ifc.ifc_len = ifc32->ifc_len;
+ ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
+
+ return (ifconf(SIOCGIFCONF, (void *)&ifc));
+ }
+#endif
+ }
+ ifr = (struct ifreq *)data;
+
+ switch (cmd) {
+#ifdef VIMAGE
+ case SIOCSIFRVNET:
+ error = priv_check(td, PRIV_NET_SETIFVNET);
+ if (error)
+ return (error);
+ return (if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid));
+#endif
+ case SIOCIFCREATE:
+ case SIOCIFCREATE2:
+ error = priv_check(td, PRIV_NET_IFCREATE);
+ if (error)
+ return (error);
+ return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
+ cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
+ case SIOCIFDESTROY:
+ error = priv_check(td, PRIV_NET_IFDESTROY);
+ if (error)
+ return (error);
+ return if_clone_destroy(ifr->ifr_name);
+
+ case SIOCIFGCLONERS:
+ return (if_clone_list((struct if_clonereq *)data));
+ case SIOCGIFGMEMB:
+ return (if_getgroupmembers((struct ifgroupreq *)data));
+ }
+
+ ifp = ifunit_ref(ifr->ifr_name);
+ if (ifp == NULL)
+ return (ENXIO);
+
+ error = ifhwioctl(cmd, ifp, data, td);
+ if (error != ENOIOCTL) {
+ if_rele(ifp);
+ return (error);
+ }
+
+ oif_flags = ifp->if_flags;
+ if (so->so_proto == NULL) {
+ if_rele(ifp);
+ return (EOPNOTSUPP);
+ }
+#ifndef COMPAT_43
+ error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
+ data,
+ ifp, td));
+ if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL)
+ error = (*ifp->if_ioctl)(ifp, cmd, data);
+#else
+ {
+ u_long ocmd = cmd;
+
+ switch (cmd) {
+
+ case SIOCSIFDSTADDR:
+ case SIOCSIFADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCSIFNETMASK:
+#if BYTE_ORDER != BIG_ENDIAN
+ if (ifr->ifr_addr.sa_family == 0 &&
+ ifr->ifr_addr.sa_len < 16) {
+ ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
+ ifr->ifr_addr.sa_len = 16;
+ }
+#else
+ if (ifr->ifr_addr.sa_len == 0)
+ ifr->ifr_addr.sa_len = 16;
+#endif
+ break;
+
+ case OSIOCGIFADDR:
+ cmd = SIOCGIFADDR;
+ break;
+
+ case OSIOCGIFDSTADDR:
+ cmd = SIOCGIFDSTADDR;
+ break;
+
+ case OSIOCGIFBRDADDR:
+ cmd = SIOCGIFBRDADDR;
+ break;
+
+ case OSIOCGIFNETMASK:
+ cmd = SIOCGIFNETMASK;
+ }
+ error = ((*so->so_proto->pr_usrreqs->pru_control)(so,
+ cmd,
+ data,
+ ifp, td));
+ if (error == EOPNOTSUPP && ifp != NULL &&
+ ifp->if_ioctl != NULL)
+ error = (*ifp->if_ioctl)(ifp, cmd, data);
+ switch (ocmd) {
+
+ case OSIOCGIFADDR:
+ case OSIOCGIFDSTADDR:
+ case OSIOCGIFBRDADDR:
+ case OSIOCGIFNETMASK:
+ *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
+
+ }
+ }
+#endif /* COMPAT_43 */
+
+ if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
+#ifdef INET6
+ if (ifp->if_flags & IFF_UP) {
+ int s = splimp();
+ in6_if_up(ifp);
+ splx(s);
+ }
+#endif
+ }
+ if_rele(ifp);
+ return (error);
+}
+
+/*
+ * The code common to handling reference counted flags,
+ * e.g., in ifpromisc() and if_allmulti().
+ * The "pflag" argument can specify a permanent mode flag to check,
+ * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
+ *
+ * Only to be used on stack-owned flags, not driver-owned flags.
+ */
+static int
+if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
+{
+ struct ifreq ifr;
+ int error;
+ int oldflags, oldcount;
+
+ /* Sanity checks to catch programming errors */
+ KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
+ ("%s: setting driver-owned flag %d", __func__, flag));
+
+ if (onswitch)
+ KASSERT(*refcount >= 0,
+ ("%s: increment negative refcount %d for flag %d",
+ __func__, *refcount, flag));
+ else
+ KASSERT(*refcount > 0,
+ ("%s: decrement non-positive refcount %d for flag %d",
+ __func__, *refcount, flag));
+
+ /* In case this mode is permanent, just touch refcount */
+ if (ifp->if_flags & pflag) {
+ *refcount += onswitch ? 1 : -1;
+ return (0);
+ }
+
+	/* Save ifnet parameters in case if_ioctl() fails. */
+ oldcount = *refcount;
+ oldflags = ifp->if_flags;
+
+ /*
+	 * See if we are not the only one, in which case touching the
+	 * refcount is enough.  Actually toggle the interface flag only if
+	 * we are the first or the last.
+ */
+ if (onswitch) {
+ if ((*refcount)++)
+ return (0);
+ ifp->if_flags |= flag;
+ } else {
+ if (--(*refcount))
+ return (0);
+ ifp->if_flags &= ~flag;
+ }
+
+ /* Call down the driver since we've changed interface flags */
+ if (ifp->if_ioctl == NULL) {
+ error = EOPNOTSUPP;
+ goto recover;
+ }
+ ifr.ifr_flags = ifp->if_flags & 0xffff;
+ ifr.ifr_flagshigh = ifp->if_flags >> 16;
+ error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
+ if (error)
+ goto recover;
+ /* Notify userland that interface flags have changed */
+ rt_ifmsg(ifp);
+ return (0);
+
+recover:
+ /* Recover after driver error */
+ *refcount = oldcount;
+ ifp->if_flags = oldflags;
+ return (error);
+}
+
+/*
+ * Set/clear promiscuous mode on interface ifp based on the truth value
+ * of pswitch. The calls are reference counted so that only the first
+ * "on" request actually has an effect, as does the final "off" request.
+ * Results are undefined if the "off" and "on" requests are not matched.
+ */
+int
+ifpromisc(struct ifnet *ifp, int pswitch)
+{
+ int error;
+ int oldflags = ifp->if_flags;
+
+ error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
+ &ifp->if_pcount, pswitch);
+ /* If promiscuous mode status has changed, log a message */
+ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
+ log(LOG_INFO, "%s: promiscuous mode %s\n",
+ ifp->if_xname,
+ (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
+ return (error);
+}
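+
+/*
+ * A usage sketch: consumers bracket their need for promiscuous reception
+ * with matched calls and rely on the reference counting above.
+ *
+ *	error = ifpromisc(ifp, 1);	-- first "on" sets IFF_PROMISC
+ *	if (error == 0) {
+ *		...
+ *		(void)ifpromisc(ifp, 0);	-- last "off" clears it
+ *	}
+ */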
+
+/*
+ * Return the interface configuration of the system.  The list may be
+ * used in later ioctls (above) to get other information.
+ */
+/*ARGSUSED*/
+static int
+ifconf(u_long cmd, caddr_t data)
+{
+ struct ifconf *ifc = (struct ifconf *)data;
+ struct ifnet *ifp;
+ struct ifaddr *ifa;
+ struct ifreq ifr;
+ struct sbuf *sb;
+ int error, full = 0, valid_len, max_len;
+
+ /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
+ max_len = MAXPHYS - 1;
+
+ /* Prevent hostile input from being able to crash the system */
+ if (ifc->ifc_len <= 0)
+ return (EINVAL);
+
+again:
+ if (ifc->ifc_len <= max_len) {
+ max_len = ifc->ifc_len;
+ full = 1;
+ }
+ sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
+ max_len = 0;
+ valid_len = 0;
+
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ int addrs;
+
+ /*
+ * Zero the ifr_name buffer to make sure we don't
+ * disclose the contents of the stack.
+ */
+ memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
+
+ if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
+ >= sizeof(ifr.ifr_name)) {
+ sbuf_delete(sb);
+ IFNET_RUNLOCK();
+ return (ENAMETOOLONG);
+ }
+
+ addrs = 0;
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ struct sockaddr *sa = ifa->ifa_addr;
+
+ if (prison_if(curthread->td_ucred, sa) != 0)
+ continue;
+ addrs++;
+#ifdef COMPAT_43
+ if (cmd == OSIOCGIFCONF) {
+ struct osockaddr *osa =
+ (struct osockaddr *)&ifr.ifr_addr;
+ ifr.ifr_addr = *sa;
+ osa->sa_family = sa->sa_family;
+ sbuf_bcat(sb, &ifr, sizeof(ifr));
+ max_len += sizeof(ifr);
+ } else
+#endif
+ if (sa->sa_len <= sizeof(*sa)) {
+ ifr.ifr_addr = *sa;
+ sbuf_bcat(sb, &ifr, sizeof(ifr));
+ max_len += sizeof(ifr);
+ } else {
+ sbuf_bcat(sb, &ifr,
+ offsetof(struct ifreq, ifr_addr));
+ max_len += offsetof(struct ifreq, ifr_addr);
+ sbuf_bcat(sb, sa, sa->sa_len);
+ max_len += sa->sa_len;
+ }
+
+ if (!sbuf_overflowed(sb))
+ valid_len = sbuf_len(sb);
+ }
+ IF_ADDR_UNLOCK(ifp);
+ if (addrs == 0) {
+ bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
+ sbuf_bcat(sb, &ifr, sizeof(ifr));
+ max_len += sizeof(ifr);
+
+ if (!sbuf_overflowed(sb))
+ valid_len = sbuf_len(sb);
+ }
+ }
+ IFNET_RUNLOCK();
+
+ /*
+ * If we didn't allocate enough space (uncommon), try again. If
+ * we have already allocated as much space as we are allowed,
+ * return what we've got.
+ */
+ if (valid_len != max_len && !full) {
+ sbuf_delete(sb);
+ goto again;
+ }
+
+ ifc->ifc_len = valid_len;
+ sbuf_finish(sb);
+ error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
+ sbuf_delete(sb);
+ return (error);
+}
+
+/*
+ * Just like ifpromisc(), but for all-multicast-reception mode.
+ */
+int
+if_allmulti(struct ifnet *ifp, int onswitch)
+{
+
+ return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
+}
+
+struct ifmultiaddr *
+if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
+{
+ struct ifmultiaddr *ifma;
+
+ IF_ADDR_LOCK_ASSERT(ifp);
+
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (sa->sa_family == AF_LINK) {
+ if (sa_dl_equal(ifma->ifma_addr, sa))
+ break;
+ } else {
+ if (sa_equal(ifma->ifma_addr, sa))
+ break;
+ }
+ }
+
+ return ifma;
+}
+
+/*
+ * Allocate a new ifmultiaddr and initialize based on passed arguments. We
+ * make copies of passed sockaddrs. The ifmultiaddr will not be added to
+ * the ifnet multicast address list here, so the caller must do that and
+ * other setup work (such as notifying the device driver). The reference
+ * count is initialized to 1.
+ */
+static struct ifmultiaddr *
+if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
+ int mflags)
+{
+ struct ifmultiaddr *ifma;
+ struct sockaddr *dupsa;
+
+	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | M_ZERO);
+ if (ifma == NULL)
+ return (NULL);
+
+ dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
+ if (dupsa == NULL) {
+ free(ifma, M_IFMADDR);
+ return (NULL);
+ }
+ bcopy(sa, dupsa, sa->sa_len);
+ ifma->ifma_addr = dupsa;
+
+ ifma->ifma_ifp = ifp;
+ ifma->ifma_refcount = 1;
+ ifma->ifma_protospec = NULL;
+
+ if (llsa == NULL) {
+ ifma->ifma_lladdr = NULL;
+ return (ifma);
+ }
+
+ dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
+ if (dupsa == NULL) {
+ free(ifma->ifma_addr, M_IFMADDR);
+ free(ifma, M_IFMADDR);
+ return (NULL);
+ }
+ bcopy(llsa, dupsa, llsa->sa_len);
+ ifma->ifma_lladdr = dupsa;
+
+ return (ifma);
+}
+
+/*
+ * if_freemulti: free ifmultiaddr structure and possibly attached related
+ * addresses. The caller is responsible for implementing reference
+ * counting, notifying the driver, handling routing messages, and releasing
+ * any dependent link layer state.
+ */
+static void
+if_freemulti(struct ifmultiaddr *ifma)
+{
+
+ KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
+ ifma->ifma_refcount));
+ KASSERT(ifma->ifma_protospec == NULL,
+ ("if_freemulti: protospec not NULL"));
+
+ if (ifma->ifma_lladdr != NULL)
+ free(ifma->ifma_lladdr, M_IFMADDR);
+ free(ifma->ifma_addr, M_IFMADDR);
+ free(ifma, M_IFMADDR);
+}
+
+/*
+ * Register an additional multicast address with a network interface.
+ *
+ * - If the address is already present, bump the reference count on the
+ * address and return.
+ * - If the address is not link-layer, look up a link layer address.
+ * - Allocate address structures for one or both addresses, and attach to the
+ * multicast address list on the interface. If automatically adding a link
+ * layer address, the protocol address will own a reference to the link
+ * layer address, to be freed when it is freed.
+ * - Notify the network device driver of an addition to the multicast address
+ * list.
+ *
+ * 'sa' points to caller-owned memory with the desired multicast address.
+ *
+ * 'retifma' will be used to return a pointer to the resulting multicast
+ * address reference, if desired.
+ */
+int
+if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
+ struct ifmultiaddr **retifma)
+{
+ struct ifmultiaddr *ifma, *ll_ifma;
+ struct sockaddr *llsa;
+ int error;
+
+ /*
+ * If the address is already present, return a new reference to it;
+ * otherwise, allocate storage and set up a new address.
+ */
+ IF_ADDR_LOCK(ifp);
+ ifma = if_findmulti(ifp, sa);
+ if (ifma != NULL) {
+ ifma->ifma_refcount++;
+ if (retifma != NULL)
+ *retifma = ifma;
+ IF_ADDR_UNLOCK(ifp);
+ return (0);
+ }
+
+ /*
+ * The address isn't already present; resolve the protocol address
+	 * into a link layer address, then look that up and bump its
+	 * refcount or allocate an ifma for it as well. If 'llsa' was
+ * returned, we will need to free it later.
+ */
+ llsa = NULL;
+ ll_ifma = NULL;
+ if (ifp->if_resolvemulti != NULL) {
+ error = ifp->if_resolvemulti(ifp, &llsa, sa);
+ if (error)
+ goto unlock_out;
+ }
+
+ /*
+ * Allocate the new address. Don't hook it up yet, as we may also
+ * need to allocate a link layer multicast address.
+ */
+ ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
+ if (ifma == NULL) {
+ error = ENOMEM;
+ goto free_llsa_out;
+ }
+
+ /*
+ * If a link layer address is found, we'll need to see if it's
+	 * already present in the address list, or allocate it as well.
+ * When this block finishes, the link layer address will be on the
+ * list.
+ */
+ if (llsa != NULL) {
+ ll_ifma = if_findmulti(ifp, llsa);
+ if (ll_ifma == NULL) {
+ ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
+ if (ll_ifma == NULL) {
+ --ifma->ifma_refcount;
+ if_freemulti(ifma);
+ error = ENOMEM;
+ goto free_llsa_out;
+ }
+ TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
+ ifma_link);
+ } else
+ ll_ifma->ifma_refcount++;
+ ifma->ifma_llifma = ll_ifma;
+ }
+
+ /*
+ * We now have a new multicast address, ifma, and possibly a new or
+ * referenced link layer address. Add the primary address to the
+ * ifnet address list.
+ */
+ TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
+
+ if (retifma != NULL)
+ *retifma = ifma;
+
+ /*
+ * Must generate the message while holding the lock so that 'ifma'
+ * pointer is still valid.
+ */
+ rt_newmaddrmsg(RTM_NEWMADDR, ifma);
+ IF_ADDR_UNLOCK(ifp);
+
+ /*
+ * We are certain we have added something, so call down to the
+ * interface to let them know about it.
+ */
+ if (ifp->if_ioctl != NULL) {
+ (void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
+ }
+
+ if (llsa != NULL)
+ free(llsa, M_IFMADDR);
+
+ return (0);
+
+free_llsa_out:
+ if (llsa != NULL)
+ free(llsa, M_IFMADDR);
+
+unlock_out:
+ IF_ADDR_UNLOCK(ifp);
+ return (error);
+}
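+
+/*
+ * A minimal usage sketch (hypothetical caller; 'gsa' stands for a
+ * caller-owned sockaddr holding the group address):
+ *
+ *	struct ifmultiaddr *ifma;
+ *	int error;
+ *
+ *	error = if_addmulti(ifp, gsa, &ifma);
+ *	if (error == 0) {
+ *		... use the membership; later leave the group with ...
+ *		if_delmulti_ifma(ifma);
+ *	}
+ */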
+
+/*
+ * Delete a multicast group membership by network-layer group address.
+ *
+ * Returns ENOENT if the entry could not be found. If ifp no longer
+ * exists, results are undefined. This entry point should only be used
+ * from subsystems which do appropriate locking to hold ifp for the
+ * duration of the call.
+ * Network-layer protocol domains must use if_delmulti_ifma().
+ */
+int
+if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
+{
+ struct ifmultiaddr *ifma;
+ int lastref;
+#ifdef INVARIANTS
+ struct ifnet *oifp;
+
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(oifp, &V_ifnet, if_link)
+ if (ifp == oifp)
+ break;
+ if (ifp != oifp)
+ ifp = NULL;
+ IFNET_RUNLOCK_NOSLEEP();
+
+ KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
+#endif
+ if (ifp == NULL)
+ return (ENOENT);
+
+ IF_ADDR_LOCK(ifp);
+ lastref = 0;
+ ifma = if_findmulti(ifp, sa);
+ if (ifma != NULL)
+ lastref = if_delmulti_locked(ifp, ifma, 0);
+ IF_ADDR_UNLOCK(ifp);
+
+ if (ifma == NULL)
+ return (ENOENT);
+
+ if (lastref && ifp->if_ioctl != NULL) {
+ (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
+ }
+
+ return (0);
+}
+
+/*
+ * Delete all multicast group membership for an interface.
+ * Should be used to quickly flush all multicast filters.
+ */
+void
+if_delallmulti(struct ifnet *ifp)
+{
+ struct ifmultiaddr *ifma;
+ struct ifmultiaddr *next;
+
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
+ if_delmulti_locked(ifp, ifma, 0);
+ IF_ADDR_UNLOCK(ifp);
+}
+
+/*
+ * Delete a multicast group membership by group membership pointer.
+ * Network-layer protocol domains must use this routine.
+ *
+ * It is safe to call this routine if the ifp disappeared.
+ */
+void
+if_delmulti_ifma(struct ifmultiaddr *ifma)
+{
+ struct ifnet *ifp;
+ int lastref;
+
+ ifp = ifma->ifma_ifp;
+#ifdef DIAGNOSTIC
+ if (ifp == NULL) {
+ printf("%s: ifma_ifp seems to be detached\n", __func__);
+ } else {
+ struct ifnet *oifp;
+
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(oifp, &V_ifnet, if_link)
+ if (ifp == oifp)
+ break;
+ if (ifp != oifp) {
+ printf("%s: ifnet %p disappeared\n", __func__, ifp);
+ ifp = NULL;
+ }
+ IFNET_RUNLOCK_NOSLEEP();
+ }
+#endif
+ /*
+ * If and only if the ifnet instance exists: Acquire the address lock.
+ */
+ if (ifp != NULL)
+ IF_ADDR_LOCK(ifp);
+
+ lastref = if_delmulti_locked(ifp, ifma, 0);
+
+ if (ifp != NULL) {
+ /*
+ * If and only if the ifnet instance exists:
+ * Release the address lock.
+ * If the group was left: update the hardware hash filter.
+ */
+ IF_ADDR_UNLOCK(ifp);
+ if (lastref && ifp->if_ioctl != NULL) {
+ (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
+ }
+ }
+}
+
+/*
+ * Perform deletion of network-layer and/or link-layer multicast address.
+ *
+ * Return 0 if the reference count was decremented.
+ * Return 1 if the final reference was released, indicating that the
+ * hardware hash filter should be reprogrammed.
+ */
+static int
+if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
+{
+ struct ifmultiaddr *ll_ifma;
+
+ if (ifp != NULL && ifma->ifma_ifp != NULL) {
+ KASSERT(ifma->ifma_ifp == ifp,
+ ("%s: inconsistent ifp %p", __func__, ifp));
+ IF_ADDR_LOCK_ASSERT(ifp);
+ }
+
+ ifp = ifma->ifma_ifp;
+
+ /*
+ * If the ifnet is detaching, null out references to ifnet,
+ * so that upper protocol layers will notice, and not attempt
+ * to obtain locks for an ifnet which no longer exists. The
+ * routing socket announcement must happen before the ifnet
+ * instance is detached from the system.
+ */
+ if (detaching) {
+#ifdef DIAGNOSTIC
+ printf("%s: detaching ifnet instance %p\n", __func__, ifp);
+#endif
+ /*
+ * ifp may already be nulled out if we are being reentered
+ * to delete the ll_ifma.
+ */
+ if (ifp != NULL) {
+ rt_newmaddrmsg(RTM_DELMADDR, ifma);
+ ifma->ifma_ifp = NULL;
+ }
+ }
+
+ if (--ifma->ifma_refcount > 0)
+ return 0;
+
+ /*
+ * If this ifma is a network-layer ifma, a link-layer ifma may
+ * have been associated with it. Release it first if so.
+ */
+ ll_ifma = ifma->ifma_llifma;
+ if (ll_ifma != NULL) {
+ KASSERT(ifma->ifma_lladdr != NULL,
+ ("%s: llifma w/o lladdr", __func__));
+ if (detaching)
+ ll_ifma->ifma_ifp = NULL; /* XXX */
+ if (--ll_ifma->ifma_refcount == 0) {
+ if (ifp != NULL) {
+ TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
+ ifma_link);
+ }
+ if_freemulti(ll_ifma);
+ }
+ }
+
+ if (ifp != NULL)
+ TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
+
+ if_freemulti(ifma);
+
+ /*
+ * The last reference to this instance of struct ifmultiaddr
+ * was released; the hardware should be notified of this change.
+ */
+ return 1;
+}
+
+/*
+ * Set the link layer address on an interface.
+ *
+ * At this time we only support certain types of interfaces,
+ * and we don't allow the length of the address to change.
+ */
+int
+if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
+{
+ struct sockaddr_dl *sdl;
+ struct ifaddr *ifa;
+ struct ifreq ifr;
+
+ IF_ADDR_LOCK(ifp);
+ ifa = ifp->if_addr;
+ if (ifa == NULL) {
+ IF_ADDR_UNLOCK(ifp);
+ return (EINVAL);
+ }
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ if (sdl == NULL) {
+ ifa_free(ifa);
+ return (EINVAL);
+ }
+ if (len != sdl->sdl_alen) { /* don't allow length to change */
+ ifa_free(ifa);
+ return (EINVAL);
+ }
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_FDDI:
+ case IFT_XETHER:
+ case IFT_ISO88025:
+ case IFT_L2VLAN:
+ case IFT_BRIDGE:
+ case IFT_ARCNET:
+ case IFT_IEEE8023ADLAG:
+ case IFT_IEEE80211:
+ bcopy(lladdr, LLADDR(sdl), len);
+ ifa_free(ifa);
+ break;
+ default:
+ ifa_free(ifa);
+ return (ENODEV);
+ }
+
+ /*
+ * If the interface is already up, we need
+ * to re-init it in order to reprogram its
+ * address filter.
+ */
+ if ((ifp->if_flags & IFF_UP) != 0) {
+ if (ifp->if_ioctl) {
+ ifp->if_flags &= ~IFF_UP;
+ ifr.ifr_flags = ifp->if_flags & 0xffff;
+ ifr.ifr_flagshigh = ifp->if_flags >> 16;
+ (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
+ ifp->if_flags |= IFF_UP;
+ ifr.ifr_flags = ifp->if_flags & 0xffff;
+ ifr.ifr_flagshigh = ifp->if_flags >> 16;
+ (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
+ }
+#ifdef INET
+ /*
+ * Also send gratuitous ARPs to notify other nodes about
+ * the address change.
+ */
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family == AF_INET)
+ arp_ifinit(ifp, ifa);
+ }
+#endif
+ }
+ return (0);
+}
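+
+/*
+ * A minimal usage sketch (hypothetical values): programming a new
+ * 6-byte Ethernet address.
+ *
+ *	static const u_char lladdr[ETHER_ADDR_LEN] =
+ *	    { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
+ *
+ *	if (if_setlladdr(ifp, lladdr, ETHER_ADDR_LEN) != 0)
+ *		... unsupported interface type or changed length ...
+ */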
+
+/*
+ * The name argument must be a pointer to storage which will last as
+ * long as the interface does. For physical devices, the result of
+ * device_get_name(dev) is a good choice and for pseudo-devices a
+ * static string works well.
+ */
+void
+if_initname(struct ifnet *ifp, const char *name, int unit)
+{
+ ifp->if_dname = name;
+ ifp->if_dunit = unit;
+ if (unit != IF_DUNIT_NONE)
+ snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
+ else
+ strlcpy(ifp->if_xname, name, IFNAMSIZ);
+}
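+
+/*
+ * Usage sketch: a physical device attach path would typically call
+ *
+ *	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+ *
+ * yielding an if_xname such as "em0", while a unit-less pseudo-device
+ * (hypothetical example) would use
+ *
+ *	if_initname(ifp, "mydev", IF_DUNIT_NONE);
+ */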
+
+int
+if_printf(struct ifnet *ifp, const char * fmt, ...)
+{
+ va_list ap;
+ int retval;
+
+ retval = printf("%s: ", ifp->if_xname);
+ va_start(ap, fmt);
+ retval += vprintf(fmt, ap);
+ va_end(ap);
+ return (retval);
+}
+
+void
+if_start(struct ifnet *ifp)
+{
+
+ (*(ifp)->if_start)(ifp);
+}
+
+/*
+ * Backwards compatibility interface for drivers
+ * that have not implemented it
+ */
+static int
+if_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ int error;
+
+ IFQ_HANDOFF(ifp, m, error);
+ return (error);
+}
+
+int
+if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
+{
+ int active = 0;
+
+ IF_LOCK(ifq);
+ if (_IF_QFULL(ifq)) {
+ _IF_DROP(ifq);
+ IF_UNLOCK(ifq);
+ m_freem(m);
+ return (0);
+ }
+ if (ifp != NULL) {
+ ifp->if_obytes += m->m_pkthdr.len + adjust;
+ if (m->m_flags & (M_BCAST|M_MCAST))
+ ifp->if_omcasts++;
+ active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
+ }
+ _IF_ENQUEUE(ifq, m);
+ IF_UNLOCK(ifq);
+ if (ifp != NULL && !active)
+ (*(ifp)->if_start)(ifp);
+ return (1);
+}
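+
+/*
+ * A note on 'adjust' (derived from the accounting above): it corrects
+ * if_obytes for link-level framing already prepended to the mbuf. For
+ * example, atm_output() in if_atmsubr.c hands off with
+ * adjust = -(int)sizeof(struct atm_pseudohdr) so that the pseudo header
+ * consumed by the driver is not counted as wire bytes.
+ */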
+
+void
+if_register_com_alloc(u_char type,
+ if_com_alloc_t *a, if_com_free_t *f)
+{
+
+ KASSERT(if_com_alloc[type] == NULL,
+ ("if_register_com_alloc: %d already registered", type));
+ KASSERT(if_com_free[type] == NULL,
+ ("if_register_com_alloc: %d free already registered", type));
+
+ if_com_alloc[type] = a;
+ if_com_free[type] = f;
+}
+
+void
+if_deregister_com_alloc(u_char type)
+{
+
+ KASSERT(if_com_alloc[type] != NULL,
+ ("if_deregister_com_alloc: %d not registered", type));
+ KASSERT(if_com_free[type] != NULL,
+ ("if_deregister_com_alloc: %d free not registered", type));
+ if_com_alloc[type] = NULL;
+ if_com_free[type] = NULL;
+}
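+
+/*
+ * Usage sketch: a link type registers its if_l2com allocator once at
+ * module load and removes it on unload; arc_modevent() in if_arcsubr.c
+ * does exactly this:
+ *
+ *	if_register_com_alloc(IFT_ARCNET, arc_alloc, arc_free);
+ *	...
+ *	if_deregister_com_alloc(IFT_ARCNET);
+ */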
diff --git a/freebsd/sys/net/if.h b/freebsd/sys/net/if.h
new file mode 100644
index 00000000..6fbbb34a
--- /dev/null
+++ b/freebsd/sys/net/if.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/net/if.h>
diff --git a/freebsd/sys/net/if_arc.h b/freebsd/sys/net/if_arc.h
new file mode 100644
index 00000000..6be5d4e1
--- /dev/null
+++ b/freebsd/sys/net/if_arc.h
@@ -0,0 +1,143 @@
+/* $NetBSD: if_arc.h,v 1.13 1999/11/19 20:41:19 thorpej Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: NetBSD: if_ether.h,v 1.10 1994/06/29 06:37:55 cgd Exp
+ * @(#)if_ether.h 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _NET_IF_ARC_HH_
+#define _NET_IF_ARC_HH_
+
+/*
+ * ARCnet address - 1 octet.
+ * Don't know who uses this.
+ */
+struct arc_addr {
+ u_int8_t arc_addr_octet[1];
+} __packed;
+
+/*
+ * Structure of a 2.5 Mbit/s ARCnet header,
+ * as given to interface code.
+ */
+struct arc_header {
+ u_int8_t arc_shost;
+ u_int8_t arc_dhost;
+ u_int8_t arc_type;
+ /*
+ * only present for newstyle encoding with LL fragmentation.
+ * Don't use sizeof(anything), use ARC_HDR{,NEW}LEN instead.
+ */
+ u_int8_t arc_flag;
+ u_int16_t arc_seqid;
+
+ /*
+ * only present in exception packets (arc_flag == 0xff)
+ */
+ u_int8_t arc_type2; /* same as arc_type */
+ u_int8_t arc_flag2; /* real flag value */
+ u_int16_t arc_seqid2; /* real seqid value */
+} __packed;
+
+#define ARC_ADDR_LEN 1
+
+#define ARC_HDRLEN 3
+#define ARC_HDRNEWLEN 6
+#define ARC_HDRNEWLEN_EXC 10
+
+/* these lengths are data link layer length - 2 * ARC_ADDR_LEN */
+#define ARC_MIN_LEN 1
+#define ARC_MIN_FORBID_LEN 254
+#define ARC_MAX_FORBID_LEN 256
+#define ARC_MAX_LEN 508
+#define ARC_MAX_DATA 504
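+
+/*
+ * A worked check of the constants above: ARC_HDRNEWLEN covers
+ * shost + dhost + type + flag + seqid = 6 bytes; since these lengths
+ * already exclude the two address octets,
+ * ARC_MAX_DATA = ARC_MAX_LEN - (ARC_HDRNEWLEN - 2 * ARC_ADDR_LEN)
+ *              = 508 - 4 = 504.
+ */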
+
+/* RFC 1051 */
+#define ARCTYPE_IP_OLD 240 /* IP protocol */
+#define ARCTYPE_ARP_OLD 241 /* address resolution protocol */
+
+/* RFC 1201 */
+#define ARCTYPE_IP 212 /* IP protocol */
+#define ARCTYPE_ARP 213 /* address resolution protocol */
+#define ARCTYPE_REVARP 214 /* reverse addr resolution protocol */
+
+#define ARCTYPE_ATALK 221 /* Appletalk */
+#define ARCTYPE_BANIAN 247 /* Banyan Vines */
+#define ARCTYPE_IPX 250 /* Novell IPX */
+
+#define ARCTYPE_INET6 0xc4 /* IPng */
+#define ARCTYPE_DIAGNOSE 0x80 /* as per ANSI/ATA 878.1 */
+
+#define ARCMTU 507
+#define ARCMIN 0
+
+#define ARC_PHDS_MAXMTU 60480
+
+struct arccom {
+ struct ifnet *ac_ifp; /* network-visible interface */
+
+ u_int16_t ac_seqid; /* seq. id used by PHDS encap. */
+
+ u_int8_t arc_shost;
+ u_int8_t arc_dhost;
+ u_int8_t arc_type;
+
+ u_int8_t dummy0;
+ u_int16_t dummy1;
+ int sflag, fsflag, rsflag;
+ struct mbuf *curr_frag;
+
+ struct ac_frag {
+ u_int8_t af_maxflag; /* from first packet */
+ u_int8_t af_lastseen; /* last split flag seen */
+ u_int16_t af_seqid;
+ struct mbuf *af_packet;
+ } ac_fragtab[256]; /* indexed by sender ll address */
+};
+
+#ifdef _KERNEL
+extern u_int8_t arcbroadcastaddr;
+extern int arc_ipmtu; /* XXX new ip only, no RFC 1051! */
+
+void arc_ifattach(struct ifnet *, u_int8_t);
+void arc_ifdetach(struct ifnet *);
+void arc_storelladdr(struct ifnet *, u_int8_t);
+int arc_isphds(u_int8_t);
+void arc_input(struct ifnet *, struct mbuf *);
+int arc_output(struct ifnet *, struct mbuf *,
+ struct sockaddr *, struct route *);
+int arc_ioctl(struct ifnet *, u_long, caddr_t);
+
+void arc_frag_init(struct ifnet *);
+struct mbuf * arc_frag_next(struct ifnet *);
+#endif
+
+#endif /* _NET_IF_ARC_HH_ */
diff --git a/freebsd/sys/net/if_arcsubr.c b/freebsd/sys/net/if_arcsubr.c
new file mode 100644
index 00000000..8cd53a6d
--- /dev/null
+++ b/freebsd/sys/net/if_arcsubr.c
@@ -0,0 +1,886 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $NetBSD: if_arcsubr.c,v 1.36 2001/06/14 05:44:23 itojun Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 1994, 1995 Ignatios Souvatzis
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: NetBSD: if_ethersubr.c,v 1.9 1994/06/29 06:36:11 cgd Exp
+ * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93
+ *
+ */
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipx.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/syslog.h>
+
+#include <freebsd/machine/cpu.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_arc.h>
+#include <freebsd/net/if_arp.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/if_llatbl.h>
+
+#if defined(INET) || defined(INET6)
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/if_ether.h>
+#endif
+
+#ifdef INET6
+#include <freebsd/netinet6/nd6.h>
+#endif
+
+#ifdef IPX
+#include <freebsd/netipx/ipx.h>
+#include <freebsd/netipx/ipx_if.h>
+#endif
+
+#define ARCNET_ALLOW_BROKEN_ARP
+
+static struct mbuf *arc_defrag(struct ifnet *, struct mbuf *);
+static int arc_resolvemulti(struct ifnet *, struct sockaddr **,
+ struct sockaddr *);
+
+u_int8_t arcbroadcastaddr = 0;
+
+#define ARC_LLADDR(ifp) (*(u_int8_t *)IF_LLADDR(ifp))
+
+#define senderr(e) { error = (e); goto bad;}
+#define SIN(s) ((struct sockaddr_in *)s)
+#define SIPX(s) ((struct sockaddr_ipx *)s)
+
+/*
+ * ARCnet output routine.
+ * Encapsulate a packet of type family for the local net.
+ * Assumes that ifp is actually pointer to arccom structure.
+ */
+int
+arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct route *ro)
+{
+ struct arc_header *ah;
+ int error;
+ u_int8_t atype, adst;
+ int loop_copy = 0;
+ int isphds;
+#if defined(INET) || defined(INET6)
+ struct llentry *lle;
+#endif
+
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ return(ENETDOWN); /* m, m1 aren't initialized yet */
+
+ error = 0;
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+
+ /*
+ * For now, use the simple IP addr -> ARCnet addr mapping
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST))
+ adst = arcbroadcastaddr; /* ARCnet broadcast address */
+ else if (ifp->if_flags & IFF_NOARP)
+ adst = ntohl(SIN(dst)->sin_addr.s_addr) & 0xFF;
+ else {
+ error = arpresolve(ifp, ro ? ro->ro_rt : NULL,
+ m, dst, &adst, &lle);
+ if (error)
+ return (error == EWOULDBLOCK ? 0 : error);
+ }
+
+ atype = (ifp->if_flags & IFF_LINK0) ?
+ ARCTYPE_IP_OLD : ARCTYPE_IP;
+ break;
+ case AF_ARP:
+ {
+ struct arphdr *ah;
+ ah = mtod(m, struct arphdr *);
+ ah->ar_hrd = htons(ARPHRD_ARCNET);
+
+ loop_copy = -1; /* if this is for us, don't do it */
+
+ switch(ntohs(ah->ar_op)) {
+ case ARPOP_REVREQUEST:
+ case ARPOP_REVREPLY:
+ atype = ARCTYPE_REVARP;
+ break;
+ case ARPOP_REQUEST:
+ case ARPOP_REPLY:
+ default:
+ atype = ARCTYPE_ARP;
+ break;
+ }
+
+ if (m->m_flags & M_BCAST)
+ bcopy(ifp->if_broadcastaddr, &adst, ARC_ADDR_LEN);
+ else
+ bcopy(ar_tha(ah), &adst, ARC_ADDR_LEN);
+
+ }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = nd6_storelladdr(ifp, m, dst, (u_char *)&adst, &lle);
+ if (error)
+ return (error);
+ atype = ARCTYPE_INET6;
+ break;
+#endif
+#ifdef IPX
+ case AF_IPX:
+ adst = SIPX(dst)->sipx_addr.x_host.c_host[5];
+ atype = ARCTYPE_IPX;
+ if (adst == 0xff)
+ adst = arcbroadcastaddr;
+ break;
+#endif
+
+ case AF_UNSPEC:
+ loop_copy = -1;
+ ah = (struct arc_header *)dst->sa_data;
+ adst = ah->arc_dhost;
+ atype = ah->arc_type;
+
+ if (atype == ARCTYPE_ARP) {
+ atype = (ifp->if_flags & IFF_LINK0) ?
+ ARCTYPE_ARP_OLD: ARCTYPE_ARP;
+
+#ifdef ARCNET_ALLOW_BROKEN_ARP
+ /*
+ * XXX It's not clear per RFC826 if this is needed, but
+ * "assigned numbers" say this is wrong.
+ * However, e.g., AmiTCP 3.0Beta used it... we make this
+ * switchable for emergency cases. Not perfect, but...
+ */
+ if (ifp->if_flags & IFF_LINK2)
+ mtod(m, struct arphdr *)->ar_pro = atype - 1;
+#endif
+ }
+ break;
+
+ default:
+ if_printf(ifp, "can't handle af%d\n", dst->sa_family);
+ senderr(EAFNOSUPPORT);
+ }
+
+ isphds = arc_isphds(atype);
+ M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_DONTWAIT);
+ if (m == 0)
+ senderr(ENOBUFS);
+ ah = mtod(m, struct arc_header *);
+ ah->arc_type = atype;
+ ah->arc_dhost = adst;
+ ah->arc_shost = ARC_LLADDR(ifp);
+ if (isphds) {
+ ah->arc_flag = 0;
+ ah->arc_seqid = 0;
+ }
+
+ if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) {
+ if ((m->m_flags & M_BCAST) || (loop_copy > 0)) {
+ struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
+
+ (void) if_simloop(ifp, n, dst->sa_family, ARC_HDRLEN);
+ } else if (ah->arc_dhost == ah->arc_shost) {
+ (void) if_simloop(ifp, m, dst->sa_family, ARC_HDRLEN);
+ return (0); /* XXX */
+ }
+ }
+
+ BPF_MTAP(ifp, m);
+
+ error = ifp->if_transmit(ifp, m);
+
+ return (error);
+
+bad:
+ if (m)
+ m_freem(m);
+ return (error);
+}
+
+void
+arc_frag_init(struct ifnet *ifp)
+{
+ struct arccom *ac;
+
+ ac = (struct arccom *)ifp->if_l2com;
+ ac->curr_frag = 0;
+}
+
+struct mbuf *
+arc_frag_next(struct ifnet *ifp)
+{
+ struct arccom *ac;
+ struct mbuf *m;
+ struct arc_header *ah;
+
+ ac = (struct arccom *)ifp->if_l2com;
+ if ((m = ac->curr_frag) == 0) {
+ int tfrags;
+
+ /* dequeue new packet */
+ IF_DEQUEUE(&ifp->if_snd, m);
+ if (m == 0)
+ return 0;
+
+ ah = mtod(m, struct arc_header *);
+ if (!arc_isphds(ah->arc_type))
+ return m;
+
+ ++ac->ac_seqid; /* make the seqid unique */
+ tfrags = (m->m_pkthdr.len + ARC_MAX_DATA - 1) / ARC_MAX_DATA;
+ ac->fsflag = 2 * tfrags - 3;
+ ac->sflag = 0;
+ ac->rsflag = ac->fsflag;
+ ac->arc_dhost = ah->arc_dhost;
+ ac->arc_shost = ah->arc_shost;
+ ac->arc_type = ah->arc_type;
+
+ m_adj(m, ARC_HDRNEWLEN);
+ ac->curr_frag = m;
+ }
+
+ /* split out next fragment and return it */
+ if (ac->sflag < ac->fsflag) {
+ /* we CAN'T have short packets here */
+ ac->curr_frag = m_split(m, ARC_MAX_DATA, M_DONTWAIT);
+ if (ac->curr_frag == 0) {
+ m_freem(m);
+ return 0;
+ }
+
+ M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT);
+ if (m == 0) {
+ m_freem(ac->curr_frag);
+ ac->curr_frag = 0;
+ return 0;
+ }
+
+ ah = mtod(m, struct arc_header *);
+ ah->arc_flag = ac->rsflag;
+ ah->arc_seqid = ac->ac_seqid;
+
+ ac->sflag += 2;
+ ac->rsflag = ac->sflag;
+ } else if ((m->m_pkthdr.len >=
+ ARC_MIN_FORBID_LEN - ARC_HDRNEWLEN + 2) &&
+ (m->m_pkthdr.len <=
+ ARC_MAX_FORBID_LEN - ARC_HDRNEWLEN + 2)) {
+ ac->curr_frag = 0;
+
+ M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_DONTWAIT);
+ if (m == 0)
+ return 0;
+
+ ah = mtod(m, struct arc_header *);
+ ah->arc_flag = 0xFF;
+ ah->arc_seqid = 0xFFFF;
+ ah->arc_type2 = ac->arc_type;
+ ah->arc_flag2 = ac->sflag;
+ ah->arc_seqid2 = ac->ac_seqid;
+ } else {
+ ac->curr_frag = 0;
+
+ M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT);
+ if (m == 0)
+ return 0;
+
+ ah = mtod(m, struct arc_header *);
+ ah->arc_flag = ac->sflag;
+ ah->arc_seqid = ac->ac_seqid;
+ }
+
+ ah->arc_dhost = ac->arc_dhost;
+ ah->arc_shost = ac->arc_shost;
+ ah->arc_type = ac->arc_type;
+
+ return m;
+}
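+
+/*
+ * A worked example of the split-flag arithmetic above: a 1500-byte
+ * PHDS packet gives tfrags = 3 (at ARC_MAX_DATA = 504 bytes per
+ * fragment) and fsflag = 2 * 3 - 3 = 3.  The fragments then carry
+ * arc_flag = 3 (odd marks the first fragment), 2, and 4; arc_defrag()
+ * below accepts each following fragment at af_lastseen + 2 and declares
+ * the packet complete once the flag exceeds af_maxflag.
+ */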
+
+/*
+ * Defragmenter. Returns the mbuf if the last fragment was found, else
+ * NULL. Frees the incoming mbuf as necessary.
+ */
+
+static __inline struct mbuf *
+arc_defrag(struct ifnet *ifp, struct mbuf *m)
+{
+ struct arc_header *ah, *ah1;
+ struct arccom *ac;
+ struct ac_frag *af;
+ struct mbuf *m1;
+ char *s;
+ int newflen;
+ u_char src,dst,typ;
+
+ ac = (struct arccom *)ifp->if_l2com;
+
+ if (m->m_len < ARC_HDRNEWLEN) {
+ m = m_pullup(m, ARC_HDRNEWLEN);
+ if (m == NULL) {
+ ++ifp->if_ierrors;
+ return NULL;
+ }
+ }
+
+ ah = mtod(m, struct arc_header *);
+ typ = ah->arc_type;
+
+ if (!arc_isphds(typ))
+ return m;
+
+ src = ah->arc_shost;
+ dst = ah->arc_dhost;
+
+ if (ah->arc_flag == 0xff) {
+ m_adj(m, 4);
+
+ if (m->m_len < ARC_HDRNEWLEN) {
+ m = m_pullup(m, ARC_HDRNEWLEN);
+ if (m == NULL) {
+ ++ifp->if_ierrors;
+ return NULL;
+ }
+ }
+
+ ah = mtod(m, struct arc_header *);
+ }
+
+ af = &ac->ac_fragtab[src];
+ m1 = af->af_packet;
+ s = "debug code error";
+
+ if (ah->arc_flag & 1) {
+ /*
+ * first fragment. We always initialize, which is
+ * about the right thing to do, as we only want to
+ * accept one fragmented packet per src at a time.
+ */
+ if (m1 != NULL)
+ m_freem(m1);
+
+ af->af_packet = m;
+ m1 = m;
+ af->af_maxflag = ah->arc_flag;
+ af->af_lastseen = 0;
+ af->af_seqid = ah->arc_seqid;
+
+ return NULL;
+ /* notreached */
+ } else {
+ /* check for unfragmented packet */
+ if (ah->arc_flag == 0)
+ return m;
+
+ /* do we have a first packet from that src? */
+ if (m1 == NULL) {
+ s = "no first frag";
+ goto outofseq;
+ }
+
+ ah1 = mtod(m1, struct arc_header *);
+
+ if (ah->arc_seqid != ah1->arc_seqid) {
+ s = "seqid differs";
+ goto outofseq;
+ }
+
+ if (typ != ah1->arc_type) {
+ s = "type differs";
+ goto outofseq;
+ }
+
+ if (dst != ah1->arc_dhost) {
+ s = "dest host differs";
+ goto outofseq;
+ }
+
+ /* typ, seqid and dst are ok here. */
+
+ if (ah->arc_flag == af->af_lastseen) {
+ m_freem(m);
+ return NULL;
+ }
+
+ if (ah->arc_flag == af->af_lastseen + 2) {
+ /* ok, this is next fragment */
+ af->af_lastseen = ah->arc_flag;
+ m_adj(m,ARC_HDRNEWLEN);
+
+ /*
+ * m_cat might free the first mbuf (with pkthdr)
+ * in 2nd chain; therefore:
+ */
+
+ newflen = m->m_pkthdr.len;
+
+ m_cat(m1,m);
+
+ m1->m_pkthdr.len += newflen;
+
+ /* is it the last one? */
+ if (af->af_lastseen > af->af_maxflag) {
+ af->af_packet = NULL;
+ return(m1);
+ } else
+ return NULL;
+ }
+ s = "other reason";
+ /* if all else fails, it is out of sequence, too */
+ }
+outofseq:
+ if (m1) {
+ m_freem(m1);
+ af->af_packet = NULL;
+ }
+
+ if (m)
+ m_freem(m);
+
+ log(LOG_INFO,"%s: got out of seq. packet: %s\n",
+ ifp->if_xname, s);
+
+ return NULL;
+}
+
+/*
+ * Return 1 if the type uses the Packet Header Definition Standard (PHDS),
+ * else 0. Old IP and old ARP obviously do not. Lacking correct
+ * information, we guess that besides new IP and new ARP, IPX and
+ * AppleTalk are PHDS as well (Apple and Novell, among others, were
+ * involved in the PHDS work). Easiest is to assume that everybody
+ * else uses it, too.
+ */
+int
+arc_isphds(u_int8_t type)
+{
+ return (type != ARCTYPE_IP_OLD &&
+ type != ARCTYPE_ARP_OLD &&
+ type != ARCTYPE_DIAGNOSE);
+}
+
+/*
+ * Process a received Arcnet packet;
+ * the packet is in the mbuf chain m with
+ * the ARCnet header.
+ */
+void
+arc_input(struct ifnet *ifp, struct mbuf *m)
+{
+ struct arc_header *ah;
+ int isr;
+ u_int8_t atype;
+
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ m_freem(m);
+ return;
+ }
+
+ /* possibly defragment: */
+ m = arc_defrag(ifp, m);
+ if (m == NULL)
+ return;
+
+ BPF_MTAP(ifp, m);
+
+ ah = mtod(m, struct arc_header *);
+ /* does this belong to us? */
+ if ((ifp->if_flags & IFF_PROMISC) == 0
+ && ah->arc_dhost != arcbroadcastaddr
+ && ah->arc_dhost != ARC_LLADDR(ifp)) {
+ m_freem(m);
+ return;
+ }
+
+ ifp->if_ibytes += m->m_pkthdr.len;
+
+ if (ah->arc_dhost == arcbroadcastaddr) {
+ m->m_flags |= M_BCAST|M_MCAST;
+ ifp->if_imcasts++;
+ }
+
+ atype = ah->arc_type;
+ switch (atype) {
+#ifdef INET
+ case ARCTYPE_IP:
+ m_adj(m, ARC_HDRNEWLEN);
+ if ((m = ip_fastforward(m)) == NULL)
+ return;
+ isr = NETISR_IP;
+ break;
+
+ case ARCTYPE_IP_OLD:
+ m_adj(m, ARC_HDRLEN);
+ if ((m = ip_fastforward(m)) == NULL)
+ return;
+ isr = NETISR_IP;
+ break;
+
+ case ARCTYPE_ARP:
+ if (ifp->if_flags & IFF_NOARP) {
+ /* Discard packet if ARP is disabled on interface */
+ m_freem(m);
+ return;
+ }
+ m_adj(m, ARC_HDRNEWLEN);
+ isr = NETISR_ARP;
+#ifdef ARCNET_ALLOW_BROKEN_ARP
+ mtod(m, struct arphdr *)->ar_pro = htons(ETHERTYPE_IP);
+#endif
+ break;
+
+ case ARCTYPE_ARP_OLD:
+ if (ifp->if_flags & IFF_NOARP) {
+ /* Discard packet if ARP is disabled on interface */
+ m_freem(m);
+ return;
+ }
+ m_adj(m, ARC_HDRLEN);
+ isr = NETISR_ARP;
+#ifdef ARCNET_ALLOW_BROKEN_ARP
+ mtod(m, struct arphdr *)->ar_pro = htons(ETHERTYPE_IP);
+#endif
+ break;
+#endif
+#ifdef INET6
+ case ARCTYPE_INET6:
+ m_adj(m, ARC_HDRNEWLEN);
+ isr = NETISR_IPV6;
+ break;
+#endif
+#ifdef IPX
+ case ARCTYPE_IPX:
+ m_adj(m, ARC_HDRNEWLEN);
+ isr = NETISR_IPX;
+ break;
+#endif
+ default:
+ m_freem(m);
+ return;
+ }
+ netisr_dispatch(isr, m);
+}
+
+/*
+ * Register (new) link level address.
+ */
+void
+arc_storelladdr(struct ifnet *ifp, u_int8_t lla)
+{
+ ARC_LLADDR(ifp) = lla;
+}
+
+/*
+ * Perform common duties while attaching to interface list
+ */
+void
+arc_ifattach(struct ifnet *ifp, u_int8_t lla)
+{
+ struct ifaddr *ifa;
+ struct sockaddr_dl *sdl;
+ struct arccom *ac;
+
+ if_attach(ifp);
+ ifp->if_addrlen = 1;
+ ifp->if_hdrlen = ARC_HDRLEN;
+ ifp->if_mtu = 1500;
+ ifp->if_resolvemulti = arc_resolvemulti;
+ if (ifp->if_baudrate == 0)
+ ifp->if_baudrate = 2500000;
+#if __FreeBSD_version < 500000
+ ifa = ifnet_addrs[ifp->if_index - 1];
+#else
+ ifa = ifp->if_addr;
+#endif
+ KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl->sdl_type = IFT_ARCNET;
+ sdl->sdl_alen = ifp->if_addrlen;
+
+ if (ifp->if_flags & IFF_BROADCAST)
+ ifp->if_flags |= IFF_MULTICAST|IFF_ALLMULTI;
+
+ ac = (struct arccom *)ifp->if_l2com;
+ ac->ac_seqid = (time_second) & 0xFFFF; /* try to make seqid unique */
+ if (lla == 0) {
+ /* XXX this message isn't entirely clear, to me -- cgd */
+ log(LOG_ERR,"%s: link address 0 reserved for broadcasts. Please change it and ifconfig %s down up\n",
+ ifp->if_xname, ifp->if_xname);
+ }
+ arc_storelladdr(ifp, lla);
+
+ ifp->if_broadcastaddr = &arcbroadcastaddr;
+
+ bpfattach(ifp, DLT_ARCNET, ARC_HDRLEN);
+}
+
+void
+arc_ifdetach(struct ifnet *ifp)
+{
+ bpfdetach(ifp);
+ if_detach(ifp);
+}
+
+int
+arc_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+{
+ struct ifaddr *ifa = (struct ifaddr *) data;
+ struct ifreq *ifr = (struct ifreq *) data;
+ int error = 0;
+
+ switch (command) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ ifp->if_init(ifp->if_softc); /* before arpwhohas */
+ arp_ifinit(ifp, ifa);
+ break;
+#endif
+#ifdef IPX
+ /*
+ * XXX This code is probably wrong
+ */
+ case AF_IPX:
+ {
+ struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr);
+
+ if (ipx_nullhost(*ina))
+ ina->x_host.c_host[5] = ARC_LLADDR(ifp);
+ else
+ arc_storelladdr(ifp, ina->x_host.c_host[5]);
+
+ /*
+ * Set new address
+ */
+ ifp->if_init(ifp->if_softc);
+ break;
+ }
+#endif
+ default:
+ ifp->if_init(ifp->if_softc);
+ break;
+ }
+ break;
+
+ case SIOCGIFADDR:
+ {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *) &ifr->ifr_data;
+ *(u_int8_t *)sa->sa_data = ARC_LLADDR(ifp);
+ }
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ if (ifr == NULL)
+ error = EAFNOSUPPORT;
+ else {
+ switch (ifr->ifr_addr.sa_family) {
+ case AF_INET:
+ case AF_INET6:
+ error = 0;
+ break;
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ }
+ break;
+
+ case SIOCSIFMTU:
+ /*
+ * Set the interface MTU.
+		 * The MTU can't be larger than ARCMTU for RFC 1051
+		 * and can't be larger than ARC_PHDS_MAXMTU for PHDS.
+ */
+ if (((ifp->if_flags & IFF_LINK0) && ifr->ifr_mtu > ARCMTU) ||
+ ifr->ifr_mtu > ARC_PHDS_MAXMTU)
+ error = EINVAL;
+ else
+ ifp->if_mtu = ifr->ifr_mtu;
+ break;
+ }
+
+ return (error);
+}
+
+/* based on ether_resolvemulti() */
+int
+arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+ struct sockaddr *sa)
+{
+ struct sockaddr_dl *sdl;
+#ifdef INET
+ struct sockaddr_in *sin;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+#endif
+
+ switch(sa->sa_family) {
+ case AF_LINK:
+ /*
+ * No mapping needed. Just check that it's a valid MC address.
+ */
+ sdl = (struct sockaddr_dl *)sa;
+ if (*LLADDR(sdl) != arcbroadcastaddr)
+ return EADDRNOTAVAIL;
+ *llsa = 0;
+ return 0;
+#ifdef INET
+ case AF_INET:
+ sin = (struct sockaddr_in *)sa;
+ if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ return EADDRNOTAVAIL;
+ sdl = malloc(sizeof *sdl, M_IFMADDR,
+ M_NOWAIT | M_ZERO);
+ if (sdl == NULL)
+ return ENOMEM;
+ sdl->sdl_len = sizeof *sdl;
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = IFT_ARCNET;
+ sdl->sdl_alen = ARC_ADDR_LEN;
+ *LLADDR(sdl) = 0;
+ *llsa = (struct sockaddr *)sdl;
+ return 0;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /*
+ * An IP6 address of 0 means listen to all
+			 * of the multicast addresses used for IP6.
+ * (This is used for multicast routers.)
+ */
+ ifp->if_flags |= IFF_ALLMULTI;
+ *llsa = 0;
+ return 0;
+ }
+ if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+ return EADDRNOTAVAIL;
+ sdl = malloc(sizeof *sdl, M_IFMADDR,
+ M_NOWAIT | M_ZERO);
+ if (sdl == NULL)
+ return ENOMEM;
+ sdl->sdl_len = sizeof *sdl;
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = IFT_ARCNET;
+ sdl->sdl_alen = ARC_ADDR_LEN;
+ *LLADDR(sdl) = 0;
+ *llsa = (struct sockaddr *)sdl;
+ return 0;
+#endif
+
+ default:
+ /*
+ * Well, the text isn't quite right, but it's the name
+ * that counts...
+ */
+ return EAFNOSUPPORT;
+ }
+}
+
+MALLOC_DEFINE(M_ARCCOM, "arccom", "ARCNET interface internals");
+
+static void*
+arc_alloc(u_char type, struct ifnet *ifp)
+{
+ struct arccom *ac;
+
+ ac = malloc(sizeof(struct arccom), M_ARCCOM, M_WAITOK | M_ZERO);
+ ac->ac_ifp = ifp;
+
+ return (ac);
+}
+
+static void
+arc_free(void *com, u_char type)
+{
+
+ free(com, M_ARCCOM);
+}
+
+static int
+arc_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ if_register_com_alloc(IFT_ARCNET, arc_alloc, arc_free);
+ break;
+ case MOD_UNLOAD:
+ if_deregister_com_alloc(IFT_ARCNET);
+ break;
+ default:
+ return EOPNOTSUPP;
+ }
+
+ return (0);
+}
+
+static moduledata_t arc_mod = {
+ "arcnet",
+ arc_modevent,
+ 0
+};
+
+DECLARE_MODULE(arcnet, arc_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
+MODULE_VERSION(arcnet, 1);
diff --git a/freebsd/sys/net/if_arp.h b/freebsd/sys/net/if_arp.h
new file mode 100644
index 00000000..2ad9fffb
--- /dev/null
+++ b/freebsd/sys/net/if_arp.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/net/if_arp.h>
diff --git a/freebsd/sys/net/if_atm.h b/freebsd/sys/net/if_atm.h
new file mode 100644
index 00000000..e8f69da0
--- /dev/null
+++ b/freebsd/sys/net/if_atm.h
@@ -0,0 +1,337 @@
+/* $NetBSD: if_atm.h,v 1.7 1996/11/09 23:02:27 chuck Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ *
+ * Copyright (c) 1996 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * net/if_atm.h
+ */
+
+/*
+ * Classification of ATM cards.
+ */
+#define ATM_DEVICE_UNKNOWN 0
+#define ATM_DEVICE_PCA200E 1 /* Fore/Marconi PCA200-E */
+#define ATM_DEVICE_HE155 2 /* Fore/Marconi HE155 */
+#define ATM_DEVICE_HE622 3 /* Fore/Marconi HE622 */
+#define ATM_DEVICE_ENI155P 4 /* Efficient networks 155p */
+#define ATM_DEVICE_ADP155P 5 /* Adaptec 155p */
+#define ATM_DEVICE_FORELE25 6 /* ForeRunnerLE 25 */
+#define ATM_DEVICE_FORELE155 7 /* ForeRunnerLE 155 */
+#define ATM_DEVICE_NICSTAR25 8 /* other 77211 25.6MBit */
+#define ATM_DEVICE_NICSTAR155 9 /* other 77211 155MBit */
+#define ATM_DEVICE_IDTABR25 10 /* 77252 based card 25MBit */
+#define ATM_DEVICE_IDTABR155 11 /* 77252 based card 155MBit */
+#define ATM_DEVICE_PROATM25 12 /* 77252 based ProSum card 25MBit */
+#define ATM_DEVICE_PROATM155 13 /* 77252 based ProSum card 155MBit */
+#define ATM_DEVICE_VIRTUAL 14 /* virtual ATM device (netgraph) */
+
+/* map to strings and vendors */
+#define ATM_DEVICE_NAMES \
+ { "Unknown", "Unknown" }, \
+ { "PCA200-E", "Fore/Marconi" }, \
+ { "HE155", "Fore/Marconi" }, \
+ { "HE622", "Fore/Marconi" }, \
+ { "ENI155p", "Efficient Networks" }, \
+ { "ADP155p", "Adaptec" }, \
+ { "ForeRunnerLE25", "Fore/Marconi" }, \
+ { "ForeRunnerLE155", "Fore/Marconi" }, \
+ { "IDT77211/25", "IDT" }, \
+ { "IDT77211/155", "IDT" }, \
+ { "IDT77252/25", "IDT" }, \
+ { "IDT77252/155", "IDT" }, \
+ { "ProATM/25", "ProSum" }, \
+ { "ProATM/155", "ProSum" }, \
+ { "Virtual", "NetGraph" },
+
+/*
+ * This is the common link layer MIB for all ATM interfaces. Much of the
+ * information here is needed for ILMI. This will be augmented by statistics
+ * at some point.
+ */
+struct ifatm_mib {
+ /* configuration data */
+ uint8_t device; /* type of card */
+ u_char esi[6]; /* end system identifier (MAC) */
+ uint32_t serial; /* card serial number */
+ uint32_t hw_version; /* card version */
+ uint32_t sw_version; /* firmware version (if any) */
+ uint32_t pcr; /* supported peak cell rate */
+ uint32_t media; /* physical media */
+ uint8_t vpi_bits; /* number of used bits in VPI field */
+ uint8_t vci_bits; /* number of used bits in VCI field */
+ uint16_t max_vpcs; /* maximum number of VPCs */
+ uint32_t max_vccs; /* maximum number of VCCs */
+};
+
+/*
+ * Traffic parameters for ATM connections. This contains all parameters
+ * to accommodate UBR, UBR+MCR, CBR, VBR and ABR connections.
+ *
+ * Keep in sync with ng_atm.h
+ */
+struct atmio_tparam {
+ uint32_t pcr; /* 24bit: Peak Cell Rate */
+ uint32_t scr; /* 24bit: VBR Sustainable Cell Rate */
+ uint32_t mbs; /* 24bit: VBR Maximum burst size */
+ uint32_t mcr; /* 24bit: ABR/VBR/UBR+MCR MCR */
+ uint32_t icr; /* 24bit: ABR ICR */
+ uint32_t tbe; /* 24bit: ABR TBE (1...2^24-1) */
+ uint8_t nrm; /* 3bit: ABR Nrm */
+ uint8_t trm; /* 3bit: ABR Trm */
+ uint16_t adtf; /* 10bit: ABR ADTF */
+ uint8_t rif; /* 4bit: ABR RIF */
+ uint8_t rdf; /* 4bit: ABR RDF */
+ uint8_t cdf; /* 3bit: ABR CDF */
+};
+
+/*
+ * VCC parameters
+ *
+ * Keep in sync with ng_atm.h
+ */
+struct atmio_vcc {
+ uint16_t flags; /* VCC flags */
+ uint16_t vpi;
+ uint16_t vci;
+ uint16_t rmtu; /* maximum receive PDU */
+ uint16_t tmtu; /* maximum transmit PDU */
+ uint8_t aal; /* aal type */
+ uint8_t traffic; /* traffic type */
+ struct atmio_tparam tparam; /* traffic parameters */
+};
+
+/* VCC flags */
+#define ATMIO_FLAG_LLCSNAP 0x0002 /* same as ATM_PH_LLCSNAP */
+#define ATMIO_FLAG_NG 0x0010 /* owned by netgraph */
+#define ATMIO_FLAG_HARP 0x0020 /* owned by HARP */
+#define ATMIO_FLAG_NORX 0x0100 /* not receiving on this VCC */
+#define ATMIO_FLAG_NOTX 0x0200 /* not transmitting on this VCC */
+#define ATMIO_FLAG_PVC 0x0400 /* this is a PVC */
+#define ATMIO_FLAG_ASYNC 0x0800 /* async open/close */
+#define ATMIO_FLAGS "\020\2LLCSNAP\5NG\6HARP\11NORX\12NOTX\13PVC\14ASYNC"
+
+#define ATMIO_AAL_0 0 /* pure cells */
+#define ATMIO_AAL_34 4 /* AAL3 and 4 */
+#define ATMIO_AAL_5 5 /* AAL5 */
+#define ATMIO_AAL_RAW 10 /* whatever the card does */
+
+#define ATMIO_TRAFFIC_UBR 0
+#define ATMIO_TRAFFIC_CBR 1
+#define ATMIO_TRAFFIC_ABR 2
+#define ATMIO_TRAFFIC_VBR 3
+
+/*
+ * VCC table
+ *
+ * Keep in sync with ng_atm.h
+ */
+struct atmio_vcctable {
+ uint32_t count; /* number of vccs */
+ struct atmio_vcc vccs[0]; /* array of VCCs */
+};
+
+/*
+ * Peak cell rates for various physical media. Note, that there are
+ * different opinions on what the correct values are.
+ */
+#define ATM_RATE_25_6M 59259
+#define ATM_RATE_155M 353208
+#define ATM_RATE_622M 1412830
+#define ATM_RATE_2_4G 5651320
+
+#ifdef _KERNEL
+/*
+ * Common fields for all ATM interfaces. Each driver's softc must start with
+ * this structure.
+ */
+struct ifatm {
+ struct ifnet *ifp;
+ struct ifatm_mib mib; /* exported data */
+ void *phy; /* usually SUNI */
+ void *ngpriv; /* netgraph link */
+};
+#define IFP2IFATM(ifp) ((struct ifatm *)(ifp)->if_l2com)
+#endif
+
+/*
+ * Keep structures in sync with ng_atm.h
+ *
+ * These are used by netgraph/harp to call the driver
+ * NATM uses the atm_pseudoioctl instead.
+ */
+struct atmio_openvcc {
+ void *rxhand; /* handle argument */
+ struct atmio_vcc param; /* parameters */
+};
+
+struct atmio_closevcc {
+ uint16_t vpi;
+ uint16_t vci;
+};
+
+#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__)
+#define RTALLOC1(A,B) rtalloc1((A),(B))
+#elif defined(__FreeBSD__)
+#define RTALLOC1(A,B) rtalloc1((A),(B),0UL)
+#endif
+
+/*
+ * pseudo header for packet transmission
+ */
+struct atm_pseudohdr {
+ uint8_t atm_ph[4]; /* flags+VPI+VCI1(msb)+VCI2(lsb) */
+};
+
+#define ATM_PH_FLAGS(X) ((X)->atm_ph[0])
+#define ATM_PH_VPI(X) ((X)->atm_ph[1])
+#define ATM_PH_VCI(X) ((((X)->atm_ph[2]) << 8) | ((X)->atm_ph[3]))
+#define ATM_PH_SETVCI(X,V) { \
+ (X)->atm_ph[2] = ((V) >> 8) & 0xff; \
+ (X)->atm_ph[3] = ((V) & 0xff); \
+}
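+
+/*
+ * A worked example: ATM_PH_SETVCI(&ph, 0x123) stores 0x01 in atm_ph[2]
+ * and 0x23 in atm_ph[3]; ATM_PH_VCI(&ph) then reassembles
+ * (0x01 << 8) | 0x23 == 0x123.
+ */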
+
+/* use AAL5? (0 == aal0) */
+#define ATM_PH_AAL5 0x01
+/* use the LLC SNAP encoding (iff aal5) */
+#define ATM_PH_LLCSNAP ATMIO_FLAG_LLCSNAP
+
+#define ATM_PH_DRIVER7 0x40 /* reserve for driver's use */
+#define ATM_PH_DRIVER8 0x80 /* reserve for driver's use */
+
+#define ATMMTU 9180 /* ATM MTU size for IP */
+				/* XXX: could be 9188 with LLC/SNAP according
+				   to Comer */
+
+#define SIOCATMGETVCCS _IOW('a', 125, struct atmio_vcctable)
+#define SIOCATMOPENVCC _IOR('a', 126, struct atmio_openvcc)
+#define SIOCATMCLOSEVCC _IOR('a', 127, struct atmio_closevcc)
+
+#define SIOCATMGVCCS _IOWR('i', 230, struct ifreq)
+
+/*
+ * XXX forget all the garbage in if_llc.h and do it the easy way
+ */
+#define ATMLLC_HDR "\252\252\3\0\0\0"
+struct atmllc {
+ uint8_t llchdr[6]; /* aa.aa.03.00.00.00 */
+ uint8_t type[2]; /* "ethernet" type */
+};
+
+/* ATM_LLC macros: note type code in host byte order */
+#define ATM_LLC_TYPE(X) (((X)->type[0] << 8) | ((X)->type[1]))
+#define ATM_LLC_SETTYPE(X, V) do { \
+ (X)->type[0] = ((V) >> 8) & 0xff; \
+ (X)->type[1] = ((V) & 0xff); \
+ } while (0)
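+
+/*
+ * A worked example: an LLC/SNAP encapsulated IPv4 PDU starts with the
+ * 6-byte ATMLLC_HDR (aa.aa.03.00.00.00) followed by the EtherType, so
+ * ATM_LLC_SETTYPE(alc, 0x0800) writes 0x08, 0x00 into alc->type and
+ * ATM_LLC_TYPE(alc) reads back 0x0800.
+ */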
+
+/*
+ * Events that are emitted by the driver. Currently the only consumer
+ * of this is the netgraph node.
+ */
+#define ATMEV_FLOW_CONTROL 0x0001 /* channel busy state changed */
+#define ATMEV_IFSTATE_CHANGED 0x0002 /* up/down or carrier */
+#define ATMEV_VCC_CHANGED 0x0003 /* PVC deleted/created */
+#define ATMEV_ACR_CHANGED 0x0004 /* ABR ACR has changed */
+
+struct atmev_flow_control {
+ uint16_t vpi; /* channel that is changed */
+ uint16_t vci;
+ u_int busy : 1; /* != 0 -> ATM layer busy */
+};
+
+struct atmev_ifstate_changed {
+ u_int running : 1; /* interface is running now */
+ u_int carrier : 1; /* carrier detected (or not) */
+};
+
+struct atmev_vcc_changed {
+ uint16_t vpi; /* channel that is changed */
+ uint16_t vci;
+ u_int up : 1; /* 1 - created, 0 - deleted */
+};
+
+struct atmev_acr_changed {
+ uint16_t vpi; /* channel that is changed */
+ uint16_t vci;
+ uint32_t acr; /* new ACR */
+};
+
+#ifdef _KERNEL
+void atm_ifattach(struct ifnet *);
+void atm_ifdetach(struct ifnet *);
+void atm_input(struct ifnet *, struct atm_pseudohdr *,
+ struct mbuf *, void *);
+int atm_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+struct atmio_vcctable *atm_getvccs(struct atmio_vcc **, u_int, u_int,
+ struct mtx *, int);
+
+void atm_event(struct ifnet *, u_int, void *);
+
+#define ATMEV_SEND_FLOW_CONTROL(ATMIF, VPI, VCI, BUSY) \
+ do { \
+ struct atmev_flow_control _arg; \
+ _arg.vpi = (VPI); \
+ _arg.vci = (VCI); \
+ _arg.busy = (BUSY); \
+ atm_event((ATMIF)->ifp, ATMEV_FLOW_CONTROL, &_arg); \
+ } while (0)
+
+#define ATMEV_SEND_VCC_CHANGED(ATMIF, VPI, VCI, UP) \
+ do { \
+ struct atmev_vcc_changed _arg; \
+ _arg.vpi = (VPI); \
+ _arg.vci = (VCI); \
+ _arg.up = (UP); \
+ atm_event((ATMIF)->ifp, ATMEV_VCC_CHANGED, &_arg); \
+ } while (0)
+
+#define ATMEV_SEND_IFSTATE_CHANGED(ATMIF, CARRIER) \
+ do { \
+ struct atmev_ifstate_changed _arg; \
+ _arg.running = (((ATMIF)->ifp->if_drv_flags & \
+ IFF_DRV_RUNNING) != 0); \
+ _arg.carrier = ((CARRIER) != 0); \
+ atm_event((ATMIF)->ifp, ATMEV_IFSTATE_CHANGED, &_arg); \
+ } while (0)
+
+#define ATMEV_SEND_ACR_CHANGED(ATMIF, VPI, VCI, ACR) \
+ do { \
+ struct atmev_acr_changed _arg; \
+ _arg.vpi = (VPI); \
+ _arg.vci = (VCI); \
+		_arg.acr = (ACR);					\
+ atm_event((ATMIF)->ifp, ATMEV_ACR_CHANGED, &_arg); \
+ } while (0)
+#endif
diff --git a/freebsd/sys/net/if_atmsubr.c b/freebsd/sys/net/if_atmsubr.c
new file mode 100644
index 00000000..7daa347f
--- /dev/null
+++ b/freebsd/sys/net/if_atmsubr.c
@@ -0,0 +1,504 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $NetBSD: if_atmsubr.c,v 1.10 1997/03/11 23:19:51 chuck Exp $ */
+
+/*-
+ *
+ * Copyright (c) 1996 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * if_atmsubr.c
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_natm.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/malloc.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_atm.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/if_atm.h>
+#include <freebsd/netinet/if_ether.h> /* XXX: for ETHERTYPE_* */
+#if defined(INET) || defined(INET6)
+#include <freebsd/netinet/in_var.h>
+#endif
+#ifdef NATM
+#include <freebsd/netnatm/natm.h>
+#endif
+
+#include <freebsd/security/mac/mac_framework.h>
+
+/*
+ * Netgraph interface functions.
+ * These need not be protected by a lock, because ng_atm nodes are persistent.
+ * The ng_atm module can be unloaded only if all ATM interfaces have been
+ * unloaded, so nobody should be in the code paths accessing these function
+ * pointers.
+ */
+void (*ng_atm_attach_p)(struct ifnet *);
+void (*ng_atm_detach_p)(struct ifnet *);
+int (*ng_atm_output_p)(struct ifnet *, struct mbuf **);
+void (*ng_atm_input_p)(struct ifnet *, struct mbuf **,
+ struct atm_pseudohdr *, void *);
+void (*ng_atm_input_orphan_p)(struct ifnet *, struct mbuf *,
+ struct atm_pseudohdr *, void *);
+void (*ng_atm_event_p)(struct ifnet *, uint32_t, void *);
+
+/*
+ * Harp pseudo interface hooks
+ */
+void (*atm_harp_input_p)(struct ifnet *ifp, struct mbuf **m,
+ struct atm_pseudohdr *ah, void *rxhand);
+void (*atm_harp_attach_p)(struct ifnet *);
+void (*atm_harp_detach_p)(struct ifnet *);
+void (*atm_harp_event_p)(struct ifnet *, uint32_t, void *);
+
+SYSCTL_NODE(_hw, OID_AUTO, atm, CTLFLAG_RW, 0, "ATM hardware");
+
+MALLOC_DEFINE(M_IFATM, "ifatm", "atm interface internals");
+
+#ifndef ETHERTYPE_IPV6
+#define ETHERTYPE_IPV6 0x86dd
+#endif
+
+#define senderr(e) do { error = (e); goto bad; } while (0)
+
+/*
+ * atm_output: ATM output routine
+ * inputs:
+ * "ifp" = ATM interface to output to
+ * "m0" = the packet to output
+ * "dst" = the sockaddr to send to (either IP addr, or raw VPI/VCI)
+ * "ro" = the route to use
+ * returns: error code [0 == ok]
+ *
+ * note: special semantic: if (dst == NULL) then we assume "m" already
+ * has an atm_pseudohdr on it and just send it directly.
+ * [for native mode ATM output] if dst is null, then
+ * ro->ro_rt must also be NULL.
+ */
+int
+atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+ struct route *ro)
+{
+ u_int16_t etype = 0; /* if using LLC/SNAP */
+ int error = 0, sz;
+ struct atm_pseudohdr atmdst, *ad;
+ struct mbuf *m = m0;
+ struct atmllc *atmllc;
+ struct atmllc *llc_hdr = NULL;
+ u_int32_t atm_flags;
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ senderr(error);
+#endif
+
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ senderr(ENETDOWN);
+
+ /*
+ * check for non-native ATM traffic (dst != NULL)
+ */
+ if (dst) {
+ switch (dst->sa_family) {
+
+#if defined(INET) || defined(INET6)
+ case AF_INET:
+ case AF_INET6:
+ {
+ if (dst->sa_family == AF_INET6)
+ etype = ETHERTYPE_IPV6;
+ else
+ etype = ETHERTYPE_IP;
+ if (!atmresolve(ro->ro_rt, m, dst, &atmdst)) {
+ m = NULL;
+			/* XXX: atmresolve already freed it */
+ senderr(EHOSTUNREACH);
+ /* XXX: put ATMARP stuff here */
+ /* XXX: watch who frees m on failure */
+ }
+ }
+ break;
+#endif /* INET || INET6 */
+
+ case AF_UNSPEC:
+ /*
+ * XXX: bpfwrite. assuming dst contains 12 bytes
+ * (atm pseudo header (4) + LLC/SNAP (8))
+ */
+ bcopy(dst->sa_data, &atmdst, sizeof(atmdst));
+ llc_hdr = (struct atmllc *)(dst->sa_data +
+ sizeof(atmdst));
+ break;
+
+ default:
+ printf("%s: can't handle af%d\n", ifp->if_xname,
+ dst->sa_family);
+ senderr(EAFNOSUPPORT);
+ }
+
+ /*
+ * must add atm_pseudohdr to data
+ */
+ sz = sizeof(atmdst);
+ atm_flags = ATM_PH_FLAGS(&atmdst);
+ if (atm_flags & ATM_PH_LLCSNAP)
+ sz += 8; /* sizeof snap == 8 */
+ M_PREPEND(m, sz, M_DONTWAIT);
+ if (m == 0)
+ senderr(ENOBUFS);
+ ad = mtod(m, struct atm_pseudohdr *);
+ *ad = atmdst;
+ if (atm_flags & ATM_PH_LLCSNAP) {
+ atmllc = (struct atmllc *)(ad + 1);
+ if (llc_hdr == NULL) {
+ bcopy(ATMLLC_HDR, atmllc->llchdr,
+ sizeof(atmllc->llchdr));
+ /* note: in host order */
+ ATM_LLC_SETTYPE(atmllc, etype);
+ }
+ else
+ bcopy(llc_hdr, atmllc, sizeof(struct atmllc));
+ }
+ }
+
+ if (ng_atm_output_p != NULL) {
+ if ((error = (*ng_atm_output_p)(ifp, &m)) != 0) {
+ if (m != NULL)
+ m_freem(m);
+ return (error);
+ }
+ if (m == NULL)
+ return (0);
+ }
+
+ /*
+ * Queue message on interface, and start output if interface
+ * not yet active.
+ */
+ if (!IF_HANDOFF_ADJ(&ifp->if_snd, m, ifp,
+ -(int)sizeof(struct atm_pseudohdr)))
+ return (ENOBUFS);
+ return (error);
+
+bad:
+ if (m)
+ m_freem(m);
+ return (error);
+}
+
+/*
+ * Process a received ATM packet;
+ * the packet is in the mbuf chain m.
+ */
+void
+atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m,
+ void *rxhand)
+{
+ int isr;
+ u_int16_t etype = ETHERTYPE_IP; /* default */
+
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ m_freem(m);
+ return;
+ }
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+ ifp->if_ibytes += m->m_pkthdr.len;
+
+ if (ng_atm_input_p != NULL) {
+ (*ng_atm_input_p)(ifp, &m, ah, rxhand);
+ if (m == NULL)
+ return;
+ }
+
+ /* not eaten by ng_atm. Maybe it's a pseudo-harp PDU? */
+ if (atm_harp_input_p != NULL) {
+ (*atm_harp_input_p)(ifp, &m, ah, rxhand);
+ if (m == NULL)
+ return;
+ }
+
+ if (rxhand) {
+#ifdef NATM
+ struct natmpcb *npcb;
+
+ /*
+ * XXXRW: this use of 'rxhand' is not a very good idea, and
+ * was subject to races even before SMPng due to the release
+ * of spl here.
+ */
+ NATM_LOCK();
+ npcb = rxhand;
+ npcb->npcb_inq++; /* count # in queue */
+ isr = NETISR_NATM;
+ m->m_pkthdr.rcvif = rxhand; /* XXX: overload */
+ NATM_UNLOCK();
+#else
+ printf("atm_input: NATM detected but not "
+ "configured in kernel\n");
+ goto dropit;
+#endif
+ } else {
+ /*
+ * handle LLC/SNAP header, if present
+ */
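+		/*
+		 * An RFC 1483 LLC/SNAP encapsulation is 8 bytes: the LLC
+		 * header AA-AA-03, a 3-byte OUI and a 2-byte EtherType.
+		 * The bcmp() below compares only the first 6 bytes
+		 * (LLC + OUI); the EtherType is extracted afterwards.
+		 */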
+ if (ATM_PH_FLAGS(ah) & ATM_PH_LLCSNAP) {
+ struct atmllc *alc;
+
+ if (m->m_len < sizeof(*alc) &&
+ (m = m_pullup(m, sizeof(*alc))) == 0)
+ return; /* failed */
+ alc = mtod(m, struct atmllc *);
+ if (bcmp(alc, ATMLLC_HDR, 6)) {
+ printf("%s: recv'd invalid LLC/SNAP frame "
+ "[vp=%d,vc=%d]\n", ifp->if_xname,
+ ATM_PH_VPI(ah), ATM_PH_VCI(ah));
+ m_freem(m);
+ return;
+ }
+ etype = ATM_LLC_TYPE(alc);
+ m_adj(m, sizeof(*alc));
+ }
+
+ switch (etype) {
+
+#ifdef INET
+ case ETHERTYPE_IP:
+ isr = NETISR_IP;
+ break;
+#endif
+
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ break;
+#endif
+ default:
+#ifndef NATM
+ dropit:
+#endif
+ if (ng_atm_input_orphan_p != NULL)
+ (*ng_atm_input_orphan_p)(ifp, m, ah, rxhand);
+ else
+ m_freem(m);
+ return;
+ }
+ }
+ netisr_dispatch(isr, m);
+}
+
+/*
+ * Perform common duties while attaching to interface list.
+ */
+void
+atm_ifattach(struct ifnet *ifp)
+{
+ struct ifaddr *ifa;
+ struct sockaddr_dl *sdl;
+ struct ifatm *ifatm = ifp->if_l2com;
+
+ ifp->if_addrlen = 0;
+ ifp->if_hdrlen = 0;
+ if_attach(ifp);
+ ifp->if_mtu = ATMMTU;
+ ifp->if_output = atm_output;
+#if 0
+ ifp->if_input = atm_input;
+#endif
+ ifp->if_snd.ifq_maxlen = 50; /* dummy */
+
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
+ if (ifa->ifa_addr->sa_family == AF_LINK) {
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl->sdl_type = IFT_ATM;
+ sdl->sdl_alen = ifp->if_addrlen;
+#ifdef notyet /* if using ATMARP, store hardware address using the next line */
+ bcopy(ifp->hw_addr, LLADDR(sdl), ifp->if_addrlen);
+#endif
+ break;
+ }
+
+ ifp->if_linkmib = &ifatm->mib;
+ ifp->if_linkmiblen = sizeof(ifatm->mib);
+
+	if (ng_atm_attach_p)
+ (*ng_atm_attach_p)(ifp);
+ if (atm_harp_attach_p)
+ (*atm_harp_attach_p)(ifp);
+}
+
+/*
+ * Common stuff for detaching an ATM interface
+ */
+void
+atm_ifdetach(struct ifnet *ifp)
+{
+ if (atm_harp_detach_p)
+ (*atm_harp_detach_p)(ifp);
+	if (ng_atm_detach_p)
+ (*ng_atm_detach_p)(ifp);
+ if_detach(ifp);
+}
+
+/*
+ * Support routine for the SIOCATMGVCCS ioctl().
+ *
+ * This routine assumes that the private VCC structures used by the driver
+ * begin with a struct atmio_vcc.
+ *
+ * Return a table of VCCs in a freshly allocated memory area.
+ * Here we have a problem: we first count how many VCCs we need
+ * to return, then we allocate the memory and finally fill it in.
+ * Because we cannot hold the lock while calling malloc, the number
+ * of active VCCs may change while we're in malloc. So we allocate
+ * a couple of extra VCCs and, if the space still isn't enough, retry.
+ *
+ * We could use an sx lock for the vcc tables.
+ */
+struct atmio_vcctable *
+atm_getvccs(struct atmio_vcc **table, u_int size, u_int start,
+ struct mtx *lock, int waitok)
+{
+ u_int cid, alloc;
+ size_t len;
+ struct atmio_vcctable *vccs;
+ struct atmio_vcc *v;
+
+ alloc = start + 10;
+ vccs = NULL;
+
+ for (;;) {
+ len = sizeof(*vccs) + alloc * sizeof(vccs->vccs[0]);
+ vccs = reallocf(vccs, len, M_TEMP,
+ waitok ? M_WAITOK : M_NOWAIT);
+ if (vccs == NULL)
+ return (NULL);
+ bzero(vccs, len);
+
+ vccs->count = 0;
+ v = vccs->vccs;
+
+ mtx_lock(lock);
+ for (cid = 0; cid < size; cid++)
+ if (table[cid] != NULL) {
+ if (++vccs->count == alloc)
+ /* too many - try again */
+ break;
+ *v++ = *table[cid];
+ }
+ mtx_unlock(lock);
+
+ if (cid == size)
+ break;
+
+ alloc *= 2;
+ }
+ return (vccs);
+}
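+
+/*
+ * A hypothetical driver-side use of atm_getvccs(), assuming a softc with
+ * a VCC pointer table "vccs" of "max_vcc" entries guarded by "mtx" (all
+ * names are illustrative, not from any real driver):
+ *
+ *	t = atm_getvccs((struct atmio_vcc **)sc->vccs, sc->max_vcc,
+ *	    sc->open_count, &sc->mtx, 1);
+ */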
+
+/*
+ * Driver or channel state has changed. Inform whoever is interested
+ * in these events.
+ */
+void
+atm_event(struct ifnet *ifp, u_int event, void *arg)
+{
+ if (ng_atm_event_p != NULL)
+ (*ng_atm_event_p)(ifp, event, arg);
+ if (atm_harp_event_p != NULL)
+ (*atm_harp_event_p)(ifp, event, arg);
+}
+
+static void *
+atm_alloc(u_char type, struct ifnet *ifp)
+{
+ struct ifatm *ifatm;
+
+ ifatm = malloc(sizeof(struct ifatm), M_IFATM, M_WAITOK | M_ZERO);
+ ifatm->ifp = ifp;
+
+ return (ifatm);
+}
+
+static void
+atm_free(void *com, u_char type)
+{
+
+ free(com, M_IFATM);
+}
+
+static int
+atm_modevent(module_t mod, int type, void *data)
+{
+ switch (type) {
+ case MOD_LOAD:
+ if_register_com_alloc(IFT_ATM, atm_alloc, atm_free);
+ break;
+ case MOD_UNLOAD:
+ if_deregister_com_alloc(IFT_ATM);
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+
+ return (0);
+}
+
+static moduledata_t atm_mod = {
+ "atm",
+ atm_modevent,
+ 0
+};
+
+DECLARE_MODULE(atm, atm_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
+MODULE_VERSION(atm, 1);
diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
new file mode 100644
index 00000000..de7aea04
--- /dev/null
+++ b/freebsd/sys/net/if_bridge.c
@@ -0,0 +1,3458 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
+ */
+
+/*
+ * Network interface bridge support.
+ *
+ * TODO:
+ *
+ * - Currently only supports Ethernet-like interfaces (Ethernet,
+ * 802.11, VLANs on Ethernet, etc.) Figure out a nice way
+ * to bridge other types of interfaces (FDDI-FDDI, and maybe
+ * consider heterogeneous bridges).
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/socket.h> /* for net/if.h */
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/ctype.h> /* string functions */
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/random.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/vm/uma.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/rwlock.h>
+
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/pfil.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h> /* for struct arpcom */
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#endif
+#if defined(INET) || defined(INET6)
+#include <freebsd/netinet/ip_carp.h>
+#endif
+#include <freebsd/machine/in_cksum.h>
+#include <freebsd/netinet/if_ether.h> /* for struct arpcom */
+#include <freebsd/net/bridgestp.h>
+#include <freebsd/net/if_bridgevar.h>
+#include <freebsd/net/if_llc.h>
+#include <freebsd/net/if_vlan_var.h>
+
+#include <freebsd/net/route.h>
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+
+/*
+ * Size of the route hash table. Must be a power of two.
+ */
+#ifndef BRIDGE_RTHASH_SIZE
+#define BRIDGE_RTHASH_SIZE 1024
+#endif
+
+#define BRIDGE_RTHASH_MASK (BRIDGE_RTHASH_SIZE - 1)
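+
+/*
+ * With a power-of-two size the bucket index reduces to a cheap bitwise
+ * AND, i.e. bucket = hash & BRIDGE_RTHASH_MASK.
+ */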
+
+/*
+ * Maximum number of addresses to cache.
+ */
+#ifndef BRIDGE_RTABLE_MAX
+#define BRIDGE_RTABLE_MAX 100
+#endif
+
+/*
+ * Timeout (in seconds) for entries learned dynamically.
+ */
+#ifndef BRIDGE_RTABLE_TIMEOUT
+#define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */
+#endif
+
+/*
+ * Number of seconds between walks of the route list.
+ */
+#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
+#define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60)
+#endif
+
+/*
+ * List of capabilities to possibly mask on the member interface.
+ */
+#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM)
+
+/*
+ * List of capabilities to strip
+ */
+#define BRIDGE_IFCAPS_STRIP IFCAP_LRO
+
+/*
+ * Bridge interface list entry.
+ */
+struct bridge_iflist {
+ LIST_ENTRY(bridge_iflist) bif_next;
+ struct ifnet *bif_ifp; /* member if */
+ struct bstp_port bif_stp; /* STP state */
+ uint32_t bif_flags; /* member if flags */
+ int bif_savedcaps; /* saved capabilities */
+ uint32_t bif_addrmax; /* max # of addresses */
+ uint32_t bif_addrcnt; /* cur. # of addresses */
+ uint32_t bif_addrexceeded;/* # of address violations */
+};
+
+/*
+ * Bridge route node.
+ */
+struct bridge_rtnode {
+ LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */
+ LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */
+ struct bridge_iflist *brt_dst; /* destination if */
+ unsigned long brt_expire; /* expiration time */
+ uint8_t brt_flags; /* address flags */
+ uint8_t brt_addr[ETHER_ADDR_LEN];
+ uint16_t brt_vlan; /* vlan id */
+};
+#define brt_ifp brt_dst->bif_ifp
+
+/*
+ * Software state for each bridge.
+ */
+struct bridge_softc {
+ struct ifnet *sc_ifp; /* make this an interface */
+ LIST_ENTRY(bridge_softc) sc_list;
+ struct mtx sc_mtx;
+ struct cv sc_cv;
+ uint32_t sc_brtmax; /* max # of addresses */
+ uint32_t sc_brtcnt; /* cur. # of addresses */
+ uint32_t sc_brttimeout; /* rt timeout in seconds */
+ struct callout sc_brcallout; /* bridge callout */
+ uint32_t sc_iflist_ref; /* refcount for sc_iflist */
+ uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */
+ LIST_HEAD(, bridge_iflist) sc_iflist; /* member interface list */
+ LIST_HEAD(, bridge_rtnode) *sc_rthash; /* our forwarding table */
+ LIST_HEAD(, bridge_rtnode) sc_rtlist; /* list version of above */
+ uint32_t sc_rthash_key; /* key for hash */
+ LIST_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
+ struct bstp_state sc_stp; /* STP state */
+ uint32_t sc_brtexceeded; /* # of cache drops */
+ struct ifnet *sc_ifaddr; /* member mac copied from */
+ u_char sc_defaddr[6]; /* Default MAC address */
+};
+
+static struct mtx bridge_list_mtx;
+eventhandler_tag bridge_detach_cookie = NULL;
+
+int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
+
+uma_zone_t bridge_rtnode_zone;
+
+static int bridge_clone_create(struct if_clone *, int, caddr_t);
+static void bridge_clone_destroy(struct ifnet *);
+
+static int bridge_ioctl(struct ifnet *, u_long, caddr_t);
+static void bridge_mutecaps(struct bridge_softc *);
+static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
+ int);
+static void bridge_ifdetach(void *arg __unused, struct ifnet *);
+static void bridge_init(void *);
+static void bridge_dummynet(struct mbuf *, struct ifnet *);
+static void bridge_stop(struct ifnet *, int);
+static void bridge_start(struct ifnet *);
+static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
+static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+static void bridge_enqueue(struct bridge_softc *, struct ifnet *,
+ struct mbuf *);
+static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
+
+static void bridge_forward(struct bridge_softc *, struct bridge_iflist *,
+ struct mbuf *m);
+
+static void bridge_timer(void *);
+
+static void bridge_broadcast(struct bridge_softc *, struct ifnet *,
+ struct mbuf *, int);
+static void bridge_span(struct bridge_softc *, struct mbuf *);
+
+static int bridge_rtupdate(struct bridge_softc *, const uint8_t *,
+ uint16_t, struct bridge_iflist *, int, uint8_t);
+static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
+ uint16_t);
+static void bridge_rttrim(struct bridge_softc *);
+static void bridge_rtage(struct bridge_softc *);
+static void bridge_rtflush(struct bridge_softc *, int);
+static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
+ uint16_t);
+
+static int bridge_rtable_init(struct bridge_softc *);
+static void bridge_rtable_fini(struct bridge_softc *);
+
+static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
+static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
+ const uint8_t *, uint16_t);
+static int bridge_rtnode_insert(struct bridge_softc *,
+ struct bridge_rtnode *);
+static void bridge_rtnode_destroy(struct bridge_softc *,
+ struct bridge_rtnode *);
+static void bridge_rtable_expire(struct ifnet *, int);
+static void bridge_state_change(struct ifnet *, int);
+
+static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
+ const char *name);
+static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
+ struct ifnet *ifp);
+static void bridge_delete_member(struct bridge_softc *,
+ struct bridge_iflist *, int);
+static void bridge_delete_span(struct bridge_softc *,
+ struct bridge_iflist *);
+
+static int bridge_ioctl_add(struct bridge_softc *, void *);
+static int bridge_ioctl_del(struct bridge_softc *, void *);
+static int bridge_ioctl_gifflags(struct bridge_softc *, void *);
+static int bridge_ioctl_sifflags(struct bridge_softc *, void *);
+static int bridge_ioctl_scache(struct bridge_softc *, void *);
+static int bridge_ioctl_gcache(struct bridge_softc *, void *);
+static int bridge_ioctl_gifs(struct bridge_softc *, void *);
+static int bridge_ioctl_rts(struct bridge_softc *, void *);
+static int bridge_ioctl_saddr(struct bridge_softc *, void *);
+static int bridge_ioctl_sto(struct bridge_softc *, void *);
+static int bridge_ioctl_gto(struct bridge_softc *, void *);
+static int bridge_ioctl_daddr(struct bridge_softc *, void *);
+static int bridge_ioctl_flush(struct bridge_softc *, void *);
+static int bridge_ioctl_gpri(struct bridge_softc *, void *);
+static int bridge_ioctl_spri(struct bridge_softc *, void *);
+static int bridge_ioctl_ght(struct bridge_softc *, void *);
+static int bridge_ioctl_sht(struct bridge_softc *, void *);
+static int bridge_ioctl_gfd(struct bridge_softc *, void *);
+static int bridge_ioctl_sfd(struct bridge_softc *, void *);
+static int bridge_ioctl_gma(struct bridge_softc *, void *);
+static int bridge_ioctl_sma(struct bridge_softc *, void *);
+static int bridge_ioctl_sifprio(struct bridge_softc *, void *);
+static int bridge_ioctl_sifcost(struct bridge_softc *, void *);
+static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
+static int bridge_ioctl_addspan(struct bridge_softc *, void *);
+static int bridge_ioctl_delspan(struct bridge_softc *, void *);
+static int bridge_ioctl_gbparam(struct bridge_softc *, void *);
+static int bridge_ioctl_grte(struct bridge_softc *, void *);
+static int bridge_ioctl_gifsstp(struct bridge_softc *, void *);
+static int bridge_ioctl_sproto(struct bridge_softc *, void *);
+static int bridge_ioctl_stxhc(struct bridge_softc *, void *);
+static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
+ int);
+static int bridge_ip_checkbasic(struct mbuf **mp);
+#ifdef INET6
+static int bridge_ip6_checkbasic(struct mbuf **mp);
+#endif /* INET6 */
+static int bridge_fragment(struct ifnet *, struct mbuf *,
+ struct ether_header *, int, struct llc *);
+
+/* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
+#define VLANTAGOF(_m) \
+ (((_m)->m_flags & M_VLANTAG) ? EVL_VLANOFTAG((_m)->m_pkthdr.ether_vtag) : 1)
+
+static struct bstp_cb_ops bridge_ops = {
+ .bcb_state = bridge_state_change,
+ .bcb_rtage = bridge_rtable_expire
+};
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
+
+static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
+static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
+static int pfil_member = 1; /* run pfil hooks on the member interface */
+static int pfil_ipfw = 0; /* layer2 filter with ipfw */
+static int pfil_ipfw_arp = 0; /* layer2 filter with ipfw */
+static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for
+ locally destined packets */
+static int log_stp = 0; /* log STP state changes */
+static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
+ &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW,
+ &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
+ &pfil_bridge, 0, "Packet filter on the bridge interface");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
+ &pfil_member, 0, "Packet filter on the member interface");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RW,
+ &pfil_local_phys, 0,
+ "Packet filter on the physical interface for locally destined packets");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
+ &log_stp, 0, "Log STP state changes");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RW,
+ &bridge_inherit_mac, 0,
+ "Inherit MAC address from the first bridge member");
+
+struct bridge_control {
+ int (*bc_func)(struct bridge_softc *, void *);
+ int bc_argsize;
+ int bc_flags;
+};
+
+#define BC_F_COPYIN 0x01 /* copy arguments in */
+#define BC_F_COPYOUT 0x02 /* copy arguments out */
+#define BC_F_SUSER 0x04 /* do super-user check */
+
+const struct bridge_control bridge_control_table[] = {
+ { bridge_ioctl_add, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_SUSER },
+ { bridge_ioctl_del, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_gifflags, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_COPYOUT },
+ { bridge_ioctl_sifflags, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_scache, sizeof(struct ifbrparam),
+ BC_F_COPYIN|BC_F_SUSER },
+ { bridge_ioctl_gcache, sizeof(struct ifbrparam),
+ BC_F_COPYOUT },
+
+ { bridge_ioctl_gifs, sizeof(struct ifbifconf),
+ BC_F_COPYIN|BC_F_COPYOUT },
+ { bridge_ioctl_rts, sizeof(struct ifbaconf),
+ BC_F_COPYIN|BC_F_COPYOUT },
+
+ { bridge_ioctl_saddr, sizeof(struct ifbareq),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_sto, sizeof(struct ifbrparam),
+ BC_F_COPYIN|BC_F_SUSER },
+ { bridge_ioctl_gto, sizeof(struct ifbrparam),
+ BC_F_COPYOUT },
+
+ { bridge_ioctl_daddr, sizeof(struct ifbareq),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_flush, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_gpri, sizeof(struct ifbrparam),
+ BC_F_COPYOUT },
+ { bridge_ioctl_spri, sizeof(struct ifbrparam),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_ght, sizeof(struct ifbrparam),
+ BC_F_COPYOUT },
+ { bridge_ioctl_sht, sizeof(struct ifbrparam),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_gfd, sizeof(struct ifbrparam),
+ BC_F_COPYOUT },
+ { bridge_ioctl_sfd, sizeof(struct ifbrparam),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_gma, sizeof(struct ifbrparam),
+ BC_F_COPYOUT },
+ { bridge_ioctl_sma, sizeof(struct ifbrparam),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_sifprio, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_sifcost, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_addspan, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_SUSER },
+ { bridge_ioctl_delspan, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_gbparam, sizeof(struct ifbropreq),
+ BC_F_COPYOUT },
+
+ { bridge_ioctl_grte, sizeof(struct ifbrparam),
+ BC_F_COPYOUT },
+
+ { bridge_ioctl_gifsstp, sizeof(struct ifbpstpconf),
+ BC_F_COPYIN|BC_F_COPYOUT },
+
+ { bridge_ioctl_sproto, sizeof(struct ifbrparam),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_stxhc, sizeof(struct ifbrparam),
+ BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_SUSER },
+
+};
+const int bridge_control_table_size =
+ sizeof(bridge_control_table) / sizeof(bridge_control_table[0]);
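+
+/*
+ * bridge_ioctl() uses the ifd_cmd field of a SIOC[GS]DRVSPEC request as
+ * an index into this table, so the order above must match the BRDG*
+ * command numbers in if_bridgevar.h.
+ */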
+
+LIST_HEAD(, bridge_softc) bridge_list;
+
+IFC_SIMPLE_DECLARE(bridge, 0);
+
+static int
+bridge_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ mtx_init(&bridge_list_mtx, "if_bridge list", NULL, MTX_DEF);
+ if_clone_attach(&bridge_cloner);
+ bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
+ sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ LIST_INIT(&bridge_list);
+ bridge_input_p = bridge_input;
+ bridge_output_p = bridge_output;
+ bridge_dn_p = bridge_dummynet;
+ bridge_detach_cookie = EVENTHANDLER_REGISTER(
+ ifnet_departure_event, bridge_ifdetach, NULL,
+ EVENTHANDLER_PRI_ANY);
+ break;
+ case MOD_UNLOAD:
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+ bridge_detach_cookie);
+ if_clone_detach(&bridge_cloner);
+ uma_zdestroy(bridge_rtnode_zone);
+ bridge_input_p = NULL;
+ bridge_output_p = NULL;
+ bridge_dn_p = NULL;
+ mtx_destroy(&bridge_list_mtx);
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t bridge_mod = {
+ "if_bridge",
+ bridge_modevent,
+ 0
+};
+
+DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);
+
+/*
+ * handler for net.link.bridge.pfil_ipfw
+ */
+static int
+sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
+{
+ int enable = pfil_ipfw;
+ int error;
+
+ error = sysctl_handle_int(oidp, &enable, 0, req);
+ enable = (enable) ? 1 : 0;
+
+ if (enable != pfil_ipfw) {
+ pfil_ipfw = enable;
+
+ /*
+		 * Disable pfil so that ipfw doesn't run twice. If the user
+		 * really wants both, they can re-enable pfil_bridge and/or
+		 * pfil_member. Also allow non-IP packets, as ipfw can filter
+		 * by layer-2 type.
+ */
+ if (pfil_ipfw) {
+ pfil_onlyip = 0;
+ pfil_bridge = 0;
+ pfil_member = 0;
+ }
+ }
+
+ return (error);
+}
+SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT|CTLFLAG_RW,
+ &pfil_ipfw, 0, &sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW");
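+/*
+ * From userland this is reachable as, e.g., "sysctl net.link.bridge.ipfw=1";
+ * note that enabling it also clears pfil_onlyip, pfil_bridge and pfil_member.
+ */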
+
+/*
+ * bridge_clone_create:
+ *
+ * Create a new bridge instance.
+ */
+static int
+bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct bridge_softc *sc, *sc2;
+ struct ifnet *bifp, *ifp;
+ int retry;
+
+ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+ ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ free(sc, M_DEVBUF);
+ return (ENOSPC);
+ }
+
+ BRIDGE_LOCK_INIT(sc);
+ sc->sc_brtmax = BRIDGE_RTABLE_MAX;
+ sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
+
+ /* Initialize our routing table. */
+ bridge_rtable_init(sc);
+
+ callout_init_mtx(&sc->sc_brcallout, &sc->sc_mtx, 0);
+
+ LIST_INIT(&sc->sc_iflist);
+ LIST_INIT(&sc->sc_spanlist);
+
+ ifp->if_softc = sc;
+ if_initname(ifp, ifc->ifc_name, unit);
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_ioctl = bridge_ioctl;
+ ifp->if_start = bridge_start;
+ ifp->if_init = bridge_init;
+ ifp->if_type = IFT_BRIDGE;
+ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
+ ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
+ IFQ_SET_READY(&ifp->if_snd);
+
+ /*
+	 * Generate a random, locally administered Ethernet address for
+	 * the bridge.
+ *
+ * Since we are using random ethernet addresses for the bridge, it is
+ * possible that we might have address collisions, so make sure that
+ * this hardware address isn't already in use on another bridge.
+ */
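+	/*
+	 * In the first address octet, bit 0 is the multicast (group) bit
+	 * and bit 1 the locally-administered bit, hence the two fixups
+	 * after arc4rand() below.
+	 */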
+ for (retry = 1; retry != 0;) {
+ arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
+ sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
+ sc->sc_defaddr[0] |= 2; /* set the LAA bit */
+ retry = 0;
+ mtx_lock(&bridge_list_mtx);
+ LIST_FOREACH(sc2, &bridge_list, sc_list) {
+ bifp = sc2->sc_ifp;
+ if (memcmp(sc->sc_defaddr,
+ IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0)
+ retry = 1;
+ }
+ mtx_unlock(&bridge_list_mtx);
+ }
+
+ bstp_attach(&sc->sc_stp, &bridge_ops);
+ ether_ifattach(ifp, sc->sc_defaddr);
+ /* Now undo some of the damage... */
+ ifp->if_baudrate = 0;
+ ifp->if_type = IFT_BRIDGE;
+
+ mtx_lock(&bridge_list_mtx);
+ LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
+ mtx_unlock(&bridge_list_mtx);
+
+ return (0);
+}
+
+/*
+ * bridge_clone_destroy:
+ *
+ * Destroy a bridge instance.
+ */
+static void
+bridge_clone_destroy(struct ifnet *ifp)
+{
+ struct bridge_softc *sc = ifp->if_softc;
+ struct bridge_iflist *bif;
+
+ BRIDGE_LOCK(sc);
+
+ bridge_stop(ifp, 1);
+ ifp->if_flags &= ~IFF_UP;
+
+ while ((bif = LIST_FIRST(&sc->sc_iflist)) != NULL)
+ bridge_delete_member(sc, bif, 0);
+
+ while ((bif = LIST_FIRST(&sc->sc_spanlist)) != NULL) {
+ bridge_delete_span(sc, bif);
+ }
+
+ BRIDGE_UNLOCK(sc);
+
+ callout_drain(&sc->sc_brcallout);
+
+ mtx_lock(&bridge_list_mtx);
+ LIST_REMOVE(sc, sc_list);
+ mtx_unlock(&bridge_list_mtx);
+
+ bstp_detach(&sc->sc_stp);
+ ether_ifdetach(ifp);
+ if_free_type(ifp, IFT_ETHER);
+
+ /* Tear down the routing table. */
+ bridge_rtable_fini(sc);
+
+ BRIDGE_LOCK_DESTROY(sc);
+ free(sc, M_DEVBUF);
+}
+
+/*
+ * bridge_ioctl:
+ *
+ * Handle a control request from the operator.
+ */
+static int
+bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct bridge_softc *sc = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct bridge_iflist *bif;
+ struct thread *td = curthread;
+ union {
+ struct ifbreq ifbreq;
+ struct ifbifconf ifbifconf;
+ struct ifbareq ifbareq;
+ struct ifbaconf ifbaconf;
+ struct ifbrparam ifbrparam;
+ struct ifbropreq ifbropreq;
+ } args;
+ struct ifdrv *ifd = (struct ifdrv *) data;
+ const struct bridge_control *bc;
+ int error = 0;
+
+ switch (cmd) {
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ break;
+
+ case SIOCGDRVSPEC:
+ case SIOCSDRVSPEC:
+ if (ifd->ifd_cmd >= bridge_control_table_size) {
+ error = EINVAL;
+ break;
+ }
+ bc = &bridge_control_table[ifd->ifd_cmd];
+
+ if (cmd == SIOCGDRVSPEC &&
+ (bc->bc_flags & BC_F_COPYOUT) == 0) {
+ error = EINVAL;
+ break;
+ }
+ else if (cmd == SIOCSDRVSPEC &&
+ (bc->bc_flags & BC_F_COPYOUT) != 0) {
+ error = EINVAL;
+ break;
+ }
+
+ if (bc->bc_flags & BC_F_SUSER) {
+ error = priv_check(td, PRIV_NET_BRIDGE);
+ if (error)
+ break;
+ }
+
+ if (ifd->ifd_len != bc->bc_argsize ||
+ ifd->ifd_len > sizeof(args)) {
+ error = EINVAL;
+ break;
+ }
+
+ bzero(&args, sizeof(args));
+ if (bc->bc_flags & BC_F_COPYIN) {
+ error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
+ if (error)
+ break;
+ }
+
+ BRIDGE_LOCK(sc);
+ error = (*bc->bc_func)(sc, &args);
+ BRIDGE_UNLOCK(sc);
+ if (error)
+ break;
+
+ if (bc->bc_flags & BC_F_COPYOUT)
+ error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
+
+ break;
+
+ case SIOCSIFFLAGS:
+ if (!(ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ /*
+ * If interface is marked down and it is running,
+ * then stop and disable it.
+ */
+ BRIDGE_LOCK(sc);
+ bridge_stop(ifp, 1);
+ BRIDGE_UNLOCK(sc);
+ } else if ((ifp->if_flags & IFF_UP) &&
+ !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ /*
+ * If interface is marked up and it is stopped, then
+ * start it.
+ */
+ (*ifp->if_init)(sc);
+ }
+ break;
+
+ case SIOCSIFMTU:
+ if (ifr->ifr_mtu < 576) {
+ error = EINVAL;
+ break;
+ }
+ if (LIST_EMPTY(&sc->sc_iflist)) {
+ sc->sc_ifp->if_mtu = ifr->ifr_mtu;
+ break;
+ }
+ BRIDGE_LOCK(sc);
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) {
+ log(LOG_NOTICE, "%s: invalid MTU: %lu(%s)"
+ " != %d\n", sc->sc_ifp->if_xname,
+ bif->bif_ifp->if_mtu,
+ bif->bif_ifp->if_xname, ifr->ifr_mtu);
+ error = EINVAL;
+ break;
+ }
+ }
+ if (!error)
+ sc->sc_ifp->if_mtu = ifr->ifr_mtu;
+ BRIDGE_UNLOCK(sc);
+ break;
+ default:
+ /*
+ * drop the lock as ether_ioctl() will call bridge_start() and
+ * cause the lock to be recursed.
+ */
+ error = ether_ioctl(ifp, cmd, data);
+ break;
+ }
+
+ return (error);
+}
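+
+/*
+ * Sketch of how userland reaches bridge_ioctl() (the BRDG* command index
+ * constants come from if_bridgevar.h; error handling omitted, local
+ * variable names illustrative):
+ *
+ *	struct ifbreq req;
+ *	struct ifdrv ifd;
+ *
+ *	memset(&req, 0, sizeof(req));
+ *	strlcpy(req.ifbr_ifsname, "em0", sizeof(req.ifbr_ifsname));
+ *	strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
+ *	ifd.ifd_cmd = BRDGADD;
+ *	ifd.ifd_len = sizeof(req);
+ *	ifd.ifd_data = &req;
+ *	ioctl(s, SIOCSDRVSPEC, &ifd);
+ */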
+
+/*
+ * bridge_mutecaps:
+ *
+ * Clear or restore unwanted capabilities on the member interface
+ */
+static void
+bridge_mutecaps(struct bridge_softc *sc)
+{
+ struct bridge_iflist *bif;
+ int enabled, mask;
+
+ /* Initial bitmask of capabilities to test */
+ mask = BRIDGE_IFCAPS_MASK;
+
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+		/* Every member must support it or it's disabled */
+ mask &= bif->bif_savedcaps;
+ }
+
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ enabled = bif->bif_ifp->if_capenable;
+ enabled &= ~BRIDGE_IFCAPS_STRIP;
+ /* strip off mask bits and enable them again if allowed */
+ enabled &= ~BRIDGE_IFCAPS_MASK;
+ enabled |= mask;
+ bridge_set_ifcap(sc, bif, enabled);
+ }
+}
+
+static void
+bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
+{
+ struct ifnet *ifp = bif->bif_ifp;
+ struct ifreq ifr;
+ int error;
+
+ bzero(&ifr, sizeof(ifr));
+ ifr.ifr_reqcap = set;
+
+ if (ifp->if_capenable != set) {
+ error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
+ if (error)
+ if_printf(sc->sc_ifp,
+ "error setting interface capabilities on %s\n",
+ ifp->if_xname);
+ }
+}
+
+/*
+ * bridge_lookup_member:
+ *
+ * Lookup a bridge member interface.
+ */
+static struct bridge_iflist *
+bridge_lookup_member(struct bridge_softc *sc, const char *name)
+{
+ struct bridge_iflist *bif;
+ struct ifnet *ifp;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ ifp = bif->bif_ifp;
+ if (strcmp(ifp->if_xname, name) == 0)
+ return (bif);
+ }
+
+ return (NULL);
+}
+
+/*
+ * bridge_lookup_member_if:
+ *
+ * Lookup a bridge member interface by ifnet*.
+ */
+static struct bridge_iflist *
+bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
+{
+ struct bridge_iflist *bif;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ if (bif->bif_ifp == member_ifp)
+ return (bif);
+ }
+
+ return (NULL);
+}
+
+/*
+ * bridge_delete_member:
+ *
+ * Delete the specified member interface.
+ */
+static void
+bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
+ int gone)
+{
+ struct ifnet *ifs = bif->bif_ifp;
+ struct ifnet *fif = NULL;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ if (bif->bif_flags & IFBIF_STP)
+ bstp_disable(&bif->bif_stp);
+
+ ifs->if_bridge = NULL;
+ BRIDGE_XLOCK(sc);
+ LIST_REMOVE(bif, bif_next);
+ BRIDGE_XDROP(sc);
+
+ /*
+ * If removing the interface that gave the bridge its mac address, set
+ * the mac address of the bridge to the address of the next member, or
+ * to its default address if no members are left.
+ */
+ if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
+ if (LIST_EMPTY(&sc->sc_iflist)) {
+ bcopy(sc->sc_defaddr,
+ IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+ sc->sc_ifaddr = NULL;
+ } else {
+ fif = LIST_FIRST(&sc->sc_iflist)->bif_ifp;
+ bcopy(IF_LLADDR(fif),
+ IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+ sc->sc_ifaddr = fif;
+ }
+ EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
+ }
+
+	bridge_mutecaps(sc);	/* recalculate now that this interface is removed */
+ bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
+ KASSERT(bif->bif_addrcnt == 0,
+ ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
+
+ BRIDGE_UNLOCK(sc);
+ if (!gone) {
+ switch (ifs->if_type) {
+ case IFT_ETHER:
+ case IFT_L2VLAN:
+ /*
+ * Take the interface out of promiscuous mode.
+ */
+ (void) ifpromisc(ifs, 0);
+ break;
+
+ case IFT_GIF:
+ break;
+
+ default:
+#ifdef DIAGNOSTIC
+ panic("bridge_delete_member: impossible");
+#endif
+ break;
+ }
+		/* re-enable any interface capabilities */
+ bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
+ }
+ bstp_destroy(&bif->bif_stp); /* prepare to free */
+ BRIDGE_LOCK(sc);
+ free(bif, M_DEVBUF);
+}
+
+/*
+ * bridge_delete_span:
+ *
+ * Delete the specified span interface.
+ */
+static void
+bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+ BRIDGE_LOCK_ASSERT(sc);
+
+ KASSERT(bif->bif_ifp->if_bridge == NULL,
+ ("%s: not a span interface", __func__));
+
+ LIST_REMOVE(bif, bif_next);
+ free(bif, M_DEVBUF);
+}
+
+static int
+bridge_ioctl_add(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+ struct bridge_iflist *bif = NULL;
+ struct ifnet *ifs;
+ int error = 0;
+
+ ifs = ifunit(req->ifbr_ifsname);
+ if (ifs == NULL)
+ return (ENOENT);
+ if (ifs->if_ioctl == NULL) /* must be supported */
+ return (EINVAL);
+
+ /* If it's in the span list, it can't be a member. */
+ LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
+ if (ifs == bif->bif_ifp)
+ return (EBUSY);
+
+ if (ifs->if_bridge == sc)
+ return (EEXIST);
+
+ if (ifs->if_bridge != NULL)
+ return (EBUSY);
+
+ bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (bif == NULL)
+ return (ENOMEM);
+
+ bif->bif_ifp = ifs;
+ bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
+ bif->bif_savedcaps = ifs->if_capenable;
+
+ switch (ifs->if_type) {
+ case IFT_ETHER:
+ case IFT_L2VLAN:
+ case IFT_GIF:
+ /* permitted interface types */
+ break;
+ default:
+ error = EINVAL;
+ goto out;
+ }
+
+ /* Allow the first Ethernet member to define the MTU */
+ if (LIST_EMPTY(&sc->sc_iflist))
+ sc->sc_ifp->if_mtu = ifs->if_mtu;
+ else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
+ if_printf(sc->sc_ifp, "invalid MTU: %lu(%s) != %lu\n",
+ ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu);
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * Assign the interface's MAC address to the bridge if it's the first
+ * member and the MAC address of the bridge has not been changed from
+ * the default randomly generated one.
+ */
+ if (bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) &&
+ !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) {
+ bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+ sc->sc_ifaddr = ifs;
+ EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
+ }
+
+ ifs->if_bridge = sc;
+ bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
+ /*
+ * XXX: XLOCK HERE!?!
+ *
+ * NOTE: insert_***HEAD*** should be safe for the traversals.
+ */
+ LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
+
+ /* Set interface capabilities to the intersection set of all members */
+ bridge_mutecaps(sc);
+
+ switch (ifs->if_type) {
+ case IFT_ETHER:
+ case IFT_L2VLAN:
+ /*
+ * Place the interface into promiscuous mode.
+ */
+ BRIDGE_UNLOCK(sc);
+ error = ifpromisc(ifs, 1);
+ BRIDGE_LOCK(sc);
+ break;
+ }
+	if (error) {
+		/* bridge_delete_member() has already freed bif */
+		bridge_delete_member(sc, bif, 0);
+		bif = NULL;
+	}
+out:
+ if (error) {
+ if (bif != NULL)
+ free(bif, M_DEVBUF);
+ }
+ return (error);
+}
+
+static int
+bridge_ioctl_del(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+ struct bridge_iflist *bif;
+
+ bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+ if (bif == NULL)
+ return (ENOENT);
+
+ bridge_delete_member(sc, bif, 0);
+
+ return (0);
+}
+
+static int
+bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+ struct bridge_iflist *bif;
+ struct bstp_port *bp;
+
+ bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+ if (bif == NULL)
+ return (ENOENT);
+
+ bp = &bif->bif_stp;
+ req->ifbr_ifsflags = bif->bif_flags;
+ req->ifbr_state = bp->bp_state;
+ req->ifbr_priority = bp->bp_priority;
+ req->ifbr_path_cost = bp->bp_path_cost;
+ req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
+ req->ifbr_proto = bp->bp_protover;
+ req->ifbr_role = bp->bp_role;
+ req->ifbr_stpflags = bp->bp_flags;
+ req->ifbr_addrcnt = bif->bif_addrcnt;
+ req->ifbr_addrmax = bif->bif_addrmax;
+ req->ifbr_addrexceeded = bif->bif_addrexceeded;
+
+ /* Copy STP state options as flags */
+ if (bp->bp_operedge)
+ req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
+ if (bp->bp_flags & BSTP_PORT_AUTOEDGE)
+ req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
+ if (bp->bp_ptp_link)
+ req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
+ if (bp->bp_flags & BSTP_PORT_AUTOPTP)
+ req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
+ if (bp->bp_flags & BSTP_PORT_ADMEDGE)
+ req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
+ if (bp->bp_flags & BSTP_PORT_ADMCOST)
+ req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
+ return (0);
+}
+
+static int
+bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+ struct bridge_iflist *bif;
+ struct bstp_port *bp;
+ int error;
+
+ bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+ if (bif == NULL)
+ return (ENOENT);
+ bp = &bif->bif_stp;
+
+ if (req->ifbr_ifsflags & IFBIF_SPAN)
+ /* SPAN is readonly */
+ return (EINVAL);
+
+ if (req->ifbr_ifsflags & IFBIF_STP) {
+ if ((bif->bif_flags & IFBIF_STP) == 0) {
+ error = bstp_enable(&bif->bif_stp);
+ if (error)
+ return (error);
+ }
+ } else {
+ if ((bif->bif_flags & IFBIF_STP) != 0)
+ bstp_disable(&bif->bif_stp);
+ }
+
+ /* Pass on STP flags */
+ bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
+ bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
+ bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
+ bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
+
+ /* Save the bits relating to the bridge */
+ bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK;
+
+ return (0);
+}
+
+static int
+bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ sc->sc_brtmax = param->ifbrp_csize;
+ bridge_rttrim(sc);
+
+ return (0);
+}
+
+static int
+bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ param->ifbrp_csize = sc->sc_brtmax;
+
+ return (0);
+}
+
+static int
+bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
+{
+ struct ifbifconf *bifc = arg;
+ struct bridge_iflist *bif;
+ struct ifbreq breq;
+ char *buf, *outbuf;
+ int count, buflen, len, error = 0;
+
+ count = 0;
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
+ count++;
+ LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
+ count++;
+
+ buflen = sizeof(breq) * count;
+ if (bifc->ifbic_len == 0) {
+ bifc->ifbic_len = buflen;
+ return (0);
+ }
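+	/*
+	 * The M_WAITOK allocation and the copyout() may sleep, so the
+	 * bridge lock is dropped around them; "len" below bounds the copy
+	 * in case the interface list changed in the meantime.
+	 */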
+ BRIDGE_UNLOCK(sc);
+ outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
+ BRIDGE_LOCK(sc);
+
+ count = 0;
+ buf = outbuf;
+ len = min(bifc->ifbic_len, buflen);
+ bzero(&breq, sizeof(breq));
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ if (len < sizeof(breq))
+ break;
+
+ strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname,
+ sizeof(breq.ifbr_ifsname));
+ /* Fill in the ifbreq structure */
+ error = bridge_ioctl_gifflags(sc, &breq);
+ if (error)
+ break;
+ memcpy(buf, &breq, sizeof(breq));
+ count++;
+ buf += sizeof(breq);
+ len -= sizeof(breq);
+ }
+ LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
+ if (len < sizeof(breq))
+ break;
+
+ strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname,
+ sizeof(breq.ifbr_ifsname));
+ breq.ifbr_ifsflags = bif->bif_flags;
+ breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff;
+ memcpy(buf, &breq, sizeof(breq));
+ count++;
+ buf += sizeof(breq);
+ len -= sizeof(breq);
+ }
+
+ BRIDGE_UNLOCK(sc);
+ bifc->ifbic_len = sizeof(breq) * count;
+ error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);
+ BRIDGE_LOCK(sc);
+ free(outbuf, M_TEMP);
+ return (error);
+}
+
+static int
+bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
+{
+ struct ifbaconf *bac = arg;
+ struct bridge_rtnode *brt;
+ struct ifbareq bareq;
+ char *buf, *outbuf;
+ int count, buflen, len, error = 0;
+
+ if (bac->ifbac_len == 0)
+ return (0);
+
+ count = 0;
+ LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)
+ count++;
+ buflen = sizeof(bareq) * count;
+
+ BRIDGE_UNLOCK(sc);
+ outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
+ BRIDGE_LOCK(sc);
+
+ count = 0;
+ buf = outbuf;
+ len = min(bac->ifbac_len, buflen);
+ bzero(&bareq, sizeof(bareq));
+ LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
+ if (len < sizeof(bareq))
+ goto out;
+ strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
+ sizeof(bareq.ifba_ifsname));
+ memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
+ bareq.ifba_vlan = brt->brt_vlan;
+ if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
+ time_uptime < brt->brt_expire)
+ bareq.ifba_expire = brt->brt_expire - time_uptime;
+ else
+ bareq.ifba_expire = 0;
+ bareq.ifba_flags = brt->brt_flags;
+
+ memcpy(buf, &bareq, sizeof(bareq));
+ count++;
+ buf += sizeof(bareq);
+ len -= sizeof(bareq);
+ }
+out:
+ BRIDGE_UNLOCK(sc);
+ bac->ifbac_len = sizeof(bareq) * count;
+ error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);
+ BRIDGE_LOCK(sc);
+ free(outbuf, M_TEMP);
+ return (error);
+}
+
+static int
+bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
+{
+ struct ifbareq *req = arg;
+ struct bridge_iflist *bif;
+ int error;
+
+ bif = bridge_lookup_member(sc, req->ifba_ifsname);
+ if (bif == NULL)
+ return (ENOENT);
+
+ error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
+ req->ifba_flags);
+
+ return (error);
+}
+
+static int
+bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ sc->sc_brttimeout = param->ifbrp_ctime;
+ return (0);
+}
+
+static int
+bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ param->ifbrp_ctime = sc->sc_brttimeout;
+ return (0);
+}
+
+static int
+bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
+{
+ struct ifbareq *req = arg;
+
+ return (bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan));
+}
+
+static int
+bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+
+ bridge_rtflush(sc, req->ifbr_ifsflags);
+ return (0);
+}
+
+static int
+bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+ struct bstp_state *bs = &sc->sc_stp;
+
+ param->ifbrp_prio = bs->bs_bridge_priority;
+ return (0);
+}
+
+static int
+bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio));
+}
+
+static int
+bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+ struct bstp_state *bs = &sc->sc_stp;
+
+ param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
+ return (0);
+}
+
+static int
+bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime));
+}
+
+static int
+bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+ struct bstp_state *bs = &sc->sc_stp;
+
+ param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
+ return (0);
+}
+
+static int
+bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay));
+}
+
+static int
+bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+ struct bstp_state *bs = &sc->sc_stp;
+
+ param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
+ return (0);
+}
+
+static int
+bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage));
+}
+
+static int
+bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+ struct bridge_iflist *bif;
+
+ bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+ if (bif == NULL)
+ return (ENOENT);
+
+ return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority));
+}
+
+static int
+bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+ struct bridge_iflist *bif;
+
+ bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+ if (bif == NULL)
+ return (ENOENT);
+
+ return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost));
+}
+
+static int
+bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+ struct bridge_iflist *bif;
+
+ bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+ if (bif == NULL)
+ return (ENOENT);
+
+ bif->bif_addrmax = req->ifbr_addrmax;
+ return (0);
+}
+
+static int
+bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+ struct bridge_iflist *bif = NULL;
+ struct ifnet *ifs;
+
+ ifs = ifunit(req->ifbr_ifsname);
+ if (ifs == NULL)
+ return (ENOENT);
+
+ LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
+ if (ifs == bif->bif_ifp)
+ return (EBUSY);
+
+ if (ifs->if_bridge != NULL)
+ return (EBUSY);
+
+ switch (ifs->if_type) {
+ case IFT_ETHER:
+ case IFT_GIF:
+ case IFT_L2VLAN:
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (bif == NULL)
+ return (ENOMEM);
+
+ bif->bif_ifp = ifs;
+ bif->bif_flags = IFBIF_SPAN;
+
+ LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
+
+ return (0);
+}
+
+static int
+bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+ struct bridge_iflist *bif;
+ struct ifnet *ifs;
+
+ ifs = ifunit(req->ifbr_ifsname);
+ if (ifs == NULL)
+ return (ENOENT);
+
+ LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
+ if (ifs == bif->bif_ifp)
+ break;
+
+ if (bif == NULL)
+ return (ENOENT);
+
+ bridge_delete_span(sc, bif);
+
+ return (0);
+}
+
+static int
+bridge_ioctl_gbparam(struct bridge_softc *sc, void *arg)
+{
+ struct ifbropreq *req = arg;
+ struct bstp_state *bs = &sc->sc_stp;
+ struct bstp_port *root_port;
+
+ req->ifbop_maxage = bs->bs_bridge_max_age >> 8;
+ req->ifbop_hellotime = bs->bs_bridge_htime >> 8;
+ req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8;
+
+ root_port = bs->bs_root_port;
+ if (root_port == NULL)
+ req->ifbop_root_port = 0;
+ else
+ req->ifbop_root_port = root_port->bp_ifp->if_index;
+
+ req->ifbop_holdcount = bs->bs_txholdcount;
+ req->ifbop_priority = bs->bs_bridge_priority;
+ req->ifbop_protocol = bs->bs_protover;
+ req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost;
+ req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id;
+ req->ifbop_designated_root = bs->bs_root_pv.pv_root_id;
+ req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id;
+ req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec;
+ req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec;
+
+ return (0);
+}
+
+static int
+bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ param->ifbrp_cexceeded = sc->sc_brtexceeded;
+ return (0);
+}
+
+static int
+bridge_ioctl_gifsstp(struct bridge_softc *sc, void *arg)
+{
+ struct ifbpstpconf *bifstp = arg;
+ struct bridge_iflist *bif;
+ struct bstp_port *bp;
+ struct ifbpstpreq bpreq;
+ char *buf, *outbuf;
+ int count, buflen, len, error = 0;
+
+ count = 0;
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ if ((bif->bif_flags & IFBIF_STP) != 0)
+ count++;
+ }
+
+ buflen = sizeof(bpreq) * count;
+ if (bifstp->ifbpstp_len == 0) {
+ bifstp->ifbpstp_len = buflen;
+ return (0);
+ }
+
+ BRIDGE_UNLOCK(sc);
+ outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
+ BRIDGE_LOCK(sc);
+
+ count = 0;
+ buf = outbuf;
+ len = min(bifstp->ifbpstp_len, buflen);
+ bzero(&bpreq, sizeof(bpreq));
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ if (len < sizeof(bpreq))
+ break;
+
+ if ((bif->bif_flags & IFBIF_STP) == 0)
+ continue;
+
+ bp = &bif->bif_stp;
+ bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;
+ bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;
+ bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;
+ bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;
+ bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id;
+ bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;
+
+ memcpy(buf, &bpreq, sizeof(bpreq));
+ count++;
+ buf += sizeof(bpreq);
+ len -= sizeof(bpreq);
+ }
+
+ BRIDGE_UNLOCK(sc);
+ bifstp->ifbpstp_len = sizeof(bpreq) * count;
+ error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);
+ BRIDGE_LOCK(sc);
+ free(outbuf, M_TEMP);
+ return (error);
+}
+
+static int
+bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto));
+}
+
+static int
+bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrparam *param = arg;
+
+ return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc));
+}
+
+/*
+ * bridge_ifdetach:
+ *
+ * Detach an interface from a bridge. Called when a member
+ * interface is detaching.
+ */
+static void
+bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+ struct bridge_softc *sc = ifp->if_bridge;
+ struct bridge_iflist *bif;
+
+ /* Check if the interface is a bridge member */
+ if (sc != NULL) {
+ BRIDGE_LOCK(sc);
+
+ bif = bridge_lookup_member_if(sc, ifp);
+ if (bif != NULL)
+ bridge_delete_member(sc, bif, 1);
+
+ BRIDGE_UNLOCK(sc);
+ return;
+ }
+
+ /* Check if the interface is a span port */
+ mtx_lock(&bridge_list_mtx);
+ LIST_FOREACH(sc, &bridge_list, sc_list) {
+ BRIDGE_LOCK(sc);
+ LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
+ if (ifp == bif->bif_ifp) {
+ bridge_delete_span(sc, bif);
+ break;
+ }
+
+ BRIDGE_UNLOCK(sc);
+ }
+ mtx_unlock(&bridge_list_mtx);
+}
+
+/*
+ * bridge_init:
+ *
+ * Initialize a bridge interface.
+ */
+static void
+bridge_init(void *xsc)
+{
+ struct bridge_softc *sc = (struct bridge_softc *)xsc;
+ struct ifnet *ifp = sc->sc_ifp;
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ return;
+
+ BRIDGE_LOCK(sc);
+ callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
+ bridge_timer, sc);
+
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
+
+ BRIDGE_UNLOCK(sc);
+}
+
+/*
+ * bridge_stop:
+ *
+ * Stop the bridge interface.
+ */
+static void
+bridge_stop(struct ifnet *ifp, int disable)
+{
+ struct bridge_softc *sc = ifp->if_softc;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return;
+
+ callout_stop(&sc->sc_brcallout);
+ bstp_stop(&sc->sc_stp);
+
+ bridge_rtflush(sc, IFBF_FLUSHDYN);
+
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+}
+
+/*
+ * bridge_enqueue:
+ *
+ * Enqueue a packet on a bridge member interface.
+ *
+ */
+static void
+bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
+{
+ int len, err = 0;
+ short mflags;
+ struct mbuf *m0;
+
+ len = m->m_pkthdr.len;
+ mflags = m->m_flags;
+	/* We may be sending a fragment chain, so traverse the packet list */
+ /* We may be sending a fragment so traverse the mbuf */
+ for (; m; m = m0) {
+ m0 = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+
+ /*
+		 * If the underlying interface cannot insert the VLAN tag
+		 * itself, prepend an 802.1Q header to carry it.
+ */
+ if ((m->m_flags & M_VLANTAG) &&
+ (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
+ m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
+ if (m == NULL) {
+ if_printf(dst_ifp,
+ "unable to prepend VLAN header\n");
+ dst_ifp->if_oerrors++;
+ continue;
+ }
+ m->m_flags &= ~M_VLANTAG;
+ }
+
+		if ((err = dst_ifp->if_transmit(dst_ifp, m)) != 0) {
+			/* toss the remaining fragments */
+			while ((m = m0) != NULL) {
+				m0 = m->m_nextpkt;
+				m_freem(m);
+			}
+			break;
+		}
+ }
+
+ if (err == 0) {
+ sc->sc_ifp->if_opackets++;
+ sc->sc_ifp->if_obytes += len;
+ if (mflags & M_MCAST)
+ sc->sc_ifp->if_omcasts++;
+ }
+}
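+
+/*
+ * Note that bridge_fragment() can hand bridge_enqueue() several packets
+ * linked through m_nextpkt, which is why the loop above unlinks each one
+ * before handing it to if_transmit().
+ */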
+
+/*
+ * bridge_dummynet:
+ *
+ * Receive a queued packet from dummynet and pass it on to the output
+ * interface.
+ *
+ * The mbuf has the Ethernet header already attached.
+ */
+static void
+bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
+{
+ struct bridge_softc *sc;
+
+ sc = ifp->if_bridge;
+
+ /*
+	 * The packet didn't originate from a member interface. This should only
+ * ever happen if a member interface is removed while packets are
+ * queued for it.
+ */
+ if (sc == NULL) {
+ m_freem(m);
+ return;
+ }
+
+ if (PFIL_HOOKED(&V_inet_pfil_hook)
+#ifdef INET6
+ || PFIL_HOOKED(&V_inet6_pfil_hook)
+#endif
+ ) {
+ if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0)
+ return;
+ if (m == NULL)
+ return;
+ }
+
+ bridge_enqueue(sc, ifp, m);
+}
+
+/*
+ * bridge_output:
+ *
+ * Send output from a bridge member interface. This
+ * performs the bridging function for locally originated
+ * packets.
+ *
+ * The mbuf has the Ethernet header already attached. We must
+ * enqueue or free the mbuf before returning.
+ */
+static int
+bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
+ struct rtentry *rt)
+{
+ struct ether_header *eh;
+ struct ifnet *dst_if;
+ struct bridge_softc *sc;
+ uint16_t vlan;
+
+ if (m->m_len < ETHER_HDR_LEN) {
+ m = m_pullup(m, ETHER_HDR_LEN);
+ if (m == NULL)
+ return (0);
+ }
+
+ eh = mtod(m, struct ether_header *);
+ sc = ifp->if_bridge;
+ vlan = VLANTAGOF(m);
+
+ BRIDGE_LOCK(sc);
+
+ /*
+ * If bridge is down, but the original output interface is up,
+ * go ahead and send out that interface. Otherwise, the packet
+ * is dropped below.
+ */
+ if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ dst_if = ifp;
+ goto sendunicast;
+ }
+
+ /*
+ * If the packet is a multicast, or we don't know a better way to
+ * get there, send to all interfaces.
+ */
+ if (ETHER_IS_MULTICAST(eh->ether_dhost))
+ dst_if = NULL;
+ else
+ dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
+ if (dst_if == NULL) {
+ struct bridge_iflist *bif;
+ struct mbuf *mc;
+ int error = 0, used = 0;
+
+ bridge_span(sc, m);
+
+ BRIDGE_LOCK2REF(sc, error);
+ if (error) {
+ m_freem(m);
+ return (0);
+ }
+
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ dst_if = bif->bif_ifp;
+
+ if (dst_if->if_type == IFT_GIF)
+ continue;
+ if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ continue;
+
+ /*
+ * If this is not the original output interface,
+ * and the interface is participating in spanning
+ * tree, make sure the port is in a state that
+ * allows forwarding.
+ */
+ if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) &&
+ bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
+ continue;
+
+ if (LIST_NEXT(bif, bif_next) == NULL) {
+ used = 1;
+ mc = m;
+ } else {
+ mc = m_copypacket(m, M_DONTWAIT);
+ if (mc == NULL) {
+ sc->sc_ifp->if_oerrors++;
+ continue;
+ }
+ }
+
+ bridge_enqueue(sc, dst_if, mc);
+ }
+ if (used == 0)
+ m_freem(m);
+ BRIDGE_UNREF(sc);
+ return (0);
+ }
+
+sendunicast:
+ /*
+ * XXX Spanning tree consideration here?
+ */
+
+ bridge_span(sc, m);
+ if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ m_freem(m);
+ BRIDGE_UNLOCK(sc);
+ return (0);
+ }
+
+ BRIDGE_UNLOCK(sc);
+ bridge_enqueue(sc, dst_if, m);
+ return (0);
+}
+
+/*
+ * bridge_start:
+ *
+ * Start output on a bridge.
+ *
+ */
+static void
+bridge_start(struct ifnet *ifp)
+{
+ struct bridge_softc *sc;
+ struct mbuf *m;
+ struct ether_header *eh;
+ struct ifnet *dst_if;
+
+ sc = ifp->if_softc;
+
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ for (;;) {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ if (m == 0)
+ break;
+ ETHER_BPF_MTAP(ifp, m);
+
+ eh = mtod(m, struct ether_header *);
+ dst_if = NULL;
+
+ BRIDGE_LOCK(sc);
+ if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
+ dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1);
+ }
+
+ if (dst_if == NULL)
+ bridge_broadcast(sc, ifp, m, 0);
+ else {
+ BRIDGE_UNLOCK(sc);
+ bridge_enqueue(sc, dst_if, m);
+ }
+ }
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+}
+
+/*
+ * bridge_forward:
+ *
+ * The forwarding function of the bridge.
+ *
+ * NOTE: Releases the lock on return.
+ */
+static void
+bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
+ struct mbuf *m)
+{
+ struct bridge_iflist *dbif;
+ struct ifnet *src_if, *dst_if, *ifp;
+ struct ether_header *eh;
+ uint16_t vlan;
+ uint8_t *dst;
+ int error;
+
+ src_if = m->m_pkthdr.rcvif;
+ ifp = sc->sc_ifp;
+
+ ifp->if_ipackets++;
+ ifp->if_ibytes += m->m_pkthdr.len;
+ vlan = VLANTAGOF(m);
+
+ if ((sbif->bif_flags & IFBIF_STP) &&
+ sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
+ goto drop;
+
+ eh = mtod(m, struct ether_header *);
+ dst = eh->ether_dhost;
+
+ /* If the interface is learning, record the address. */
+ if (sbif->bif_flags & IFBIF_LEARNING) {
+ error = bridge_rtupdate(sc, eh->ether_shost, vlan,
+ sbif, 0, IFBAF_DYNAMIC);
+ /*
+		 * If the interface has an address limit, then deny any source
+ * that is not in the cache.
+ */
+ if (error && sbif->bif_addrmax)
+ goto drop;
+ }
+
+ if ((sbif->bif_flags & IFBIF_STP) != 0 &&
+ sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING)
+ goto drop;
+
+ /*
+ * At this point, the port either doesn't participate
+ * in spanning tree or it is in the forwarding state.
+ */
+
+ /*
+ * If the packet is unicast, destined for someone on
+ * "this" side of the bridge, drop it.
+ */
+ if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
+ dst_if = bridge_rtlookup(sc, dst, vlan);
+ if (src_if == dst_if)
+ goto drop;
+ } else {
+ /*
+		 * Check if it's a reserved multicast address; any address
+ * listed in 802.1D section 7.12.6 may not be forwarded by the
+ * bridge.
+ * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
+ */
+ if (dst[0] == 0x01 && dst[1] == 0x80 &&
+ dst[2] == 0xc2 && dst[3] == 0x00 &&
+ dst[4] == 0x00 && dst[5] <= 0x0f)
+ goto drop;
+
+ /* ...forward it to all interfaces. */
+ ifp->if_imcasts++;
+ dst_if = NULL;
+ }
+
+ /*
+ * If we have a destination interface which is a member of our bridge,
+ * OR this is a unicast packet, push it through the bpf(4) machinery.
+ * For broadcast or multicast packets, don't bother because it will
+ * be reinjected into ether_input. We do this before we pass the packets
+ * through the pfil(9) framework, as it is possible that pfil(9) will
+ * drop the packet, or possibly modify it, making it difficult to debug
+ * firewall issues on the bridge.
+ */
+ if (dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0)
+ ETHER_BPF_MTAP(ifp, m);
+
+ /* run the packet filter */
+ if (PFIL_HOOKED(&V_inet_pfil_hook)
+#ifdef INET6
+ || PFIL_HOOKED(&V_inet6_pfil_hook)
+#endif
+ ) {
+ BRIDGE_UNLOCK(sc);
+ if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
+ return;
+ if (m == NULL)
+ return;
+ BRIDGE_LOCK(sc);
+ }
+
+ if (dst_if == NULL) {
+ bridge_broadcast(sc, src_if, m, 1);
+ return;
+ }
+
+ /*
+ * At this point, we're dealing with a unicast frame
+ * going to a different interface.
+ */
+ if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ goto drop;
+
+ dbif = bridge_lookup_member_if(sc, dst_if);
+ if (dbif == NULL)
+ /* Not a member of the bridge (anymore?) */
+ goto drop;
+
+ /* Private segments can not talk to each other */
+ if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)
+ goto drop;
+
+ if ((dbif->bif_flags & IFBIF_STP) &&
+ dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
+ goto drop;
+
+ BRIDGE_UNLOCK(sc);
+
+ if (PFIL_HOOKED(&V_inet_pfil_hook)
+#ifdef INET6
+ || PFIL_HOOKED(&V_inet6_pfil_hook)
+#endif
+ ) {
+ if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
+ return;
+ if (m == NULL)
+ return;
+ }
+
+ bridge_enqueue(sc, dst_if, m);
+ return;
+
+drop:
+ BRIDGE_UNLOCK(sc);
+ m_freem(m);
+}
+
+/*
+ * bridge_input:
+ *
+ * Receive input from a member interface. Queue the packet for
+ * bridging if it is not for us.
+ */
+static struct mbuf *
+bridge_input(struct ifnet *ifp, struct mbuf *m)
+{
+ struct bridge_softc *sc = ifp->if_bridge;
+ struct bridge_iflist *bif, *bif2;
+ struct ifnet *bifp;
+ struct ether_header *eh;
+ struct mbuf *mc, *mc2;
+ uint16_t vlan;
+ int error;
+
+ if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return (m);
+
+ bifp = sc->sc_ifp;
+ vlan = VLANTAGOF(m);
+
+ /*
+ * Implement support for bridge monitoring. If this flag has been
+ * set on this interface, discard the packet once we push it through
+ * the bpf(4) machinery, but before we do, increment the byte and
+ * packet counters associated with this interface.
+ */
+ if ((bifp->if_flags & IFF_MONITOR) != 0) {
+ m->m_pkthdr.rcvif = bifp;
+ ETHER_BPF_MTAP(bifp, m);
+ bifp->if_ipackets++;
+ bifp->if_ibytes += m->m_pkthdr.len;
+ m_freem(m);
+ return (NULL);
+ }
+ BRIDGE_LOCK(sc);
+ bif = bridge_lookup_member_if(sc, ifp);
+ if (bif == NULL) {
+ BRIDGE_UNLOCK(sc);
+ return (m);
+ }
+
+ eh = mtod(m, struct ether_header *);
+
+ bridge_span(sc, m);
+
+ if (m->m_flags & (M_BCAST|M_MCAST)) {
+ /* Tap off 802.1D packets; they do not get forwarded. */
+ if (memcmp(eh->ether_dhost, bstp_etheraddr,
+ ETHER_ADDR_LEN) == 0) {
+ m = bstp_input(&bif->bif_stp, ifp, m);
+ if (m == NULL) {
+ BRIDGE_UNLOCK(sc);
+ return (NULL);
+ }
+ }
+
+ if ((bif->bif_flags & IFBIF_STP) &&
+ bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
+ BRIDGE_UNLOCK(sc);
+ return (m);
+ }
+
+ /*
+ * Make a deep copy of the packet and enqueue the copy
+ * for bridge processing; return the original packet for
+ * local processing.
+ */
+ mc = m_dup(m, M_DONTWAIT);
+ if (mc == NULL) {
+ BRIDGE_UNLOCK(sc);
+ return (m);
+ }
+
+ /* Perform the bridge forwarding function with the copy. */
+ bridge_forward(sc, bif, mc);
+
+ /*
+ * Reinject the mbuf as arriving on the bridge so we have a
+ * chance at claiming multicast packets. We can not loop back
+ * here from ether_input as a bridge is never a member of a
+ * bridge.
+ */
+ KASSERT(bifp->if_bridge == NULL,
+ ("loop created in bridge_input"));
+ mc2 = m_dup(m, M_DONTWAIT);
+ if (mc2 != NULL) {
+ /* Keep the layer3 header aligned */
+ int i = min(mc2->m_pkthdr.len, max_protohdr);
+ mc2 = m_copyup(mc2, i, ETHER_ALIGN);
+ }
+ if (mc2 != NULL) {
+ mc2->m_pkthdr.rcvif = bifp;
+ (*bifp->if_input)(bifp, mc2);
+ }
+
+ /* Return the original packet for local processing. */
+ return (m);
+ }
+
+ if ((bif->bif_flags & IFBIF_STP) &&
+ bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
+ BRIDGE_UNLOCK(sc);
+ return (m);
+ }
+
+#if (defined(INET) || defined(INET6))
+# define OR_CARP_CHECK_WE_ARE_DST(iface) \
+ || ((iface)->if_carp \
+ && (*carp_forus_p)((iface), eh->ether_dhost))
+# define OR_CARP_CHECK_WE_ARE_SRC(iface) \
+ || ((iface)->if_carp \
+ && (*carp_forus_p)((iface), eh->ether_shost))
+#else
+# define OR_CARP_CHECK_WE_ARE_DST(iface)
+# define OR_CARP_CHECK_WE_ARE_SRC(iface)
+#endif
+
+#ifdef INET6
+# define OR_PFIL_HOOKED_INET6 \
+ || PFIL_HOOKED(&V_inet6_pfil_hook)
+#else
+# define OR_PFIL_HOOKED_INET6
+#endif
+
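+/*
+ * GRAB_OUR_PACKETS() is expanded inline below for the bridge interface
+ * itself, for the receiving interface, and for each bridge member. It
+ * claims frames whose destination address matches the interface (or
+ * its CARP address) for local processing, and it silently drops frames
+ * that carry one of our own source addresses, since those are our own
+ * transmissions reflected back at us.
+ */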
+#define GRAB_OUR_PACKETS(iface) \
+ if ((iface)->if_type == IFT_GIF) \
+ continue; \
+ /* It is destined for us. */ \
+ if (memcmp(IF_LLADDR((iface)), eh->ether_dhost, ETHER_ADDR_LEN) == 0 \
+ OR_CARP_CHECK_WE_ARE_DST((iface)) \
+ ) { \
+ if ((iface)->if_type == IFT_BRIDGE) { \
+ ETHER_BPF_MTAP(iface, m); \
+ iface->if_ipackets++; \
+ /* Filter on the physical interface. */ \
+ if (pfil_local_phys && \
+ (PFIL_HOOKED(&V_inet_pfil_hook) \
+ OR_PFIL_HOOKED_INET6)) { \
+ if (bridge_pfil(&m, NULL, ifp, \
+ PFIL_IN) != 0 || m == NULL) { \
+ BRIDGE_UNLOCK(sc); \
+ return (NULL); \
+ } \
+ } \
+ } \
+ if (bif->bif_flags & IFBIF_LEARNING) { \
+ error = bridge_rtupdate(sc, eh->ether_shost, \
+ vlan, bif, 0, IFBAF_DYNAMIC); \
+ if (error && bif->bif_addrmax) { \
+ BRIDGE_UNLOCK(sc); \
+ m_freem(m); \
+ return (NULL); \
+ } \
+ } \
+ m->m_pkthdr.rcvif = iface; \
+ BRIDGE_UNLOCK(sc); \
+ return (m); \
+ } \
+ \
+ /* We just received a packet that we sent out. */ \
+ if (memcmp(IF_LLADDR((iface)), eh->ether_shost, ETHER_ADDR_LEN) == 0 \
+ OR_CARP_CHECK_WE_ARE_SRC((iface)) \
+ ) { \
+ BRIDGE_UNLOCK(sc); \
+ m_freem(m); \
+ return (NULL); \
+ }
+
+ /*
+ * Unicast. Make sure it's not for the bridge.
+ */
+ do { GRAB_OUR_PACKETS(bifp) } while (0);
+
+ /*
+	 * Give ifp a chance at first priority. This helps when the
+	 * packet arrives through an interface such as a VLAN that
+	 * shares a MAC with other interfaces on the same bridge, and
+	 * it also saves some CPU cycles when the destination interface
+	 * and the input interface (ifp) are the same.
+ */
+ do { GRAB_OUR_PACKETS(ifp) } while (0);
+
+	/* Now check all the bridge members. */
+ LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) {
+ GRAB_OUR_PACKETS(bif2->bif_ifp)
+ }
+
+#undef OR_CARP_CHECK_WE_ARE_DST
+#undef OR_CARP_CHECK_WE_ARE_SRC
+#undef OR_PFIL_HOOKED_INET6
+#undef GRAB_OUR_PACKETS
+
+ /* Perform the bridge forwarding function. */
+ bridge_forward(sc, bif, m);
+
+ return (NULL);
+}
+
+/*
+ * bridge_broadcast:
+ *
+ * Send a frame to all interfaces that are members of
+ * the bridge, except for the one on which the packet
+ * arrived.
+ *
+ * NOTE: Releases the lock on return.
+ */
+static void
+bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
+ struct mbuf *m, int runfilt)
+{
+ struct bridge_iflist *dbif, *sbif;
+ struct mbuf *mc;
+ struct ifnet *dst_if;
+ int error = 0, used = 0, i;
+
+ sbif = bridge_lookup_member_if(sc, src_if);
+
+ BRIDGE_LOCK2REF(sc, error);
+ if (error) {
+ m_freem(m);
+ return;
+ }
+
+ /* Filter on the bridge interface before broadcasting */
+ if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook)
+#ifdef INET6
+ || PFIL_HOOKED(&V_inet6_pfil_hook)
+#endif
+ )) {
+ if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0)
+ goto out;
+ if (m == NULL)
+ goto out;
+ }
+
+ LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) {
+ dst_if = dbif->bif_ifp;
+ if (dst_if == src_if)
+ continue;
+
+ /* Private segments can not talk to each other */
+ if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE))
+ continue;
+
+ if ((dbif->bif_flags & IFBIF_STP) &&
+ dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
+ continue;
+
+ if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 &&
+ (m->m_flags & (M_BCAST|M_MCAST)) == 0)
+ continue;
+
+ if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ continue;
+
+ if (LIST_NEXT(dbif, bif_next) == NULL) {
+ mc = m;
+ used = 1;
+ } else {
+ mc = m_dup(m, M_DONTWAIT);
+ if (mc == NULL) {
+ sc->sc_ifp->if_oerrors++;
+ continue;
+ }
+ }
+
+ /*
+ * Filter on the output interface. Pass a NULL bridge interface
+ * pointer so we do not redundantly filter on the bridge for
+ * each interface we broadcast on.
+ */
+ if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook)
+#ifdef INET6
+ || PFIL_HOOKED(&V_inet6_pfil_hook)
+#endif
+ )) {
+ if (used == 0) {
+ /* Keep the layer3 header aligned */
+ i = min(mc->m_pkthdr.len, max_protohdr);
+ mc = m_copyup(mc, i, ETHER_ALIGN);
+ if (mc == NULL) {
+ sc->sc_ifp->if_oerrors++;
+ continue;
+ }
+ }
+ if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
+ continue;
+ if (mc == NULL)
+ continue;
+ }
+
+ bridge_enqueue(sc, dst_if, mc);
+ }
+ if (used == 0)
+ m_freem(m);
+
+out:
+ BRIDGE_UNREF(sc);
+}
+
+/*
+ * bridge_span:
+ *
+ *	Duplicate a packet out one or more interfaces that are in span mode;
+ *	the original mbuf is left unmodified.
+ */
+static void
+bridge_span(struct bridge_softc *sc, struct mbuf *m)
+{
+ struct bridge_iflist *bif;
+ struct ifnet *dst_if;
+ struct mbuf *mc;
+
+ if (LIST_EMPTY(&sc->sc_spanlist))
+ return;
+
+ LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
+ dst_if = bif->bif_ifp;
+
+ if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ continue;
+
+ mc = m_copypacket(m, M_DONTWAIT);
+ if (mc == NULL) {
+ sc->sc_ifp->if_oerrors++;
+ continue;
+ }
+
+ bridge_enqueue(sc, dst_if, mc);
+ }
+}
+
+/*
+ * bridge_rtupdate:
+ *
+ * Add a bridge routing entry.
+ */
+static int
+bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
+ struct bridge_iflist *bif, int setflags, uint8_t flags)
+{
+ struct bridge_rtnode *brt;
+ int error;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+	/* Check that the source address is valid and not multicast. */
+ if (ETHER_IS_MULTICAST(dst) ||
+ (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
+ dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0)
+ return (EINVAL);
+
+ /* 802.1p frames map to vlan 1 */
+ if (vlan == 0)
+ vlan = 1;
+
+ /*
+ * A route for this destination might already exist. If so,
+ * update it, otherwise create a new one.
+ */
+ if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
+ if (sc->sc_brtcnt >= sc->sc_brtmax) {
+ sc->sc_brtexceeded++;
+ return (ENOSPC);
+ }
+ /* Check per interface address limits (if enabled) */
+ if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
+ bif->bif_addrexceeded++;
+ return (ENOSPC);
+ }
+
+ /*
+ * Allocate a new bridge forwarding node, and
+ * initialize the expiration time and Ethernet
+ * address.
+ */
+ brt = uma_zalloc(bridge_rtnode_zone, M_NOWAIT | M_ZERO);
+ if (brt == NULL)
+ return (ENOMEM);
+
+ if (bif->bif_flags & IFBIF_STICKY)
+ brt->brt_flags = IFBAF_STICKY;
+ else
+ brt->brt_flags = IFBAF_DYNAMIC;
+
+ memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
+ brt->brt_vlan = vlan;
+
+ if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
+ uma_zfree(bridge_rtnode_zone, brt);
+ return (error);
+ }
+ brt->brt_dst = bif;
+ bif->bif_addrcnt++;
+ }
+
+ if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
+ brt->brt_dst != bif) {
+ brt->brt_dst->bif_addrcnt--;
+ brt->brt_dst = bif;
+ brt->brt_dst->bif_addrcnt++;
+ }
+
+ if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
+ brt->brt_expire = time_uptime + sc->sc_brttimeout;
+ if (setflags)
+ brt->brt_flags = flags;
+
+ return (0);
+}
+
+/*
+ * bridge_rtlookup:
+ *
+ * Lookup the destination interface for an address.
+ */
+static struct ifnet *
+bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
+{
+ struct bridge_rtnode *brt;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL)
+ return (NULL);
+
+ return (brt->brt_ifp);
+}
+
+/*
+ * bridge_rttrim:
+ *
+ *	Trim the routing table so that the number
+ *	of routing entries is less than or equal to
+ *	the maximum.
+ */
+static void
+bridge_rttrim(struct bridge_softc *sc)
+{
+ struct bridge_rtnode *brt, *nbrt;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ /* Make sure we actually need to do this. */
+ if (sc->sc_brtcnt <= sc->sc_brtmax)
+ return;
+
+ /* Force an aging cycle; this might trim enough addresses. */
+ bridge_rtage(sc);
+ if (sc->sc_brtcnt <= sc->sc_brtmax)
+ return;
+
+ LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
+ if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+ bridge_rtnode_destroy(sc, brt);
+ if (sc->sc_brtcnt <= sc->sc_brtmax)
+ return;
+ }
+ }
+}
+
+/*
+ * bridge_timer:
+ *
+ * Aging timer for the bridge.
+ */
+static void
+bridge_timer(void *arg)
+{
+ struct bridge_softc *sc = arg;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ bridge_rtage(sc);
+
+ if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
+ callout_reset(&sc->sc_brcallout,
+ bridge_rtable_prune_period * hz, bridge_timer, sc);
+}
+
+/*
+ * bridge_rtage:
+ *
+ * Perform an aging cycle.
+ */
+static void
+bridge_rtage(struct bridge_softc *sc)
+{
+ struct bridge_rtnode *brt, *nbrt;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
+ if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+ if (time_uptime >= brt->brt_expire)
+ bridge_rtnode_destroy(sc, brt);
+ }
+ }
+}
+
+/*
+ * bridge_rtflush:
+ *
+ * Remove all dynamic addresses from the bridge.
+ */
+static void
+bridge_rtflush(struct bridge_softc *sc, int full)
+{
+ struct bridge_rtnode *brt, *nbrt;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
+ if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
+ bridge_rtnode_destroy(sc, brt);
+ }
+}
+
+/*
+ * bridge_rtdaddr:
+ *
+ * Remove an address from the table.
+ */
+static int
+bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
+{
+ struct bridge_rtnode *brt;
+ int found = 0;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ /*
+	 * If vlan is zero, we want to delete for all vlans, so the lookup
+	 * may return more than one entry.
+ */
+ while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
+ bridge_rtnode_destroy(sc, brt);
+ found = 1;
+ }
+
+ return (found ? 0 : ENOENT);
+}
+
+/*
+ * bridge_rtdelete:
+ *
+ *	Delete routes to a specific member interface.
+ */
+static void
+bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
+{
+ struct bridge_rtnode *brt, *nbrt;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
+ if (brt->brt_ifp == ifp && (full ||
+ (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC))
+ bridge_rtnode_destroy(sc, brt);
+ }
+}
+
+/*
+ * bridge_rtable_init:
+ *
+ * Initialize the route table for this bridge.
+ */
+static int
+bridge_rtable_init(struct bridge_softc *sc)
+{
+ int i;
+
+ sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
+ M_DEVBUF, M_NOWAIT);
+ if (sc->sc_rthash == NULL)
+ return (ENOMEM);
+
+ for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
+ LIST_INIT(&sc->sc_rthash[i]);
+
+ sc->sc_rthash_key = arc4random();
+
+ LIST_INIT(&sc->sc_rtlist);
+
+ return (0);
+}
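+
+/*
+ * Editorial note: seeding sc_rthash_key from arc4random() above
+ * randomizes bucket placement, so a remote sender cannot easily
+ * construct a set of source MAC addresses that all collide in a
+ * single chain of the address table.
+ */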
+
+/*
+ * bridge_rtable_fini:
+ *
+ * Deconstruct the route table for this bridge.
+ */
+static void
+bridge_rtable_fini(struct bridge_softc *sc)
+{
+
+ KASSERT(sc->sc_brtcnt == 0,
+ ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
+ free(sc->sc_rthash, M_DEVBUF);
+}
+
+/*
+ * The following hash function is adapted from "Hash Functions" by Bob Jenkins
+ * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ */
+#define mix(a, b, c) \
+do { \
+ a -= b; a -= c; a ^= (c >> 13); \
+ b -= c; b -= a; b ^= (a << 8); \
+ c -= a; c -= b; c ^= (b >> 13); \
+ a -= b; a -= c; a ^= (c >> 12); \
+ b -= c; b -= a; b ^= (a << 16); \
+ c -= a; c -= b; c ^= (b >> 5); \
+ a -= b; a -= c; a ^= (c >> 3); \
+ b -= c; b -= a; b ^= (a << 10); \
+ c -= a; c -= b; c ^= (b >> 15); \
+} while (/*CONSTCOND*/0)
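+
+/*
+ * Editorial note: 0x9e3779b9 below is the 32-bit golden-ratio constant
+ * that Jenkins' paper suggests as the arbitrary initial value for a
+ * and b.
+ */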
+
+static __inline uint32_t
+bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
+{
+ uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
+
+ b += addr[5] << 8;
+ b += addr[4];
+ a += addr[3] << 24;
+ a += addr[2] << 16;
+ a += addr[1] << 8;
+ a += addr[0];
+
+ mix(a, b, c);
+
+ return (c & BRIDGE_RTHASH_MASK);
+}
+
+#undef mix
+
+static int
+bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+ int i, d;
+
+ for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) {
+ d = ((int)a[i]) - ((int)b[i]);
+ }
+
+ return (d);
+}
+
+/*
+ * bridge_rtnode_lookup:
+ *
+ * Look up a bridge route node for the specified destination. Compare the
+ *	vlan id or, if it is zero, just return the first match.
+ */
+static struct bridge_rtnode *
+bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
+{
+ struct bridge_rtnode *brt;
+ uint32_t hash;
+ int dir;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ hash = bridge_rthash(sc, addr);
+ LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
+ dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
+ if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0))
+ return (brt);
+ if (dir > 0)
+ return (NULL);
+ }
+
+ return (NULL);
+}
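+
+/*
+ * The hash chains are kept sorted by address (see the insertion logic
+ * in bridge_rtnode_insert() below), which is why a positive comparison
+ * result in the loop above terminates the search early.
+ */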
+
+/*
+ * bridge_rtnode_insert:
+ *
+ * Insert the specified bridge node into the route table. We
+ * assume the entry is not already in the table.
+ */
+static int
+bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
+{
+ struct bridge_rtnode *lbrt;
+ uint32_t hash;
+ int dir;
+
+ BRIDGE_LOCK_ASSERT(sc);
+
+ hash = bridge_rthash(sc, brt->brt_addr);
+
+ lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
+ if (lbrt == NULL) {
+ LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
+ goto out;
+ }
+
+ do {
+ dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
+ if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan)
+ return (EEXIST);
+ if (dir > 0) {
+ LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
+ goto out;
+ }
+ if (LIST_NEXT(lbrt, brt_hash) == NULL) {
+ LIST_INSERT_AFTER(lbrt, brt, brt_hash);
+ goto out;
+ }
+ lbrt = LIST_NEXT(lbrt, brt_hash);
+ } while (lbrt != NULL);
+
+#ifdef DIAGNOSTIC
+ panic("bridge_rtnode_insert: impossible");
+#endif
+
+out:
+ LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
+ sc->sc_brtcnt++;
+
+ return (0);
+}
+
+/*
+ * bridge_rtnode_destroy:
+ *
+ * Destroy a bridge rtnode.
+ */
+static void
+bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
+{
+ BRIDGE_LOCK_ASSERT(sc);
+
+ LIST_REMOVE(brt, brt_hash);
+
+ LIST_REMOVE(brt, brt_list);
+ sc->sc_brtcnt--;
+ brt->brt_dst->bif_addrcnt--;
+ uma_zfree(bridge_rtnode_zone, brt);
+}
+
+/*
+ * bridge_rtable_expire:
+ *
+ * Set the expiry time for all routes on an interface.
+ */
+static void
+bridge_rtable_expire(struct ifnet *ifp, int age)
+{
+ struct bridge_softc *sc = ifp->if_bridge;
+ struct bridge_rtnode *brt;
+
+ BRIDGE_LOCK(sc);
+
+ /*
+	 * If the age is zero, flush; otherwise set all the expiry times to
+	 * age for the interface.
+ */
+ if (age == 0)
+ bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
+ else {
+ LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
+ /* Cap the expiry time to 'age' */
+ if (brt->brt_ifp == ifp &&
+ brt->brt_expire > time_uptime + age &&
+ (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
+ brt->brt_expire = time_uptime + age;
+ }
+ }
+ BRIDGE_UNLOCK(sc);
+}
+
+/*
+ * bridge_state_change:
+ *
+ * Callback from the bridgestp code when a port changes states.
+ */
+static void
+bridge_state_change(struct ifnet *ifp, int state)
+{
+ struct bridge_softc *sc = ifp->if_bridge;
+ static const char *stpstates[] = {
+ "disabled",
+ "listening",
+ "learning",
+ "forwarding",
+ "blocking",
+ "discarding"
+ };
+
+ if (log_stp)
+ log(LOG_NOTICE, "%s: state changed to %s on %s\n",
+ sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname);
+}
+
+/*
+ * Send bridge packets through pfil if they are one of the types pfil can deal
+ * with, or if they are ARP or REVARP. (pfil will pass ARP and REVARP without
+ * question.) If bifp or ifp is NULL, packet filtering is skipped for
+ * that interface.
+ */
+static int
+bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
+{
+ int snap, error, i, hlen;
+ struct ether_header *eh1, eh2;
+ struct ip_fw_args args;
+ struct ip *ip;
+ struct llc llc1;
+ u_int16_t ether_type;
+
+ snap = 0;
+	error = -1;	/* Default to an error unless cleared below */
+
+#if 0
+	/* We may return with the IP fields swapped; ensure it's not shared. */
+ KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__));
+#endif
+
+ if (pfil_bridge == 0 && pfil_member == 0 && pfil_ipfw == 0)
+ return (0); /* filtering is disabled */
+
+ i = min((*mp)->m_pkthdr.len, max_protohdr);
+ if ((*mp)->m_len < i) {
+ *mp = m_pullup(*mp, i);
+ if (*mp == NULL) {
+ printf("%s: m_pullup failed\n", __func__);
+ return (-1);
+ }
+ }
+
+ eh1 = mtod(*mp, struct ether_header *);
+ ether_type = ntohs(eh1->ether_type);
+
+ /*
+ * Check for SNAP/LLC.
+ */
+ if (ether_type < ETHERMTU) {
+ struct llc *llc2 = (struct llc *)(eh1 + 1);
+
+ if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
+ llc2->llc_dsap == LLC_SNAP_LSAP &&
+ llc2->llc_ssap == LLC_SNAP_LSAP &&
+ llc2->llc_control == LLC_UI) {
+ ether_type = htons(llc2->llc_un.type_snap.ether_type);
+ snap = 1;
+ }
+ }
+
+ /*
+ * If we're trying to filter bridge traffic, don't look at anything
+ * other than IP and ARP traffic. If the filter doesn't understand
+ * IPv6, don't allow IPv6 through the bridge either. This is lame
+ * since if we really wanted, say, an AppleTalk filter, we are hosed,
+ * but of course we don't have an AppleTalk filter to begin with.
+ * (Note that since pfil doesn't understand ARP it will pass *ALL*
+ * ARP traffic.)
+ */
+ switch (ether_type) {
+ case ETHERTYPE_ARP:
+ case ETHERTYPE_REVARP:
+ if (pfil_ipfw_arp == 0)
+ return (0); /* Automatically pass */
+ break;
+
+ case ETHERTYPE_IP:
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+#endif /* INET6 */
+ break;
+ default:
+ /*
+		 * Check to see if the user wants to pass non-IP
+		 * packets; these will not be checked by pfil(9) and
+		 * would be passed unconditionally, so the default is to drop.
+ */
+ if (pfil_onlyip)
+ goto bad;
+ }
+
+ /* Strip off the Ethernet header and keep a copy. */
+ m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
+ m_adj(*mp, ETHER_HDR_LEN);
+
+ /* Strip off snap header, if present */
+ if (snap) {
+ m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
+ m_adj(*mp, sizeof(struct llc));
+ }
+
+ /*
+ * Check the IP header for alignment and errors
+ */
+ if (dir == PFIL_IN) {
+ switch (ether_type) {
+ case ETHERTYPE_IP:
+ error = bridge_ip_checkbasic(mp);
+ break;
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ error = bridge_ip6_checkbasic(mp);
+ break;
+#endif /* INET6 */
+ default:
+ error = 0;
+ }
+ if (error)
+ goto bad;
+ }
+
+ /* XXX this section is also in if_ethersubr.c */
+	/* XXX PFIL_OUT or DIR_OUT? */
+ if (V_ip_fw_chk_ptr && pfil_ipfw != 0 &&
+ dir == PFIL_OUT && ifp != NULL) {
+ struct m_tag *mtag;
+
+ error = -1;
+ /* fetch the start point from existing tags, if any */
+ mtag = m_tag_locate(*mp, MTAG_IPFW_RULE, 0, NULL);
+ if (mtag == NULL) {
+ args.rule.slot = 0;
+ } else {
+ struct ipfw_rule_ref *r;
+
+ /* XXX can we free the tag after use ? */
+ mtag->m_tag_id = PACKET_TAG_NONE;
+ r = (struct ipfw_rule_ref *)(mtag + 1);
+ /* packet already partially processed ? */
+ if (r->info & IPFW_ONEPASS)
+ goto ipfwpass;
+ args.rule = *r;
+ }
+
+ args.m = *mp;
+ args.oif = ifp;
+ args.next_hop = NULL;
+ args.eh = &eh2;
+ args.inp = NULL; /* used by ipfw uid/gid/jail rules */
+ i = V_ip_fw_chk_ptr(&args);
+ *mp = args.m;
+
+ if (*mp == NULL)
+ return (error);
+
+ if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
+
+ /* put the Ethernet header back on */
+ M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
+ if (*mp == NULL)
+ return (error);
+ bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
+
+ /*
+ * Pass the pkt to dummynet, which consumes it. The
+ * packet will return to us via bridge_dummynet().
+ */
+ args.oif = ifp;
+ ip_dn_io_ptr(mp, DIR_FWD | PROTO_IFB, &args);
+ return (error);
+ }
+
+ if (i != IP_FW_PASS) /* drop */
+ goto bad;
+ }
+
+ipfwpass:
+ error = 0;
+
+ /*
+ * Run the packet through pfil
+ */
+ switch (ether_type) {
+ case ETHERTYPE_IP:
+ /*
+		 * Before calling the firewall, swap fields the same way
+		 * IP does; here we assume the header is contiguous.
+ */
+ ip = mtod(*mp, struct ip *);
+
+ ip->ip_len = ntohs(ip->ip_len);
+ ip->ip_off = ntohs(ip->ip_off);
+
+ /*
+ * Run pfil on the member interface and the bridge, both can
+ * be skipped by clearing pfil_member or pfil_bridge.
+ *
+ * Keep the order:
+ * in_if -> bridge_if -> out_if
+ */
+ if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
+ error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
+ dir, NULL);
+
+ if (*mp == NULL || error != 0) /* filter may consume */
+ break;
+
+ if (pfil_member && ifp != NULL)
+ error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp,
+ dir, NULL);
+
+ if (*mp == NULL || error != 0) /* filter may consume */
+ break;
+
+ if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
+ error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
+ dir, NULL);
+
+ if (*mp == NULL || error != 0) /* filter may consume */
+ break;
+
+ /* check if we need to fragment the packet */
+ if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
+ i = (*mp)->m_pkthdr.len;
+ if (i > ifp->if_mtu) {
+ error = bridge_fragment(ifp, *mp, &eh2, snap,
+ &llc1);
+ return (error);
+ }
+ }
+
+ /* Recalculate the ip checksum and restore byte ordering */
+ ip = mtod(*mp, struct ip *);
+ hlen = ip->ip_hl << 2;
+ if (hlen < sizeof(struct ip))
+ goto bad;
+ if (hlen > (*mp)->m_len) {
+			if ((*mp = m_pullup(*mp, hlen)) == NULL)
+ goto bad;
+ ip = mtod(*mp, struct ip *);
+ if (ip == NULL)
+ goto bad;
+ }
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ ip->ip_sum = 0;
+ if (hlen == sizeof(struct ip))
+ ip->ip_sum = in_cksum_hdr(ip);
+ else
+ ip->ip_sum = in_cksum(*mp, hlen);
+
+ break;
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
+ error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
+ dir, NULL);
+
+ if (*mp == NULL || error != 0) /* filter may consume */
+ break;
+
+ if (pfil_member && ifp != NULL)
+ error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp,
+ dir, NULL);
+
+ if (*mp == NULL || error != 0) /* filter may consume */
+ break;
+
+ if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
+ error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
+ dir, NULL);
+ break;
+#endif
+ default:
+ error = 0;
+ break;
+ }
+
+ if (*mp == NULL)
+ return (error);
+ if (error != 0)
+ goto bad;
+
+ error = -1;
+
+ /*
+ * Finally, put everything back the way it was and return
+ */
+ if (snap) {
+ M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
+ if (*mp == NULL)
+ return (error);
+ bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
+ }
+
+ M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
+ if (*mp == NULL)
+ return (error);
+ bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
+
+ return (0);
+
+bad:
+ m_freem(*mp);
+ *mp = NULL;
+ return (error);
+}
+
+/*
+ * Perform basic checks on the header size, since
+ * pfil(9) assumes ip_input() has already processed
+ * it. Cut-and-pasted from ip_input.c.
+ * Given how simple the IPv6 version is,
+ * does the IPv4 version really need to be
+ * this complicated?
+ *
+ * XXX Should we update ipstat here, or not?
+ * XXX Right now we update ipstat but not
+ * XXX csum_counter.
+ */
+static int
+bridge_ip_checkbasic(struct mbuf **mp)
+{
+ struct mbuf *m = *mp;
+ struct ip *ip;
+ int len, hlen;
+ u_short sum;
+
+ if (*mp == NULL)
+ return (-1);
+
+ if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
+ if ((m = m_copyup(m, sizeof(struct ip),
+ (max_linkhdr + 3) & ~3)) == NULL) {
+ /* XXXJRT new stat, please */
+ KMOD_IPSTAT_INC(ips_toosmall);
+ goto bad;
+ }
+ } else if (__predict_false(m->m_len < sizeof (struct ip))) {
+ if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
+ KMOD_IPSTAT_INC(ips_toosmall);
+ goto bad;
+ }
+ }
+ ip = mtod(m, struct ip *);
+	if (ip == NULL)
+		goto bad;
+
+ if (ip->ip_v != IPVERSION) {
+ KMOD_IPSTAT_INC(ips_badvers);
+ goto bad;
+ }
+ hlen = ip->ip_hl << 2;
+ if (hlen < sizeof(struct ip)) { /* minimum header length */
+ KMOD_IPSTAT_INC(ips_badhlen);
+ goto bad;
+ }
+ if (hlen > m->m_len) {
+		if ((m = m_pullup(m, hlen)) == NULL) {
+ KMOD_IPSTAT_INC(ips_badhlen);
+ goto bad;
+ }
+ ip = mtod(m, struct ip *);
+		if (ip == NULL)
+			goto bad;
+ }
+
+ if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
+ sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
+ } else {
+ if (hlen == sizeof(struct ip)) {
+ sum = in_cksum_hdr(ip);
+ } else {
+ sum = in_cksum(m, hlen);
+ }
+ }
+ if (sum) {
+ KMOD_IPSTAT_INC(ips_badsum);
+ goto bad;
+ }
+
+ /* Retrieve the packet length. */
+ len = ntohs(ip->ip_len);
+
+ /*
+ * Check for additional length bogosity
+ */
+ if (len < hlen) {
+ KMOD_IPSTAT_INC(ips_badlen);
+ goto bad;
+ }
+
+ /*
+ * Check that the amount of data in the buffers
+	 * is at least as much as the IP header would have us expect.
+ * Drop packet if shorter than we expect.
+ */
+ if (m->m_pkthdr.len < len) {
+ KMOD_IPSTAT_INC(ips_tooshort);
+ goto bad;
+ }
+
+ /* Checks out, proceed */
+ *mp = m;
+ return (0);
+
+bad:
+ *mp = m;
+ return (-1);
+}
+
+#ifdef INET6
+/*
+ * Same as above, but for IPv6.
+ * Cut-and-pasted from ip6_input.c.
+ * XXX Should we update ip6stat, or not?
+ */
+static int
+bridge_ip6_checkbasic(struct mbuf **mp)
+{
+ struct mbuf *m = *mp;
+ struct ip6_hdr *ip6;
+
+ /*
+ * If the IPv6 header is not aligned, slurp it up into a new
+ * mbuf with space for link headers, in the event we forward
+ * it. Otherwise, if it is aligned, make sure the entire base
+ * IPv6 header is in the first mbuf of the chain.
+ */
+ if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
+ struct ifnet *inifp = m->m_pkthdr.rcvif;
+ if ((m = m_copyup(m, sizeof(struct ip6_hdr),
+ (max_linkhdr + 3) & ~3)) == NULL) {
+ /* XXXJRT new stat, please */
+ V_ip6stat.ip6s_toosmall++;
+ in6_ifstat_inc(inifp, ifs6_in_hdrerr);
+ goto bad;
+ }
+ } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
+ struct ifnet *inifp = m->m_pkthdr.rcvif;
+ if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
+ V_ip6stat.ip6s_toosmall++;
+ in6_ifstat_inc(inifp, ifs6_in_hdrerr);
+ goto bad;
+ }
+ }
+
+ ip6 = mtod(m, struct ip6_hdr *);
+
+ if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
+ V_ip6stat.ip6s_badvers++;
+ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
+ goto bad;
+ }
+
+ /* Checks out, proceed */
+ *mp = m;
+ return (0);
+
+bad:
+ *mp = m;
+ return (-1);
+}
+#endif /* INET6 */
+
+/*
+ * bridge_fragment:
+ *
+ * Return a fragmented mbuf chain.
+ */
+static int
+bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
+ int snap, struct llc *llc)
+{
+ struct mbuf *m0;
+ struct ip *ip;
+ int error = -1;
+
+ if (m->m_len < sizeof(struct ip) &&
+ (m = m_pullup(m, sizeof(struct ip))) == NULL)
+ goto out;
+ ip = mtod(m, struct ip *);
+
+ error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
+ CSUM_DELAY_IP);
+ if (error)
+ goto out;
+
+ /* walk the chain and re-add the Ethernet header */
+ for (m0 = m; m0; m0 = m0->m_nextpkt) {
+ if (error == 0) {
+ if (snap) {
+ M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT);
+ if (m0 == NULL) {
+ error = ENOBUFS;
+ continue;
+ }
+ bcopy(llc, mtod(m0, caddr_t),
+ sizeof(struct llc));
+ }
+ M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
+ if (m0 == NULL) {
+ error = ENOBUFS;
+ continue;
+ }
+ bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
+ } else
+ m_freem(m);
+ }
+
+ if (error == 0)
+ KMOD_IPSTAT_INC(ips_fragmented);
+
+ return (error);
+
+out:
+ if (m != NULL)
+ m_freem(m);
+ return (error);
+}
diff --git a/freebsd/sys/net/if_bridgevar.h b/freebsd/sys/net/if_bridgevar.h
new file mode 100644
index 00000000..642cc98d
--- /dev/null
+++ b/freebsd/sys/net/if_bridgevar.h
@@ -0,0 +1,328 @@
+/* $NetBSD: if_bridgevar.h,v 1.4 2003/07/08 07:13:50 itojun Exp $ */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Data structure and control definitions for bridge interfaces.
+ */
+
+#include <freebsd/sys/callout.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/condvar.h>
+
+/*
+ * Commands used in the SIOCSDRVSPEC ioctl. Note the lookup of the
+ * bridge interface itself is keyed off the ifdrv structure.
+ */
+#define BRDGADD 0 /* add bridge member (ifbreq) */
+#define BRDGDEL 1 /* delete bridge member (ifbreq) */
+#define BRDGGIFFLGS 2 /* get member if flags (ifbreq) */
+#define BRDGSIFFLGS 3 /* set member if flags (ifbreq) */
+#define BRDGSCACHE 4 /* set cache size (ifbrparam) */
+#define BRDGGCACHE 5 /* get cache size (ifbrparam) */
+#define BRDGGIFS 6 /* get member list (ifbifconf) */
+#define BRDGRTS 7 /* get address list (ifbaconf) */
+#define BRDGSADDR 8 /* set static address (ifbareq) */
+#define BRDGSTO 9 /* set cache timeout (ifbrparam) */
+#define BRDGGTO 10 /* get cache timeout (ifbrparam) */
+#define BRDGDADDR 11 /* delete address (ifbareq) */
+#define BRDGFLUSH 12 /* flush address cache (ifbreq) */
+
+#define BRDGGPRI 13 /* get priority (ifbrparam) */
+#define BRDGSPRI 14 /* set priority (ifbrparam) */
+#define BRDGGHT 15 /* get hello time (ifbrparam) */
+#define BRDGSHT 16 /* set hello time (ifbrparam) */
+#define BRDGGFD 17 /* get forward delay (ifbrparam) */
+#define BRDGSFD 18 /* set forward delay (ifbrparam) */
+#define BRDGGMA 19 /* get max age (ifbrparam) */
+#define BRDGSMA 20 /* set max age (ifbrparam) */
+#define BRDGSIFPRIO 21 /* set if priority (ifbreq) */
+#define BRDGSIFCOST 22 /* set if path cost (ifbreq) */
+#define BRDGADDS 23 /* add bridge span member (ifbreq) */
+#define BRDGDELS 24 /* delete bridge span member (ifbreq) */
+#define BRDGPARAM 25 /* get bridge STP params (ifbropreq) */
+#define BRDGGRTE 26 /* get cache drops (ifbrparam) */
+#define BRDGGIFSSTP 27 /* get member STP params list
+ * (ifbpstpconf) */
+#define BRDGSPROTO 28 /* set protocol (ifbrparam) */
+#define BRDGSTXHC 29 /* set tx hold count (ifbrparam) */
+#define BRDGSIFAMAX 30 /* set max interface addrs (ifbreq) */
+
+/*
+ * Generic bridge control request.
+ */
+struct ifbreq {
+ char ifbr_ifsname[IFNAMSIZ]; /* member if name */
+ uint32_t ifbr_ifsflags; /* member if flags */
+ uint32_t ifbr_stpflags; /* member if STP flags */
+ uint32_t ifbr_path_cost; /* member if STP cost */
+ uint8_t ifbr_portno; /* member if port number */
+ uint8_t ifbr_priority; /* member if STP priority */
+ uint8_t ifbr_proto; /* member if STP protocol */
+ uint8_t ifbr_role; /* member if STP role */
+ uint8_t ifbr_state; /* member if STP state */
+ uint32_t ifbr_addrcnt; /* member if addr number */
+ uint32_t ifbr_addrmax; /* member if addr max */
+ uint32_t ifbr_addrexceeded; /* member if addr violations */
+ uint8_t pad[32];
+};
+
+/* BRDGGIFFLGS, BRDGSIFFLGS */
+#define IFBIF_LEARNING 0x0001 /* if can learn */
+#define IFBIF_DISCOVER 0x0002 /* if sends packets w/ unknown dest. */
+#define IFBIF_STP 0x0004 /* if participates in spanning tree */
+#define IFBIF_SPAN 0x0008 /* if is a span port */
+#define IFBIF_STICKY 0x0010 /* if learned addresses stick */
+#define IFBIF_BSTP_EDGE 0x0020 /* member stp edge port */
+#define IFBIF_BSTP_AUTOEDGE 0x0040 /* member stp autoedge enabled */
+#define IFBIF_BSTP_PTP 0x0080 /* member stp point to point */
+#define IFBIF_BSTP_AUTOPTP 0x0100 /* member stp autoptp enabled */
+#define IFBIF_BSTP_ADMEDGE 0x0200 /* member stp admin edge enabled */
+#define IFBIF_BSTP_ADMCOST 0x0400 /* member stp admin path cost */
+#define IFBIF_PRIVATE 0x0800 /* if is a private segment */
+
+#define IFBIFBITS "\020\001LEARNING\002DISCOVER\003STP\004SPAN" \
+ "\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \
+ "\011AUTOPTP"
+#define IFBIFMASK ~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \
+ IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \
+ IFBIF_BSTP_ADMCOST) /* not saved */
+
+/* BRDGFLUSH */
+#define IFBF_FLUSHDYN 0x00 /* flush learned addresses only */
+#define IFBF_FLUSHALL 0x01 /* flush all addresses */
+
+/*
+ * Interface list structure.
+ */
+struct ifbifconf {
+ uint32_t ifbic_len; /* buffer size */
+ union {
+ caddr_t ifbicu_buf;
+ struct ifbreq *ifbicu_req;
+ } ifbic_ifbicu;
+#define ifbic_buf ifbic_ifbicu.ifbicu_buf
+#define ifbic_req ifbic_ifbicu.ifbicu_req
+};
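+
+/*
+ * Editorial sketch (not part of the original sources): userspace
+ * retrieves the member list by pointing an ifbifconf at a result
+ * buffer and issuing the BRDGGIFS command through the generic
+ * SIOCGDRVSPEC ioctl, roughly:
+ *
+ *	struct ifdrv ifd = { .ifd_cmd = BRDGGIFS };
+ *	struct ifbreq reqs[16];
+ *	struct ifbifconf bifc = {
+ *		.ifbic_len = sizeof(reqs),
+ *		.ifbic_req = reqs,
+ *	};
+ *
+ *	strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
+ *	ifd.ifd_len = sizeof(bifc);
+ *	ifd.ifd_data = &bifc;
+ *	ioctl(sock, SIOCGDRVSPEC, &ifd);
+ */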
+
+/*
+ * Bridge address request.
+ */
+struct ifbareq {
+ char ifba_ifsname[IFNAMSIZ]; /* member if name */
+ unsigned long ifba_expire; /* address expire time */
+ uint8_t ifba_flags; /* address flags */
+ uint8_t ifba_dst[ETHER_ADDR_LEN];/* destination address */
+ uint16_t ifba_vlan; /* vlan id */
+};
+
+#define IFBAF_TYPEMASK 0x03 /* address type mask */
+#define IFBAF_DYNAMIC 0x00 /* dynamically learned address */
+#define IFBAF_STATIC 0x01 /* static address */
+#define IFBAF_STICKY 0x02 /* sticky address */
+
+#define IFBAFBITS "\020\1STATIC\2STICKY"
+
+/*
+ * Address list structure.
+ */
+struct ifbaconf {
+ uint32_t ifbac_len; /* buffer size */
+ union {
+ caddr_t ifbacu_buf;
+ struct ifbareq *ifbacu_req;
+ } ifbac_ifbacu;
+#define ifbac_buf ifbac_ifbacu.ifbacu_buf
+#define ifbac_req ifbac_ifbacu.ifbacu_req
+};
+
+/*
+ * Bridge parameter structure.
+ */
+struct ifbrparam {
+ union {
+ uint32_t ifbrpu_int32;
+ uint16_t ifbrpu_int16;
+ uint8_t ifbrpu_int8;
+ } ifbrp_ifbrpu;
+};
+#define ifbrp_csize ifbrp_ifbrpu.ifbrpu_int32 /* cache size */
+#define ifbrp_ctime ifbrp_ifbrpu.ifbrpu_int32 /* cache time (sec) */
+#define ifbrp_prio ifbrp_ifbrpu.ifbrpu_int16 /* bridge priority */
+#define ifbrp_proto ifbrp_ifbrpu.ifbrpu_int8 /* bridge protocol */
+#define ifbrp_txhc ifbrp_ifbrpu.ifbrpu_int8 /* bpdu tx holdcount */
+#define ifbrp_hellotime ifbrp_ifbrpu.ifbrpu_int8 /* hello time (sec) */
+#define ifbrp_fwddelay ifbrp_ifbrpu.ifbrpu_int8 /* fwd time (sec) */
+#define ifbrp_maxage ifbrp_ifbrpu.ifbrpu_int8 /* max age (sec) */
+#define ifbrp_cexceeded ifbrp_ifbrpu.ifbrpu_int32 /* # of cache dropped
+							 * addresses */
+/*
+ * Bridge current operational parameters structure.
+ */
+struct ifbropreq {
+ uint8_t ifbop_holdcount;
+ uint8_t ifbop_maxage;
+ uint8_t ifbop_hellotime;
+ uint8_t ifbop_fwddelay;
+ uint8_t ifbop_protocol;
+ uint16_t ifbop_priority;
+ uint16_t ifbop_root_port;
+ uint32_t ifbop_root_path_cost;
+ uint64_t ifbop_bridgeid;
+ uint64_t ifbop_designated_root;
+ uint64_t ifbop_designated_bridge;
+ struct timeval ifbop_last_tc_time;
+};
+
+/*
+ * Bridge member operational STP params structure.
+ */
+struct ifbpstpreq {
+ uint8_t ifbp_portno; /* bp STP port number */
+ uint32_t ifbp_fwd_trans; /* bp STP fwd transitions */
+ uint32_t ifbp_design_cost; /* bp STP designated cost */
+ uint32_t ifbp_design_port; /* bp STP designated port */
+ uint64_t ifbp_design_bridge; /* bp STP designated bridge */
+ uint64_t ifbp_design_root; /* bp STP designated root */
+};
+
+/*
+ * Bridge STP ports list structure.
+ */
+struct ifbpstpconf {
+ uint32_t ifbpstp_len; /* buffer size */
+ union {
+ caddr_t ifbpstpu_buf;
+ struct ifbpstpreq *ifbpstpu_req;
+ } ifbpstp_ifbpstpu;
+#define ifbpstp_buf ifbpstp_ifbpstpu.ifbpstpu_buf
+#define ifbpstp_req ifbpstp_ifbpstpu.ifbpstpu_req
+};
+
+#ifdef _KERNEL
+
+#define BRIDGE_LOCK_INIT(_sc) do { \
+ mtx_init(&(_sc)->sc_mtx, "if_bridge", NULL, MTX_DEF); \
+ cv_init(&(_sc)->sc_cv, "if_bridge_cv"); \
+} while (0)
+#define BRIDGE_LOCK_DESTROY(_sc) do { \
+ mtx_destroy(&(_sc)->sc_mtx); \
+ cv_destroy(&(_sc)->sc_cv); \
+} while (0)
+#define BRIDGE_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx)
+#define BRIDGE_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx)
+#define BRIDGE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_mtx, MA_OWNED)
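+/*
+ * BRIDGE_LOCK2REF() converts a held bridge mutex into a reference on
+ * the member list: the caller may then walk sc_iflist without the
+ * mutex held, while BRIDGE_XLOCK() writers cv_wait() until the last
+ * such reference is dropped by BRIDGE_UNREF(). If a writer is already
+ * waiting (sc_iflist_xcnt > 0), the conversion fails with EBUSY
+ * instead.
+ */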
+#define BRIDGE_LOCK2REF(_sc, _err) do { \
+ mtx_assert(&(_sc)->sc_mtx, MA_OWNED); \
+ if ((_sc)->sc_iflist_xcnt > 0) \
+ (_err) = EBUSY; \
+ else \
+ (_sc)->sc_iflist_ref++; \
+ mtx_unlock(&(_sc)->sc_mtx); \
+} while (0)
+#define BRIDGE_UNREF(_sc) do { \
+ mtx_lock(&(_sc)->sc_mtx); \
+ (_sc)->sc_iflist_ref--; \
+ if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) \
+ cv_broadcast(&(_sc)->sc_cv); \
+ mtx_unlock(&(_sc)->sc_mtx); \
+} while (0)
+#define BRIDGE_XLOCK(_sc) do { \
+ mtx_assert(&(_sc)->sc_mtx, MA_OWNED); \
+ (_sc)->sc_iflist_xcnt++; \
+ while ((_sc)->sc_iflist_ref > 0) \
+ cv_wait(&(_sc)->sc_cv, &(_sc)->sc_mtx); \
+} while (0)
+#define BRIDGE_XDROP(_sc) do { \
+ mtx_assert(&(_sc)->sc_mtx, MA_OWNED); \
+ (_sc)->sc_iflist_xcnt--; \
+} while (0)
+
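+/*
+ * BRIDGE_INPUT() and BRIDGE_OUTPUT() are invoked from the generic
+ * Ethernet code through the bridge_input_p/bridge_output_p function
+ * pointers declared at the end of this header, so that the Ethernet
+ * layer needs no direct dependency on the bridge code.
+ */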
+#define BRIDGE_INPUT(_ifp, _m) do { \
+ KASSERT(bridge_input_p != NULL, \
+ ("%s: if_bridge not loaded!", __func__)); \
+ _m = (*bridge_input_p)(_ifp, _m); \
+ if (_m != NULL) \
+ _ifp = _m->m_pkthdr.rcvif; \
+} while (0)
+
+#define BRIDGE_OUTPUT(_ifp, _m, _err) do { \
+ KASSERT(bridge_output_p != NULL, \
+ ("%s: if_bridge not loaded!", __func__)); \
+ _err = (*bridge_output_p)(_ifp, _m, NULL, NULL); \
+} while (0)
+
+extern struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
+extern int (*bridge_output_p)(struct ifnet *, struct mbuf *,
+ struct sockaddr *, struct rtentry *);
+extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+
+#endif /* _KERNEL */
diff --git a/freebsd/sys/net/if_clone.c b/freebsd/sys/net/if_clone.c
new file mode 100644
index 00000000..aca1276f
--- /dev/null
+++ b/freebsd/sys/net/if_clone.c
@@ -0,0 +1,617 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1980, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if.c 8.5 (Berkeley) 1/9/95
+ * $FreeBSD$
+ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/socket.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#if 0
+#include <freebsd/net/if_dl.h>
+#endif
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/radix.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+static void if_clone_free(struct if_clone *ifc);
+static int if_clone_createif(struct if_clone *ifc, char *name, size_t len,
+ caddr_t params);
+
+static struct mtx if_cloners_mtx;
+static VNET_DEFINE(int, if_cloners_count);
+VNET_DEFINE(LIST_HEAD(, if_clone), if_cloners);
+
+#define V_if_cloners_count VNET(if_cloners_count)
+#define V_if_cloners VNET(if_cloners)
+
+#define IF_CLONERS_LOCK_INIT() \
+ mtx_init(&if_cloners_mtx, "if_cloners lock", NULL, MTX_DEF)
+#define IF_CLONERS_LOCK_ASSERT() mtx_assert(&if_cloners_mtx, MA_OWNED)
+#define IF_CLONERS_LOCK() mtx_lock(&if_cloners_mtx)
+#define IF_CLONERS_UNLOCK() mtx_unlock(&if_cloners_mtx)
+
+#define IF_CLONE_LOCK_INIT(ifc) \
+ mtx_init(&(ifc)->ifc_mtx, "if_clone lock", NULL, MTX_DEF)
+#define IF_CLONE_LOCK_DESTROY(ifc) mtx_destroy(&(ifc)->ifc_mtx)
+#define IF_CLONE_LOCK_ASSERT(ifc) mtx_assert(&(ifc)->ifc_mtx, MA_OWNED)
+#define IF_CLONE_LOCK(ifc) mtx_lock(&(ifc)->ifc_mtx)
+#define IF_CLONE_UNLOCK(ifc) mtx_unlock(&(ifc)->ifc_mtx)
+
+#define IF_CLONE_ADDREF(ifc) \
+ do { \
+ IF_CLONE_LOCK(ifc); \
+ IF_CLONE_ADDREF_LOCKED(ifc); \
+ IF_CLONE_UNLOCK(ifc); \
+ } while (0)
+#define IF_CLONE_ADDREF_LOCKED(ifc) \
+ do { \
+ IF_CLONE_LOCK_ASSERT(ifc); \
+ KASSERT((ifc)->ifc_refcnt >= 0, \
+ ("negative refcnt %ld", (ifc)->ifc_refcnt)); \
+ (ifc)->ifc_refcnt++; \
+ } while (0)
+#define IF_CLONE_REMREF(ifc) \
+ do { \
+ IF_CLONE_LOCK(ifc); \
+ IF_CLONE_REMREF_LOCKED(ifc); \
+ } while (0)
+#define IF_CLONE_REMREF_LOCKED(ifc) \
+ do { \
+ IF_CLONE_LOCK_ASSERT(ifc); \
+ KASSERT((ifc)->ifc_refcnt > 0, \
+ ("bogus refcnt %ld", (ifc)->ifc_refcnt)); \
+ if (--(ifc)->ifc_refcnt == 0) { \
+ IF_CLONE_UNLOCK(ifc); \
+ if_clone_free(ifc); \
+ } else { \
+ /* silently free the lock */ \
+ IF_CLONE_UNLOCK(ifc); \
+ } \
+ } while (0)
+
+#define IFC_IFLIST_INSERT(_ifc, _ifp) \
+ LIST_INSERT_HEAD(&_ifc->ifc_iflist, _ifp, if_clones)
+#define IFC_IFLIST_REMOVE(_ifc, _ifp) \
+ LIST_REMOVE(_ifp, if_clones)
+
+static MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework");
+
+void
+vnet_if_clone_init(void)
+{
+
+ LIST_INIT(&V_if_cloners);
+}
+
+void
+if_clone_init(void)
+{
+
+ IF_CLONERS_LOCK_INIT();
+}
+
+/*
+ * Lookup and create a clone network interface.
+ */
+int
+if_clone_create(char *name, size_t len, caddr_t params)
+{
+ struct if_clone *ifc;
+
+ /* Try to find an applicable cloner for this request */
+ IF_CLONERS_LOCK();
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
+ if (ifc->ifc_match(ifc, name)) {
+ break;
+ }
+ }
+#ifdef VIMAGE
+ if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) {
+ CURVNET_SET_QUIET(vnet0);
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
+ if (ifc->ifc_match(ifc, name))
+ break;
+ }
+ CURVNET_RESTORE();
+ }
+#endif
+ IF_CLONERS_UNLOCK();
+
+ if (ifc == NULL)
+ return (EINVAL);
+
+ return (if_clone_createif(ifc, name, len, params));
+}
+
+/*
+ * Create a clone network interface.
+ */
+static int
+if_clone_createif(struct if_clone *ifc, char *name, size_t len, caddr_t params)
+{
+ int err;
+ struct ifnet *ifp;
+
+ if (ifunit(name) != NULL)
+ return (EEXIST);
+
+ err = (*ifc->ifc_create)(ifc, name, len, params);
+
+ if (!err) {
+ ifp = ifunit(name);
+ if (ifp == NULL)
+ panic("%s: lookup failed for %s", __func__, name);
+
+ if_addgroup(ifp, ifc->ifc_name);
+
+ IF_CLONE_LOCK(ifc);
+ IFC_IFLIST_INSERT(ifc, ifp);
+ IF_CLONE_UNLOCK(ifc);
+ }
+
+ return (err);
+}
+
+/*
+ * Lookup and destroy a clone network interface.
+ */
+int
+if_clone_destroy(const char *name)
+{
+ int err;
+ struct if_clone *ifc;
+ struct ifnet *ifp;
+
+ ifp = ifunit_ref(name);
+ if (ifp == NULL)
+ return (ENXIO);
+
+ /* Find the cloner for this interface */
+ IF_CLONERS_LOCK();
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
+ if (strcmp(ifc->ifc_name, ifp->if_dname) == 0) {
+ break;
+ }
+ }
+#ifdef VIMAGE
+ if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) {
+ CURVNET_SET_QUIET(vnet0);
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
+ if (ifc->ifc_match(ifc, name))
+ break;
+ }
+ CURVNET_RESTORE();
+ }
+#endif
+ IF_CLONERS_UNLOCK();
+ if (ifc == NULL) {
+ if_rele(ifp);
+ return (EINVAL);
+ }
+
+ err = if_clone_destroyif(ifc, ifp);
+ if_rele(ifp);
+	return (err);
+}
+
+/*
+ * Destroy a clone network interface.
+ */
+int
+if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp)
+{
+ int err;
+ struct ifnet *ifcifp;
+
+ if (ifc->ifc_destroy == NULL)
+		return (EOPNOTSUPP);
+
+ /*
+ * Given that the cloned ifnet might be attached to a different
+ * vnet from where its cloner was registered, we have to
+ * switch to the vnet context of the target vnet.
+ */
+ CURVNET_SET_QUIET(ifp->if_vnet);
+
+ IF_CLONE_LOCK(ifc);
+ LIST_FOREACH(ifcifp, &ifc->ifc_iflist, if_clones) {
+ if (ifcifp == ifp) {
+ IFC_IFLIST_REMOVE(ifc, ifp);
+ break;
+ }
+ }
+ IF_CLONE_UNLOCK(ifc);
+ if (ifcifp == NULL) {
+ CURVNET_RESTORE();
+ return (ENXIO); /* ifp is not on the list. */
+ }
+
+ if_delgroup(ifp, ifc->ifc_name);
+
+ err = (*ifc->ifc_destroy)(ifc, ifp);
+
+ if (err != 0) {
+ if_addgroup(ifp, ifc->ifc_name);
+
+ IF_CLONE_LOCK(ifc);
+ IFC_IFLIST_INSERT(ifc, ifp);
+ IF_CLONE_UNLOCK(ifc);
+ }
+ CURVNET_RESTORE();
+ return (err);
+}
+
+/*
+ * Register a network interface cloner.
+ */
+void
+if_clone_attach(struct if_clone *ifc)
+{
+ int len, maxclone;
+
+ /*
+ * Compute bitmap size and allocate it.
+ */
+ maxclone = ifc->ifc_maxunit + 1;
+ len = maxclone >> 3;
+ if ((len << 3) < maxclone)
+ len++;
+ ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
+ ifc->ifc_bmlen = len;
+ IF_CLONE_LOCK_INIT(ifc);
+ IF_CLONE_ADDREF(ifc);
+
+ IF_CLONERS_LOCK();
+ LIST_INSERT_HEAD(&V_if_cloners, ifc, ifc_list);
+ V_if_cloners_count++;
+ IF_CLONERS_UNLOCK();
+
+ LIST_INIT(&ifc->ifc_iflist);
+
+ if (ifc->ifc_attach != NULL)
+ (*ifc->ifc_attach)(ifc);
+ EVENTHANDLER_INVOKE(if_clone_event, ifc);
+}
+
+/*
+ * Unregister a network interface cloner.
+ */
+void
+if_clone_detach(struct if_clone *ifc)
+{
+ struct ifc_simple_data *ifcs = ifc->ifc_data;
+
+ IF_CLONERS_LOCK();
+ LIST_REMOVE(ifc, ifc_list);
+ V_if_cloners_count--;
+ IF_CLONERS_UNLOCK();
+
+ /* Allow all simples to be destroyed */
+ if (ifc->ifc_attach == ifc_simple_attach)
+ ifcs->ifcs_minifs = 0;
+
+ /* destroy all interfaces for this cloner */
+ while (!LIST_EMPTY(&ifc->ifc_iflist))
+ if_clone_destroyif(ifc, LIST_FIRST(&ifc->ifc_iflist));
+
+ IF_CLONE_REMREF(ifc);
+}
+
+static void
+if_clone_free(struct if_clone *ifc)
+{
+ for (int bytoff = 0; bytoff < ifc->ifc_bmlen; bytoff++) {
+ KASSERT(ifc->ifc_units[bytoff] == 0x00,
+ ("ifc_units[%d] is not empty", bytoff));
+ }
+
+ KASSERT(LIST_EMPTY(&ifc->ifc_iflist),
+ ("%s: ifc_iflist not empty", __func__));
+
+ IF_CLONE_LOCK_DESTROY(ifc);
+ free(ifc->ifc_units, M_CLONE);
+}
+
+/*
+ * Provide list of interface cloners to userspace.
+ */
+int
+if_clone_list(struct if_clonereq *ifcr)
+{
+ char *buf, *dst, *outbuf = NULL;
+ struct if_clone *ifc;
+ int buf_count, count, err = 0;
+
+ if (ifcr->ifcr_count < 0)
+ return (EINVAL);
+
+ IF_CLONERS_LOCK();
+ /*
+ * Set our internal output buffer size. We could end up not
+ * reporting a cloner that is added between the unlock and lock
+ * below, but that's not a major problem. Not caping our
+ * allocation to the number of cloners actually in the system
+ * could be because that would let arbitrary users cause us to
+ * allocate abritrary amounts of kernel memory.
+ */
+ buf_count = (V_if_cloners_count < ifcr->ifcr_count) ?
+ V_if_cloners_count : ifcr->ifcr_count;
+ IF_CLONERS_UNLOCK();
+
+ outbuf = malloc(IFNAMSIZ*buf_count, M_CLONE, M_WAITOK | M_ZERO);
+
+ IF_CLONERS_LOCK();
+
+ ifcr->ifcr_total = V_if_cloners_count;
+ if ((dst = ifcr->ifcr_buffer) == NULL) {
+ /* Just asking how many there are. */
+ goto done;
+ }
+ count = (V_if_cloners_count < buf_count) ?
+ V_if_cloners_count : buf_count;
+
+ for (ifc = LIST_FIRST(&V_if_cloners), buf = outbuf;
+ ifc != NULL && count != 0;
+ ifc = LIST_NEXT(ifc, ifc_list), count--, buf += IFNAMSIZ) {
+ strlcpy(buf, ifc->ifc_name, IFNAMSIZ);
+ }
+
+done:
+ IF_CLONERS_UNLOCK();
+ if (err == 0)
+ err = copyout(outbuf, dst, buf_count*IFNAMSIZ);
+ if (outbuf != NULL)
+ free(outbuf, M_CLONE);
+ return (err);
+}
+
+/*
+ * A utility function to extract unit numbers from interface names of
+ * the form name###.
+ *
+ * Returns 0 on success and an error on failure.
+ */
+int
+ifc_name2unit(const char *name, int *unit)
+{
+ const char *cp;
+ int cutoff = INT_MAX / 10;
+ int cutlim = INT_MAX % 10;
+
+ for (cp = name; *cp != '\0' && (*cp < '0' || *cp > '9'); cp++);
+ if (*cp == '\0') {
+ *unit = -1;
+ } else if (cp[0] == '0' && cp[1] != '\0') {
+ /* Disallow leading zeroes. */
+ return (EINVAL);
+ } else {
+ for (*unit = 0; *cp != '\0'; cp++) {
+ if (*cp < '0' || *cp > '9') {
+ /* Bogus unit number. */
+ return (EINVAL);
+ }
+ if (*unit > cutoff ||
+ (*unit == cutoff && *cp - '0' > cutlim))
+ return (EINVAL);
+ *unit = (*unit * 10) + (*cp - '0');
+ }
+ }
+
+ return (0);
+}
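
A standalone sketch of the parsing rules above (hypothetical userland
test program; EINVAL comes from errno.h here): scan to the first digit,
reject leading zeroes, and use the cutoff/cutlim pair to refuse values
that would overflow an int.

    #include <errno.h>
    #include <limits.h>
    #include <stdio.h>

    static int
    name2unit(const char *name, int *unit)
    {
            const char *cp;
            int cutoff = INT_MAX / 10;
            int cutlim = INT_MAX % 10;

            for (cp = name; *cp != '\0' && (*cp < '0' || *cp > '9'); cp++)
                    ;
            if (*cp == '\0') {
                    *unit = -1;             /* wildcard: no unit given */
                    return (0);
            }
            if (cp[0] == '0' && cp[1] != '\0')
                    return (EINVAL);        /* leading zeroes disallowed */
            for (*unit = 0; *cp != '\0'; cp++) {
                    if (*cp < '0' || *cp > '9')
                            return (EINVAL); /* digits must run to the end */
                    if (*unit > cutoff ||
                        (*unit == cutoff && *cp - '0' > cutlim))
                            return (EINVAL); /* would overflow int */
                    *unit = (*unit * 10) + (*cp - '0');
            }
            return (0);
    }

    int
    main(void)
    {
            int unit;

            name2unit("epair0", &unit);     /* unit == 0 */
            printf("epair0 -> %d\n", unit);
            name2unit("gif", &unit);        /* unit == -1 (wildcard) */
            printf("gif -> %d\n", unit);
            return (0);
    }
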
+
+int
+ifc_alloc_unit(struct if_clone *ifc, int *unit)
+{
+ int wildcard, bytoff, bitoff;
+ int err = 0;
+
+ IF_CLONE_LOCK(ifc);
+
+ bytoff = bitoff = 0;
+ wildcard = (*unit < 0);
+ /*
+ * Find a free unit if none was given.
+ */
+ if (wildcard) {
+ while ((bytoff < ifc->ifc_bmlen)
+ && (ifc->ifc_units[bytoff] == 0xff))
+ bytoff++;
+ if (bytoff >= ifc->ifc_bmlen) {
+ err = ENOSPC;
+ goto done;
+ }
+ while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
+ bitoff++;
+ *unit = (bytoff << 3) + bitoff;
+ }
+
+ if (*unit > ifc->ifc_maxunit) {
+ err = ENOSPC;
+ goto done;
+ }
+
+ if (!wildcard) {
+ bytoff = *unit >> 3;
+ bitoff = *unit - (bytoff << 3);
+ }
+
+	if ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0) {
+ err = EEXIST;
+ goto done;
+ }
+ /*
+ * Allocate the unit in the bitmap.
+ */
+ KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
+ ("%s: bit is already set", __func__));
+ ifc->ifc_units[bytoff] |= (1 << bitoff);
+ IF_CLONE_ADDREF_LOCKED(ifc);
+
+done:
+ IF_CLONE_UNLOCK(ifc);
+ return (err);
+}
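
The unit bitmap packs eight units per byte: unit N lives at bit (N & 7)
of byte (N >> 3). A small standalone sketch of the allocate and free
bit operations used above (hypothetical test, not kernel code):

    #include <assert.h>

    int
    main(void)
    {
            unsigned char units[2] = { 0, 0 };
            int unit = 11;
            int bytoff = unit >> 3;                 /* 1 */
            int bitoff = unit - (bytoff << 3);      /* 3, same as unit & 7 */

            units[bytoff] |= (1 << bitoff);         /* allocate unit 11 */
            assert(units[1] == 0x08);
            units[bytoff] &= ~(1 << bitoff);        /* free it again */
            assert(units[1] == 0x00);
            return (0);
    }
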
+
+void
+ifc_free_unit(struct if_clone *ifc, int unit)
+{
+ int bytoff, bitoff;
+
+
+ /*
+ * Compute offset in the bitmap and deallocate the unit.
+ */
+ bytoff = unit >> 3;
+ bitoff = unit - (bytoff << 3);
+
+ IF_CLONE_LOCK(ifc);
+ KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
+ ("%s: bit is already cleared", __func__));
+ ifc->ifc_units[bytoff] &= ~(1 << bitoff);
+ IF_CLONE_REMREF_LOCKED(ifc); /* releases lock */
+}
+
+void
+ifc_simple_attach(struct if_clone *ifc)
+{
+ int err;
+ int unit;
+ char name[IFNAMSIZ];
+ struct ifc_simple_data *ifcs = ifc->ifc_data;
+
+ KASSERT(ifcs->ifcs_minifs - 1 <= ifc->ifc_maxunit,
+ ("%s: %s requested more units than allowed (%d > %d)",
+ __func__, ifc->ifc_name, ifcs->ifcs_minifs,
+ ifc->ifc_maxunit + 1));
+
+ for (unit = 0; unit < ifcs->ifcs_minifs; unit++) {
+ snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
+ err = if_clone_createif(ifc, name, IFNAMSIZ, NULL);
+ KASSERT(err == 0,
+ ("%s: failed to create required interface %s",
+ __func__, name));
+ }
+}
+
+int
+ifc_simple_match(struct if_clone *ifc, const char *name)
+{
+ const char *cp;
+ int i;
+
+ /* Match the name */
+ for (cp = name, i = 0; i < strlen(ifc->ifc_name); i++, cp++) {
+ if (ifc->ifc_name[i] != *cp)
+ return (0);
+ }
+
+ /* Make sure there's a unit number or nothing after the name */
+ for (; *cp != '\0'; cp++) {
+ if (*cp < '0' || *cp > '9')
+ return (0);
+ }
+
+ return (1);
+}
+
+int
+ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
+{
+ char *dp;
+ int wildcard;
+ int unit;
+ int err;
+ struct ifc_simple_data *ifcs = ifc->ifc_data;
+
+ err = ifc_name2unit(name, &unit);
+ if (err != 0)
+ return (err);
+
+ wildcard = (unit < 0);
+
+ err = ifc_alloc_unit(ifc, &unit);
+ if (err != 0)
+ return (err);
+
+ err = ifcs->ifcs_create(ifc, unit, params);
+ if (err != 0) {
+ ifc_free_unit(ifc, unit);
+ return (err);
+ }
+
+ /* In the wildcard case, we need to update the name. */
+ if (wildcard) {
+ for (dp = name; *dp != '\0'; dp++);
+ if (snprintf(dp, len - (dp-name), "%d", unit) >
+ len - (dp-name) - 1) {
+ /*
+ * This can only be a programmer error and
+ * there's no straightforward way to recover if
+ * it happens.
+ */
+			panic("%s: interface name too long", __func__);
+ }
+
+ }
+
+ return (0);
+}
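
In the wildcard case the allocated unit number is appended in place, so
a request for "disc" comes back as, e.g., "disc3". A tiny standalone
sketch of that rename (IFNAMSIZ assumed to be 16 here; the kernel code
above additionally panics on truncation):

    #include <stdio.h>
    #include <string.h>

    #define IFNAMSIZ 16

    int
    main(void)
    {
            char name[IFNAMSIZ] = "disc";   /* wildcard request */
            int unit = 3;                   /* as handed back by the allocator */
            char *dp = name + strlen(name);

            snprintf(dp, sizeof(name) - (dp - name), "%d", unit);
            printf("%s\n", name);           /* prints "disc3" */
            return (0);
    }
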
+
+int
+ifc_simple_destroy(struct if_clone *ifc, struct ifnet *ifp)
+{
+ int unit;
+ struct ifc_simple_data *ifcs = ifc->ifc_data;
+
+ unit = ifp->if_dunit;
+
+ if (unit < ifcs->ifcs_minifs)
+ return (EINVAL);
+
+ ifcs->ifcs_destroy(ifp);
+
+ ifc_free_unit(ifc, unit);
+
+ return (0);
+}
diff --git a/freebsd/sys/net/if_clone.h b/freebsd/sys/net/if_clone.h
new file mode 100644
index 00000000..67de320b
--- /dev/null
+++ b/freebsd/sys/net/if_clone.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From: @(#)if.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_CLONE_HH_
+#define _NET_IF_CLONE_HH_
+
+#ifdef _KERNEL
+
+#define IFC_CLONE_INITIALIZER(name, data, maxunit, \
+ attach, match, create, destroy) \
+ { { 0 }, name, maxunit, NULL, 0, data, attach, match, create, destroy }
+
+/*
+ * Structure describing a `cloning' interface.
+ *
+ * List of locks
+ * (c) const until freeing
+ * (d) driver specific data, may need external protection.
+ * (e) locked by if_cloners_mtx
+ * (i) locked by ifc_mtx mtx
+ */
+struct if_clone {
+ LIST_ENTRY(if_clone) ifc_list; /* (e) On list of cloners */
+ const char *ifc_name; /* (c) Name of device, e.g. `gif' */
+ int ifc_maxunit; /* (c) Maximum unit number */
+ unsigned char *ifc_units; /* (i) Bitmap to handle units. */
+ /* Considered private, access */
+ /* via ifc_(alloc|free)_unit(). */
+ int ifc_bmlen; /* (c) Bitmap length. */
+ void *ifc_data; /* (*) Data for ifc_* functions. */
+
+ /* (c) Driver specific cloning functions. Called with no locks held. */
+ void (*ifc_attach)(struct if_clone *);
+ int (*ifc_match)(struct if_clone *, const char *);
+ int (*ifc_create)(struct if_clone *, char *, size_t, caddr_t);
+ int (*ifc_destroy)(struct if_clone *, struct ifnet *);
+
+	long	ifc_refcnt;		/* (i) Reference count. */
+	struct	mtx ifc_mtx;		/* Mutex to protect members. */
+ LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */
+};
+
+void if_clone_init(void);
+void if_clone_attach(struct if_clone *);
+void if_clone_detach(struct if_clone *);
+void vnet_if_clone_init(void);
+
+int if_clone_create(char *, size_t, caddr_t);
+int if_clone_destroy(const char *);
+int if_clone_destroyif(struct if_clone *, struct ifnet *);
+int if_clone_list(struct if_clonereq *);
+
+int ifc_name2unit(const char *name, int *unit);
+int ifc_alloc_unit(struct if_clone *, int *);
+void ifc_free_unit(struct if_clone *, int);
+
+/*
+ * The ifc_simple functions, structures, and macros implement basic
+ * cloning as in 5.[012].
+ */
+
+struct ifc_simple_data {
+ int ifcs_minifs; /* minimum number of interfaces */
+
+ int (*ifcs_create)(struct if_clone *, int, caddr_t);
+ void (*ifcs_destroy)(struct ifnet *);
+};
+
+/* interface clone event */
+typedef void (*if_clone_event_handler_t)(void *, struct if_clone *);
+EVENTHANDLER_DECLARE(if_clone_event, if_clone_event_handler_t);
+
+#define IFC_SIMPLE_DECLARE(name, minifs) \
+struct ifc_simple_data name##_cloner_data = \
+ {minifs, name##_clone_create, name##_clone_destroy}; \
+struct if_clone name##_cloner = \
+ IFC_CLONE_INITIALIZER(#name, &name##_cloner_data, IF_MAXUNIT, \
+ ifc_simple_attach, ifc_simple_match, ifc_simple_create, ifc_simple_destroy)
+
+void ifc_simple_attach(struct if_clone *);
+int ifc_simple_match(struct if_clone *, const char *);
+int ifc_simple_create(struct if_clone *, char *, size_t, caddr_t);
+int ifc_simple_destroy(struct if_clone *, struct ifnet *);
+
+#endif /* _KERNEL */
+
+#endif /* !_NET_IF_CLONE_HH_ */
diff --git a/freebsd/sys/net/if_dead.c b/freebsd/sys/net/if_dead.c
new file mode 100644
index 00000000..dcceaf25
--- /dev/null
+++ b/freebsd/sys/net/if_dead.c
@@ -0,0 +1,116 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2009 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * When an interface has been detached but not yet freed, we set the various
+ * ifnet function pointers to "ifdead" versions. This prevents unexpected
+ * calls from the network stack into the device driver after if_detach() has
+ * returned.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_var.h>
+
+static int
+ifdead_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
+ struct route *ro)
+{
+
+ m_freem(m);
+ return (ENXIO);
+}
+
+static void
+ifdead_input(struct ifnet *ifp, struct mbuf *m)
+{
+
+ m_freem(m);
+}
+
+static void
+ifdead_start(struct ifnet *ifp)
+{
+
+}
+
+static int
+ifdead_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+
+ return (ENXIO);
+}
+
+static void
+ifdead_watchdog(struct ifnet *ifp)
+{
+
+}
+
+static int
+ifdead_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+ struct sockaddr *sa)
+{
+
+ *llsa = NULL;
+ return (ENXIO);
+}
+
+static void
+ifdead_qflush(struct ifnet *ifp)
+{
+
+}
+
+static int
+ifdead_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+
+ m_freem(m);
+ return (ENXIO);
+}
+
+void
+if_dead(struct ifnet *ifp)
+{
+
+ ifp->if_output = ifdead_output;
+ ifp->if_input = ifdead_input;
+ ifp->if_start = ifdead_start;
+ ifp->if_ioctl = ifdead_ioctl;
+ ifp->if_watchdog = ifdead_watchdog;
+ ifp->if_resolvemulti = ifdead_resolvemulti;
+ ifp->if_qflush = ifdead_qflush;
+ ifp->if_transmit = ifdead_transmit;
+}
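
The same "swap the method table for safe stubs" idea can be sketched in
standalone C (hypothetical struct and names, for illustration only):

    #include <errno.h>
    #include <stdio.h>

    struct obj {
            int (*op)(struct obj *);
    };

    static int
    live_op(struct obj *o)
    {
            return (0);             /* real work would happen here */
    }

    static int
    dead_op(struct obj *o)
    {
            return (ENXIO);         /* backing device is gone; fail safely */
    }

    static void
    obj_dead(struct obj *o)
    {
            o->op = dead_op;        /* late callers now get ENXIO, not a crash */
    }

    int
    main(void)
    {
            struct obj o = { live_op };

            printf("live: %d\n", o.op(&o)); /* 0 */
            obj_dead(&o);
            printf("dead: %d\n", o.op(&o)); /* ENXIO */
            return (0);
    }
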
diff --git a/freebsd/sys/net/if_disc.c b/freebsd/sys/net/if_disc.c
new file mode 100644
index 00000000..09918bb4
--- /dev/null
+++ b/freebsd/sys/net/if_disc.c
@@ -0,0 +1,247 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From: @(#)if_loop.c 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+/*
+ * Discard interface driver for protocol testing and timing.
+ * (Based on the loopback.)
+ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/bpf.h>
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#ifdef TINY_DSMTU
+#define DSMTU (1024+512)
+#else
+#define DSMTU 65532
+#endif
+
+#define DISCNAME "disc"
+
+struct disc_softc {
+ struct ifnet *sc_ifp;
+};
+
+static int discoutput(struct ifnet *, struct mbuf *,
+ struct sockaddr *, struct route *);
+static void discrtrequest(int, struct rtentry *, struct rt_addrinfo *);
+static int discioctl(struct ifnet *, u_long, caddr_t);
+static int disc_clone_create(struct if_clone *, int, caddr_t);
+static void disc_clone_destroy(struct ifnet *);
+
+static MALLOC_DEFINE(M_DISC, DISCNAME, "Discard interface");
+
+IFC_SIMPLE_DECLARE(disc, 0);
+
+static int
+disc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct ifnet *ifp;
+ struct disc_softc *sc;
+
+ sc = malloc(sizeof(struct disc_softc), M_DISC, M_WAITOK | M_ZERO);
+ ifp = sc->sc_ifp = if_alloc(IFT_LOOP);
+ if (ifp == NULL) {
+ free(sc, M_DISC);
+ return (ENOSPC);
+ }
+
+ ifp->if_softc = sc;
+ if_initname(ifp, ifc->ifc_name, unit);
+ ifp->if_mtu = DSMTU;
+ /*
+ * IFF_LOOPBACK should not be removed from disc's flags because
+ * it controls what PF-specific routes are magically added when
+ * a network address is assigned to the interface. Things just
+ * won't work as intended w/o such routes because the output
+ * interface selection for a packet is totally route-driven.
+ * A valid alternative to IFF_LOOPBACK can be IFF_BROADCAST or
+ * IFF_POINTOPOINT, but it would result in different properties
+ * of the interface.
+ */
+ ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
+ ifp->if_drv_flags = IFF_DRV_RUNNING;
+ ifp->if_ioctl = discioctl;
+ ifp->if_output = discoutput;
+ ifp->if_hdrlen = 0;
+ ifp->if_addrlen = 0;
+ ifp->if_snd.ifq_maxlen = 20;
+ if_attach(ifp);
+ bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
+
+ return (0);
+}
+
+static void
+disc_clone_destroy(struct ifnet *ifp)
+{
+ struct disc_softc *sc;
+
+ sc = ifp->if_softc;
+
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+
+ free(sc, M_DISC);
+}
+
+static int
+disc_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ if_clone_attach(&disc_cloner);
+ break;
+ case MOD_UNLOAD:
+ if_clone_detach(&disc_cloner);
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t disc_mod = {
+ "if_disc",
+ disc_modevent,
+ NULL
+};
+
+DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+
+static int
+discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct route *ro)
+{
+ u_int32_t af;
+
+ M_ASSERTPKTHDR(m);
+
+ /* BPF writes need to be handled specially. */
+ if (dst->sa_family == AF_UNSPEC) {
+ bcopy(dst->sa_data, &af, sizeof(af));
+ dst->sa_family = af;
+ }
+
+ if (bpf_peers_present(ifp->if_bpf)) {
+ u_int af = dst->sa_family;
+ bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
+ }
+ m->m_pkthdr.rcvif = ifp;
+
+ ifp->if_opackets++;
+ ifp->if_obytes += m->m_pkthdr.len;
+
+ m_freem(m);
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
+{
+ RT_LOCK_ASSERT(rt);
+ rt->rt_rmx.rmx_mtu = DSMTU;
+}
+
+/*
+ * Process an ioctl request.
+ */
+static int
+discioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct ifaddr *ifa;
+ struct ifreq *ifr = (struct ifreq *)data;
+ int error = 0;
+
+ switch (cmd) {
+
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ ifa = (struct ifaddr *)data;
+		if (ifa != NULL)
+ ifa->ifa_rtrequest = discrtrequest;
+ /*
+ * Everything else is done at a higher level.
+ */
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+		if (ifr == NULL) {
+ error = EAFNOSUPPORT; /* XXX */
+ break;
+ }
+ switch (ifr->ifr_addr.sa_family) {
+
+#ifdef INET
+ case AF_INET:
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ break;
+#endif
+
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+ case SIOCSIFMTU:
+ ifp->if_mtu = ifr->ifr_mtu;
+ break;
+
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
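
discoutput() above hands bpf_mtap2() a 4-byte, host-order address
family, which is exactly the DLT_NULL link header a capture consumer
sees in front of each packet. A sketch of a hypothetical reader of that
framing (not part of the sources):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
            /* First 4 bytes of a DLT_NULL frame as written by the kernel;
             * 2 == AF_INET, shown here for a little-endian host. */
            unsigned char frame[4] = { 2, 0, 0, 0 };
            uint32_t af;

            memcpy(&af, frame, sizeof(af)); /* host byte order, not network */
            printf("address family: %u\n", af);
            return (0);
    }
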
diff --git a/freebsd/sys/net/if_dl.h b/freebsd/sys/net/if_dl.h
new file mode 100644
index 00000000..ad29f1de
--- /dev/null
+++ b/freebsd/sys/net/if_dl.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/net/if_dl.h>
diff --git a/freebsd/sys/net/if_edsc.c b/freebsd/sys/net/if_edsc.c
new file mode 100644
index 00000000..89618ce5
--- /dev/null
+++ b/freebsd/sys/net/if_edsc.c
@@ -0,0 +1,356 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From: @(#)if_loop.c 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+/*
+ * Discard interface driver for protocol testing and timing.
+ * Mimics an Ethernet device so that VLANs can be attached to it etc.
+ */
+
+#include <freebsd/sys/param.h> /* types, important constants */
+#include <freebsd/sys/kernel.h> /* SYSINIT for load-time initializations */
+#include <freebsd/sys/malloc.h> /* malloc(9) */
+#include <freebsd/sys/module.h> /* module(9) */
+#include <freebsd/sys/mbuf.h> /* mbuf(9) */
+#include <freebsd/sys/socket.h> /* struct ifreq */
+#include <freebsd/sys/sockio.h> /* socket ioctl's */
+/* #include <freebsd/sys/systm.h> if you need printf(9) or other all-purpose globals */
+
+#include <freebsd/net/bpf.h> /* bpf(9) */
+#include <freebsd/net/ethernet.h> /* Ethernet related constants and types */
+#include <freebsd/net/if.h> /* basic part of ifnet(9) */
+#include <freebsd/net/if_clone.h> /* network interface cloning */
+#include <freebsd/net/if_types.h> /* IFT_ETHER and friends */
+#include <freebsd/net/if_var.h> /* kernel-only part of ifnet(9) */
+
+/*
+ * Software configuration of an interface specific to this device type.
+ */
+struct edsc_softc {
+ struct ifnet *sc_ifp; /* ptr to generic interface configuration */
+
+ /*
+ * A non-null driver can keep various things here, for instance,
+ * the hardware revision, cached values of write-only registers, etc.
+ */
+};
+
+/*
+ * Simple cloning methods.
+ * IFC_SIMPLE_DECLARE() expects precisely these names.
+ */
+static int edsc_clone_create(struct if_clone *, int, caddr_t);
+static void edsc_clone_destroy(struct ifnet *);
+
+/*
+ * Interface driver methods.
+ */
+static void edsc_init(void *dummy);
+/* static void edsc_input(struct ifnet *ifp, struct mbuf *m); would be here */
+static int edsc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
+static void edsc_start(struct ifnet *ifp);
+
+/*
+ * We'll allocate softc instances from this.
+ */
+static MALLOC_DEFINE(M_EDSC, "edsc", "Ethernet discard interface");
+
+/*
+ * Attach to the interface cloning framework under the name of "edsc".
+ * The second argument is the number of units to be created from
+ * the outset. It's also the minimum number of units allowed.
+ * We don't want any units created as soon as the driver is loaded.
+ */
+IFC_SIMPLE_DECLARE(edsc, 0);
+
+/*
+ * Create an interface instance.
+ */
+static int
+edsc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct edsc_softc *sc;
+ struct ifnet *ifp;
+ static u_char eaddr[ETHER_ADDR_LEN]; /* 0:0:0:0:0:0 */
+
+ /*
+ * Allocate soft and ifnet structures. Link each to the other.
+ */
+ sc = malloc(sizeof(struct edsc_softc), M_EDSC, M_WAITOK | M_ZERO);
+ ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ free(sc, M_EDSC);
+ return (ENOSPC);
+ }
+
+ ifp->if_softc = sc;
+
+ /*
+ * Get a name for this particular interface in its ifnet structure.
+ */
+ if_initname(ifp, ifc->ifc_name, unit);
+
+ /*
+ * Typical Ethernet interface flags: we can do broadcast and
+ * multicast but can't hear our own broadcasts or multicasts.
+ */
+ ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
+
+ /*
+	 * We can pretend we have the whole set of hardware features
+ * because we just discard all packets we get from the upper layer.
+ * However, the features are disabled initially. They can be
+ * enabled via edsc_ioctl() when needed.
+ */
+ ifp->if_capabilities =
+ IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM |
+ IFCAP_HWCSUM | IFCAP_TSO |
+ IFCAP_JUMBO_MTU;
+ ifp->if_capenable = 0;
+
+ /*
+ * Set the interface driver methods.
+ */
+ ifp->if_init = edsc_init;
+ /* ifp->if_input = edsc_input; */
+ ifp->if_ioctl = edsc_ioctl;
+ ifp->if_start = edsc_start;
+
+ /*
+ * Set the maximum output queue length from the global parameter.
+ */
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+
+ /*
+ * Do ifnet initializations common to all Ethernet drivers
+ * and attach to the network interface framework.
+ * TODO: Pick a non-zero link level address.
+ */
+ ether_ifattach(ifp, eaddr);
+
+ /*
+ * Now we can mark the interface as running, i.e., ready
+ * for operation.
+ */
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+
+ return (0);
+}
+
+/*
+ * Destroy an interface instance.
+ */
+static void
+edsc_clone_destroy(struct ifnet *ifp)
+{
+ struct edsc_softc *sc = ifp->if_softc;
+
+ /*
+ * Detach from the network interface framework.
+ */
+ ether_ifdetach(ifp);
+
+ /*
+ * Free memory occupied by ifnet and softc.
+ */
+ if_free(ifp);
+ free(sc, M_EDSC);
+}
+
+/*
+ * This method is invoked from ether_ioctl() when it's time
+ * to bring up the hardware.
+ */
+static void
+edsc_init(void *dummy)
+{
+#if 0 /* what a hardware driver would do here... */
+	struct edsc_softc *sc = (struct edsc_softc *)dummy;
+ struct ifnet *ifp = sc->sc_ifp;
+
+ /* blah-blah-blah */
+#endif
+}
+
+/*
+ * Network interfaces are controlled via the ioctl(2) syscall.
+ */
+static int
+edsc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr = (struct ifreq *)data;
+
+ switch (cmd) {
+ case SIOCSIFCAP:
+#if 1
+ /*
+ * Just turn on any capabilities requested.
+ * The generic ifioctl() function has already made sure
+ * that they are supported, i.e., set in if_capabilities.
+ */
+ ifp->if_capenable = ifr->ifr_reqcap;
+#else
+ /*
+ * A h/w driver would need to analyze the requested
+ * bits and program the hardware, e.g.:
+ */
+ mask = ifp->if_capenable ^ ifr->ifr_reqcap;
+
+ if (mask & IFCAP_VLAN_HWTAGGING) {
+ ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
+
+ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
+ /* blah-blah-blah */
+ else
+ /* etc-etc-etc */
+ }
+#endif
+ break;
+
+ default:
+ /*
+ * Offload the rest onto the common Ethernet handler.
+ */
+ return (ether_ioctl(ifp, cmd, data));
+ }
+
+ return (0);
+}
+
+/*
+ * Process the output queue.
+ */
+static void
+edsc_start(struct ifnet *ifp)
+{
+ struct mbuf *m;
+
+ /*
+ * A hardware interface driver can set IFF_DRV_OACTIVE
+ * in ifp->if_drv_flags:
+ *
+ * ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ *
+ * to prevent if_start from being invoked again while the
+ * transmission is under way. The flag is to protect the
+ * device's transmitter, not the method itself. The output
+ * queue is locked and several threads can process it in
+ * parallel safely, so the driver can use other means to
+ * serialize access to the transmitter.
+ *
+ * If using IFF_DRV_OACTIVE, the driver should clear the flag
+	 * no earlier than when the current transmission is complete, e.g.,
+ * upon an interrupt from the device, not just before returning
+ * from if_start. This method merely starts the transmission,
+ * which may proceed asynchronously.
+ */
+
+ /*
+ * We loop getting packets from the queue until it's empty.
+ * A h/w driver would loop until the device can accept more
+ * data into its buffer, or while there are free transmit
+ * descriptors, or whatever.
+ */
+ for (;;) {
+ /*
+ * Try to dequeue one packet. Stop if the queue is empty.
+ * Use IF_DEQUEUE() here if ALTQ(9) support is unneeded.
+ */
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ if (m == NULL)
+ break;
+
+ /*
+ * Let bpf(9) at the packet.
+ */
+ BPF_MTAP(ifp, m);
+
+ /*
+ * Update the interface counters.
+ */
+ ifp->if_obytes += m->m_pkthdr.len;
+ ifp->if_opackets++;
+
+ /*
+ * Finally, just drop the packet.
+ * TODO: Reply to ARP requests unless IFF_NOARP is set.
+ */
+ m_freem(m);
+ }
+
+ /*
+ * ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ * would be here only if the transmission were synchronous.
+ */
+}
+
+/*
+ * This function provides handlers for module events, namely load and unload.
+ */
+static int
+edsc_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ /*
+ * Connect to the network interface cloning framework.
+ */
+ if_clone_attach(&edsc_cloner);
+ break;
+
+ case MOD_UNLOAD:
+ /*
+ * Disconnect from the cloning framework.
+ * Existing interfaces will be disposed of properly.
+ */
+ if_clone_detach(&edsc_cloner);
+ break;
+
+ default:
+ /*
+ * There are other event types, but we don't handle them.
+ * See module(9).
+ */
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t edsc_mod = {
+ "if_edsc", /* name */
+ edsc_modevent, /* event handler */
+ NULL /* additional data */
+};
+
+DECLARE_MODULE(if_edsc, edsc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/freebsd/sys/net/if_ef.c b/freebsd/sys/net/if_ef.c
new file mode 100644
index 00000000..8114806c
--- /dev/null
+++ b/freebsd/sys/net/if_ef.c
@@ -0,0 +1,610 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1999, 2000 Boris Popov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_ipx.h>
+#include <freebsd/local/opt_ef.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if_llc.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_arp.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/vnet.h>
+
+#ifdef INET
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/if_ether.h>
+#endif
+
+#ifdef IPX
+#include <freebsd/netipx/ipx.h>
+#include <freebsd/netipx/ipx_if.h>
+#endif
+
+/* If none of the supported layers is enabled explicitly, enable them all */
+#if !defined(ETHER_II) && !defined(ETHER_8023) && !defined(ETHER_8022) && \
+ !defined(ETHER_SNAP)
+#define ETHER_II 1
+#define ETHER_8023 1
+#define ETHER_8022 1
+#define ETHER_SNAP 1
+#endif
+
+/* internal frame types */
+#define ETHER_FT_EII 0 /* Ethernet_II - default */
+#define ETHER_FT_8023 1 /* 802.3 (Novell) */
+#define ETHER_FT_8022 2 /* 802.2 */
+#define ETHER_FT_SNAP 3 /* SNAP */
+#define EF_NFT 4 /* total number of frame types */
+
+#ifdef EF_DEBUG
+#define EFDEBUG(format, args...) printf("%s: "format, __func__ ,## args)
+#else
+#define EFDEBUG(format, args...)
+#endif
+
+#define EFERROR(format, args...) printf("%s: "format, __func__ ,## args)
+
+struct efnet {
+ struct ifnet *ef_ifp;
+ struct ifnet *ef_pifp;
+ int ef_frametype;
+};
+
+struct ef_link {
+ SLIST_ENTRY(ef_link) el_next;
+	struct ifnet	*el_ifp;	/* raw device for these clones */
+ struct efnet *el_units[EF_NFT]; /* our clones */
+};
+
+static SLIST_HEAD(ef_link_head, ef_link) efdev = {NULL};
+static int efcount;
+
+extern int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
+extern int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
+ struct sockaddr *dst, short *tp, int *hlen);
+
+/*
+static void ef_reset (struct ifnet *);
+*/
+static int ef_attach(struct efnet *sc);
+static int ef_detach(struct efnet *sc);
+static void ef_init(void *);
+static int ef_ioctl(struct ifnet *, u_long, caddr_t);
+static void ef_start(struct ifnet *);
+static int ef_input(struct ifnet*, struct ether_header *, struct mbuf *);
+static int ef_output(struct ifnet *ifp, struct mbuf **mp,
+ struct sockaddr *dst, short *tp, int *hlen);
+
+static int ef_load(void);
+static int ef_unload(void);
+
+/*
+ * Install the interface; most of the structure initialization is done in
+ * ef_clone()
+ */
+static int
+ef_attach(struct efnet *sc)
+{
+ struct ifnet *ifp = sc->ef_ifp;
+
+ ifp->if_start = ef_start;
+ ifp->if_init = ef_init;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+ /*
+ * Attach the interface
+ */
+ ether_ifattach(ifp, IF_LLADDR(sc->ef_pifp));
+
+ ifp->if_resolvemulti = 0;
+ ifp->if_type = IFT_XETHER;
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+
+ EFDEBUG("%s: attached\n", ifp->if_xname);
+ return 1;
+}
+
+/*
+ * This is for _testing_only_; it just removes the interface from the
+ * interface list
+ */
+static int
+ef_detach(struct efnet *sc)
+{
+ struct ifnet *ifp = sc->ef_ifp;
+ int s;
+
+ s = splimp();
+
+ ether_ifdetach(ifp);
+ if_free(ifp);
+
+ splx(s);
+ return 0;
+}
+
+static void
+ef_init(void *foo)
+{
+	return;
+}
+
+static int
+ef_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct efnet *sc = ifp->if_softc;
+ struct ifaddr *ifa = (struct ifaddr*)data;
+ int s, error;
+
+	EFDEBUG("IOCTL %lu for %s\n", cmd, ifp->if_xname);
+ error = 0;
+ s = splimp();
+ switch (cmd) {
+ case SIOCSIFFLAGS:
+ error = 0;
+ break;
+ case SIOCSIFADDR:
+ if (sc->ef_frametype == ETHER_FT_8023 &&
+ ifa->ifa_addr->sa_family != AF_IPX) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ ifp->if_flags |= IFF_UP;
+ /* FALL THROUGH */
+ default:
+ error = ether_ioctl(ifp, cmd, data);
+ break;
+ }
+ splx(s);
+ return error;
+}
+
+/*
+ * Currently packet prepared in the ether_output(), but this can be a better
+ * Currently the packet is prepared in ether_output(), but this could be a
+ * better place to do it.
+static void
+ef_start(struct ifnet *ifp)
+{
+ struct efnet *sc = (struct efnet*)ifp->if_softc;
+ struct ifnet *p;
+ struct mbuf *m;
+ int error;
+
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ p = sc->ef_pifp;
+
+ EFDEBUG("\n");
+ for (;;) {
+ IF_DEQUEUE(&ifp->if_snd, m);
+		if (m == NULL)
+ break;
+ BPF_MTAP(ifp, m);
+ error = p->if_transmit(p, m);
+ if (error) {
+ ifp->if_oerrors++;
+ continue;
+ }
+ ifp->if_opackets++;
+ }
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ return;
+}
+
+/*
+ * Inline functions avoid procedure-call and parameter-passing overhead
+ * while keeping the code simple.
+ */
+static int __inline
+ef_inputEII(struct mbuf *m, struct ether_header *eh, u_short ether_type)
+{
+ int isr;
+
+ switch(ether_type) {
+#ifdef IPX
+ case ETHERTYPE_IPX:
+ isr = NETISR_IPX;
+ break;
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ if ((m = ip_fastforward(m)) == NULL)
+ return (0);
+ isr = NETISR_IP;
+ break;
+
+ case ETHERTYPE_ARP:
+ isr = NETISR_ARP;
+ break;
+#endif
+ default:
+ return (EPROTONOSUPPORT);
+ }
+ netisr_dispatch(isr, m);
+ return (0);
+}
+
+static int __inline
+ef_inputSNAP(struct mbuf *m, struct ether_header *eh, struct llc* l,
+ u_short ether_type)
+{
+ int isr;
+
+ switch(ether_type) {
+#ifdef IPX
+ case ETHERTYPE_IPX:
+ m_adj(m, 8);
+ isr = NETISR_IPX;
+ break;
+#endif
+ default:
+ return (EPROTONOSUPPORT);
+ }
+ netisr_dispatch(isr, m);
+ return (0);
+}
+
+static int __inline
+ef_input8022(struct mbuf *m, struct ether_header *eh, struct llc* l,
+ u_short ether_type)
+{
+ int isr;
+
+ switch(ether_type) {
+#ifdef IPX
+ case 0xe0:
+ m_adj(m, 3);
+ isr = NETISR_IPX;
+ break;
+#endif
+ default:
+ return (EPROTONOSUPPORT);
+ }
+ netisr_dispatch(isr, m);
+ return (0);
+}
+
+/*
+ * Called from ether_input()
+ */
+static int
+ef_input(struct ifnet *ifp, struct ether_header *eh, struct mbuf *m)
+{
+ u_short ether_type;
+ int ft = -1;
+ struct efnet *efp;
+ struct ifnet *eifp;
+ struct llc *l;
+ struct ef_link *efl;
+ int isr;
+
+ ether_type = ntohs(eh->ether_type);
+ l = NULL;
+ if (ether_type < ETHERMTU) {
+ l = mtod(m, struct llc*);
+ if (l->llc_dsap == 0xff && l->llc_ssap == 0xff) {
+ /*
+ * Novell's "802.3" frame
+ */
+ ft = ETHER_FT_8023;
+ } else if (l->llc_dsap == 0xaa && l->llc_ssap == 0xaa) {
+ /*
+ * 802.2/SNAP
+ */
+ ft = ETHER_FT_SNAP;
+ ether_type = ntohs(l->llc_un.type_snap.ether_type);
+ } else if (l->llc_dsap == l->llc_ssap) {
+ /*
+ * 802.3/802.2
+ */
+ ft = ETHER_FT_8022;
+ ether_type = l->llc_ssap;
+ }
+ } else
+ ft = ETHER_FT_EII;
+
+ if (ft == -1) {
+ EFDEBUG("Unrecognised ether_type %x\n", ether_type);
+ return EPROTONOSUPPORT;
+ }
+
+ /*
+	 * Check whether an interface is configured for the given frame type
+ */
+ efp = NULL;
+ SLIST_FOREACH(efl, &efdev, el_next) {
+ if (efl->el_ifp == ifp) {
+ efp = efl->el_units[ft];
+ break;
+ }
+ }
+ if (efp == NULL) {
+ EFDEBUG("Can't find if for %d\n", ft);
+ return EPROTONOSUPPORT;
+ }
+ eifp = efp->ef_ifp;
+ if ((eifp->if_flags & IFF_UP) == 0)
+ return EPROTONOSUPPORT;
+ eifp->if_ibytes += m->m_pkthdr.len + sizeof (*eh);
+ m->m_pkthdr.rcvif = eifp;
+
+ BPF_MTAP2(eifp, eh, ETHER_HDR_LEN, m);
+ /*
+	 * Now we are ready to adjust mbufs and pass them to protocol intr's
+ */
+ switch(ft) {
+ case ETHER_FT_EII:
+ return (ef_inputEII(m, eh, ether_type));
+#ifdef IPX
+ case ETHER_FT_8023: /* only IPX can be here */
+ isr = NETISR_IPX;
+ break;
+#endif
+ case ETHER_FT_SNAP:
+ return (ef_inputSNAP(m, eh, l, ether_type));
+ case ETHER_FT_8022:
+ return (ef_input8022(m, eh, l, ether_type));
+ default:
+ EFDEBUG("No support for frame %d and proto %04x\n",
+ ft, ether_type);
+ return (EPROTONOSUPPORT);
+ }
+ netisr_dispatch(isr, m);
+ return (0);
+}
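
The classification above condenses to: a type/length field of ETHERMTU
(1500) or more is an EtherType, hence Ethernet II; below that it is an
802.3 length and the first LLC bytes decide. A standalone sketch (the
FT_* constants are local stand-ins, not the kernel's):

    #include <stdio.h>

    #define FT_EII  0       /* Ethernet II */
    #define FT_8023 1       /* Novell "raw" 802.3 */
    #define FT_8022 2       /* plain 802.2 LLC */
    #define FT_SNAP 3       /* 802.2 + SNAP */

    static int
    classify(unsigned short typelen, unsigned char dsap, unsigned char ssap)
    {
            if (typelen >= 1500)            /* an EtherType, not a length */
                    return (FT_EII);
            if (dsap == 0xff && ssap == 0xff)
                    return (FT_8023);
            if (dsap == 0xaa && ssap == 0xaa)
                    return (FT_SNAP);
            if (dsap == ssap)
                    return (FT_8022);
            return (-1);                    /* unrecognized framing */
    }

    int
    main(void)
    {
            printf("%d\n", classify(0x0800, 0, 0));    /* 0: IPv4 over EII */
            printf("%d\n", classify(100, 0xaa, 0xaa)); /* 3: SNAP */
            return (0);
    }
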
+
+static int
+ef_output(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, short *tp,
+ int *hlen)
+{
+ struct efnet *sc = (struct efnet*)ifp->if_softc;
+ struct mbuf *m = *mp;
+ u_char *cp;
+ short type;
+
+ if (ifp->if_type != IFT_XETHER)
+ return ENETDOWN;
+ switch (sc->ef_frametype) {
+ case ETHER_FT_EII:
+#ifdef IPX
+ type = htons(ETHERTYPE_IPX);
+#else
+ return EPFNOSUPPORT;
+#endif
+ break;
+ case ETHER_FT_8023:
+ type = htons(m->m_pkthdr.len);
+ break;
+ case ETHER_FT_8022:
+ M_PREPEND(m, ETHER_HDR_LEN + 3, M_WAIT);
+ /*
+ * Ensure that ethernet header and next three bytes
+ * will fit into single mbuf
+ */
+ m = m_pullup(m, ETHER_HDR_LEN + 3);
+ if (m == NULL) {
+ *mp = NULL;
+ return ENOBUFS;
+ }
+ m_adj(m, ETHER_HDR_LEN);
+ type = htons(m->m_pkthdr.len);
+ cp = mtod(m, u_char *);
+ *cp++ = 0xE0;
+ *cp++ = 0xE0;
+ *cp++ = 0x03;
+ *hlen += 3;
+ break;
+ case ETHER_FT_SNAP:
+ M_PREPEND(m, 8, M_WAIT);
+ type = htons(m->m_pkthdr.len);
+ cp = mtod(m, u_char *);
+ bcopy("\xAA\xAA\x03\x00\x00\x00\x81\x37", cp, 8);
+ *hlen += 8;
+ break;
+ default:
+ return EPFNOSUPPORT;
+ }
+ *mp = m;
+ *tp = type;
+ return 0;
+}
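
For reference, the prefixes ef_output() builds are the 3-byte 802.2 LLC
header (IPX SAP 0xE0 twice, UI control 0x03) and the 8-byte LLC/SNAP
header (SNAP SAPs 0xAA, UI control, zero OUI, EtherType 0x8137 for
IPX). A standalone sketch of the byte layouts:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned char llc8022[3] = { 0xE0, 0xE0, 0x03 };
            unsigned char llcsnap[8] = {
                    0xAA, 0xAA, 0x03,       /* DSAP, SSAP, UI control */
                    0x00, 0x00, 0x00,       /* OUI: encapsulated Ethernet */
                    0x81, 0x37              /* EtherType: IPX */
            };

            printf("802.2 adds %zu bytes, SNAP adds %zu bytes\n",
                sizeof(llc8022), sizeof(llcsnap));
            return (0);
    }
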
+
+/*
+ * Create clone from the given interface
+ */
+static int
+ef_clone(struct ef_link *efl, int ft)
+{
+ struct efnet *efp;
+ struct ifnet *eifp;
+ struct ifnet *ifp = efl->el_ifp;
+
+ efp = (struct efnet*)malloc(sizeof(struct efnet), M_IFADDR,
+ M_WAITOK | M_ZERO);
+ if (efp == NULL)
+ return ENOMEM;
+ efp->ef_pifp = ifp;
+ efp->ef_frametype = ft;
+ eifp = efp->ef_ifp = if_alloc(IFT_ETHER);
+ if (eifp == NULL) {
+ free(efp, M_IFADDR);
+ return (ENOSPC);
+ }
+ snprintf(eifp->if_xname, IFNAMSIZ,
+ "%sf%d", ifp->if_xname, efp->ef_frametype);
+ eifp->if_dname = "ef";
+ eifp->if_dunit = IF_DUNIT_NONE;
+ eifp->if_softc = efp;
+ if (ifp->if_ioctl)
+ eifp->if_ioctl = ef_ioctl;
+ efl->el_units[ft] = efp;
+ return 0;
+}
+
+static int
+ef_load(void)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+ struct ifnet *ifp;
+ struct efnet *efp;
+ struct ef_link *efl = NULL, *efl_temp;
+ int error = 0, d;
+
+ VNET_LIST_RLOCK();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+
+ /*
+ * XXXRW: The following loop walks the ifnet list while
+ * modifying it, something not well-supported by ifnet
+ * locking. To avoid lock upgrade/recursion issues, manually
+ * acquire a write lock of ifnet_sxlock here, rather than a
+ * read lock, so that when if_alloc() recurses the lock, we
+ * don't panic. This structure, in which if_ef automatically
+ * attaches to all ethernet interfaces, should be replaced
+ * with a model like that found in if_vlan, in which
+ * interfaces are explicitly configured, which would avoid
+ * this (and other) problems.
+ */
+ sx_xlock(&ifnet_sxlock);
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (ifp->if_type != IFT_ETHER) continue;
+ EFDEBUG("Found interface %s\n", ifp->if_xname);
+ efl = (struct ef_link*)malloc(sizeof(struct ef_link),
+ M_IFADDR, M_WAITOK | M_ZERO);
+ if (efl == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+ efl->el_ifp = ifp;
+#ifdef ETHER_II
+ error = ef_clone(efl, ETHER_FT_EII);
+ if (error) break;
+#endif
+#ifdef ETHER_8023
+ error = ef_clone(efl, ETHER_FT_8023);
+ if (error) break;
+#endif
+#ifdef ETHER_8022
+ error = ef_clone(efl, ETHER_FT_8022);
+ if (error) break;
+#endif
+#ifdef ETHER_SNAP
+ error = ef_clone(efl, ETHER_FT_SNAP);
+ if (error) break;
+#endif
+ efcount++;
+ SLIST_INSERT_HEAD(&efdev, efl, el_next);
+ }
+ sx_xunlock(&ifnet_sxlock);
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK();
+ if (error) {
+ if (efl)
+ SLIST_INSERT_HEAD(&efdev, efl, el_next);
+ SLIST_FOREACH_SAFE(efl, &efdev, el_next, efl_temp) {
+ for (d = 0; d < EF_NFT; d++)
+ if (efl->el_units[d]) {
+ if (efl->el_units[d]->ef_pifp != NULL)
+ if_free(efl->el_units[d]->ef_pifp);
+ free(efl->el_units[d], M_IFADDR);
+ }
+ free(efl, M_IFADDR);
+ }
+ return error;
+ }
+ SLIST_FOREACH(efl, &efdev, el_next) {
+ for (d = 0; d < EF_NFT; d++) {
+ efp = efl->el_units[d];
+ if (efp)
+ ef_attach(efp);
+ }
+ }
+ ef_inputp = ef_input;
+ ef_outputp = ef_output;
+ EFDEBUG("Loaded\n");
+ return 0;
+}
+
+static int
+ef_unload(void)
+{
+ struct efnet *efp;
+ struct ef_link *efl;
+ int d;
+
+ ef_inputp = NULL;
+ ef_outputp = NULL;
+ SLIST_FOREACH(efl, &efdev, el_next) {
+ for (d = 0; d < EF_NFT; d++) {
+ efp = efl->el_units[d];
+ if (efp) {
+ ef_detach(efp);
+ }
+ }
+ }
+ EFDEBUG("Unloaded\n");
+ return 0;
+}
+
+static int
+if_ef_modevent(module_t mod, int type, void *data)
+{
+ switch ((modeventtype_t)type) {
+ case MOD_LOAD:
+ return ef_load();
+ case MOD_UNLOAD:
+ return ef_unload();
+ default:
+ return EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static moduledata_t if_ef_mod = {
+ "if_ef", if_ef_modevent, NULL
+};
+
+DECLARE_MODULE(if_ef, if_ef_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c
new file mode 100644
index 00000000..6bbb6ceb
--- /dev/null
+++ b/freebsd/sys/net/if_enc.c
@@ -0,0 +1,375 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2006 The FreeBSD Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/machine/bus.h>
+#include <freebsd/sys/rman.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/pfil.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/local/opt_inet6.h>
+
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#endif
+
+#include <freebsd/local/opt_enc.h>
+#include <freebsd/netipsec/ipsec.h>
+#include <freebsd/netipsec/xform.h>
+
+#define ENCMTU (1024+512)
+
+/* XXX this define must have the same value as in OpenBSD */
+#define M_CONF 0x0400 /* payload was encrypted (ESP-transport) */
+#define M_AUTH 0x0800 /* payload was authenticated (AH or ESP auth) */
+#define M_AUTH_AH 0x2000 /* header was authenticated (AH) */
+
+struct enchdr {
+ u_int32_t af;
+ u_int32_t spi;
+ u_int32_t flags;
+};
+
+struct ifnet *encif;
+static struct mtx enc_mtx;
+
+struct enc_softc {
+ struct ifnet *sc_ifp;
+};
+
+static int enc_ioctl(struct ifnet *, u_long, caddr_t);
+static int enc_output(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct route *ro);
+static int enc_clone_create(struct if_clone *, int, caddr_t);
+static void enc_clone_destroy(struct ifnet *);
+
+IFC_SIMPLE_DECLARE(enc, 1);
+
+/*
+ * Sysctls.
+ */
+
+/*
+ * Before and after are relative to when we are stripping the
+ * outer IP header.
+ */
+SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl");
+
+SYSCTL_NODE(_net_enc, OID_AUTO, in, CTLFLAG_RW, 0, "enc input sysctl");
+static int ipsec_filter_mask_in = ENC_BEFORE;
+SYSCTL_XINT(_net_enc_in, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW,
+ &ipsec_filter_mask_in, 0, "IPsec input firewall filter mask");
+static int ipsec_bpf_mask_in = ENC_BEFORE;
+SYSCTL_XINT(_net_enc_in, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW,
+ &ipsec_bpf_mask_in, 0, "IPsec input bpf mask");
+
+SYSCTL_NODE(_net_enc, OID_AUTO, out, CTLFLAG_RW, 0, "enc output sysctl");
+static int ipsec_filter_mask_out = ENC_BEFORE;
+SYSCTL_XINT(_net_enc_out, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW,
+ &ipsec_filter_mask_out, 0, "IPsec output firewall filter mask");
+static int ipsec_bpf_mask_out = ENC_BEFORE|ENC_AFTER;
+SYSCTL_XINT(_net_enc_out, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW,
+ &ipsec_bpf_mask_out, 0, "IPsec output bpf mask");
+
+static void
+enc_clone_destroy(struct ifnet *ifp)
+{
+ KASSERT(ifp != encif, ("%s: destroying encif", __func__));
+
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+}
+
+static int
+enc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct ifnet *ifp;
+ struct enc_softc *sc;
+
+ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+ ifp = sc->sc_ifp = if_alloc(IFT_ENC);
+ if (ifp == NULL) {
+ free(sc, M_DEVBUF);
+ return (ENOSPC);
+ }
+
+ if_initname(ifp, ifc->ifc_name, unit);
+ ifp->if_mtu = ENCMTU;
+ ifp->if_ioctl = enc_ioctl;
+ ifp->if_output = enc_output;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ ifp->if_softc = sc;
+ if_attach(ifp);
+ bpfattach(ifp, DLT_ENC, sizeof(struct enchdr));
+
+ mtx_lock(&enc_mtx);
+ /* grab a pointer to enc0, ignore the rest */
+ if (encif == NULL)
+ encif = ifp;
+ mtx_unlock(&enc_mtx);
+
+ return (0);
+}
+
+static int
+enc_modevent(module_t mod, int type, void *data)
+{
+ switch (type) {
+ case MOD_LOAD:
+ mtx_init(&enc_mtx, "enc mtx", NULL, MTX_DEF);
+ if_clone_attach(&enc_cloner);
+ break;
+ case MOD_UNLOAD:
+ printf("enc module unload - not possible for this module\n");
+ return (EINVAL);
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t enc_mod = {
+ "enc",
+ enc_modevent,
+ 0
+};
+
+DECLARE_MODULE(enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+
+static int
+enc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct route *ro)
+{
+ m_freem(m);
+ return (0);
+}
+
+/*
+ * Process an ioctl request.
+ */
+/* ARGSUSED */
+static int
+enc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ int error = 0;
+
+ mtx_lock(&enc_mtx);
+
+ switch (cmd) {
+
+ case SIOCSIFFLAGS:
+ if (ifp->if_flags & IFF_UP)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ else
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
+ break;
+
+ default:
+ error = EINVAL;
+ }
+
+ mtx_unlock(&enc_mtx);
+ return (error);
+}
+
+int
+ipsec_filter(struct mbuf **mp, int dir, int flags)
+{
+ int error, i;
+ struct ip *ip;
+
+ KASSERT(encif != NULL, ("%s: encif is null", __func__));
+ KASSERT(flags & (ENC_IN|ENC_OUT),
+ ("%s: invalid flags: %04x", __func__, flags));
+
+ if ((encif->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return (0);
+
+ if (flags & ENC_IN) {
+ if ((flags & ipsec_filter_mask_in) == 0)
+ return (0);
+ } else {
+ if ((flags & ipsec_filter_mask_out) == 0)
+ return (0);
+ }
+
+ /* Skip pfil(9) if no filters are loaded */
+ if (!(PFIL_HOOKED(&V_inet_pfil_hook)
+#ifdef INET6
+ || PFIL_HOOKED(&V_inet6_pfil_hook)
+#endif
+ )) {
+ return (0);
+ }
+
+ i = min((*mp)->m_pkthdr.len, max_protohdr);
+ if ((*mp)->m_len < i) {
+ *mp = m_pullup(*mp, i);
+ if (*mp == NULL) {
+ printf("%s: m_pullup failed\n", __func__);
+ return (-1);
+ }
+ }
+
+ error = 0;
+ ip = mtod(*mp, struct ip *);
+ switch (ip->ip_v) {
+ case 4:
+ /*
+ * before calling the firewall, swap fields the same as
+ * IP does. here we assume the header is contiguous
+ */
+ ip->ip_len = ntohs(ip->ip_len);
+ ip->ip_off = ntohs(ip->ip_off);
+
+ error = pfil_run_hooks(&V_inet_pfil_hook, mp,
+ encif, dir, NULL);
+
+ if (*mp == NULL || error != 0)
+ break;
+
+ /* restore byte ordering */
+ ip = mtod(*mp, struct ip *);
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ break;
+
+#ifdef INET6
+ case 6:
+ error = pfil_run_hooks(&V_inet6_pfil_hook, mp,
+ encif, dir, NULL);
+ break;
+#endif
+ default:
+ printf("%s: unknown IP version\n", __func__);
+ }
+
+ /*
+ * If the mbuf was consumed by the filter for requeueing (dummynet, etc)
+ * then error will be zero but we still want to return an error to our
+ * caller so the null mbuf isn't forwarded further.
+ */
+ if (*mp == NULL && error == 0)
+ return (-1); /* Consumed by the filter */
+ if (*mp == NULL)
+ return (error);
+ if (error != 0)
+ goto bad;
+
+ return (error);
+
+bad:
+ m_freem(*mp);
+ *mp = NULL;
+ return (error);
+}
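
The byte-order dance above exists because FreeBSD's pfil(9) consumers
historically expect ip_len and ip_off in host order. A trivial
standalone sketch of the swap, filter, restore sequence:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint16_t ip_len = htons(1500);  /* as it sits in the packet */

            ip_len = ntohs(ip_len);         /* host order for the hooks */
            printf("filter sees length %u\n", ip_len);
            ip_len = htons(ip_len);         /* back to network order */
            return (0);
    }
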
+
+void
+ipsec_bpf(struct mbuf *m, struct secasvar *sav, int af, int flags)
+{
+ int mflags;
+ struct enchdr hdr;
+
+ KASSERT(encif != NULL, ("%s: encif is null", __func__));
+ KASSERT(flags & (ENC_IN|ENC_OUT),
+ ("%s: invalid flags: %04x", __func__, flags));
+
+ if ((encif->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return;
+
+ if (flags & ENC_IN) {
+ if ((flags & ipsec_bpf_mask_in) == 0)
+ return;
+ } else {
+ if ((flags & ipsec_bpf_mask_out) == 0)
+ return;
+ }
+
+ if (bpf_peers_present(encif->if_bpf)) {
+ mflags = 0;
+ hdr.spi = 0;
+ if (!sav) {
+ struct m_tag *mtag;
+ mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
+ if (mtag != NULL) {
+ struct tdb_ident *tdbi;
+ tdbi = (struct tdb_ident *) (mtag + 1);
+ if (tdbi->alg_enc != SADB_EALG_NONE)
+ mflags |= M_CONF;
+ if (tdbi->alg_auth != SADB_AALG_NONE)
+ mflags |= M_AUTH;
+ hdr.spi = tdbi->spi;
+ }
+ } else {
+ if (sav->alg_enc != SADB_EALG_NONE)
+ mflags |= M_CONF;
+ if (sav->alg_auth != SADB_AALG_NONE)
+ mflags |= M_AUTH;
+ hdr.spi = sav->spi;
+ }
+
+ /*
+ * We need to prepend the address family as a four byte
+ * field. Cons up a dummy header to pacify bpf. This
+ * is safe because bpf will only read from the mbuf
+		 * (i.e., it won't try to free it or keep a pointer
+		 * to it).
+ */
+ hdr.af = af;
+ /* hdr.spi already set above */
+ hdr.flags = mflags;
+
+ bpf_mtap2(encif->if_bpf, &hdr, sizeof(hdr), m);
+ }
+}
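
A hypothetical consumer of the DLT_ENC framing built above would read
the 12-byte enchdr (af, spi, flags, all in host byte order) before the
inner packet. A standalone sketch (struct layout copied from above):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct enchdr {
            uint32_t af;
            uint32_t spi;
            uint32_t flags;
    };

    int
    main(void)
    {
            unsigned char frame[sizeof(struct enchdr)] = { 0 };
            struct enchdr hdr;

            frame[0] = 2;           /* AF_INET on a little-endian host */
            memcpy(&hdr, frame, sizeof(hdr));
            printf("af=%u spi=0x%08x flags=0x%x\n",
                hdr.af, hdr.spi, hdr.flags);
            return (0);
    }
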
diff --git a/freebsd/sys/net/if_enc.h b/freebsd/sys/net/if_enc.h
new file mode 100644
index 00000000..59a55fcf
--- /dev/null
+++ b/freebsd/sys/net/if_enc.h
@@ -0,0 +1,35 @@
+/*-
+ * Copyright (c) 2008 The FreeBSD Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_ENC_H
+#define _NET_IF_ENC_H
+
+extern struct ifnet *encif;
+
+#endif /* _NET_IF_ENC_H */
diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c
new file mode 100644
index 00000000..65baeab8
--- /dev/null
+++ b/freebsd/sys/net/if_epair.c
@@ -0,0 +1,955 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2008 The FreeBSD Foundation
+ * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org>
+ * All rights reserved.
+ *
+ * This software was developed by CK Software GmbH under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * A pair of virtual, back-to-back connected, Ethernet-like interfaces
+ * (``two interfaces with a virtual cross-over cable'').
+ *
+ * This is mostly intended to be used to provide connectivity between
+ * different virtual network stack instances.
+ */
+/*
+ * Things to re-think once we have more experience:
+ * - ifp->if_reassign function once we can test with vimage. Depending on
+ * how if_vmove() is going to be improved.
+ * - Real random etheraddrs that are checked to be uniquish; we would need
+ * to re-do them in case we move the interface between network stacks
+ * in a private if_reassign function.
+ * In case we bridge to a real interface/network or between independent
+ * epairs on multiple stacks/machines, we may need this.
+ * For now let the user handle that case.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/refcount.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/smp.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/types.h>
+
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/vnet.h>
+
+#define EPAIRNAME "epair"
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
+
+#ifdef EPAIR_DEBUG
+static int epair_debug = 0;
+SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW,
+ &epair_debug, 0, "if_epair(4) debugging.");
+#define DPRINTF(fmt, arg...) \
+ if (epair_debug) \
+ printf("[%s:%d] " fmt, __func__, __LINE__, ##arg)
+#else
+#define DPRINTF(fmt, arg...)
+#endif
+
+static void epair_nh_sintr(struct mbuf *);
+static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *);
+static void epair_nh_drainedcpu(u_int);
+
+static void epair_start_locked(struct ifnet *);
+
+static int epair_clone_match(struct if_clone *, const char *);
+static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
+static int epair_clone_destroy(struct if_clone *, struct ifnet *);
+
+/* Netisr related definitions and sysctl. */
+static struct netisr_handler epair_nh = {
+ .nh_name = EPAIRNAME,
+ .nh_proto = NETISR_EPAIR,
+ .nh_policy = NETISR_POLICY_CPU,
+ .nh_handler = epair_nh_sintr,
+ .nh_m2cpuid = epair_nh_m2cpuid,
+ .nh_drainedcpu = epair_nh_drainedcpu,
+};
+
+static int
+sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
+{
+ int error, qlimit;
+
+ netisr_getqlimit(&epair_nh, &qlimit);
+ error = sysctl_handle_int(oidp, &qlimit, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (qlimit < 1)
+ return (EINVAL);
+ return (netisr_setqlimit(&epair_nh, qlimit));
+}
+SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
+ 0, 0, sysctl_epair_netisr_maxqlen, "I",
+ "Maximum if_epair(4) netisr \"hw\" queue length");
+
+struct epair_softc {
+ struct ifnet *ifp; /* This ifp. */
+ struct ifnet *oifp; /* other ifp of pair. */
+ u_int refcount; /* # of mbufs in flight. */
+ u_int cpuid; /* CPU ID assigned upon creation. */
+ void (*if_qflush)(struct ifnet *);
+ /* Original if_qflush routine. */
+};
+
+/*
+ * Per-CPU list of ifps with data in the ifq that needs to be flushed
+ * to the netisr ``hw'' queue before we allow any further direct queuing
+ * to the ``hw'' queue.
+ */
+struct epair_ifp_drain {
+ STAILQ_ENTRY(epair_ifp_drain) ifp_next;
+ struct ifnet *ifp;
+};
+STAILQ_HEAD(eid_list, epair_ifp_drain);
+
+#define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \
+ "if_epair", NULL, MTX_DEF)
+#define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx)
+#define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \
+ MA_OWNED)
+#define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx)
+#define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx)
+
+#ifdef INVARIANTS
+#define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v))
+#define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r))
+#define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r))
+#define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p)
+#else
+#define EPAIR_REFCOUNT_INIT(r, v)
+#define EPAIR_REFCOUNT_AQUIRE(r)
+#define EPAIR_REFCOUNT_RELEASE(r)
+#define EPAIR_REFCOUNT_ASSERT(a, p)
+#endif
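+
+/*
+ * Without INVARIANTS the EPAIR_REFCOUNT_* macros above compile away
+ * entirely, i.e. the mbuf-in-flight accounting is maintained and
+ * checked only in debugging kernels.
+ */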
+
+static MALLOC_DEFINE(M_EPAIR, EPAIRNAME,
+ "Pair of virtual cross-over connected Ethernet-like interfaces");
+
+static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER(
+ EPAIRNAME, NULL, IF_MAXUNIT,
+ NULL, epair_clone_match, epair_clone_create, epair_clone_destroy);
+
+/*
+ * DPCPU area and functions.
+ */
+struct epair_dpcpu {
+ struct mtx if_epair_mtx; /* Per-CPU locking. */
+ int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */
+ struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with
+ * data in the ifq. */
+};
+DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu);
+
+static void
+epair_dpcpu_init(void)
+{
+ struct epair_dpcpu *epair_dpcpu;
+ struct eid_list *s;
+ u_int cpuid;
+
+ for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
+ if (CPU_ABSENT(cpuid))
+ continue;
+
+ epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
+
+ /* Initialize per-cpu lock. */
+ EPAIR_LOCK_INIT(epair_dpcpu);
+
+ /* Driver flags are per-cpu as are our netisr "hw" queues. */
+ epair_dpcpu->epair_drv_flags = 0;
+
+ /*
+ * Initialize per-cpu drain list.
+ * Manually do what STAILQ_HEAD_INITIALIZER would do.
+ */
+ s = &epair_dpcpu->epair_ifp_drain_list;
+ s->stqh_first = NULL;
+ s->stqh_last = &s->stqh_first;
+ }
+}
+
+static void
+epair_dpcpu_detach(void)
+{
+ struct epair_dpcpu *epair_dpcpu;
+ u_int cpuid;
+
+ for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
+ if (CPU_ABSENT(cpuid))
+ continue;
+
+ epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
+
+ /* Destroy per-cpu lock. */
+ EPAIR_LOCK_DESTROY(epair_dpcpu);
+ }
+}
+
+/*
+ * Helper functions.
+ */
+static u_int
+cpuid_from_ifp(struct ifnet *ifp)
+{
+ struct epair_softc *sc;
+
+ if (ifp == NULL)
+ return (0);
+ sc = ifp->if_softc;
+
+ return (sc->cpuid);
+}
+
+/*
+ * Netisr handler functions.
+ */
+static void
+epair_nh_sintr(struct mbuf *m)
+{
+ struct ifnet *ifp;
+ struct epair_softc *sc;
+
+ ifp = m->m_pkthdr.rcvif;
+ (*ifp->if_input)(ifp, m);
+ sc = ifp->if_softc;
+ EPAIR_REFCOUNT_RELEASE(&sc->refcount);
+ EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
+ ("%s: ifp=%p sc->refcount not >= 1: %d",
+ __func__, ifp, sc->refcount));
+ DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount);
+}
+
+static struct mbuf *
+epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
+{
+
+ *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif);
+
+ return (m);
+}
+
+static void
+epair_nh_drainedcpu(u_int cpuid)
+{
+ struct epair_dpcpu *epair_dpcpu;
+ struct epair_ifp_drain *elm, *tvar;
+ struct ifnet *ifp;
+
+ epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
+ EPAIR_LOCK(epair_dpcpu);
+ /*
+	 * Assume our "hw" queue and possibly the ifq will be emptied
+	 * again.  If we overflow the "hw" queue while draining,
+	 * epair_start_locked() will set IFF_DRV_OACTIVE again and we
+	 * will stop and return.
+ */
+ STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list,
+ ifp_next, tvar) {
+ ifp = elm->ifp;
+ epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE;
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ epair_start_locked(ifp);
+
+ IFQ_LOCK(&ifp->if_snd);
+ if (IFQ_IS_EMPTY(&ifp->if_snd)) {
+ struct epair_softc *sc;
+
+ STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list,
+ elm, epair_ifp_drain, ifp_next);
+ /* The cached ifp goes off the list. */
+ sc = ifp->if_softc;
+ EPAIR_REFCOUNT_RELEASE(&sc->refcount);
+ EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
+ ("%s: ifp=%p sc->refcount not >= 1: %d",
+ __func__, ifp, sc->refcount));
+ free(elm, M_EPAIR);
+ }
+ IFQ_UNLOCK(&ifp->if_snd);
+
+ if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) {
+			/* Our "hw" queue overflowed again. */
+ epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
+ DPRINTF("hw queue length overflow at %u\n",
+ epair_nh.nh_qlimit);
+ break;
+ }
+ }
+ EPAIR_UNLOCK(epair_dpcpu);
+}
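+
+/*
+ * To summarize the flow: when netisr_queue() overflows the "hw" queue,
+ * the send path sets IFF_DRV_OACTIVE and parks the ifp on the per-CPU
+ * drain list (taking an extra reference); subsequent packets then fall
+ * back to the classic ifq.  Once netisr has drained its queue it calls
+ * epair_nh_drainedcpu(), which clears IFF_DRV_OACTIVE, restarts
+ * transmission, and removes any ifp whose send queue emptied from the
+ * list.
+ */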
+
+/*
+ * Network interface (`if') related functions.
+ */
+static void
+epair_remove_ifp_from_draining(struct ifnet *ifp)
+{
+ struct epair_dpcpu *epair_dpcpu;
+ struct epair_ifp_drain *elm, *tvar;
+ u_int cpuid;
+
+ for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
+ if (CPU_ABSENT(cpuid))
+ continue;
+
+ epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
+ EPAIR_LOCK(epair_dpcpu);
+ STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list,
+ ifp_next, tvar) {
+ if (ifp == elm->ifp) {
+ struct epair_softc *sc;
+
+ STAILQ_REMOVE(
+ &epair_dpcpu->epair_ifp_drain_list, elm,
+ epair_ifp_drain, ifp_next);
+ /* The cached ifp goes off the list. */
+ sc = ifp->if_softc;
+ EPAIR_REFCOUNT_RELEASE(&sc->refcount);
+ EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
+ ("%s: ifp=%p sc->refcount not >= 1: %d",
+ __func__, ifp, sc->refcount));
+ free(elm, M_EPAIR);
+ }
+ }
+ EPAIR_UNLOCK(epair_dpcpu);
+ }
+}
+
+static int
+epair_add_ifp_for_draining(struct ifnet *ifp)
+{
+ struct epair_dpcpu *epair_dpcpu;
+ struct epair_softc *sc;
+ struct epair_ifp_drain *elm = NULL;
+
+ sc = ifp->if_softc;
+ epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
+ EPAIR_LOCK_ASSERT(epair_dpcpu);
+ STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next)
+ if (elm->ifp == ifp)
+ break;
+ /* If the ifp is there already, return success. */
+ if (elm != NULL)
+ return (0);
+
+ elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO);
+ if (elm == NULL)
+ return (ENOMEM);
+
+ elm->ifp = ifp;
+ /* Add a reference for the ifp pointer on the list. */
+ EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
+ STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next);
+
+ return (0);
+}
+
+static void
+epair_start_locked(struct ifnet *ifp)
+{
+ struct epair_dpcpu *epair_dpcpu;
+ struct mbuf *m;
+ struct epair_softc *sc;
+ struct ifnet *oifp;
+ int error;
+
+ DPRINTF("ifp=%p\n", ifp);
+ sc = ifp->if_softc;
+ epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
+ EPAIR_LOCK_ASSERT(epair_dpcpu);
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return;
+ if ((ifp->if_flags & IFF_UP) == 0)
+ return;
+
+ /*
+	 * We get packets here from ether_output via if_handoff()
+	 * and need to put them into the input queue of the oifp
+ * and call oifp->if_input() via netisr/epair_sintr().
+ */
+ oifp = sc->oifp;
+ sc = oifp->if_softc;
+ for (;;) {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ if (m == NULL)
+ break;
+ BPF_MTAP(ifp, m);
+
+ /*
+ * In case the outgoing interface is not usable,
+ * drop the packet.
+ */
+ if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+		    (oifp->if_flags & IFF_UP) == 0) {
+ ifp->if_oerrors++;
+ m_freem(m);
+ continue;
+ }
+ DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);
+
+ /*
+		 * Add a reference so the interface cannot go away while the
+ * packet is in transit as we rely on rcvif to stay valid.
+ */
+ EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
+ m->m_pkthdr.rcvif = oifp;
+ CURVNET_SET_QUIET(oifp->if_vnet);
+ error = netisr_queue(NETISR_EPAIR, m);
+ CURVNET_RESTORE();
+ if (!error) {
+ ifp->if_opackets++;
+ /* Someone else received the packet. */
+ oifp->if_ipackets++;
+ } else {
+ /* The packet was freed already. */
+ epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ (void) epair_add_ifp_for_draining(ifp);
+ ifp->if_oerrors++;
+ EPAIR_REFCOUNT_RELEASE(&sc->refcount);
+ EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
+ ("%s: ifp=%p sc->refcount not >= 1: %d",
+ __func__, oifp, sc->refcount));
+ }
+ }
+}
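+
+/*
+ * Packet flow in the start path: dequeue from our if_snd, tap BPF,
+ * rewrite m_pkthdr.rcvif to the peer and hand the mbuf to
+ * netisr_queue(NETISR_EPAIR), which eventually runs epair_nh_sintr()
+ * and thus the peer's if_input() on the CPU that epair_nh_m2cpuid()
+ * selected.
+ */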
+
+static void
+epair_start(struct ifnet *ifp)
+{
+ struct epair_dpcpu *epair_dpcpu;
+
+ epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu);
+ EPAIR_LOCK(epair_dpcpu);
+ epair_start_locked(ifp);
+ EPAIR_UNLOCK(epair_dpcpu);
+}
+
+static int
+epair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
+{
+ struct epair_dpcpu *epair_dpcpu;
+ struct epair_softc *sc;
+ struct ifnet *oifp;
+ int error, len;
+ short mflags;
+
+ DPRINTF("ifp=%p m=%p\n", ifp, m);
+ sc = ifp->if_softc;
+ epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
+ EPAIR_LOCK_ASSERT(epair_dpcpu);
+
+ if (m == NULL)
+ return (0);
+
+ /*
+ * We are not going to use the interface en/dequeue mechanism
+ * on the TX side. We are called from ether_output_frame()
+ * and will put the packet into the incoming queue of the
+	 * other interface of our pair via the netisr.
+ */
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ m_freem(m);
+ return (ENXIO);
+ }
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ m_freem(m);
+ return (ENETDOWN);
+ }
+
+ BPF_MTAP(ifp, m);
+
+ /*
+ * In case the outgoing interface is not usable,
+ * drop the packet.
+ */
+ oifp = sc->oifp;
+ if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+	    (oifp->if_flags & IFF_UP) == 0) {
+ ifp->if_oerrors++;
+ m_freem(m);
+ return (0);
+ }
+ len = m->m_pkthdr.len;
+ mflags = m->m_flags;
+ DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);
+
+#ifdef ALTQ
+	/* Support ALTQ via the classic if_start() path. */
+ IF_LOCK(&ifp->if_snd);
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
+ if (error)
+ ifp->if_snd.ifq_drops++;
+ IF_UNLOCK(&ifp->if_snd);
+ if (!error) {
+ ifp->if_obytes += len;
+ if (mflags & (M_BCAST|M_MCAST))
+ ifp->if_omcasts++;
+
+ if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0)
+ epair_start_locked(ifp);
+ else
+ (void)epair_add_ifp_for_draining(ifp);
+ }
+ return (error);
+ }
+ IF_UNLOCK(&ifp->if_snd);
+#endif
+
+ if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) {
+ /*
+ * Our hardware queue is full, try to fall back
+ * queuing to the ifq but do not call ifp->if_start.
+ * Either we are lucky or the packet is gone.
+ */
+ IFQ_ENQUEUE(&ifp->if_snd, m, error);
+ if (!error)
+ (void)epair_add_ifp_for_draining(ifp);
+ return (error);
+ }
+ sc = oifp->if_softc;
+ /*
+	 * Add a reference so the interface cannot go away while the
+ * packet is in transit as we rely on rcvif to stay valid.
+ */
+ EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
+ m->m_pkthdr.rcvif = oifp;
+ CURVNET_SET_QUIET(oifp->if_vnet);
+ error = netisr_queue(NETISR_EPAIR, m);
+ CURVNET_RESTORE();
+ if (!error) {
+ ifp->if_opackets++;
+ /*
+ * IFQ_HANDOFF_ADJ/ip_handoff() update statistics,
+ * but as we bypass all this we have to duplicate
+ * the logic another time.
+ */
+ ifp->if_obytes += len;
+ if (mflags & (M_BCAST|M_MCAST))
+ ifp->if_omcasts++;
+ /* Someone else received the packet. */
+ oifp->if_ipackets++;
+ } else {
+ /* The packet was freed already. */
+ epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ ifp->if_oerrors++;
+ EPAIR_REFCOUNT_RELEASE(&sc->refcount);
+ EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
+ ("%s: ifp=%p sc->refcount not >= 1: %d",
+ __func__, oifp, sc->refcount));
+ }
+
+ return (error);
+}
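+
+/*
+ * Note the three outcomes above: with ALTQ enabled the packet takes
+ * the classic if_start() path, with the netisr "hw" queue marked full
+ * it falls back to the ifq and the drain list, and otherwise it is
+ * queued to the peer directly.
+ */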
+
+static int
+epair_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ struct epair_dpcpu *epair_dpcpu;
+ int error;
+
+ epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu);
+ EPAIR_LOCK(epair_dpcpu);
+ error = epair_transmit_locked(ifp, m);
+ EPAIR_UNLOCK(epair_dpcpu);
+ return (error);
+}
+
+static void
+epair_qflush(struct ifnet *ifp)
+{
+ struct epair_softc *sc;
+
+ sc = ifp->if_softc;
+ KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n",
+ __func__, ifp, sc));
+ /*
+	 * Remove this ifp from all backpointer lists.  The interface is
+	 * not usable for flushing anyway, nor should it have anything
+	 * left to flush after if_qflush().
+ */
+ epair_remove_ifp_from_draining(ifp);
+
+ if (sc->if_qflush)
+ sc->if_qflush(ifp);
+}
+
+static int
+epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr;
+ int error;
+
+ ifr = (struct ifreq *)data;
+ switch (cmd) {
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ error = 0;
+ break;
+
+ case SIOCSIFMTU:
+ /* We basically allow all kinds of MTUs. */
+ ifp->if_mtu = ifr->ifr_mtu;
+ error = 0;
+ break;
+
+ default:
+ /* Let the common ethernet handler process this. */
+ error = ether_ioctl(ifp, cmd, data);
+ break;
+ }
+
+ return (error);
+}
+
+static void
+epair_init(void *dummy __unused)
+{
+}
+
+
+/*
+ * Interface cloning functions.
+ * We use our private ones so that we can create/destroy our secondary
+ * device along with the primary one.
+ */
+static int
+epair_clone_match(struct if_clone *ifc, const char *name)
+{
+ const char *cp;
+
+ DPRINTF("name='%s'\n", name);
+
+ /*
+ * Our base name is epair.
+ * Our interfaces will be named epair<n>[ab].
+ * So accept anything of the following list:
+ * - epair
+ * - epair<n>
+ * but not the epair<n>[ab] versions.
+ */
+ if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0)
+ return (0);
+
+ for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) {
+ if (*cp < '0' || *cp > '9')
+ return (0);
+ }
+
+ return (1);
+}
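+
+/*
+ * Example: "ifconfig epair create" matches via the plain name "epair"
+ * and a wildcard unit is assigned, typically yielding epair0a/epair0b;
+ * "epair3" requests unit 3 explicitly, while "epair0a" itself fails
+ * the digits-only check above.
+ */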
+
+static int
+epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
+{
+ struct epair_softc *sca, *scb;
+ struct ifnet *ifp;
+ char *dp;
+ int error, unit, wildcard;
+ uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
+
+ /*
+ * We are abusing params to create our second interface.
+ * Actually we already created it and called if_clone_createif()
+	 * for it to do the official insertion procedure the moment we knew
+	 * it could not fail anymore.  So just attach it here.
+ */
+ if (params) {
+ scb = (struct epair_softc *)params;
+ ifp = scb->ifp;
+ /* Assign a hopefully unique, locally administered etheraddr. */
+		eaddr[0] = 0x02;
+		eaddr[1] = eaddr[2] = 0x00;
+ eaddr[3] = (ifp->if_index >> 8) & 0xff;
+ eaddr[4] = ifp->if_index & 0xff;
+ eaddr[5] = 0x0b;
+ ether_ifattach(ifp, eaddr);
+ /* Correctly set the name for the cloner list. */
+ strlcpy(name, scb->ifp->if_xname, len);
+ return (0);
+ }
+
+ /* Try to see if a special unit was requested. */
+ error = ifc_name2unit(name, &unit);
+ if (error != 0)
+ return (error);
+ wildcard = (unit < 0);
+
+ error = ifc_alloc_unit(ifc, &unit);
+ if (error != 0)
+ return (error);
+
+ /*
+ * If no unit had been given, we need to adjust the ifName.
+ * Also make sure there is space for our extra [ab] suffix.
+ */
+ for (dp = name; *dp != '\0'; dp++);
+ if (wildcard) {
+ error = snprintf(dp, len - (dp - name), "%d", unit);
+ if (error > len - (dp - name) - 1) {
+ /* ifName too long. */
+ ifc_free_unit(ifc, unit);
+ return (ENOSPC);
+ }
+ dp += error;
+ }
+ if (len - (dp - name) - 1 < 1) {
+ /* No space left for our [ab] suffix. */
+ ifc_free_unit(ifc, unit);
+ return (ENOSPC);
+ }
+ *dp = 'a';
+ /* Must not change dp so we can replace 'a' by 'b' later. */
+ *(dp+1) = '\0';
+
+ /* Allocate memory for both [ab] interfaces */
+ sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
+ EPAIR_REFCOUNT_INIT(&sca->refcount, 1);
+ sca->ifp = if_alloc(IFT_ETHER);
+ if (sca->ifp == NULL) {
+ free(sca, M_EPAIR);
+ ifc_free_unit(ifc, unit);
+ return (ENOSPC);
+ }
+
+ scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
+ EPAIR_REFCOUNT_INIT(&scb->refcount, 1);
+ scb->ifp = if_alloc(IFT_ETHER);
+ if (scb->ifp == NULL) {
+ free(scb, M_EPAIR);
+ if_free(sca->ifp);
+ free(sca, M_EPAIR);
+ ifc_free_unit(ifc, unit);
+ return (ENOSPC);
+ }
+
+ /*
+ * Cross-reference the interfaces so we will be able to free both.
+ */
+ sca->oifp = scb->ifp;
+ scb->oifp = sca->ifp;
+
+ /*
+ * Calculate the cpuid for netisr queueing based on the
+ * ifIndex of the interfaces. As long as we cannot configure
+ * this or use cpuset information easily we cannot guarantee
+ * cache locality but we can at least allow parallelism.
+ */
+ sca->cpuid =
+ netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount());
+ scb->cpuid =
+ netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount());
+
+ /* Finish initialization of interface <n>a. */
+ ifp = sca->ifp;
+ ifp->if_softc = sca;
+ strlcpy(ifp->if_xname, name, IFNAMSIZ);
+ ifp->if_dname = ifc->ifc_name;
+ ifp->if_dunit = unit;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_start = epair_start;
+ ifp->if_ioctl = epair_ioctl;
+ ifp->if_init = epair_init;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ /* Assign a hopefully unique, locally administered etheraddr. */
+	eaddr[0] = 0x02;
+	eaddr[1] = eaddr[2] = 0x00;
+ eaddr[3] = (ifp->if_index >> 8) & 0xff;
+ eaddr[4] = ifp->if_index & 0xff;
+ eaddr[5] = 0x0a;
+ ether_ifattach(ifp, eaddr);
+ sca->if_qflush = ifp->if_qflush;
+ ifp->if_qflush = epair_qflush;
+ ifp->if_transmit = epair_transmit;
+ ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */
+
+ /* Swap the name and finish initialization of interface <n>b. */
+ *dp = 'b';
+
+ ifp = scb->ifp;
+ ifp->if_softc = scb;
+ strlcpy(ifp->if_xname, name, IFNAMSIZ);
+ ifp->if_dname = ifc->ifc_name;
+ ifp->if_dunit = unit;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_start = epair_start;
+ ifp->if_ioctl = epair_ioctl;
+ ifp->if_init = epair_init;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ /* We need to play some tricks here for the second interface. */
+ strlcpy(name, EPAIRNAME, len);
+ error = if_clone_create(name, len, (caddr_t)scb);
+ if (error)
+ panic("%s: if_clone_createif() for our 2nd iface failed: %d",
+ __func__, error);
+ scb->if_qflush = ifp->if_qflush;
+ ifp->if_qflush = epair_qflush;
+ ifp->if_transmit = epair_transmit;
+ ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */
+
+ /*
+ * Restore name to <n>a as the ifp for this will go into the
+ * cloner list for the initial call.
+ */
+ strlcpy(name, sca->ifp->if_xname, len);
+ DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
+
+	/* Tell the world that we are ready to rock. */
+ sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if_link_state_change(sca->ifp, LINK_STATE_UP);
+ if_link_state_change(scb->ifp, LINK_STATE_UP);
+
+ return (0);
+}
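+
+/*
+ * Both halves end up with a locally administered MAC address of the
+ * form 02:00:00:xx:xx:0a (or ...0b), where xx:xx encodes the interface
+ * index, and with the names epair<n>a and epair<n>b, so a pair is
+ * always created and destroyed as a unit.
+ */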
+
+static int
+epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
+{
+ struct ifnet *oifp;
+ struct epair_softc *sca, *scb;
+ int unit, error;
+
+ DPRINTF("ifp=%p\n", ifp);
+
+ /*
+ * In case we called into if_clone_destroyif() ourselves
+ * again to remove the second interface, the softc will be
+	 * NULL.  In that case do nothing but return success.
+ */
+ if (ifp->if_softc == NULL)
+ return (0);
+
+ unit = ifp->if_dunit;
+ sca = ifp->if_softc;
+ oifp = sca->oifp;
+ scb = oifp->if_softc;
+
+ DPRINTF("ifp=%p oifp=%p\n", ifp, oifp);
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ if_link_state_change(oifp, LINK_STATE_DOWN);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ oifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ ether_ifdetach(oifp);
+ ether_ifdetach(ifp);
+ /*
+ * Wait for all packets to be dispatched to if_input.
+ * The numbers can only go down as the interfaces are
+ * detached so there is no need to use atomics.
+ */
+ DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount);
+ EPAIR_REFCOUNT_ASSERT(sca->refcount == 1 && scb->refcount == 1,
+ ("%s: ifp=%p sca->refcount!=1: %d || ifp=%p scb->refcount!=1: %d",
+ __func__, ifp, sca->refcount, oifp, scb->refcount));
+
+ /*
+ * Get rid of our second half.
+ */
+ oifp->if_softc = NULL;
+ error = if_clone_destroyif(ifc, oifp);
+ if (error)
+ panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
+ __func__, error);
+
+ /*
+ * Finish cleaning up. Free them and release the unit.
+	 * As the other of the two interfaces may reside in a different vnet,
+ * we need to switch before freeing them.
+ */
+ CURVNET_SET_QUIET(oifp->if_vnet);
+ if_free(oifp);
+ CURVNET_RESTORE();
+ if_free(ifp);
+ free(scb, M_EPAIR);
+ free(sca, M_EPAIR);
+ ifc_free_unit(ifc, unit);
+
+ return (0);
+}
+
+static int
+epair_modevent(module_t mod, int type, void *data)
+{
+ int qlimit;
+
+ switch (type) {
+ case MOD_LOAD:
+ /* For now limit us to one global mutex and one inq. */
+ epair_dpcpu_init();
+ epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */
+#ifndef __rtems__
+ if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit))
+ epair_nh.nh_qlimit = qlimit;
+#endif
+ netisr_register(&epair_nh);
+ if_clone_attach(&epair_cloner);
+ if (bootverbose)
+ printf("%s initialized.\n", EPAIRNAME);
+ break;
+ case MOD_UNLOAD:
+ if_clone_detach(&epair_cloner);
+ netisr_unregister(&epair_nh);
+ epair_dpcpu_detach();
+ if (bootverbose)
+ printf("%s unloaded.\n", EPAIRNAME);
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t epair_mod = {
+ "if_epair",
+ epair_modevent,
+ 0
+};
+
+DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_epair, 1);
diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c
new file mode 100644
index 00000000..d87ebbd0
--- /dev/null
+++ b/freebsd/sys/net/if_ethersubr.c
@@ -0,0 +1,1364 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#include <freebsd/local/opt_atalk.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipx.h>
+#include <freebsd/local/opt_netgraph.h>
+#include <freebsd/local/opt_mbuf_profiling.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/random.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_arp.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/if_llc.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if_bridgevar.h>
+#include <freebsd/net/if_vlan_var.h>
+#include <freebsd/net/if_llatbl.h>
+#include <freebsd/net/pf_mtag.h>
+#include <freebsd/net/vnet.h>
+
+#if defined(INET) || defined(INET6)
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/if_ether.h>
+#include <freebsd/netinet/ip_carp.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+#endif
+#ifdef INET6
+#include <freebsd/netinet6/nd6.h>
+#endif
+
+#ifdef IPX
+#include <freebsd/netipx/ipx.h>
+#include <freebsd/netipx/ipx_if.h>
+#endif
+
+int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
+int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
+ struct sockaddr *dst, short *tp, int *hlen);
+
+#ifdef NETATALK
+#include <freebsd/netatalk/at.h>
+#include <freebsd/netatalk/at_var.h>
+#include <freebsd/netatalk/at_extern.h>
+
+#define llc_snap_org_code llc_un.type_snap.org_code
+#define llc_snap_ether_type llc_un.type_snap.ether_type
+
+extern u_char at_org_code[3];
+extern u_char aarp_org_code[3];
+#endif /* NETATALK */
+
+#include <freebsd/security/mac/mac_framework.h>
+
+#ifdef CTASSERT
+CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
+CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
+#endif
+
+/* netgraph node hooks for ng_ether(4) */
+void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
+void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
+int (*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
+void (*ng_ether_attach_p)(struct ifnet *ifp);
+void (*ng_ether_detach_p)(struct ifnet *ifp);
+
+void (*vlan_input_p)(struct ifnet *, struct mbuf *);
+
+/* if_bridge(4) support */
+struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
+int (*bridge_output_p)(struct ifnet *, struct mbuf *,
+ struct sockaddr *, struct rtentry *);
+void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+
+/* if_lagg(4) support */
+struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
+
+static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+static int ether_resolvemulti(struct ifnet *, struct sockaddr **,
+ struct sockaddr *);
+#ifdef VIMAGE
+static void ether_reassign(struct ifnet *, struct vnet *, char *);
+#endif
+
+/* XXX: should be in an arp support file, not here */
+MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals");
+
+#define ETHER_IS_BROADCAST(addr) \
+ (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
+
+#define senderr(e) do { error = (e); goto bad;} while (0)
+
+#if defined(INET) || defined(INET6)
+int
+ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared);
+static VNET_DEFINE(int, ether_ipfw);
+#define V_ether_ipfw VNET(ether_ipfw)
+#endif
+
+
+/*
+ * Ethernet output routine.
+ * Encapsulate a packet of type family for the local net.
+ * Use trailer local net encapsulation if enough data in first
+ * packet leaves a multiple of 512 bytes of data in remainder.
+ */
+int
+ether_output(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct route *ro)
+{
+ short type;
+ int error = 0, hdrcmplt = 0;
+ u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
+ struct llentry *lle = NULL;
+ struct rtentry *rt0 = NULL;
+ struct ether_header *eh;
+ struct pf_mtag *t;
+ int loop_copy = 1;
+ int hlen; /* link layer header length */
+
+ if (ro != NULL) {
+ if (!(m->m_flags & (M_BCAST | M_MCAST)))
+ lle = ro->ro_lle;
+ rt0 = ro->ro_rt;
+ }
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ senderr(error);
+#endif
+
+ M_PROFILE(m);
+ if (ifp->if_flags & IFF_MONITOR)
+ senderr(ENETDOWN);
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ senderr(ENETDOWN);
+
+ hlen = ETHER_HDR_LEN;
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if (lle != NULL && (lle->la_flags & LLE_VALID))
+ memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
+ else
+ error = arpresolve(ifp, rt0, m, dst, edst, &lle);
+ if (error)
+ return (error == EWOULDBLOCK ? 0 : error);
+ type = htons(ETHERTYPE_IP);
+ break;
+ case AF_ARP:
+ {
+ struct arphdr *ah;
+ ah = mtod(m, struct arphdr *);
+ ah->ar_hrd = htons(ARPHRD_ETHER);
+
+ loop_copy = 0; /* if this is for us, don't do it */
+
+ switch(ntohs(ah->ar_op)) {
+ case ARPOP_REVREQUEST:
+ case ARPOP_REVREPLY:
+ type = htons(ETHERTYPE_REVARP);
+ break;
+ case ARPOP_REQUEST:
+ case ARPOP_REPLY:
+ default:
+ type = htons(ETHERTYPE_ARP);
+ break;
+ }
+
+ if (m->m_flags & M_BCAST)
+ bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN);
+ else
+ bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN);
+
+ }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (lle != NULL && (lle->la_flags & LLE_VALID))
+ memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
+ else
+ error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
+ if (error)
+ return error;
+ type = htons(ETHERTYPE_IPV6);
+ break;
+#endif
+#ifdef IPX
+ case AF_IPX:
+ if (ef_outputp) {
+ error = ef_outputp(ifp, &m, dst, &type, &hlen);
+ if (error)
+ goto bad;
+ } else
+ type = htons(ETHERTYPE_IPX);
+ bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
+ (caddr_t)edst, sizeof (edst));
+ break;
+#endif
+#ifdef NETATALK
+ case AF_APPLETALK:
+ {
+ struct at_ifaddr *aa;
+
+ if ((aa = at_ifawithnet((struct sockaddr_at *)dst)) == NULL)
+ senderr(EHOSTUNREACH); /* XXX */
+ if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst)) {
+ ifa_free(&aa->aa_ifa);
+ return (0);
+ }
+ /*
+		 * In the phase 2 case, we need to prepend an mbuf for the llc header.
+ */
+		if (aa->aa_flags & AFA_PHASE2) {
+ struct llc llc;
+
+ ifa_free(&aa->aa_ifa);
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
+ if (m == NULL)
+ senderr(ENOBUFS);
+ llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
+ llc.llc_control = LLC_UI;
+ bcopy(at_org_code, llc.llc_snap_org_code, sizeof(at_org_code));
+			llc.llc_snap_ether_type = htons(ETHERTYPE_AT);
+ bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN);
+ type = htons(m->m_pkthdr.len);
+ hlen = LLC_SNAPFRAMELEN + ETHER_HDR_LEN;
+ } else {
+ ifa_free(&aa->aa_ifa);
+ type = htons(ETHERTYPE_AT);
+ }
+ break;
+ }
+#endif /* NETATALK */
+
+ case pseudo_AF_HDRCMPLT:
+ hdrcmplt = 1;
+ eh = (struct ether_header *)dst->sa_data;
+ (void)memcpy(esrc, eh->ether_shost, sizeof (esrc));
+ /* FALLTHROUGH */
+
+ case AF_UNSPEC:
+ loop_copy = 0; /* if this is for us, don't do it */
+ eh = (struct ether_header *)dst->sa_data;
+ (void)memcpy(edst, eh->ether_dhost, sizeof (edst));
+ type = eh->ether_type;
+ break;
+
+ default:
+ if_printf(ifp, "can't handle af%d\n", dst->sa_family);
+ senderr(EAFNOSUPPORT);
+ }
+
+ if (lle != NULL && (lle->la_flags & LLE_IFADDR)) {
+ int csum_flags = 0;
+ if (m->m_pkthdr.csum_flags & CSUM_IP)
+ csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
+ csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP)
+ csum_flags |= CSUM_SCTP_VALID;
+ m->m_pkthdr.csum_flags |= csum_flags;
+ m->m_pkthdr.csum_data = 0xffff;
+ return (if_simloop(ifp, m, dst->sa_family, 0));
+ }
+
+ /*
+ * Add local net header. If no space in first mbuf,
+ * allocate another.
+ */
+ M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+ if (m == NULL)
+ senderr(ENOBUFS);
+ eh = mtod(m, struct ether_header *);
+ (void)memcpy(&eh->ether_type, &type,
+ sizeof(eh->ether_type));
+ (void)memcpy(eh->ether_dhost, edst, sizeof (edst));
+ if (hdrcmplt)
+ (void)memcpy(eh->ether_shost, esrc,
+ sizeof(eh->ether_shost));
+ else
+ (void)memcpy(eh->ether_shost, IF_LLADDR(ifp),
+ sizeof(eh->ether_shost));
+
+ /*
+ * If a simplex interface, and the packet is being sent to our
+ * Ethernet address or a broadcast address, loopback a copy.
+ * XXX To make a simplex device behave exactly like a duplex
+ * device, we should copy in the case of sending to our own
+ * ethernet address (thus letting the original actually appear
+ * on the wire). However, we don't do that here for security
+ * reasons and compatibility with the original behavior.
+ */
+ if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
+ ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
+ int csum_flags = 0;
+
+ if (m->m_pkthdr.csum_flags & CSUM_IP)
+ csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
+ csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP)
+ csum_flags |= CSUM_SCTP_VALID;
+
+ if (m->m_flags & M_BCAST) {
+ struct mbuf *n;
+
+ /*
+ * Because if_simloop() modifies the packet, we need a
+ * writable copy through m_dup() instead of a readonly
+ * one as m_copy[m] would give us. The alternative would
+ * be to modify if_simloop() to handle the readonly mbuf,
+			 * but performance-wise it is mostly equivalent (trading
+ * extra data copying vs. extra locking).
+ *
+ * XXX This is a local workaround. A number of less
+ * often used kernel parts suffer from the same bug.
+ * See PR kern/105943 for a proposed general solution.
+ */
+ if ((n = m_dup(m, M_DONTWAIT)) != NULL) {
+ n->m_pkthdr.csum_flags |= csum_flags;
+ if (csum_flags & CSUM_DATA_VALID)
+ n->m_pkthdr.csum_data = 0xffff;
+ (void)if_simloop(ifp, n, dst->sa_family, hlen);
+ } else
+ ifp->if_iqdrops++;
+ } else if (bcmp(eh->ether_dhost, eh->ether_shost,
+ ETHER_ADDR_LEN) == 0) {
+ m->m_pkthdr.csum_flags |= csum_flags;
+ if (csum_flags & CSUM_DATA_VALID)
+ m->m_pkthdr.csum_data = 0xffff;
+ (void) if_simloop(ifp, m, dst->sa_family, hlen);
+ return (0); /* XXX */
+ }
+ }
+
+ /*
+ * Bridges require special output handling.
+ */
+ if (ifp->if_bridge) {
+ BRIDGE_OUTPUT(ifp, m, error);
+ return (error);
+ }
+
+#if defined(INET) || defined(INET6)
+ if (ifp->if_carp &&
+ (error = (*carp_output_p)(ifp, m, dst, NULL)))
+ goto bad;
+#endif
+
+ /* Handle ng_ether(4) processing, if any */
+ if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ KASSERT(ng_ether_output_p != NULL,
+ ("ng_ether_output_p is NULL"));
+ if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
+bad: if (m != NULL)
+ m_freem(m);
+ return (error);
+ }
+ if (m == NULL)
+ return (0);
+ }
+
+ /* Continue with link-layer output */
+ return ether_output_frame(ifp, m);
+}
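+
+/*
+ * Note: the "bad:" label above deliberately sits inside the ng_ether
+ * block; senderr() jumps to it from anywhere in ether_output() to free
+ * the mbuf, if any, and return the error.
+ */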
+
+/*
+ * Ethernet link layer output routine to send a raw frame to the device.
+ *
+ * This assumes that the 14 byte Ethernet header is present and contiguous
+ * in the first mbuf (if BRIDGE'ing).
+ */
+int
+ether_output_frame(struct ifnet *ifp, struct mbuf *m)
+{
+#if defined(INET) || defined(INET6)
+
+ if (V_ip_fw_chk_ptr && V_ether_ipfw != 0) {
+ if (ether_ipfw_chk(&m, ifp, 0) == 0) {
+ if (m) {
+ m_freem(m);
+ return EACCES; /* pkt dropped */
+ } else
+ return 0; /* consumed e.g. in a pipe */
+ }
+ }
+#endif
+
+ /*
+ * Queue message on interface, update output statistics if
+ * successful, and start output if interface not yet active.
+ */
+ return ((ifp->if_transmit)(ifp, m));
+}
+
+#if defined(INET) || defined(INET6)
+/*
+ * ipfw processing for ethernet packets (in and out).
+ * The second parameter is NULL from ether_demux, and ifp from
+ * ether_output_frame.
+ */
+int
+ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared)
+{
+ struct ether_header *eh;
+ struct ether_header save_eh;
+ struct mbuf *m;
+ int i;
+ struct ip_fw_args args;
+ struct m_tag *mtag;
+
+ /* fetch start point from rule, if any */
+ mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
+ if (mtag == NULL) {
+ args.rule.slot = 0;
+ } else {
+ /* dummynet packet, already partially processed */
+ struct ipfw_rule_ref *r;
+
+ /* XXX can we free it after use ? */
+ mtag->m_tag_id = PACKET_TAG_NONE;
+ r = (struct ipfw_rule_ref *)(mtag + 1);
+ if (r->info & IPFW_ONEPASS)
+ return (1);
+ args.rule = *r;
+ }
+
+ /*
+	 * We need some amount of data to be contiguous; if others also need
+	 * the packet (shared == 1), it had better be in the first mbuf.
+ */
+ m = *m0;
+	i = min(m->m_pkthdr.len, max_protohdr);
+	if (shared || m->m_len < i) {
+ m = m_pullup(m, i);
+ if (m == NULL) {
+ *m0 = m;
+ return 0;
+ }
+ }
+ eh = mtod(m, struct ether_header *);
+ save_eh = *eh; /* save copy for restore below */
+ m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */
+
+ args.m = m; /* the packet we are looking at */
+ args.oif = dst; /* destination, if any */
+ args.next_hop = NULL; /* we do not support forward yet */
+ args.eh = &save_eh; /* MAC header for bridged/MAC packets */
+ args.inp = NULL; /* used by ipfw uid/gid/jail rules */
+ i = V_ip_fw_chk_ptr(&args);
+ m = args.m;
+ if (m != NULL) {
+ /*
+ * Restore Ethernet header, as needed, in case the
+ * mbuf chain was replaced by ipfw.
+ */
+ M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+ if (m == NULL) {
+ *m0 = m;
+ return 0;
+ }
+ if (eh != mtod(m, struct ether_header *))
+ bcopy(&save_eh, mtod(m, struct ether_header *),
+ ETHER_HDR_LEN);
+ }
+ *m0 = m;
+
+ if (i == IP_FW_DENY) /* drop */
+ return 0;
+
+ KASSERT(m != NULL, ("ether_ipfw_chk: m is NULL"));
+
+ if (i == IP_FW_PASS) /* a PASS rule. */
+ return 1;
+
+ if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
+ int dir;
+ /*
+ * Pass the pkt to dummynet, which consumes it.
+ * If shared, make a copy and keep the original.
+ */
+ if (shared) {
+ m = m_copypacket(m, M_DONTWAIT);
+ if (m == NULL)
+ return 0;
+ } else {
+ /*
+ * Pass the original to dummynet and
+ * nothing back to the caller
+ */
+			*m0 = NULL;
+ }
+ dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
+ ip_dn_io_ptr(&m, dir, &args);
+ return 0;
+ }
+ /*
+ * XXX at some point add support for divert/forward actions.
+ * If none of the above matches, we have to drop the pkt.
+ */
+ return 0;
+}
+#endif
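+
+/*
+ * Return convention for ether_ipfw_chk(): 1 means the caller should
+ * keep processing the frame, 0 means it was denied or consumed (e.g.
+ * handed to dummynet).  In the latter case *m0 may be NULL; if it is
+ * not, the caller is expected to free it.
+ */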
+
+/*
+ * Process a received Ethernet packet; the packet is in the
+ * mbuf chain m with the ethernet header at the front.
+ */
+static void
+ether_input(struct ifnet *ifp, struct mbuf *m)
+{
+ struct ether_header *eh;
+ u_short etype;
+
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ m_freem(m);
+ return;
+ }
+#ifdef DIAGNOSTIC
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
+ m_freem(m);
+ return;
+ }
+#endif
+ /*
+ * Do consistency checks to verify assumptions
+ * made by code past this point.
+ */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ if_printf(ifp, "discard frame w/o packet header\n");
+ ifp->if_ierrors++;
+ m_freem(m);
+ return;
+ }
+ if (m->m_len < ETHER_HDR_LEN) {
+ /* XXX maybe should pullup? */
+ if_printf(ifp, "discard frame w/o leading ethernet "
+ "header (len %u pkt len %u)\n",
+ m->m_len, m->m_pkthdr.len);
+ ifp->if_ierrors++;
+ m_freem(m);
+ return;
+ }
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ if (m->m_pkthdr.rcvif == NULL) {
+ if_printf(ifp, "discard frame w/o interface pointer\n");
+ ifp->if_ierrors++;
+ m_freem(m);
+ return;
+ }
+#ifdef DIAGNOSTIC
+ if (m->m_pkthdr.rcvif != ifp) {
+ if_printf(ifp, "Warning, frame marked as received on %s\n",
+ m->m_pkthdr.rcvif->if_xname);
+ }
+#endif
+
+ CURVNET_SET_QUIET(ifp->if_vnet);
+
+ if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
+ if (ETHER_IS_BROADCAST(eh->ether_dhost))
+ m->m_flags |= M_BCAST;
+ else
+ m->m_flags |= M_MCAST;
+ ifp->if_imcasts++;
+ }
+
+#ifdef MAC
+ /*
+ * Tag the mbuf with an appropriate MAC label before any other
+ * consumers can get to it.
+ */
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+ /*
+ * Give bpf a chance at the packet.
+ */
+ ETHER_BPF_MTAP(ifp, m);
+
+ /*
+ * If the CRC is still on the packet, trim it off. We do this once
+ * and once only in case we are re-entered. Nothing else on the
+ * Ethernet receive path expects to see the FCS.
+ */
+ if (m->m_flags & M_HASFCS) {
+ m_adj(m, -ETHER_CRC_LEN);
+ m->m_flags &= ~M_HASFCS;
+ }
+
+ ifp->if_ibytes += m->m_pkthdr.len;
+
+ /* Allow monitor mode to claim this frame, after stats are updated. */
+ if (ifp->if_flags & IFF_MONITOR) {
+ m_freem(m);
+ CURVNET_RESTORE();
+ return;
+ }
+
+ /* Handle input from a lagg(4) port */
+ if (ifp->if_type == IFT_IEEE8023ADLAG) {
+ KASSERT(lagg_input_p != NULL,
+ ("%s: if_lagg not loaded!", __func__));
+ m = (*lagg_input_p)(ifp, m);
+		if (m != NULL) {
+			ifp = m->m_pkthdr.rcvif;
+		} else {
+			CURVNET_RESTORE();
+			return;
+		}
+ }
+
+ /*
+ * If the hardware did not process an 802.1Q tag, do this now,
+ * to allow 802.1P priority frames to be passed to the main input
+ * path correctly.
+ * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels.
+ */
+ if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) {
+ struct ether_vlan_header *evl;
+
+ if (m->m_len < sizeof(*evl) &&
+ (m = m_pullup(m, sizeof(*evl))) == NULL) {
+#ifdef DIAGNOSTIC
+ if_printf(ifp, "cannot pullup VLAN header\n");
+#endif
+ ifp->if_ierrors++;
+			m_freem(m);
+			CURVNET_RESTORE();
+			return;
+ }
+
+ evl = mtod(m, struct ether_vlan_header *);
+ m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
+ m->m_flags |= M_VLANTAG;
+
+ bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
+ ETHER_HDR_LEN - ETHER_TYPE_LEN);
+ m_adj(m, ETHER_VLAN_ENCAP_LEN);
+ }
+
+ /* Allow ng_ether(4) to claim this frame. */
+ if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ KASSERT(ng_ether_input_p != NULL,
+ ("%s: ng_ether_input_p is NULL", __func__));
+ m->m_flags &= ~M_PROMISC;
+ (*ng_ether_input_p)(ifp, &m);
+ if (m == NULL) {
+ CURVNET_RESTORE();
+ return;
+ }
+ }
+
+ /*
+ * Allow if_bridge(4) to claim this frame.
+ * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
+ * and the frame should be delivered locally.
+ */
+ if (ifp->if_bridge != NULL) {
+ m->m_flags &= ~M_PROMISC;
+ BRIDGE_INPUT(ifp, m);
+ if (m == NULL) {
+ CURVNET_RESTORE();
+ return;
+ }
+ }
+
+#if defined(INET) || defined(INET6)
+ /*
+ * Clear M_PROMISC on frame so that carp(4) will see it when the
+ * mbuf flows up to Layer 3.
+ * FreeBSD's implementation of carp(4) uses the inprotosw
+ * to dispatch IPPROTO_CARP. carp(4) also allocates its own
+ * Ethernet addresses of the form 00:00:5e:00:01:xx, which
+ * is outside the scope of the M_PROMISC test below.
+ * TODO: Maintain a hash table of ethernet addresses other than
+ * ether_dhost which may be active on this ifp.
+ */
+ if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) {
+ m->m_flags &= ~M_PROMISC;
+ } else
+#endif
+ {
+ /*
+ * If the frame received was not for our MAC address, set the
+ * M_PROMISC flag on the mbuf chain. The frame may need to
+ * be seen by the rest of the Ethernet input path in case of
+ * re-entry (e.g. bridge, vlan, netgraph) but should not be
+ * seen by upper protocol layers.
+ */
+ if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
+ bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
+ m->m_flags |= M_PROMISC;
+ }
+
+ /* First chunk of an mbuf contains good entropy */
+ if (harvest.ethernet)
+ random_harvest(m, 16, 3, 0, RANDOM_NET);
+
+ ether_demux(ifp, m);
+ CURVNET_RESTORE();
+}
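+
+/*
+ * To recap the claim chain above: monitor mode, lagg(4), 802.1Q
+ * decapsulation, ng_ether(4) and if_bridge(4) each get a chance to
+ * consume or redirect the frame before ether_demux() dispatches it to
+ * the upper layers.
+ */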
+
+/*
+ * Upper layer processing for a received Ethernet packet.
+ */
+void
+ether_demux(struct ifnet *ifp, struct mbuf *m)
+{
+ struct ether_header *eh;
+ int isr;
+ u_short ether_type;
+#if defined(NETATALK)
+ struct llc *l;
+#endif
+
+ KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
+
+#if defined(INET) || defined(INET6)
+ /*
+ * Allow dummynet and/or ipfw to claim the frame.
+ * Do not do this for PROMISC frames in case we are re-entered.
+ */
+ if (V_ip_fw_chk_ptr && V_ether_ipfw != 0 && !(m->m_flags & M_PROMISC)) {
+ if (ether_ipfw_chk(&m, NULL, 0) == 0) {
+ if (m)
+ m_freem(m); /* dropped; free mbuf chain */
+ return; /* consumed */
+ }
+ }
+#endif
+ eh = mtod(m, struct ether_header *);
+ ether_type = ntohs(eh->ether_type);
+
+ /*
+ * If this frame has a VLAN tag other than 0, call vlan_input()
+ * if its module is loaded. Otherwise, drop.
+ */
+ if ((m->m_flags & M_VLANTAG) &&
+ EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
+ if (ifp->if_vlantrunk == NULL) {
+ ifp->if_noproto++;
+ m_freem(m);
+ return;
+ }
+ KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
+ __func__));
+ /* Clear before possibly re-entering ether_input(). */
+ m->m_flags &= ~M_PROMISC;
+ (*vlan_input_p)(ifp, m);
+ return;
+ }
+
+ /*
+ * Pass promiscuously received frames to the upper layer if the user
+ * requested this by setting IFF_PPROMISC. Otherwise, drop them.
+ */
+ if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
+ m_freem(m);
+ return;
+ }
+
+ /*
+ * Reset layer specific mbuf flags to avoid confusing upper layers.
+ * Strip off Ethernet header.
+ */
+ m->m_flags &= ~M_VLANTAG;
+ m->m_flags &= ~(M_PROTOFLAGS);
+ m_adj(m, ETHER_HDR_LEN);
+
+ /*
+ * Dispatch frame to upper layer.
+ */
+ switch (ether_type) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ if ((m = ip_fastforward(m)) == NULL)
+ return;
+ isr = NETISR_IP;
+ break;
+
+ case ETHERTYPE_ARP:
+ if (ifp->if_flags & IFF_NOARP) {
+ /* Discard packet if ARP is disabled on interface */
+ m_freem(m);
+ return;
+ }
+ isr = NETISR_ARP;
+ break;
+#endif
+#ifdef IPX
+ case ETHERTYPE_IPX:
+ if (ef_inputp && ef_inputp(ifp, eh, m) == 0)
+ return;
+ isr = NETISR_IPX;
+ break;
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ break;
+#endif
+#ifdef NETATALK
+ case ETHERTYPE_AT:
+ isr = NETISR_ATALK1;
+ break;
+ case ETHERTYPE_AARP:
+ isr = NETISR_AARP;
+ break;
+#endif /* NETATALK */
+ default:
+#ifdef IPX
+ if (ef_inputp && ef_inputp(ifp, eh, m) == 0)
+ return;
+#endif /* IPX */
+#if defined(NETATALK)
+ if (ether_type > ETHERMTU)
+ goto discard;
+ l = mtod(m, struct llc *);
+ if (l->llc_dsap == LLC_SNAP_LSAP &&
+ l->llc_ssap == LLC_SNAP_LSAP &&
+ l->llc_control == LLC_UI) {
+ if (bcmp(&(l->llc_snap_org_code)[0], at_org_code,
+ sizeof(at_org_code)) == 0 &&
+ ntohs(l->llc_snap_ether_type) == ETHERTYPE_AT) {
+ m_adj(m, LLC_SNAPFRAMELEN);
+ isr = NETISR_ATALK2;
+ break;
+ }
+ if (bcmp(&(l->llc_snap_org_code)[0], aarp_org_code,
+ sizeof(aarp_org_code)) == 0 &&
+ ntohs(l->llc_snap_ether_type) == ETHERTYPE_AARP) {
+ m_adj(m, LLC_SNAPFRAMELEN);
+ isr = NETISR_AARP;
+ break;
+ }
+ }
+#endif /* NETATALK */
+ goto discard;
+ }
+ netisr_dispatch(isr, m);
+ return;
+
+discard:
+ /*
+ * Packet is to be discarded. If netgraph is present,
+ * hand the packet to it for last chance processing;
+ * otherwise dispose of it.
+ */
+ if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ KASSERT(ng_ether_input_orphan_p != NULL,
+ ("ng_ether_input_orphan_p is NULL"));
+ /*
+ * Put back the ethernet header so netgraph has a
+ * consistent view of inbound packets.
+ */
+ M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+ (*ng_ether_input_orphan_p)(ifp, m);
+ return;
+ }
+ m_freem(m);
+}
+
+/*
+ * Convert Ethernet address to printable (loggable) representation.
+ * This routine is for compatibility; it's better to just use
+ *
+ * printf("%6D", <pointer to address>, ":");
+ *
+ * since there's no static buffer involved.
+ */
+char *
+ether_sprintf(const u_char *ap)
+{
+ static char etherbuf[18];
+ snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":");
+ return (etherbuf);
+}
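+
+/*
+ * Example: for the address 00:0c:29:c0:ff:ee this returns the string
+ * "00:0c:29:c0:ff:ee".  The result lives in a static buffer, so it is
+ * overwritten by the next call and must not be cached.
+ */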
+
+/*
+ * Perform common duties while attaching to interface list
+ */
+void
+ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
+{
+ int i;
+ struct ifaddr *ifa;
+ struct sockaddr_dl *sdl;
+
+ ifp->if_addrlen = ETHER_ADDR_LEN;
+ ifp->if_hdrlen = ETHER_HDR_LEN;
+ if_attach(ifp);
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_output = ether_output;
+ ifp->if_input = ether_input;
+ ifp->if_resolvemulti = ether_resolvemulti;
+#ifdef VIMAGE
+ ifp->if_reassign = ether_reassign;
+#endif
+ if (ifp->if_baudrate == 0)
+ ifp->if_baudrate = IF_Mbps(10); /* just a default */
+ ifp->if_broadcastaddr = etherbroadcastaddr;
+
+ ifa = ifp->if_addr;
+ KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl->sdl_type = IFT_ETHER;
+ sdl->sdl_alen = ifp->if_addrlen;
+ bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
+
+ bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
+ if (ng_ether_attach_p != NULL)
+ (*ng_ether_attach_p)(ifp);
+
+ /* Announce Ethernet MAC address if non-zero. */
+ for (i = 0; i < ifp->if_addrlen; i++)
+ if (lla[i] != 0)
+ break;
+ if (i != ifp->if_addrlen)
+ if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
+}
+
+/*
+ * Perform common duties while detaching an Ethernet interface
+ */
+void
+ether_ifdetach(struct ifnet *ifp)
+{
+ if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ KASSERT(ng_ether_detach_p != NULL,
+ ("ng_ether_detach_p is NULL"));
+ (*ng_ether_detach_p)(ifp);
+ }
+
+ bpfdetach(ifp);
+ if_detach(ifp);
+}
+
+#ifdef VIMAGE
+static void
+ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
+{
+
+ if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ KASSERT(ng_ether_detach_p != NULL,
+ ("ng_ether_detach_p is NULL"));
+ (*ng_ether_detach_p)(ifp);
+ }
+
+ if (ng_ether_attach_p != NULL) {
+ CURVNET_SET_QUIET(new_vnet);
+ (*ng_ether_attach_p)(ifp);
+ CURVNET_RESTORE();
+ }
+}
+#endif
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
+#if defined(INET) || defined(INET6)
+SYSCTL_VNET_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
+ &VNET_NAME(ether_ipfw), 0, "Pass ether pkts through firewall");
+#endif
+
+#if 0
+/*
+ * This is for reference. We have a table-driven version
+ * of the little-endian crc32 generator, which is faster
+ * than the double-loop.
+ */
+uint32_t
+ether_crc32_le(const uint8_t *buf, size_t len)
+{
+ size_t i;
+	uint32_t crc, carry;
+ int bit;
+ uint8_t data;
+
+ crc = 0xffffffff; /* initial value */
+
+ for (i = 0; i < len; i++) {
+ for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
+ carry = (crc ^ data) & 1;
+ crc >>= 1;
+ if (carry)
+ crc = (crc ^ ETHER_CRC_POLY_LE);
+ }
+ }
+
+ return (crc);
+}
+#else
+uint32_t
+ether_crc32_le(const uint8_t *buf, size_t len)
+{
+ static const uint32_t crctab[] = {
+ 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
+ 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
+ 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
+ 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
+ };
+ size_t i;
+ uint32_t crc;
+
+ crc = 0xffffffff; /* initial value */
+
+ for (i = 0; i < len; i++) {
+ crc ^= buf[i];
+ crc = (crc >> 4) ^ crctab[crc & 0xf];
+ crc = (crc >> 4) ^ crctab[crc & 0xf];
+ }
+
+ return (crc);
+}
+#endif
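+
+/*
+ * The table-driven variant above consumes four bits per step: crctab[]
+ * holds the 16 remainders of the reflected polynomial 0xedb88320, so
+ * one input byte costs two table lookups instead of the eight
+ * bit-by-bit iterations of the reference loop.
+ */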
+
+uint32_t
+ether_crc32_be(const uint8_t *buf, size_t len)
+{
+ size_t i;
+ uint32_t crc, carry;
+ int bit;
+ uint8_t data;
+
+ crc = 0xffffffff; /* initial value */
+
+ for (i = 0; i < len; i++) {
+ for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
+ carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01);
+ crc <<= 1;
+ if (carry)
+ crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
+ }
+ }
+
+ return (crc);
+}
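+
+/*
+ * Drivers typically use one of the two CRC routines above to hash a
+ * multicast address into a bit position of their hardware multicast
+ * filter; which byte order applies depends on the controller.
+ */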
+
+int
+ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+{
+ struct ifaddr *ifa = (struct ifaddr *) data;
+ struct ifreq *ifr = (struct ifreq *) data;
+ int error = 0;
+
+ switch (command) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ ifp->if_init(ifp->if_softc); /* before arpwhohas */
+ arp_ifinit(ifp, ifa);
+ break;
+#endif
+#ifdef IPX
+ /*
+ * XXX - This code is probably wrong
+ */
+ case AF_IPX:
+ {
+ struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr);
+
+ if (ipx_nullhost(*ina))
+ ina->x_host =
+ *(union ipx_host *)
+ IF_LLADDR(ifp);
+ else {
+ bcopy((caddr_t) ina->x_host.c_host,
+ (caddr_t) IF_LLADDR(ifp),
+ ETHER_ADDR_LEN);
+ }
+
+ /*
+ * Set new address
+ */
+ ifp->if_init(ifp->if_softc);
+ break;
+ }
+#endif
+ default:
+ ifp->if_init(ifp->if_softc);
+ break;
+ }
+ break;
+
+ case SIOCGIFADDR:
+ {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *) & ifr->ifr_data;
+ bcopy(IF_LLADDR(ifp),
+ (caddr_t) sa->sa_data, ETHER_ADDR_LEN);
+ }
+ break;
+
+ case SIOCSIFMTU:
+ /*
+ * Set the interface MTU.
+ */
+ if (ifr->ifr_mtu > ETHERMTU) {
+ error = EINVAL;
+ } else {
+ ifp->if_mtu = ifr->ifr_mtu;
+ }
+ break;
+ default:
+ error = EINVAL; /* XXX netbsd has ENOTTY??? */
+ break;
+ }
+ return (error);
+}
+
+static int
+ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+ struct sockaddr *sa)
+{
+ struct sockaddr_dl *sdl;
+#ifdef INET
+ struct sockaddr_in *sin;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+#endif
+ u_char *e_addr;
+
+ switch(sa->sa_family) {
+ case AF_LINK:
+ /*
+ * No mapping needed. Just check that it's a valid MC address.
+ */
+ sdl = (struct sockaddr_dl *)sa;
+ e_addr = LLADDR(sdl);
+ if (!ETHER_IS_MULTICAST(e_addr))
+ return EADDRNOTAVAIL;
+ *llsa = 0;
+ return 0;
+
+#ifdef INET
+ case AF_INET:
+ sin = (struct sockaddr_in *)sa;
+ if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ return EADDRNOTAVAIL;
+ sdl = malloc(sizeof *sdl, M_IFMADDR,
+ M_NOWAIT|M_ZERO);
+ if (sdl == NULL)
+ return ENOMEM;
+ sdl->sdl_len = sizeof *sdl;
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = IFT_ETHER;
+ sdl->sdl_alen = ETHER_ADDR_LEN;
+ e_addr = LLADDR(sdl);
+ ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
+ *llsa = (struct sockaddr *)sdl;
+ return 0;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /*
+ * An IP6 address of 0 means listen to all
+ * of the Ethernet multicast address used for IP6.
+ * (This is used for multicast routers.)
+ */
+ ifp->if_flags |= IFF_ALLMULTI;
+ *llsa = 0;
+ return 0;
+ }
+ if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+ return EADDRNOTAVAIL;
+ sdl = malloc(sizeof *sdl, M_IFMADDR,
+ M_NOWAIT|M_ZERO);
+ if (sdl == NULL)
+ return (ENOMEM);
+ sdl->sdl_len = sizeof *sdl;
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = IFT_ETHER;
+ sdl->sdl_alen = ETHER_ADDR_LEN;
+ e_addr = LLADDR(sdl);
+ ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
+ *llsa = (struct sockaddr *)sdl;
+ return 0;
+#endif
+
+ default:
+ /*
+ * Well, the text isn't quite right, but it's the name
+ * that counts...
+ */
+ return EAFNOSUPPORT;
+ }
+}
+
+static void *
+ether_alloc(u_char type, struct ifnet *ifp)
+{
+ struct arpcom *ac;
+
+ ac = malloc(sizeof(struct arpcom), M_ARPCOM, M_WAITOK | M_ZERO);
+ ac->ac_ifp = ifp;
+
+ return (ac);
+}
+
+static void
+ether_free(void *com, u_char type)
+{
+
+ free(com, M_ARPCOM);
+}
+
+static int
+ether_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ if_register_com_alloc(IFT_ETHER, ether_alloc, ether_free);
+ break;
+ case MOD_UNLOAD:
+ if_deregister_com_alloc(IFT_ETHER);
+ break;
+ default:
+ return EOPNOTSUPP;
+ }
+
+ return (0);
+}
+
+static moduledata_t ether_mod = {
+ "ether",
+ ether_modevent,
+ 0
+};
+
+void
+ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
+{
+ struct ether_vlan_header vlan;
+ struct mbuf mv, mb;
+
+ KASSERT((m->m_flags & M_VLANTAG) != 0,
+ ("%s: vlan information not present", __func__));
+ KASSERT(m->m_len >= sizeof(struct ether_header),
+ ("%s: mbuf not large enough for header", __func__));
+ bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
+ vlan.evl_proto = vlan.evl_encap_proto;
+ vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
+ vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
+ m->m_len -= sizeof(struct ether_header);
+ m->m_data += sizeof(struct ether_header);
+ /*
+ * If a data link has been supplied by the caller, then we will need to
+ * re-create a stack allocated mbuf chain with the following structure:
+ *
+ * (1) mbuf #1 will contain the supplied data link
+ * (2) mbuf #2 will contain the vlan header
+ * (3) mbuf #3 will contain the original mbuf's packet data
+ *
+ * Otherwise, submit the packet and vlan header via bpf_mtap2().
+ */
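+	/*
+	 * (Layout sketch: mb carries the caller-supplied link header, mv the
+	 * reconstructed vlan header, and m the remaining packet data.  Both
+	 * header mbufs live on the stack, and only the fields bpf_mtap()
+	 * walks -- m_next, m_data and m_len -- are initialized below.)
+	 */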
+ if (data != NULL) {
+ mv.m_next = m;
+ mv.m_data = (caddr_t)&vlan;
+ mv.m_len = sizeof(vlan);
+ mb.m_next = &mv;
+ mb.m_data = data;
+ mb.m_len = dlen;
+ bpf_mtap(bp, &mb);
+ } else
+ bpf_mtap2(bp, &vlan, sizeof(vlan), m);
+ m->m_len += sizeof(struct ether_header);
+ m->m_data -= sizeof(struct ether_header);
+}
+
+struct mbuf *
+ether_vlanencap(struct mbuf *m, uint16_t tag)
+{
+ struct ether_vlan_header *evl;
+
+ M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
+ if (m == NULL)
+ return (NULL);
+ /* M_PREPEND takes care of m_len, m_pkthdr.len for us */
+
+ if (m->m_len < sizeof(*evl)) {
+ m = m_pullup(m, sizeof(*evl));
+ if (m == NULL)
+ return (NULL);
+ }
+
+ /*
+ * Transform the Ethernet header into an Ethernet header
+ * with 802.1Q encapsulation.
+ */
+ evl = mtod(m, struct ether_vlan_header *);
+ bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
+ (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
+ evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
+ evl->evl_tag = htons(tag);
+ return (m);
+}
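+
+/*
+ * Illustrative use of ether_vlanencap() (not part of the original file):
+ * a driver without hardware tag insertion could expand the M_VLANTAG
+ * annotation into an in-line 802.1Q header before transmitting:
+ *
+ *	if ((m->m_flags & M_VLANTAG) != 0) {
+ *		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
+ *		if (m == NULL)
+ *			return (ENOBUFS);
+ *		m->m_flags &= ~M_VLANTAG;
+ *	}
+ */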
+
+DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
+MODULE_VERSION(ether, 1);
diff --git a/freebsd/sys/net/if_faith.c b/freebsd/sys/net/if_faith.c
new file mode 100644
index 00000000..c8989922
--- /dev/null
+++ b/freebsd/sys/net/if_faith.c
@@ -0,0 +1,353 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $KAME: if_faith.c,v 1.23 2001/12/17 13:55:29 sumikawa Exp $ */
+
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+/*
+ * derived from
+ * @(#)if_loop.c 8.1 (Berkeley) 6/10/93
+ * Id: if_loop.c,v 1.22 1996/06/19 16:24:10 wollman Exp
+ */
+
+/*
+ * Loopback interface driver for protocol testing and timing.
+ */
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/malloc.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/vnet.h>
+
+#ifdef INET
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#endif
+
+#ifdef INET6
+#ifndef INET
+#include <freebsd/netinet/in.h>
+#endif
+#include <freebsd/netinet6/in6_var.h>
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#endif
+
+#define FAITHNAME "faith"
+
+struct faith_softc {
+ struct ifnet *sc_ifp;
+};
+
+static int faithioctl(struct ifnet *, u_long, caddr_t);
+int faithoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+static void faithrtrequest(int, struct rtentry *, struct rt_addrinfo *);
+#ifdef INET6
+static int faithprefix(struct in6_addr *);
+#endif
+
+static int faithmodevent(module_t, int, void *);
+
+static MALLOC_DEFINE(M_FAITH, FAITHNAME, "Firewall Assisted Tunnel Interface");
+
+static int faith_clone_create(struct if_clone *, int, caddr_t);
+static void faith_clone_destroy(struct ifnet *);
+
+IFC_SIMPLE_DECLARE(faith, 0);
+
+#define FAITHMTU 1500
+
+static int
+faithmodevent(mod, type, data)
+ module_t mod;
+ int type;
+ void *data;
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ if_clone_attach(&faith_cloner);
+
+#ifdef INET6
+ faithprefix_p = faithprefix;
+#endif
+
+ break;
+ case MOD_UNLOAD:
+#ifdef INET6
+ faithprefix_p = NULL;
+#endif
+
+ if_clone_detach(&faith_cloner);
+ break;
+ default:
+ return EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static moduledata_t faith_mod = {
+ "if_faith",
+ faithmodevent,
+ 0
+};
+
+DECLARE_MODULE(if_faith, faith_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_faith, 1);
+
+static int
+faith_clone_create(ifc, unit, params)
+ struct if_clone *ifc;
+ int unit;
+ caddr_t params;
+{
+ struct ifnet *ifp;
+ struct faith_softc *sc;
+
+ sc = malloc(sizeof(struct faith_softc), M_FAITH, M_WAITOK | M_ZERO);
+ ifp = sc->sc_ifp = if_alloc(IFT_FAITH);
+ if (ifp == NULL) {
+ free(sc, M_FAITH);
+ return (ENOSPC);
+ }
+
+ ifp->if_softc = sc;
+ if_initname(sc->sc_ifp, ifc->ifc_name, unit);
+
+ ifp->if_mtu = FAITHMTU;
+	/* Change to BROADCAST experimentally to announce its prefix. */
+ ifp->if_flags = /* IFF_LOOPBACK */ IFF_BROADCAST | IFF_MULTICAST;
+ ifp->if_ioctl = faithioctl;
+ ifp->if_output = faithoutput;
+ ifp->if_hdrlen = 0;
+ ifp->if_addrlen = 0;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ if_attach(ifp);
+ bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
+ return (0);
+}
+
+static void
+faith_clone_destroy(ifp)
+ struct ifnet *ifp;
+{
+ struct faith_softc *sc = ifp->if_softc;
+
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+ free(sc, M_FAITH);
+}
+
+int
+faithoutput(ifp, m, dst, ro)
+ struct ifnet *ifp;
+ struct mbuf *m;
+ struct sockaddr *dst;
+ struct route *ro;
+{
+ int isr;
+ u_int32_t af;
+ struct rtentry *rt = NULL;
+
+ M_ASSERTPKTHDR(m);
+
+ if (ro != NULL)
+ rt = ro->ro_rt;
+ /* BPF writes need to be handled specially. */
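+	/*
+	 * (On this DLT_NULL interface a packet written through BPF carries
+	 * a 4-byte address family as its only link-level header; the BPF
+	 * write path hands that header to us in sa_data with sa_family set
+	 * to AF_UNSPEC, so recover the real family here.)
+	 */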
+ if (dst->sa_family == AF_UNSPEC) {
+ bcopy(dst->sa_data, &af, sizeof(af));
+ dst->sa_family = af;
+ }
+
+ if (bpf_peers_present(ifp->if_bpf)) {
+ af = dst->sa_family;
+ bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
+ }
+
+ if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+ m_freem(m);
+ return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
+ rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+ }
+ ifp->if_opackets++;
+ ifp->if_obytes += m->m_pkthdr.len;
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ isr = NETISR_IP;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ isr = NETISR_IPV6;
+ break;
+#endif
+ default:
+ m_freem(m);
+ return EAFNOSUPPORT;
+ }
+
+ /* XXX do we need more sanity checks? */
+
+ m->m_pkthdr.rcvif = ifp;
+ ifp->if_ipackets++;
+ ifp->if_ibytes += m->m_pkthdr.len;
+ netisr_dispatch(isr, m);
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+faithrtrequest(cmd, rt, info)
+ int cmd;
+ struct rtentry *rt;
+ struct rt_addrinfo *info;
+{
+ RT_LOCK_ASSERT(rt);
+ rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+}
+
+/*
+ * Process an ioctl request.
+ */
+/* ARGSUSED */
+static int
+faithioctl(ifp, cmd, data)
+ struct ifnet *ifp;
+ u_long cmd;
+ caddr_t data;
+{
+ struct ifaddr *ifa;
+ struct ifreq *ifr = (struct ifreq *)data;
+ int error = 0;
+
+ switch (cmd) {
+
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ ifa = (struct ifaddr *)data;
+ ifa->ifa_rtrequest = faithrtrequest;
+ /*
+ * Everything else is done at a higher level.
+ */
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ if (ifr == 0) {
+ error = EAFNOSUPPORT; /* XXX */
+ break;
+ }
+ switch (ifr->ifr_addr.sa_family) {
+#ifdef INET
+ case AF_INET:
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ break;
+#endif
+
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+#ifdef SIOCSIFMTU
+ case SIOCSIFMTU:
+ ifp->if_mtu = ifr->ifr_mtu;
+ break;
+#endif
+
+ case SIOCSIFFLAGS:
+ break;
+
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef INET6
+/*
+ * XXX could be slow
+ * XXX could be layer violation to call sys/net from sys/netinet6
+ */
+static int
+faithprefix(in6)
+ struct in6_addr *in6;
+{
+ struct rtentry *rt;
+ struct sockaddr_in6 sin6;
+ int ret;
+
+ if (V_ip6_keepfaith == 0)
+ return 0;
+
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_addr = *in6;
+ rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
+ if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH &&
+ (rt->rt_ifp->if_flags & IFF_UP) != 0)
+ ret = 1;
+ else
+ ret = 0;
+ if (rt)
+ RTFREE_LOCKED(rt);
+ return ret;
+}
+#endif
diff --git a/freebsd/sys/net/if_fddisubr.c b/freebsd/sys/net/if_fddisubr.c
new file mode 100644
index 00000000..fc9f27e1
--- /dev/null
+++ b/freebsd/sys/net/if_fddisubr.c
@@ -0,0 +1,800 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1995, 1996
+ * Matt Thomas <matt@3am-software.com>. All rights reserved.
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp
+ * $FreeBSD$
+ */
+
+#include <freebsd/local/opt_atalk.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipx.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_llc.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_llatbl.h>
+
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/fddi.h>
+
+#if defined(INET) || defined(INET6)
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/if_ether.h>
+#endif
+#ifdef INET6
+#include <freebsd/netinet6/nd6.h>
+#endif
+
+#ifdef IPX
+#include <freebsd/netipx/ipx.h>
+#include <freebsd/netipx/ipx_if.h>
+#endif
+
+#ifdef DECNET
+#include <freebsd/netdnet/dn.h>
+#endif
+
+#ifdef NETATALK
+#include <freebsd/netatalk/at.h>
+#include <freebsd/netatalk/at_var.h>
+#include <freebsd/netatalk/at_extern.h>
+
+extern u_char at_org_code[ 3 ];
+extern u_char aarp_org_code[ 3 ];
+#endif /* NETATALK */
+
+#include <freebsd/security/mac/mac_framework.h>
+
+static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+static int fddi_resolvemulti(struct ifnet *, struct sockaddr **,
+ struct sockaddr *);
+static int fddi_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+static void fddi_input(struct ifnet *ifp, struct mbuf *m);
+
+#define senderr(e) do { error = (e); goto bad; } while (0)
+
+/*
+ * FDDI output routine.
+ * Encapsulate a packet of type family for the local net.
+ * Use trailer local net encapsulation if enough data in first
+ * packet leaves a multiple of 512 bytes of data in remainder.
+ * Assumes that ifp is actually a pointer to an arpcom structure.
+ */
+static int
+fddi_output(ifp, m, dst, ro)
+ struct ifnet *ifp;
+ struct mbuf *m;
+ struct sockaddr *dst;
+ struct route *ro;
+{
+ u_int16_t type;
+ int loop_copy = 0, error = 0, hdrcmplt = 0;
+ u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN];
+ struct fddi_header *fh;
+#if defined(INET) || defined(INET6)
+ struct llentry *lle;
+#endif
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ senderr(error);
+#endif
+
+ if (ifp->if_flags & IFF_MONITOR)
+ senderr(ENETDOWN);
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ senderr(ENETDOWN);
+ getmicrotime(&ifp->if_lastchange);
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET: {
+ struct rtentry *rt0 = NULL;
+
+ if (ro != NULL)
+ rt0 = ro->ro_rt;
+ error = arpresolve(ifp, rt0, m, dst, edst, &lle);
+ if (error)
+ return (error == EWOULDBLOCK ? 0 : error);
+ type = htons(ETHERTYPE_IP);
+ break;
+ }
+ case AF_ARP:
+ {
+ struct arphdr *ah;
+ ah = mtod(m, struct arphdr *);
+ ah->ar_hrd = htons(ARPHRD_ETHER);
+
+ loop_copy = -1; /* if this is for us, don't do it */
+
+ switch (ntohs(ah->ar_op)) {
+ case ARPOP_REVREQUEST:
+ case ARPOP_REVREPLY:
+ type = htons(ETHERTYPE_REVARP);
+ break;
+ case ARPOP_REQUEST:
+ case ARPOP_REPLY:
+ default:
+ type = htons(ETHERTYPE_ARP);
+ break;
+ }
+
+ if (m->m_flags & M_BCAST)
+ bcopy(ifp->if_broadcastaddr, edst, FDDI_ADDR_LEN);
+ else
+ bcopy(ar_tha(ah), edst, FDDI_ADDR_LEN);
+
+ }
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
+ if (error)
+ return (error); /* Something bad happened */
+ type = htons(ETHERTYPE_IPV6);
+ break;
+#endif /* INET6 */
+#ifdef IPX
+ case AF_IPX:
+ type = htons(ETHERTYPE_IPX);
+ bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
+ (caddr_t)edst, FDDI_ADDR_LEN);
+ break;
+#endif /* IPX */
+#ifdef NETATALK
+ case AF_APPLETALK: {
+ struct at_ifaddr *aa;
+ if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst))
+ return (0);
+ /*
+ * ifaddr is the first thing in at_ifaddr
+ */
+ if ((aa = at_ifawithnet( (struct sockaddr_at *)dst)) == 0)
+ goto bad;
+
+ /*
+		 * In the phase 2 case, we need to prepend an mbuf for the llc
+		 * header.  M_PREPEND() below makes room at the front of the
+		 * chain and the llc header is built there in place.
+ */
+ if (aa->aa_flags & AFA_PHASE2) {
+ struct llc llc;
+
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_WAIT);
+ llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
+ llc.llc_control = LLC_UI;
+ bcopy(at_org_code, llc.llc_snap.org_code, sizeof(at_org_code));
+ llc.llc_snap.ether_type = htons(ETHERTYPE_AT);
+ bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN);
+ type = 0;
+ } else {
+ type = htons(ETHERTYPE_AT);
+ }
+ ifa_free(&aa->aa_ifa);
+ break;
+ }
+#endif /* NETATALK */
+
+ case pseudo_AF_HDRCMPLT:
+ {
+ struct ether_header *eh;
+ hdrcmplt = 1;
+ eh = (struct ether_header *)dst->sa_data;
+ bcopy((caddr_t)eh->ether_shost, (caddr_t)esrc, FDDI_ADDR_LEN);
+ /* FALLTHROUGH */
+ }
+
+ case AF_UNSPEC:
+ {
+ struct ether_header *eh;
+ loop_copy = -1;
+ eh = (struct ether_header *)dst->sa_data;
+ bcopy((caddr_t)eh->ether_dhost, (caddr_t)edst, FDDI_ADDR_LEN);
+ if (*edst & 1)
+ m->m_flags |= (M_BCAST|M_MCAST);
+ type = eh->ether_type;
+ break;
+ }
+
+ case AF_IMPLINK:
+ {
+ fh = mtod(m, struct fddi_header *);
+ error = EPROTONOSUPPORT;
+ switch (fh->fddi_fc & (FDDIFC_C|FDDIFC_L|FDDIFC_F)) {
+ case FDDIFC_LLC_ASYNC: {
+ /* legal priorities are 0 through 7 */
+ if ((fh->fddi_fc & FDDIFC_Z) > 7)
+ goto bad;
+ break;
+ }
+ case FDDIFC_LLC_SYNC: {
+ /* FDDIFC_Z bits reserved, must be zero */
+ if (fh->fddi_fc & FDDIFC_Z)
+ goto bad;
+ break;
+ }
+ case FDDIFC_SMT: {
+			/* FDDIFC_Z bits must be non-zero */
+ if ((fh->fddi_fc & FDDIFC_Z) == 0)
+ goto bad;
+ break;
+ }
+ default: {
+ /* anything else is too dangerous */
+ goto bad;
+ }
+ }
+ error = 0;
+ if (fh->fddi_dhost[0] & 1)
+ m->m_flags |= (M_BCAST|M_MCAST);
+ goto queue_it;
+ }
+ default:
+ if_printf(ifp, "can't handle af%d\n", dst->sa_family);
+ senderr(EAFNOSUPPORT);
+ }
+
+ /*
+ * Add LLC header.
+ */
+ if (type != 0) {
+ struct llc *l;
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
+ if (m == 0)
+ senderr(ENOBUFS);
+ l = mtod(m, struct llc *);
+ l->llc_control = LLC_UI;
+ l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP;
+ l->llc_snap.org_code[0] =
+ l->llc_snap.org_code[1] =
+ l->llc_snap.org_code[2] = 0;
+ l->llc_snap.ether_type = htons(type);
+ }
+
+ /*
+ * Add local net header. If no space in first mbuf,
+ * allocate another.
+ */
+ M_PREPEND(m, FDDI_HDR_LEN, M_DONTWAIT);
+ if (m == 0)
+ senderr(ENOBUFS);
+ fh = mtod(m, struct fddi_header *);
+ fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4;
+ bcopy((caddr_t)edst, (caddr_t)fh->fddi_dhost, FDDI_ADDR_LEN);
+ queue_it:
+ if (hdrcmplt)
+ bcopy((caddr_t)esrc, (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN);
+ else
+ bcopy(IF_LLADDR(ifp), (caddr_t)fh->fddi_shost,
+ FDDI_ADDR_LEN);
+
+ /*
+ * If a simplex interface, and the packet is being sent to our
+ * Ethernet address or a broadcast address, loopback a copy.
+ * XXX To make a simplex device behave exactly like a duplex
+ * device, we should copy in the case of sending to our own
+ * ethernet address (thus letting the original actually appear
+ * on the wire). However, we don't do that here for security
+ * reasons and compatibility with the original behavior.
+ */
+ if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) {
+ if ((m->m_flags & M_BCAST) || (loop_copy > 0)) {
+ struct mbuf *n;
+ n = m_copy(m, 0, (int)M_COPYALL);
+ (void) if_simloop(ifp, n, dst->sa_family,
+ FDDI_HDR_LEN);
+ } else if (bcmp(fh->fddi_dhost, fh->fddi_shost,
+ FDDI_ADDR_LEN) == 0) {
+ (void) if_simloop(ifp, m, dst->sa_family,
+ FDDI_HDR_LEN);
+ return (0); /* XXX */
+ }
+ }
+
+ error = (ifp->if_transmit)(ifp, m);
+ if (error)
+ ifp->if_oerrors++;
+
+ return (error);
+
+bad:
+ ifp->if_oerrors++;
+ if (m)
+ m_freem(m);
+ return (error);
+}
+
+/*
+ * Process a received FDDI packet.
+ */
+static void
+fddi_input(ifp, m)
+ struct ifnet *ifp;
+ struct mbuf *m;
+{
+ int isr;
+ struct llc *l;
+ struct fddi_header *fh;
+
+ /*
+ * Do consistency checks to verify assumptions
+ * made by code past this point.
+ */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ if_printf(ifp, "discard frame w/o packet header\n");
+ ifp->if_ierrors++;
+ m_freem(m);
+ return;
+ }
+ if (m->m_pkthdr.rcvif == NULL) {
+ if_printf(ifp, "discard frame w/o interface pointer\n");
+ ifp->if_ierrors++;
+ m_freem(m);
+ return;
+ }
+
+ m = m_pullup(m, FDDI_HDR_LEN);
+ if (m == NULL) {
+ ifp->if_ierrors++;
+ goto dropanyway;
+ }
+ fh = mtod(m, struct fddi_header *);
+ m->m_pkthdr.header = (void *)fh;
+
+ /*
+ * Discard packet if interface is not up.
+ */
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ goto dropanyway;
+
+ /*
+ * Give bpf a chance at the packet.
+ */
+ BPF_MTAP(ifp, m);
+
+ /*
+ * Interface marked for monitoring; discard packet.
+ */
+ if (ifp->if_flags & IFF_MONITOR) {
+ m_freem(m);
+ return;
+ }
+
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+ /*
+ * Update interface statistics.
+ */
+ ifp->if_ibytes += m->m_pkthdr.len;
+ getmicrotime(&ifp->if_lastchange);
+
+ /*
+ * Discard non local unicast packets when interface
+ * is in promiscuous mode.
+ */
+ if ((ifp->if_flags & IFF_PROMISC) && ((fh->fddi_dhost[0] & 1) == 0) &&
+ (bcmp(IF_LLADDR(ifp), (caddr_t)fh->fddi_dhost,
+ FDDI_ADDR_LEN) != 0))
+ goto dropanyway;
+
+ /*
+ * Set mbuf flags for bcast/mcast.
+ */
+ if (fh->fddi_dhost[0] & 1) {
+ if (bcmp(ifp->if_broadcastaddr, fh->fddi_dhost,
+ FDDI_ADDR_LEN) == 0)
+ m->m_flags |= M_BCAST;
+ else
+ m->m_flags |= M_MCAST;
+ ifp->if_imcasts++;
+ }
+
+#ifdef M_LINK0
+ /*
+ * If this has a LLC priority of 0, then mark it so upper
+ * layers have a hint that it really came via a FDDI/Ethernet
+ * bridge.
+ */
+ if ((fh->fddi_fc & FDDIFC_LLC_PRIO7) == FDDIFC_LLC_PRIO0)
+ m->m_flags |= M_LINK0;
+#endif
+
+ /* Strip off FDDI header. */
+ m_adj(m, FDDI_HDR_LEN);
+
+ m = m_pullup(m, LLC_SNAPFRAMELEN);
+ if (m == 0) {
+ ifp->if_ierrors++;
+ goto dropanyway;
+ }
+ l = mtod(m, struct llc *);
+
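+	/*
+	 * For reference, the 8-byte 802.2 LLC/SNAP header parsed below is
+	 * laid out as:
+	 *
+	 *	dsap (0xaa) | ssap (0xaa) | control (0x03, UI) |
+	 *	org_code[3] (OUI)         | ether_type[2]
+	 *
+	 * An OUI of 00:00:00 means ether_type carries a plain Ethernet
+	 * protocol number.
+	 */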
+ switch (l->llc_dsap) {
+ case LLC_SNAP_LSAP:
+ {
+ u_int16_t type;
+ if ((l->llc_control != LLC_UI) ||
+ (l->llc_ssap != LLC_SNAP_LSAP)) {
+ ifp->if_noproto++;
+ goto dropanyway;
+ }
+#ifdef NETATALK
+ if (bcmp(&(l->llc_snap.org_code)[0], at_org_code,
+ sizeof(at_org_code)) == 0 &&
+ ntohs(l->llc_snap.ether_type) == ETHERTYPE_AT) {
+ isr = NETISR_ATALK2;
+ m_adj(m, LLC_SNAPFRAMELEN);
+ break;
+ }
+
+ if (bcmp(&(l->llc_snap.org_code)[0], aarp_org_code,
+ sizeof(aarp_org_code)) == 0 &&
+ ntohs(l->llc_snap.ether_type) == ETHERTYPE_AARP) {
+ m_adj(m, LLC_SNAPFRAMELEN);
+ isr = NETISR_AARP;
+ break;
+ }
+#endif /* NETATALK */
+ if (l->llc_snap.org_code[0] != 0 ||
+ l->llc_snap.org_code[1] != 0 ||
+ l->llc_snap.org_code[2] != 0) {
+ ifp->if_noproto++;
+ goto dropanyway;
+ }
+
+ type = ntohs(l->llc_snap.ether_type);
+ m_adj(m, LLC_SNAPFRAMELEN);
+
+ switch (type) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ if ((m = ip_fastforward(m)) == NULL)
+ return;
+ isr = NETISR_IP;
+ break;
+
+ case ETHERTYPE_ARP:
+ if (ifp->if_flags & IFF_NOARP)
+ goto dropanyway;
+ isr = NETISR_ARP;
+ break;
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ break;
+#endif
+#ifdef IPX
+ case ETHERTYPE_IPX:
+ isr = NETISR_IPX;
+ break;
+#endif
+#ifdef DECNET
+ case ETHERTYPE_DECNET:
+ isr = NETISR_DECNET;
+ break;
+#endif
+#ifdef NETATALK
+ case ETHERTYPE_AT:
+ isr = NETISR_ATALK1;
+ break;
+ case ETHERTYPE_AARP:
+ isr = NETISR_AARP;
+ break;
+#endif /* NETATALK */
+ default:
+ /* printf("fddi_input: unknown protocol 0x%x\n", type); */
+ ifp->if_noproto++;
+ goto dropanyway;
+ }
+ break;
+ }
+
+ default:
+ /* printf("fddi_input: unknown dsap 0x%x\n", l->llc_dsap); */
+ ifp->if_noproto++;
+ goto dropanyway;
+ }
+ netisr_dispatch(isr, m);
+ return;
+
+dropanyway:
+ ifp->if_iqdrops++;
+ if (m)
+ m_freem(m);
+ return;
+}
+
+/*
+ * Perform common duties while attaching to interface list
+ */
+void
+fddi_ifattach(ifp, lla, bpf)
+ struct ifnet *ifp;
+ const u_int8_t *lla;
+ int bpf;
+{
+ struct ifaddr *ifa;
+ struct sockaddr_dl *sdl;
+
+ ifp->if_type = IFT_FDDI;
+ ifp->if_addrlen = FDDI_ADDR_LEN;
+ ifp->if_hdrlen = 21;
+
+ if_attach(ifp); /* Must be called before additional assignments */
+
+ ifp->if_mtu = FDDIMTU;
+ ifp->if_output = fddi_output;
+ ifp->if_input = fddi_input;
+ ifp->if_resolvemulti = fddi_resolvemulti;
+ ifp->if_broadcastaddr = fddibroadcastaddr;
+ ifp->if_baudrate = 100000000;
+#ifdef IFF_NOTRAILERS
+ ifp->if_flags |= IFF_NOTRAILERS;
+#endif
+ ifa = ifp->if_addr;
+ KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
+
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl->sdl_type = IFT_FDDI;
+ sdl->sdl_alen = ifp->if_addrlen;
+ bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
+
+ if (bpf)
+ bpfattach(ifp, DLT_FDDI, FDDI_HDR_LEN);
+
+ return;
+}
+
+void
+fddi_ifdetach(ifp, bpf)
+ struct ifnet *ifp;
+ int bpf;
+{
+
+ if (bpf)
+ bpfdetach(ifp);
+
+ if_detach(ifp);
+
+ return;
+}
+
+int
+fddi_ioctl (ifp, command, data)
+ struct ifnet *ifp;
+ u_long command;
+ caddr_t data;
+{
+ struct ifaddr *ifa;
+ struct ifreq *ifr;
+ int error;
+
+ ifa = (struct ifaddr *) data;
+ ifr = (struct ifreq *) data;
+ error = 0;
+
+ switch (command) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET: /* before arpwhohas */
+ ifp->if_init(ifp->if_softc);
+ arp_ifinit(ifp, ifa);
+ break;
+#endif
+#ifdef IPX
+ /*
+ * XXX - This code is probably wrong
+ */
+ case AF_IPX: {
+ struct ipx_addr *ina;
+
+ ina = &(IA_SIPX(ifa)->sipx_addr);
+
+ if (ipx_nullhost(*ina)) {
+ ina->x_host = *(union ipx_host *)
+ IF_LLADDR(ifp);
+ } else {
+ bcopy((caddr_t) ina->x_host.c_host,
+ (caddr_t) IF_LLADDR(ifp),
+ ETHER_ADDR_LEN);
+ }
+
+ /*
+ * Set new address
+ */
+ ifp->if_init(ifp->if_softc);
+ }
+ break;
+#endif
+ default:
+ ifp->if_init(ifp->if_softc);
+ break;
+ }
+ break;
+ case SIOCGIFADDR: {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *) & ifr->ifr_data;
+ bcopy(IF_LLADDR(ifp),
+ (caddr_t) sa->sa_data, FDDI_ADDR_LEN);
+
+ }
+ break;
+ case SIOCSIFMTU:
+ /*
+ * Set the interface MTU.
+ */
+ if (ifr->ifr_mtu > FDDIMTU) {
+ error = EINVAL;
+ } else {
+ ifp->if_mtu = ifr->ifr_mtu;
+ }
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+
+static int
+fddi_resolvemulti(ifp, llsa, sa)
+ struct ifnet *ifp;
+ struct sockaddr **llsa;
+ struct sockaddr *sa;
+{
+ struct sockaddr_dl *sdl;
+#ifdef INET
+ struct sockaddr_in *sin;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+#endif
+ u_char *e_addr;
+
+ switch(sa->sa_family) {
+ case AF_LINK:
+ /*
+ * No mapping needed. Just check that it's a valid MC address.
+ */
+ sdl = (struct sockaddr_dl *)sa;
+ e_addr = LLADDR(sdl);
+ if ((e_addr[0] & 1) != 1)
+ return (EADDRNOTAVAIL);
+ *llsa = 0;
+ return (0);
+
+#ifdef INET
+ case AF_INET:
+ sin = (struct sockaddr_in *)sa;
+ if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ return (EADDRNOTAVAIL);
+ sdl = malloc(sizeof *sdl, M_IFMADDR,
+ M_NOWAIT | M_ZERO);
+ if (sdl == NULL)
+ return (ENOMEM);
+ sdl->sdl_len = sizeof *sdl;
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = IFT_FDDI;
+ sdl->sdl_nlen = 0;
+ sdl->sdl_alen = FDDI_ADDR_LEN;
+ sdl->sdl_slen = 0;
+ e_addr = LLADDR(sdl);
+ ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
+ *llsa = (struct sockaddr *)sdl;
+ return (0);
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /*
+			 * An IP6 address of 0 means listen to all
+			 * of the Ethernet multicast addresses used for IP6.
+			 * (This is used by multicast routers.)
+ */
+ ifp->if_flags |= IFF_ALLMULTI;
+ *llsa = 0;
+ return (0);
+ }
+ if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+ return (EADDRNOTAVAIL);
+ sdl = malloc(sizeof *sdl, M_IFMADDR,
+ M_NOWAIT | M_ZERO);
+ if (sdl == NULL)
+ return (ENOMEM);
+ sdl->sdl_len = sizeof *sdl;
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = IFT_FDDI;
+ sdl->sdl_nlen = 0;
+ sdl->sdl_alen = FDDI_ADDR_LEN;
+ sdl->sdl_slen = 0;
+ e_addr = LLADDR(sdl);
+ ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
+ *llsa = (struct sockaddr *)sdl;
+ return (0);
+#endif
+
+ default:
+ /*
+ * Well, the text isn't quite right, but it's the name
+ * that counts...
+ */
+ return (EAFNOSUPPORT);
+ }
+
+ return (0);
+}
+
+static moduledata_t fddi_mod = {
+ "fddi", /* module name */
+ NULL, /* event handler */
+ 0 /* extra data */
+};
+
+DECLARE_MODULE(fddi, fddi_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(fddi, 1);
diff --git a/freebsd/sys/net/if_fwsubr.c b/freebsd/sys/net/if_fwsubr.c
new file mode 100644
index 00000000..d084bea4
--- /dev/null
+++ b/freebsd/sys/net/if_fwsubr.c
@@ -0,0 +1,853 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2004 Doug Rabson
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/if_llc.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/firewire.h>
+#include <freebsd/net/if_llatbl.h>
+
+#if defined(INET) || defined(INET6)
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/if_ether.h>
+#endif
+#ifdef INET6
+#include <freebsd/netinet6/nd6.h>
+#endif
+
+#include <freebsd/security/mac/mac_framework.h>
+
+MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals");
+
+struct fw_hwaddr firewire_broadcastaddr = {
+ 0xffffffff,
+ 0xffffffff,
+ 0xff,
+ 0xff,
+ 0xffff,
+ 0xffffffff
+};
+
+static int
+firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct route *ro)
+{
+ struct fw_com *fc = IFP2FWC(ifp);
+ int error, type;
+ struct m_tag *mtag;
+ union fw_encap *enc;
+ struct fw_hwaddr *destfw;
+ uint8_t speed;
+ uint16_t psize, fsize, dsize;
+ struct mbuf *mtail;
+ int unicast, dgl, foff;
+ static int next_dgl;
+#if defined(INET) || defined(INET6)
+ struct llentry *lle;
+#endif
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ goto bad;
+#endif
+
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
+ error = ENETDOWN;
+ goto bad;
+ }
+
+ /*
+ * For unicast, we make a tag to store the lladdr of the
+ * destination. This might not be the first time we have seen
+ * the packet (for instance, the arp code might be trying to
+ * re-send it after receiving an arp reply) so we only
+ * allocate a tag if there isn't one there already. For
+ * multicast, we will eventually use a different tag to store
+ * the channel number.
+ */
+ unicast = !(m->m_flags & (M_BCAST | M_MCAST));
+ if (unicast) {
+ mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR, NULL);
+ if (!mtag) {
+ mtag = m_tag_alloc(MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR,
+ sizeof (struct fw_hwaddr), M_NOWAIT);
+ if (!mtag) {
+ error = ENOMEM;
+ goto bad;
+ }
+ m_tag_prepend(m, mtag);
+ }
+ destfw = (struct fw_hwaddr *)(mtag + 1);
+ } else {
+ destfw = 0;
+ }
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ /*
+ * Only bother with arp for unicast. Allocation of
+ * channels etc. for firewire is quite different and
+ * doesn't fit into the arp model.
+ */
+ if (unicast) {
+ error = arpresolve(ifp, ro ? ro->ro_rt : NULL, m, dst, (u_char *) destfw, &lle);
+ if (error)
+ return (error == EWOULDBLOCK ? 0 : error);
+ }
+ type = ETHERTYPE_IP;
+ break;
+
+ case AF_ARP:
+ {
+ struct arphdr *ah;
+ ah = mtod(m, struct arphdr *);
+ ah->ar_hrd = htons(ARPHRD_IEEE1394);
+ type = ETHERTYPE_ARP;
+ if (unicast)
+ *destfw = *(struct fw_hwaddr *) ar_tha(ah);
+
+ /*
+ * The standard arp code leaves a hole for the target
+ * hardware address which we need to close up.
+ */
+ bcopy(ar_tpa(ah), ar_tha(ah), ah->ar_pln);
+ m_adj(m, -ah->ar_hln);
+ break;
+ }
+#endif
+
+#ifdef INET6
+ case AF_INET6:
+ if (unicast) {
+ error = nd6_storelladdr(fc->fc_ifp, m, dst,
+ (u_char *) destfw, &lle);
+ if (error)
+ return (error);
+ }
+ type = ETHERTYPE_IPV6;
+ break;
+#endif
+
+ default:
+ if_printf(ifp, "can't handle af%d\n", dst->sa_family);
+ error = EAFNOSUPPORT;
+ goto bad;
+ }
+
+ /*
+ * Let BPF tap off a copy before we encapsulate.
+ */
+ if (bpf_peers_present(ifp->if_bpf)) {
+ struct fw_bpfhdr h;
+ if (unicast)
+ bcopy(destfw, h.firewire_dhost, 8);
+ else
+ bcopy(&firewire_broadcastaddr, h.firewire_dhost, 8);
+ bcopy(&fc->fc_hwaddr, h.firewire_shost, 8);
+ h.firewire_type = htons(type);
+ bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m);
+ }
+
+ /*
+ * Punt on MCAP for now and send all multicast packets on the
+ * broadcast channel.
+ */
+ if (m->m_flags & M_MCAST)
+ m->m_flags |= M_BCAST;
+
+ /*
+ * Figure out what speed to use and what the largest supported
+ * packet size is. For unicast, this is the minimum of what we
+	 * can speak and what they can hear. For broadcast, let's be
+ * conservative and use S100. We could possibly improve that
+ * by examining the bus manager's speed map or similar. We
+ * also reduce the packet size for broadcast to account for
+ * the GASP header.
+ */
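+	/*
+	 * (Worked example: an S400 peer, speed code 2, advertising
+	 * max_rec 8 yields min(512 << 2, 2 << 8) = min(2048, 512),
+	 * so psize ends up as 512 bytes.)
+	 */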
+ if (unicast) {
+ speed = min(fc->fc_speed, destfw->sspd);
+ psize = min(512 << speed, 2 << destfw->sender_max_rec);
+ } else {
+ speed = 0;
+ psize = 512 - 2*sizeof(uint32_t);
+ }
+
+ /*
+ * Next, we encapsulate, possibly fragmenting the original
+ * datagram if it won't fit into a single packet.
+ */
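+	/*
+	 * (The encapsulation header -- union fw_encap -- is one quadlet for
+	 * unfragmented datagrams and two quadlets for fragments, hence the
+	 * sizeof(uint32_t) and 2*sizeof(uint32_t) reservations below.)
+	 */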
+ if (m->m_pkthdr.len <= psize - sizeof(uint32_t)) {
+ /*
+ * No fragmentation is necessary.
+ */
+ M_PREPEND(m, sizeof(uint32_t), M_DONTWAIT);
+ if (!m) {
+ error = ENOBUFS;
+ goto bad;
+ }
+ enc = mtod(m, union fw_encap *);
+ enc->unfrag.ether_type = type;
+ enc->unfrag.lf = FW_ENCAP_UNFRAG;
+ enc->unfrag.reserved = 0;
+
+ /*
+ * Byte swap the encapsulation header manually.
+ */
+ enc->ul[0] = htonl(enc->ul[0]);
+
+ error = (ifp->if_transmit)(ifp, m);
+ return (error);
+ } else {
+ /*
+ * Fragment the datagram, making sure to leave enough
+ * space for the encapsulation header in each packet.
+ */
+ fsize = psize - 2*sizeof(uint32_t);
+ dgl = next_dgl++;
+ dsize = m->m_pkthdr.len;
+ foff = 0;
+ while (m) {
+ if (m->m_pkthdr.len > fsize) {
+ /*
+ * Split off the tail segment from the
+ * datagram, copying our tags over.
+ */
+ mtail = m_split(m, fsize, M_DONTWAIT);
+ m_tag_copy_chain(mtail, m, M_NOWAIT);
+ } else {
+ mtail = 0;
+ }
+
+ /*
+ * Add our encapsulation header to this
+ * fragment and hand it off to the link.
+ */
+ M_PREPEND(m, 2*sizeof(uint32_t), M_DONTWAIT);
+ if (!m) {
+ error = ENOBUFS;
+ goto bad;
+ }
+ enc = mtod(m, union fw_encap *);
+ if (foff == 0) {
+ enc->firstfrag.lf = FW_ENCAP_FIRST;
+ enc->firstfrag.reserved1 = 0;
+ enc->firstfrag.reserved2 = 0;
+ enc->firstfrag.datagram_size = dsize - 1;
+ enc->firstfrag.ether_type = type;
+ enc->firstfrag.dgl = dgl;
+ } else {
+ if (mtail)
+ enc->nextfrag.lf = FW_ENCAP_NEXT;
+ else
+ enc->nextfrag.lf = FW_ENCAP_LAST;
+ enc->nextfrag.reserved1 = 0;
+ enc->nextfrag.reserved2 = 0;
+ enc->nextfrag.reserved3 = 0;
+ enc->nextfrag.datagram_size = dsize - 1;
+ enc->nextfrag.fragment_offset = foff;
+ enc->nextfrag.dgl = dgl;
+ }
+ foff += m->m_pkthdr.len - 2*sizeof(uint32_t);
+
+ /*
+ * Byte swap the encapsulation header manually.
+ */
+ enc->ul[0] = htonl(enc->ul[0]);
+ enc->ul[1] = htonl(enc->ul[1]);
+
+ error = (ifp->if_transmit)(ifp, m);
+ if (error) {
+ if (mtail)
+ m_freem(mtail);
+ return (ENOBUFS);
+ }
+
+ m = mtail;
+ }
+
+ return (0);
+ }
+
+bad:
+ if (m)
+ m_freem(m);
+ return (error);
+}
+
+static struct mbuf *
+firewire_input_fragment(struct fw_com *fc, struct mbuf *m, int src)
+{
+ union fw_encap *enc;
+ struct fw_reass *r;
+ struct mbuf *mf, *mprev;
+ int dsize;
+ int fstart, fend, start, end, islast;
+ uint32_t id;
+
+ /*
+ * Find an existing reassembly buffer or create a new one.
+ */
+ enc = mtod(m, union fw_encap *);
+ id = enc->firstfrag.dgl | (src << 16);
+ STAILQ_FOREACH(r, &fc->fc_frags, fr_link)
+ if (r->fr_id == id)
+ break;
+ if (!r) {
+ r = malloc(sizeof(struct fw_reass), M_TEMP, M_NOWAIT);
+ if (!r) {
+ m_freem(m);
+ return 0;
+ }
+ r->fr_id = id;
+ r->fr_frags = 0;
+ STAILQ_INSERT_HEAD(&fc->fc_frags, r, fr_link);
+ }
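+
+	/*
+	 * (The key combines the sender's node id with the datagram label,
+	 * so concurrent datagrams from different senders, or different dgl
+	 * values from one sender, reassemble independently.)
+	 */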
+
+ /*
+ * If this fragment overlaps any other fragment, we must discard
+ * the partial reassembly and start again.
+ */
+ if (enc->firstfrag.lf == FW_ENCAP_FIRST)
+ fstart = 0;
+ else
+ fstart = enc->nextfrag.fragment_offset;
+ fend = fstart + m->m_pkthdr.len - 2*sizeof(uint32_t);
+ dsize = enc->nextfrag.datagram_size;
+ islast = (enc->nextfrag.lf == FW_ENCAP_LAST);
+
+ for (mf = r->fr_frags; mf; mf = mf->m_nextpkt) {
+ enc = mtod(mf, union fw_encap *);
+ if (enc->nextfrag.datagram_size != dsize) {
+ /*
+ * This fragment must be from a different
+ * packet.
+ */
+ goto bad;
+ }
+ if (enc->firstfrag.lf == FW_ENCAP_FIRST)
+ start = 0;
+ else
+ start = enc->nextfrag.fragment_offset;
+ end = start + mf->m_pkthdr.len - 2*sizeof(uint32_t);
+ if ((fstart < end && fend > start) ||
+ (islast && enc->nextfrag.lf == FW_ENCAP_LAST)) {
+ /*
+ * Overlap - discard reassembly buffer and start
+ * again with this fragment.
+ */
+ goto bad;
+ }
+ }
+
+ /*
+ * Find where to put this fragment in the list.
+ */
+ for (mf = r->fr_frags, mprev = NULL; mf;
+ mprev = mf, mf = mf->m_nextpkt) {
+ enc = mtod(mf, union fw_encap *);
+ if (enc->firstfrag.lf == FW_ENCAP_FIRST)
+ start = 0;
+ else
+ start = enc->nextfrag.fragment_offset;
+ if (start >= fend)
+ break;
+ }
+
+ /*
+ * If this is a last fragment and we are not adding at the end
+ * of the list, discard the buffer.
+ */
+ if (islast && mprev && mprev->m_nextpkt)
+ goto bad;
+
+ if (mprev) {
+ m->m_nextpkt = mprev->m_nextpkt;
+ mprev->m_nextpkt = m;
+
+ /*
+ * Coalesce forwards and see if we can make a whole
+ * datagram.
+ */
+ enc = mtod(mprev, union fw_encap *);
+ if (enc->firstfrag.lf == FW_ENCAP_FIRST)
+ start = 0;
+ else
+ start = enc->nextfrag.fragment_offset;
+ end = start + mprev->m_pkthdr.len - 2*sizeof(uint32_t);
+ while (end == fstart) {
+ /*
+ * Strip off the encap header from m and
+ * append it to mprev, freeing m.
+ */
+ m_adj(m, 2*sizeof(uint32_t));
+ mprev->m_nextpkt = m->m_nextpkt;
+ mprev->m_pkthdr.len += m->m_pkthdr.len;
+ m_cat(mprev, m);
+
+ if (mprev->m_pkthdr.len == dsize + 1 + 2*sizeof(uint32_t)) {
+ /*
+				 * We have assembled a complete packet, so we
+				 * must be finished.  Make sure we have
+ * merged the whole chain.
+ */
+ STAILQ_REMOVE(&fc->fc_frags, r, fw_reass, fr_link);
+ free(r, M_TEMP);
+ m = mprev->m_nextpkt;
+ while (m) {
+ mf = m->m_nextpkt;
+ m_freem(m);
+ m = mf;
+ }
+ mprev->m_nextpkt = NULL;
+
+ return (mprev);
+ }
+
+ /*
+ * See if we can continue merging forwards.
+ */
+ end = fend;
+ m = mprev->m_nextpkt;
+ if (m) {
+ enc = mtod(m, union fw_encap *);
+ if (enc->firstfrag.lf == FW_ENCAP_FIRST)
+ fstart = 0;
+ else
+ fstart = enc->nextfrag.fragment_offset;
+ fend = fstart + m->m_pkthdr.len
+ - 2*sizeof(uint32_t);
+ } else {
+ break;
+ }
+ }
+ } else {
+ m->m_nextpkt = 0;
+ r->fr_frags = m;
+ }
+
+ return (0);
+
+bad:
+ while (r->fr_frags) {
+ mf = r->fr_frags;
+ r->fr_frags = mf->m_nextpkt;
+ m_freem(mf);
+ }
+ m->m_nextpkt = 0;
+ r->fr_frags = m;
+
+ return (0);
+}
+
+void
+firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
+{
+ struct fw_com *fc = IFP2FWC(ifp);
+ union fw_encap *enc;
+ int type, isr;
+
+ /*
+ * The caller has already stripped off the packet header
+ * (stream or wreqb) and marked the mbuf's M_BCAST flag
+ * appropriately. We de-encapsulate the IP packet and pass it
+ * up the line after handling link-level fragmentation.
+ */
+ if (m->m_pkthdr.len < sizeof(uint32_t)) {
+ if_printf(ifp, "discarding frame without "
+ "encapsulation header (len %u pkt len %u)\n",
+ m->m_len, m->m_pkthdr.len);
+ }
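+	/*
+	 * (An undersized frame is not freed here: the m_pullup() below
+	 * cannot gather sizeof(uint32_t) bytes from it, so it frees the
+	 * chain and returns NULL, and we bail out at that point.)
+	 */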
+
+ m = m_pullup(m, sizeof(uint32_t));
+ if (m == NULL)
+ return;
+ enc = mtod(m, union fw_encap *);
+
+ /*
+ * Byte swap the encapsulation header manually.
+ */
+ enc->ul[0] = ntohl(enc->ul[0]);
+
+ if (enc->unfrag.lf != 0) {
+ m = m_pullup(m, 2*sizeof(uint32_t));
+ if (!m)
+ return;
+ enc = mtod(m, union fw_encap *);
+ enc->ul[1] = ntohl(enc->ul[1]);
+ m = firewire_input_fragment(fc, m, src);
+ if (!m)
+ return;
+ enc = mtod(m, union fw_encap *);
+ type = enc->firstfrag.ether_type;
+ m_adj(m, 2*sizeof(uint32_t));
+ } else {
+ type = enc->unfrag.ether_type;
+ m_adj(m, sizeof(uint32_t));
+ }
+
+ if (m->m_pkthdr.rcvif == NULL) {
+ if_printf(ifp, "discard frame w/o interface pointer\n");
+ ifp->if_ierrors++;
+ m_freem(m);
+ return;
+ }
+#ifdef DIAGNOSTIC
+ if (m->m_pkthdr.rcvif != ifp) {
+ if_printf(ifp, "Warning, frame marked as received on %s\n",
+ m->m_pkthdr.rcvif->if_xname);
+ }
+#endif
+
+#ifdef MAC
+ /*
+ * Tag the mbuf with an appropriate MAC label before any other
+ * consumers can get to it.
+ */
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+ /*
+ * Give bpf a chance at the packet. The link-level driver
+ * should have left us a tag with the EUID of the sender.
+ */
+ if (bpf_peers_present(ifp->if_bpf)) {
+ struct fw_bpfhdr h;
+ struct m_tag *mtag;
+
+ mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_SENDER_EUID, 0);
+ if (mtag)
+ bcopy(mtag + 1, h.firewire_shost, 8);
+ else
+			bcopy(&firewire_broadcastaddr, h.firewire_shost, 8);
+ bcopy(&fc->fc_hwaddr, h.firewire_dhost, 8);
+ h.firewire_type = htons(type);
+ bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m);
+ }
+
+ if (ifp->if_flags & IFF_MONITOR) {
+ /*
+ * Interface marked for monitoring; discard packet.
+ */
+ m_freem(m);
+ return;
+ }
+
+ ifp->if_ibytes += m->m_pkthdr.len;
+
+ /* Discard packet if interface is not up */
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ m_freem(m);
+ return;
+ }
+
+ if (m->m_flags & (M_BCAST|M_MCAST))
+ ifp->if_imcasts++;
+
+ switch (type) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ if ((m = ip_fastforward(m)) == NULL)
+ return;
+ isr = NETISR_IP;
+ break;
+
+ case ETHERTYPE_ARP:
+ {
+ struct arphdr *ah;
+ ah = mtod(m, struct arphdr *);
+
+ /*
+ * Adjust the arp packet to insert an empty tha slot.
+ */
+ m->m_len += ah->ar_hln;
+ m->m_pkthdr.len += ah->ar_hln;
+ bcopy(ar_tha(ah), ar_tpa(ah), ah->ar_pln);
+ isr = NETISR_ARP;
+ break;
+ }
+#endif
+
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ break;
+#endif
+
+ default:
+ m_freem(m);
+ return;
+ }
+
+ netisr_dispatch(isr, m);
+}
+
+int
+firewire_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+{
+ struct ifaddr *ifa = (struct ifaddr *) data;
+ struct ifreq *ifr = (struct ifreq *) data;
+ int error = 0;
+
+ switch (command) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ ifp->if_init(ifp->if_softc); /* before arpwhohas */
+ arp_ifinit(ifp, ifa);
+ break;
+#endif
+ default:
+ ifp->if_init(ifp->if_softc);
+ break;
+ }
+ break;
+
+ case SIOCGIFADDR:
+ {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *) & ifr->ifr_data;
+ bcopy(&IFP2FWC(ifp)->fc_hwaddr,
+ (caddr_t) sa->sa_data, sizeof(struct fw_hwaddr));
+ }
+ break;
+
+ case SIOCSIFMTU:
+ /*
+ * Set the interface MTU.
+ */
+ if (ifr->ifr_mtu > 1500) {
+ error = EINVAL;
+ } else {
+ ifp->if_mtu = ifr->ifr_mtu;
+ }
+ break;
+ default:
+ error = EINVAL; /* XXX netbsd has ENOTTY??? */
+ break;
+ }
+ return (error);
+}
+
+static int
+firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+ struct sockaddr *sa)
+{
+#ifdef INET
+ struct sockaddr_in *sin;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+#endif
+
+ switch(sa->sa_family) {
+ case AF_LINK:
+ /*
+ * No mapping needed.
+ */
+ *llsa = 0;
+ return 0;
+
+#ifdef INET
+ case AF_INET:
+ sin = (struct sockaddr_in *)sa;
+ if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ return EADDRNOTAVAIL;
+ *llsa = 0;
+ return 0;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /*
+			 * An IP6 address of 0 means listen to all
+			 * of the Ethernet multicast addresses used for IP6.
+			 * (This is used by multicast routers.)
+ */
+ ifp->if_flags |= IFF_ALLMULTI;
+ *llsa = 0;
+ return 0;
+ }
+ if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+ return EADDRNOTAVAIL;
+ *llsa = 0;
+ return 0;
+#endif
+
+ default:
+ /*
+ * Well, the text isn't quite right, but it's the name
+ * that counts...
+ */
+ return EAFNOSUPPORT;
+ }
+}
+
+void
+firewire_ifattach(struct ifnet *ifp, struct fw_hwaddr *llc)
+{
+ struct fw_com *fc = IFP2FWC(ifp);
+ struct ifaddr *ifa;
+ struct sockaddr_dl *sdl;
+ static const char* speeds[] = {
+ "S100", "S200", "S400", "S800",
+ "S1600", "S3200"
+ };
+
+ fc->fc_speed = llc->sspd;
+ STAILQ_INIT(&fc->fc_frags);
+
+ ifp->if_addrlen = sizeof(struct fw_hwaddr);
+ ifp->if_hdrlen = 0;
+ if_attach(ifp);
+ ifp->if_mtu = 1500; /* XXX */
+ ifp->if_output = firewire_output;
+ ifp->if_resolvemulti = firewire_resolvemulti;
+ ifp->if_broadcastaddr = (u_char *) &firewire_broadcastaddr;
+
+ ifa = ifp->if_addr;
+ KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl->sdl_type = IFT_IEEE1394;
+ sdl->sdl_alen = ifp->if_addrlen;
+ bcopy(llc, LLADDR(sdl), ifp->if_addrlen);
+
+ bpfattach(ifp, DLT_APPLE_IP_OVER_IEEE1394,
+ sizeof(struct fw_hwaddr));
+
+ if_printf(ifp, "Firewire address: %8D @ 0x%04x%08x, %s, maxrec %d\n",
+ (uint8_t *) &llc->sender_unique_ID_hi, ":",
+ ntohs(llc->sender_unicast_FIFO_hi),
+ ntohl(llc->sender_unicast_FIFO_lo),
+ speeds[llc->sspd],
+ (2 << llc->sender_max_rec));
+}
+
+void
+firewire_ifdetach(struct ifnet *ifp)
+{
+ bpfdetach(ifp);
+ if_detach(ifp);
+}
+
+void
+firewire_busreset(struct ifnet *ifp)
+{
+ struct fw_com *fc = IFP2FWC(ifp);
+ struct fw_reass *r;
+ struct mbuf *m;
+
+ /*
+ * Discard any partial datagrams since the host ids may have changed.
+ */
+ while ((r = STAILQ_FIRST(&fc->fc_frags))) {
+ STAILQ_REMOVE_HEAD(&fc->fc_frags, fr_link);
+ while (r->fr_frags) {
+ m = r->fr_frags;
+ r->fr_frags = m->m_nextpkt;
+ m_freem(m);
+ }
+ free(r, M_TEMP);
+ }
+}
+
+static void *
+firewire_alloc(u_char type, struct ifnet *ifp)
+{
+ struct fw_com *fc;
+
+ fc = malloc(sizeof(struct fw_com), M_FWCOM, M_WAITOK | M_ZERO);
+ fc->fc_ifp = ifp;
+
+ return (fc);
+}
+
+static void
+firewire_free(void *com, u_char type)
+{
+
+ free(com, M_FWCOM);
+}
+
+static int
+firewire_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ if_register_com_alloc(IFT_IEEE1394,
+ firewire_alloc, firewire_free);
+ break;
+ case MOD_UNLOAD:
+ if_deregister_com_alloc(IFT_IEEE1394);
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+
+ return (0);
+}
+
+static moduledata_t firewire_mod = {
+ "if_firewire",
+ firewire_modevent,
+ 0
+};
+
+DECLARE_MODULE(if_firewire, firewire_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
+MODULE_VERSION(if_firewire, 1);
diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c
new file mode 100644
index 00000000..be67500a
--- /dev/null
+++ b/freebsd/sys/net/if_gif.c
@@ -0,0 +1,1025 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */
+
+/*-
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/conf.h>
+#include <freebsd/machine/cpu.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#ifdef INET
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_gif.h>
+#include <freebsd/netinet/ip_var.h>
+#endif /* INET */
+
+#ifdef INET6
+#ifndef INET
+#include <freebsd/netinet/in.h>
+#endif
+#include <freebsd/netinet6/in6_var.h>
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/scope6_var.h>
+#include <freebsd/netinet6/in6_gif.h>
+#include <freebsd/netinet6/ip6protosw.h>
+#endif /* INET6 */
+
+#include <freebsd/netinet/ip_encap.h>
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if_bridgevar.h>
+#include <freebsd/net/if_gif.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+#define GIFNAME "gif"
+
+/*
+ * gif_mtx protects the global gif_softc_list.
+ */
+static struct mtx gif_mtx;
+static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
+static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
+#define V_gif_softc_list VNET(gif_softc_list)
+
+void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
+void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
+void (*ng_gif_attach_p)(struct ifnet *ifp);
+void (*ng_gif_detach_p)(struct ifnet *ifp);
+
+static void gif_start(struct ifnet *);
+static int gif_clone_create(struct if_clone *, int, caddr_t);
+static void gif_clone_destroy(struct ifnet *);
+
+IFC_SIMPLE_DECLARE(gif, 0);
+
+static int gifmodevent(module_t, int, void *);
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
+ "Generic Tunnel Interface");
+#ifndef MAX_GIF_NEST
+/*
+ * This macro controls the default upper limit on nesting of gif tunnels.
+ * Since setting a large value here combined with a careless configuration
+ * may crash the system, we do not allow any nesting by default.
+ * If you need to configure nested gif tunnels, you can define this macro
+ * in your kernel configuration file.  However, if you do so, please be
+ * careful to configure the tunnels so that they do not form a loop.
+ */
+#define MAX_GIF_NEST 1
+#endif
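+/*
+ * (With the default limit of 1, traffic may pass through a single gif
+ * encapsulation; routing one gif tunnel through another exceeds the
+ * limit and such packets are dropped.)
+ */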
+static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
+#define V_max_gif_nesting VNET(max_gif_nesting)
+SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW,
+ &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
+
+/*
+ * By default, we disallow creation of multiple tunnels between the same
+ * pair of addresses. Some applications require this functionality, so
+ * we allow control over this check here.
+ */
+#ifdef XBONEHACK
+static VNET_DEFINE(int, parallel_tunnels) = 1;
+#else
+static VNET_DEFINE(int, parallel_tunnels) = 0;
+#endif
+#define V_parallel_tunnels VNET(parallel_tunnels)
+SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW,
+ &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?");
+
+/* copy from src/sys/net/if_ethersubr.c */
+static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+#ifndef ETHER_IS_BROADCAST
+#define ETHER_IS_BROADCAST(addr) \
+ (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
+#endif
+
+static int
+gif_clone_create(ifc, unit, params)
+ struct if_clone *ifc;
+ int unit;
+ caddr_t params;
+{
+ struct gif_softc *sc;
+
+ sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
+ sc->gif_fibnum = curthread->td_proc->p_fibnum;
+ GIF2IFP(sc) = if_alloc(IFT_GIF);
+ if (GIF2IFP(sc) == NULL) {
+ free(sc, M_GIF);
+ return (ENOSPC);
+ }
+
+ GIF_LOCK_INIT(sc);
+
+ GIF2IFP(sc)->if_softc = sc;
+ if_initname(GIF2IFP(sc), ifc->ifc_name, unit);
+
+ sc->encap_cookie4 = sc->encap_cookie6 = NULL;
+ sc->gif_options = GIF_ACCEPT_REVETHIP;
+
+ GIF2IFP(sc)->if_addrlen = 0;
+ GIF2IFP(sc)->if_mtu = GIF_MTU;
+ GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
+#if 0
+ /* turn off ingress filter */
+ GIF2IFP(sc)->if_flags |= IFF_LINK2;
+#endif
+ GIF2IFP(sc)->if_ioctl = gif_ioctl;
+ GIF2IFP(sc)->if_start = gif_start;
+ GIF2IFP(sc)->if_output = gif_output;
+ GIF2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
+ if_attach(GIF2IFP(sc));
+ bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
+ if (ng_gif_attach_p != NULL)
+ (*ng_gif_attach_p)(GIF2IFP(sc));
+
+ mtx_lock(&gif_mtx);
+ LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
+ mtx_unlock(&gif_mtx);
+
+ return (0);
+}
+
+static void
+gif_clone_destroy(ifp)
+ struct ifnet *ifp;
+{
+#if defined(INET) || defined(INET6)
+ int err;
+#endif
+ struct gif_softc *sc = ifp->if_softc;
+
+ mtx_lock(&gif_mtx);
+ LIST_REMOVE(sc, gif_list);
+ mtx_unlock(&gif_mtx);
+
+ gif_delete_tunnel(ifp);
+#ifdef INET6
+ if (sc->encap_cookie6 != NULL) {
+ err = encap_detach(sc->encap_cookie6);
+ KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
+ }
+#endif
+#ifdef INET
+ if (sc->encap_cookie4 != NULL) {
+ err = encap_detach(sc->encap_cookie4);
+ KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
+ }
+#endif
+
+ if (ng_gif_detach_p != NULL)
+ (*ng_gif_detach_p)(ifp);
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+
+ GIF_LOCK_DESTROY(sc);
+
+ free(sc, M_GIF);
+}
+
+static void
+vnet_gif_init(const void *unused __unused)
+{
+
+ LIST_INIT(&V_gif_softc_list);
+}
+VNET_SYSINIT(vnet_gif_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_gif_init,
+ NULL);
+
+static int
+gifmodevent(mod, type, data)
+ module_t mod;
+ int type;
+ void *data;
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
+ if_clone_attach(&gif_cloner);
+ break;
+
+ case MOD_UNLOAD:
+ if_clone_detach(&gif_cloner);
+ mtx_destroy(&gif_mtx);
+ break;
+ default:
+ return EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static moduledata_t gif_mod = {
+ "if_gif",
+ gifmodevent,
+ 0
+};
+
+DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_gif, 1);
+
+int
+gif_encapcheck(m, off, proto, arg)
+ const struct mbuf *m;
+ int off;
+ int proto;
+ void *arg;
+{
+ struct ip ip;
+ struct gif_softc *sc;
+
+ sc = (struct gif_softc *)arg;
+ if (sc == NULL)
+ return 0;
+
+ if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
+ return 0;
+
+ /* no physical address */
+ if (!sc->gif_psrc || !sc->gif_pdst)
+ return 0;
+
+ switch (proto) {
+#ifdef INET
+ case IPPROTO_IPV4:
+ break;
+#endif
+#ifdef INET6
+ case IPPROTO_IPV6:
+ break;
+#endif
+ case IPPROTO_ETHERIP:
+ break;
+
+ default:
+ return 0;
+ }
+
+ /* Bail on short packets */
+ if (m->m_pkthdr.len < sizeof(ip))
+ return 0;
+
+ m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
+
+ switch (ip.ip_v) {
+#ifdef INET
+ case 4:
+ if (sc->gif_psrc->sa_family != AF_INET ||
+ sc->gif_pdst->sa_family != AF_INET)
+ return 0;
+ return gif_encapcheck4(m, off, proto, arg);
+#endif
+#ifdef INET6
+ case 6:
+ if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
+ return 0;
+ if (sc->gif_psrc->sa_family != AF_INET6 ||
+ sc->gif_pdst->sa_family != AF_INET6)
+ return 0;
+ return gif_encapcheck6(m, off, proto, arg);
+#endif
+ default:
+ return 0;
+ }
+}
+
+static void
+gif_start(struct ifnet *ifp)
+{
+ struct gif_softc *sc;
+ struct mbuf *m;
+
+ sc = ifp->if_softc;
+
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ for (;;) {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+		if (m == NULL)
+			break;
+
+		gif_output(ifp, m, sc->gif_pdst, NULL);
+	}
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+
+ return;
+}
+
+int
+gif_output(ifp, m, dst, ro)
+ struct ifnet *ifp;
+ struct mbuf *m;
+ struct sockaddr *dst;
+ struct route *ro;
+{
+ struct gif_softc *sc = ifp->if_softc;
+ struct m_tag *mtag;
+ int error = 0;
+ int gif_called;
+ u_int32_t af;
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error) {
+ m_freem(m);
+ goto end;
+ }
+#endif
+
+ /*
+	 * gif may cause infinite recursive calls when misconfigured.
+	 * We'll prevent this by detecting loops.
+	 *
+	 * A high nesting level may cause stack exhaustion.
+	 * We'll prevent this by enforcing an upper limit.
+ */
+ gif_called = 1;
+ mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
+ while (mtag != NULL) {
+ if (*(struct ifnet **)(mtag + 1) == ifp) {
+ log(LOG_NOTICE,
+ "gif_output: loop detected on %s\n",
+ (*(struct ifnet **)(mtag + 1))->if_xname);
+ m_freem(m);
+			error = EIO;	/* is there a better errno? */
+ goto end;
+ }
+ mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
+ gif_called++;
+ }
+ if (gif_called > V_max_gif_nesting) {
+ log(LOG_NOTICE,
+		    "gif_output: recursively called too many times (%d)\n",
+ gif_called);
+ m_freem(m);
+		error = EIO;	/* is there a better errno? */
+ goto end;
+ }
+ mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ m_freem(m);
+ error = ENOMEM;
+ goto end;
+ }
+ *(struct ifnet **)(mtag + 1) = ifp;
+ m_tag_prepend(m, mtag);
+
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+
+ GIF_LOCK(sc);
+
+ if (!(ifp->if_flags & IFF_UP) ||
+ sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
+ GIF_UNLOCK(sc);
+ m_freem(m);
+ error = ENETDOWN;
+ goto end;
+ }
+
+ /* BPF writes need to be handled specially. */
+ if (dst->sa_family == AF_UNSPEC) {
+ bcopy(dst->sa_data, &af, sizeof(af));
+ dst->sa_family = af;
+ }
+
+ af = dst->sa_family;
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ ifp->if_opackets++;
+ ifp->if_obytes += m->m_pkthdr.len;
+
+ /* override to IPPROTO_ETHERIP for bridged traffic */
+ if (ifp->if_bridge)
+ af = AF_LINK;
+
+ M_SETFIB(m, sc->gif_fibnum);
+ /* inner AF-specific encapsulation */
+
+ /* XXX should we check if our outer source is legal? */
+
+ /* dispatch to output logic based on outer AF */
+ switch (sc->gif_psrc->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = in_gif_output(ifp, af, m);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gif_output(ifp, af, m);
+ break;
+#endif
+ default:
+ m_freem(m);
+ error = ENETDOWN;
+ }
+
+ GIF_UNLOCK(sc);
+ end:
+ if (error)
+ ifp->if_oerrors++;
+ return (error);
+}
+
+void
+gif_input(m, af, ifp)
+ struct mbuf *m;
+ int af;
+ struct ifnet *ifp;
+{
+ int isr, n;
+ struct gif_softc *sc = ifp->if_softc;
+ struct etherip_header *eip;
+ struct ether_header *eh;
+ struct ifnet *oldifp;
+
+ if (ifp == NULL) {
+ /* just in case */
+ m_freem(m);
+ return;
+ }
+
+ m->m_pkthdr.rcvif = ifp;
+
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+ if (bpf_peers_present(ifp->if_bpf)) {
+ u_int32_t af1 = af;
+ bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
+ }
+
+ if (ng_gif_input_p != NULL) {
+ (*ng_gif_input_p)(ifp, &m, af);
+ if (m == NULL)
+ return;
+ }
+
+ /*
+	 * Put the packet onto the network layer input queue according to
+	 * the specified address family.
+	 * Note: older versions of gif_input called network layer input
+	 * functions, e.g. ip6_input, directly here. We changed the policy
+	 * to prevent deeply recursive calls of such input functions, which
+	 * might cause a kernel panic. The change may introduce another
+	 * problem: if the input queue is full, packets are discarded.
+	 * However, kernel stack overflows really happened, while a full
+	 * queue rarely occurs, so we accepted the trade-off.
+ */
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ isr = NETISR_IP;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ isr = NETISR_IPV6;
+ break;
+#endif
+ case AF_LINK:
+ n = sizeof(struct etherip_header) + sizeof(struct ether_header);
+ if (n > m->m_len) {
+ m = m_pullup(m, n);
+ if (m == NULL) {
+ ifp->if_ierrors++;
+ return;
+ }
+ }
+
+ eip = mtod(m, struct etherip_header *);
+ /*
+ * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
+		 * accepts an EtherIP packet with a reversed version field in
+ * the header. This is a knob for backward compatibility
+ * with FreeBSD 7.2R or prior.
+ */
+ if (sc->gif_options & GIF_ACCEPT_REVETHIP) {
+ if (eip->eip_resvl != ETHERIP_VERSION
+ && eip->eip_ver != ETHERIP_VERSION) {
+ /* discard unknown versions */
+ m_freem(m);
+ return;
+ }
+ } else {
+ if (eip->eip_ver != ETHERIP_VERSION) {
+ /* discard unknown versions */
+ m_freem(m);
+ return;
+ }
+ }
+ m_adj(m, sizeof(struct etherip_header));
+
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ m->m_pkthdr.rcvif = ifp;
+
+ if (ifp->if_bridge) {
+ oldifp = ifp;
+ eh = mtod(m, struct ether_header *);
+ if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
+ if (ETHER_IS_BROADCAST(eh->ether_dhost))
+ m->m_flags |= M_BCAST;
+ else
+ m->m_flags |= M_MCAST;
+ ifp->if_imcasts++;
+ }
+ BRIDGE_INPUT(ifp, m);
+
+ if (m != NULL && ifp != oldifp) {
+ /*
+ * The bridge gave us back itself or one of the
+ * members for which the frame is addressed.
+ */
+ ether_demux(ifp, m);
+ return;
+ }
+ }
+ if (m != NULL)
+ m_freem(m);
+ return;
+
+ default:
+ if (ng_gif_input_orphan_p != NULL)
+ (*ng_gif_input_orphan_p)(ifp, m, af);
+ else
+ m_freem(m);
+ return;
+ }
+
+ ifp->if_ipackets++;
+ ifp->if_ibytes += m->m_pkthdr.len;
+ netisr_dispatch(isr, m);
+}
+
+/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
+int
+gif_ioctl(ifp, cmd, data)
+ struct ifnet *ifp;
+ u_long cmd;
+ caddr_t data;
+{
+ struct gif_softc *sc = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq*)data;
+ int error = 0, size;
+ u_int options;
+ struct sockaddr *dst, *src;
+#ifdef SIOCSIFMTU /* xxx */
+ u_long mtu;
+#endif
+
+ switch (cmd) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ break;
+
+ case SIOCSIFDSTADDR:
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ break;
+
+#ifdef SIOCSIFMTU /* xxx */
+ case SIOCGIFMTU:
+ break;
+
+ case SIOCSIFMTU:
+ mtu = ifr->ifr_mtu;
+ if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
+ return (EINVAL);
+ ifp->if_mtu = mtu;
+ break;
+#endif /* SIOCSIFMTU */
+
+#ifdef INET
+ case SIOCSIFPHYADDR:
+#endif
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+#endif /* INET6 */
+ case SIOCSLIFPHYADDR:
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ src = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_dstaddr);
+ break;
+#endif
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+ src = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_dstaddr);
+ break;
+#endif
+ case SIOCSLIFPHYADDR:
+ src = (struct sockaddr *)
+ &(((struct if_laddrreq *)data)->addr);
+ dst = (struct sockaddr *)
+ &(((struct if_laddrreq *)data)->dstaddr);
+ break;
+ default:
+ return EINVAL;
+ }
+
+ /* sa_family must be equal */
+ if (src->sa_family != dst->sa_family)
+ return EINVAL;
+
+ /* validate sa_len */
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if (src->sa_len != sizeof(struct sockaddr_in))
+ return EINVAL;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (src->sa_len != sizeof(struct sockaddr_in6))
+ return EINVAL;
+ break;
+#endif
+ default:
+ return EAFNOSUPPORT;
+ }
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if (dst->sa_len != sizeof(struct sockaddr_in))
+ return EINVAL;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (dst->sa_len != sizeof(struct sockaddr_in6))
+ return EINVAL;
+ break;
+#endif
+ default:
+ return EAFNOSUPPORT;
+ }
+
+ /* check sa_family looks sane for the cmd */
+ switch (cmd) {
+ case SIOCSIFPHYADDR:
+ if (src->sa_family == AF_INET)
+ break;
+ return EAFNOSUPPORT;
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+ if (src->sa_family == AF_INET6)
+ break;
+ return EAFNOSUPPORT;
+#endif /* INET6 */
+ case SIOCSLIFPHYADDR:
+ /* checks done in the above */
+ break;
+ }
+
+ error = gif_set_tunnel(GIF2IFP(sc), src, dst);
+ break;
+
+#ifdef SIOCDIFPHYADDR
+ case SIOCDIFPHYADDR:
+ gif_delete_tunnel(GIF2IFP(sc));
+ break;
+#endif
+
+ case SIOCGIFPSRCADDR:
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+#endif /* INET6 */
+ if (sc->gif_psrc == NULL) {
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
+ src = sc->gif_psrc;
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ dst = &ifr->ifr_addr;
+ size = sizeof(ifr->ifr_addr);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ dst = (struct sockaddr *)
+ &(((struct in6_ifreq *)data)->ifr_addr);
+ size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
+ break;
+#endif /* INET6 */
+ default:
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
+ if (src->sa_len > size)
+ return EINVAL;
+ bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
+#ifdef INET6
+ if (dst->sa_family == AF_INET6) {
+ error = sa6_recoverscope((struct sockaddr_in6 *)dst);
+ if (error != 0)
+ return (error);
+ }
+#endif
+ break;
+
+ case SIOCGIFPDSTADDR:
+#ifdef INET6
+ case SIOCGIFPDSTADDR_IN6:
+#endif /* INET6 */
+ if (sc->gif_pdst == NULL) {
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
+ src = sc->gif_pdst;
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPDSTADDR:
+ dst = &ifr->ifr_addr;
+ size = sizeof(ifr->ifr_addr);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case SIOCGIFPDSTADDR_IN6:
+ dst = (struct sockaddr *)
+ &(((struct in6_ifreq *)data)->ifr_addr);
+ size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
+ break;
+#endif /* INET6 */
+ default:
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
+ if (src->sa_len > size)
+ return EINVAL;
+ bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
+#ifdef INET6
+ if (dst->sa_family == AF_INET6) {
+ error = sa6_recoverscope((struct sockaddr_in6 *)dst);
+ if (error != 0)
+ return (error);
+ }
+#endif
+ break;
+
+ case SIOCGLIFPHYADDR:
+ if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
+
+ /* copy src */
+ src = sc->gif_psrc;
+ dst = (struct sockaddr *)
+ &(((struct if_laddrreq *)data)->addr);
+ size = sizeof(((struct if_laddrreq *)data)->addr);
+ if (src->sa_len > size)
+ return EINVAL;
+ bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
+
+ /* copy dst */
+ src = sc->gif_pdst;
+ dst = (struct sockaddr *)
+ &(((struct if_laddrreq *)data)->dstaddr);
+ size = sizeof(((struct if_laddrreq *)data)->dstaddr);
+ if (src->sa_len > size)
+ return EINVAL;
+ bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
+ break;
+
+ case SIOCSIFFLAGS:
+ /* if_ioctl() takes care of it */
+ break;
+
+ case GIFGOPTS:
+ options = sc->gif_options;
+ error = copyout(&options, ifr->ifr_data,
+ sizeof(options));
+ break;
+
+ case GIFSOPTS:
+ if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
+ break;
+ error = copyin(ifr->ifr_data, &options, sizeof(options));
+ if (error)
+ break;
+ if (options & ~GIF_OPTMASK)
+ error = EINVAL;
+ else
+ sc->gif_options = options;
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ bad:
+ return error;
+}
+
+/*
+ * XXXRW: There's a general event-ordering issue here: the code to check
+ * if a given tunnel is already present happens before we perform a
+ * potentially blocking setup of the tunnel. This code needs to be
+ * re-ordered so that the check and replacement can be atomic using
+ * a mutex.
+ */
+int
+gif_set_tunnel(ifp, src, dst)
+ struct ifnet *ifp;
+ struct sockaddr *src;
+ struct sockaddr *dst;
+{
+ struct gif_softc *sc = ifp->if_softc;
+ struct gif_softc *sc2;
+ struct sockaddr *osrc, *odst, *sa;
+ int error = 0;
+
+ mtx_lock(&gif_mtx);
+ LIST_FOREACH(sc2, &V_gif_softc_list, gif_list) {
+ if (sc2 == sc)
+ continue;
+ if (!sc2->gif_pdst || !sc2->gif_psrc)
+ continue;
+ if (sc2->gif_pdst->sa_family != dst->sa_family ||
+ sc2->gif_pdst->sa_len != dst->sa_len ||
+ sc2->gif_psrc->sa_family != src->sa_family ||
+ sc2->gif_psrc->sa_len != src->sa_len)
+ continue;
+
+ /*
+ * Disallow parallel tunnels unless instructed
+ * otherwise.
+ */
+ if (!V_parallel_tunnels &&
+ bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
+ bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
+ error = EADDRNOTAVAIL;
+ mtx_unlock(&gif_mtx);
+ goto bad;
+ }
+
+		/* XXX both ends must be valid? (I mean, not 0.0.0.0) */
+ }
+ mtx_unlock(&gif_mtx);
+
+ /* XXX we can detach from both, but be polite just in case */
+ if (sc->gif_psrc)
+ switch (sc->gif_psrc->sa_family) {
+#ifdef INET
+ case AF_INET:
+ (void)in_gif_detach(sc);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ (void)in6_gif_detach(sc);
+ break;
+#endif
+ }
+
+ osrc = sc->gif_psrc;
+ sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
+ bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
+ sc->gif_psrc = sa;
+
+ odst = sc->gif_pdst;
+ sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
+ bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
+ sc->gif_pdst = sa;
+
+ switch (sc->gif_psrc->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = in_gif_attach(sc);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ /*
+ * Check validity of the scope zone ID of the addresses, and
+ * convert it into the kernel internal form if necessary.
+ */
+ error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
+ if (error != 0)
+ break;
+ error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
+ if (error != 0)
+ break;
+ error = in6_gif_attach(sc);
+ break;
+#endif
+ }
+ if (error) {
+ /* rollback */
+ free((caddr_t)sc->gif_psrc, M_IFADDR);
+ free((caddr_t)sc->gif_pdst, M_IFADDR);
+ sc->gif_psrc = osrc;
+ sc->gif_pdst = odst;
+ goto bad;
+ }
+
+ if (osrc)
+ free((caddr_t)osrc, M_IFADDR);
+ if (odst)
+ free((caddr_t)odst, M_IFADDR);
+
+ bad:
+ if (sc->gif_psrc && sc->gif_pdst)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ else
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
+ return error;
+}
+
+void
+gif_delete_tunnel(ifp)
+ struct ifnet *ifp;
+{
+ struct gif_softc *sc = ifp->if_softc;
+
+ if (sc->gif_psrc) {
+ free((caddr_t)sc->gif_psrc, M_IFADDR);
+ sc->gif_psrc = NULL;
+ }
+ if (sc->gif_pdst) {
+ free((caddr_t)sc->gif_pdst, M_IFADDR);
+ sc->gif_pdst = NULL;
+ }
+ /* it is safe to detach from both */
+#ifdef INET
+ (void)in_gif_detach(sc);
+#endif
+#ifdef INET6
+ (void)in6_gif_detach(sc);
+#endif
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+}
diff --git a/freebsd/sys/net/if_gif.h b/freebsd/sys/net/if_gif.h
new file mode 100644
index 00000000..14f06fd6
--- /dev/null
+++ b/freebsd/sys/net/if_gif.h
@@ -0,0 +1,130 @@
+/* $FreeBSD$ */
+/* $KAME: if_gif.h,v 1.17 2000/09/11 11:36:41 sumikawa Exp $ */
+
+/*-
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * if_gif.h
+ */
+
+#ifndef _NET_IF_GIF_HH_
+#define _NET_IF_GIF_HH_
+
+
+#ifdef _KERNEL
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/netinet/in.h>
+/* XXX sigh, why does route use a struct route instead of a pointer? */
+
+struct encaptab;
+
+extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp,
+ int af);
+extern void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m,
+ int af);
+extern int (*ng_gif_output_p)(struct ifnet *ifp, struct mbuf **mp);
+extern void (*ng_gif_attach_p)(struct ifnet *ifp);
+extern void (*ng_gif_detach_p)(struct ifnet *ifp);
+
+struct gif_softc {
+ struct ifnet *gif_ifp;
+ struct mtx gif_mtx;
+ struct sockaddr *gif_psrc; /* Physical src addr */
+ struct sockaddr *gif_pdst; /* Physical dst addr */
+ union {
+ struct route gifscr_ro; /* xxx */
+#ifdef INET6
+ struct route_in6 gifscr_ro6; /* xxx */
+#endif
+ } gifsc_gifscr;
+ int gif_flags;
+ u_int gif_fibnum;
+ const struct encaptab *encap_cookie4;
+ const struct encaptab *encap_cookie6;
+ void *gif_netgraph; /* ng_gif(4) netgraph node info */
+ u_int gif_options;
+	LIST_ENTRY(gif_softc) gif_list; /* all gifs are linked */
+};
+#define GIF2IFP(sc) ((sc)->gif_ifp)
+#define GIF_LOCK_INIT(sc) mtx_init(&(sc)->gif_mtx, "gif softc", \
+ NULL, MTX_DEF)
+#define GIF_LOCK_DESTROY(sc) mtx_destroy(&(sc)->gif_mtx)
+#define GIF_LOCK(sc) mtx_lock(&(sc)->gif_mtx)
+#define GIF_UNLOCK(sc) mtx_unlock(&(sc)->gif_mtx)
+#define GIF_LOCK_ASSERT(sc) mtx_assert(&(sc)->gif_mtx, MA_OWNED)
+
+#define gif_ro gifsc_gifscr.gifscr_ro
+#ifdef INET6
+#define gif_ro6 gifsc_gifscr.gifscr_ro6
+#endif
+
+#define GIF_MTU (1280) /* Default MTU */
+#define GIF_MTU_MIN (1280) /* Minimum MTU */
+#define GIF_MTU_MAX (8192) /* Maximum MTU */
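+
+/*
+ * The 1280-byte default matches the IPv6 minimum link MTU (RFC 2460),
+ * so a gif tunnel can always carry minimal IPv6 packets.
+ */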
+
+#define MTAG_GIF 1080679712
+#define MTAG_GIF_CALLED 0
+
+struct etherip_header {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_int eip_resvl:4, /* reserved */
+ eip_ver:4; /* version */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_int eip_ver:4, /* version */
+ eip_resvl:4; /* reserved */
+#endif
+ u_int8_t eip_resvh; /* reserved */
+} __packed;
+
+#define ETHERIP_VERSION 0x3
+/* mbuf adjust factor to force 32-bit alignment of IP header */
+#define ETHERIP_ALIGN 2
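+
+/*
+ * Construction sketch (an assumption about the encapsulating side, not
+ * a quote of in_gif.c): an outgoing EtherIP header is zeroed and then
+ * stamped with the version before being prepended to the frame:
+ *
+ *	struct etherip_header eiphdr;
+ *
+ *	bzero(&eiphdr, sizeof(eiphdr));
+ *	eiphdr.eip_ver = ETHERIP_VERSION;
+ */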
+
+/* Prototypes */
+void gif_input(struct mbuf *, int, struct ifnet *);
+int gif_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+int gif_ioctl(struct ifnet *, u_long, caddr_t);
+int gif_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *);
+void gif_delete_tunnel(struct ifnet *);
+int gif_encapcheck(const struct mbuf *, int, int, void *);
+#endif /* _KERNEL */
+
+#define GIFGOPTS _IOWR('i', 150, struct ifreq)
+#define GIFSOPTS _IOW('i', 151, struct ifreq)
+
+#define GIF_ACCEPT_REVETHIP 0x0001
+#define GIF_SEND_REVETHIP 0x0010
+#define GIF_OPTMASK (GIF_ACCEPT_REVETHIP|GIF_SEND_REVETHIP)
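+
+/*
+ * Usage note: GIFSOPTS passes a u_int options word through ifr_data;
+ * clearing GIF_ACCEPT_REVETHIP, for example, makes gif_input() reject
+ * legacy EtherIP headers with a reversed version field.
+ */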
+
+#endif /* _NET_IF_GIF_HH_ */
diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c
new file mode 100644
index 00000000..4a42029b
--- /dev/null
+++ b/freebsd/sys/net/if_gre.c
@@ -0,0 +1,909 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Heiko W.Rupp <hwr@pilhuhn.de>
+ *
+ * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Encapsulate L3 protocols into IP
+ * See RFC 2784 (successor of RFC 1701 and 1702) for more details.
+ * If_gre is compatible with Cisco GRE tunnels, so you can
+ * have a NetBSD box as the other end of a tunnel interface of a Cisco
+ * router. See gre(4) for more details.
+ * Also supported: IP in IP encaps (proto 55) as of RFC 2004
+ */
+
+#include <freebsd/local/opt_atalk.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#ifdef INET
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_gre.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_encap.h>
+#else
+#error "Huh? if_gre without inet?"
+#endif
+
+#include <freebsd/net/bpf.h>
+
+#include <freebsd/net/if_gre.h>
+
+/*
+ * It is not easy to calculate the right value for a GRE MTU.
+ * We leave this task to the admin and use the same default that
+ * other vendors use.
+ */
+#define GREMTU 1476
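+
+/*
+ * For reference: 1476 = 1500 (common Ethernet MTU) - 20 (outer IPv4
+ * header) - 4 (basic GRE header). Optional GRE fields such as a key
+ * reduce the usable payload further; see the MTU adjustment in the
+ * GRESKEY ioctl handler below.
+ */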
+
+#define GRENAME "gre"
+
+/*
+ * gre_mtx protects all global variables in if_gre.c.
+ * XXX: gre_softc data not protected yet.
+ */
+struct mtx gre_mtx;
+static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation");
+
+struct gre_softc_head gre_softc_list;
+
+static int gre_clone_create(struct if_clone *, int, caddr_t);
+static void gre_clone_destroy(struct ifnet *);
+static int gre_ioctl(struct ifnet *, u_long, caddr_t);
+static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *ro);
+
+IFC_SIMPLE_DECLARE(gre, 0);
+
+static int gre_compute_route(struct gre_softc *sc);
+
+static void greattach(void);
+
+#ifdef INET
+extern struct domain inetdomain;
+static const struct protosw in_gre_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_GRE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = gre_input,
+ .pr_output = (pr_output_t *)rip_output,
+ .pr_ctlinput = rip_ctlinput,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+static const struct protosw in_mobile_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_MOBILE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = gre_mobile_input,
+ .pr_output = (pr_output_t *)rip_output,
+ .pr_ctlinput = rip_ctlinput,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+#endif
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
+ "Generic Routing Encapsulation");
+#ifndef MAX_GRE_NEST
+/*
+ * This macro controls the default upper limit on nesting of gre tunnels.
+ * Since a large value here combined with a careless configuration may
+ * crash the system, we do not allow any nesting by default.
+ * If you need to configure nested gre tunnels, you can define this macro
+ * in your kernel configuration file. However, if you do so, please be
+ * careful to configure the tunnels so that they do not form a loop.
+ */
+#define MAX_GRE_NEST 1
+#endif
+static int max_gre_nesting = MAX_GRE_NEST;
+SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW,
+ &max_gre_nesting, 0, "Max nested tunnels");
+
+/* ARGSUSED */
+static void
+greattach(void)
+{
+
+ mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF);
+ LIST_INIT(&gre_softc_list);
+ if_clone_attach(&gre_cloner);
+}
+
+static int
+gre_clone_create(ifc, unit, params)
+ struct if_clone *ifc;
+ int unit;
+ caddr_t params;
+{
+ struct gre_softc *sc;
+
+ sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
+
+ GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
+ if (GRE2IFP(sc) == NULL) {
+ free(sc, M_GRE);
+ return (ENOSPC);
+ }
+
+ GRE2IFP(sc)->if_softc = sc;
+ if_initname(GRE2IFP(sc), ifc->ifc_name, unit);
+
+ GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
+ GRE2IFP(sc)->if_addrlen = 0;
+ GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */
+ GRE2IFP(sc)->if_mtu = GREMTU;
+ GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
+ GRE2IFP(sc)->if_output = gre_output;
+ GRE2IFP(sc)->if_ioctl = gre_ioctl;
+ sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
+ sc->g_proto = IPPROTO_GRE;
+ GRE2IFP(sc)->if_flags |= IFF_LINK0;
+ sc->encap = NULL;
+ sc->called = 0;
+ sc->gre_fibnum = curthread->td_proc->p_fibnum;
+ sc->wccp_ver = WCCP_V1;
+ sc->key = 0;
+ if_attach(GRE2IFP(sc));
+ bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
+ mtx_lock(&gre_mtx);
+ LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
+ mtx_unlock(&gre_mtx);
+ return (0);
+}
+
+static void
+gre_clone_destroy(ifp)
+ struct ifnet *ifp;
+{
+ struct gre_softc *sc = ifp->if_softc;
+
+ mtx_lock(&gre_mtx);
+ LIST_REMOVE(sc, sc_list);
+ mtx_unlock(&gre_mtx);
+
+#ifdef INET
+ if (sc->encap != NULL)
+ encap_detach(sc->encap);
+#endif
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+ free(sc, M_GRE);
+}
+
+/*
+ * The output routine. Takes a packet and encapsulates it in the protocol
+ * given by sc->g_proto. See also RFC 1701 and RFC 2004
+ */
+static int
+gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct route *ro)
+{
+ int error = 0;
+ struct gre_softc *sc = ifp->if_softc;
+ struct greip *gh;
+ struct ip *ip;
+ u_short gre_ip_id = 0;
+ uint8_t gre_ip_tos = 0;
+ u_int16_t etype = 0;
+ struct mobile_h mob_h;
+ u_int32_t af;
+ int extra = 0;
+
+ /*
+	 * gre may cause infinite recursive calls when misconfigured.
+	 * We'll prevent this by enforcing an upper limit.
+ */
+ if (++(sc->called) > max_gre_nesting) {
+ printf("%s: gre_output: recursively called too many "
+		    "times (%d)\n", if_name(GRE2IFP(sc)), sc->called);
+ m_freem(m);
+		error = EIO;	/* is there a better errno? */
+ goto end;
+ }
+
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
+ sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
+ m_freem(m);
+ error = ENETDOWN;
+ goto end;
+ }
+
+ gh = NULL;
+ ip = NULL;
+
+ /* BPF writes need to be handled specially. */
+ if (dst->sa_family == AF_UNSPEC) {
+ bcopy(dst->sa_data, &af, sizeof(af));
+ dst->sa_family = af;
+ }
+
+ if (bpf_peers_present(ifp->if_bpf)) {
+ af = dst->sa_family;
+ bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
+ }
+
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+
+ if (sc->g_proto == IPPROTO_MOBILE) {
+ if (dst->sa_family == AF_INET) {
+ struct mbuf *m0;
+ int msiz;
+
+ ip = mtod(m, struct ip *);
+
+ /*
+			 * RFC 2004 specifies that fragmented datagrams
+			 * shouldn't be encapsulated.
+ */
+ if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
+ _IF_DROP(&ifp->if_snd);
+ m_freem(m);
+				error = EINVAL;	/* is there a better errno? */
+ goto end;
+ }
+ memset(&mob_h, 0, MOB_HH_SIZ_L);
+ mob_h.proto = (ip->ip_p) << 8;
+ mob_h.odst = ip->ip_dst.s_addr;
+ ip->ip_dst.s_addr = sc->g_dst.s_addr;
+
+ /*
+ * If the packet comes from our host, we only change
+ * the destination address in the IP header.
+			 * Otherwise we also need to save and change the
+			 * source address.
+ */
+ if (in_hosteq(ip->ip_src, sc->g_src)) {
+ msiz = MOB_HH_SIZ_S;
+ } else {
+ mob_h.proto |= MOB_HH_SBIT;
+ mob_h.osrc = ip->ip_src.s_addr;
+ ip->ip_src.s_addr = sc->g_src.s_addr;
+ msiz = MOB_HH_SIZ_L;
+ }
+ mob_h.proto = htons(mob_h.proto);
+ mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
+
+ if ((m->m_data - msiz) < m->m_pktdat) {
+ /* need new mbuf */
+ MGETHDR(m0, M_DONTWAIT, MT_DATA);
+ if (m0 == NULL) {
+ _IF_DROP(&ifp->if_snd);
+ m_freem(m);
+ error = ENOBUFS;
+ goto end;
+ }
+ m0->m_next = m;
+ m->m_data += sizeof(struct ip);
+ m->m_len -= sizeof(struct ip);
+ m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
+ m0->m_len = msiz + sizeof(struct ip);
+ m0->m_data += max_linkhdr;
+ memcpy(mtod(m0, caddr_t), (caddr_t)ip,
+ sizeof(struct ip));
+ m = m0;
+ } else { /* we have some space left in the old one */
+ m->m_data -= msiz;
+ m->m_len += msiz;
+ m->m_pkthdr.len += msiz;
+ bcopy(ip, mtod(m, caddr_t),
+ sizeof(struct ip));
+ }
+ ip = mtod(m, struct ip *);
+ memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
+ ip->ip_len = ntohs(ip->ip_len) + msiz;
+		} else {	/* not AF_INET */
+ _IF_DROP(&ifp->if_snd);
+ m_freem(m);
+ error = EINVAL;
+ goto end;
+ }
+ } else if (sc->g_proto == IPPROTO_GRE) {
+ switch (dst->sa_family) {
+ case AF_INET:
+ ip = mtod(m, struct ip *);
+ gre_ip_tos = ip->ip_tos;
+ gre_ip_id = ip->ip_id;
+ if (sc->wccp_ver == WCCP_V2) {
+ extra = sizeof(uint32_t);
+ etype = WCCP_PROTOCOL_TYPE;
+ } else {
+ etype = ETHERTYPE_IP;
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ gre_ip_id = ip_newid();
+ etype = ETHERTYPE_IPV6;
+ break;
+#endif
+#ifdef NETATALK
+ case AF_APPLETALK:
+ etype = ETHERTYPE_ATALK;
+ break;
+#endif
+ default:
+ _IF_DROP(&ifp->if_snd);
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto end;
+ }
+
+ /* Reserve space for GRE header + optional GRE key */
+ int hdrlen = sizeof(struct greip) + extra;
+ if (sc->key)
+ hdrlen += sizeof(uint32_t);
+ M_PREPEND(m, hdrlen, M_DONTWAIT);
+ } else {
+ _IF_DROP(&ifp->if_snd);
+ m_freem(m);
+ error = EINVAL;
+ goto end;
+ }
+
+ if (m == NULL) { /* mbuf allocation failed */
+ _IF_DROP(&ifp->if_snd);
+ error = ENOBUFS;
+ goto end;
+ }
+
+ M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
+
+ gh = mtod(m, struct greip *);
+ if (sc->g_proto == IPPROTO_GRE) {
+ uint32_t *options = gh->gi_options;
+
+ memset((void *)gh, 0, sizeof(struct greip) + extra);
+ gh->gi_ptype = htons(etype);
+ gh->gi_flags = 0;
+
+ /* Add key option */
+ if (sc->key)
+ {
+ gh->gi_flags |= htons(GRE_KP);
+ *(options++) = htonl(sc->key);
+ }
+ }
+
+ gh->gi_pr = sc->g_proto;
+ if (sc->g_proto != IPPROTO_MOBILE) {
+ gh->gi_src = sc->g_src;
+ gh->gi_dst = sc->g_dst;
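+		/* Note: IPPROTO_IPV4 == 4, so this stores 4 in ip_v. */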
+ ((struct ip*)gh)->ip_v = IPPROTO_IPV4;
+ ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
+ ((struct ip*)gh)->ip_ttl = GRE_TTL;
+ ((struct ip*)gh)->ip_tos = gre_ip_tos;
+ ((struct ip*)gh)->ip_id = gre_ip_id;
+ gh->gi_len = m->m_pkthdr.len;
+ }
+
+ ifp->if_opackets++;
+ ifp->if_obytes += m->m_pkthdr.len;
+ /*
+	 * Send it off with the IP_FORWARDING flag to prevent ip_output()
+	 * from overwriting the ip_id again; ip_id is already set to the
+	 * ip_id of the encapsulated packet.
+ */
+ error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
+ (struct ip_moptions *)NULL, (struct inpcb *)NULL);
+ end:
+ sc->called = 0;
+ if (error)
+ ifp->if_oerrors++;
+ return (error);
+}
+
+static int
+gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct if_laddrreq *lifr = (struct if_laddrreq *)data;
+ struct in_aliasreq *aifr = (struct in_aliasreq *)data;
+ struct gre_softc *sc = ifp->if_softc;
+ int s;
+ struct sockaddr_in si;
+ struct sockaddr *sa = NULL;
+ int error, adj;
+ struct sockaddr_in sp, sm, dp, dm;
+ uint32_t key;
+
+ error = 0;
+ adj = 0;
+
+ s = splnet();
+ switch (cmd) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ break;
+ case SIOCSIFDSTADDR:
+ break;
+ case SIOCSIFFLAGS:
+ /*
+ * XXXRW: Isn't this priv_check() redundant to the ifnet
+ * layer check?
+ */
+ if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
+ break;
+ if ((ifr->ifr_flags & IFF_LINK0) != 0)
+ sc->g_proto = IPPROTO_GRE;
+ else
+ sc->g_proto = IPPROTO_MOBILE;
+ if ((ifr->ifr_flags & IFF_LINK2) != 0)
+ sc->wccp_ver = WCCP_V2;
+ else
+ sc->wccp_ver = WCCP_V1;
+ goto recompute;
+ case SIOCSIFMTU:
+ /*
+ * XXXRW: Isn't this priv_check() redundant to the ifnet
+ * layer check?
+ */
+ if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
+ break;
+ if (ifr->ifr_mtu < 576) {
+ error = EINVAL;
+ break;
+ }
+ ifp->if_mtu = ifr->ifr_mtu;
+ break;
+ case SIOCGIFMTU:
+ ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
+ break;
+ case SIOCADDMULTI:
+ /*
+		 * XXXRW: Isn't this priv_check() redundant to the ifnet
+ * layer check?
+ */
+ if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
+ break;
+		if (ifr == NULL) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ switch (ifr->ifr_addr.sa_family) {
+#ifdef INET
+ case AF_INET:
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ break;
+#endif
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+ case SIOCDELMULTI:
+ /*
+ * XXXRW: Isn't this priv_check() redundant to the ifnet
+ * layer check?
+ */
+ if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
+ break;
+		if (ifr == NULL) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ switch (ifr->ifr_addr.sa_family) {
+#ifdef INET
+ case AF_INET:
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ break;
+#endif
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+ case GRESPROTO:
+ /*
+ * XXXRW: Isn't this priv_check() redundant to the ifnet
+ * layer check?
+ */
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
+ break;
+ sc->g_proto = ifr->ifr_flags;
+ switch (sc->g_proto) {
+ case IPPROTO_GRE:
+ ifp->if_flags |= IFF_LINK0;
+ break;
+ case IPPROTO_MOBILE:
+ ifp->if_flags &= ~IFF_LINK0;
+ break;
+ default:
+ error = EPROTONOSUPPORT;
+ break;
+ }
+ goto recompute;
+ case GREGPROTO:
+ ifr->ifr_flags = sc->g_proto;
+ break;
+ case GRESADDRS:
+ case GRESADDRD:
+ error = priv_check(curthread, PRIV_NET_GRE);
+ if (error)
+ return (error);
+ /*
+ * set tunnel endpoints, compute a less specific route
+		 * to the remote end and mark the interface as up
+ */
+ sa = &ifr->ifr_addr;
+ if (cmd == GRESADDRS)
+ sc->g_src = (satosin(sa))->sin_addr;
+ if (cmd == GRESADDRD)
+ sc->g_dst = (satosin(sa))->sin_addr;
+ recompute:
+#ifdef INET
+ if (sc->encap != NULL) {
+ encap_detach(sc->encap);
+ sc->encap = NULL;
+ }
+#endif
+ if ((sc->g_src.s_addr != INADDR_ANY) &&
+ (sc->g_dst.s_addr != INADDR_ANY)) {
+ bzero(&sp, sizeof(sp));
+ bzero(&sm, sizeof(sm));
+ bzero(&dp, sizeof(dp));
+ bzero(&dm, sizeof(dm));
+ sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len =
+ sizeof(struct sockaddr_in);
+ sp.sin_family = sm.sin_family = dp.sin_family =
+ dm.sin_family = AF_INET;
+ sp.sin_addr = sc->g_src;
+ dp.sin_addr = sc->g_dst;
+ sm.sin_addr.s_addr = dm.sin_addr.s_addr =
+ INADDR_BROADCAST;
+#ifdef INET
+ sc->encap = encap_attach(AF_INET, sc->g_proto,
+ sintosa(&sp), sintosa(&sm), sintosa(&dp),
+ sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ?
+ &in_gre_protosw : &in_mobile_protosw, sc);
+ if (sc->encap == NULL)
+ printf("%s: unable to attach encap\n",
+ if_name(GRE2IFP(sc)));
+#endif
+ if (sc->route.ro_rt != 0) /* free old route */
+ RTFREE(sc->route.ro_rt);
+ if (gre_compute_route(sc) == 0)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ else
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ }
+ break;
+ case GREGADDRS:
+ memset(&si, 0, sizeof(si));
+ si.sin_family = AF_INET;
+ si.sin_len = sizeof(struct sockaddr_in);
+ si.sin_addr.s_addr = sc->g_src.s_addr;
+ sa = sintosa(&si);
+ ifr->ifr_addr = *sa;
+ break;
+ case GREGADDRD:
+ memset(&si, 0, sizeof(si));
+ si.sin_family = AF_INET;
+ si.sin_len = sizeof(struct sockaddr_in);
+ si.sin_addr.s_addr = sc->g_dst.s_addr;
+ sa = sintosa(&si);
+ ifr->ifr_addr = *sa;
+ break;
+ case SIOCSIFPHYADDR:
+ /*
+ * XXXRW: Isn't this priv_check() redundant to the ifnet
+ * layer check?
+ */
+ if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
+ break;
+ if (aifr->ifra_addr.sin_family != AF_INET ||
+ aifr->ifra_dstaddr.sin_family != AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ if (aifr->ifra_addr.sin_len != sizeof(si) ||
+ aifr->ifra_dstaddr.sin_len != sizeof(si)) {
+ error = EINVAL;
+ break;
+ }
+ sc->g_src = aifr->ifra_addr.sin_addr;
+ sc->g_dst = aifr->ifra_dstaddr.sin_addr;
+ goto recompute;
+ case SIOCSLIFPHYADDR:
+ /*
+ * XXXRW: Isn't this priv_check() redundant to the ifnet
+ * layer check?
+ */
+ if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
+ break;
+ if (lifr->addr.ss_family != AF_INET ||
+ lifr->dstaddr.ss_family != AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ if (lifr->addr.ss_len != sizeof(si) ||
+ lifr->dstaddr.ss_len != sizeof(si)) {
+ error = EINVAL;
+ break;
+ }
+ sc->g_src = (satosin(&lifr->addr))->sin_addr;
+ sc->g_dst =
+ (satosin(&lifr->dstaddr))->sin_addr;
+ goto recompute;
+ case SIOCDIFPHYADDR:
+ /*
+ * XXXRW: Isn't this priv_check() redundant to the ifnet
+ * layer check?
+ */
+ if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
+ break;
+ sc->g_src.s_addr = INADDR_ANY;
+ sc->g_dst.s_addr = INADDR_ANY;
+ goto recompute;
+ case SIOCGLIFPHYADDR:
+ if (sc->g_src.s_addr == INADDR_ANY ||
+ sc->g_dst.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ memset(&si, 0, sizeof(si));
+ si.sin_family = AF_INET;
+ si.sin_len = sizeof(struct sockaddr_in);
+ si.sin_addr.s_addr = sc->g_src.s_addr;
+ memcpy(&lifr->addr, &si, sizeof(si));
+ si.sin_addr.s_addr = sc->g_dst.s_addr;
+ memcpy(&lifr->dstaddr, &si, sizeof(si));
+ break;
+ case SIOCGIFPSRCADDR:
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+#endif
+ if (sc->g_src.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ memset(&si, 0, sizeof(si));
+ si.sin_family = AF_INET;
+ si.sin_len = sizeof(struct sockaddr_in);
+ si.sin_addr.s_addr = sc->g_src.s_addr;
+ bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
+ break;
+ case SIOCGIFPDSTADDR:
+#ifdef INET6
+ case SIOCGIFPDSTADDR_IN6:
+#endif
+ if (sc->g_dst.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ memset(&si, 0, sizeof(si));
+ si.sin_family = AF_INET;
+ si.sin_len = sizeof(struct sockaddr_in);
+ si.sin_addr.s_addr = sc->g_dst.s_addr;
+ bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
+ break;
+ case GRESKEY:
+ error = priv_check(curthread, PRIV_NET_GRE);
+ if (error)
+ break;
+ error = copyin(ifr->ifr_data, &key, sizeof(key));
+ if (error)
+ break;
+ /* adjust MTU for option header */
+ if (key == 0 && sc->key != 0) /* clear */
+ adj += sizeof(key);
+ else if (key != 0 && sc->key == 0) /* set */
+ adj -= sizeof(key);
+
+ if (ifp->if_mtu + adj < 576) {
+ error = EINVAL;
+ break;
+ }
+ ifp->if_mtu += adj;
+ sc->key = key;
+ break;
+ case GREGKEY:
+ error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key));
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ splx(s);
+ return (error);
+}
+
+/*
+ * Computes a route to our destination that is not the one which would
+ * be taken by ip_output(), as that one would loop back to us. If the
+ * interface is p2p as a--->b, then a routing entry a-->b exists.
+ * If we now send a packet to b (e.g. ping b), it comes down here, gets
+ * src=a, dst=b tacked on, and would be sent straight back to if_gre by
+ * ip_output(). The goal here is to compute a route to b that is less
+ * specific than a-->b. We know that such a route exists, as in normal
+ * operation we have at least a default route which matches.
+ */
+static int
+gre_compute_route(struct gre_softc *sc)
+{
+ struct route *ro;
+
+ ro = &sc->route;
+
+ memset(ro, 0, sizeof(struct route));
+ ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
+ ro->ro_dst.sa_family = AF_INET;
+ ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+
+ /*
+	 * Toggle the last bit, so our interface is not found, but a less
+	 * specific route is. I'd rather specify a shorter mask, but this
+	 * is not possible. Should work, though. XXX
+ * XXX MRT Use a different FIB for the tunnel to solve this problem.
+ */
+ if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
+ ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
+ htonl(0x01);
+ }
+
+#ifdef DIAGNOSTIC
+ printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)),
+ inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
+#endif
+
+ rtalloc_fib(ro, sc->gre_fibnum);
+
+ /*
+	 * Check whether this returned a route at all, and that the route
+	 * does not loop back to ourselves.
+ */
+ if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
+#ifdef DIAGNOSTIC
+ if (ro->ro_rt == NULL)
+ printf(" - no route found!\n");
+ else
+ printf(" - route loops back to ourself!\n");
+#endif
+ return EADDRNOTAVAIL;
+ }
+
+ /*
+	 * Now change it back; otherwise ip_output() will just drop
+	 * the route and search for one to this interface ...
+ */
+ if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0)
+ ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
+
+#ifdef DIAGNOSTIC
+ printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp),
+ inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
+ printf("\n");
+#endif
+
+ return 0;
+}
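+
+/*
+ * Worked example (assuming IFF_LINK1 is clear): with g_dst = 10.0.0.2,
+ * the lookup above is done for 10.0.0.3, so the host route through the
+ * tunnel itself never matches and a broader route (typically the
+ * default route) is returned for the encapsulated packets instead.
+ */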
+
+/*
+ * Compute a checksum of a buffer, much like in_cksum(), which operates
+ * on mbufs.
+ */
+u_int16_t
+gre_in_cksum(u_int16_t *p, u_int len)
+{
+ u_int32_t sum = 0;
+ int nwords = len >> 1;
+
+ while (nwords-- != 0)
+ sum += *p++;
+
+ if (len & 1) {
+ union {
+ u_short w;
+ u_char c[2];
+ } u;
+ u.c[0] = *(u_char *)p;
+ u.c[1] = 0;
+ sum += u.w;
+ }
+
+ /* end-around-carry */
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16);
+ return (~sum);
+}
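+
+/*
+ * Usage sketch (mirrors the mobile-encap path in gre_output() above):
+ * the checksum is computed over a header whose hcrc field is still
+ * zero, and the one's-complement result (RFC 1071 style) is stored
+ * back into it:
+ *
+ *	mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
+ */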
+
+static int
+gremodevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ greattach();
+ break;
+ case MOD_UNLOAD:
+ if_clone_detach(&gre_cloner);
+ mtx_destroy(&gre_mtx);
+ break;
+ default:
+ return EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static moduledata_t gre_mod = {
+ "if_gre",
+ gremodevent,
+ 0
+};
+
+DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_gre, 1);
diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h
new file mode 100644
index 00000000..ff9c63cf
--- /dev/null
+++ b/freebsd/sys/net/if_gre.h
@@ -0,0 +1,194 @@
+/* $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * All rights reserved
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Heiko W.Rupp <hwr@pilhuhn.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_GRE_H
+#define _NET_IF_GRE_H
+
+#include <freebsd/sys/ioccom.h>
+#ifdef _KERNEL
+#include <freebsd/sys/queue.h>
+
+/*
+ * Version of WCCP; needs to be configured manually, since the version 2
+ * header is the same but the IP payload is prepended with an additional
+ * 4-byte field.
+ */
+typedef enum {
+ WCCP_V1 = 0,
+ WCCP_V2
+} wccp_ver_t;
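+
+/*
+ * Example: with wccp_ver == WCCP_V2, gre_output() reserves an extra
+ * uint32_t after the GRE header and uses WCCP_PROTOCOL_TYPE instead
+ * of ETHERTYPE_IP as the payload type.
+ */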
+
+struct gre_softc {
+ struct ifnet *sc_ifp;
+ LIST_ENTRY(gre_softc) sc_list;
+ int gre_unit;
+ int gre_flags;
+ u_int gre_fibnum; /* use this fib for envelopes */
+ struct in_addr g_src; /* source address of gre packets */
+ struct in_addr g_dst; /* destination address of gre packets */
+	struct route route;	/* routing entry that determines where an
+				   encapsulated packet should go */
+ u_char g_proto; /* protocol of encapsulator */
+
+ const struct encaptab *encap; /* encapsulation cookie */
+
+ int called; /* infinite recursion preventer */
+
+ uint32_t key; /* key included in outgoing GRE packets */
+ /* zero means none */
+
+ wccp_ver_t wccp_ver; /* version of the WCCP */
+};
+#define GRE2IFP(sc) ((sc)->sc_ifp)
+
+
+struct gre_h {
+ u_int16_t flags; /* GRE flags */
+	u_int16_t ptype;	/* protocol type of payload, typically an
+				   Ethernet protocol type */
+ uint32_t options[0]; /* optional options */
+/*
+ * from here on: fields are optional, presence indicated by flags
+ *
+ * u_int16_t checksum	one's complement checksum of the GRE header
+ *			and payload.
+ *			Present if (ck_pres | rt_pres == 1).
+ *			Valid if (ck_pres == 1).
+ * u_int16_t offset	offset from the start of the routing field to
+ *			the first octet of the active SRE (see below).
+ *			Present if (ck_pres | rt_pres == 1).
+ *			Valid if (rt_pres == 1).
+ * u_int32_t key	inserted by the encapsulator, e.g. for
+ *			authentication.
+ *			Present if (key_pres == 1).
+ * u_int32_t seq_num	sequence number to allow for packet ordering.
+ *			Present if (seq_pres == 1).
+ * struct gre_sre[]	routing fields (see below).
+ *			Present if (rt_pres == 1).
+ */
+} __packed;
+
+struct greip {
+ struct ip gi_i;
+ struct gre_h gi_g;
+} __packed;
+
+#define gi_pr gi_i.ip_p
+#define gi_len gi_i.ip_len
+#define gi_src gi_i.ip_src
+#define gi_dst gi_i.ip_dst
+#define gi_ptype gi_g.ptype
+#define gi_flags gi_g.flags
+#define gi_options gi_g.options
+
+#define GRE_CP 0x8000 /* Checksum Present */
+#define GRE_RP 0x4000 /* Routing Present */
+#define GRE_KP 0x2000 /* Key Present */
+#define GRE_SP 0x1000 /* Sequence Present */
+#define GRE_SS 0x0800 /* Strict Source Route */
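+
+/*
+ * Example: gre_output() sets htons(GRE_KP) in gi_flags when a key is
+ * configured on the interface.
+ */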
+
+/*
+ * Cisco uses a special type for GRE tunnels created as part of a WCCP
+ * connection, while in fact those packets are just IPv4 encapsulated
+ * in GRE.
+ */
+#define WCCP_PROTOCOL_TYPE 0x883E
+
+/*
+ * gre_sre defines a Source Route Entry. These are needed if packets
+ * should be routed over more than one tunnel, hop by hop.
+ */
+struct gre_sre {
+ u_int16_t sre_family; /* address family */
+ u_char sre_offset; /* offset to first octet of active entry */
+	u_char	sre_length;	/* number of octets in the SRE.
+				   sre_length == 0 -> last entry. */
+ u_char *sre_rtinfo; /* the routing information */
+};
+
+struct greioctl {
+ int unit;
+ struct in_addr addr;
+};
+
+/* for mobile encaps */
+
+struct mobile_h {
+ u_int16_t proto; /* protocol and S-bit */
+ u_int16_t hcrc; /* header checksum */
+ u_int32_t odst; /* original destination address */
+ u_int32_t osrc; /* original source addr, if S-bit set */
+} __packed;
+
+struct mobip_h {
+ struct ip mi;
+ struct mobile_h mh;
+} __packed;
+
+
+#define MOB_HH_SIZ_S (sizeof(struct mobile_h) - sizeof(u_int32_t))
+#define MOB_HH_SIZ_L (sizeof(struct mobile_h))
+#define MOB_HH_SBIT 0x0080
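+
+/*
+ * With the packed layout above, MOB_HH_SIZ_L is 12 bytes and
+ * MOB_HH_SIZ_S (S-bit clear, no osrc) is 8 bytes.
+ */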
+
+#define GRE_TTL 30
+
+#endif /* _KERNEL */
+
+/*
+ * ioctls needed to manipulate the interface
+ */
+
+#define GRESADDRS _IOW('i', 101, struct ifreq)
+#define GRESADDRD _IOW('i', 102, struct ifreq)
+#define GREGADDRS _IOWR('i', 103, struct ifreq)
+#define GREGADDRD _IOWR('i', 104, struct ifreq)
+#define GRESPROTO _IOW('i' , 105, struct ifreq)
+#define GREGPROTO _IOWR('i', 106, struct ifreq)
+#define GREGKEY _IOWR('i', 107, struct ifreq)
+#define GRESKEY _IOW('i', 108, struct ifreq)
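+
+/*
+ * Userland sketch (hypothetical: the socket s and the name "gre0" are
+ * assumptions): GRESKEY reads a uint32_t key through ifr_data:
+ *
+ *	struct ifreq ifr;
+ *	uint32_t key = 1234;
+ *
+ *	strlcpy(ifr.ifr_name, "gre0", sizeof(ifr.ifr_name));
+ *	ifr.ifr_data = (caddr_t)&key;
+ *	ioctl(s, GRESKEY, &ifr);
+ */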
+
+#ifdef _KERNEL
+LIST_HEAD(gre_softc_head, gre_softc);
+extern struct mtx gre_mtx;
+extern struct gre_softc_head gre_softc_list;
+
+u_int16_t gre_in_cksum(u_int16_t *, u_int);
+#endif /* _KERNEL */
+
+#endif
diff --git a/freebsd/sys/net/if_iso88025subr.c b/freebsd/sys/net/if_iso88025subr.c
new file mode 100644
index 00000000..87d3eb87
--- /dev/null
+++ b/freebsd/sys/net/if_iso88025subr.c
@@ -0,0 +1,831 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1998, Larry Lile
+ * All rights reserved.
+ *
+ * For latest sources and information on this driver, please
+ * go to http://anarchy.stdio.com.
+ *
+ * Questions, comments or suggestions should be directed to
+ * Larry Lile <lile@stdio.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+/*
+ *
+ * General ISO 802.5 (Token Ring) support routines
+ *
+ */
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipx.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_arp.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_llc.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_llatbl.h>
+
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/iso88025.h>
+
+#if defined(INET) || defined(INET6)
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/if_ether.h>
+#endif
+#ifdef INET6
+#include <freebsd/netinet6/nd6.h>
+#endif
+
+#ifdef IPX
+#include <freebsd/netipx/ipx.h>
+#include <freebsd/netipx/ipx_if.h>
+#endif
+
+#include <freebsd/security/mac/mac_framework.h>
+
+static const u_char iso88025_broadcastaddr[ISO88025_ADDR_LEN] =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+static int iso88025_resolvemulti (struct ifnet *, struct sockaddr **,
+ struct sockaddr *);
+
+#define senderr(e) do { error = (e); goto bad; } while (0)
+
+/*
+ * Perform common duties while attaching to interface list
+ */
+void
+iso88025_ifattach(struct ifnet *ifp, const u_int8_t *lla, int bpf)
+{
+ struct ifaddr *ifa;
+ struct sockaddr_dl *sdl;
+
+ ifa = NULL;
+
+ ifp->if_type = IFT_ISO88025;
+ ifp->if_addrlen = ISO88025_ADDR_LEN;
+ ifp->if_hdrlen = ISO88025_HDR_LEN;
+
+ if_attach(ifp); /* Must be called before additional assignments */
+
+ ifp->if_output = iso88025_output;
+ ifp->if_input = iso88025_input;
+ ifp->if_resolvemulti = iso88025_resolvemulti;
+ ifp->if_broadcastaddr = iso88025_broadcastaddr;
+
+ if (ifp->if_baudrate == 0)
+ ifp->if_baudrate = TR_16MBPS; /* 16Mbit should be a safe default */
+ if (ifp->if_mtu == 0)
+ ifp->if_mtu = ISO88025_DEFAULT_MTU;
+
+ ifa = ifp->if_addr;
+ KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
+
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl->sdl_type = IFT_ISO88025;
+ sdl->sdl_alen = ifp->if_addrlen;
+ bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
+
+ if (bpf)
+ bpfattach(ifp, DLT_IEEE802, ISO88025_HDR_LEN);
+
+ return;
+}
+
+/*
+ * Perform common duties while detaching a Token Ring interface
+ */
+void
+iso88025_ifdetach(ifp, bpf)
+ struct ifnet *ifp;
+ int bpf;
+{
+
+ if (bpf)
+ bpfdetach(ifp);
+
+ if_detach(ifp);
+
+ return;
+}
+
+int
+iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+{
+ struct ifaddr *ifa;
+ struct ifreq *ifr;
+ int error;
+
+ ifa = (struct ifaddr *) data;
+ ifr = (struct ifreq *) data;
+ error = 0;
+
+ switch (command) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ ifp->if_init(ifp->if_softc); /* before arpwhohas */
+ arp_ifinit(ifp, ifa);
+ break;
+#endif /* INET */
+#ifdef IPX
+ /*
+ * XXX - This code is probably wrong
+ */
+ case AF_IPX: {
+ struct ipx_addr *ina;
+
+ ina = &(IA_SIPX(ifa)->sipx_addr);
+
+ if (ipx_nullhost(*ina))
+ ina->x_host = *(union ipx_host *)
+ IF_LLADDR(ifp);
+ else
+ bcopy((caddr_t) ina->x_host.c_host,
+ (caddr_t) IF_LLADDR(ifp),
+ ISO88025_ADDR_LEN);
+
+ /*
+ * Set new address
+ */
+ ifp->if_init(ifp->if_softc);
+ }
+ break;
+#endif /* IPX */
+ default:
+ ifp->if_init(ifp->if_softc);
+ break;
+ }
+ break;
+
+ case SIOCGIFADDR: {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *) & ifr->ifr_data;
+ bcopy(IF_LLADDR(ifp),
+ (caddr_t) sa->sa_data, ISO88025_ADDR_LEN);
+ }
+ break;
+
+ case SIOCSIFMTU:
+ /*
+ * Set the interface MTU.
+ */
+ if (ifr->ifr_mtu > ISO88025_MAX_MTU) {
+ error = EINVAL;
+ } else {
+ ifp->if_mtu = ifr->ifr_mtu;
+ }
+ break;
+ default:
+ error = EINVAL; /* XXX netbsd has ENOTTY??? */
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * ISO88025 encapsulation
+ */
+int
+iso88025_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct route *ro)
+{
+ u_int16_t snap_type = 0;
+ int loop_copy = 0, error = 0, rif_len = 0;
+ u_char edst[ISO88025_ADDR_LEN];
+ struct iso88025_header *th;
+ struct iso88025_header gen_th;
+ struct sockaddr_dl *sdl = NULL;
+ struct rtentry *rt0 = NULL;
+#if defined(INET) || defined(INET6)
+ struct llentry *lle;
+#endif
+
+ if (ro != NULL)
+ rt0 = ro->ro_rt;
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ senderr(error);
+#endif
+
+ if (ifp->if_flags & IFF_MONITOR)
+ senderr(ENETDOWN);
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ senderr(ENETDOWN);
+ getmicrotime(&ifp->if_lastchange);
+
+ /* Calculate routing info length based on arp table entry */
+ /* XXX any better way to do this ? */
+
+ if (rt0 && (sdl = (struct sockaddr_dl *)rt0->rt_gateway))
+ if (SDL_ISO88025(sdl)->trld_rcf != 0)
+ rif_len = TR_RCF_RIFLEN(SDL_ISO88025(sdl)->trld_rcf);
+
+ /* Generate a generic 802.5 header for the packet */
+ gen_th.ac = TR_AC;
+ gen_th.fc = TR_LLC_FRAME;
+ (void)memcpy((caddr_t)gen_th.iso88025_shost, IF_LLADDR(ifp),
+ ISO88025_ADDR_LEN);
+ if (rif_len) {
+ gen_th.iso88025_shost[0] |= TR_RII;
+ if (rif_len > 2) {
+ gen_th.rcf = SDL_ISO88025(sdl)->trld_rcf;
+ (void)memcpy((caddr_t)gen_th.rd,
+ (caddr_t)SDL_ISO88025(sdl)->trld_route,
+ rif_len - 2);
+ }
+ }
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = arpresolve(ifp, rt0, m, dst, edst, &lle);
+ if (error)
+ return (error == EWOULDBLOCK ? 0 : error);
+ snap_type = ETHERTYPE_IP;
+ break;
+ case AF_ARP:
+ {
+ struct arphdr *ah;
+ ah = mtod(m, struct arphdr *);
+ ah->ar_hrd = htons(ARPHRD_IEEE802);
+
+ loop_copy = -1; /* if this is for us, don't do it */
+
+ switch(ntohs(ah->ar_op)) {
+ case ARPOP_REVREQUEST:
+ case ARPOP_REVREPLY:
+ snap_type = ETHERTYPE_REVARP;
+ break;
+ case ARPOP_REQUEST:
+ case ARPOP_REPLY:
+ default:
+ snap_type = ETHERTYPE_ARP;
+ break;
+ }
+
+ if (m->m_flags & M_BCAST)
+ bcopy(ifp->if_broadcastaddr, edst, ISO88025_ADDR_LEN);
+ else
+ bcopy(ar_tha(ah), edst, ISO88025_ADDR_LEN);
+
+ }
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
+ if (error)
+ return (error);
+ snap_type = ETHERTYPE_IPV6;
+ break;
+#endif /* INET6 */
+#ifdef IPX
+ case AF_IPX:
+ {
+ u_int8_t *cp;
+
+ bcopy((caddr_t)&(satoipx_addr(dst).x_host), (caddr_t)edst,
+ ISO88025_ADDR_LEN);
+
+ M_PREPEND(m, 3, M_WAIT);
+ m = m_pullup(m, 3);
+		if (m == NULL)
+ senderr(ENOBUFS);
+ cp = mtod(m, u_int8_t *);
+ *cp++ = ETHERTYPE_IPX_8022;
+ *cp++ = ETHERTYPE_IPX_8022;
+ *cp++ = LLC_UI;
+ }
+ break;
+#endif /* IPX */
+ case AF_UNSPEC:
+ {
+ struct iso88025_sockaddr_data *sd;
+		/*
+		 * For AF_UNSPEC sockaddr.sa_data must contain all of the
+		 * mac information needed to send the packet.  This allows
+		 * the full mac, llc, and source routing function to be
+		 * controlled.  The llc and source routing information must
+		 * already be in the mbuf provided; ac/fc are set in
+		 * sa_data.  sockaddr.sa_data should be an
+		 * iso88025_sockaddr_data structure; see iso88025.h.
+		 */
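+		/*
+		 * A minimal sketch of what a caller might build (the
+		 * "dst_mac"/"src_mac" buffers are hypothetical, for
+		 * illustration only):
+		 *
+		 *	sa.sa_family = AF_UNSPEC;
+		 *	sa.sa_len = sizeof(sa);
+		 *	sd = (struct iso88025_sockaddr_data *)sa.sa_data;
+		 *	sd->ac = TR_AC;
+		 *	sd->fc = TR_LLC_FRAME;
+		 *	memcpy(sd->ether_dhost, dst_mac, ISO88025_ADDR_LEN);
+		 *	memcpy(sd->ether_shost, src_mac, ISO88025_ADDR_LEN);
+		 *
+		 * The LLC_TEST reply path in iso88025_input() builds such a
+		 * sockaddr before calling if_output().
+		 */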
+ loop_copy = -1;
+ sd = (struct iso88025_sockaddr_data *)dst->sa_data;
+ gen_th.ac = sd->ac;
+ gen_th.fc = sd->fc;
+ (void)memcpy((caddr_t)edst, (caddr_t)sd->ether_dhost,
+ ISO88025_ADDR_LEN);
+ (void)memcpy((caddr_t)gen_th.iso88025_shost,
+ (caddr_t)sd->ether_shost, ISO88025_ADDR_LEN);
+ rif_len = 0;
+ break;
+ }
+ default:
+ if_printf(ifp, "can't handle af%d\n", dst->sa_family);
+ senderr(EAFNOSUPPORT);
+ break;
+ }
+
+ /*
+ * Add LLC header.
+ */
+ if (snap_type != 0) {
+ struct llc *l;
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
+		if (m == NULL)
+ senderr(ENOBUFS);
+ l = mtod(m, struct llc *);
+ l->llc_control = LLC_UI;
+ l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP;
+ l->llc_snap.org_code[0] =
+ l->llc_snap.org_code[1] =
+ l->llc_snap.org_code[2] = 0;
+ l->llc_snap.ether_type = htons(snap_type);
+ }
+
+ /*
+ * Add local net header. If no space in first mbuf,
+ * allocate another.
+ */
+ M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_DONTWAIT);
+	if (m == NULL)
+ senderr(ENOBUFS);
+ th = mtod(m, struct iso88025_header *);
+ bcopy((caddr_t)edst, (caddr_t)&gen_th.iso88025_dhost, ISO88025_ADDR_LEN);
+
+ /* Copy as much of the generic header as is needed into the mbuf */
+ memcpy(th, &gen_th, ISO88025_HDR_LEN + rif_len);
+
+ /*
+ * If a simplex interface, and the packet is being sent to our
+ * Ethernet address or a broadcast address, loopback a copy.
+ * XXX To make a simplex device behave exactly like a duplex
+ * device, we should copy in the case of sending to our own
+ * ethernet address (thus letting the original actually appear
+ * on the wire). However, we don't do that here for security
+ * reasons and compatibility with the original behavior.
+ */
+ if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) {
+ if ((m->m_flags & M_BCAST) || (loop_copy > 0)) {
+ struct mbuf *n;
+ n = m_copy(m, 0, (int)M_COPYALL);
+ (void) if_simloop(ifp, n, dst->sa_family,
+ ISO88025_HDR_LEN);
+		} else if (bcmp(th->iso88025_dhost, th->iso88025_shost,
+		    ISO88025_ADDR_LEN) == 0) {
+ (void) if_simloop(ifp, m, dst->sa_family,
+ ISO88025_HDR_LEN);
+ return(0); /* XXX */
+ }
+ }
+
+ IFQ_HANDOFF_ADJ(ifp, m, ISO88025_HDR_LEN + LLC_SNAPFRAMELEN, error);
+ if (error) {
+ printf("iso88025_output: packet dropped QFULL.\n");
+ ifp->if_oerrors++;
+ }
+ return (error);
+
+bad:
+ ifp->if_oerrors++;
+ if (m)
+ m_freem(m);
+ return (error);
+}
+
+/*
+ * ISO 88025 de-encapsulation
+ */
+void
+iso88025_input(struct ifnet *ifp, struct mbuf *m)
+{
+ struct iso88025_header *th;
+ struct llc *l;
+ int isr;
+ int mac_hdr_len;
+
+ /*
+ * Do consistency checks to verify assumptions
+ * made by code past this point.
+ */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ if_printf(ifp, "discard frame w/o packet header\n");
+ ifp->if_ierrors++;
+ m_freem(m);
+ return;
+ }
+ if (m->m_pkthdr.rcvif == NULL) {
+ if_printf(ifp, "discard frame w/o interface pointer\n");
+ ifp->if_ierrors++;
+ m_freem(m);
+ return;
+ }
+
+ m = m_pullup(m, ISO88025_HDR_LEN);
+ if (m == NULL) {
+ ifp->if_ierrors++;
+ goto dropanyway;
+ }
+ th = mtod(m, struct iso88025_header *);
+ m->m_pkthdr.header = (void *)th;
+
+ /*
+ * Discard packet if interface is not up.
+ */
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ goto dropanyway;
+
+ /*
+ * Give bpf a chance at the packet.
+ */
+ BPF_MTAP(ifp, m);
+
+ /*
+ * Interface marked for monitoring; discard packet.
+ */
+ if (ifp->if_flags & IFF_MONITOR) {
+ m_freem(m);
+ return;
+ }
+
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+ /*
+ * Update interface statistics.
+ */
+ ifp->if_ibytes += m->m_pkthdr.len;
+ getmicrotime(&ifp->if_lastchange);
+
+ /*
+ * Discard non local unicast packets when interface
+ * is in promiscuous mode.
+ */
+ if ((ifp->if_flags & IFF_PROMISC) &&
+ ((th->iso88025_dhost[0] & 1) == 0) &&
+ (bcmp(IF_LLADDR(ifp), (caddr_t) th->iso88025_dhost,
+ ISO88025_ADDR_LEN) != 0))
+ goto dropanyway;
+
+ /*
+ * Set mbuf flags for bcast/mcast.
+ */
+ if (th->iso88025_dhost[0] & 1) {
+ if (bcmp(iso88025_broadcastaddr, th->iso88025_dhost,
+ ISO88025_ADDR_LEN) == 0)
+ m->m_flags |= M_BCAST;
+ else
+ m->m_flags |= M_MCAST;
+ ifp->if_imcasts++;
+ }
+
+ mac_hdr_len = ISO88025_HDR_LEN;
+ /* Check for source routing info */
+ if (th->iso88025_shost[0] & TR_RII)
+ mac_hdr_len += TR_RCF_RIFLEN(th->rcf);
+
+ /* Strip off ISO88025 header. */
+ m_adj(m, mac_hdr_len);
+
+ m = m_pullup(m, LLC_SNAPFRAMELEN);
+	if (m == NULL) {
+ ifp->if_ierrors++;
+ goto dropanyway;
+ }
+ l = mtod(m, struct llc *);
+
+ switch (l->llc_dsap) {
+#ifdef IPX
+ case ETHERTYPE_IPX_8022: /* Thanks a bunch Novell */
+ if ((l->llc_control != LLC_UI) ||
+ (l->llc_ssap != ETHERTYPE_IPX_8022)) {
+ ifp->if_noproto++;
+ goto dropanyway;
+ }
+
+ th->iso88025_shost[0] &= ~(TR_RII);
+ m_adj(m, 3);
+ isr = NETISR_IPX;
+ break;
+#endif /* IPX */
+ case LLC_SNAP_LSAP: {
+ u_int16_t type;
+ if ((l->llc_control != LLC_UI) ||
+ (l->llc_ssap != LLC_SNAP_LSAP)) {
+ ifp->if_noproto++;
+ goto dropanyway;
+ }
+
+ if (l->llc_snap.org_code[0] != 0 ||
+ l->llc_snap.org_code[1] != 0 ||
+ l->llc_snap.org_code[2] != 0) {
+ ifp->if_noproto++;
+ goto dropanyway;
+ }
+
+ type = ntohs(l->llc_snap.ether_type);
+ m_adj(m, LLC_SNAPFRAMELEN);
+ switch (type) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ th->iso88025_shost[0] &= ~(TR_RII);
+ if ((m = ip_fastforward(m)) == NULL)
+ return;
+ isr = NETISR_IP;
+ break;
+
+ case ETHERTYPE_ARP:
+ if (ifp->if_flags & IFF_NOARP)
+ goto dropanyway;
+ isr = NETISR_ARP;
+ break;
+#endif /* INET */
+#ifdef IPX_SNAP /* XXX: Not supported! */
+ case ETHERTYPE_IPX:
+ th->iso88025_shost[0] &= ~(TR_RII);
+ isr = NETISR_IPX;
+ break;
+#endif /* IPX_SNAP */
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ th->iso88025_shost[0] &= ~(TR_RII);
+ isr = NETISR_IPV6;
+ break;
+#endif /* INET6 */
+ default:
+			printf("iso88025_input: unexpected llc_snap ether_type 0x%04x\n", type);
+ ifp->if_noproto++;
+ goto dropanyway;
+ }
+ break;
+ }
+#ifdef ISO
+ case LLC_ISO_LSAP:
+ switch (l->llc_control) {
+ case LLC_UI:
+ ifp->if_noproto++;
+ goto dropanyway;
+ break;
+ case LLC_XID:
+ case LLC_XID_P:
+ if(m->m_len < ISO88025_ADDR_LEN)
+ goto dropanyway;
+ l->llc_window = 0;
+ l->llc_fid = 9;
+ l->llc_class = 1;
+ l->llc_dsap = l->llc_ssap = 0;
+ /* Fall through to */
+ case LLC_TEST:
+ case LLC_TEST_P:
+ {
+ struct sockaddr sa;
+ struct arpcom *ac;
+ struct iso88025_sockaddr_data *th2;
+ int i;
+ u_char c;
+
+ c = l->llc_dsap;
+
+ if (th->iso88025_shost[0] & TR_RII) { /* XXX */
+ printf("iso88025_input: dropping source routed LLC_TEST\n");
+ goto dropanyway;
+ }
+ l->llc_dsap = l->llc_ssap;
+ l->llc_ssap = c;
+ if (m->m_flags & (M_BCAST | M_MCAST))
+ bcopy((caddr_t)IF_LLADDR(ifp),
+ (caddr_t)th->iso88025_dhost,
+ ISO88025_ADDR_LEN);
+ sa.sa_family = AF_UNSPEC;
+ sa.sa_len = sizeof(sa);
+ th2 = (struct iso88025_sockaddr_data *)sa.sa_data;
+ for (i = 0; i < ISO88025_ADDR_LEN; i++) {
+ th2->ether_shost[i] = c = th->iso88025_dhost[i];
+ th2->ether_dhost[i] = th->iso88025_dhost[i] =
+ th->iso88025_shost[i];
+ th->iso88025_shost[i] = c;
+ }
+ th2->ac = TR_AC;
+ th2->fc = TR_LLC_FRAME;
+ ifp->if_output(ifp, m, &sa, NULL);
+ return;
+ }
+ default:
+ printf("iso88025_input: unexpected llc control 0x%02x\n", l->llc_control);
+ ifp->if_noproto++;
+ goto dropanyway;
+ break;
+ }
+ break;
+#endif /* ISO */
+ default:
+ printf("iso88025_input: unknown dsap 0x%x\n", l->llc_dsap);
+ ifp->if_noproto++;
+ goto dropanyway;
+ break;
+ }
+
+ netisr_dispatch(isr, m);
+ return;
+
+dropanyway:
+ ifp->if_iqdrops++;
+ if (m)
+ m_freem(m);
+ return;
+}
+
+static int
+iso88025_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+    struct sockaddr *sa)
+{
+ struct sockaddr_dl *sdl;
+#ifdef INET
+ struct sockaddr_in *sin;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+#endif
+ u_char *e_addr;
+
+ switch(sa->sa_family) {
+ case AF_LINK:
+ /*
+ * No mapping needed. Just check that it's a valid MC address.
+ */
+ sdl = (struct sockaddr_dl *)sa;
+ e_addr = LLADDR(sdl);
+ if ((e_addr[0] & 1) != 1) {
+ return (EADDRNOTAVAIL);
+ }
+		*llsa = NULL;
+ return (0);
+
+#ifdef INET
+ case AF_INET:
+ sin = (struct sockaddr_in *)sa;
+ if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
+ return (EADDRNOTAVAIL);
+ }
+ sdl = malloc(sizeof *sdl, M_IFMADDR,
+ M_NOWAIT|M_ZERO);
+ if (sdl == NULL)
+ return (ENOMEM);
+ sdl->sdl_len = sizeof *sdl;
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = IFT_ISO88025;
+ sdl->sdl_alen = ISO88025_ADDR_LEN;
+ e_addr = LLADDR(sdl);
+ ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
+ *llsa = (struct sockaddr *)sdl;
+ return (0);
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /*
+ * An IP6 address of 0 means listen to all
+			 * of the Ethernet multicast addresses used for IP6.
+ * (This is used for multicast routers.)
+ */
+ ifp->if_flags |= IFF_ALLMULTI;
+			*llsa = NULL;
+ return (0);
+ }
+ if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
+ return (EADDRNOTAVAIL);
+ }
+ sdl = malloc(sizeof *sdl, M_IFMADDR,
+ M_NOWAIT|M_ZERO);
+ if (sdl == NULL)
+ return (ENOMEM);
+ sdl->sdl_len = sizeof *sdl;
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = IFT_ISO88025;
+ sdl->sdl_alen = ISO88025_ADDR_LEN;
+ e_addr = LLADDR(sdl);
+ ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
+ *llsa = (struct sockaddr *)sdl;
+ return (0);
+#endif
+
+ default:
+ /*
+ * Well, the text isn't quite right, but it's the name
+ * that counts...
+ */
+ return (EAFNOSUPPORT);
+ }
+
+ return (0);
+}
+
+MALLOC_DEFINE(M_ISO88025, "arpcom", "802.5 interface internals");
+
+static void*
+iso88025_alloc(u_char type, struct ifnet *ifp)
+{
+ struct arpcom *ac;
+
+ ac = malloc(sizeof(struct arpcom), M_ISO88025, M_WAITOK | M_ZERO);
+ ac->ac_ifp = ifp;
+
+ return (ac);
+}
+
+static void
+iso88025_free(void *com, u_char type)
+{
+
+ free(com, M_ISO88025);
+}
+
+static int
+iso88025_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ if_register_com_alloc(IFT_ISO88025, iso88025_alloc,
+ iso88025_free);
+ break;
+ case MOD_UNLOAD:
+ if_deregister_com_alloc(IFT_ISO88025);
+ break;
+ default:
+ return EOPNOTSUPP;
+ }
+
+ return (0);
+}
+
+static moduledata_t iso88025_mod = {
+ "iso88025",
+ iso88025_modevent,
+ 0
+};
+
+DECLARE_MODULE(iso88025, iso88025_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(iso88025, 1);
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
new file mode 100644
index 00000000..47c72ca2
--- /dev/null
+++ b/freebsd/sys/net/if_lagg.c
@@ -0,0 +1,1808 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */
+
+/*
+ * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
+ * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/hash.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/taskqueue.h>
+#include <freebsd/sys/eventhandler.h>
+
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_arp.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_llc.h>
+#include <freebsd/net/if_media.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/bpf.h>
+
+#ifdef INET
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/if_ether.h>
+#include <freebsd/netinet/ip.h>
+#endif
+
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+
+#include <freebsd/net/if_vlan_var.h>
+#include <freebsd/net/if_lagg.h>
+#include <freebsd/net/ieee8023ad_lacp.h>
+
+/* Special flags we should propagate to the lagg ports. */
+static struct {
+ int flag;
+ int (*func)(struct ifnet *, int);
+} lagg_pflags[] = {
+ {IFF_PROMISC, ifpromisc},
+ {IFF_ALLMULTI, if_allmulti},
+ {0, NULL}
+};
+
+SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */
+static struct mtx lagg_list_mtx;
+eventhandler_tag lagg_detach_cookie = NULL;
+
+static int lagg_clone_create(struct if_clone *, int, caddr_t);
+static void lagg_clone_destroy(struct ifnet *);
+static void lagg_lladdr(struct lagg_softc *, uint8_t *);
+static void lagg_capabilities(struct lagg_softc *);
+static void lagg_port_lladdr(struct lagg_port *, uint8_t *);
+static void lagg_port_setlladdr(void *, int);
+static int lagg_port_create(struct lagg_softc *, struct ifnet *);
+static int lagg_port_destroy(struct lagg_port *, int);
+static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
+static void lagg_linkstate(struct lagg_softc *);
+static void lagg_port_state(struct ifnet *, int);
+static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
+static int lagg_port_output(struct ifnet *, struct mbuf *,
+ struct sockaddr *, struct route *);
+static void lagg_port_ifdetach(void *arg __unused, struct ifnet *);
+static int lagg_port_checkstacking(struct lagg_softc *);
+static void lagg_port2req(struct lagg_port *, struct lagg_reqport *);
+static void lagg_init(void *);
+static void lagg_stop(struct lagg_softc *);
+static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
+static int lagg_ether_setmulti(struct lagg_softc *);
+static int lagg_ether_cmdmulti(struct lagg_port *, int);
+static int lagg_setflag(struct lagg_port *, int, int,
+ int (*func)(struct ifnet *, int));
+static int lagg_setflags(struct lagg_port *, int status);
+static void lagg_start(struct ifnet *);
+static int lagg_media_change(struct ifnet *);
+static void lagg_media_status(struct ifnet *, struct ifmediareq *);
+static struct lagg_port *lagg_link_active(struct lagg_softc *,
+ struct lagg_port *);
+static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
+
+IFC_SIMPLE_DECLARE(lagg, 0);
+
+/* Simple round robin */
+static int lagg_rr_attach(struct lagg_softc *);
+static int lagg_rr_detach(struct lagg_softc *);
+static int lagg_rr_start(struct lagg_softc *, struct mbuf *);
+static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
+ struct mbuf *);
+
+/* Active failover */
+static int lagg_fail_attach(struct lagg_softc *);
+static int lagg_fail_detach(struct lagg_softc *);
+static int lagg_fail_start(struct lagg_softc *, struct mbuf *);
+static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
+ struct mbuf *);
+
+/* Loadbalancing */
+static int lagg_lb_attach(struct lagg_softc *);
+static int lagg_lb_detach(struct lagg_softc *);
+static int lagg_lb_port_create(struct lagg_port *);
+static void lagg_lb_port_destroy(struct lagg_port *);
+static int lagg_lb_start(struct lagg_softc *, struct mbuf *);
+static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
+ struct mbuf *);
+static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
+
+/* 802.3ad LACP */
+static int lagg_lacp_attach(struct lagg_softc *);
+static int lagg_lacp_detach(struct lagg_softc *);
+static int lagg_lacp_start(struct lagg_softc *, struct mbuf *);
+static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
+ struct mbuf *);
+static void lagg_lacp_lladdr(struct lagg_softc *);
+
+/* lagg protocol table */
+static const struct {
+ int ti_proto;
+ int (*ti_attach)(struct lagg_softc *);
+} lagg_protos[] = {
+ { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach },
+ { LAGG_PROTO_FAILOVER, lagg_fail_attach },
+ { LAGG_PROTO_LOADBALANCE, lagg_lb_attach },
+ { LAGG_PROTO_ETHERCHANNEL, lagg_lb_attach },
+ { LAGG_PROTO_LACP, lagg_lacp_attach },
+ { LAGG_PROTO_NONE, NULL }
+};
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation");
+
+static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
+SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
+ &lagg_failover_rx_all, 0,
+ "Accept input from any interface in a failover lagg");
+
+static int
+lagg_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
+ SLIST_INIT(&lagg_list);
+ if_clone_attach(&lagg_cloner);
+ lagg_input_p = lagg_input;
+ lagg_linkstate_p = lagg_port_state;
+ lagg_detach_cookie = EVENTHANDLER_REGISTER(
+ ifnet_departure_event, lagg_port_ifdetach, NULL,
+ EVENTHANDLER_PRI_ANY);
+ break;
+ case MOD_UNLOAD:
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+ lagg_detach_cookie);
+ if_clone_detach(&lagg_cloner);
+ lagg_input_p = NULL;
+ lagg_linkstate_p = NULL;
+ mtx_destroy(&lagg_list_mtx);
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t lagg_mod = {
+ "if_lagg",
+ lagg_modevent,
+ 0
+};
+
+DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+
+#if __FreeBSD_version >= 800000
+/*
+ * This routine is run via a vlan config event.
+ */
+static void
+lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
+{
+ struct lagg_softc *sc = ifp->if_softc;
+ struct lagg_port *lp;
+
+ if (ifp->if_softc != arg) /* Not our event */
+ return;
+
+ LAGG_RLOCK(sc);
+ if (!SLIST_EMPTY(&sc->sc_ports)) {
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
+ }
+ LAGG_RUNLOCK(sc);
+}
+
+/*
+ * This routine is run via a vlan unconfig event.
+ */
+static void
+lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
+{
+ struct lagg_softc *sc = ifp->if_softc;
+ struct lagg_port *lp;
+
+ if (ifp->if_softc != arg) /* Not our event */
+ return;
+
+ LAGG_RLOCK(sc);
+ if (!SLIST_EMPTY(&sc->sc_ports)) {
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
+ }
+ LAGG_RUNLOCK(sc);
+}
+#endif
+
+static int
+lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct lagg_softc *sc;
+ struct ifnet *ifp;
+ int i, error = 0;
+ static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
+
+ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+ ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ free(sc, M_DEVBUF);
+ return (ENOSPC);
+ }
+
+ sc->sc_proto = LAGG_PROTO_NONE;
+ for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
+ if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
+ sc->sc_proto = lagg_protos[i].ti_proto;
+ if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
+ if_free_type(ifp, IFT_ETHER);
+ free(sc, M_DEVBUF);
+ return (error);
+ }
+ break;
+ }
+ }
+ LAGG_LOCK_INIT(sc);
+ SLIST_INIT(&sc->sc_ports);
+ TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
+
+ /* Initialise pseudo media types */
+ ifmedia_init(&sc->sc_media, 0, lagg_media_change,
+ lagg_media_status);
+ ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+
+ if_initname(ifp, ifc->ifc_name, unit);
+ ifp->if_type = IFT_ETHER;
+ ifp->if_softc = sc;
+ ifp->if_start = lagg_start;
+ ifp->if_init = lagg_init;
+ ifp->if_ioctl = lagg_ioctl;
+ ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
+
+ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
+ ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
+ IFQ_SET_READY(&ifp->if_snd);
+
+	/*
+	 * Attach as an ordinary ethernet device; child ports will be
+	 * attached as special devices of type IFT_IEEE8023ADLAG.
+	 */
+ ether_ifattach(ifp, eaddr);
+
+#if __FreeBSD_version >= 800000
+ sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
+ lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
+ sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
+ lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
+#endif
+
+ /* Insert into the global list of laggs */
+ mtx_lock(&lagg_list_mtx);
+ SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
+ mtx_unlock(&lagg_list_mtx);
+
+ return (0);
+}
+
+static void
+lagg_clone_destroy(struct ifnet *ifp)
+{
+ struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+ struct lagg_port *lp;
+
+ LAGG_WLOCK(sc);
+
+ lagg_stop(sc);
+ ifp->if_flags &= ~IFF_UP;
+
+#if __FreeBSD_version >= 800000
+ EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
+ EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
+#endif
+
+ /* Shutdown and remove lagg ports */
+ while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
+ lagg_port_destroy(lp, 1);
+ /* Unhook the aggregation protocol */
+ (*sc->sc_detach)(sc);
+
+ LAGG_WUNLOCK(sc);
+
+ ifmedia_removeall(&sc->sc_media);
+ ether_ifdetach(ifp);
+ if_free_type(ifp, IFT_ETHER);
+
+ mtx_lock(&lagg_list_mtx);
+ SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
+ mtx_unlock(&lagg_list_mtx);
+
+ taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
+ LAGG_LOCK_DESTROY(sc);
+ free(sc, M_DEVBUF);
+}
+
+static void
+lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
+{
+ struct ifnet *ifp = sc->sc_ifp;
+
+ if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
+ return;
+
+ bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ /* Let the protocol know the MAC has changed */
+ if (sc->sc_lladdr != NULL)
+ (*sc->sc_lladdr)(sc);
+ EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+}
+
+static void
+lagg_capabilities(struct lagg_softc *sc)
+{
+ struct lagg_port *lp;
+ int cap = ~0, ena = ~0;
+ u_long hwa = ~0UL;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ /* Get capabilities from the lagg ports */
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ cap &= lp->lp_ifp->if_capabilities;
+ ena &= lp->lp_ifp->if_capenable;
+ hwa &= lp->lp_ifp->if_hwassist;
+ }
+ cap = (cap == ~0 ? 0 : cap);
+ ena = (ena == ~0 ? 0 : ena);
+ hwa = (hwa == ~0 ? 0 : hwa);
+
+ if (sc->sc_ifp->if_capabilities != cap ||
+ sc->sc_ifp->if_capenable != ena ||
+ sc->sc_ifp->if_hwassist != hwa) {
+ sc->sc_ifp->if_capabilities = cap;
+ sc->sc_ifp->if_capenable = ena;
+ sc->sc_ifp->if_hwassist = hwa;
+ getmicrotime(&sc->sc_ifp->if_lastchange);
+
+ if (sc->sc_ifflags & IFF_DEBUG)
+ if_printf(sc->sc_ifp,
+ "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
+ }
+}
+
+static void
+lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
+{
+ struct lagg_softc *sc = lp->lp_softc;
+ struct ifnet *ifp = lp->lp_ifp;
+ struct lagg_llq *llq;
+ int pending = 0;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ if (lp->lp_detaching ||
+ memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
+ return;
+
+	/* Check to make sure it's not already queued to be changed */
+ SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
+ if (llq->llq_ifp == ifp) {
+ pending = 1;
+ break;
+ }
+ }
+
+ if (!pending) {
+ llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
+ if (llq == NULL) /* XXX what to do */
+ return;
+ }
+
+ /* Update the lladdr even if pending, it may have changed */
+ llq->llq_ifp = ifp;
+ bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
+
+ if (!pending)
+ SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
+
+ taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
+}
+
+/*
+ * Set the interface MAC address from a taskqueue to avoid a LOR (lock
+ * order reversal): the llq list is detached under the lagg write lock,
+ * but if_setlladdr() calls into the port driver and must run unlocked.
+ */
+static void
+lagg_port_setlladdr(void *arg, int pending)
+{
+ struct lagg_softc *sc = (struct lagg_softc *)arg;
+ struct lagg_llq *llq, *head;
+ struct ifnet *ifp;
+ int error;
+
+ /* Grab a local reference of the queue and remove it from the softc */
+ LAGG_WLOCK(sc);
+ head = SLIST_FIRST(&sc->sc_llq_head);
+ SLIST_FIRST(&sc->sc_llq_head) = NULL;
+ LAGG_WUNLOCK(sc);
+
+ /*
+ * Traverse the queue and set the lladdr on each ifp. It is safe to do
+ * unlocked as we have the only reference to it.
+ */
+ for (llq = head; llq != NULL; llq = head) {
+ ifp = llq->llq_ifp;
+
+ /* Set the link layer address */
+ error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
+ if (error)
+ printf("%s: setlladdr failed on %s\n", __func__,
+ ifp->if_xname);
+
+ head = SLIST_NEXT(llq, llq_entries);
+ free(llq, M_DEVBUF);
+ }
+}
+
+static int
+lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
+{
+ struct lagg_softc *sc_ptr;
+ struct lagg_port *lp;
+ int error = 0;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ /* Limit the maximal number of lagg ports */
+ if (sc->sc_count >= LAGG_MAX_PORTS)
+ return (ENOSPC);
+
+	/* Check if port has already been associated with a lagg */
+ if (ifp->if_lagg != NULL)
+ return (EBUSY);
+
+	/* XXX Disallow non-ethernet interfaces (this should accept any 802 media) */
+ if (ifp->if_type != IFT_ETHER)
+ return (EPROTONOSUPPORT);
+
+ /* Allow the first Ethernet member to define the MTU */
+ if (SLIST_EMPTY(&sc->sc_ports))
+ sc->sc_ifp->if_mtu = ifp->if_mtu;
+ else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
+ if_printf(sc->sc_ifp, "invalid MTU for %s\n",
+ ifp->if_xname);
+ return (EINVAL);
+ }
+
+ if ((lp = malloc(sizeof(struct lagg_port),
+ M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
+ return (ENOMEM);
+
+ /* Check if port is a stacked lagg */
+ mtx_lock(&lagg_list_mtx);
+ SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
+ if (ifp == sc_ptr->sc_ifp) {
+ mtx_unlock(&lagg_list_mtx);
+ free(lp, M_DEVBUF);
+ return (EINVAL);
+		/* XXX disable stacking for the moment, it's untested
+ lp->lp_flags |= LAGG_PORT_STACK;
+ if (lagg_port_checkstacking(sc_ptr) >=
+ LAGG_MAX_STACKING) {
+ mtx_unlock(&lagg_list_mtx);
+ free(lp, M_DEVBUF);
+ return (E2BIG);
+ }
+ */
+ }
+ }
+ mtx_unlock(&lagg_list_mtx);
+
+ /* Change the interface type */
+ lp->lp_iftype = ifp->if_type;
+ ifp->if_type = IFT_IEEE8023ADLAG;
+ ifp->if_lagg = lp;
+ lp->lp_ioctl = ifp->if_ioctl;
+ ifp->if_ioctl = lagg_port_ioctl;
+ lp->lp_output = ifp->if_output;
+ ifp->if_output = lagg_port_output;
+
+ lp->lp_ifp = ifp;
+ lp->lp_softc = sc;
+
+ /* Save port link layer address */
+ bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
+
+ if (SLIST_EMPTY(&sc->sc_ports)) {
+ sc->sc_primary = lp;
+ lagg_lladdr(sc, IF_LLADDR(ifp));
+ } else {
+ /* Update link layer address for this port */
+ lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
+ }
+
+ /* Insert into the list of ports */
+ SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
+ sc->sc_count++;
+
+ /* Update lagg capabilities */
+ lagg_capabilities(sc);
+ lagg_linkstate(sc);
+
+ /* Add multicast addresses and interface flags to this port */
+ lagg_ether_cmdmulti(lp, 1);
+ lagg_setflags(lp, 1);
+
+ if (sc->sc_port_create != NULL)
+ error = (*sc->sc_port_create)(lp);
+ if (error) {
+ /* remove the port again, without calling sc_port_destroy */
+ lagg_port_destroy(lp, 0);
+ return (error);
+ }
+
+ return (error);
+}
+
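+/*
+ * Walk all ports and recurse into any port that is itself a lagg to
+ * compute the stacking depth of this lagg; used to enforce
+ * LAGG_MAX_STACKING when port stacking is enabled (currently commented
+ * out in lagg_port_create()).
+ */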
+static int
+lagg_port_checkstacking(struct lagg_softc *sc)
+{
+ struct lagg_softc *sc_ptr;
+ struct lagg_port *lp;
+ int m = 0;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ if (lp->lp_flags & LAGG_PORT_STACK) {
+ sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
+ m = MAX(m, lagg_port_checkstacking(sc_ptr));
+ }
+ }
+
+ return (m + 1);
+}
+
+static int
+lagg_port_destroy(struct lagg_port *lp, int runpd)
+{
+ struct lagg_softc *sc = lp->lp_softc;
+ struct lagg_port *lp_ptr;
+ struct lagg_llq *llq;
+ struct ifnet *ifp = lp->lp_ifp;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ if (runpd && sc->sc_port_destroy != NULL)
+ (*sc->sc_port_destroy)(lp);
+
+ /*
+ * Remove multicast addresses and interface flags from this port and
+ * reset the MAC address, skip if the interface is being detached.
+ */
+ if (!lp->lp_detaching) {
+ lagg_ether_cmdmulti(lp, 0);
+ lagg_setflags(lp, 0);
+ lagg_port_lladdr(lp, lp->lp_lladdr);
+ }
+
+ /* Restore interface */
+ ifp->if_type = lp->lp_iftype;
+ ifp->if_ioctl = lp->lp_ioctl;
+ ifp->if_output = lp->lp_output;
+ ifp->if_lagg = NULL;
+
+ /* Finally, remove the port from the lagg */
+ SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
+ sc->sc_count--;
+
+ /* Update the primary interface */
+ if (lp == sc->sc_primary) {
+ uint8_t lladdr[ETHER_ADDR_LEN];
+
+ if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
+ bzero(&lladdr, ETHER_ADDR_LEN);
+ } else {
+ bcopy(lp_ptr->lp_lladdr,
+ lladdr, ETHER_ADDR_LEN);
+ }
+ lagg_lladdr(sc, lladdr);
+ sc->sc_primary = lp_ptr;
+
+ /* Update link layer address for each port */
+ SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
+ lagg_port_lladdr(lp_ptr, lladdr);
+ }
+
+ /* Remove any pending lladdr changes from the queue */
+ if (lp->lp_detaching) {
+ SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
+ if (llq->llq_ifp == ifp) {
+ SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
+ llq_entries);
+ free(llq, M_DEVBUF);
+ break; /* Only appears once */
+ }
+ }
+ }
+
+ if (lp->lp_ifflags)
+ if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
+
+ free(lp, M_DEVBUF);
+
+ /* Update lagg capabilities */
+ lagg_capabilities(sc);
+ lagg_linkstate(sc);
+
+ return (0);
+}
+
+static int
+lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct lagg_reqport *rp = (struct lagg_reqport *)data;
+ struct lagg_softc *sc;
+ struct lagg_port *lp = NULL;
+ int error = 0;
+
+ /* Should be checked by the caller */
+ if (ifp->if_type != IFT_IEEE8023ADLAG ||
+ (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+ goto fallback;
+
+ switch (cmd) {
+ case SIOCGLAGGPORT:
+ if (rp->rp_portname[0] == '\0' ||
+ ifunit(rp->rp_portname) != ifp) {
+ error = EINVAL;
+ break;
+ }
+
+ LAGG_RLOCK(sc);
+ if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
+ error = ENOENT;
+ LAGG_RUNLOCK(sc);
+ break;
+ }
+
+ lagg_port2req(lp, rp);
+ LAGG_RUNLOCK(sc);
+ break;
+
+ case SIOCSIFCAP:
+ if (lp->lp_ioctl == NULL) {
+ error = EINVAL;
+ break;
+ }
+ error = (*lp->lp_ioctl)(ifp, cmd, data);
+ if (error)
+ break;
+
+ /* Update lagg interface capabilities */
+ LAGG_WLOCK(sc);
+ lagg_capabilities(sc);
+ LAGG_WUNLOCK(sc);
+ break;
+
+ case SIOCSIFMTU:
+ /* Do not allow the MTU to be changed once joined */
+ error = EINVAL;
+ break;
+
+ default:
+ goto fallback;
+ }
+
+ return (error);
+
+fallback:
+	if (lp != NULL && lp->lp_ioctl != NULL)
+ return ((*lp->lp_ioctl)(ifp, cmd, data));
+
+ return (EINVAL);
+}
+
+static int
+lagg_port_output(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct route *ro)
+{
+ struct lagg_port *lp = ifp->if_lagg;
+ struct ether_header *eh;
+ short type = 0;
+
+ switch (dst->sa_family) {
+ case pseudo_AF_HDRCMPLT:
+ case AF_UNSPEC:
+ eh = (struct ether_header *)dst->sa_data;
+ type = eh->ether_type;
+ break;
+ }
+
+ /*
+	 * Only allow ethernet types required to initiate or maintain the
+	 * link; aggregated frames take a different path.
+ */
+ switch (ntohs(type)) {
+ case ETHERTYPE_PAE: /* EAPOL PAE/802.1x */
+ return ((*lp->lp_output)(ifp, m, dst, ro));
+ }
+
+ /* drop any other frames */
+ m_freem(m);
+ return (EBUSY);
+}
+
+static void
+lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+ struct lagg_port *lp;
+ struct lagg_softc *sc;
+
+ if ((lp = ifp->if_lagg) == NULL)
+ return;
+
+ sc = lp->lp_softc;
+
+ LAGG_WLOCK(sc);
+ lp->lp_detaching = 1;
+ lagg_port_destroy(lp, 1);
+ LAGG_WUNLOCK(sc);
+}
+
+static void
+lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
+{
+ struct lagg_softc *sc = lp->lp_softc;
+
+ strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
+ strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
+ rp->rp_prio = lp->lp_prio;
+ rp->rp_flags = lp->lp_flags;
+ if (sc->sc_portreq != NULL)
+ (*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);
+
+ /* Add protocol specific flags */
+ switch (sc->sc_proto) {
+ case LAGG_PROTO_FAILOVER:
+ if (lp == sc->sc_primary)
+ rp->rp_flags |= LAGG_PORT_MASTER;
+ if (lp == lagg_link_active(sc, sc->sc_primary))
+ rp->rp_flags |= LAGG_PORT_ACTIVE;
+ break;
+
+ case LAGG_PROTO_ROUNDROBIN:
+ case LAGG_PROTO_LOADBALANCE:
+ case LAGG_PROTO_ETHERCHANNEL:
+ if (LAGG_PORTACTIVE(lp))
+ rp->rp_flags |= LAGG_PORT_ACTIVE;
+ break;
+
+ case LAGG_PROTO_LACP:
+ /* LACP has a different definition of active */
+ if (lacp_isactive(lp))
+ rp->rp_flags |= LAGG_PORT_ACTIVE;
+ if (lacp_iscollecting(lp))
+ rp->rp_flags |= LAGG_PORT_COLLECTING;
+ if (lacp_isdistributing(lp))
+ rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
+ break;
+ }
+
+}
+
+static void
+lagg_init(void *xsc)
+{
+ struct lagg_softc *sc = (struct lagg_softc *)xsc;
+ struct lagg_port *lp;
+ struct ifnet *ifp = sc->sc_ifp;
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ return;
+
+ LAGG_WLOCK(sc);
+
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ /* Update the port lladdrs */
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ lagg_port_lladdr(lp, IF_LLADDR(ifp));
+
+ if (sc->sc_init != NULL)
+ (*sc->sc_init)(sc);
+
+ LAGG_WUNLOCK(sc);
+}
+
+static void
+lagg_stop(struct lagg_softc *sc)
+{
+ struct ifnet *ifp = sc->sc_ifp;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return;
+
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
+ if (sc->sc_stop != NULL)
+ (*sc->sc_stop)(sc);
+}
+
+static int
+lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+ struct lagg_reqall *ra = (struct lagg_reqall *)data;
+ struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct lagg_port *lp;
+ struct ifnet *tpif;
+ struct thread *td = curthread;
+ char *buf, *outbuf;
+ int count, buflen, len, error = 0;
+
+ bzero(&rpbuf, sizeof(rpbuf));
+
+ switch (cmd) {
+ case SIOCGLAGG:
+ LAGG_RLOCK(sc);
+ count = 0;
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ count++;
+ buflen = count * sizeof(struct lagg_reqport);
+ LAGG_RUNLOCK(sc);
+
+ outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
+
+ LAGG_RLOCK(sc);
+ ra->ra_proto = sc->sc_proto;
+ if (sc->sc_req != NULL)
+ (*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
+
+ count = 0;
+ buf = outbuf;
+ len = min(ra->ra_size, buflen);
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ if (len < sizeof(rpbuf))
+ break;
+
+ lagg_port2req(lp, &rpbuf);
+ memcpy(buf, &rpbuf, sizeof(rpbuf));
+ count++;
+ buf += sizeof(rpbuf);
+ len -= sizeof(rpbuf);
+ }
+ LAGG_RUNLOCK(sc);
+ ra->ra_ports = count;
+ ra->ra_size = count * sizeof(rpbuf);
+ error = copyout(outbuf, ra->ra_port, ra->ra_size);
+ free(outbuf, M_TEMP);
+ break;
+ case SIOCSLAGG:
+ error = priv_check(td, PRIV_NET_LAGG);
+ if (error)
+ break;
+ if (ra->ra_proto >= LAGG_PROTO_MAX) {
+ error = EPROTONOSUPPORT;
+ break;
+ }
+ if (sc->sc_proto != LAGG_PROTO_NONE) {
+ LAGG_WLOCK(sc);
+ error = sc->sc_detach(sc);
+ /* Reset protocol and pointers */
+ sc->sc_proto = LAGG_PROTO_NONE;
+ sc->sc_detach = NULL;
+ sc->sc_start = NULL;
+ sc->sc_input = NULL;
+ sc->sc_port_create = NULL;
+ sc->sc_port_destroy = NULL;
+ sc->sc_linkstate = NULL;
+ sc->sc_init = NULL;
+ sc->sc_stop = NULL;
+ sc->sc_lladdr = NULL;
+ sc->sc_req = NULL;
+ sc->sc_portreq = NULL;
+ LAGG_WUNLOCK(sc);
+ }
+ if (error != 0)
+ break;
+ for (int i = 0; i < (sizeof(lagg_protos) /
+ sizeof(lagg_protos[0])); i++) {
+ if (lagg_protos[i].ti_proto == ra->ra_proto) {
+ if (sc->sc_ifflags & IFF_DEBUG)
+ printf("%s: using proto %u\n",
+ sc->sc_ifname,
+ lagg_protos[i].ti_proto);
+ LAGG_WLOCK(sc);
+ sc->sc_proto = lagg_protos[i].ti_proto;
+ if (sc->sc_proto != LAGG_PROTO_NONE)
+ error = lagg_protos[i].ti_attach(sc);
+ LAGG_WUNLOCK(sc);
+ return (error);
+ }
+ }
+ error = EPROTONOSUPPORT;
+ break;
+ case SIOCGLAGGPORT:
+ if (rp->rp_portname[0] == '\0' ||
+ (tpif = ifunit(rp->rp_portname)) == NULL) {
+ error = EINVAL;
+ break;
+ }
+
+ LAGG_RLOCK(sc);
+ if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
+ lp->lp_softc != sc) {
+ error = ENOENT;
+ LAGG_RUNLOCK(sc);
+ break;
+ }
+
+ lagg_port2req(lp, rp);
+ LAGG_RUNLOCK(sc);
+ break;
+ case SIOCSLAGGPORT:
+ error = priv_check(td, PRIV_NET_LAGG);
+ if (error)
+ break;
+ if (rp->rp_portname[0] == '\0' ||
+ (tpif = ifunit(rp->rp_portname)) == NULL) {
+ error = EINVAL;
+ break;
+ }
+ LAGG_WLOCK(sc);
+ error = lagg_port_create(sc, tpif);
+ LAGG_WUNLOCK(sc);
+ break;
+ case SIOCSLAGGDELPORT:
+ error = priv_check(td, PRIV_NET_LAGG);
+ if (error)
+ break;
+ if (rp->rp_portname[0] == '\0' ||
+ (tpif = ifunit(rp->rp_portname)) == NULL) {
+ error = EINVAL;
+ break;
+ }
+
+ LAGG_WLOCK(sc);
+ if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
+ lp->lp_softc != sc) {
+ error = ENOENT;
+ LAGG_WUNLOCK(sc);
+ break;
+ }
+
+ error = lagg_port_destroy(lp, 1);
+ LAGG_WUNLOCK(sc);
+ break;
+ case SIOCSIFFLAGS:
+ /* Set flags on ports too */
+ LAGG_WLOCK(sc);
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ lagg_setflags(lp, 1);
+ }
+ LAGG_WUNLOCK(sc);
+
+ if (!(ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ /*
+ * If interface is marked down and it is running,
+ * then stop and disable it.
+ */
+ LAGG_WLOCK(sc);
+ lagg_stop(sc);
+ LAGG_WUNLOCK(sc);
+ } else if ((ifp->if_flags & IFF_UP) &&
+ !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ /*
+ * If interface is marked up and it is stopped, then
+ * start it.
+ */
+ (*ifp->if_init)(sc);
+ }
+ break;
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ LAGG_WLOCK(sc);
+ error = lagg_ether_setmulti(sc);
+ LAGG_WUNLOCK(sc);
+ break;
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
+ break;
+
+ case SIOCSIFCAP:
+ case SIOCSIFMTU:
+ /* Do not allow the MTU or caps to be directly changed */
+ error = EINVAL;
+ break;
+
+ default:
+ error = ether_ioctl(ifp, cmd, data);
+ break;
+ }
+ return (error);
+}
+
+static int
+lagg_ether_setmulti(struct lagg_softc *sc)
+{
+ struct lagg_port *lp;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ /* First, remove any existing filter entries. */
+ lagg_ether_cmdmulti(lp, 0);
+ /* copy all addresses from the lagg interface to the port */
+ lagg_ether_cmdmulti(lp, 1);
+ }
+ return (0);
+}
+
+static int
+lagg_ether_cmdmulti(struct lagg_port *lp, int set)
+{
+ struct lagg_softc *sc = lp->lp_softc;
+ struct ifnet *ifp = lp->lp_ifp;
+ struct ifnet *scifp = sc->sc_ifp;
+ struct lagg_mc *mc;
+ struct ifmultiaddr *ifma, *rifma = NULL;
+ struct sockaddr_dl sdl;
+ int error;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ bzero((char *)&sdl, sizeof(sdl));
+ sdl.sdl_len = sizeof(sdl);
+ sdl.sdl_family = AF_LINK;
+ sdl.sdl_type = IFT_ETHER;
+ sdl.sdl_alen = ETHER_ADDR_LEN;
+ sdl.sdl_index = ifp->if_index;
+
+ if (set) {
+ TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
+ LLADDR(&sdl), ETHER_ADDR_LEN);
+
+ error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
+ if (error)
+ return (error);
+ mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
+ if (mc == NULL)
+ return (ENOMEM);
+ mc->mc_ifma = rifma;
+ SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
+ }
+ } else {
+ while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
+ SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
+ if_delmulti_ifma(mc->mc_ifma);
+ free(mc, M_DEVBUF);
+ }
+ }
+ return (0);
+}
+
+/* Handle a ref counted flag that should be set on the lagg port as well */
+static int
+lagg_setflag(struct lagg_port *lp, int flag, int status,
+ int (*func)(struct ifnet *, int))
+{
+ struct lagg_softc *sc = lp->lp_softc;
+ struct ifnet *scifp = sc->sc_ifp;
+ struct ifnet *ifp = lp->lp_ifp;
+ int error;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ status = status ? (scifp->if_flags & flag) : 0;
+ /* Now "status" contains the flag value or 0 */
+
+	/*
+	 * See if the recorded port status differs from what we want it to
+	 * be; if it does, flip it.  We record the port's status in
+	 * lp_ifflags so that we never clear a flag on the port that we did
+	 * not set.  In fact, we do not clear or set the port's flags
+	 * directly, but acquire or release references to them, which is
+	 * why the recorded flags stay in accord with the actual port
+	 * flags.
+	 */
+ if (status != (lp->lp_ifflags & flag)) {
+ error = (*func)(ifp, status);
+ if (error)
+ return (error);
+ lp->lp_ifflags &= ~flag;
+ lp->lp_ifflags |= status;
+ }
+ return (0);
+}
+
+/*
+ * Handle IFF_* flags that require certain changes on the lagg port:
+ * if "status" is true, update the port's flags to match the lagg's;
+ * if "status" is false, forcibly clear the flags set on the port.
+ */
+static int
+lagg_setflags(struct lagg_port *lp, int status)
+{
+ int error, i;
+
+ for (i = 0; lagg_pflags[i].flag; i++) {
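+/* Tunable at runtime, e.g. "sysctl net.link.lagg.failover_rx_all=1". */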
+ error = lagg_setflag(lp, lagg_pflags[i].flag,
+ status, lagg_pflags[i].func);
+ if (error)
+ return (error);
+ }
+ return (0);
+}
+
+static void
+lagg_start(struct ifnet *ifp)
+{
+ struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+ struct mbuf *m;
+ int error = 0;
+
+ LAGG_RLOCK(sc);
+ /* We need a Tx algorithm and at least one port */
+ if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
+ IF_DRAIN(&ifp->if_snd);
+ LAGG_RUNLOCK(sc);
+ return;
+ }
+
+ for (;; error = 0) {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ if (m == NULL)
+ break;
+
+ ETHER_BPF_MTAP(ifp, m);
+
+ error = (*sc->sc_start)(sc, m);
+ if (error == 0)
+ ifp->if_opackets++;
+ else
+ ifp->if_oerrors++;
+ }
+ LAGG_RUNLOCK(sc);
+}
+
+static struct mbuf *
+lagg_input(struct ifnet *ifp, struct mbuf *m)
+{
+ struct lagg_port *lp = ifp->if_lagg;
+ struct lagg_softc *sc = lp->lp_softc;
+ struct ifnet *scifp = sc->sc_ifp;
+
+ if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ (lp->lp_flags & LAGG_PORT_DISABLED) ||
+ sc->sc_proto == LAGG_PROTO_NONE) {
+ m_freem(m);
+ return (NULL);
+ }
+
+ LAGG_RLOCK(sc);
+ ETHER_BPF_MTAP(scifp, m);
+
+ m = (*sc->sc_input)(sc, lp, m);
+
+ if (m != NULL) {
+ scifp->if_ipackets++;
+ scifp->if_ibytes += m->m_pkthdr.len;
+
+ if (scifp->if_flags & IFF_MONITOR) {
+ m_freem(m);
+ m = NULL;
+ }
+ }
+
+ LAGG_RUNLOCK(sc);
+ return (m);
+}
+
+static int
+lagg_media_change(struct ifnet *ifp)
+{
+ struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+
+ if (sc->sc_ifflags & IFF_DEBUG)
+ printf("%s\n", __func__);
+
+ /* Ignore */
+ return (0);
+}
+
+static void
+lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
+{
+ struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+ struct lagg_port *lp;
+
+ imr->ifm_status = IFM_AVALID;
+ imr->ifm_active = IFM_ETHER | IFM_AUTO;
+
+ LAGG_RLOCK(sc);
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ if (LAGG_PORTACTIVE(lp))
+ imr->ifm_status |= IFM_ACTIVE;
+ }
+ LAGG_RUNLOCK(sc);
+}
+
+static void
+lagg_linkstate(struct lagg_softc *sc)
+{
+ struct lagg_port *lp;
+ int new_link = LINK_STATE_DOWN;
+ uint64_t speed;
+
+ /* Our link is considered up if at least one of our ports is active */
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ if (lp->lp_link_state == LINK_STATE_UP) {
+ new_link = LINK_STATE_UP;
+ break;
+ }
+ }
+ if_link_state_change(sc->sc_ifp, new_link);
+
+ /* Update if_baudrate to reflect the max possible speed */
+ switch (sc->sc_proto) {
+ case LAGG_PROTO_FAILOVER:
+ sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
+ sc->sc_primary->lp_ifp->if_baudrate : 0;
+ break;
+ case LAGG_PROTO_ROUNDROBIN:
+ case LAGG_PROTO_LOADBALANCE:
+ case LAGG_PROTO_ETHERCHANNEL:
+ speed = 0;
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ speed += lp->lp_ifp->if_baudrate;
+ sc->sc_ifp->if_baudrate = speed;
+ break;
+ case LAGG_PROTO_LACP:
+ /* LACP updates if_baudrate itself */
+ break;
+ }
+}
+
+static void
+lagg_port_state(struct ifnet *ifp, int state)
+{
+ struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
+ struct lagg_softc *sc = NULL;
+
+ if (lp != NULL)
+ sc = lp->lp_softc;
+ if (sc == NULL)
+ return;
+
+ LAGG_WLOCK(sc);
+ lagg_linkstate(sc);
+ if (sc->sc_linkstate != NULL)
+ (*sc->sc_linkstate)(lp);
+ LAGG_WUNLOCK(sc);
+}
+
+struct lagg_port *
+lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
+{
+ struct lagg_port *lp_next, *rval = NULL;
+ // int new_link = LINK_STATE_DOWN;
+
+ LAGG_RLOCK_ASSERT(sc);
+ /*
+	 * Search for a port that reports an active link state.
+ */
+
+ if (lp == NULL)
+ goto search;
+ if (LAGG_PORTACTIVE(lp)) {
+ rval = lp;
+ goto found;
+ }
+ if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
+ LAGG_PORTACTIVE(lp_next)) {
+ rval = lp_next;
+ goto found;
+ }
+
+search:
+ SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
+ if (LAGG_PORTACTIVE(lp_next)) {
+ rval = lp_next;
+ goto found;
+ }
+ }
+
+found:
+ if (rval != NULL) {
+ /*
+ * The IEEE 802.1D standard assumes that a lagg with
+ * multiple ports is always full duplex. This is valid
+ * for load sharing laggs and if at least two links
+ * are active. Unfortunately, checking the latter would
+ * be too expensive at this point.
+ XXX
+ if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
+ (sc->sc_count > 1))
+ new_link = LINK_STATE_FULL_DUPLEX;
+ else
+ new_link = rval->lp_link_state;
+ */
+ }
+
+ return (rval);
+}
+
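+/*
+ * Return a contiguous view of len bytes at offset off within the mbuf
+ * chain; if the range spans mbufs it is copied into the caller-supplied
+ * buf.  Returns NULL when the packet is too short.
+ */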
+static const void *
+lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
+{
+ if (m->m_pkthdr.len < (off + len)) {
+ return (NULL);
+ } else if (m->m_len < (off + len)) {
+ m_copydata(m, off, len, buf);
+ return (buf);
+ }
+ return (mtod(m, char *) + off);
+}
+
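+/*
+ * Hash the Ethernet addresses, the VLAN tag and, for IPv4/IPv6, the
+ * network addresses (plus the IPv6 flow label) into a 32-bit flow value.
+ * Callers reduce the result modulo the port count so that all packets of
+ * a flow leave through the same port and stay in order.
+ */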
+uint32_t
+lagg_hashmbuf(struct mbuf *m, uint32_t key)
+{
+ uint16_t etype;
+ uint32_t p = 0;
+ int off;
+ struct ether_header *eh;
+ struct ether_vlan_header vlanbuf;
+ const struct ether_vlan_header *vlan;
+#ifdef INET
+ const struct ip *ip;
+ struct ip ipbuf;
+#endif
+#ifdef INET6
+ const struct ip6_hdr *ip6;
+ struct ip6_hdr ip6buf;
+ uint32_t flow;
+#endif
+
+ off = sizeof(*eh);
+ if (m->m_len < off)
+ goto out;
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
+ p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+
+ /* Special handling for encapsulating VLAN frames */
+ if (m->m_flags & M_VLANTAG) {
+ p = hash32_buf(&m->m_pkthdr.ether_vtag,
+ sizeof(m->m_pkthdr.ether_vtag), p);
+ } else if (etype == ETHERTYPE_VLAN) {
+ vlan = lagg_gethdr(m, off, sizeof(*vlan), &vlanbuf);
+ if (vlan == NULL)
+ goto out;
+
+ p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+ etype = ntohs(vlan->evl_proto);
+ off += sizeof(*vlan) - sizeof(*eh);
+ }
+
+ switch (etype) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ ip = lagg_gethdr(m, off, sizeof(*ip), &ipbuf);
+ if (ip == NULL)
+ goto out;
+
+ p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
+ p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+ break;
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ ip6 = lagg_gethdr(m, off, sizeof(*ip6), &ip6buf);
+ if (ip6 == NULL)
+ goto out;
+
+ p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
+ p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
+ flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
+ p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
+ break;
+#endif
+ }
+out:
+ return (p);
+}
+
+int
+lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+
+ return (ifp->if_transmit)(ifp, m);
+}
+
+/*
+ * Simple round robin aggregation
+ */
+
+static int
+lagg_rr_attach(struct lagg_softc *sc)
+{
+ sc->sc_detach = lagg_rr_detach;
+ sc->sc_start = lagg_rr_start;
+ sc->sc_input = lagg_rr_input;
+ sc->sc_port_create = NULL;
+ sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
+ sc->sc_seq = 0;
+
+ return (0);
+}
+
+static int
+lagg_rr_detach(struct lagg_softc *sc)
+{
+ return (0);
+}
+
+static int
+lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
+{
+ struct lagg_port *lp;
+ uint32_t p;
+
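+	/*
+	 * Pick the p-th port, where p is a lock-free per-packet sequence
+	 * number reduced modulo the current port count.
+	 */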
+ p = atomic_fetchadd_32(&sc->sc_seq, 1);
+ p %= sc->sc_count;
+ lp = SLIST_FIRST(&sc->sc_ports);
+ while (p--)
+ lp = SLIST_NEXT(lp, lp_entries);
+
+ /*
+ * Check the port's link state. This will return the next active
+ * port if the link is down or the port is NULL.
+ */
+ if ((lp = lagg_link_active(sc, lp)) == NULL) {
+ m_freem(m);
+ return (ENOENT);
+ }
+
+ /* Send mbuf */
+ return (lagg_enqueue(lp->lp_ifp, m));
+}
+
+static struct mbuf *
+lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
+{
+ struct ifnet *ifp = sc->sc_ifp;
+
+ /* Just pass in the packet to our lagg device */
+ m->m_pkthdr.rcvif = ifp;
+
+ return (m);
+}
+
+/*
+ * Active failover
+ */
+
+static int
+lagg_fail_attach(struct lagg_softc *sc)
+{
+ sc->sc_detach = lagg_fail_detach;
+ sc->sc_start = lagg_fail_start;
+ sc->sc_input = lagg_fail_input;
+ sc->sc_port_create = NULL;
+ sc->sc_port_destroy = NULL;
+
+ return (0);
+}
+
+static int
+lagg_fail_detach(struct lagg_softc *sc)
+{
+ return (0);
+}
+
+static int
+lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
+{
+ struct lagg_port *lp;
+
+ /* Use the master port if active or the next available port */
+ if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
+ m_freem(m);
+ return (ENOENT);
+ }
+
+ /* Send mbuf */
+ return (lagg_enqueue(lp->lp_ifp, m));
+}
+
+static struct mbuf *
+lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
+{
+ struct ifnet *ifp = sc->sc_ifp;
+ struct lagg_port *tmp_tp;
+
+ if (lp == sc->sc_primary || lagg_failover_rx_all) {
+ m->m_pkthdr.rcvif = ifp;
+ return (m);
+ }
+
+ if (!LAGG_PORTACTIVE(sc->sc_primary)) {
+ tmp_tp = lagg_link_active(sc, sc->sc_primary);
+ /*
+		 * If tmp_tp is NULL, we've received a packet when all
+		 * our links are down.  Weird, but process it anyway.
+ */
+ if ((tmp_tp == NULL || tmp_tp == lp)) {
+ m->m_pkthdr.rcvif = ifp;
+ return (m);
+ }
+ }
+
+ m_freem(m);
+ return (NULL);
+}
+
+/*
+ * Loadbalancing
+ */
+
+static int
+lagg_lb_attach(struct lagg_softc *sc)
+{
+ struct lagg_port *lp;
+ struct lagg_lb *lb;
+
+ if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
+ M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
+ return (ENOMEM);
+
+ sc->sc_detach = lagg_lb_detach;
+ sc->sc_start = lagg_lb_start;
+ sc->sc_input = lagg_lb_input;
+ sc->sc_port_create = lagg_lb_port_create;
+ sc->sc_port_destroy = lagg_lb_port_destroy;
+ sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
+
+ lb->lb_key = arc4random();
+ sc->sc_psc = (caddr_t)lb;
+
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ lagg_lb_port_create(lp);
+
+ return (0);
+}
+
+static int
+lagg_lb_detach(struct lagg_softc *sc)
+{
+ struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
+ if (lb != NULL)
+ free(lb, M_DEVBUF);
+ return (0);
+}
+
+static int
+lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
+{
+ struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
+ struct lagg_port *lp_next;
+ int i = 0;
+
+ bzero(&lb->lb_ports, sizeof(lb->lb_ports));
+ SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
+ if (lp_next == lp)
+ continue;
+ if (i >= LAGG_MAX_PORTS)
+ return (EINVAL);
+ if (sc->sc_ifflags & IFF_DEBUG)
+ printf("%s: port %s at index %d\n",
+ sc->sc_ifname, lp_next->lp_ifname, i);
+ lb->lb_ports[i++] = lp_next;
+ }
+
+ return (0);
+}
+
+static int
+lagg_lb_port_create(struct lagg_port *lp)
+{
+ struct lagg_softc *sc = lp->lp_softc;
+ return (lagg_lb_porttable(sc, NULL));
+}
+
+static void
+lagg_lb_port_destroy(struct lagg_port *lp)
+{
+ struct lagg_softc *sc = lp->lp_softc;
+ lagg_lb_porttable(sc, lp);
+}
+
+static int
+lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
+{
+ struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
+ struct lagg_port *lp = NULL;
+ uint32_t p = 0;
+
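+	/*
+	 * Prefer a flow id supplied by the hardware; otherwise hash the
+	 * frame headers with the per-lagg key so each flow maps to a
+	 * stable port.
+	 */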
+ if (m->m_flags & M_FLOWID)
+ p = m->m_pkthdr.flowid;
+ else
+ p = lagg_hashmbuf(m, lb->lb_key);
+ p %= sc->sc_count;
+ lp = lb->lb_ports[p];
+
+ /*
+ * Check the port's link state. This will return the next active
+ * port if the link is down or the port is NULL.
+ */
+ if ((lp = lagg_link_active(sc, lp)) == NULL) {
+ m_freem(m);
+ return (ENOENT);
+ }
+
+ /* Send mbuf */
+ return (lagg_enqueue(lp->lp_ifp, m));
+}
+
+static struct mbuf *
+lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
+{
+ struct ifnet *ifp = sc->sc_ifp;
+
+ /* Just pass in the packet to our lagg device */
+ m->m_pkthdr.rcvif = ifp;
+
+ return (m);
+}
+
+/*
+ * 802.3ad LACP
+ */
+
+static int
+lagg_lacp_attach(struct lagg_softc *sc)
+{
+ struct lagg_port *lp;
+ int error;
+
+ sc->sc_detach = lagg_lacp_detach;
+ sc->sc_port_create = lacp_port_create;
+ sc->sc_port_destroy = lacp_port_destroy;
+ sc->sc_linkstate = lacp_linkstate;
+ sc->sc_start = lagg_lacp_start;
+ sc->sc_input = lagg_lacp_input;
+ sc->sc_init = lacp_init;
+ sc->sc_stop = lacp_stop;
+ sc->sc_lladdr = lagg_lacp_lladdr;
+ sc->sc_req = lacp_req;
+ sc->sc_portreq = lacp_portreq;
+
+ error = lacp_attach(sc);
+ if (error)
+ return (error);
+
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ lacp_port_create(lp);
+
+ return (error);
+}
+
+static int
+lagg_lacp_detach(struct lagg_softc *sc)
+{
+ struct lagg_port *lp;
+ int error;
+
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ lacp_port_destroy(lp);
+
+ /* unlocking is safe here */
+ LAGG_WUNLOCK(sc);
+ error = lacp_detach(sc);
+ LAGG_WLOCK(sc);
+
+ return (error);
+}
+
+static void
+lagg_lacp_lladdr(struct lagg_softc *sc)
+{
+ struct lagg_port *lp;
+
+ /* purge all the lacp ports */
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ lacp_port_destroy(lp);
+
+ /* add them back in */
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ lacp_port_create(lp);
+}
+
+static int
+lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
+{
+ struct lagg_port *lp;
+
+ lp = lacp_select_tx_port(sc, m);
+ if (lp == NULL) {
+ m_freem(m);
+ return (EBUSY);
+ }
+
+ /* Send mbuf */
+ return (lagg_enqueue(lp->lp_ifp, m));
+}
+
+static struct mbuf *
+lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
+{
+ struct ifnet *ifp = sc->sc_ifp;
+ struct ether_header *eh;
+ u_short etype;
+
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+
+ /* Tap off LACP control messages */
+ if (etype == ETHERTYPE_SLOW) {
+ m = lacp_input(lp, m);
+ if (m == NULL)
+ return (NULL);
+ }
+
+ /*
+ * If the port is not collecting or not in the active aggregator then
+ * free and return.
+ */
+ if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
+ m_freem(m);
+ return (NULL);
+ }
+
+ m->m_pkthdr.rcvif = ifp;
+ return (m);
+}
diff --git a/freebsd/sys/net/if_lagg.h b/freebsd/sys/net/if_lagg.h
new file mode 100644
index 00000000..0034c617
--- /dev/null
+++ b/freebsd/sys/net/if_lagg.h
@@ -0,0 +1,247 @@
+/* $OpenBSD: if_trunk.h,v 1.11 2007/01/31 06:20:19 reyk Exp $ */
+
+/*
+ * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_LAGG_H
+#define _NET_LAGG_H
+
+/*
+ * Global definitions
+ */
+
+#define LAGG_MAX_PORTS 32 /* maximum ports per lagg (logical limit) */
+#define LAGG_MAX_NAMESIZE 32 /* name of a protocol */
+#define LAGG_MAX_STACKING 4 /* maximum number of stacked laggs */
+
+/* Port flags */
+#define LAGG_PORT_SLAVE 0x00000000 /* normal enslaved port */
+#define LAGG_PORT_MASTER 0x00000001 /* primary port */
+#define LAGG_PORT_STACK 0x00000002 /* stacked lagg port */
+#define LAGG_PORT_ACTIVE 0x00000004 /* port is active */
+#define LAGG_PORT_COLLECTING 0x00000008 /* port is receiving frames */
+#define LAGG_PORT_DISTRIBUTING 0x00000010 /* port is sending frames */
+#define LAGG_PORT_DISABLED 0x00000020 /* port is disabled */
+#define LAGG_PORT_BITS "\20\01MASTER\02STACK\03ACTIVE\04COLLECTING" \
+ "\05DISTRIBUTING\06DISABLED"
+
+/* Supported lagg PROTOs */
+#define LAGG_PROTO_NONE 0 /* no lagg protocol defined */
+#define LAGG_PROTO_ROUNDROBIN 1 /* simple round robin */
+#define LAGG_PROTO_FAILOVER 2 /* active failover */
+#define LAGG_PROTO_LOADBALANCE 3 /* loadbalance */
+#define LAGG_PROTO_LACP 4 /* 802.3ad lacp */
+#define LAGG_PROTO_ETHERCHANNEL 5 /* Cisco FEC */
+#define LAGG_PROTO_MAX 6
+
+struct lagg_protos {
+ const char *lpr_name;
+ int lpr_proto;
+};
+
+#define LAGG_PROTO_DEFAULT LAGG_PROTO_FAILOVER
+#define LAGG_PROTOS { \
+ { "failover", LAGG_PROTO_FAILOVER }, \
+ { "fec", LAGG_PROTO_ETHERCHANNEL }, \
+ { "lacp", LAGG_PROTO_LACP }, \
+ { "loadbalance", LAGG_PROTO_LOADBALANCE }, \
+ { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \
+ { "none", LAGG_PROTO_NONE }, \
+ { "default", LAGG_PROTO_DEFAULT } \
+}
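+
+/*
+ * Illustrative sketch (not part of the FreeBSD sources): LAGG_PROTOS is
+ * consumed by table walks of the following shape, e.g. when mapping a
+ * protocol name from userland to a LAGG_PROTO_* constant.
+ */
+#if 0 /* illustrative sketch, not compiled */
+static int
+lagg_proto_by_name(const char *name)
+{
+	static const struct lagg_protos protos[] = LAGG_PROTOS;
+	u_int i;
+
+	for (i = 0; i < sizeof(protos) / sizeof(protos[0]); i++)
+		if (strcmp(protos[i].lpr_name, name) == 0)
+			return (protos[i].lpr_proto);
+	return (-1);		/* unknown protocol name */
+}
+#endif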
+
+/*
+ * lagg ioctls.
+ */
+
+/*
+ * LACP current operational parameters structure.
+ */
+struct lacp_opreq {
+ uint16_t actor_prio;
+ uint8_t actor_mac[ETHER_ADDR_LEN];
+ uint16_t actor_key;
+ uint16_t actor_portprio;
+ uint16_t actor_portno;
+ uint8_t actor_state;
+ uint16_t partner_prio;
+ uint8_t partner_mac[ETHER_ADDR_LEN];
+ uint16_t partner_key;
+ uint16_t partner_portprio;
+ uint16_t partner_portno;
+ uint8_t partner_state;
+};
+
+/* lagg port settings */
+struct lagg_reqport {
+ char rp_ifname[IFNAMSIZ]; /* name of the lagg */
+ char rp_portname[IFNAMSIZ]; /* name of the port */
+ u_int32_t rp_prio; /* port priority */
+ u_int32_t rp_flags; /* port flags */
+ union {
+ struct lacp_opreq rpsc_lacp;
+ } rp_psc;
+#define rp_lacpreq rp_psc.rpsc_lacp
+};
+
+#define SIOCGLAGGPORT _IOWR('i', 140, struct lagg_reqport)
+#define SIOCSLAGGPORT _IOW('i', 141, struct lagg_reqport)
+#define SIOCSLAGGDELPORT _IOW('i', 142, struct lagg_reqport)
+
+/* lagg, ports and options */
+struct lagg_reqall {
+ char ra_ifname[IFNAMSIZ]; /* name of the lagg */
+ u_int ra_proto; /* lagg protocol */
+
+ size_t ra_size; /* size of buffer */
+ struct lagg_reqport *ra_port; /* allocated buffer */
+ int ra_ports; /* total port count */
+ union {
+ struct lacp_opreq rpsc_lacp;
+ } ra_psc;
+#define ra_lacpreq ra_psc.rpsc_lacp
+};
+
+#define SIOCGLAGG _IOWR('i', 143, struct lagg_reqall)
+#define SIOCSLAGG _IOW('i', 144, struct lagg_reqall)
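+
+/*
+ * Illustrative userland sketch (not part of the FreeBSD sources): a
+ * query uses the usual pattern of sizing the port array via
+ * ra_size/ra_ports.  Includes and error handling are elided, and
+ * "lagg0" is a hypothetical interface name.
+ */
+#if 0 /* illustrative userland sketch, not compiled */
+	struct lagg_reqall ra;
+	struct lagg_reqport rp[LAGG_MAX_PORTS];
+	int s = socket(AF_INET, SOCK_DGRAM, 0);
+
+	memset(&ra, 0, sizeof(ra));
+	strlcpy(ra.ra_ifname, "lagg0", sizeof(ra.ra_ifname));
+	ra.ra_size = sizeof(rp);
+	ra.ra_port = rp;
+	ioctl(s, SIOCGLAGG, &ra);	/* fills ra_proto, ra_ports and rp[] */
+#endif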
+
+#ifdef _KERNEL
+/*
+ * Internal kernel part
+ */
+
+#define lp_ifname lp_ifp->if_xname /* interface name */
+#define lp_link_state lp_ifp->if_link_state /* link state */
+
+#define LAGG_PORTACTIVE(_tp) ( \
+ ((_tp)->lp_link_state == LINK_STATE_UP) && \
+ ((_tp)->lp_ifp->if_flags & IFF_UP) \
+)
+
+struct lagg_ifreq {
+ union {
+ struct ifreq ifreq;
+ struct {
+ char ifr_name[IFNAMSIZ];
+ struct sockaddr_storage ifr_ss;
+ } ifreq_storage;
+ } ifreq;
+};
+
+#define sc_ifflags sc_ifp->if_flags /* flags */
+#define sc_ifname sc_ifp->if_xname /* name */
+#define sc_capabilities sc_ifp->if_capabilities /* capabilities */
+
+#define IFCAP_LAGG_MASK 0xffff0000 /* private capabilities */
+#define IFCAP_LAGG_FULLDUPLEX 0x00010000 /* full duplex with >1 ports */
+
+/* Private data used by the loadbalancing protocol */
+struct lagg_lb {
+ u_int32_t lb_key;
+ struct lagg_port *lb_ports[LAGG_MAX_PORTS];
+};
+
+struct lagg_mc {
+ struct ifmultiaddr *mc_ifma;
+ SLIST_ENTRY(lagg_mc) mc_entries;
+};
+
+/* List of interfaces to have the MAC address modified */
+struct lagg_llq {
+ struct ifnet *llq_ifp;
+ uint8_t llq_lladdr[ETHER_ADDR_LEN];
+ SLIST_ENTRY(lagg_llq) llq_entries;
+};
+
+struct lagg_softc {
+ struct ifnet *sc_ifp; /* virtual interface */
+ struct rwlock sc_mtx;
+ int sc_proto; /* lagg protocol */
+ u_int sc_count; /* number of ports */
+ struct lagg_port *sc_primary; /* primary port */
+ struct ifmedia sc_media; /* media config */
+ caddr_t sc_psc; /* protocol data */
+ uint32_t sc_seq; /* sequence counter */
+
+ SLIST_HEAD(__tplhd, lagg_port) sc_ports; /* list of interfaces */
+ SLIST_ENTRY(lagg_softc) sc_entries;
+
+ struct task sc_lladdr_task;
+ SLIST_HEAD(__llqhd, lagg_llq) sc_llq_head; /* interfaces to program
+ the lladdr on */
+
+ /* lagg protocol callbacks */
+ int (*sc_detach)(struct lagg_softc *);
+ int (*sc_start)(struct lagg_softc *, struct mbuf *);
+ struct mbuf *(*sc_input)(struct lagg_softc *, struct lagg_port *,
+ struct mbuf *);
+ int (*sc_port_create)(struct lagg_port *);
+ void (*sc_port_destroy)(struct lagg_port *);
+ void (*sc_linkstate)(struct lagg_port *);
+ void (*sc_init)(struct lagg_softc *);
+ void (*sc_stop)(struct lagg_softc *);
+ void (*sc_lladdr)(struct lagg_softc *);
+ void (*sc_req)(struct lagg_softc *, caddr_t);
+ void (*sc_portreq)(struct lagg_port *, caddr_t);
+#if __FreeBSD_version >= 800000
+ eventhandler_tag vlan_attach;
+ eventhandler_tag vlan_detach;
+#endif
+};
+
+struct lagg_port {
+ struct ifnet *lp_ifp; /* physical interface */
+ struct lagg_softc *lp_softc; /* parent lagg */
+ uint8_t lp_lladdr[ETHER_ADDR_LEN];
+
+ u_char lp_iftype; /* interface type */
+ uint32_t lp_prio; /* port priority */
+ uint32_t lp_flags; /* port flags */
+ int lp_ifflags; /* saved ifp flags */
+ void *lh_cookie; /* if state hook */
+ caddr_t lp_psc; /* protocol data */
+ int lp_detaching; /* ifnet is detaching */
+
+ SLIST_HEAD(__mclhd, lagg_mc) lp_mc_head; /* multicast addresses */
+
+ /* Redirected callbacks */
+ int (*lp_ioctl)(struct ifnet *, u_long, caddr_t);
+ int (*lp_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+
+ SLIST_ENTRY(lagg_port) lp_entries;
+};
+
+#define LAGG_LOCK_INIT(_sc) rw_init(&(_sc)->sc_mtx, "if_lagg rwlock")
+#define LAGG_LOCK_DESTROY(_sc) rw_destroy(&(_sc)->sc_mtx)
+#define LAGG_RLOCK(_sc) rw_rlock(&(_sc)->sc_mtx)
+#define LAGG_WLOCK(_sc) rw_wlock(&(_sc)->sc_mtx)
+#define LAGG_RUNLOCK(_sc) rw_runlock(&(_sc)->sc_mtx)
+#define LAGG_WUNLOCK(_sc) rw_wunlock(&(_sc)->sc_mtx)
+#define LAGG_RLOCK_ASSERT(_sc) rw_assert(&(_sc)->sc_mtx, RA_RLOCKED)
+#define LAGG_WLOCK_ASSERT(_sc) rw_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+
+extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
+extern void (*lagg_linkstate_p)(struct ifnet *, int );
+
+int lagg_enqueue(struct ifnet *, struct mbuf *);
+uint32_t lagg_hashmbuf(struct mbuf *, uint32_t);
+
+#endif /* _KERNEL */
+
+#endif /* _NET_LAGG_H */
diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c
new file mode 100644
index 00000000..b9f78a71
--- /dev/null
+++ b/freebsd/sys/net/if_llatbl.c
@@ -0,0 +1,528 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved.
+ * Copyright (c) 2004-2008 Qing Li. All rights reserved.
+ * Copyright (c) 2008 Kip Macy. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ddb.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/rwlock.h>
+
+#ifdef DDB
+#include <freebsd/ddb/ddb.h>
+#endif
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/net/if_llatbl.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+#include <freebsd/netinet/if_ether.h>
+#include <freebsd/netinet6/in6_var.h>
+#include <freebsd/netinet6/nd6.h>
+
+MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
+
+static VNET_DEFINE(SLIST_HEAD(, lltable), lltables);
+#define V_lltables VNET(lltables)
+
+extern void arprequest(struct ifnet *, struct in_addr *, struct in_addr *,
+ u_char *);
+
+static void vnet_lltable_init(void);
+
+struct rwlock lltable_rwlock;
+RW_SYSINIT(lltable_rwlock, &lltable_rwlock, "lltable_rwlock");
+
+/*
+ * Dump arp state for a specific address family.
+ */
+int
+lltable_sysctl_dumparp(int af, struct sysctl_req *wr)
+{
+ struct lltable *llt;
+ int error = 0;
+
+ LLTABLE_RLOCK();
+ SLIST_FOREACH(llt, &V_lltables, llt_link) {
+ if (llt->llt_af == af) {
+ error = llt->llt_dump(llt, wr);
+ if (error != 0)
+ goto done;
+ }
+ }
+done:
+ LLTABLE_RUNLOCK();
+ return (error);
+}
+
+/*
+ * Deletes an address from the address table.
+ * This function is called by the timer functions
+ * such as arptimer() and nd6_llinfo_timer(), and
+ * the caller does the locking.
+ */
+void
+llentry_free(struct llentry *lle)
+{
+
+ LLE_WLOCK_ASSERT(lle);
+ LIST_REMOVE(lle, lle_next);
+
+ if (lle->la_hold != NULL)
+ m_freem(lle->la_hold);
+
+ LLE_FREE_LOCKED(lle);
+}
+
+/*
+ * Update an llentry for address dst (equivalent to rtalloc for new-arp).
+ * The caller must pass in a valid struct llentry ** (whose entry may
+ * itself be NULL).
+ *
+ * If found, the llentry * is returned referenced and unlocked.
+ */
+int
+llentry_update(struct llentry **llep, struct lltable *lt,
+ struct sockaddr_storage *dst, struct ifnet *ifp)
+{
+ struct llentry *la;
+
+ IF_AFDATA_RLOCK(ifp);
+ la = lla_lookup(lt, LLE_EXCLUSIVE,
+ (struct sockaddr *)dst);
+ IF_AFDATA_RUNLOCK(ifp);
+ if ((la == NULL) &&
+ (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
+ IF_AFDATA_WLOCK(ifp);
+ la = lla_lookup(lt,
+ (LLE_CREATE | LLE_EXCLUSIVE),
+ (struct sockaddr *)dst);
+ IF_AFDATA_WUNLOCK(ifp);
+ }
+ if (la != NULL && (*llep != la)) {
+ if (*llep != NULL)
+ LLE_FREE(*llep);
+ LLE_ADDREF(la);
+ LLE_WUNLOCK(la);
+ *llep = la;
+ } else if (la != NULL)
+ LLE_WUNLOCK(la);
+
+ if (la == NULL)
+ return (ENOENT);
+
+ return (0);
+}
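+
+/*
+ * Illustrative sketch (not part of the FreeBSD sources): a typical
+ * caller keeps a cached llentry pointer and lets llentry_update()
+ * refresh it; on success the entry arrives referenced and unlocked, and
+ * a stale previous entry is released automatically.  use_lladdr() is a
+ * hypothetical consumer.
+ */
+#if 0 /* illustrative sketch, not compiled */
+	static struct llentry *cached_lle;	/* starts out NULL */
+
+	if (llentry_update(&cached_lle, lt, &dst, ifp) == 0)
+		use_lladdr(&cached_lle->ll_addr);
+#endif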
+
+/*
+ * Free all entries from given table and free itself.
+ */
+void
+lltable_free(struct lltable *llt)
+{
+ struct llentry *lle, *next;
+ int i;
+
+ KASSERT(llt != NULL, ("%s: llt is NULL", __func__));
+
+ LLTABLE_WLOCK();
+ SLIST_REMOVE(&V_lltables, llt, lltable, llt_link);
+ LLTABLE_WUNLOCK();
+
+ for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
+ LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
+ int canceled;
+
+ canceled = callout_drain(&lle->la_timer);
+ LLE_WLOCK(lle);
+ if (canceled)
+ LLE_REMREF(lle);
+ llentry_free(lle);
+ }
+ }
+
+ free(llt, M_LLTABLE);
+}
+
+#if 0
+void
+lltable_drain(int af)
+{
+ struct lltable *llt;
+ struct llentry *lle;
+ register int i;
+
+ LLTABLE_RLOCK();
+ SLIST_FOREACH(llt, &V_lltables, llt_link) {
+ if (llt->llt_af != af)
+ continue;
+
+ for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
+ LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
+ LLE_WLOCK(lle);
+ if (lle->la_hold) {
+ m_freem(lle->la_hold);
+ lle->la_hold = NULL;
+ }
+ LLE_WUNLOCK(lle);
+ }
+ }
+ }
+ LLTABLE_RUNLOCK();
+}
+#endif
+
+void
+lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask)
+{
+ struct lltable *llt;
+
+ LLTABLE_RLOCK();
+ SLIST_FOREACH(llt, &V_lltables, llt_link) {
+ if (llt->llt_af != af)
+ continue;
+
+ llt->llt_prefix_free(llt, prefix, mask);
+ }
+ LLTABLE_RUNLOCK();
+}
+
+/*
+ * Create a new lltable.
+ */
+struct lltable *
+lltable_init(struct ifnet *ifp, int af)
+{
+ struct lltable *llt;
+ register int i;
+
+ llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK);
+
+ llt->llt_af = af;
+ llt->llt_ifp = ifp;
+ for (i = 0; i < LLTBL_HASHTBL_SIZE; i++)
+ LIST_INIT(&llt->lle_head[i]);
+
+ LLTABLE_WLOCK();
+ SLIST_INSERT_HEAD(&V_lltables, llt, llt_link);
+ LLTABLE_WUNLOCK();
+
+ return (llt);
+}
+
+/*
+ * Called in route_output when adding/deleting a route to an interface.
+ */
+int
+lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
+{
+ struct sockaddr_dl *dl =
+ (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
+ struct sockaddr *dst = (struct sockaddr *)info->rti_info[RTAX_DST];
+ struct ifnet *ifp;
+ struct lltable *llt;
+ struct llentry *lle;
+ u_int laflags = 0, flags = 0;
+ int error = 0;
+
+ if (dl == NULL || dl->sdl_family != AF_LINK) {
+ log(LOG_INFO, "%s: invalid dl\n", __func__);
+ return EINVAL;
+ }
+ ifp = ifnet_byindex(dl->sdl_index);
+ if (ifp == NULL) {
+ log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n",
+ __func__, dl->sdl_index);
+ return EINVAL;
+ }
+
+ switch (rtm->rtm_type) {
+ case RTM_ADD:
+ if (rtm->rtm_flags & RTF_ANNOUNCE) {
+ flags |= LLE_PUB;
+#ifdef INET
+ if (dst->sa_family == AF_INET &&
+ ((struct sockaddr_inarp *)dst)->sin_other != 0) {
+ struct rtentry *rt;
+ ((struct sockaddr_inarp *)dst)->sin_other = 0;
+ rt = rtalloc1(dst, 0, 0);
+ if (rt == NULL || !(rt->rt_flags & RTF_HOST)) {
+ log(LOG_INFO, "%s: RTM_ADD publish "
+ "(proxy only) is invalid\n",
+ __func__);
+ if (rt)
+ RTFREE_LOCKED(rt);
+ return EINVAL;
+ }
+ RTFREE_LOCKED(rt);
+
+ flags |= LLE_PROXY;
+ }
+#endif
+ }
+ flags |= LLE_CREATE;
+ break;
+
+ case RTM_DELETE:
+ flags |= LLE_DELETE;
+ break;
+
+ case RTM_CHANGE:
+ break;
+
+ default:
+ return EINVAL; /* XXX not implemented yet */
+ }
+
+ /* XXX linked list may be too expensive */
+ LLTABLE_RLOCK();
+ SLIST_FOREACH(llt, &V_lltables, llt_link) {
+ if (llt->llt_af == dst->sa_family &&
+ llt->llt_ifp == ifp)
+ break;
+ }
+ LLTABLE_RUNLOCK();
+ KASSERT(llt != NULL, ("Yep, ugly hacks are bad\n"));
+
+ if (flags & LLE_CREATE)
+ flags |= LLE_EXCLUSIVE;
+
+ IF_AFDATA_LOCK(ifp);
+ lle = lla_lookup(llt, flags, dst);
+ IF_AFDATA_UNLOCK(ifp);
+ if (LLE_IS_VALID(lle)) {
+ if (flags & LLE_CREATE) {
+ /*
+ * If we delay the delete, then a subsequent
+ * "arp add" should look up this entry, reset the
+ * LLE_DELETED flag, and reset the expiration timer
+ */
+ bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen);
+ lle->la_flags |= (flags & (LLE_PUB | LLE_PROXY));
+ lle->la_flags |= LLE_VALID;
+ lle->la_flags &= ~LLE_DELETED;
+#ifdef INET6
+ /*
+ * ND6
+ */
+ if (dst->sa_family == AF_INET6)
+ lle->ln_state = ND6_LLINFO_REACHABLE;
+#endif
+ /*
+ * NB: arp and ndp always set (RTF_STATIC | RTF_HOST)
+ */
+
+ if (rtm->rtm_rmx.rmx_expire == 0) {
+ lle->la_flags |= LLE_STATIC;
+ lle->la_expire = 0;
+ } else
+ lle->la_expire = rtm->rtm_rmx.rmx_expire;
+ laflags = lle->la_flags;
+ LLE_WUNLOCK(lle);
+#ifdef INET
+ /* gratuitous ARP */
+ if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) {
+ arprequest(ifp,
+ &((struct sockaddr_in *)dst)->sin_addr,
+ &((struct sockaddr_in *)dst)->sin_addr,
+ ((laflags & LLE_PROXY) ?
+ (u_char *)IF_LLADDR(ifp) :
+ (u_char *)LLADDR(dl)));
+ }
+#endif
+ } else {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(lle);
+ else
+ LLE_RUNLOCK(lle);
+ }
+ } else if ((lle == NULL) && (flags & LLE_DELETE))
+ error = EINVAL;
+
+ return (error);
+}
+
+static void
+vnet_lltable_init()
+{
+
+ SLIST_INIT(&V_lltables);
+}
+VNET_SYSINIT(vnet_lltable_init, SI_SUB_PSEUDO, SI_ORDER_FIRST,
+ vnet_lltable_init, NULL);
+
+#ifdef DDB
+struct llentry_sa {
+ struct llentry base;
+ struct sockaddr l3_addr;
+};
+
+static void
+llatbl_lle_show(struct llentry_sa *la)
+{
+ struct llentry *lle;
+ uint8_t octet[6];
+
+ lle = &la->base;
+ db_printf("lle=%p\n", lle);
+ db_printf(" lle_next=%p\n", lle->lle_next.le_next);
+ db_printf(" lle_lock=%p\n", &lle->lle_lock);
+ db_printf(" lle_tbl=%p\n", lle->lle_tbl);
+ db_printf(" lle_head=%p\n", lle->lle_head);
+ db_printf(" la_hold=%p\n", lle->la_hold);
+ db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire);
+ db_printf(" la_flags=0x%04x\n", lle->la_flags);
+ db_printf(" la_asked=%u\n", lle->la_asked);
+ db_printf(" la_preempt=%u\n", lle->la_preempt);
+ db_printf(" ln_byhint=%u\n", lle->ln_byhint);
+ db_printf(" ln_state=%d\n", lle->ln_state);
+ db_printf(" ln_router=%u\n", lle->ln_router);
+ db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick);
+ db_printf(" lle_refcnt=%d\n", lle->lle_refcnt);
+ bcopy(&lle->ll_addr.mac16, octet, sizeof(octet));
+ db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n",
+ octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
+ db_printf(" la_timer=%p\n", &lle->la_timer);
+
+ switch (la->l3_addr.sa_family) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct sockaddr_in *sin;
+ char l3s[INET_ADDRSTRLEN];
+
+ sin = (struct sockaddr_in *)&la->l3_addr;
+ inet_ntoa_r(sin->sin_addr, l3s);
+ db_printf(" l3_addr=%s\n", l3s);
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6;
+ char l3s[INET6_ADDRSTRLEN];
+
+ sin6 = (struct sockaddr_in6 *)&la->l3_addr;
+ ip6_sprintf(l3s, &sin6->sin6_addr);
+ db_printf(" l3_addr=%s\n", l3s);
+ break;
+ }
+#endif
+ default:
+ db_printf(" l3_addr=N/A (af=%d)\n", la->l3_addr.sa_family);
+ break;
+ }
+}
+
+DB_SHOW_COMMAND(llentry, db_show_llentry)
+{
+
+ if (!have_addr) {
+ db_printf("usage: show llentry <struct llentry *>\n");
+ return;
+ }
+
+ llatbl_lle_show((struct llentry_sa *)addr);
+}
+
+static void
+llatbl_llt_show(struct lltable *llt)
+{
+ int i;
+ struct llentry *lle;
+
+ db_printf("llt=%p llt_af=%d llt_ifp=%p\n",
+ llt, llt->llt_af, llt->llt_ifp);
+
+ for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
+ LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
+
+ llatbl_lle_show((struct llentry_sa *)lle);
+ if (db_pager_quit)
+ return;
+ }
+ }
+}
+
+DB_SHOW_COMMAND(lltable, db_show_lltable)
+{
+
+ if (!have_addr) {
+ db_printf("usage: show lltable <struct lltable *>\n");
+ return;
+ }
+
+ llatbl_llt_show((struct lltable *)addr);
+}
+
+DB_SHOW_ALL_COMMAND(lltables, db_show_all_lltables)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+ struct lltable *llt;
+
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET_QUIET(vnet_iter);
+#ifdef VIMAGE
+ db_printf("vnet=%p\n", curvnet);
+#endif
+ SLIST_FOREACH(llt, &V_lltables, llt_link) {
+ db_printf("llt=%p llt_af=%d llt_ifp=%p(%s)\n",
+ llt, llt->llt_af, llt->llt_ifp,
+ (llt->llt_ifp != NULL) ?
+ llt->llt_ifp->if_xname : "?");
+ if (have_addr && addr != 0) /* verbose */
+ llatbl_llt_show(llt);
+ if (db_pager_quit) {
+ CURVNET_RESTORE();
+ return;
+ }
+ }
+ CURVNET_RESTORE();
+ }
+}
+#endif
diff --git a/freebsd/sys/net/if_llatbl.h b/freebsd/sys/net/if_llatbl.h
new file mode 100644
index 00000000..9e12362b
--- /dev/null
+++ b/freebsd/sys/net/if_llatbl.h
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved.
+ * Copyright (c) 2004-2008 Qing Li. All rights reserved.
+ * Copyright (c) 2008 Kip Macy. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef _NET_IF_LLATBL_HH_
+#define _NET_IF_LLATBL_HH_
+
+#include <freebsd/sys/_rwlock.h>
+#include <freebsd/netinet/in.h>
+
+struct ifnet;
+struct sysctl_req;
+struct rt_msghdr;
+struct rt_addrinfo;
+
+struct llentry;
+LIST_HEAD(llentries, llentry);
+
+extern struct rwlock lltable_rwlock;
+#define LLTABLE_RLOCK() rw_rlock(&lltable_rwlock)
+#define LLTABLE_RUNLOCK() rw_runlock(&lltable_rwlock)
+#define LLTABLE_WLOCK() rw_wlock(&lltable_rwlock)
+#define LLTABLE_WUNLOCK() rw_wunlock(&lltable_rwlock)
+#define LLTABLE_LOCK_ASSERT() rw_assert(&lltable_rwlock, RA_LOCKED)
+
+/*
+ * Code referencing llentry must at least hold
+ * a shared lock
+ */
+struct llentry {
+ LIST_ENTRY(llentry) lle_next;
+ struct rwlock lle_lock;
+ struct lltable *lle_tbl;
+ struct llentries *lle_head;
+ struct mbuf *la_hold;
+ time_t la_expire;
+ uint16_t la_flags;
+ uint16_t la_asked;
+ uint16_t la_preempt;
+ uint16_t ln_byhint;
+ int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */
+ uint16_t ln_router;
+ time_t ln_ntick;
+ int lle_refcnt;
+
+ union {
+ uint64_t mac_aligned;
+ uint16_t mac16[3];
+ } ll_addr;
+
+ /* XXX af-private? */
+ union {
+ struct callout ln_timer_ch;
+ struct callout la_timer;
+ } lle_timer;
+ /* NB: struct sockaddr must immediately follow */
+};
+
+#define LLE_WLOCK(lle) rw_wlock(&(lle)->lle_lock)
+#define LLE_RLOCK(lle) rw_rlock(&(lle)->lle_lock)
+#define LLE_WUNLOCK(lle) rw_wunlock(&(lle)->lle_lock)
+#define LLE_RUNLOCK(lle) rw_runlock(&(lle)->lle_lock)
+#define LLE_DOWNGRADE(lle) rw_downgrade(&(lle)->lle_lock)
+#define LLE_TRY_UPGRADE(lle) rw_try_upgrade(&(lle)->lle_lock)
+#define LLE_LOCK_INIT(lle) rw_init_flags(&(lle)->lle_lock, "lle", RW_DUPOK)
+#define LLE_LOCK_DESTROY(lle) rw_destroy(&(lle)->lle_lock)
+#define LLE_WLOCK_ASSERT(lle) rw_assert(&(lle)->lle_lock, RA_WLOCKED)
+
+#define LLE_IS_VALID(lle) (((lle) != NULL) && ((lle) != (void *)-1))
+
+#define LLE_ADDREF(lle) do { \
+ LLE_WLOCK_ASSERT(lle); \
+ KASSERT((lle)->lle_refcnt >= 0, \
+ ("negative refcnt %d", (lle)->lle_refcnt)); \
+ (lle)->lle_refcnt++; \
+} while (0)
+
+#define LLE_REMREF(lle) do { \
+ LLE_WLOCK_ASSERT(lle); \
+ KASSERT((lle)->lle_refcnt > 1, \
+ ("bogus refcnt %d", (lle)->lle_refcnt)); \
+ (lle)->lle_refcnt--; \
+} while (0)
+
+#define LLE_FREE_LOCKED(lle) do { \
+ if ((lle)->lle_refcnt <= 1) \
+ (lle)->lle_tbl->llt_free((lle)->lle_tbl, (lle));\
+ else { \
+ (lle)->lle_refcnt--; \
+ LLE_WUNLOCK(lle); \
+ } \
+ /* guard against invalid refs */ \
+	lle = NULL;						\
+} while (0)
+
+#define LLE_FREE(lle) do { \
+ LLE_WLOCK(lle); \
+ if ((lle)->lle_refcnt <= 1) \
+ (lle)->lle_tbl->llt_free((lle)->lle_tbl, (lle));\
+ else { \
+ (lle)->lle_refcnt--; \
+ LLE_WUNLOCK(lle); \
+ } \
+ /* guard against invalid refs */ \
+ lle = NULL; \
+} while (0)
+
+
+#define ln_timer_ch lle_timer.ln_timer_ch
+#define la_timer lle_timer.la_timer
+
+/* XXX bad name */
+#define L3_ADDR(lle) ((struct sockaddr *)(&lle[1]))
+#define L3_ADDR_LEN(lle) (((struct sockaddr *)(&lle[1]))->sa_len)
+
+#ifndef LLTBL_HASHTBL_SIZE
+#define LLTBL_HASHTBL_SIZE 32 /* default hash table size */
+#endif
+
+#ifndef LLTBL_HASHMASK
+#define LLTBL_HASHMASK (LLTBL_HASHTBL_SIZE - 1)
+#endif
+
+struct lltable {
+ SLIST_ENTRY(lltable) llt_link;
+ struct llentries lle_head[LLTBL_HASHTBL_SIZE];
+ int llt_af;
+ struct ifnet *llt_ifp;
+
+ struct llentry * (*llt_new)(const struct sockaddr *, u_int);
+ void (*llt_free)(struct lltable *, struct llentry *);
+ void (*llt_prefix_free)(struct lltable *,
+ const struct sockaddr *prefix,
+ const struct sockaddr *mask);
+ struct llentry * (*llt_lookup)(struct lltable *, u_int flags,
+ const struct sockaddr *l3addr);
+ int (*llt_rtcheck)(struct ifnet *, u_int flags,
+ const struct sockaddr *);
+ int (*llt_dump)(struct lltable *,
+ struct sysctl_req *);
+};
+MALLOC_DECLARE(M_LLTABLE);
+
+/*
+ * flags to be passed to arplookup.
+ */
+#define LLE_DELETED 0x0001 /* entry must be deleted */
+#define LLE_STATIC 0x0002 /* entry is static */
+#define LLE_IFADDR 0x0004 /* entry is interface addr */
+#define LLE_VALID 0x0008 /* ll_addr is valid */
+#define LLE_PROXY 0x0010 /* proxy entry ??? */
+#define LLE_PUB 0x0020 /* publish entry ??? */
+#define LLE_DELETE 0x4000 /* delete on a lookup - match LLE_IFADDR */
+#define LLE_CREATE 0x8000 /* create on a lookup miss */
+#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */
+
+#define LLATBL_HASH(key, mask) \
+ (((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask)
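+
+/*
+ * Illustrative sketch (not part of the FreeBSD sources): LLATBL_HASH()
+ * folds the upper key bytes down into the low byte before masking, so
+ * every byte of the key influences the bucket.  Worked example:
+ * key = 0x0a000102 folds to 0x0a0a0b09, and "& 31" selects bucket 9.
+ * A readable step-by-step equivalent:
+ */
+#if 0 /* illustrative sketch, not compiled */
+static u_int
+llatbl_hash_readable(uint32_t key, uint32_t mask)
+{
+	uint32_t h = key;
+
+	h = (h >> 8) ^ key;	/* fold byte 1 */
+	h = (h >> 8) ^ key;	/* fold byte 2 */
+	h = (h >> 8) ^ key;	/* fold byte 3 */
+	return (h & mask);
+}
+#endif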
+
+struct lltable *lltable_init(struct ifnet *, int);
+void lltable_free(struct lltable *);
+void lltable_prefix_free(int, struct sockaddr *,
+ struct sockaddr *);
+#if 0
+void lltable_drain(int);
+#endif
+int lltable_sysctl_dumparp(int, struct sysctl_req *);
+
+void llentry_free(struct llentry *);
+int llentry_update(struct llentry **, struct lltable *,
+ struct sockaddr_storage *, struct ifnet *);
+
+/*
+ * Generic link layer address lookup function.
+ */
+static __inline struct llentry *
+lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
+{
+ return llt->llt_lookup(llt, flags, l3addr);
+}
+
+int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
+#endif /* _NET_IF_LLATBL_HH_ */
diff --git a/freebsd/sys/net/if_llc.h b/freebsd/sys/net/if_llc.h
new file mode 100644
index 00000000..b72f21bc
--- /dev/null
+++ b/freebsd/sys/net/if_llc.h
@@ -0,0 +1,161 @@
+/* $NetBSD: if_llc.h,v 1.12 1999/11/19 20:41:19 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_llc.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_LLC_HH_
+#define _NET_IF_LLC_HH_
+
+/*
+ * IEEE 802.2 Link Level Control headers, for use in conjunction with
+ * 802.{3,4,5} media access control methods.
+ *
+ * Headers here do not use bit fields due to shortcomings in many
+ * compilers.
+ */
+
+struct llc {
+ u_int8_t llc_dsap;
+ u_int8_t llc_ssap;
+ union {
+ struct {
+ u_int8_t control;
+ u_int8_t format_id;
+ u_int8_t class;
+ u_int8_t window_x2;
+ } __packed type_u;
+ struct {
+ u_int8_t num_snd_x2;
+ u_int8_t num_rcv_x2;
+ } __packed type_i;
+ struct {
+ u_int8_t control;
+ u_int8_t num_rcv_x2;
+ } __packed type_s;
+ struct {
+ u_int8_t control;
+ /*
+ * We cannot put the following fields in a structure because
+ * the structure rounding might cause padding.
+ */
+ u_int8_t frmr_rej_pdu0;
+ u_int8_t frmr_rej_pdu1;
+ u_int8_t frmr_control;
+ u_int8_t frmr_control_ext;
+ u_int8_t frmr_cause;
+ } __packed type_frmr;
+ struct {
+ u_int8_t control;
+ u_int8_t org_code[3];
+ u_int16_t ether_type;
+ } __packed type_snap;
+ struct {
+ u_int8_t control;
+ u_int8_t control_ext;
+ } __packed type_raw;
+ } __packed llc_un;
+} __packed;
+
+struct frmrinfo {
+ u_int8_t frmr_rej_pdu0;
+ u_int8_t frmr_rej_pdu1;
+ u_int8_t frmr_control;
+ u_int8_t frmr_control_ext;
+ u_int8_t frmr_cause;
+} __packed;
+
+#define llc_control llc_un.type_u.control
+#define llc_control_ext llc_un.type_raw.control_ext
+#define llc_fid llc_un.type_u.format_id
+#define llc_class llc_un.type_u.class
+#define llc_window llc_un.type_u.window_x2
+#define llc_frmrinfo llc_un.type_frmr.frmr_rej_pdu0
+#define llc_frmr_pdu0 llc_un.type_frmr.frmr_rej_pdu0
+#define llc_frmr_pdu1 llc_un.type_frmr.frmr_rej_pdu1
+#define llc_frmr_control llc_un.type_frmr.frmr_control
+#define llc_frmr_control_ext llc_un.type_frmr.frmr_control_ext
+#define llc_frmr_cause llc_un.type_frmr.frmr_cause
+#define llc_snap llc_un.type_snap
+
+/*
+ * Don't use sizeof(struct llc_un) for LLC header sizes
+ */
+#define LLC_ISFRAMELEN 4
+#define LLC_UFRAMELEN 3
+#define LLC_FRMRLEN 7
+#define LLC_SNAPFRAMELEN 8
+
+#ifdef CTASSERT
+CTASSERT(sizeof (struct llc) == LLC_SNAPFRAMELEN);
+#endif
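+
+/*
+ * Illustrative sketch (not part of the FreeBSD sources): a receiver
+ * classifies an 802.2 frame by DSAP/SSAP and control byte; for SNAP
+ * frames the encapsulated Ethernet type follows the 3-byte OUI.
+ */
+#if 0 /* illustrative sketch, not compiled */
+static int
+llc_snap_ether_type(const struct llc *l, u_int16_t *etype)
+{
+	/* SNAP: DSAP == SSAP == 0xaa, UI control, LLC_SNAPFRAMELEN hdr */
+	if (l->llc_dsap == LLC_SNAP_LSAP &&
+	    l->llc_ssap == LLC_SNAP_LSAP &&
+	    l->llc_control == LLC_UI) {
+		*etype = ntohs(l->llc_snap.ether_type);
+		return (0);
+	}
+	return (-1);	/* not a SNAP frame */
+}
+#endif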
+
+/*
+ * Unnumbered LLC format commands
+ */
+#define LLC_UI 0x3
+#define LLC_UI_P 0x13
+#define LLC_DISC 0x43
+#define LLC_DISC_P 0x53
+#define LLC_UA 0x63
+#define LLC_UA_P 0x73
+#define LLC_TEST 0xe3
+#define LLC_TEST_P 0xf3
+#define LLC_FRMR 0x87
+#define LLC_FRMR_P 0x97
+#define LLC_DM 0x0f
+#define LLC_DM_P 0x1f
+#define LLC_XID 0xaf
+#define LLC_XID_P 0xbf
+#define LLC_SABME 0x6f
+#define LLC_SABME_P 0x7f
+
+/*
+ * Supervisory LLC commands
+ */
+#define LLC_RR 0x01
+#define LLC_RNR 0x05
+#define LLC_REJ 0x09
+
+/*
+ * Info format - dummy only
+ */
+#define LLC_INFO 0x00
+
+/*
+ * ISO PDTR 10178 contains, among others, the following LSAPs
+ */
+#define LLC_8021D_LSAP 0x42
+#define LLC_X25_LSAP 0x7e
+#define LLC_SNAP_LSAP 0xaa
+#define LLC_ISO_LSAP 0xfe
+
+#endif /* _NET_IF_LLC_HH_ */
diff --git a/freebsd/sys/net/if_loop.c b/freebsd/sys/net/if_loop.c
new file mode 100644
index 00000000..d80bfdad
--- /dev/null
+++ b/freebsd/sys/net/if_loop.c
@@ -0,0 +1,451 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_loop.c 8.2 (Berkeley) 1/9/95
+ * $FreeBSD$
+ */
+
+/*
+ * Loopback interface driver for protocol testing and timing.
+ */
+
+#include <freebsd/local/opt_atalk.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipx.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/machine/bus.h>
+#include <freebsd/sys/rman.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/vnet.h>
+
+#ifdef INET
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#endif
+
+#ifdef IPX
+#include <freebsd/netipx/ipx.h>
+#include <freebsd/netipx/ipx_if.h>
+#endif
+
+#ifdef INET6
+#ifndef INET
+#include <freebsd/netinet/in.h>
+#endif
+#include <freebsd/netinet6/in6_var.h>
+#include <freebsd/netinet/ip6.h>
+#endif
+
+#ifdef NETATALK
+#include <freebsd/netatalk/at.h>
+#include <freebsd/netatalk/at_var.h>
+#endif
+
+#include <freebsd/security/mac/mac_framework.h>
+
+#ifdef TINY_LOMTU
+#define LOMTU (1024+512)
+#elif defined(LARGE_LOMTU)
+#define LOMTU 131072
+#else
+#define LOMTU 16384
+#endif
+
+#define LO_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
+#define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_PSEUDO_HDR | \
+ CSUM_IP_CHECKED | CSUM_IP_VALID | \
+ CSUM_SCTP_VALID)
+
+int loioctl(struct ifnet *, u_long, caddr_t);
+static void lortrequest(int, struct rtentry *, struct rt_addrinfo *);
+int looutput(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct route *ro);
+static int lo_clone_create(struct if_clone *, int, caddr_t);
+static void lo_clone_destroy(struct ifnet *);
+
+VNET_DEFINE(struct ifnet *, loif); /* Used externally */
+
+#ifdef VIMAGE
+static VNET_DEFINE(struct ifc_simple_data, lo_cloner_data);
+static VNET_DEFINE(struct if_clone, lo_cloner);
+#define V_lo_cloner_data VNET(lo_cloner_data)
+#define V_lo_cloner VNET(lo_cloner)
+#endif
+
+IFC_SIMPLE_DECLARE(lo, 1);
+
+static void
+lo_clone_destroy(struct ifnet *ifp)
+{
+
+#ifndef VIMAGE
+ /* XXX: destroying lo0 will lead to panics. */
+ KASSERT(V_loif != ifp, ("%s: destroying lo0", __func__));
+#endif
+
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+}
+
+static int
+lo_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct ifnet *ifp;
+
+ ifp = if_alloc(IFT_LOOP);
+ if (ifp == NULL)
+ return (ENOSPC);
+
+ if_initname(ifp, ifc->ifc_name, unit);
+ ifp->if_mtu = LOMTU;
+ ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
+ ifp->if_ioctl = loioctl;
+ ifp->if_output = looutput;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ ifp->if_capabilities = ifp->if_capenable = IFCAP_HWCSUM;
+ ifp->if_hwassist = LO_CSUM_FEATURES;
+ if_attach(ifp);
+ bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
+ if (V_loif == NULL)
+ V_loif = ifp;
+
+ return (0);
+}
+
+static void
+vnet_loif_init(const void *unused __unused)
+{
+
+#ifdef VIMAGE
+ V_lo_cloner = lo_cloner;
+ V_lo_cloner_data = lo_cloner_data;
+ V_lo_cloner.ifc_data = &V_lo_cloner_data;
+ if_clone_attach(&V_lo_cloner);
+#else
+ if_clone_attach(&lo_cloner);
+#endif
+}
+VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_loif_init, NULL);
+
+#ifdef VIMAGE
+static void
+vnet_loif_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(&V_lo_cloner);
+ V_loif = NULL;
+}
+VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_loif_uninit, NULL);
+#endif
+
+static int
+loop_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ break;
+
+ case MOD_UNLOAD:
+ printf("loop module unload - not possible for this module type\n");
+ return (EINVAL);
+
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t loop_mod = {
+ "if_lo",
+ loop_modevent,
+ 0
+};
+
+DECLARE_MODULE(if_lo, loop_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+
+int
+looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct route *ro)
+{
+ u_int32_t af;
+ struct rtentry *rt = NULL;
+#ifdef MAC
+ int error;
+#endif
+
+ M_ASSERTPKTHDR(m); /* check if we have the packet header */
+
+ if (ro != NULL)
+ rt = ro->ro_rt;
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error) {
+ m_freem(m);
+ return (error);
+ }
+#endif
+
+ if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+ m_freem(m);
+ return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
+ rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+ }
+
+ ifp->if_opackets++;
+ ifp->if_obytes += m->m_pkthdr.len;
+
+ /* BPF writes need to be handled specially. */
+ if (dst->sa_family == AF_UNSPEC) {
+ bcopy(dst->sa_data, &af, sizeof(af));
+ dst->sa_family = af;
+ }
+
+#if 1 /* XXX */
+ switch (dst->sa_family) {
+ case AF_INET:
+ if (ifp->if_capenable & IFCAP_RXCSUM) {
+ m->m_pkthdr.csum_data = 0xffff;
+ m->m_pkthdr.csum_flags = LO_CSUM_SET;
+ }
+		m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES;
+		/* FALLTHROUGH */
+ case AF_INET6:
+ case AF_IPX:
+ case AF_APPLETALK:
+ break;
+ default:
+ printf("looutput: af=%d unexpected\n", dst->sa_family);
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+#endif
+ return (if_simloop(ifp, m, dst->sa_family, 0));
+}
+
+/*
+ * if_simloop()
+ *
+ * This function is to support software emulation of hardware loopback,
+ * i.e., for interfaces with the IFF_SIMPLEX attribute. Since they can't
+ * hear their own broadcasts, we create a copy of the packet that we
+ * would normally receive via a hardware loopback.
+ *
+ * This function expects the packet to include the media header of length hlen.
+ */
+int
+if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen)
+{
+ int isr;
+
+ M_ASSERTPKTHDR(m);
+ m_tag_delete_nonpersistent(m);
+ m->m_pkthdr.rcvif = ifp;
+
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+ /*
+ * Let BPF see incoming packet in the following manner:
+ * - Emulated packet loopback for a simplex interface
+ * (net/if_ethersubr.c)
+ * -> passes it to ifp's BPF
+ * - IPv4/v6 multicast packet loopback (netinet(6)/ip(6)_output.c)
+	 *     -> does not pass it to any BPF
+ * - Normal packet loopback from myself to myself (net/if_loop.c)
+ * -> passes to lo0's BPF (even in case of IPv6, where ifp!=lo0)
+ */
+ if (hlen > 0) {
+ if (bpf_peers_present(ifp->if_bpf)) {
+ bpf_mtap(ifp->if_bpf, m);
+ }
+ } else {
+ if (bpf_peers_present(V_loif->if_bpf)) {
+ if ((m->m_flags & M_MCAST) == 0 || V_loif == ifp) {
+ /* XXX beware sizeof(af) != 4 */
+ u_int32_t af1 = af;
+
+ /*
+ * We need to prepend the address family.
+ */
+ bpf_mtap2(V_loif->if_bpf, &af1, sizeof(af1), m);
+ }
+ }
+ }
+
+ /* Strip away media header */
+ if (hlen > 0) {
+ m_adj(m, hlen);
+#ifndef __NO_STRICT_ALIGNMENT
+ /*
+ * Some archs do not like unaligned data, so
+ * we move data down in the first mbuf.
+ */
+ if (mtod(m, vm_offset_t) & 3) {
+ KASSERT(hlen >= 3, ("if_simloop: hlen too small"));
+ bcopy(m->m_data,
+ (char *)(mtod(m, vm_offset_t)
+ - (mtod(m, vm_offset_t) & 3)),
+ m->m_len);
+ m->m_data -= (mtod(m,vm_offset_t) & 3);
+ }
+#endif
+ }
+
+ /* Deliver to upper layer protocol */
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ isr = NETISR_IP;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ m->m_flags |= M_LOOP;
+ isr = NETISR_IPV6;
+ break;
+#endif
+#ifdef IPX
+ case AF_IPX:
+ isr = NETISR_IPX;
+ break;
+#endif
+#ifdef NETATALK
+ case AF_APPLETALK:
+ isr = NETISR_ATALK2;
+ break;
+#endif
+ default:
+ printf("if_simloop: can't handle af=%d\n", af);
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+ ifp->if_ipackets++;
+ ifp->if_ibytes += m->m_pkthdr.len;
+ netisr_queue(isr, m); /* mbuf is free'd on failure. */
+ return (0);
+}
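+
+/*
+ * Illustrative sketch (not part of the FreeBSD sources): a simplex
+ * Ethernet driver's output path would use if_simloop() in roughly the
+ * following shape, looping a copy of each broadcast back to the stack
+ * before the frame goes to hardware.  ifp, m and dst are the usual
+ * if_output() arguments.
+ */
+#if 0 /* illustrative sketch, not compiled */
+	if ((ifp->if_flags & IFF_SIMPLEX) && (m->m_flags & M_BCAST)) {
+		struct mbuf *n = m_dup(m, M_DONTWAIT);
+
+		/* hlen = ETHER_HDR_LEN so the media header is stripped
+		 * before the copy is queued to the protocol. */
+		if (n != NULL)
+			(void)if_simloop(ifp, n, dst->sa_family,
+			    ETHER_HDR_LEN);
+	}
+#endif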
+
+/* ARGSUSED */
+static void
+lortrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
+{
+
+ RT_LOCK_ASSERT(rt);
+ rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+}
+
+/*
+ * Process an ioctl request.
+ */
+/* ARGSUSED */
+int
+loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct ifaddr *ifa;
+ struct ifreq *ifr = (struct ifreq *)data;
+ int error = 0, mask;
+
+ switch (cmd) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ ifa = (struct ifaddr *)data;
+ ifa->ifa_rtrequest = lortrequest;
+ /*
+ * Everything else is done at a higher level.
+ */
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ if (ifr == 0) {
+ error = EAFNOSUPPORT; /* XXX */
+ break;
+ }
+ switch (ifr->ifr_addr.sa_family) {
+
+#ifdef INET
+ case AF_INET:
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ break;
+#endif
+
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+ case SIOCSIFMTU:
+ ifp->if_mtu = ifr->ifr_mtu;
+ break;
+
+ case SIOCSIFFLAGS:
+ break;
+
+ case SIOCSIFCAP:
+ mask = ifp->if_capenable ^ ifr->ifr_reqcap;
+ if ((mask & IFCAP_RXCSUM) != 0)
+ ifp->if_capenable ^= IFCAP_RXCSUM;
+ if ((mask & IFCAP_TXCSUM) != 0)
+ ifp->if_capenable ^= IFCAP_TXCSUM;
+ if (ifp->if_capenable & IFCAP_TXCSUM)
+ ifp->if_hwassist = LO_CSUM_FEATURES;
+ else
+ ifp->if_hwassist = 0;
+ break;
+
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
diff --git a/freebsd/sys/net/if_media.c b/freebsd/sys/net/if_media.c
new file mode 100644
index 00000000..3da5090a
--- /dev/null
+++ b/freebsd/sys/net/if_media.c
@@ -0,0 +1,566 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $NetBSD: if_media.c,v 1.1 1997/03/17 02:55:15 thorpej Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 1997
+ * Jonathan Stone and Jason R. Thorpe. All rights reserved.
+ *
+ * This software is derived from information provided by Matt Thomas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Jonathan Stone
+ * and Jason R. Thorpe for the NetBSD Project.
+ * 4. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * BSD/OS-compatible network interface media selection.
+ *
+ * Where it is safe to do so, this code strays slightly from the BSD/OS
+ * design. Software which uses the API (device drivers, basically)
+ * shouldn't notice any difference.
+ *
+ * Many thanks to Matt Thomas for providing the information necessary
+ * to implement this interface.
+ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_media.h>
+
+/*
+ * Compile-time options:
+ * IFMEDIA_DEBUG:
+ * turn on implementation-level debug printfs.
+ * Useful for debugging newly-ported drivers.
+ */
+
+static struct ifmedia_entry *ifmedia_match(struct ifmedia *ifm,
+ int flags, int mask);
+
+#ifdef IFMEDIA_DEBUG
+int ifmedia_debug = 0;
+SYSCTL_INT(_debug, OID_AUTO, ifmedia, CTLFLAG_RW, &ifmedia_debug,
+ 0, "if_media debugging msgs");
+static void ifmedia_printword(int);
+#endif
+
+/*
+ * Initialize if_media struct for a specific interface instance.
+ */
+void
+ifmedia_init(ifm, dontcare_mask, change_callback, status_callback)
+ struct ifmedia *ifm;
+ int dontcare_mask;
+ ifm_change_cb_t change_callback;
+ ifm_stat_cb_t status_callback;
+{
+
+ LIST_INIT(&ifm->ifm_list);
+ ifm->ifm_cur = NULL;
+ ifm->ifm_media = 0;
+ ifm->ifm_mask = dontcare_mask; /* IF don't-care bits */
+ ifm->ifm_change = change_callback;
+ ifm->ifm_status = status_callback;
+}
+
+void
+ifmedia_removeall(ifm)
+ struct ifmedia *ifm;
+{
+ struct ifmedia_entry *entry;
+
+ for (entry = LIST_FIRST(&ifm->ifm_list); entry;
+ entry = LIST_FIRST(&ifm->ifm_list)) {
+ LIST_REMOVE(entry, ifm_list);
+ free(entry, M_IFADDR);
+ }
+}
+
+/*
+ * Add a media configuration to the list of supported media
+ * for a specific interface instance.
+ */
+void
+ifmedia_add(ifm, mword, data, aux)
+ struct ifmedia *ifm;
+ int mword;
+ int data;
+ void *aux;
+{
+ register struct ifmedia_entry *entry;
+
+#ifdef IFMEDIA_DEBUG
+ if (ifmedia_debug) {
+ if (ifm == NULL) {
+ printf("ifmedia_add: null ifm\n");
+ return;
+ }
+ printf("Adding entry for ");
+ ifmedia_printword(mword);
+ }
+#endif
+
+ entry = malloc(sizeof(*entry), M_IFADDR, M_NOWAIT);
+ if (entry == NULL)
+ panic("ifmedia_add: can't malloc entry");
+
+ entry->ifm_media = mword;
+ entry->ifm_data = data;
+ entry->ifm_aux = aux;
+
+ LIST_INSERT_HEAD(&ifm->ifm_list, entry, ifm_list);
+}
+
+/*
+ * Add an array of media configurations to the list of
+ * supported media for a specific interface instance.
+ */
+void
+ifmedia_list_add(ifm, lp, count)
+ struct ifmedia *ifm;
+ struct ifmedia_entry *lp;
+ int count;
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ ifmedia_add(ifm, lp[i].ifm_media, lp[i].ifm_data,
+ lp[i].ifm_aux);
+}
+
+/*
+ * Set the default active media.
+ *
+ * Called by device-specific code which is assumed to have already
+ * selected the default media in hardware. We do _not_ call the
+ * media-change callback.
+ */
+void
+ifmedia_set(ifm, target)
+ struct ifmedia *ifm;
+ int target;
+{
+ struct ifmedia_entry *match;
+
+ match = ifmedia_match(ifm, target, ifm->ifm_mask);
+
+ if (match == NULL) {
+ printf("ifmedia_set: no match for 0x%x/0x%x\n",
+ target, ~ifm->ifm_mask);
+ panic("ifmedia_set");
+ }
+ ifm->ifm_cur = match;
+
+#ifdef IFMEDIA_DEBUG
+ if (ifmedia_debug) {
+ printf("ifmedia_set: target ");
+ ifmedia_printword(target);
+ printf("ifmedia_set: setting to ");
+ ifmedia_printword(ifm->ifm_cur->ifm_media);
+ }
+#endif
+}
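+
+/*
+ * Illustrative sketch (not part of the FreeBSD sources): the
+ * ifmedia_init()/ifmedia_add()/ifmedia_set() trio is the standard
+ * driver attach sequence.  sc is a hypothetical softc with an embedded
+ * struct ifmedia, and the two callbacks are hypothetical driver
+ * functions.
+ */
+#if 0 /* illustrative sketch, not compiled */
+	ifmedia_init(&sc->sc_media, 0, foo_media_change, foo_media_status);
+	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_T, 0, NULL);
+	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_TX, 0, NULL);
+	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+	/* Hardware powers up in autoselect; record that without
+	 * invoking the change callback. */
+	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+#endif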
+
+/*
+ * Device-independent media ioctl support function.
+ */
+int
+ifmedia_ioctl(ifp, ifr, ifm, cmd)
+ struct ifnet *ifp;
+ struct ifreq *ifr;
+ struct ifmedia *ifm;
+ u_long cmd;
+{
+ struct ifmedia_entry *match;
+ struct ifmediareq *ifmr = (struct ifmediareq *) ifr;
+ int error = 0, sticky;
+
+ if (ifp == NULL || ifr == NULL || ifm == NULL)
+ return(EINVAL);
+
+ switch (cmd) {
+
+ /*
+ * Set the current media.
+ */
+ case SIOCSIFMEDIA:
+ {
+ struct ifmedia_entry *oldentry;
+ int oldmedia;
+ int newmedia = ifr->ifr_media;
+
+ match = ifmedia_match(ifm, newmedia, ifm->ifm_mask);
+ if (match == NULL) {
+#ifdef IFMEDIA_DEBUG
+ if (ifmedia_debug) {
+ printf(
+ "ifmedia_ioctl: no media found for 0x%x\n",
+ newmedia);
+ }
+#endif
+ return (ENXIO);
+ }
+
+ /*
+ * If no change, we're done.
+		 * XXX Automedia may involve software intervention.
+		 * Keep going in case the connected media changed.
+ * Similarly, if best match changed (kernel debugger?).
+ */
+ if ((IFM_SUBTYPE(newmedia) != IFM_AUTO) &&
+ (newmedia == ifm->ifm_media) &&
+ (match == ifm->ifm_cur))
+ return 0;
+
+ /*
+ * We found a match, now make the driver switch to it.
+ * Make sure to preserve our old media type in case the
+ * driver can't switch.
+ */
+#ifdef IFMEDIA_DEBUG
+ if (ifmedia_debug) {
+ printf("ifmedia_ioctl: switching %s to ",
+ ifp->if_xname);
+ ifmedia_printword(match->ifm_media);
+ }
+#endif
+ oldentry = ifm->ifm_cur;
+ oldmedia = ifm->ifm_media;
+ ifm->ifm_cur = match;
+ ifm->ifm_media = newmedia;
+ error = (*ifm->ifm_change)(ifp);
+ if (error) {
+ ifm->ifm_cur = oldentry;
+ ifm->ifm_media = oldmedia;
+ }
+ break;
+ }
+
+ /*
+ * Get list of available media and current media on interface.
+ */
+ case SIOCGIFMEDIA:
+ {
+ struct ifmedia_entry *ep;
+ int *kptr, count;
+ int usermax; /* user requested max */
+
+ kptr = NULL; /* XXX gcc */
+
+ ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
+ ifm->ifm_cur->ifm_media : IFM_NONE;
+ ifmr->ifm_mask = ifm->ifm_mask;
+ ifmr->ifm_status = 0;
+ (*ifm->ifm_status)(ifp, ifmr);
+
+ count = 0;
+ usermax = 0;
+
+ /*
+		 * Count the media entries on the list. This allows the
+		 * caller to set ifmr->ifm_count to 0 on the first call
+		 * to learn how much space to allocate.
+ */
+ LIST_FOREACH(ep, &ifm->ifm_list, ifm_list)
+ usermax++;
+
+ /*
+ * Don't allow the user to ask for too many
+ * or a negative number.
+ */
+ if (ifmr->ifm_count > usermax)
+ ifmr->ifm_count = usermax;
+ else if (ifmr->ifm_count < 0)
+ return (EINVAL);
+
+ if (ifmr->ifm_count != 0) {
+ kptr = (int *)malloc(ifmr->ifm_count * sizeof(int),
+ M_TEMP, M_NOWAIT);
+
+ if (kptr == NULL)
+ return (ENOMEM);
+ /*
+ * Get the media words from the interface's list.
+ */
+ ep = LIST_FIRST(&ifm->ifm_list);
+ for (; ep != NULL && count < ifmr->ifm_count;
+ ep = LIST_NEXT(ep, ifm_list), count++)
+ kptr[count] = ep->ifm_media;
+
+ if (ep != NULL)
+ error = E2BIG; /* oops! */
+ } else {
+ count = usermax;
+ }
+
+ /*
+ * We do the copyout on E2BIG, because that's
+ * just our way of telling userland that there
+ * are more. This is the behavior I've observed
+		 * under BSD/OS 3.0.
+ */
+ sticky = error;
+ if ((error == 0 || error == E2BIG) && ifmr->ifm_count != 0) {
+ error = copyout((caddr_t)kptr,
+ (caddr_t)ifmr->ifm_ulist,
+ ifmr->ifm_count * sizeof(int));
+ }
+
+ if (error == 0)
+ error = sticky;
+
+ if (ifmr->ifm_count != 0)
+ free(kptr, M_TEMP);
+
+ ifmr->ifm_count = count;
+ break;
+ }
+
+ default:
+ return (EINVAL);
+ }
+
+ return (error);
+}
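+
+/*
+ * Illustrative userland sketch (not part of the FreeBSD sources): the
+ * ifm_count/E2BIG handshake above supports the usual two-call pattern.
+ * Includes and error handling are elided, and "em0" is a hypothetical
+ * interface name.
+ */
+#if 0 /* illustrative userland sketch, not compiled */
+	struct ifmediareq ifmr;
+	int *media;
+	int s = socket(AF_INET, SOCK_DGRAM, 0);
+
+	memset(&ifmr, 0, sizeof(ifmr));
+	strlcpy(ifmr.ifm_name, "em0", sizeof(ifmr.ifm_name));
+	ioctl(s, SIOCGIFMEDIA, &ifmr);	/* ifm_count == 0: learn the count */
+	media = malloc(ifmr.ifm_count * sizeof(int));
+	ifmr.ifm_ulist = media;
+	ioctl(s, SIOCGIFMEDIA, &ifmr);	/* second call: fetch the words */
+#endif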
+
+/*
+ * Find media entry matching a given ifm word.
+ */
+static struct ifmedia_entry *
+ifmedia_match(ifm, target, mask)
+ struct ifmedia *ifm;
+ int target;
+ int mask;
+{
+ struct ifmedia_entry *match, *next;
+
+ match = NULL;
+ mask = ~mask;
+
+ LIST_FOREACH(next, &ifm->ifm_list, ifm_list) {
+ if ((next->ifm_media & mask) == (target & mask)) {
+#if defined(IFMEDIA_DEBUG) || defined(DIAGNOSTIC)
+ if (match) {
+ printf("ifmedia_match: multiple match for "
+ "0x%x/0x%x\n", target, mask);
+ }
+#endif
+ match = next;
+ }
+ }
+
+ return match;
+}
+
+/*
+ * Compute the interface `baudrate' from the media, for the interface
+ * metrics (used by routing daemons).
+ */
+static const struct ifmedia_baudrate ifmedia_baudrate_descriptions[] =
+ IFM_BAUDRATE_DESCRIPTIONS;
+
+uint64_t
+ifmedia_baudrate(int mword)
+{
+ int i;
+
+ for (i = 0; ifmedia_baudrate_descriptions[i].ifmb_word != 0; i++) {
+ if ((mword & (IFM_NMASK|IFM_TMASK)) ==
+ ifmedia_baudrate_descriptions[i].ifmb_word)
+ return (ifmedia_baudrate_descriptions[i].ifmb_baudrate);
+ }
+
+ /* Not known. */
+ return (0);
+}
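+
+/*
+ * Illustrative usage (not part of the FreeBSD sources): drivers
+ * typically feed the active media word through ifmedia_baudrate()
+ * after a link-state change, e.g. with a hypothetical softc:
+ *
+ *	ifp->if_baudrate =
+ *	    ifmedia_baudrate(sc->sc_media.ifm_cur->ifm_media);
+ */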
+
+#ifdef IFMEDIA_DEBUG
+struct ifmedia_description ifm_type_descriptions[] =
+ IFM_TYPE_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_ethernet_descriptions[] =
+ IFM_SUBTYPE_ETHERNET_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_ethernet_option_descriptions[] =
+ IFM_SUBTYPE_ETHERNET_OPTION_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_tokenring_descriptions[] =
+ IFM_SUBTYPE_TOKENRING_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_tokenring_option_descriptions[] =
+ IFM_SUBTYPE_TOKENRING_OPTION_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_fddi_descriptions[] =
+ IFM_SUBTYPE_FDDI_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_fddi_option_descriptions[] =
+ IFM_SUBTYPE_FDDI_OPTION_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_ieee80211_descriptions[] =
+ IFM_SUBTYPE_IEEE80211_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_ieee80211_option_descriptions[] =
+ IFM_SUBTYPE_IEEE80211_OPTION_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_ieee80211_mode_descriptions[] =
+ IFM_SUBTYPE_IEEE80211_MODE_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_atm_descriptions[] =
+ IFM_SUBTYPE_ATM_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_atm_option_descriptions[] =
+ IFM_SUBTYPE_ATM_OPTION_DESCRIPTIONS;
+
+struct ifmedia_description ifm_subtype_shared_descriptions[] =
+ IFM_SUBTYPE_SHARED_DESCRIPTIONS;
+
+struct ifmedia_description ifm_shared_option_descriptions[] =
+ IFM_SHARED_OPTION_DESCRIPTIONS;
+
+struct ifmedia_type_to_subtype {
+ struct ifmedia_description *subtypes;
+ struct ifmedia_description *options;
+ struct ifmedia_description *modes;
+};
+
+/* must be in the same order as IFM_TYPE_DESCRIPTIONS */
+struct ifmedia_type_to_subtype ifmedia_types_to_subtypes[] = {
+ {
+ &ifm_subtype_ethernet_descriptions[0],
+ &ifm_subtype_ethernet_option_descriptions[0],
+ NULL,
+ },
+ {
+ &ifm_subtype_tokenring_descriptions[0],
+ &ifm_subtype_tokenring_option_descriptions[0],
+ NULL,
+ },
+ {
+ &ifm_subtype_fddi_descriptions[0],
+ &ifm_subtype_fddi_option_descriptions[0],
+ NULL,
+ },
+ {
+ &ifm_subtype_ieee80211_descriptions[0],
+ &ifm_subtype_ieee80211_option_descriptions[0],
+ &ifm_subtype_ieee80211_mode_descriptions[0]
+ },
+ {
+ &ifm_subtype_atm_descriptions[0],
+ &ifm_subtype_atm_option_descriptions[0],
+ NULL,
+ },
+};
+
+/*
+ * print a media word.
+ */
+static void
+ifmedia_printword(int ifmw)
+{
+ struct ifmedia_description *desc;
+ struct ifmedia_type_to_subtype *ttos;
+ int seen_option = 0;
+
+ /* Find the top-level interface type. */
+ for (desc = ifm_type_descriptions, ttos = ifmedia_types_to_subtypes;
+ desc->ifmt_string != NULL; desc++, ttos++)
+ if (IFM_TYPE(ifmw) == desc->ifmt_word)
+ break;
+ if (desc->ifmt_string == NULL) {
+ printf("<unknown type>\n");
+ return;
+ }
+	printf("%s", desc->ifmt_string);
+
+ /* Any mode. */
+ for (desc = ttos->modes; desc && desc->ifmt_string != NULL; desc++)
+ if (IFM_MODE(ifmw) == desc->ifmt_word) {
+			printf(" mode %s", desc->ifmt_string);
+ break;
+ }
+
+ /*
+ * Check for the shared subtype descriptions first, then the
+ * type-specific ones.
+ */
+ for (desc = ifm_subtype_shared_descriptions;
+ desc->ifmt_string != NULL; desc++)
+ if (IFM_SUBTYPE(ifmw) == desc->ifmt_word)
+ goto got_subtype;
+
+ for (desc = ttos->subtypes; desc->ifmt_string != NULL; desc++)
+ if (IFM_SUBTYPE(ifmw) == desc->ifmt_word)
+ break;
+ if (desc->ifmt_string == NULL) {
+ printf(" <unknown subtype>\n");
+ return;
+ }
+
+ got_subtype:
+ printf(" %s", desc->ifmt_string);
+
+ /*
+ * Look for shared options.
+ */
+ for (desc = ifm_shared_option_descriptions;
+ desc->ifmt_string != NULL; desc++) {
+ if (ifmw & desc->ifmt_word) {
+ if (seen_option == 0)
+ printf(" <");
+ printf("%s%s", seen_option++ ? "," : "",
+ desc->ifmt_string);
+ }
+ }
+
+ /*
+ * Look for subtype-specific options.
+ */
+ for (desc = ttos->options; desc->ifmt_string != NULL; desc++) {
+ if (ifmw & desc->ifmt_word) {
+ if (seen_option == 0)
+ printf(" <");
+ printf("%s%s", seen_option++ ? "," : "",
+ desc->ifmt_string);
+ }
+ }
+ printf("%s\n", seen_option ? ">" : "");
+}
+#endif /* IFMEDIA_DEBUG */
diff --git a/freebsd/sys/net/if_media.h b/freebsd/sys/net/if_media.h
new file mode 100644
index 00000000..26a3c417
--- /dev/null
+++ b/freebsd/sys/net/if_media.h
@@ -0,0 +1,692 @@
+/* $NetBSD: if_media.h,v 1.3 1997/03/26 01:19:27 thorpej Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 1997
+ * Jonathan Stone and Jason R. Thorpe. All rights reserved.
+ *
+ * This software is derived from information provided by Matt Thomas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Jonathan Stone
+ * and Jason R. Thorpe for the NetBSD Project.
+ * 4. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_MEDIA_HH_
+#define _NET_IF_MEDIA_HH_
+
+/*
+ * Prototypes and definitions for BSD/OS-compatible network interface
+ * media selection.
+ *
+ * Where it is safe to do so, this code strays slightly from the BSD/OS
+ * design. Software which uses the API (device drivers, basically)
+ * shouldn't notice any difference.
+ *
+ * Many thanks to Matt Thomas for providing the information necessary
+ * to implement this interface.
+ */
+
+#ifdef _KERNEL
+
+#include <freebsd/sys/queue.h>
+
+/*
+ * Driver callbacks for media status and change requests.
+ */
+typedef int (*ifm_change_cb_t)(struct ifnet *ifp);
+typedef void (*ifm_stat_cb_t)(struct ifnet *ifp, struct ifmediareq *req);
+
+/*
+ * In-kernel representation of a single supported media type.
+ */
+struct ifmedia_entry {
+ LIST_ENTRY(ifmedia_entry) ifm_list;
+ int ifm_media; /* description of this media attachment */
+ int ifm_data; /* for driver-specific use */
+ void *ifm_aux; /* for driver-specific use */
+};
+
+/*
+ * One of these goes into a network interface's softc structure.
+ * It is used to keep general media state.
+ */
+struct ifmedia {
+ int ifm_mask; /* mask of changes we don't care about */
+ int ifm_media; /* current user-set media word */
+ struct ifmedia_entry *ifm_cur; /* currently selected media */
+ LIST_HEAD(, ifmedia_entry) ifm_list; /* list of all supported media */
+ ifm_change_cb_t ifm_change; /* media change driver callback */
+ ifm_stat_cb_t ifm_status; /* media status driver callback */
+};
+
+/* Initialize an interface's struct if_media field. */
+void ifmedia_init(struct ifmedia *ifm, int dontcare_mask,
+ ifm_change_cb_t change_callback, ifm_stat_cb_t status_callback);
+
+/* Remove all mediums from a struct ifmedia. */
+void	ifmedia_removeall(struct ifmedia *ifm);
+
+/* Add one supported medium to a struct ifmedia. */
+void ifmedia_add(struct ifmedia *ifm, int mword, int data, void *aux);
+
+/* Add an array of media entries (struct ifmedia_entry) to a struct ifmedia. */
+void ifmedia_list_add(struct ifmedia *mp, struct ifmedia_entry *lp,
+ int count);
+
+/* Set default media type on initialization. */
+void ifmedia_set(struct ifmedia *ifm, int mword);
+
+/* Common ioctl function for getting/setting media, called by driver. */
+int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr,
+ struct ifmedia *ifm, u_long cmd);
+
+/* Compute baudrate for a given media. */
+uint64_t ifmedia_baudrate(int);
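+
+/*
+ * For illustration, a minimal sketch of how a NIC driver is expected to
+ * wire this up at attach time (the foo_* callbacks and the sc_media
+ * softc member are hypothetical names, not part of this header):
+ *
+ *	static int
+ *	foo_media_change(struct ifnet *ifp)
+ *	{
+ *		// reprogram the PHY from the selected media word;
+ *		// return 0 on success, an errno value on failure
+ *		return (0);
+ *	}
+ *
+ *	static void
+ *	foo_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
+ *	{
+ *		ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE;
+ *		ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_FDX;
+ *	}
+ *
+ *	ifmedia_init(&sc->sc_media, 0, foo_media_change, foo_media_status);
+ *	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_TX, 0, NULL);
+ *	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ *	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+ *
+ * The driver's ioctl routine then forwards SIOCSIFMEDIA/SIOCGIFMEDIA
+ * requests to ifmedia_ioctl().
+ */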
+
+#endif /*_KERNEL */
+
+/*
+ * if_media Options word:
+ * Bits Use
+ * ---- -------
+ * 0-4 Media variant
+ * 5-7 Media type
+ * 8-15 Type specific options
+ * 16-18 Mode (for multi-mode devices)
+ * 19 RFU
+ * 20-27 Shared (global) options
+ * 28-31 Instance
+ */
+
+/*
+ * Ethernet
+ */
+#define IFM_ETHER 0x00000020
+#define IFM_10_T 3 /* 10BaseT - RJ45 */
+#define IFM_10_2 4 /* 10Base2 - Thinnet */
+#define IFM_10_5 5 /* 10Base5 - AUI */
+#define IFM_100_TX 6 /* 100BaseTX - RJ45 */
+#define IFM_100_FX 7 /* 100BaseFX - Fiber */
+#define IFM_100_T4 8 /* 100BaseT4 - 4 pair cat 3 */
+#define IFM_100_VG 9 /* 100VG-AnyLAN */
+#define IFM_100_T2 10 /* 100BaseT2 */
+#define IFM_1000_SX 11 /* 1000BaseSX - multi-mode fiber */
+#define IFM_10_STP 12 /* 10BaseT over shielded TP */
+#define IFM_10_FL 13 /* 10BaseFL - Fiber */
+#define IFM_1000_LX 14 /* 1000baseLX - single-mode fiber */
+#define IFM_1000_CX 15 /* 1000baseCX - 150ohm STP */
+#define IFM_1000_T 16 /* 1000baseT - 4 pair cat 5 */
+#define IFM_HPNA_1 17 /* HomePNA 1.0 (1Mb/s) */
+#define IFM_10G_LR 18 /* 10GBase-LR 1310nm Single-mode */
+#define IFM_10G_SR 19 /* 10GBase-SR 850nm Multi-mode */
+#define IFM_10G_CX4 20 /* 10GBase CX4 copper */
+#define IFM_2500_SX 21 /* 2500BaseSX - multi-mode fiber */
+#define IFM_10G_TWINAX 22 /* 10GBase Twinax copper */
+#define IFM_10G_TWINAX_LONG 23 /* 10GBase Twinax Long copper */
+#define IFM_10G_LRM 24 /* 10GBase-LRM 850nm Multi-mode */
+#define IFM_UNKNOWN 25 /* media types not defined yet */
+#define IFM_10G_T 26 /* 10GBase-T - RJ45 */
+
+
+/* note 31 is the max! */
+
+#define IFM_ETH_MASTER 0x00000100 /* master mode (1000baseT) */
+#define IFM_ETH_RXPAUSE 0x00000200 /* receive PAUSE frames */
+#define IFM_ETH_TXPAUSE 0x00000400 /* transmit PAUSE frames */
+
+/*
+ * Token ring
+ */
+#define IFM_TOKEN 0x00000040
+#define IFM_TOK_STP4 3 /* Shielded twisted pair 4m - DB9 */
+#define IFM_TOK_STP16 4 /* Shielded twisted pair 16m - DB9 */
+#define IFM_TOK_UTP4 5 /* Unshielded twisted pair 4m - RJ45 */
+#define IFM_TOK_UTP16 6 /* Unshielded twisted pair 16m - RJ45 */
+#define IFM_TOK_STP100 7 /* Shielded twisted pair 100m - DB9 */
+#define IFM_TOK_UTP100 8 /* Unshielded twisted pair 100m - RJ45 */
+#define IFM_TOK_ETR 0x00000200 /* Early token release */
+#define IFM_TOK_SRCRT 0x00000400 /* Enable source routing features */
+#define IFM_TOK_ALLR 0x00000800 /* All routes / Single route bcast */
+#define IFM_TOK_DTR 0x00002000 /* Dedicated token ring */
+#define IFM_TOK_CLASSIC 0x00004000 /* Classic token ring */
+#define IFM_TOK_AUTO 0x00008000 /* Automatic Dedicate/Classic token ring */
+
+/*
+ * FDDI
+ */
+#define IFM_FDDI 0x00000060
+#define IFM_FDDI_SMF 3 /* Single-mode fiber */
+#define IFM_FDDI_MMF 4 /* Multi-mode fiber */
+#define IFM_FDDI_UTP 5 /* CDDI / UTP */
+#define IFM_FDDI_DA 0x00000100 /* Dual attach / single attach */
+
+/*
+ * IEEE 802.11 Wireless
+ */
+#define IFM_IEEE80211 0x00000080
+/* NB: 0,1,2 are auto, manual, none defined below */
+#define IFM_IEEE80211_FH1 3 /* Frequency Hopping 1Mbps */
+#define IFM_IEEE80211_FH2 4 /* Frequency Hopping 2Mbps */
+#define IFM_IEEE80211_DS1 5 /* Direct Sequence 1Mbps */
+#define IFM_IEEE80211_DS2 6 /* Direct Sequence 2Mbps */
+#define IFM_IEEE80211_DS5 7 /* Direct Sequence 5.5Mbps */
+#define IFM_IEEE80211_DS11 8 /* Direct Sequence 11Mbps */
+#define IFM_IEEE80211_DS22 9 /* Direct Sequence 22Mbps */
+#define IFM_IEEE80211_OFDM6 10 /* OFDM 6Mbps */
+#define IFM_IEEE80211_OFDM9 11 /* OFDM 9Mbps */
+#define IFM_IEEE80211_OFDM12 12 /* OFDM 12Mbps */
+#define IFM_IEEE80211_OFDM18 13 /* OFDM 18Mbps */
+#define IFM_IEEE80211_OFDM24 14 /* OFDM 24Mbps */
+#define IFM_IEEE80211_OFDM36 15 /* OFDM 36Mbps */
+#define IFM_IEEE80211_OFDM48 16 /* OFDM 48Mbps */
+#define IFM_IEEE80211_OFDM54 17 /* OFDM 54Mbps */
+#define IFM_IEEE80211_OFDM72 18 /* OFDM 72Mbps */
+#define IFM_IEEE80211_DS354k 19 /* Direct Sequence 354Kbps */
+#define IFM_IEEE80211_DS512k 20 /* Direct Sequence 512Kbps */
+#define IFM_IEEE80211_OFDM3 21 /* OFDM 3Mbps */
+#define IFM_IEEE80211_OFDM4 22 /* OFDM 4.5Mbps */
+#define IFM_IEEE80211_OFDM27 23 /* OFDM 27Mbps */
+/* NB: not enough bits to express MCS fully */
+#define IFM_IEEE80211_MCS 24 /* HT MCS rate */
+
+#define IFM_IEEE80211_ADHOC 0x00000100 /* Operate in Adhoc mode */
+#define IFM_IEEE80211_HOSTAP 0x00000200 /* Operate in Host AP mode */
+#define IFM_IEEE80211_IBSS 0x00000400 /* Operate in IBSS mode */
+#define IFM_IEEE80211_WDS 0x00000800 /* Operate in WDS mode */
+#define IFM_IEEE80211_TURBO 0x00001000 /* Operate in turbo mode */
+#define IFM_IEEE80211_MONITOR 0x00002000 /* Operate in monitor mode */
+#define IFM_IEEE80211_MBSS 0x00004000 /* Operate in MBSS mode */
+
+/* operating mode for multi-mode devices */
+#define IFM_IEEE80211_11A 0x00010000 /* 5Ghz, OFDM mode */
+#define IFM_IEEE80211_11B 0x00020000 /* Direct Sequence mode */
+#define IFM_IEEE80211_11G 0x00030000 /* 2Ghz, CCK mode */
+#define IFM_IEEE80211_FH 0x00040000 /* 2Ghz, GFSK mode */
+#define IFM_IEEE80211_11NA 0x00050000 /* 5Ghz, HT mode */
+#define IFM_IEEE80211_11NG 0x00060000 /* 2Ghz, HT mode */
+
+/*
+ * ATM
+ */
+#define IFM_ATM 0x000000a0
+#define IFM_ATM_UNKNOWN 3
+#define IFM_ATM_UTP_25 4
+#define IFM_ATM_TAXI_100 5
+#define IFM_ATM_TAXI_140 6
+#define IFM_ATM_MM_155 7
+#define IFM_ATM_SM_155 8
+#define IFM_ATM_UTP_155 9
+#define IFM_ATM_MM_622 10
+#define IFM_ATM_SM_622 11
+#define IFM_ATM_VIRTUAL 12
+#define IFM_ATM_SDH 0x00000100 /* SDH instead of SONET */
+#define IFM_ATM_NOSCRAMB 0x00000200 /* no scrambling */
+#define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */
+
+/*
+ * CARP Common Address Redundancy Protocol
+ */
+#define IFM_CARP 0x000000c0
+
+/*
+ * Shared media sub-types
+ */
+#define IFM_AUTO 0 /* Autoselect best media */
+#define IFM_MANUAL 1 /* Jumper/dipswitch selects media */
+#define IFM_NONE 2 /* Deselect all media */
+
+/*
+ * Shared options
+ */
+#define IFM_FDX 0x00100000 /* Force full duplex */
+#define IFM_HDX 0x00200000 /* Force half duplex */
+#define IFM_FLOW 0x00400000 /* enable hardware flow control */
+#define IFM_FLAG0 0x01000000 /* Driver defined flag */
+#define IFM_FLAG1 0x02000000 /* Driver defined flag */
+#define IFM_FLAG2 0x04000000 /* Driver defined flag */
+#define IFM_LOOP 0x08000000 /* Put hardware in loopback */
+
+/*
+ * Masks
+ */
+#define IFM_NMASK 0x000000e0 /* Network type */
+#define IFM_TMASK 0x0000001f /* Media sub-type */
+#define IFM_IMASK 0xf0000000 /* Instance */
+#define IFM_ISHIFT 28 /* Instance shift */
+#define IFM_OMASK 0x0000ff00 /* Type specific options */
+#define IFM_MMASK 0x00070000 /* Mode */
+#define IFM_MSHIFT 16 /* Mode shift */
+#define IFM_GMASK 0x0ff00000 /* Global options */
+
+/* Ethernet flow control mask */
+#define IFM_ETH_FMASK (IFM_FLOW | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)
+
+/*
+ * Status bits
+ */
+#define IFM_AVALID 0x00000001 /* Active bit valid */
+#define IFM_ACTIVE 0x00000002 /* Interface attached to working net */
+
+/* Mask of "status valid" bits, for ifconfig(8). */
+#define IFM_STATUS_VALID IFM_AVALID
+
+/* List of "status valid" bits, for ifconfig(8). */
+#define IFM_STATUS_VALID_LIST { \
+ IFM_AVALID, \
+ 0 \
+}
+
+/*
+ * Macros to extract various bits of information from the media word.
+ */
+#define IFM_TYPE(x) ((x) & IFM_NMASK)
+#define IFM_SUBTYPE(x) ((x) & IFM_TMASK)
+#define IFM_TYPE_OPTIONS(x) ((x) & IFM_OMASK)
+#define IFM_INST(x) (((x) & IFM_IMASK) >> IFM_ISHIFT)
+#define IFM_OPTIONS(x) ((x) & (IFM_OMASK|IFM_GMASK))
+#define IFM_MODE(x) ((x) & IFM_MMASK)
+
+#define IFM_INST_MAX IFM_INST(IFM_IMASK)
+
+/*
+ * Macro to create a media word.
+ */
+#define IFM_MAKEWORD(type, subtype, options, instance) \
+ ((type) | (subtype) | (options) | ((instance) << IFM_ISHIFT))
+#define IFM_MAKEMODE(mode) \
+ (((mode) << IFM_MSHIFT) & IFM_MMASK)
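+
+/*
+ * For example, IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_FDX, 0) yields
+ * 0x00100026 (0x00000020 | 0x00000006 | 0x00100000), from which
+ * IFM_TYPE() recovers IFM_ETHER and IFM_SUBTYPE() recovers IFM_100_TX.
+ */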
+
+/*
+ * NetBSD extension not defined in the BSDI API. This is used in various
+ * places to get the canonical description for a given type/subtype.
+ *
+ * NOTE: all but the top-level type descriptions must contain NO whitespace!
+ * Otherwise, parsing these in ifconfig(8) would be a nightmare.
+ */
+struct ifmedia_description {
+ int ifmt_word; /* word value; may be masked */
+ const char *ifmt_string; /* description */
+};
+
+#define IFM_TYPE_DESCRIPTIONS { \
+ { IFM_ETHER, "Ethernet" }, \
+ { IFM_TOKEN, "Token ring" }, \
+ { IFM_FDDI, "FDDI" }, \
+ { IFM_IEEE80211, "IEEE 802.11 Wireless Ethernet" }, \
+ { IFM_ATM, "ATM" }, \
+ { IFM_CARP, "Common Address Redundancy Protocol" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_ETHERNET_DESCRIPTIONS { \
+ { IFM_10_T, "10baseT/UTP" }, \
+ { IFM_10_2, "10base2/BNC" }, \
+ { IFM_10_5, "10base5/AUI" }, \
+ { IFM_100_TX, "100baseTX" }, \
+ { IFM_100_FX, "100baseFX" }, \
+ { IFM_100_T4, "100baseT4" }, \
+ { IFM_100_VG, "100baseVG" }, \
+ { IFM_100_T2, "100baseT2" }, \
+ { IFM_10_STP, "10baseSTP" }, \
+ { IFM_10_FL, "10baseFL" }, \
+ { IFM_1000_SX, "1000baseSX" }, \
+ { IFM_1000_LX, "1000baseLX" }, \
+ { IFM_1000_CX, "1000baseCX" }, \
+ { IFM_1000_T, "1000baseT" }, \
+ { IFM_HPNA_1, "homePNA" }, \
+ { IFM_10G_LR, "10Gbase-LR" }, \
+ { IFM_10G_SR, "10Gbase-SR" }, \
+ { IFM_10G_CX4, "10Gbase-CX4" }, \
+ { IFM_2500_SX, "2500BaseSX" }, \
+ { IFM_10G_LRM, "10Gbase-LRM" }, \
+ { IFM_10G_TWINAX, "10Gbase-Twinax" }, \
+ { IFM_10G_TWINAX_LONG, "10Gbase-Twinax-Long" }, \
+ { IFM_UNKNOWN, "Unknown" }, \
+ { IFM_10G_T, "10Gbase-T" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_ETHERNET_ALIASES { \
+ { IFM_10_T, "UTP" }, \
+ { IFM_10_T, "10UTP" }, \
+ { IFM_10_2, "BNC" }, \
+ { IFM_10_2, "10BNC" }, \
+ { IFM_10_5, "AUI" }, \
+ { IFM_10_5, "10AUI" }, \
+ { IFM_100_TX, "100TX" }, \
+ { IFM_100_T4, "100T4" }, \
+ { IFM_100_VG, "100VG" }, \
+ { IFM_100_T2, "100T2" }, \
+ { IFM_10_STP, "10STP" }, \
+ { IFM_10_FL, "10FL" }, \
+ { IFM_1000_SX, "1000SX" }, \
+ { IFM_1000_LX, "1000LX" }, \
+ { IFM_1000_CX, "1000CX" }, \
+ { IFM_1000_T, "1000baseTX" }, \
+ { IFM_1000_T, "1000TX" }, \
+ { IFM_1000_T, "1000T" }, \
+ { IFM_2500_SX, "2500SX" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_ETHERNET_OPTION_DESCRIPTIONS { \
+ { IFM_ETH_MASTER, "master" }, \
+ { IFM_ETH_RXPAUSE, "rxpause" }, \
+ { IFM_ETH_TXPAUSE, "txpause" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_TOKENRING_DESCRIPTIONS { \
+ { IFM_TOK_STP4, "DB9/4Mbit" }, \
+ { IFM_TOK_STP16, "DB9/16Mbit" }, \
+ { IFM_TOK_UTP4, "UTP/4Mbit" }, \
+ { IFM_TOK_UTP16, "UTP/16Mbit" }, \
+ { IFM_TOK_STP100, "STP/100Mbit" }, \
+ { IFM_TOK_UTP100, "UTP/100Mbit" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_TOKENRING_ALIASES { \
+ { IFM_TOK_STP4, "4STP" }, \
+ { IFM_TOK_STP16, "16STP" }, \
+ { IFM_TOK_UTP4, "4UTP" }, \
+ { IFM_TOK_UTP16, "16UTP" }, \
+ { IFM_TOK_STP100, "100STP" }, \
+ { IFM_TOK_UTP100, "100UTP" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_TOKENRING_OPTION_DESCRIPTIONS { \
+ { IFM_TOK_ETR, "EarlyTokenRelease" }, \
+ { IFM_TOK_SRCRT, "SourceRouting" }, \
+ { IFM_TOK_ALLR, "AllRoutes" }, \
+ { IFM_TOK_DTR, "Dedicated" }, \
+ { IFM_TOK_CLASSIC,"Classic" }, \
+ { IFM_TOK_AUTO, " " }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_FDDI_DESCRIPTIONS { \
+ { IFM_FDDI_SMF, "Single-mode" }, \
+ { IFM_FDDI_MMF, "Multi-mode" }, \
+ { IFM_FDDI_UTP, "UTP" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_FDDI_ALIASES { \
+ { IFM_FDDI_SMF, "SMF" }, \
+ { IFM_FDDI_MMF, "MMF" }, \
+ { IFM_FDDI_UTP, "CDDI" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_FDDI_OPTION_DESCRIPTIONS { \
+ { IFM_FDDI_DA, "Dual-attach" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_IEEE80211_DESCRIPTIONS { \
+ { IFM_IEEE80211_FH1, "FH/1Mbps" }, \
+ { IFM_IEEE80211_FH2, "FH/2Mbps" }, \
+ { IFM_IEEE80211_DS1, "DS/1Mbps" }, \
+ { IFM_IEEE80211_DS2, "DS/2Mbps" }, \
+ { IFM_IEEE80211_DS5, "DS/5.5Mbps" }, \
+ { IFM_IEEE80211_DS11, "DS/11Mbps" }, \
+ { IFM_IEEE80211_DS22, "DS/22Mbps" }, \
+ { IFM_IEEE80211_OFDM6, "OFDM/6Mbps" }, \
+ { IFM_IEEE80211_OFDM9, "OFDM/9Mbps" }, \
+ { IFM_IEEE80211_OFDM12, "OFDM/12Mbps" }, \
+ { IFM_IEEE80211_OFDM18, "OFDM/18Mbps" }, \
+ { IFM_IEEE80211_OFDM24, "OFDM/24Mbps" }, \
+ { IFM_IEEE80211_OFDM36, "OFDM/36Mbps" }, \
+ { IFM_IEEE80211_OFDM48, "OFDM/48Mbps" }, \
+ { IFM_IEEE80211_OFDM54, "OFDM/54Mbps" }, \
+ { IFM_IEEE80211_OFDM72, "OFDM/72Mbps" }, \
+ { IFM_IEEE80211_DS354k, "DS/354Kbps" }, \
+ { IFM_IEEE80211_DS512k, "DS/512Kbps" }, \
+ { IFM_IEEE80211_OFDM3, "OFDM/3Mbps" }, \
+ { IFM_IEEE80211_OFDM4, "OFDM/4.5Mbps" }, \
+ { IFM_IEEE80211_OFDM27, "OFDM/27Mbps" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_IEEE80211_ALIASES { \
+ { IFM_IEEE80211_FH1, "FH1" }, \
+ { IFM_IEEE80211_FH2, "FH2" }, \
+ { IFM_IEEE80211_FH1, "FrequencyHopping/1Mbps" }, \
+ { IFM_IEEE80211_FH2, "FrequencyHopping/2Mbps" }, \
+ { IFM_IEEE80211_DS1, "DS1" }, \
+ { IFM_IEEE80211_DS2, "DS2" }, \
+ { IFM_IEEE80211_DS5, "DS5.5" }, \
+ { IFM_IEEE80211_DS11, "DS11" }, \
+ { IFM_IEEE80211_DS22, "DS22" }, \
+ { IFM_IEEE80211_DS1, "DirectSequence/1Mbps" }, \
+ { IFM_IEEE80211_DS2, "DirectSequence/2Mbps" }, \
+ { IFM_IEEE80211_DS5, "DirectSequence/5.5Mbps" }, \
+ { IFM_IEEE80211_DS11, "DirectSequence/11Mbps" }, \
+ { IFM_IEEE80211_DS22, "DirectSequence/22Mbps" }, \
+ { IFM_IEEE80211_OFDM6, "OFDM6" }, \
+ { IFM_IEEE80211_OFDM9, "OFDM9" }, \
+ { IFM_IEEE80211_OFDM12, "OFDM12" }, \
+ { IFM_IEEE80211_OFDM18, "OFDM18" }, \
+ { IFM_IEEE80211_OFDM24, "OFDM24" }, \
+ { IFM_IEEE80211_OFDM36, "OFDM36" }, \
+ { IFM_IEEE80211_OFDM48, "OFDM48" }, \
+ { IFM_IEEE80211_OFDM54, "OFDM54" }, \
+ { IFM_IEEE80211_OFDM72, "OFDM72" }, \
+ { IFM_IEEE80211_DS1, "CCK1" }, \
+ { IFM_IEEE80211_DS2, "CCK2" }, \
+ { IFM_IEEE80211_DS5, "CCK5.5" }, \
+ { IFM_IEEE80211_DS11, "CCK11" }, \
+ { IFM_IEEE80211_DS354k, "DS354K" }, \
+ { IFM_IEEE80211_DS354k, "DirectSequence/354Kbps" }, \
+ { IFM_IEEE80211_DS512k, "DS512K" }, \
+ { IFM_IEEE80211_DS512k, "DirectSequence/512Kbps" }, \
+ { IFM_IEEE80211_OFDM3, "OFDM3" }, \
+ { IFM_IEEE80211_OFDM4, "OFDM4.5" }, \
+ { IFM_IEEE80211_OFDM27, "OFDM27" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_IEEE80211_OPTION_DESCRIPTIONS { \
+ { IFM_IEEE80211_ADHOC, "adhoc" }, \
+ { IFM_IEEE80211_HOSTAP, "hostap" }, \
+ { IFM_IEEE80211_IBSS, "ibss" }, \
+ { IFM_IEEE80211_WDS, "wds" }, \
+ { IFM_IEEE80211_TURBO, "turbo" }, \
+ { IFM_IEEE80211_MONITOR, "monitor" }, \
+ { IFM_IEEE80211_MBSS, "mesh" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_IEEE80211_MODE_DESCRIPTIONS { \
+ { IFM_AUTO, "autoselect" }, \
+ { IFM_IEEE80211_11A, "11a" }, \
+ { IFM_IEEE80211_11B, "11b" }, \
+ { IFM_IEEE80211_11G, "11g" }, \
+ { IFM_IEEE80211_FH, "fh" }, \
+ { IFM_IEEE80211_11NA, "11na" }, \
+ { IFM_IEEE80211_11NG, "11ng" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_IEEE80211_MODE_ALIASES { \
+ { IFM_AUTO, "auto" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_ATM_DESCRIPTIONS {	\
+ { IFM_ATM_UNKNOWN, "Unknown" }, \
+ { IFM_ATM_UTP_25, "UTP/25.6MBit" }, \
+ { IFM_ATM_TAXI_100, "Taxi/100MBit" }, \
+ { IFM_ATM_TAXI_140, "Taxi/140MBit" }, \
+ { IFM_ATM_MM_155, "Multi-mode/155MBit" }, \
+ { IFM_ATM_SM_155, "Single-mode/155MBit" }, \
+ { IFM_ATM_UTP_155, "UTP/155MBit" }, \
+ { IFM_ATM_MM_622, "Multi-mode/622MBit" }, \
+ { IFM_ATM_SM_622, "Single-mode/622MBit" }, \
+ { IFM_ATM_VIRTUAL, "Virtual" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_ATM_ALIASES {	\
+ { IFM_ATM_UNKNOWN, "UNKNOWN" }, \
+ { IFM_ATM_UTP_25, "UTP-25" }, \
+ { IFM_ATM_TAXI_100, "TAXI-100" }, \
+ { IFM_ATM_TAXI_140, "TAXI-140" }, \
+ { IFM_ATM_MM_155, "MM-155" }, \
+ { IFM_ATM_SM_155, "SM-155" }, \
+ { IFM_ATM_UTP_155, "UTP-155" }, \
+ { IFM_ATM_MM_622, "MM-622" }, \
+ { IFM_ATM_SM_622, "SM-622" }, \
+ { IFM_ATM_VIRTUAL, "VIRTUAL" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_ATM_OPTION_DESCRIPTIONS { \
+ { IFM_ATM_SDH, "SDH" }, \
+ { IFM_ATM_NOSCRAMB, "Noscramb" }, \
+ { IFM_ATM_UNASSIGNED, "Unassigned" }, \
+ { 0, NULL }, \
+}
+
+
+#define IFM_SUBTYPE_SHARED_DESCRIPTIONS { \
+ { IFM_AUTO, "autoselect" }, \
+ { IFM_MANUAL, "manual" }, \
+ { IFM_NONE, "none" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SUBTYPE_SHARED_ALIASES { \
+ { IFM_AUTO, "auto" }, \
+ { 0, NULL }, \
+}
+
+#define IFM_SHARED_OPTION_DESCRIPTIONS { \
+ { IFM_FDX, "full-duplex" }, \
+ { IFM_HDX, "half-duplex" }, \
+ { IFM_FLOW, "flowcontrol" }, \
+ { IFM_FLAG0, "flag0" }, \
+ { IFM_FLAG1, "flag1" }, \
+ { IFM_FLAG2, "flag2" }, \
+ { IFM_LOOP, "hw-loopback" }, \
+ { 0, NULL }, \
+}
+
+/*
+ * Baudrate descriptions for the various media types.
+ */
+struct ifmedia_baudrate {
+ int ifmb_word; /* media word */
+ uint64_t ifmb_baudrate; /* corresponding baudrate */
+};
+
+#define IFM_BAUDRATE_DESCRIPTIONS { \
+ { IFM_ETHER | IFM_10_T, IF_Mbps(10) }, \
+ { IFM_ETHER | IFM_10_2, IF_Mbps(10) }, \
+ { IFM_ETHER | IFM_10_5, IF_Mbps(10) }, \
+ { IFM_ETHER | IFM_100_TX, IF_Mbps(100) }, \
+ { IFM_ETHER | IFM_100_FX, IF_Mbps(100) }, \
+ { IFM_ETHER | IFM_100_T4, IF_Mbps(100) }, \
+ { IFM_ETHER | IFM_100_VG, IF_Mbps(100) }, \
+ { IFM_ETHER | IFM_100_T2, IF_Mbps(100) }, \
+ { IFM_ETHER | IFM_1000_SX, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_10_STP, IF_Mbps(10) }, \
+ { IFM_ETHER | IFM_10_FL, IF_Mbps(10) }, \
+ { IFM_ETHER | IFM_1000_LX, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_1000_CX, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_1000_T, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_HPNA_1, IF_Mbps(1) }, \
+ { IFM_ETHER | IFM_10G_LR, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_SR, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_CX4, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_2500_SX, IF_Mbps(2500ULL) }, \
+ { IFM_ETHER | IFM_10G_TWINAX, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_TWINAX_LONG, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_LRM, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_T, IF_Gbps(10ULL) }, \
+ \
+ { IFM_TOKEN | IFM_TOK_STP4, IF_Mbps(4) }, \
+ { IFM_TOKEN | IFM_TOK_STP16, IF_Mbps(16) }, \
+ { IFM_TOKEN | IFM_TOK_UTP4, IF_Mbps(4) }, \
+ { IFM_TOKEN | IFM_TOK_UTP16, IF_Mbps(16) }, \
+ \
+ { IFM_FDDI | IFM_FDDI_SMF, IF_Mbps(100) }, \
+ { IFM_FDDI | IFM_FDDI_MMF, IF_Mbps(100) }, \
+ { IFM_FDDI | IFM_FDDI_UTP, IF_Mbps(100) }, \
+ \
+ { IFM_IEEE80211 | IFM_IEEE80211_FH1, IF_Mbps(1) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_FH2, IF_Mbps(2) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_DS2, IF_Mbps(2) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_DS5, IF_Kbps(5500) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_DS11, IF_Mbps(11) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_DS1, IF_Mbps(1) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_DS22, IF_Mbps(22) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_OFDM6, IF_Mbps(6) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_OFDM9, IF_Mbps(9) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_OFDM12, IF_Mbps(12) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_OFDM18, IF_Mbps(18) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_OFDM24, IF_Mbps(24) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_OFDM36, IF_Mbps(36) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_OFDM48, IF_Mbps(48) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_OFDM54, IF_Mbps(54) }, \
+ { IFM_IEEE80211 | IFM_IEEE80211_OFDM72, IF_Mbps(72) }, \
+ \
+ { 0, 0 }, \
+}
+
+/*
+ * Status descriptions for the various media types.
+ */
+struct ifmedia_status_description {
+ int ifms_type;
+ int ifms_valid;
+ int ifms_bit;
+ const char *ifms_string[2];
+};
+
+#define IFM_STATUS_DESC(ifms, bit) \
+ (ifms)->ifms_string[((ifms)->ifms_bit & (bit)) ? 1 : 0]
+
+#define IFM_STATUS_DESCRIPTIONS { \
+ { IFM_ETHER, IFM_AVALID, IFM_ACTIVE, \
+ { "no carrier", "active" } }, \
+ { IFM_FDDI, IFM_AVALID, IFM_ACTIVE, \
+ { "no ring", "inserted" } }, \
+ { IFM_TOKEN, IFM_AVALID, IFM_ACTIVE, \
+ { "no ring", "inserted" } }, \
+ { IFM_IEEE80211, IFM_AVALID, IFM_ACTIVE, \
+ { "no network", "active" } }, \
+ { IFM_ATM, IFM_AVALID, IFM_ACTIVE, \
+ { "no network", "active" } }, \
+ { IFM_CARP, IFM_AVALID, IFM_ACTIVE, \
+ { "backup", "master" } }, \
+ { 0, 0, 0, \
+ { NULL, NULL } } \
+}
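+
+/*
+ * A minimal sketch of how ifconfig(8)-style code can consume these
+ * tables; the media and status words are assumed to come from a prior
+ * SIOCGIFMEDIA request:
+ *
+ *	static const struct ifmedia_status_description ifms_descs[] =
+ *	    IFM_STATUS_DESCRIPTIONS;
+ *	const struct ifmedia_status_description *ifms;
+ *
+ *	for (ifms = ifms_descs; ifms->ifms_valid != 0; ifms++) {
+ *		if (ifms->ifms_type != IFM_TYPE(media) ||
+ *		    (status & ifms->ifms_valid) == 0)
+ *			continue;
+ *		printf("status: %s\n", IFM_STATUS_DESC(ifms, status));
+ *		break;
+ *	}
+ */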
+#endif /* _NET_IF_MEDIA_HH_ */
diff --git a/freebsd/sys/net/if_mib.c b/freebsd/sys/net/if_mib.c
new file mode 100644
index 00000000..ddc75df3
--- /dev/null
+++ b/freebsd/sys/net/if_mib.c
@@ -0,0 +1,171 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright 1996 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_mib.h>
+#include <freebsd/net/vnet.h>
+
+/*
+ * A sysctl(3) MIB for generic interface information. This information
+ * is exported in the net.link.generic branch, which has the following
+ * structure:
+ *
+ * net.link.generic .system - system-wide control variables
+ * and statistics (node)
+ * .ifdata.<ifindex>.general
+ * - what's in `struct ifdata'
+ * plus some other info
+ * .ifdata.<ifindex>.linkspecific
+ *						- a link-type-specific data
+ *						  structure (as might be used
+ *						  by an SNMP agent)
+ *
+ * Perhaps someday we will make addresses accessible via this interface
+ * as well (then there will be four such...). The reason that the
+ * index comes before the last element in the name is because it
+ * seems more orthogonal that way, particularly with the possibility
+ * of other per-interface data living down here as well (e.g., integrated
+ * services stuff).
+ */
+
+SYSCTL_DECL(_net_link_generic);
+SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0,
+ "Variables global to all interfaces");
+
+SYSCTL_VNET_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD,
+ &VNET_NAME(if_index), 0,
+ "Number of configured interfaces");
+
+static int
+sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
+{
+ int *name = (int *)arg1;
+ int error;
+ u_int namelen = arg2;
+ struct ifnet *ifp;
+ struct ifmibdata ifmd;
+ size_t dlen;
+ char *dbuf;
+
+ if (namelen != 2)
+ return EINVAL;
+ if (name[0] <= 0)
+ return (ENOENT);
+ ifp = ifnet_byindex_ref(name[0]);
+ if (ifp == NULL)
+ return (ENOENT);
+
+ switch(name[1]) {
+ default:
+ error = ENOENT;
+ goto out;
+
+ case IFDATA_GENERAL:
+ bzero(&ifmd, sizeof(ifmd));
+ strlcpy(ifmd.ifmd_name, ifp->if_xname, sizeof(ifmd.ifmd_name));
+
+#define COPY(fld) ifmd.ifmd_##fld = ifp->if_##fld
+ COPY(pcount);
+ COPY(data);
+#undef COPY
+ ifmd.ifmd_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifmd.ifmd_snd_len = ifp->if_snd.ifq_len;
+ ifmd.ifmd_snd_maxlen = ifp->if_snd.ifq_maxlen;
+ ifmd.ifmd_snd_drops = ifp->if_snd.ifq_drops;
+
+ error = SYSCTL_OUT(req, &ifmd, sizeof ifmd);
+ if (error || !req->newptr)
+ goto out;
+
+ error = SYSCTL_IN(req, &ifmd, sizeof ifmd);
+ if (error)
+ goto out;
+
+#define DONTCOPY(fld) ifmd.ifmd_data.ifi_##fld = ifp->if_data.ifi_##fld
+ DONTCOPY(type);
+ DONTCOPY(physical);
+ DONTCOPY(addrlen);
+ DONTCOPY(hdrlen);
+ DONTCOPY(mtu);
+ DONTCOPY(metric);
+ DONTCOPY(baudrate);
+#undef DONTCOPY
+#define COPY(fld) ifp->if_##fld = ifmd.ifmd_##fld
+ COPY(data);
+ ifp->if_snd.ifq_maxlen = ifmd.ifmd_snd_maxlen;
+ ifp->if_snd.ifq_drops = ifmd.ifmd_snd_drops;
+#undef COPY
+ break;
+
+ case IFDATA_LINKSPECIFIC:
+ error = SYSCTL_OUT(req, ifp->if_linkmib, ifp->if_linkmiblen);
+ if (error || !req->newptr)
+ goto out;
+
+ error = SYSCTL_IN(req, ifp->if_linkmib, ifp->if_linkmiblen);
+ if (error)
+ goto out;
+ break;
+
+ case IFDATA_DRIVERNAME:
+ /* 20 is enough for 64bit ints */
+ dlen = strlen(ifp->if_dname) + 20 + 1;
+ if ((dbuf = malloc(dlen, M_TEMP, M_NOWAIT)) == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+ if (ifp->if_dunit == IF_DUNIT_NONE)
+ strcpy(dbuf, ifp->if_dname);
+ else
+ sprintf(dbuf, "%s%d", ifp->if_dname, ifp->if_dunit);
+
+ error = SYSCTL_OUT(req, dbuf, strlen(dbuf) + 1);
+ if (error == 0 && req->newptr != NULL)
+ error = EPERM;
+ free(dbuf, M_TEMP);
+ goto out;
+ }
+out:
+ if_rele(ifp);
+ return error;
+}
+
+SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RW,
+ sysctl_ifdata, "Interface table");
+
diff --git a/freebsd/sys/net/if_mib.h b/freebsd/sys/net/if_mib.h
new file mode 100644
index 00000000..e2b80c87
--- /dev/null
+++ b/freebsd/sys/net/if_mib.h
@@ -0,0 +1,171 @@
+/*-
+ * Copyright 1996 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_MIB_H
+#define _NET_IF_MIB_H 1
+
+struct ifmibdata {
+ char ifmd_name[IFNAMSIZ]; /* name of interface */
+ int ifmd_pcount; /* number of promiscuous listeners */
+ int ifmd_flags; /* interface flags */
+ int ifmd_snd_len; /* instantaneous length of send queue */
+ int ifmd_snd_maxlen; /* maximum length of send queue */
+ int ifmd_snd_drops; /* number of drops in send queue */
+ int ifmd_filler[4]; /* for future expansion */
+ struct if_data ifmd_data; /* generic information and statistics */
+};
+
+/*
+ * sysctl MIB tags at the net.link.generic level
+ */
+#define IFMIB_SYSTEM 1 /* non-interface-specific */
+#define IFMIB_IFDATA 2 /* per-interface data table */
+
+/*
+ * MIB tags for the various net.link.generic.ifdata tables
+ */
+#define IFDATA_GENERAL 1 /* generic stats for all kinds of ifaces */
+#define IFDATA_LINKSPECIFIC 2 /* specific to the type of interface */
+#define IFDATA_DRIVERNAME 3 /* driver name and unit */
+
+/*
+ * MIB tags at the net.link.generic.system level
+ */
+#define IFMIB_IFCOUNT 1 /* number of interfaces configured */
+
+/*
+ * MIB tags at the net.link level
+ * All of the other values are IFT_* names defined in if_types.h.
+ */
+#define NETLINK_GENERIC 0 /* functions not specific to a type of iface */
+
+/*
+ * The reason why the IFDATA_LINKSPECIFIC stuff is not under the
+ * net.link.<iftype> branches is twofold:
+ * 1) It's easier to code this way, and doesn't require duplication.
+ * 2) The fourth level under net.link.<iftype> is <pf>; that is to say,
+ * the net.link.<iftype> tree instruments the adaptation layers between
+ * <iftype> and a particular protocol family (e.g., net.link.ether.inet
+ * instruments ARP). This does not really leave room for anything else
+ * that needs to have a well-known number.
+ */
+
+/*
+ * Link-specific MIB structures for various link types.
+ */
+
+/* For IFT_ETHER, IFT_ISO88023, and IFT_STARLAN, as used by RFC 1650 */
+struct ifmib_iso_8802_3 {
+ u_int32_t dot3StatsAlignmentErrors;
+ u_int32_t dot3StatsFCSErrors;
+ u_int32_t dot3StatsSingleCollisionFrames;
+ u_int32_t dot3StatsMultipleCollisionFrames;
+ u_int32_t dot3StatsSQETestErrors;
+ u_int32_t dot3StatsDeferredTransmissions;
+ u_int32_t dot3StatsLateCollisions;
+ u_int32_t dot3StatsExcessiveCollisions;
+ u_int32_t dot3StatsInternalMacTransmitErrors;
+ u_int32_t dot3StatsCarrierSenseErrors;
+ u_int32_t dot3StatsFrameTooLongs;
+ u_int32_t dot3StatsInternalMacReceiveErrors;
+ u_int32_t dot3StatsEtherChipSet;
+ /* Matt Thomas wants this one, not included in RFC 1650: */
+ u_int32_t dot3StatsMissedFrames;
+
+ u_int32_t dot3StatsCollFrequencies[16]; /* NB: index origin */
+
+ u_int32_t dot3Compliance;
+#define DOT3COMPLIANCE_STATS 1
+#define DOT3COMPLIANCE_COLLS 2
+};
+
+/*
+ * Chipset identifiers are normally part of the vendor's enterprise MIB.
+ * However, we don't want to be trying to represent arbitrary-length
+ * OBJECT IDENTIFIERs here (ick!), and the right value is not necessarily
+ * obvious to the driver implementor. So, we define our own identification
+ * mechanism here, and let the agent writer deal with the translation.
+ */
+#define DOT3CHIPSET_VENDOR(x) ((x) >> 16)
+#define DOT3CHIPSET_PART(x) ((x) & 0xffff)
+#define DOT3CHIPSET(v,p) (((v) << 16) + ((p) & 0xffff))
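+
+/*
+ * For example, DOT3CHIPSET(dot3VendorIntel, dot3ChipSetIntel82557)
+ * packs to 0x00020003.
+ */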
+
+/* Driver writers! Add your vendors here! */
+enum dot3Vendors {
+ dot3VendorAMD = 1,
+ dot3VendorIntel = 2,
+ dot3VendorNational = 4,
+ dot3VendorFujitsu = 5,
+ dot3VendorDigital = 6,
+ dot3VendorWesternDigital = 7
+};
+
+/* Driver writers! Add your chipsets here! */
+enum {
+ dot3ChipSetAMD7990 = 1,
+ dot3ChipSetAMD79900 = 2,
+ dot3ChipSetAMD79C940 = 3
+};
+
+enum {
+ dot3ChipSetIntel82586 = 1,
+ dot3ChipSetIntel82596 = 2,
+ dot3ChipSetIntel82557 = 3
+};
+
+enum {
+ dot3ChipSetNational8390 = 1,
+ dot3ChipSetNationalSonic = 2
+};
+
+enum {
+ dot3ChipSetFujitsu86950 = 1
+};
+
+enum {
+ dot3ChipSetDigitalDC21040 = 1,
+ dot3ChipSetDigitalDC21140 = 2,
+ dot3ChipSetDigitalDC21041 = 3,
+ dot3ChipSetDigitalDC21140A = 4,
+ dot3ChipSetDigitalDC21142 = 5
+};
+
+enum {
+ dot3ChipSetWesternDigital83C690 = 1,
+ dot3ChipSetWesternDigital83C790 = 2
+};
+/* END of Ethernet-link MIB stuff */
+
+/*
+ * Put other types of interface MIBs here, or in interface-specific
+ * header files if convenient ones already exist.
+ */
+#endif /* _NET_IF_MIB_H */
diff --git a/freebsd/sys/net/if_sppp.h b/freebsd/sys/net/if_sppp.h
new file mode 100644
index 00000000..ed406b55
--- /dev/null
+++ b/freebsd/sys/net/if_sppp.h
@@ -0,0 +1,234 @@
+/*
+ * Defines for synchronous PPP/Cisco/Frame Relay link level subroutines.
+ */
+/*-
+ * Copyright (C) 1994-2000 Cronyx Engineering.
+ * Author: Serge Vakulenko, <vak@cronyx.ru>
+ *
+ * Heavily revamped to conform to RFC 1661.
+ * Copyright (C) 1997, Joerg Wunsch.
+ *
+ * This software is distributed with NO WARRANTIES, not even the implied
+ * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Authors grant any other persons or organizations permission to use
+ * or modify this software as long as this message is kept with the software,
+ * all derivative works or modified versions.
+ *
+ * From: Version 2.0, Fri Oct 6 20:39:21 MSK 1995
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_SPPP_HH_
+#define _NET_IF_SPPP_HH_ 1
+
+#define IDX_LCP 0 /* idx into state table */
+
+struct slcp {
+ u_long opts; /* LCP options to send (bitfield) */
+ u_long magic; /* local magic number */
+ u_long mru; /* our max receive unit */
+ u_long their_mru; /* their max receive unit */
+ u_long protos; /* bitmask of protos that are started */
+ u_char echoid; /* id of last keepalive echo request */
+ /* restart max values, see RFC 1661 */
+ int timeout;
+ int max_terminate;
+ int max_configure;
+ int max_failure;
+};
+
+#define IDX_IPCP 1 /* idx into state table */
+#define IDX_IPV6CP 2 /* idx into state table */
+
+struct sipcp {
+ u_long opts; /* IPCP options to send (bitfield) */
+ u_int flags;
+#define IPCP_HISADDR_SEEN 1 /* have seen his address already */
+#define IPCP_MYADDR_DYN 2 /* my address is dynamically assigned */
+#define IPCP_MYADDR_SEEN 4	/* have seen my address already */
+#ifdef notdef
+#define IPV6CP_MYIFID_DYN 8 /* my ifid is dynamically assigned */
+#endif
+#define IPV6CP_MYIFID_SEEN 0x10	/* have seen my ifid already */
+#define IPCP_VJ 0x20 /* can use VJ compression */
+ int max_state; /* VJ: Max-Slot-Id */
+ int compress_cid; /* VJ: Comp-Slot-Id */
+};
+
+#define AUTHNAMELEN 64
+#define AUTHKEYLEN 16
+
+struct sauth {
+ u_short proto; /* authentication protocol to use */
+ u_short flags;
+#define AUTHFLAG_NOCALLOUT 1 /* do not require authentication on */
+ /* callouts */
+#define AUTHFLAG_NORECHALLENGE 2 /* do not re-challenge CHAP */
+ u_char name[AUTHNAMELEN]; /* system identification name */
+ u_char secret[AUTHKEYLEN]; /* secret password */
+ u_char challenge[AUTHKEYLEN]; /* random challenge */
+};
+
+#define IDX_PAP 3
+#define IDX_CHAP 4
+
+#define IDX_COUNT (IDX_CHAP + 1) /* bump this when adding cp's! */
+
+/*
+ * Don't change the order of this. Ordering the phases this way allows
+ * for a comparison of ``pp_phase >= PHASE_AUTHENTICATE'' in order to
+ * know whether LCP is up.
+ */
+enum ppp_phase {
+ PHASE_DEAD, PHASE_ESTABLISH, PHASE_TERMINATE,
+ PHASE_AUTHENTICATE, PHASE_NETWORK
+};
+
+#define PP_MTU 1500 /* default/minimal MRU */
+#define PP_MAX_MRU 2048 /* maximal MRU we want to negotiate */
+
+/*
+ * This is a cut-down struct sppp (see below) that can easily be
+ * exported to/imported from userland without the need to include
+ * dozens of kernel-internal header files. It is used by the
+ * SPPPIO[GS]DEFS ioctl commands below.
+ */
+struct sppp_parms {
+ enum ppp_phase pp_phase; /* phase we're currently in */
+ int enable_vj; /* VJ header compression enabled */
+ int enable_ipv6; /*
+ * Enable IPv6 negotiations -- only
+ * needed since each IPv4 i/f auto-
+ * matically gets an IPv6 address
+ * assigned, so we can't use this as
+ * a decision.
+ */
+ struct slcp lcp; /* LCP params */
+ struct sipcp ipcp; /* IPCP params */
+ struct sipcp ipv6cp; /* IPv6CP params */
+ struct sauth myauth; /* auth params, i'm peer */
+ struct sauth hisauth; /* auth params, i'm authenticator */
+};
+
+/*
+ * Definitions to pass struct sppp_parms data down into the kernel
+ * using the SIOC[SG]IFGENERIC ioctl interface.
+ *
+ * In order to use this, create a struct spppreq, fill in the cmd
+ * field with SPPPIOGDEFS, and put the address of this structure into
+ * the ifr_data portion of a struct ifreq. Pass this struct to a
+ * SIOCGIFGENERIC ioctl. Then replace the cmd field by SPPPIOSDEFS,
+ * modify the defs field as desired, and pass the struct ifreq now
+ * to a SIOCSIFGENERIC ioctl.
+ */
+
+#define SPPPIOGDEFS ((caddr_t)(('S' << 24) + (1 << 16) +\
+ sizeof(struct sppp_parms)))
+#define SPPPIOSDEFS ((caddr_t)(('S' << 24) + (2 << 16) +\
+ sizeof(struct sppp_parms)))
+
+struct spppreq {
+ int cmd;
+ struct sppp_parms defs;
+};
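+
+/*
+ * A minimal userland sketch of the sequence described above, given the
+ * usual <sys/ioctl.h>, <sys/sockio.h>, <net/if.h> and <net/if_sppp.h>
+ * includes; sock is assumed to be an open datagram socket and "sppp0"
+ * a placeholder interface name:
+ *
+ *	struct ifreq ifr;
+ *	struct spppreq spr;
+ *
+ *	strlcpy(ifr.ifr_name, "sppp0", sizeof(ifr.ifr_name));
+ *	spr.cmd = (int)SPPPIOGDEFS;
+ *	ifr.ifr_data = (caddr_t)&spr;
+ *	if (ioctl(sock, SIOCGIFGENERIC, &ifr) == -1)
+ *		err(1, "SPPPIOGDEFS");
+ *
+ *	spr.defs.enable_vj = 1;		// request VJ compression
+ *	spr.cmd = (int)SPPPIOSDEFS;
+ *	if (ioctl(sock, SIOCSIFGENERIC, &ifr) == -1)
+ *		err(1, "SPPPIOSDEFS");
+ */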
+
+#ifdef _KERNEL
+struct sppp {
+ struct ifnet *pp_ifp; /* network interface data */
+ struct ifqueue pp_fastq; /* fast output queue */
+ struct ifqueue pp_cpq; /* PPP control protocol queue */
+ struct sppp *pp_next; /* next interface in keepalive list */
+ u_int pp_mode; /* major protocol modes (cisco/ppp/...) */
+ u_int pp_flags; /* sub modes */
+ u_short pp_alivecnt; /* keepalive packets counter */
+ u_short pp_loopcnt; /* loopback detection counter */
+ u_long pp_seq[IDX_COUNT]; /* local sequence number */
+ u_long pp_rseq[IDX_COUNT]; /* remote sequence number */
+ enum ppp_phase pp_phase; /* phase we're currently in */
+ int state[IDX_COUNT]; /* state machine */
+ u_char confid[IDX_COUNT]; /* id of last configuration request */
+ int rst_counter[IDX_COUNT]; /* restart counter */
+ int fail_counter[IDX_COUNT]; /* negotiation failure counter */
+ int confflags; /* administrative configuration flags */
+#define CONF_ENABLE_VJ 0x01 /* VJ header compression enabled */
+#define CONF_ENABLE_IPV6 0x02 /* IPv6 administratively enabled */
+ time_t pp_last_recv; /* time last packet has been received */
+ time_t pp_last_sent; /* time last packet has been sent */
+ struct callout ch[IDX_COUNT]; /* per-proto and if callouts */
+ struct callout pap_my_to_ch; /* PAP needs one more... */
+ struct callout keepalive_callout; /* keepalive callout */
+ struct slcp lcp; /* LCP params */
+ struct sipcp ipcp; /* IPCP params */
+ struct sipcp ipv6cp; /* IPv6CP params */
+ struct sauth myauth; /* auth params, i'm peer */
+ struct sauth hisauth; /* auth params, i'm authenticator */
+ struct slcompress *pp_comp; /* for VJ compression */
+ u_short fr_dlci; /* Frame Relay DLCI number, 16..1023 */
+ u_char fr_status; /* PVC status, active/new/delete */
+ /*
+ * These functions are filled in by sppp_attach(), and are
+ * expected to be used by the lower layer (hardware) drivers
+ * in order to communicate the (un)availability of the
+ * communication link. Lower layer drivers that are always
+ * ready to communicate (like hardware HDLC) can shortcut
+ * pp_up from pp_tls, and pp_down from pp_tlf.
+ */
+ void (*pp_up)(struct sppp *sp);
+ void (*pp_down)(struct sppp *sp);
+ /*
+ * These functions need to be filled in by the lower layer
+ * (hardware) drivers if they request notification from the
+ * PPP layer whether the link is actually required. They
+ * correspond to the tls and tlf actions.
+ */
+ void (*pp_tls)(struct sppp *sp);
+ void (*pp_tlf)(struct sppp *sp);
+ /*
+ * These (optional) functions may be filled by the hardware
+ * driver if any notification of established connections
+ * (currently: IPCP up) is desired (pp_con) or any internal
+ * state change of the interface state machine should be
+ * signaled for monitoring purposes (pp_chg).
+ */
+ void (*pp_con)(struct sppp *sp);
+ void (*pp_chg)(struct sppp *sp, int new_state);
+ /* These two fields are for use by the lower layer */
+ void *pp_lowerp;
+ int pp_loweri;
+ /* Lock */
+ struct mtx mtx;
+ /* if_start () wrapper */
+ void (*if_start) (struct ifnet *);
+ struct callout ifstart_callout; /* if_start () scheduler */
+};
+#define IFP2SP(ifp) ((struct sppp *)(ifp)->if_l2com)
+#define SP2IFP(sp) ((sp)->pp_ifp)
+
+/* bits for pp_flags */
+#define PP_KEEPALIVE 0x01 /* use keepalive protocol */
+#define PP_FR 0x04 /* use Frame Relay protocol instead of PPP */
+ /* 0x04 was PP_TIMO */
+#define PP_CALLIN 0x08 /* we are being called */
+#define PP_NEEDAUTH 0x10 /* remote requested authentication */
+
+void sppp_attach (struct ifnet *ifp);
+void sppp_detach (struct ifnet *ifp);
+void sppp_input (struct ifnet *ifp, struct mbuf *m);
+int sppp_ioctl (struct ifnet *ifp, u_long cmd, void *data);
+struct mbuf *sppp_dequeue (struct ifnet *ifp);
+struct mbuf *sppp_pick(struct ifnet *ifp);
+int sppp_isempty (struct ifnet *ifp);
+void sppp_flush (struct ifnet *ifp);
+
+/* Internal functions */
+void sppp_fr_input (struct sppp *sp, struct mbuf *m);
+struct mbuf *sppp_fr_header (struct sppp *sp, struct mbuf *m, int fam);
+void sppp_fr_keepalive (struct sppp *sp);
+void sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst,
+ u_long *srcmask);
+
+#endif
+
+#endif /* _NET_IF_SPPP_HH_ */
diff --git a/freebsd/sys/net/if_spppfr.c b/freebsd/sys/net/if_spppfr.c
new file mode 100644
index 00000000..fa912363
--- /dev/null
+++ b/freebsd/sys/net/if_spppfr.c
@@ -0,0 +1,636 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Synchronous Frame Relay link level subroutines.
+ * ANSI T1.617-compatible link management signaling
+ * implemented for Frame Relay mode.
+ * Cisco-type Frame Relay framing added, thanks Alex Tutubalin.
+ * Only one DLCI per channel for now.
+ *
+ * Copyright (C) 1994-2000 Cronyx Engineering.
+ * Author: Serge Vakulenko, <vak@cronyx.ru>
+ *
+ * Copyright (C) 1999-2004 Cronyx Engineering.
+ * Author: Kurakin Roman, <rik@cronyx.ru>
+ *
+ * This software is distributed with NO WARRANTIES, not even the implied
+ * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Authors grant any other persons or organisations a permission to use,
+ * modify and redistribute this software in source and binary forms,
+ * as long as this message is kept with the software, all derivative
+ * works or modified versions.
+ *
+ * $Cronyx Id: if_spppfr.c,v 1.1.2.10 2004/06/29 09:02:30 rik Exp $
+ * $FreeBSD$
+ */
+
+#include <freebsd/sys/param.h>
+
+#if defined(__FreeBSD__) && __FreeBSD__ >= 3
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipx.h>
+#endif
+
+#ifdef NetBSD1_3
+# if NetBSD1_3 > 6
+# include "opt_inet.h"
+# include "opt_inet6.h"
+# include "opt_iso.h"
+# endif
+#endif
+
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/syslog.h>
+#if defined(__FreeBSD__) && __FreeBSD__ >= 3
+#include <freebsd/sys/random.h>
+#endif
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+
+#if defined (__OpenBSD__)
+#include <freebsd/sys/md5k.h>
+#else
+#include <freebsd/sys/md5.h>
+#endif
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/route.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/net/slcompress.h>
+
+#if defined (__NetBSD__) || defined (__OpenBSD__)
+#include <freebsd/machine/cpu.h> /* XXX for softnet */
+#endif
+
+#include <freebsd/machine/stdarg.h>
+
+#include <freebsd/netinet/in_var.h>
+#ifdef INET
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+#endif
+
+#if defined (__FreeBSD__) || defined (__OpenBSD__)
+# include <freebsd/netinet/if_ether.h>
+#else
+# include <freebsd/net/ethertypes.h>
+#endif
+
+#ifdef IPX
+#include <freebsd/netipx/ipx.h>
+#include <freebsd/netipx/ipx_if.h>
+#endif
+
+#include <freebsd/net/if_sppp.h>
+
+/*
+ * Frame Relay.
+ */
+#define FR_UI 0x03 /* Unnumbered Information */
+#define FR_IP 0xCC /* IP protocol identifier */
+#define FR_PADDING 0x00 /* NLPID padding */
+#define FR_SIGNALING 0x08 /* Q.933/T1.617 signaling identifier */
+#define FR_SNAP 0x80 /* NLPID snap */
+
+/*
+ * Header flags.
+ */
+#define FR_DE 0x02 /* discard eligibility */
+#define FR_FECN 0x04 /* forward notification */
+#define FR_BECN 0x08 /* backward notification */
+
+/*
+ * Signaling message types.
+ */
+#define FR_MSG_ENQUIRY 0x75 /* status enquiry */
+#define FR_MSG_STATUS 0x7d /* status */
+
+#define FR_ENQUIRY_SIZE 14
+
+/*
+ * Message field types.
+ */
+#define FR_FLD_RTYPE 0x01 /* report type */
+#define FR_FLD_VERIFY 0x03 /* link verification */
+#define FR_FLD_PVC 0x07 /* PVC status */
+#define FR_FLD_LSHIFT5 0x95 /* locking shift 5 */
+
+/*
+ * Report types.
+ */
+#define FR_RTYPE_FULL 0 /* full status */
+#define FR_RTYPE_SHORT 1 /* link verification only */
+#define FR_RTYPE_SINGLE 2 /* single PVC status */
+
+/* PVC status field. */
+#define FR_DLCI_DELETE 0x04 /* PVC is deleted */
+#define FR_DLCI_ACTIVE 0x02 /* PVC is operational */
+#define FR_DLCI_NEW 0x08 /* PVC is new */
+
+struct arp_req {
+ unsigned short htype; /* hardware type = ARPHRD_FRELAY */
+ unsigned short ptype; /* protocol type = ETHERTYPE_IP */
+ unsigned char halen; /* hardware address length = 2 */
+ unsigned char palen; /* protocol address length = 4 */
+ unsigned short op; /* ARP/RARP/InARP request/reply */
+ unsigned short hsource; /* hardware source address */
+ unsigned short psource1; /* protocol source */
+ unsigned short psource2;
+ unsigned short htarget; /* hardware target address */
+ unsigned short ptarget1; /* protocol target */
+ unsigned short ptarget2;
+} __packed;
+
+#if defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD_version < 501113
+#define SPP_FMT "%s%d: "
+#define SPP_ARGS(ifp) (ifp)->if_name, (ifp)->if_unit
+#else
+#define SPP_FMT "%s: "
+#define SPP_ARGS(ifp) (ifp)->if_xname
+#endif
+
+/* almost every function needs these */
+#define STDDCL \
+ struct ifnet *ifp = SP2IFP(sp); \
+ int debug = ifp->if_flags & IFF_DEBUG
+
+static void sppp_fr_arp (struct sppp *sp, struct arp_req *req, u_short addr);
+static void sppp_fr_signal (struct sppp *sp, unsigned char *h, int len);
+
+void sppp_fr_input (struct sppp *sp, struct mbuf *m)
+{
+ STDDCL;
+ u_char *h = mtod (m, u_char*);
+ int isr = -1;
+ int dlci, hlen, proto;
+
+ /* Get the DLCI number. */
+ if (m->m_pkthdr.len < 10) {
+bad: m_freem (m);
+ return;
+ }
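+	/*
+	 * Two-octet Q.922 address: the upper six DLCI bits sit in
+	 * bits 7..2 of octet 0, the lower four in bits 7..4 of octet 1.
+	 */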
+ dlci = (h[0] << 2 & 0x3f0) | (h[1] >> 4 & 0x0f);
+
+ /* Process signaling packets. */
+ if (dlci == 0) {
+ sppp_fr_signal (sp, h, m->m_pkthdr.len);
+ m_freem (m);
+ return;
+ }
+
+ if (dlci != sp->fr_dlci) {
+ if (debug)
+ printf (SPP_FMT "Received packet from invalid DLCI %d\n",
+ SPP_ARGS(ifp), dlci);
+ goto bad;
+ }
+
+ /* Process the packet. */
+ if (ntohs (*(short*) (h+2)) == ETHERTYPE_IP) {
+ /* Prehistoric IP framing? */
+ h[2] = FR_UI;
+ h[3] = FR_IP;
+ }
+ if (h[2] != FR_UI) {
+ if (debug)
+ printf (SPP_FMT "Invalid frame relay header flag 0x%02x\n",
+ SPP_ARGS(ifp), h[2]);
+ goto bad;
+ }
+ switch (h[3]) {
+ default:
+ if (debug)
+ printf (SPP_FMT "Unsupported NLPID 0x%02x\n",
+ SPP_ARGS(ifp), h[3]);
+ goto bad;
+
+ case FR_PADDING:
+ if (h[4] != FR_SNAP) {
+ if (debug)
+ printf (SPP_FMT "Bad NLPID 0x%02x\n",
+ SPP_ARGS(ifp), h[4]);
+ goto bad;
+ }
+ if (h[5] || h[6] || h[7]) {
+ if (debug)
+ printf (SPP_FMT "Bad OID 0x%02x-0x%02x-0x%02x\n",
+ SPP_ARGS(ifp),
+ h[5], h[6], h[7]);
+ goto bad;
+ }
+ proto = ntohs (*(short*) (h+8));
+ if (proto == ETHERTYPE_ARP) {
+ /* Process the ARP request. */
+ if (m->m_pkthdr.len != 10 + sizeof (struct arp_req)) {
+ if (debug)
+ printf (SPP_FMT "Bad ARP request size = %d bytes\n",
+ SPP_ARGS(ifp),
+ m->m_pkthdr.len);
+ goto bad;
+ }
+ sppp_fr_arp (sp, (struct arp_req*) (h + 10),
+ h[0] << 8 | h[1]);
+ m_freem (m);
+ return;
+ }
+ hlen = 10;
+ break;
+
+ case FR_IP:
+ proto = ETHERTYPE_IP;
+ hlen = 4;
+ break;
+ }
+
+ /* Remove frame relay header. */
+ m_adj (m, hlen);
+
+ switch (proto) {
+ default:
+ ++ifp->if_noproto;
+drop: ++ifp->if_ierrors;
+ ++ifp->if_iqdrops;
+ m_freem (m);
+ return;
+#ifdef INET
+ case ETHERTYPE_IP:
+ isr = NETISR_IP;
+ break;
+#endif
+#ifdef IPX
+ case ETHERTYPE_IPX:
+ isr = NETISR_IPX;
+ break;
+#endif
+#ifdef NETATALK
+ case ETHERTYPE_AT:
+ isr = NETISR_ATALK;
+ break;
+#endif
+ }
+
+ if (! (ifp->if_flags & IFF_UP))
+ goto drop;
+
+ /* Check queue. */
+ if (netisr_queue(isr, m)) { /* (0) on success. */
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "protocol queue overflow\n",
+ SPP_ARGS(ifp));
+ }
+}
+
+/*
+ * Add the frame relay header to the packet.
+ * For IP the header length is 4 bytes,
+ * for all other protocols - 10 bytes (RFC 1490).
+ */
+struct mbuf *sppp_fr_header (struct sppp *sp, struct mbuf *m,
+ int family)
+{
+ STDDCL;
+ u_char *h;
+ int type, hlen;
+
+ /* Prepend the space for Frame Relay header. */
+ hlen = (family == AF_INET) ? 4 : 10;
+ M_PREPEND (m, hlen, M_DONTWAIT);
+ if (! m)
+ return 0;
+ h = mtod (m, u_char*);
+
+ /* Fill the header. */
+ h[0] = sp->fr_dlci >> 2 & 0xfc;
+ h[1] = sp->fr_dlci << 4 | 1;
+ h[2] = FR_UI;
+
+ switch (family) {
+ default:
+ if (debug)
+ printf (SPP_FMT "Cannot handle address family %d\n",
+ SPP_ARGS(ifp), family);
+ m_freem (m);
+ return 0;
+#ifdef INET
+ case AF_INET:
+#if 0 /* Crashes on fragmented packets */
+ /*
+ * Set the discard eligibility bit, if:
+ * 1) no fragmentation
+ * 2) length > 400 bytes
+ * 3a) the protocol is UDP or
+ * 3b) TCP data (no control bits)
+ */
+ {
+ struct ip *ip = (struct ip*) (h + hlen);
+ struct tcphdr *tcp = (struct tcphdr*) ((long*)ip + ip->ip_hl);
+
+ if (! (ip->ip_off & ~IP_DF) && ip->ip_len > 400 &&
+ (ip->ip_p == IPPROTO_UDP ||
+ ip->ip_p == IPPROTO_TCP && ! tcp->th_flags))
+ h[1] |= FR_DE;
+ }
+#endif
+ h[3] = FR_IP;
+ return m;
+#endif
+#ifdef IPX
+ case AF_IPX:
+ type = ETHERTYPE_IPX;
+ break;
+#endif
+#ifdef NS
+ case AF_NS:
+ type = 0x8137;
+ break;
+#endif
+#ifdef NETATALK
+ case AF_APPLETALK:
+ type = ETHERTYPE_AT;
+ break;
+#endif
+ }
+ h[3] = FR_PADDING;
+ h[4] = FR_SNAP;
+ h[5] = 0;
+ h[6] = 0;
+ h[7] = 0;
+ *(short*) (h+8) = htons(type);
+ return m;
+}
+
+/*
+ * Send periodical frame relay link verification messages via DLCI 0.
+ * Called every 10 seconds (default value of T391 timer is 10 sec).
+ * Every 6-th message is a full status request
+ * (default value of N391 counter is 6).
+ */
+void sppp_fr_keepalive (struct sppp *sp)
+{
+ STDDCL;
+ unsigned char *h, *p;
+ struct mbuf *m;
+
+ MGETHDR (m, M_DONTWAIT, MT_DATA);
+ if (! m)
+ return;
+ m->m_pkthdr.rcvif = 0;
+
+ h = mtod (m, u_char*);
+ p = h;
+ *p++ = 0; /* DLCI = 0 */
+ *p++ = 1;
+ *p++ = FR_UI;
+ *p++ = FR_SIGNALING; /* NLPID = UNI call control */
+
+ *p++ = 0; /* call reference length = 0 */
+ *p++ = FR_MSG_ENQUIRY; /* message type = status enquiry */
+
+ *p++ = FR_FLD_LSHIFT5; /* locking shift 5 */
+
+ *p++ = FR_FLD_RTYPE; /* report type field */
+ *p++ = 1; /* report type length = 1 */
+ if (sp->pp_seq[IDX_LCP] % 6)
+ *p++ = FR_RTYPE_SHORT; /* link verification only */
+ else
+ *p++ = FR_RTYPE_FULL; /* full status needed */
+
+ if (sp->pp_seq[IDX_LCP] >= 255)
+ sp->pp_seq[IDX_LCP] = 0;
+ *p++ = FR_FLD_VERIFY; /* link verification type field */
+ *p++ = 2; /* link verification field length = 2 */
+ *p++ = ++sp->pp_seq[IDX_LCP]; /* our sequence number */
+ *p++ = sp->pp_rseq[IDX_LCP]; /* last received sequence number */
+
+ m->m_pkthdr.len = m->m_len = p - h;
+ if (debug)
+ printf (SPP_FMT "send lmi packet, seq=%d, rseq=%d\n",
+ SPP_ARGS(ifp), (u_char) sp->pp_seq[IDX_LCP],
+ (u_char) sp->pp_rseq[IDX_LCP]);
+
+ if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
+ ++ifp->if_oerrors;
+}
+
+/*
+ * Process the frame relay Inverse ARP request.
+ */
+static void sppp_fr_arp (struct sppp *sp, struct arp_req *req,
+ u_short his_hardware_address)
+{
+ STDDCL;
+ struct mbuf *m;
+ struct arp_req *reply;
+ u_char *h;
+ u_short my_hardware_address;
+ u_long his_ip_address, my_ip_address;
+
+ if ((ntohs (req->htype) != ARPHRD_FRELAY &&
+ ntohs (req->htype) != 16) || /* for BayNetworks routers */
+ ntohs (req->ptype) != ETHERTYPE_IP) {
+ if (debug)
+ printf (SPP_FMT "Invalid ARP hardware/protocol type = 0x%x/0x%x\n",
+ SPP_ARGS(ifp),
+ ntohs (req->htype), ntohs (req->ptype));
+ return;
+ }
+ if (req->halen != 2 || req->palen != 4) {
+ if (debug)
+ printf (SPP_FMT "Invalid ARP hardware/protocol address length = %d/%d\n",
+ SPP_ARGS(ifp),
+ req->halen, req->palen);
+ return;
+ }
+ switch (ntohs (req->op)) {
+ default:
+ if (debug)
+ printf (SPP_FMT "Invalid ARP op = 0x%x\n",
+ SPP_ARGS(ifp), ntohs (req->op));
+ return;
+
+ case ARPOP_INVREPLY:
+ /* Ignore. */
+ return;
+
+ case ARPOP_INVREQUEST:
+ my_hardware_address = ntohs (req->htarget);
+ his_ip_address = ntohs (req->psource1) << 16 |
+ ntohs (req->psource2);
+ my_ip_address = ntohs (req->ptarget1) << 16 |
+ ntohs (req->ptarget2);
+ break;
+ }
+ if (debug)
+ printf (SPP_FMT "got ARP request, source=0x%04x/%d.%d.%d.%d, target=0x%04x/%d.%d.%d.%d\n",
+ SPP_ARGS(ifp), ntohs (req->hsource),
+ (unsigned char) (his_ip_address >> 24),
+ (unsigned char) (his_ip_address >> 16),
+ (unsigned char) (his_ip_address >> 8),
+ (unsigned char) his_ip_address,
+ my_hardware_address,
+ (unsigned char) (my_ip_address >> 24),
+ (unsigned char) (my_ip_address >> 16),
+ (unsigned char) (my_ip_address >> 8),
+ (unsigned char) my_ip_address);
+
+ sppp_get_ip_addrs (sp, &my_ip_address, 0, 0);
+ if (! my_ip_address)
+ return; /* nothing to reply */
+
+ if (debug)
+ printf (SPP_FMT "send ARP reply, source=0x%04x/%d.%d.%d.%d, target=0x%04x/%d.%d.%d.%d\n",
+ SPP_ARGS(ifp), my_hardware_address,
+ (unsigned char) (my_ip_address >> 24),
+ (unsigned char) (my_ip_address >> 16),
+ (unsigned char) (my_ip_address >> 8),
+ (unsigned char) my_ip_address,
+ his_hardware_address,
+ (unsigned char) (his_ip_address >> 24),
+ (unsigned char) (his_ip_address >> 16),
+ (unsigned char) (his_ip_address >> 8),
+ (unsigned char) his_ip_address);
+
+ /* Send the Inverse ARP reply. */
+ MGETHDR (m, M_DONTWAIT, MT_DATA);
+ if (! m)
+ return;
+ m->m_pkthdr.len = m->m_len = 10 + sizeof (*reply);
+ m->m_pkthdr.rcvif = 0;
+
+ h = mtod (m, u_char*);
+ reply = (struct arp_req*) (h + 10);
+
+ h[0] = his_hardware_address >> 8;
+ h[1] = his_hardware_address;
+ h[2] = FR_UI;
+ h[3] = FR_PADDING;
+ h[4] = FR_SNAP;
+ h[5] = 0;
+ h[6] = 0;
+ h[7] = 0;
+ *(short*) (h+8) = htons (ETHERTYPE_ARP);
+
+ reply->htype = htons (ARPHRD_FRELAY);
+ reply->ptype = htons (ETHERTYPE_IP);
+ reply->halen = 2;
+ reply->palen = 4;
+ reply->op = htons (ARPOP_INVREPLY);
+ reply->hsource = htons (my_hardware_address);
+ reply->psource1 = htonl (my_ip_address);
+ reply->psource2 = htonl (my_ip_address) >> 16;
+ reply->htarget = htons (his_hardware_address);
+ reply->ptarget1 = htonl (his_ip_address);
+ reply->ptarget2 = htonl (his_ip_address) >> 16;
+
+ if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
+ ++ifp->if_oerrors;
+}
+
+/*
+ * Process the input signaling packet (DLCI 0).
+ * The implemented protocol is ANSI T1.617 Annex D.
+ */
+static void sppp_fr_signal (struct sppp *sp, unsigned char *h, int len)
+{
+ STDDCL;
+ u_char *p;
+ int dlci;
+
+ if (h[2] != FR_UI || h[3] != FR_SIGNALING || h[4] != 0) {
+ if (debug)
+ printf (SPP_FMT "Invalid signaling header\n",
+ SPP_ARGS(ifp));
+bad: if (debug) {
+ printf ("%02x", *h++);
+ while (--len > 0)
+ printf ("-%02x", *h++);
+ printf ("\n");
+ }
+ return;
+ }
+ if (h[5] == FR_MSG_ENQUIRY) {
+ if (len == FR_ENQUIRY_SIZE &&
+ h[12] == (u_char) sp->pp_seq[IDX_LCP]) {
+ sp->pp_seq[IDX_LCP] = random();
+ printf (SPP_FMT "loopback detected\n",
+ SPP_ARGS(ifp));
+ }
+ return;
+ }
+ if (h[5] != FR_MSG_STATUS) {
+ if (debug)
+ printf (SPP_FMT "Unknown signaling message: 0x%02x\n",
+ SPP_ARGS(ifp), h[5]);
+ goto bad;
+ }
+
+ /* Parse message fields. */
+ for (p=h+6; p<h+len; ) {
+ switch (*p) {
+ default:
+ if (debug)
+ printf (SPP_FMT "Unknown signaling field 0x%x\n",
+ SPP_ARGS(ifp), *p);
+ break;
+ case FR_FLD_LSHIFT5:
+ case FR_FLD_RTYPE:
+ /* Ignore. */
+ break;
+ case FR_FLD_VERIFY:
+ if (p[1] != 2) {
+ if (debug)
+ printf (SPP_FMT "Invalid signaling verify field length %d\n",
+ SPP_ARGS(ifp), p[1]);
+ break;
+ }
+ sp->pp_rseq[IDX_LCP] = p[2];
+ if (debug) {
+ printf (SPP_FMT "got lmi reply rseq=%d, seq=%d",
+ SPP_ARGS(ifp), p[2], p[3]);
+ if (p[3] != (u_char) sp->pp_seq[IDX_LCP])
+ printf (" (really %d)",
+ (u_char) sp->pp_seq[IDX_LCP]);
+ printf ("\n");
+ }
+ break;
+ case FR_FLD_PVC:
+ if (p[1] < 3) {
+ if (debug)
+ printf (SPP_FMT "Invalid PVC status length %d\n",
+ SPP_ARGS(ifp), p[1]);
+ break;
+ }
+ dlci = (p[2] << 4 & 0x3f0) | (p[3] >> 3 & 0x0f);
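+ /* E.g. p[2] = 0x01, p[3] = 0x08 yields DLCI (0x10 | 1) = 17
+ * (illustrative values). */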
+ if (! sp->fr_dlci)
+ sp->fr_dlci = dlci;
+ if (sp->fr_status != p[4])
+ printf (SPP_FMT "DLCI %d %s%s\n",
+ SPP_ARGS(ifp), dlci,
+ p[4] & FR_DLCI_DELETE ? "deleted" :
+ p[4] & FR_DLCI_ACTIVE ? "active" : "passive",
+ p[4] & FR_DLCI_NEW ? ", new" : "");
+ sp->fr_status = p[4];
+ break;
+ }
+ if (*p & 0x80)
+ ++p;
+ else if (p+1 < h+len && p[1])
+ p += 2 + p[1];
+ else {
+ if (debug)
+ printf (SPP_FMT "Invalid signaling field 0x%x\n",
+ SPP_ARGS(ifp), *p);
+ goto bad;
+ }
+ }
+}
diff --git a/freebsd/sys/net/if_spppsubr.c b/freebsd/sys/net/if_spppsubr.c
new file mode 100644
index 00000000..235ef7c0
--- /dev/null
+++ b/freebsd/sys/net/if_spppsubr.c
@@ -0,0 +1,5492 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * Synchronous PPP/Cisco/Frame Relay link level subroutines.
+ * Keepalive protocol implemented in both Cisco and PPP modes.
+ */
+/*-
+ * Copyright (C) 1994-2000 Cronyx Engineering.
+ * Author: Serge Vakulenko, <vak@cronyx.ru>
+ *
+ * Heavily revamped to conform to RFC 1661.
+ * Copyright (C) 1997, 2001 Joerg Wunsch.
+ *
+ * This software is distributed with NO WARRANTIES, not even the implied
+ * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Authors grant any other persons or organisations permission to use
+ * or modify this software as long as this message is kept with the software,
+ * all derivative works or modified versions.
+ *
+ * From: Version 2.4, Thu Apr 30 17:17:21 MSD 1997
+ *
+ * $FreeBSD$
+ */
+
+#include <freebsd/sys/param.h>
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipx.h>
+
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/random.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+
+#include <freebsd/sys/md5.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/net/slcompress.h>
+
+#include <freebsd/machine/stdarg.h>
+
+#include <freebsd/netinet/in_var.h>
+
+#ifdef INET
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+#endif
+
+#ifdef INET6
+#include <freebsd/netinet6/scope6_var.h>
+#endif
+
+#include <freebsd/netinet/if_ether.h>
+
+#ifdef IPX
+#include <freebsd/netipx/ipx.h>
+#include <freebsd/netipx/ipx_if.h>
+#endif
+
+#include <freebsd/net/if_sppp.h>
+
+#define IOCTL_CMD_T u_long
+#define MAXALIVECNT 3 /* max. alive packets */
+
+/*
+ * Interface flags that can be set in an ifconfig command.
+ *
+ * Setting link0 will make the link passive, i.e. it will be marked
+ * as administratively openable, but won't be opened to begin
+ * with. Incoming calls will be answered, or subsequent calls with
+ * -link1 will cause the administrative open of the LCP layer.
+ *
+ * Setting link1 will cause the link to auto-dial only as packets
+ * arrive to be sent.
+ *
+ * Setting IFF_DEBUG will syslog the option negotiation and state
+ * transitions at level kern.debug. Note: all logs consistently look
+ * like
+ *
+ * <if-name><unit>: <proto-name> <additional info...>
+ *
+ * with <if-name><unit> being something like "bppp0", and <proto-name>
+ * being one of "lcp", "ipcp", "cisco", "chap", "pap", etc.
+ */
+ */
+
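+/*
+ * For example (a hypothetical session; the actual interface name is
+ * whatever the driver registered):
+ *
+ * ifconfig bppp0 link1 # auto-dial on output
+ * ifconfig bppp0 link0 # passive mode
+ * ifconfig bppp0 debug # log negotiation as described above
+ */
+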
+#define IFF_PASSIVE IFF_LINK0 /* wait passively for connection */
+#define IFF_AUTO IFF_LINK1 /* auto-dial on output */
+#define IFF_CISCO IFF_LINK2 /* use Cisco HDLC framing */
+
+#define PPP_ALLSTATIONS 0xff /* All-Stations broadcast address */
+#define PPP_UI 0x03 /* Unnumbered Information */
+#define PPP_IP 0x0021 /* Internet Protocol */
+#define PPP_ISO 0x0023 /* ISO OSI Protocol */
+#define PPP_XNS 0x0025 /* Xerox NS Protocol */
+#define PPP_IPX 0x002b /* Novell IPX Protocol */
+#define PPP_VJ_COMP 0x002d /* VJ compressed TCP/IP */
+#define PPP_VJ_UCOMP 0x002f /* VJ uncompressed TCP/IP */
+#define PPP_IPV6 0x0057 /* Internet Protocol Version 6 */
+#define PPP_LCP 0xc021 /* Link Control Protocol */
+#define PPP_PAP 0xc023 /* Password Authentication Protocol */
+#define PPP_CHAP 0xc223 /* Challenge-Handshake Auth Protocol */
+#define PPP_IPCP 0x8021 /* Internet Protocol Control Protocol */
+#define PPP_IPV6CP 0x8057 /* IPv6 Control Protocol */
+
+#define CONF_REQ 1 /* PPP configure request */
+#define CONF_ACK 2 /* PPP configure acknowledge */
+#define CONF_NAK 3 /* PPP configure negative ack */
+#define CONF_REJ 4 /* PPP configure reject */
+#define TERM_REQ 5 /* PPP terminate request */
+#define TERM_ACK 6 /* PPP terminate acknowledge */
+#define CODE_REJ 7 /* PPP code reject */
+#define PROTO_REJ 8 /* PPP protocol reject */
+#define ECHO_REQ 9 /* PPP echo request */
+#define ECHO_REPLY 10 /* PPP echo reply */
+#define DISC_REQ 11 /* PPP discard request */
+
+#define LCP_OPT_MRU 1 /* maximum receive unit */
+#define LCP_OPT_ASYNC_MAP 2 /* async control character map */
+#define LCP_OPT_AUTH_PROTO 3 /* authentication protocol */
+#define LCP_OPT_QUAL_PROTO 4 /* quality protocol */
+#define LCP_OPT_MAGIC 5 /* magic number */
+#define LCP_OPT_RESERVED 6 /* reserved */
+#define LCP_OPT_PROTO_COMP 7 /* protocol field compression */
+#define LCP_OPT_ADDR_COMP 8 /* address/control field compression */
+
+#define IPCP_OPT_ADDRESSES 1 /* both IP addresses; deprecated */
+#define IPCP_OPT_COMPRESSION 2 /* IP compression protocol (VJ) */
+#define IPCP_OPT_ADDRESS 3 /* local IP address */
+
+#define IPV6CP_OPT_IFID 1 /* interface identifier */
+#define IPV6CP_OPT_COMPRESSION 2 /* IPv6 compression protocol */
+
+#define IPCP_COMP_VJ 0x2d /* Code for VJ compression */
+
+#define PAP_REQ 1 /* PAP name/password request */
+#define PAP_ACK 2 /* PAP acknowledge */
+#define PAP_NAK 3 /* PAP fail */
+
+#define CHAP_CHALLENGE 1 /* CHAP challenge request */
+#define CHAP_RESPONSE 2 /* CHAP challenge response */
+#define CHAP_SUCCESS 3 /* CHAP response ok */
+#define CHAP_FAILURE 4 /* CHAP response failed */
+
+#define CHAP_MD5 5 /* hash algorithm - MD5 */
+
+#define CISCO_MULTICAST 0x8f /* Cisco multicast address */
+#define CISCO_UNICAST 0x0f /* Cisco unicast address */
+#define CISCO_KEEPALIVE 0x8035 /* Cisco keepalive protocol */
+#define CISCO_ADDR_REQ 0 /* Cisco address request */
+#define CISCO_ADDR_REPLY 1 /* Cisco address reply */
+#define CISCO_KEEPALIVE_REQ 2 /* Cisco keepalive request */
+
+/* states are named and numbered according to RFC 1661 */
+#define STATE_INITIAL 0
+#define STATE_STARTING 1
+#define STATE_CLOSED 2
+#define STATE_STOPPED 3
+#define STATE_CLOSING 4
+#define STATE_STOPPING 5
+#define STATE_REQ_SENT 6
+#define STATE_ACK_RCVD 7
+#define STATE_ACK_SENT 8
+#define STATE_OPENED 9
+
+MALLOC_DEFINE(M_SPPP, "sppp", "synchronous PPP interface internals");
+
+struct ppp_header {
+ u_char address;
+ u_char control;
+ u_short protocol;
+} __packed;
+#define PPP_HEADER_LEN sizeof (struct ppp_header)
+
+struct lcp_header {
+ u_char type;
+ u_char ident;
+ u_short len;
+} __packed;
+#define LCP_HEADER_LEN sizeof (struct lcp_header)
+
+struct cisco_packet {
+ u_long type;
+ u_long par1;
+ u_long par2;
+ u_short rel;
+ u_short time0;
+ u_short time1;
+} __packed;
+#define CISCO_PACKET_LEN sizeof (struct cisco_packet)
+
+/*
+ * We follow the spelling and capitalization of RFC 1661 here, to make
+ * it easier to compare with the standard.  Please refer to this RFC in
+ * case you can't make sense of these abbreviations; it will also
+ * explain the semantics related to the various events and actions.
+ */
+struct cp {
+ u_short proto; /* PPP control protocol number */
+ u_char protoidx; /* index into state table in struct sppp */
+ u_char flags;
+#define CP_LCP 0x01 /* this is the LCP */
+#define CP_AUTH 0x02 /* this is an authentication protocol */
+#define CP_NCP 0x04 /* this is a NCP */
+#define CP_QUAL 0x08 /* this is a quality reporting protocol */
+ const char *name; /* name of this control protocol */
+ /* event handlers */
+ void (*Up)(struct sppp *sp);
+ void (*Down)(struct sppp *sp);
+ void (*Open)(struct sppp *sp);
+ void (*Close)(struct sppp *sp);
+ void (*TO)(void *sp);
+ int (*RCR)(struct sppp *sp, struct lcp_header *h, int len);
+ void (*RCN_rej)(struct sppp *sp, struct lcp_header *h, int len);
+ void (*RCN_nak)(struct sppp *sp, struct lcp_header *h, int len);
+ /* actions */
+ void (*tlu)(struct sppp *sp);
+ void (*tld)(struct sppp *sp);
+ void (*tls)(struct sppp *sp);
+ void (*tlf)(struct sppp *sp);
+ void (*scr)(struct sppp *sp);
+};
+
+#define SPP_FMT "%s: "
+#define SPP_ARGS(ifp) (ifp)->if_xname
+
+#define SPPP_LOCK(sp) mtx_lock (&(sp)->mtx)
+#define SPPP_UNLOCK(sp) mtx_unlock (&(sp)->mtx)
+#define SPPP_LOCK_ASSERT(sp) mtx_assert (&(sp)->mtx, MA_OWNED)
+#define SPPP_LOCK_OWNED(sp) mtx_owned (&(sp)->mtx)
+
+#ifdef INET
+/*
+ * The following disgusting hack gets around the problem that IP TOS
+ * can't be set yet. We want to put "interactive" traffic on a high
+ * priority queue. To decide if traffic is interactive, we check that
+ * a) it is TCP and b) one of its ports is telnet, rlogin or ftp control.
+ *
+ * XXX is this really still necessary? - joerg -
+ */
+static const u_short interactive_ports[8] = {
+ 0, 513, 0, 0,
+ 0, 21, 0, 23,
+};
+#define INTERACTIVE(p) (interactive_ports[(p) & 7] == (p))
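+/*
+ * Worked example: the table is a tiny perfect hash keyed on the low
+ * three bits of the port number.  513 & 7 == 1 and slot 1 holds 513
+ * (rlogin); 21 & 7 == 5 (ftp control) and 23 & 7 == 7 (telnet) match
+ * likewise, while e.g. INTERACTIVE(80) is false since slot 0 holds 0.
+ */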
+#endif
+
+/* almost every function needs these */
+#define STDDCL \
+ struct ifnet *ifp = SP2IFP(sp); \
+ int debug = ifp->if_flags & IFF_DEBUG
+
+static int sppp_output(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct route *ro);
+
+static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2);
+static void sppp_cisco_input(struct sppp *sp, struct mbuf *m);
+
+static void sppp_cp_input(const struct cp *cp, struct sppp *sp,
+ struct mbuf *m);
+static void sppp_cp_send(struct sppp *sp, u_short proto, u_char type,
+ u_char ident, u_short len, void *data);
+/* static void sppp_cp_timeout(void *arg); */
+static void sppp_cp_change_state(const struct cp *cp, struct sppp *sp,
+ int newstate);
+static void sppp_auth_send(const struct cp *cp,
+ struct sppp *sp, unsigned int type, unsigned int id,
+ ...);
+
+static void sppp_up_event(const struct cp *cp, struct sppp *sp);
+static void sppp_down_event(const struct cp *cp, struct sppp *sp);
+static void sppp_open_event(const struct cp *cp, struct sppp *sp);
+static void sppp_close_event(const struct cp *cp, struct sppp *sp);
+static void sppp_to_event(const struct cp *cp, struct sppp *sp);
+
+static void sppp_null(struct sppp *sp);
+
+static void sppp_pp_up(struct sppp *sp);
+static void sppp_pp_down(struct sppp *sp);
+
+static void sppp_lcp_init(struct sppp *sp);
+static void sppp_lcp_up(struct sppp *sp);
+static void sppp_lcp_down(struct sppp *sp);
+static void sppp_lcp_open(struct sppp *sp);
+static void sppp_lcp_close(struct sppp *sp);
+static void sppp_lcp_TO(void *sp);
+static int sppp_lcp_RCR(struct sppp *sp, struct lcp_header *h, int len);
+static void sppp_lcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len);
+static void sppp_lcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len);
+static void sppp_lcp_tlu(struct sppp *sp);
+static void sppp_lcp_tld(struct sppp *sp);
+static void sppp_lcp_tls(struct sppp *sp);
+static void sppp_lcp_tlf(struct sppp *sp);
+static void sppp_lcp_scr(struct sppp *sp);
+static void sppp_lcp_check_and_close(struct sppp *sp);
+static int sppp_ncp_check(struct sppp *sp);
+
+static void sppp_ipcp_init(struct sppp *sp);
+static void sppp_ipcp_up(struct sppp *sp);
+static void sppp_ipcp_down(struct sppp *sp);
+static void sppp_ipcp_open(struct sppp *sp);
+static void sppp_ipcp_close(struct sppp *sp);
+static void sppp_ipcp_TO(void *sp);
+static int sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len);
+static void sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len);
+static void sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len);
+static void sppp_ipcp_tlu(struct sppp *sp);
+static void sppp_ipcp_tld(struct sppp *sp);
+static void sppp_ipcp_tls(struct sppp *sp);
+static void sppp_ipcp_tlf(struct sppp *sp);
+static void sppp_ipcp_scr(struct sppp *sp);
+
+static void sppp_ipv6cp_init(struct sppp *sp);
+static void sppp_ipv6cp_up(struct sppp *sp);
+static void sppp_ipv6cp_down(struct sppp *sp);
+static void sppp_ipv6cp_open(struct sppp *sp);
+static void sppp_ipv6cp_close(struct sppp *sp);
+static void sppp_ipv6cp_TO(void *sp);
+static int sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len);
+static void sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len);
+static void sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len);
+static void sppp_ipv6cp_tlu(struct sppp *sp);
+static void sppp_ipv6cp_tld(struct sppp *sp);
+static void sppp_ipv6cp_tls(struct sppp *sp);
+static void sppp_ipv6cp_tlf(struct sppp *sp);
+static void sppp_ipv6cp_scr(struct sppp *sp);
+
+static void sppp_pap_input(struct sppp *sp, struct mbuf *m);
+static void sppp_pap_init(struct sppp *sp);
+static void sppp_pap_open(struct sppp *sp);
+static void sppp_pap_close(struct sppp *sp);
+static void sppp_pap_TO(void *sp);
+static void sppp_pap_my_TO(void *sp);
+static void sppp_pap_tlu(struct sppp *sp);
+static void sppp_pap_tld(struct sppp *sp);
+static void sppp_pap_scr(struct sppp *sp);
+
+static void sppp_chap_input(struct sppp *sp, struct mbuf *m);
+static void sppp_chap_init(struct sppp *sp);
+static void sppp_chap_open(struct sppp *sp);
+static void sppp_chap_close(struct sppp *sp);
+static void sppp_chap_TO(void *sp);
+static void sppp_chap_tlu(struct sppp *sp);
+static void sppp_chap_tld(struct sppp *sp);
+static void sppp_chap_scr(struct sppp *sp);
+
+static const char *sppp_auth_type_name(u_short proto, u_char type);
+static const char *sppp_cp_type_name(u_char type);
+#ifdef INET
+static const char *sppp_dotted_quad(u_long addr);
+static const char *sppp_ipcp_opt_name(u_char opt);
+#endif
+#ifdef INET6
+static const char *sppp_ipv6cp_opt_name(u_char opt);
+#endif
+static const char *sppp_lcp_opt_name(u_char opt);
+static const char *sppp_phase_name(enum ppp_phase phase);
+static const char *sppp_proto_name(u_short proto);
+static const char *sppp_state_name(int state);
+static int sppp_params(struct sppp *sp, u_long cmd, void *data);
+static int sppp_strnlen(u_char *p, int max);
+static void sppp_keepalive(void *dummy);
+static void sppp_phase_network(struct sppp *sp);
+static void sppp_print_bytes(const u_char *p, u_short len);
+static void sppp_print_string(const char *p, u_short len);
+static void sppp_qflush(struct ifqueue *ifq);
+#ifdef INET
+static void sppp_set_ip_addr(struct sppp *sp, u_long src);
+#endif
+#ifdef INET6
+static void sppp_get_ip6_addrs(struct sppp *sp, struct in6_addr *src,
+ struct in6_addr *dst, struct in6_addr *srcmask);
+#ifdef IPV6CP_MYIFID_DYN
+static void sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src);
+static void sppp_gen_ip6_addr(struct sppp *sp, const struct in6_addr *src);
+#endif
+static void sppp_suggest_ip6_addr(struct sppp *sp, struct in6_addr *src);
+#endif
+
+/* if_start () wrapper */
+static void sppp_ifstart (struct ifnet *ifp);
+
+/* our control protocol descriptors */
+static const struct cp lcp = {
+ PPP_LCP, IDX_LCP, CP_LCP, "lcp",
+ sppp_lcp_up, sppp_lcp_down, sppp_lcp_open, sppp_lcp_close,
+ sppp_lcp_TO, sppp_lcp_RCR, sppp_lcp_RCN_rej, sppp_lcp_RCN_nak,
+ sppp_lcp_tlu, sppp_lcp_tld, sppp_lcp_tls, sppp_lcp_tlf,
+ sppp_lcp_scr
+};
+
+static const struct cp ipcp = {
+ PPP_IPCP, IDX_IPCP,
+#ifdef INET /* don't run IPCP if there's no IPv4 support */
+ CP_NCP,
+#else
+ 0,
+#endif
+ "ipcp",
+ sppp_ipcp_up, sppp_ipcp_down, sppp_ipcp_open, sppp_ipcp_close,
+ sppp_ipcp_TO, sppp_ipcp_RCR, sppp_ipcp_RCN_rej, sppp_ipcp_RCN_nak,
+ sppp_ipcp_tlu, sppp_ipcp_tld, sppp_ipcp_tls, sppp_ipcp_tlf,
+ sppp_ipcp_scr
+};
+
+static const struct cp ipv6cp = {
+ PPP_IPV6CP, IDX_IPV6CP,
+#ifdef INET6 /*don't run IPv6CP if there's no IPv6 support*/
+ CP_NCP,
+#else
+ 0,
+#endif
+ "ipv6cp",
+ sppp_ipv6cp_up, sppp_ipv6cp_down, sppp_ipv6cp_open, sppp_ipv6cp_close,
+ sppp_ipv6cp_TO, sppp_ipv6cp_RCR, sppp_ipv6cp_RCN_rej, sppp_ipv6cp_RCN_nak,
+ sppp_ipv6cp_tlu, sppp_ipv6cp_tld, sppp_ipv6cp_tls, sppp_ipv6cp_tlf,
+ sppp_ipv6cp_scr
+};
+
+static const struct cp pap = {
+ PPP_PAP, IDX_PAP, CP_AUTH, "pap",
+ sppp_null, sppp_null, sppp_pap_open, sppp_pap_close,
+ sppp_pap_TO, 0, 0, 0,
+ sppp_pap_tlu, sppp_pap_tld, sppp_null, sppp_null,
+ sppp_pap_scr
+};
+
+static const struct cp chap = {
+ PPP_CHAP, IDX_CHAP, CP_AUTH, "chap",
+ sppp_null, sppp_null, sppp_chap_open, sppp_chap_close,
+ sppp_chap_TO, 0, 0, 0,
+ sppp_chap_tlu, sppp_chap_tld, sppp_null, sppp_null,
+ sppp_chap_scr
+};
+
+static const struct cp *cps[IDX_COUNT] = {
+ &lcp, /* IDX_LCP */
+ &ipcp, /* IDX_IPCP */
+ &ipv6cp, /* IDX_IPV6CP */
+ &pap, /* IDX_PAP */
+ &chap, /* IDX_CHAP */
+};
+
+static void*
+sppp_alloc(u_char type, struct ifnet *ifp)
+{
+ struct sppp *sp;
+
+ sp = malloc(sizeof(struct sppp), M_SPPP, M_WAITOK | M_ZERO);
+ sp->pp_ifp = ifp;
+
+ return (sp);
+}
+
+static void
+sppp_free(void *com, u_char type)
+{
+
+ free(com, M_SPPP);
+}
+
+static int
+sppp_modevent(module_t mod, int type, void *unused)
+{
+ switch (type) {
+ case MOD_LOAD:
+ /*
+ * XXX: should probably be IFT_SPPP, but it's fairly
+ * harmless to allocate struct sppp's for non-sppp
+ * interfaces.
+ */
+
+ if_register_com_alloc(IFT_PPP, sppp_alloc, sppp_free);
+ break;
+ case MOD_UNLOAD:
+ /* if_deregister_com_alloc(IFT_PPP); */
+ return EACCES;
+ default:
+ return EOPNOTSUPP;
+ }
+ return 0;
+}
+static moduledata_t spppmod = {
+ "sppp",
+ sppp_modevent,
+ 0
+};
+MODULE_VERSION(sppp, 1);
+DECLARE_MODULE(sppp, spppmod, SI_SUB_DRIVERS, SI_ORDER_ANY);
+
+/*
+ * Exported functions, comprising our interface to the lower layer.
+ */
+
+/*
+ * Process the received packet.
+ */
+void
+sppp_input(struct ifnet *ifp, struct mbuf *m)
+{
+ struct ppp_header *h;
+ int isr = -1;
+ struct sppp *sp = IFP2SP(ifp);
+ int debug, do_account = 0;
+#ifdef INET
+ int hlen, vjlen;
+ u_char *iphdr;
+#endif
+
+ SPPP_LOCK(sp);
+ debug = ifp->if_flags & IFF_DEBUG;
+
+ if (ifp->if_flags & IFF_UP)
+ /* Count received bytes, add FCS and one flag */
+ ifp->if_ibytes += m->m_pkthdr.len + 3;
+
+ if (m->m_pkthdr.len <= PPP_HEADER_LEN) {
+ /* Too small packet, drop it. */
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "input packet is too small, %d bytes\n",
+ SPP_ARGS(ifp), m->m_pkthdr.len);
+ drop:
+ m_freem (m);
+ SPPP_UNLOCK(sp);
+ drop2:
+ ++ifp->if_ierrors;
+ ++ifp->if_iqdrops;
+ return;
+ }
+
+ if (sp->pp_mode == PP_FR) {
+ sppp_fr_input (sp, m);
+ SPPP_UNLOCK(sp);
+ return;
+ }
+
+ /* Get PPP header. */
+ h = mtod (m, struct ppp_header*);
+ m_adj (m, PPP_HEADER_LEN);
+
+ switch (h->address) {
+ case PPP_ALLSTATIONS:
+ if (h->control != PPP_UI)
+ goto invalid;
+ if (sp->pp_mode == IFF_CISCO) {
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "PPP packet in Cisco mode "
+ "<addr=0x%x ctrl=0x%x proto=0x%x>\n",
+ SPP_ARGS(ifp),
+ h->address, h->control, ntohs(h->protocol));
+ goto drop;
+ }
+ switch (ntohs (h->protocol)) {
+ default:
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "rejecting protocol "
+ "<addr=0x%x ctrl=0x%x proto=0x%x>\n",
+ SPP_ARGS(ifp),
+ h->address, h->control, ntohs(h->protocol));
+ if (sp->state[IDX_LCP] == STATE_OPENED)
+ sppp_cp_send (sp, PPP_LCP, PROTO_REJ,
+ ++sp->pp_seq[IDX_LCP], m->m_pkthdr.len + 2,
+ &h->protocol);
+ ++ifp->if_noproto;
+ goto drop;
+ case PPP_LCP:
+ sppp_cp_input(&lcp, sp, m);
+ m_freem (m);
+ SPPP_UNLOCK(sp);
+ return;
+ case PPP_PAP:
+ if (sp->pp_phase >= PHASE_AUTHENTICATE)
+ sppp_pap_input(sp, m);
+ m_freem (m);
+ SPPP_UNLOCK(sp);
+ return;
+ case PPP_CHAP:
+ if (sp->pp_phase >= PHASE_AUTHENTICATE)
+ sppp_chap_input(sp, m);
+ m_freem (m);
+ SPPP_UNLOCK(sp);
+ return;
+#ifdef INET
+ case PPP_IPCP:
+ if (sp->pp_phase == PHASE_NETWORK)
+ sppp_cp_input(&ipcp, sp, m);
+ m_freem (m);
+ SPPP_UNLOCK(sp);
+ return;
+ case PPP_IP:
+ if (sp->state[IDX_IPCP] == STATE_OPENED) {
+ isr = NETISR_IP;
+ }
+ do_account++;
+ break;
+ case PPP_VJ_COMP:
+ if (sp->state[IDX_IPCP] == STATE_OPENED) {
+ if ((vjlen =
+ sl_uncompress_tcp_core(mtod(m, u_char *),
+ m->m_len, m->m_len,
+ TYPE_COMPRESSED_TCP,
+ sp->pp_comp,
+ &iphdr, &hlen)) <= 0) {
+ if (debug)
+ log(LOG_INFO,
+ SPP_FMT "VJ uncompress failed on compressed packet\n",
+ SPP_ARGS(ifp));
+ goto drop;
+ }
+
+ /*
+ * Trim the VJ header off the packet, and prepend
+ * the uncompressed IP header (which will usually
+ * end up in two chained mbufs since there's not
+ * enough leading space in the existing mbuf).
+ */
+ m_adj(m, vjlen);
+ M_PREPEND(m, hlen, M_DONTWAIT);
+ if (m == NULL) {
+ SPPP_UNLOCK(sp);
+ goto drop2;
+ }
+ bcopy(iphdr, mtod(m, u_char *), hlen);
+ isr = NETISR_IP;
+ }
+ do_account++;
+ break;
+ case PPP_VJ_UCOMP:
+ if (sp->state[IDX_IPCP] == STATE_OPENED) {
+ if (sl_uncompress_tcp_core(mtod(m, u_char *),
+ m->m_len, m->m_len,
+ TYPE_UNCOMPRESSED_TCP,
+ sp->pp_comp,
+ &iphdr, &hlen) != 0) {
+ if (debug)
+ log(LOG_INFO,
+ SPP_FMT "VJ uncompress failed on uncompressed packet\n",
+ SPP_ARGS(ifp));
+ goto drop;
+ }
+ isr = NETISR_IP;
+ }
+ do_account++;
+ break;
+#endif
+#ifdef INET6
+ case PPP_IPV6CP:
+ if (sp->pp_phase == PHASE_NETWORK)
+ sppp_cp_input(&ipv6cp, sp, m);
+ m_freem (m);
+ SPPP_UNLOCK(sp);
+ return;
+
+ case PPP_IPV6:
+ if (sp->state[IDX_IPV6CP] == STATE_OPENED)
+ isr = NETISR_IPV6;
+ do_account++;
+ break;
+#endif
+#ifdef IPX
+ case PPP_IPX:
+ /* IPX IPXCP not implemented yet */
+ if (sp->pp_phase == PHASE_NETWORK)
+ isr = NETISR_IPX;
+ do_account++;
+ break;
+#endif
+ }
+ break;
+ case CISCO_MULTICAST:
+ case CISCO_UNICAST:
+ /* Don't check the control field here (RFC 1547). */
+ if (sp->pp_mode != IFF_CISCO) {
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "Cisco packet in PPP mode "
+ "<addr=0x%x ctrl=0x%x proto=0x%x>\n",
+ SPP_ARGS(ifp),
+ h->address, h->control, ntohs(h->protocol));
+ goto drop;
+ }
+ switch (ntohs (h->protocol)) {
+ default:
+ ++ifp->if_noproto;
+ goto invalid;
+ case CISCO_KEEPALIVE:
+ sppp_cisco_input (sp, m);
+ m_freem (m);
+ SPPP_UNLOCK(sp);
+ return;
+#ifdef INET
+ case ETHERTYPE_IP:
+ isr = NETISR_IP;
+ do_account++;
+ break;
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ do_account++;
+ break;
+#endif
+#ifdef IPX
+ case ETHERTYPE_IPX:
+ isr = NETISR_IPX;
+ do_account++;
+ break;
+#endif
+ }
+ break;
+ default: /* Invalid PPP packet. */
+ invalid:
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "invalid input packet "
+ "<addr=0x%x ctrl=0x%x proto=0x%x>\n",
+ SPP_ARGS(ifp),
+ h->address, h->control, ntohs(h->protocol));
+ goto drop;
+ }
+
+ if (! (ifp->if_flags & IFF_UP) || isr == -1)
+ goto drop;
+
+ SPPP_UNLOCK(sp);
+ /* Check queue. */
+ if (netisr_queue(isr, m)) { /* (0) on success. */
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "protocol queue overflow\n",
+ SPP_ARGS(ifp));
+ goto drop2;
+ }
+
+ if (do_account)
+ /*
+ * Only account for network packets, not for control
+ * packets.  This is used by some subsystems to detect
+ * idle lines.
+ */
+ sp->pp_last_recv = time_uptime;
+}
+
+static void
+sppp_ifstart_sched(void *dummy)
+{
+ struct sppp *sp = dummy;
+
+ sp->if_start(SP2IFP(sp));
+}
+
+/* if_start() wrapper function.  We use it to schedule the real if_start()
+ * for execution.  It cannot be called directly while the sppp mutex is
+ * held, so in that case it is deferred to a callout below.
+ */
+static void
+sppp_ifstart(struct ifnet *ifp)
+{
+ struct sppp *sp = IFP2SP(ifp);
+
+ if (SPPP_LOCK_OWNED(sp)) {
+ if (callout_pending(&sp->ifstart_callout))
+ return;
+ callout_reset(&sp->ifstart_callout, 1, sppp_ifstart_sched,
+ (void *)sp);
+ } else {
+ sp->if_start(ifp);
+ }
+}
+
+/*
+ * Enqueue transmit packet.
+ */
+static int
+sppp_output(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct route *ro)
+{
+ struct sppp *sp = IFP2SP(ifp);
+ struct ppp_header *h;
+ struct ifqueue *ifq = NULL;
+ int s, error, rv = 0;
+#ifdef INET
+ int ipproto = PPP_IP;
+#endif
+ int debug = ifp->if_flags & IFF_DEBUG;
+
+ s = splimp();
+ SPPP_LOCK(sp);
+
+ if (!(ifp->if_flags & IFF_UP) ||
+ (!(ifp->if_flags & IFF_AUTO) &&
+ !(ifp->if_drv_flags & IFF_DRV_RUNNING))) {
+#ifdef INET6
+ drop:
+#endif
+ m_freem (m);
+ SPPP_UNLOCK(sp);
+ splx (s);
+ return (ENETDOWN);
+ }
+
+ if ((ifp->if_flags & IFF_AUTO) &&
+ !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+#ifdef INET6
+ /*
+ * XXX
+ *
+ * Hack to prevent the initialization-time generated
+ * IPv6 multicast packet from erroneously causing a
+ * dialout event in case IPv6 has been
+ * administratively disabled on that interface.
+ */
+ if (dst->sa_family == AF_INET6 &&
+ !(sp->confflags & CONF_ENABLE_IPV6))
+ goto drop;
+#endif
+ /*
+ * Interface is not yet running, but auto-dial. Need
+ * to start LCP for it.
+ */
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ splx(s);
+ lcp.Open(sp);
+ s = splimp();
+ }
+
+#ifdef INET
+ if (dst->sa_family == AF_INET) {
+ /* XXX Check mbuf length here? */
+ struct ip *ip = mtod (m, struct ip*);
+ struct tcphdr *tcp = (struct tcphdr*) ((u_int32_t*)ip + ip->ip_hl);
+
+ /*
+ * When using dynamic local IP address assignment, i.e. using
+ * 0.0.0.0 as the local address, the first TCP session will
+ * not connect: the local TCP checksum is computed using
+ * 0.0.0.0, which will later become our real IP address, so
+ * the checksum verified at the remote end becomes invalid.
+ * Therefore we
+ * - don't let packets with src IP addr 0 through, and
+ * - flag TCP packets with src IP 0 as an error.
+ */
+
+ if(ip->ip_src.s_addr == INADDR_ANY) /* -hm */
+ {
+ m_freem(m);
+ SPPP_UNLOCK(sp);
+ splx(s);
+ if(ip->ip_p == IPPROTO_TCP)
+ return(EADDRNOTAVAIL);
+ else
+ return(0);
+ }
+
+ /*
+ * Put low-delay, telnet, rlogin and ftp control packets
+ * in front of the queue, or let ALTQ take care of it.
+ */
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ ;
+ else if (_IF_QFULL(&sp->pp_fastq))
+ ;
+ else if (ip->ip_tos & IPTOS_LOWDELAY)
+ ifq = &sp->pp_fastq;
+ else if (m->m_len < sizeof *ip + sizeof *tcp)
+ ;
+ else if (ip->ip_p != IPPROTO_TCP)
+ ;
+ else if (INTERACTIVE (ntohs (tcp->th_sport)))
+ ifq = &sp->pp_fastq;
+ else if (INTERACTIVE (ntohs (tcp->th_dport)))
+ ifq = &sp->pp_fastq;
+
+ /*
+ * Do IP Header compression
+ */
+ if (sp->pp_mode != IFF_CISCO && sp->pp_mode != PP_FR &&
+ (sp->ipcp.flags & IPCP_VJ) && ip->ip_p == IPPROTO_TCP)
+ switch (sl_compress_tcp(m, ip, sp->pp_comp,
+ sp->ipcp.compress_cid)) {
+ case TYPE_COMPRESSED_TCP:
+ ipproto = PPP_VJ_COMP;
+ break;
+ case TYPE_UNCOMPRESSED_TCP:
+ ipproto = PPP_VJ_UCOMP;
+ break;
+ case TYPE_IP:
+ ipproto = PPP_IP;
+ break;
+ default:
+ m_freem(m);
+ SPPP_UNLOCK(sp);
+ splx(s);
+ return (EINVAL);
+ }
+ }
+#endif
+
+#ifdef INET6
+ if (dst->sa_family == AF_INET6) {
+ /* XXX do something tricky here? */
+ }
+#endif
+
+ if (sp->pp_mode == PP_FR) {
+ /* Add frame relay header. */
+ m = sppp_fr_header (sp, m, dst->sa_family);
+ if (! m)
+ goto nobufs;
+ goto out;
+ }
+
+ /*
+ * Prepend the general data packet PPP header.  The address,
+ * control and protocol fields are filled in below.
+ */
+ M_PREPEND (m, PPP_HEADER_LEN, M_DONTWAIT);
+ if (! m) {
+nobufs: if (debug)
+ log(LOG_DEBUG, SPP_FMT "no memory for transmit header\n",
+ SPP_ARGS(ifp));
+ ++ifp->if_oerrors;
+ SPPP_UNLOCK(sp);
+ splx (s);
+ return (ENOBUFS);
+ }
+ /*
+ * We might want to check the packet size here; due to the
+ * implementation there is always enough leading space, though.
+ */
+ h = mtod (m, struct ppp_header*);
+ if (sp->pp_mode == IFF_CISCO) {
+ h->address = CISCO_UNICAST; /* unicast address */
+ h->control = 0;
+ } else {
+ h->address = PPP_ALLSTATIONS; /* broadcast address */
+ h->control = PPP_UI; /* Unnumbered Info */
+ }
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET: /* Internet Protocol */
+ if (sp->pp_mode == IFF_CISCO)
+ h->protocol = htons (ETHERTYPE_IP);
+ else {
+ /*
+ * Don't choke with an ENETDOWN early. It's
+ * possible that we just started dialing out,
+ * so don't drop the packet immediately. If
+ * we notice that we run out of buffer space
+ * below, we will however remember that we are
+ * not ready to carry IP packets, and return
+ * ENETDOWN, as opposed to ENOBUFS.
+ */
+ h->protocol = htons(ipproto);
+ if (sp->state[IDX_IPCP] != STATE_OPENED)
+ rv = ENETDOWN;
+ }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6: /* Internet Protocol */
+ if (sp->pp_mode == IFF_CISCO)
+ h->protocol = htons (ETHERTYPE_IPV6);
+ else {
+ /*
+ * Don't choke with an ENETDOWN early. It's
+ * possible that we just started dialing out,
+ * so don't drop the packet immediately. If
+ * we notice that we run out of buffer space
+ * below, we will however remember that we are
+ * not ready to carry IP packets, and return
+ * ENETDOWN, as opposed to ENOBUFS.
+ */
+ h->protocol = htons(PPP_IPV6);
+ if (sp->state[IDX_IPV6CP] != STATE_OPENED)
+ rv = ENETDOWN;
+ }
+ break;
+#endif
+#ifdef IPX
+ case AF_IPX: /* Novell IPX Protocol */
+ h->protocol = htons (sp->pp_mode == IFF_CISCO ?
+ ETHERTYPE_IPX : PPP_IPX);
+ break;
+#endif
+ default:
+ m_freem (m);
+ ++ifp->if_oerrors;
+ SPPP_UNLOCK(sp);
+ splx (s);
+ return (EAFNOSUPPORT);
+ }
+
+ /*
+ * Queue message on interface, and start output if interface
+ * not yet active.
+ */
+out:
+ if (ifq != NULL)
+ error = !(IF_HANDOFF_ADJ(ifq, m, ifp, 3));
+ else
+ IFQ_HANDOFF_ADJ(ifp, m, 3, error);
+ if (error) {
+ ++ifp->if_oerrors;
+ SPPP_UNLOCK(sp);
+ splx (s);
+ return (rv? rv: ENOBUFS);
+ }
+ SPPP_UNLOCK(sp);
+ splx (s);
+ /*
+ * Unlike in sppp_input(), we can always bump the timestamp
+ * here since sppp_output() is only called on behalf of
+ * network-layer traffic; control-layer traffic is handled
+ * by sppp_cp_send().
+ */
+ sp->pp_last_sent = time_uptime;
+ return (0);
+}
+
+void
+sppp_attach(struct ifnet *ifp)
+{
+ struct sppp *sp = IFP2SP(ifp);
+
+ /* Initialize mtx lock */
+ mtx_init(&sp->mtx, "sppp", MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE);
+
+ /* Initialize keepalive handler. */
+ callout_init(&sp->keepalive_callout, CALLOUT_MPSAFE);
+ callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
+ (void *)sp);
+
+ ifp->if_mtu = PP_MTU;
+ ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
+ ifp->if_output = sppp_output;
+#if 0
+ sp->pp_flags = PP_KEEPALIVE;
+#endif
+ ifp->if_snd.ifq_maxlen = 32;
+ sp->pp_fastq.ifq_maxlen = 32;
+ sp->pp_cpq.ifq_maxlen = 20;
+ sp->pp_loopcnt = 0;
+ sp->pp_alivecnt = 0;
+ bzero(&sp->pp_seq[0], sizeof(sp->pp_seq));
+ bzero(&sp->pp_rseq[0], sizeof(sp->pp_rseq));
+ sp->pp_phase = PHASE_DEAD;
+ sp->pp_up = sppp_pp_up;
+ sp->pp_down = sppp_pp_down;
+ if(!mtx_initialized(&sp->pp_cpq.ifq_mtx))
+ mtx_init(&sp->pp_cpq.ifq_mtx, "sppp_cpq", NULL, MTX_DEF);
+ if(!mtx_initialized(&sp->pp_fastq.ifq_mtx))
+ mtx_init(&sp->pp_fastq.ifq_mtx, "sppp_fastq", NULL, MTX_DEF);
+ sp->pp_last_recv = sp->pp_last_sent = time_uptime;
+ sp->confflags = 0;
+#ifdef INET
+ sp->confflags |= CONF_ENABLE_VJ;
+#endif
+#ifdef INET6
+ sp->confflags |= CONF_ENABLE_IPV6;
+#endif
+ callout_init(&sp->ifstart_callout, CALLOUT_MPSAFE);
+ sp->if_start = ifp->if_start;
+ ifp->if_start = sppp_ifstart;
+ sp->pp_comp = malloc(sizeof(struct slcompress), M_TEMP, M_WAITOK);
+ sl_compress_init(sp->pp_comp, -1);
+ sppp_lcp_init(sp);
+ sppp_ipcp_init(sp);
+ sppp_ipv6cp_init(sp);
+ sppp_pap_init(sp);
+ sppp_chap_init(sp);
+}
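+
+#if 0
+/*
+ * A minimal sketch (not compiled in) of the expected driver-side
+ * calling sequence for the attach path above; the function and
+ * interface names are illustrative assumptions, not part of this file.
+ */
+static void
+xxx_driver_attach(void)
+{
+ struct ifnet *ifp;
+
+ ifp = if_alloc(IFT_PPP); /* com alloc above hands us a struct sppp */
+ if_initname(ifp, "xsppp", 0);
+ sppp_attach(ifp); /* init PPP state, wrap if_start */
+ if_attach(ifp); /* then publish the interface */
+}
+#endif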
+
+void
+sppp_detach(struct ifnet *ifp)
+{
+ struct sppp *sp = IFP2SP(ifp);
+ int i;
+
+ KASSERT(mtx_initialized(&sp->mtx), ("sppp mutex is not initialized"));
+
+ /* Stop keepalive handler. */
+ if (!callout_drain(&sp->keepalive_callout))
+ callout_stop(&sp->keepalive_callout);
+
+ for (i = 0; i < IDX_COUNT; i++) {
+ if (!callout_drain(&sp->ch[i]))
+ callout_stop(&sp->ch[i]);
+ }
+ if (!callout_drain(&sp->pap_my_to_ch))
+ callout_stop(&sp->pap_my_to_ch);
+ mtx_destroy(&sp->pp_cpq.ifq_mtx);
+ mtx_destroy(&sp->pp_fastq.ifq_mtx);
+ mtx_destroy(&sp->mtx);
+}
+
+/*
+ * Flush the interface output queue.
+ */
+static void
+sppp_flush_unlocked(struct ifnet *ifp)
+{
+ struct sppp *sp = IFP2SP(ifp);
+
+ sppp_qflush ((struct ifqueue *)&SP2IFP(sp)->if_snd);
+ sppp_qflush (&sp->pp_fastq);
+ sppp_qflush (&sp->pp_cpq);
+}
+
+void
+sppp_flush(struct ifnet *ifp)
+{
+ struct sppp *sp = IFP2SP(ifp);
+
+ SPPP_LOCK(sp);
+ sppp_flush_unlocked (ifp);
+ SPPP_UNLOCK(sp);
+}
+
+/*
+ * Check if the output queue is empty.
+ */
+int
+sppp_isempty(struct ifnet *ifp)
+{
+ struct sppp *sp = IFP2SP(ifp);
+ int empty, s;
+
+ s = splimp();
+ SPPP_LOCK(sp);
+ empty = !sp->pp_fastq.ifq_head && !sp->pp_cpq.ifq_head &&
+ !SP2IFP(sp)->if_snd.ifq_head;
+ SPPP_UNLOCK(sp);
+ splx(s);
+ return (empty);
+}
+
+/*
+ * Get next packet to send.
+ */
+struct mbuf *
+sppp_dequeue(struct ifnet *ifp)
+{
+ struct sppp *sp = IFP2SP(ifp);
+ struct mbuf *m;
+ int s;
+
+ s = splimp();
+ SPPP_LOCK(sp);
+ /*
+ * Process only the control protocol queue until we have at
+ * least one NCP open.
+ *
+ * Always serve all three queues in Cisco and Frame Relay modes.
+ */
+ IF_DEQUEUE(&sp->pp_cpq, m);
+ if (m == NULL &&
+ (sppp_ncp_check(sp) || sp->pp_mode == IFF_CISCO ||
+ sp->pp_mode == PP_FR)) {
+ IF_DEQUEUE(&sp->pp_fastq, m);
+ if (m == NULL)
+ IF_DEQUEUE (&SP2IFP(sp)->if_snd, m);
+ }
+ SPPP_UNLOCK(sp);
+ splx(s);
+ return m;
+}
+
+/*
+ * Pick the next packet, do not remove it from the queue.
+ */
+struct mbuf *
+sppp_pick(struct ifnet *ifp)
+{
+ struct sppp *sp = IFP2SP(ifp);
+ struct mbuf *m;
+ int s;
+
+ s = splimp ();
+ SPPP_LOCK(sp);
+
+ m = sp->pp_cpq.ifq_head;
+ if (m == NULL &&
+ (sp->pp_phase == PHASE_NETWORK ||
+ sp->pp_mode == IFF_CISCO ||
+ sp->pp_mode == PP_FR))
+ if ((m = sp->pp_fastq.ifq_head) == NULL)
+ m = SP2IFP(sp)->if_snd.ifq_head;
+ SPPP_UNLOCK(sp);
+ splx (s);
+ return (m);
+}
+
+/*
+ * Process an ioctl request. Called on low priority level.
+ */
+int
+sppp_ioctl(struct ifnet *ifp, IOCTL_CMD_T cmd, void *data)
+{
+ struct ifreq *ifr = (struct ifreq*) data;
+ struct sppp *sp = IFP2SP(ifp);
+ int s, rv, going_up, going_down, newmode;
+
+ s = splimp();
+ SPPP_LOCK(sp);
+ rv = 0;
+ switch (cmd) {
+ case SIOCAIFADDR:
+ case SIOCSIFDSTADDR:
+ break;
+
+ case SIOCSIFADDR:
+ /* set the interface "up" when assigning an IP address */
+ ifp->if_flags |= IFF_UP;
+ /* FALLTHROUGH */
+
+ case SIOCSIFFLAGS:
+ going_up = ifp->if_flags & IFF_UP &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0;
+ going_down = (ifp->if_flags & IFF_UP) == 0 &&
+ ifp->if_drv_flags & IFF_DRV_RUNNING;
+
+ newmode = ifp->if_flags & IFF_PASSIVE;
+ if (!newmode)
+ newmode = ifp->if_flags & IFF_AUTO;
+ if (!newmode)
+ newmode = ifp->if_flags & IFF_CISCO;
+ ifp->if_flags &= ~(IFF_PASSIVE | IFF_AUTO | IFF_CISCO);
+ ifp->if_flags |= newmode;
+
+ if (!newmode)
+ newmode = sp->pp_flags & PP_FR;
+
+ if (newmode != sp->pp_mode) {
+ going_down = 1;
+ if (!going_up)
+ going_up = ifp->if_drv_flags & IFF_DRV_RUNNING;
+ }
+
+ if (going_down) {
+ if (sp->pp_mode != IFF_CISCO &&
+ sp->pp_mode != PP_FR)
+ lcp.Close(sp);
+ else if (sp->pp_tlf)
+ (sp->pp_tlf)(sp);
+ sppp_flush_unlocked(ifp);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ sp->pp_mode = newmode;
+ }
+
+ if (going_up) {
+ if (sp->pp_mode != IFF_CISCO &&
+ sp->pp_mode != PP_FR)
+ lcp.Close(sp);
+ sp->pp_mode = newmode;
+ if (sp->pp_mode == 0) {
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ lcp.Open(sp);
+ }
+ if ((sp->pp_mode == IFF_CISCO) ||
+ (sp->pp_mode == PP_FR)) {
+ if (sp->pp_tls)
+ (sp->pp_tls)(sp);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ }
+ }
+
+ break;
+
+#ifdef SIOCSIFMTU
+#ifndef ifr_mtu
+#define ifr_mtu ifr_metric
+#endif
+ case SIOCSIFMTU:
+ if (ifr->ifr_mtu < 128 || ifr->ifr_mtu > sp->lcp.their_mru) {
+ rv = EINVAL; /* break instead of return: don't leak the lock */
+ break;
+ }
+ ifp->if_mtu = ifr->ifr_mtu;
+ break;
+#endif
+#ifdef SLIOCSETMTU
+ case SLIOCSETMTU:
+ if (*(short*)data < 128 || *(short*)data > sp->lcp.their_mru) {
+ rv = EINVAL; /* break instead of return: don't leak the lock */
+ break;
+ }
+ ifp->if_mtu = *(short*)data;
+ break;
+#endif
+#ifdef SIOCGIFMTU
+ case SIOCGIFMTU:
+ ifr->ifr_mtu = ifp->if_mtu;
+ break;
+#endif
+#ifdef SLIOCGETMTU
+ case SLIOCGETMTU:
+ *(short*)data = ifp->if_mtu;
+ break;
+#endif
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ break;
+
+ case SIOCGIFGENERIC:
+ case SIOCSIFGENERIC:
+ rv = sppp_params(sp, cmd, data);
+ break;
+
+ default:
+ rv = ENOTTY;
+ }
+ SPPP_UNLOCK(sp);
+ splx(s);
+ return rv;
+}
+
+/*
+ * Cisco framing implementation.
+ */
+
+/*
+ * Handle incoming Cisco keepalive protocol packets.
+ */
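+/*
+ * Protocol recap, as implemented here and in sppp_cisco_send(): each
+ * side periodically sends CISCO_KEEPALIVE_REQ with par1 = its own
+ * sequence number and par2 = the last sequence number it received, so
+ * seeing our own number echoed back repeatedly means the line is
+ * looped.  CISCO_ADDR_REQ is answered with our address and netmask in
+ * a CISCO_ADDR_REPLY.
+ */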
+static void
+sppp_cisco_input(struct sppp *sp, struct mbuf *m)
+{
+ STDDCL;
+ struct cisco_packet *h;
+ u_long me, mymask;
+
+ if (m->m_pkthdr.len < CISCO_PACKET_LEN) {
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "cisco invalid packet length: %d bytes\n",
+ SPP_ARGS(ifp), m->m_pkthdr.len);
+ return;
+ }
+ h = mtod (m, struct cisco_packet*);
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "cisco input: %d bytes "
+ "<0x%lx 0x%lx 0x%lx 0x%x 0x%x-0x%x>\n",
+ SPP_ARGS(ifp), m->m_pkthdr.len,
+ (u_long)ntohl (h->type), (u_long)h->par1, (u_long)h->par2, (u_int)h->rel,
+ (u_int)h->time0, (u_int)h->time1);
+ switch (ntohl (h->type)) {
+ default:
+ if (debug)
+ log(-1, SPP_FMT "cisco unknown packet type: 0x%lx\n",
+ SPP_ARGS(ifp), (u_long)ntohl (h->type));
+ break;
+ case CISCO_ADDR_REPLY:
+ /* Reply on address request, ignore */
+ break;
+ case CISCO_KEEPALIVE_REQ:
+ sp->pp_alivecnt = 0;
+ sp->pp_rseq[IDX_LCP] = ntohl (h->par1);
+ if (sp->pp_seq[IDX_LCP] == sp->pp_rseq[IDX_LCP]) {
+ /* Local and remote sequence numbers are equal.
+ * Probably, the line is in loopback mode. */
+ if (sp->pp_loopcnt >= MAXALIVECNT) {
+ printf (SPP_FMT "loopback\n",
+ SPP_ARGS(ifp));
+ sp->pp_loopcnt = 0;
+ if (ifp->if_flags & IFF_UP) {
+ if_down (ifp);
+ sppp_qflush (&sp->pp_cpq);
+ }
+ }
+ ++sp->pp_loopcnt;
+
+ /* Generate new local sequence number */
+ sp->pp_seq[IDX_LCP] = random();
+ break;
+ }
+ sp->pp_loopcnt = 0;
+ if (! (ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ if_up(ifp);
+ printf (SPP_FMT "up\n", SPP_ARGS(ifp));
+ }
+ break;
+ case CISCO_ADDR_REQ:
+ sppp_get_ip_addrs(sp, &me, 0, &mymask);
+ if (me != 0L)
+ sppp_cisco_send(sp, CISCO_ADDR_REPLY, me, mymask);
+ break;
+ }
+}
+
+/*
+ * Send Cisco keepalive packet.
+ */
+static void
+sppp_cisco_send(struct sppp *sp, int type, long par1, long par2)
+{
+ STDDCL;
+ struct ppp_header *h;
+ struct cisco_packet *ch;
+ struct mbuf *m;
+ struct timeval tv;
+
+ getmicrouptime(&tv);
+
+ MGETHDR (m, M_DONTWAIT, MT_DATA);
+ if (! m)
+ return;
+ m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + CISCO_PACKET_LEN;
+ m->m_pkthdr.rcvif = 0;
+
+ h = mtod (m, struct ppp_header*);
+ h->address = CISCO_MULTICAST;
+ h->control = 0;
+ h->protocol = htons (CISCO_KEEPALIVE);
+
+ ch = (struct cisco_packet*) (h + 1);
+ ch->type = htonl (type);
+ ch->par1 = htonl (par1);
+ ch->par2 = htonl (par2);
+ ch->rel = -1;
+
+ ch->time0 = htons ((u_short) (tv.tv_sec >> 16));
+ ch->time1 = htons ((u_short) tv.tv_sec);
+
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "cisco output: <0x%lx 0x%lx 0x%lx 0x%x 0x%x-0x%x>\n",
+ SPP_ARGS(ifp), (u_long)ntohl (ch->type), (u_long)ch->par1,
+ (u_long)ch->par2, (u_int)ch->rel, (u_int)ch->time0, (u_int)ch->time1);
+
+ if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
+ ifp->if_oerrors++;
+}
+
+/*
+ * PPP protocol implementation.
+ */
+
+/*
+ * Send PPP control protocol packet.
+ */
+static void
+sppp_cp_send(struct sppp *sp, u_short proto, u_char type,
+ u_char ident, u_short len, void *data)
+{
+ STDDCL;
+ struct ppp_header *h;
+ struct lcp_header *lh;
+ struct mbuf *m;
+
+ if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN)
+ len = MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN;
+ MGETHDR (m, M_DONTWAIT, MT_DATA);
+ if (! m)
+ return;
+ m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len;
+ m->m_pkthdr.rcvif = 0;
+
+ h = mtod (m, struct ppp_header*);
+ h->address = PPP_ALLSTATIONS; /* broadcast address */
+ h->control = PPP_UI; /* Unnumbered Info */
+ h->protocol = htons (proto); /* Link Control Protocol */
+
+ lh = (struct lcp_header*) (h + 1);
+ lh->type = type;
+ lh->ident = ident;
+ lh->len = htons (LCP_HEADER_LEN + len);
+ if (len)
+ bcopy (data, lh+1, len);
+
+ if (debug) {
+ log(LOG_DEBUG, SPP_FMT "%s output <%s id=0x%x len=%d",
+ SPP_ARGS(ifp),
+ sppp_proto_name(proto),
+ sppp_cp_type_name (lh->type), lh->ident,
+ ntohs (lh->len));
+ sppp_print_bytes ((u_char*) (lh+1), len);
+ log(-1, ">\n");
+ }
+ if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
+ ifp->if_oerrors++;
+}
+
+/*
+ * Handle incoming PPP control protocol packets.
+ */
+static void
+sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
+{
+ STDDCL;
+ struct lcp_header *h;
+ int len = m->m_pkthdr.len;
+ int rv;
+ u_char *p;
+
+ if (len < 4) {
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "%s invalid packet length: %d bytes\n",
+ SPP_ARGS(ifp), cp->name, len);
+ return;
+ }
+ h = mtod (m, struct lcp_header*);
+ if (debug) {
+ log(LOG_DEBUG,
+ SPP_FMT "%s input(%s): <%s id=0x%x len=%d",
+ SPP_ARGS(ifp), cp->name,
+ sppp_state_name(sp->state[cp->protoidx]),
+ sppp_cp_type_name (h->type), h->ident, ntohs (h->len));
+ sppp_print_bytes ((u_char*) (h+1), len-4);
+ log(-1, ">\n");
+ }
+ if (len > ntohs (h->len))
+ len = ntohs (h->len);
+ p = (u_char *)(h + 1);
+ switch (h->type) {
+ case CONF_REQ:
+ if (len < 4) {
+ if (debug)
+ log(-1, SPP_FMT "%s invalid conf-req length %d\n",
+ SPP_ARGS(ifp), cp->name,
+ len);
+ ++ifp->if_ierrors;
+ break;
+ }
+ /* handle states where RCR doesn't get a SCA/SCN */
+ switch (sp->state[cp->protoidx]) {
+ case STATE_CLOSING:
+ case STATE_STOPPING:
+ return;
+ case STATE_CLOSED:
+ sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident,
+ 0, 0);
+ return;
+ }
+ rv = (cp->RCR)(sp, h, len);
+ switch (sp->state[cp->protoidx]) {
+ case STATE_OPENED:
+ (cp->tld)(sp);
+ (cp->scr)(sp);
+ /* FALLTHROUGH */
+ case STATE_ACK_SENT:
+ case STATE_REQ_SENT:
+ /*
+ * sppp_cp_change_state() has the side effect of
+ * restarting the timeouts.  We want to avoid that
+ * if the state doesn't change; otherwise we would
+ * never time out and resend a configuration request
+ * that got lost.
+ */
+ if (sp->state[cp->protoidx] == (rv ? STATE_ACK_SENT:
+ STATE_REQ_SENT))
+ break;
+ sppp_cp_change_state(cp, sp, rv?
+ STATE_ACK_SENT: STATE_REQ_SENT);
+ break;
+ case STATE_STOPPED:
+ sp->rst_counter[cp->protoidx] = sp->lcp.max_configure;
+ (cp->scr)(sp);
+ sppp_cp_change_state(cp, sp, rv?
+ STATE_ACK_SENT: STATE_REQ_SENT);
+ break;
+ case STATE_ACK_RCVD:
+ if (rv) {
+ sppp_cp_change_state(cp, sp, STATE_OPENED);
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "%s tlu\n",
+ SPP_ARGS(ifp),
+ cp->name);
+ (cp->tlu)(sp);
+ } else
+ sppp_cp_change_state(cp, sp, STATE_ACK_RCVD);
+ break;
+ default:
+ printf(SPP_FMT "%s illegal %s in state %s\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_cp_type_name(h->type),
+ sppp_state_name(sp->state[cp->protoidx]));
+ ++ifp->if_ierrors;
+ }
+ break;
+ case CONF_ACK:
+ if (h->ident != sp->confid[cp->protoidx]) {
+ if (debug)
+ log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n",
+ SPP_ARGS(ifp), cp->name,
+ h->ident, sp->confid[cp->protoidx]);
+ ++ifp->if_ierrors;
+ break;
+ }
+ switch (sp->state[cp->protoidx]) {
+ case STATE_CLOSED:
+ case STATE_STOPPED:
+ sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0);
+ break;
+ case STATE_CLOSING:
+ case STATE_STOPPING:
+ break;
+ case STATE_REQ_SENT:
+ sp->rst_counter[cp->protoidx] = sp->lcp.max_configure;
+ sppp_cp_change_state(cp, sp, STATE_ACK_RCVD);
+ break;
+ case STATE_OPENED:
+ (cp->tld)(sp);
+ /* FALLTHROUGH */
+ case STATE_ACK_RCVD:
+ (cp->scr)(sp);
+ sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
+ break;
+ case STATE_ACK_SENT:
+ sp->rst_counter[cp->protoidx] = sp->lcp.max_configure;
+ sppp_cp_change_state(cp, sp, STATE_OPENED);
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "%s tlu\n",
+ SPP_ARGS(ifp), cp->name);
+ (cp->tlu)(sp);
+ break;
+ default:
+ printf(SPP_FMT "%s illegal %s in state %s\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_cp_type_name(h->type),
+ sppp_state_name(sp->state[cp->protoidx]));
+ ++ifp->if_ierrors;
+ }
+ break;
+ case CONF_NAK:
+ case CONF_REJ:
+ if (h->ident != sp->confid[cp->protoidx]) {
+ if (debug)
+ log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n",
+ SPP_ARGS(ifp), cp->name,
+ h->ident, sp->confid[cp->protoidx]);
+ ++ifp->if_ierrors;
+ break;
+ }
+ if (h->type == CONF_NAK)
+ (cp->RCN_nak)(sp, h, len);
+ else /* CONF_REJ */
+ (cp->RCN_rej)(sp, h, len);
+
+ switch (sp->state[cp->protoidx]) {
+ case STATE_CLOSED:
+ case STATE_STOPPED:
+ sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0);
+ break;
+ case STATE_REQ_SENT:
+ case STATE_ACK_SENT:
+ sp->rst_counter[cp->protoidx] = sp->lcp.max_configure;
+ /*
+ * Slow things down a bit if we think we might be
+ * in loopback. Depend on the timeout to send the
+ * next configuration request.
+ */
+ if (sp->pp_loopcnt)
+ break;
+ (cp->scr)(sp);
+ break;
+ case STATE_OPENED:
+ (cp->tld)(sp);
+ /* FALLTHROUGH */
+ case STATE_ACK_RCVD:
+ sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
+ (cp->scr)(sp);
+ break;
+ case STATE_CLOSING:
+ case STATE_STOPPING:
+ break;
+ default:
+ printf(SPP_FMT "%s illegal %s in state %s\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_cp_type_name(h->type),
+ sppp_state_name(sp->state[cp->protoidx]));
+ ++ifp->if_ierrors;
+ }
+ break;
+
+ case TERM_REQ:
+ switch (sp->state[cp->protoidx]) {
+ case STATE_ACK_RCVD:
+ case STATE_ACK_SENT:
+ sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
+ /* FALLTHROUGH */
+ case STATE_CLOSED:
+ case STATE_STOPPED:
+ case STATE_CLOSING:
+ case STATE_STOPPING:
+ case STATE_REQ_SENT:
+ sta:
+ /* Send Terminate-Ack packet. */
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "%s send terminate-ack\n",
+ SPP_ARGS(ifp), cp->name);
+ sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0);
+ break;
+ case STATE_OPENED:
+ (cp->tld)(sp);
+ sp->rst_counter[cp->protoidx] = 0;
+ sppp_cp_change_state(cp, sp, STATE_STOPPING);
+ goto sta;
+ break;
+ default:
+ printf(SPP_FMT "%s illegal %s in state %s\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_cp_type_name(h->type),
+ sppp_state_name(sp->state[cp->protoidx]));
+ ++ifp->if_ierrors;
+ }
+ break;
+ case TERM_ACK:
+ switch (sp->state[cp->protoidx]) {
+ case STATE_CLOSED:
+ case STATE_STOPPED:
+ case STATE_REQ_SENT:
+ case STATE_ACK_SENT:
+ break;
+ case STATE_CLOSING:
+ sppp_cp_change_state(cp, sp, STATE_CLOSED);
+ (cp->tlf)(sp);
+ break;
+ case STATE_STOPPING:
+ sppp_cp_change_state(cp, sp, STATE_STOPPED);
+ (cp->tlf)(sp);
+ break;
+ case STATE_ACK_RCVD:
+ sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
+ break;
+ case STATE_OPENED:
+ (cp->tld)(sp);
+ (cp->scr)(sp);
+ sppp_cp_change_state(cp, sp, STATE_ACK_RCVD);
+ break;
+ default:
+ printf(SPP_FMT "%s illegal %s in state %s\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_cp_type_name(h->type),
+ sppp_state_name(sp->state[cp->protoidx]));
+ ++ifp->if_ierrors;
+ }
+ break;
+ case CODE_REJ:
+ /* XXX catastrophic rejects (RXJ-) aren't handled yet. */
+ log(LOG_INFO,
+ SPP_FMT "%s: ignoring RXJ (%s) for proto 0x%x, "
+ "danger will robinson\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_cp_type_name(h->type), ntohs(*((u_short *)p)));
+ switch (sp->state[cp->protoidx]) {
+ case STATE_CLOSED:
+ case STATE_STOPPED:
+ case STATE_REQ_SENT:
+ case STATE_ACK_SENT:
+ case STATE_CLOSING:
+ case STATE_STOPPING:
+ case STATE_OPENED:
+ break;
+ case STATE_ACK_RCVD:
+ sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
+ break;
+ default:
+ printf(SPP_FMT "%s illegal %s in state %s\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_cp_type_name(h->type),
+ sppp_state_name(sp->state[cp->protoidx]));
+ ++ifp->if_ierrors;
+ }
+ break;
+ case PROTO_REJ:
+ {
+ int catastrophic;
+ const struct cp *upper;
+ int i;
+ u_int16_t proto;
+
+ catastrophic = 0;
+ upper = NULL;
+ proto = ntohs(*((u_int16_t *)p));
+ for (i = 0; i < IDX_COUNT; i++) {
+ if (cps[i]->proto == proto) {
+ upper = cps[i];
+ break;
+ }
+ }
+ if (upper == NULL)
+ catastrophic++;
+
+ if (catastrophic || debug)
+ log(catastrophic? LOG_INFO: LOG_DEBUG,
+ SPP_FMT "%s: RXJ%c (%s) for proto 0x%x (%s/%s)\n",
+ SPP_ARGS(ifp), cp->name, catastrophic ? '-' : '+',
+ sppp_cp_type_name(h->type), proto,
+ upper ? upper->name : "unknown",
+ upper ? sppp_state_name(sp->state[upper->protoidx]) : "?");
+
+ /*
+ * If we got RXJ+ against a conf-req, the peer does not implement
+ * this particular protocol type.  Terminate the protocol.
+ */
+ if (upper && !catastrophic) {
+ if (sp->state[upper->protoidx] == STATE_REQ_SENT) {
+ upper->Close(sp);
+ break;
+ }
+ }
+
+ /* XXX catastrophic rejects (RXJ-) aren't handled yet. */
+ switch (sp->state[cp->protoidx]) {
+ case STATE_CLOSED:
+ case STATE_STOPPED:
+ case STATE_REQ_SENT:
+ case STATE_ACK_SENT:
+ case STATE_CLOSING:
+ case STATE_STOPPING:
+ case STATE_OPENED:
+ break;
+ case STATE_ACK_RCVD:
+ sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
+ break;
+ default:
+ printf(SPP_FMT "%s illegal %s in state %s\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_cp_type_name(h->type),
+ sppp_state_name(sp->state[cp->protoidx]));
+ ++ifp->if_ierrors;
+ }
+ break;
+ }
+ case DISC_REQ:
+ if (cp->proto != PPP_LCP)
+ goto illegal;
+ /* Discard the packet. */
+ break;
+ case ECHO_REQ:
+ if (cp->proto != PPP_LCP)
+ goto illegal;
+ if (sp->state[cp->protoidx] != STATE_OPENED) {
+ if (debug)
+ log(-1, SPP_FMT "lcp echo req but lcp closed\n",
+ SPP_ARGS(ifp));
+ ++ifp->if_ierrors;
+ break;
+ }
+ if (len < 8) {
+ if (debug)
+ log(-1, SPP_FMT "invalid lcp echo request "
+ "packet length: %d bytes\n",
+ SPP_ARGS(ifp), len);
+ break;
+ }
+ if ((sp->lcp.opts & (1 << LCP_OPT_MAGIC)) &&
+ ntohl (*(long*)(h+1)) == sp->lcp.magic) {
+ /* Line loopback mode detected. */
+ printf(SPP_FMT "loopback\n", SPP_ARGS(ifp));
+ sp->pp_loopcnt = MAXALIVECNT * 5;
+ if_down (ifp);
+ sppp_qflush (&sp->pp_cpq);
+
+ /* Shut down the PPP link. */
+ /* XXX */
+ lcp.Down(sp);
+ lcp.Up(sp);
+ break;
+ }
+ *(long*)(h+1) = htonl (sp->lcp.magic);
+ if (debug)
+ log(-1, SPP_FMT "got lcp echo req, sending echo rep\n",
+ SPP_ARGS(ifp));
+ sppp_cp_send (sp, PPP_LCP, ECHO_REPLY, h->ident, len-4, h+1);
+ break;
+ case ECHO_REPLY:
+ if (cp->proto != PPP_LCP)
+ goto illegal;
+ if (h->ident != sp->lcp.echoid) {
+ ++ifp->if_ierrors;
+ break;
+ }
+ if (len < 8) {
+ if (debug)
+ log(-1, SPP_FMT "lcp invalid echo reply "
+ "packet length: %d bytes\n",
+ SPP_ARGS(ifp), len);
+ break;
+ }
+ if (debug)
+ log(-1, SPP_FMT "lcp got echo rep\n",
+ SPP_ARGS(ifp));
+ if (!(sp->lcp.opts & (1 << LCP_OPT_MAGIC)) ||
+ ntohl (*(long*)(h+1)) != sp->lcp.magic)
+ sp->pp_alivecnt = 0;
+ break;
+ default:
+ /* Unknown packet type -- send Code-Reject packet. */
+ illegal:
+ if (debug)
+ log(-1, SPP_FMT "%s send code-rej for 0x%x\n",
+ SPP_ARGS(ifp), cp->name, h->type);
+ sppp_cp_send(sp, cp->proto, CODE_REJ,
+ ++sp->pp_seq[cp->protoidx], m->m_pkthdr.len, h);
+ ++ifp->if_ierrors;
+ }
+}
+
+
+/*
+ * The generic part of all Up/Down/Open/Close/TO event handlers.
+ * Basically, the state transition handling in the automaton.
+ */
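+/*
+ * For orientation, a few rows of the RFC 1661 automaton as encoded
+ * below: an Up event in state Initial merely moves to Closed, while in
+ * Starting it sends a configure request (scr) and moves to Req-Sent; a
+ * Down event in Opened runs This-Layer-Down (tld) and falls back to
+ * Starting.
+ */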
+static void
+sppp_up_event(const struct cp *cp, struct sppp *sp)
+{
+ STDDCL;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "%s up(%s)\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_state_name(sp->state[cp->protoidx]));
+
+ switch (sp->state[cp->protoidx]) {
+ case STATE_INITIAL:
+ sppp_cp_change_state(cp, sp, STATE_CLOSED);
+ break;
+ case STATE_STARTING:
+ sp->rst_counter[cp->protoidx] = sp->lcp.max_configure;
+ (cp->scr)(sp);
+ sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
+ break;
+ default:
+ printf(SPP_FMT "%s illegal up in state %s\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_state_name(sp->state[cp->protoidx]));
+ }
+}
+
+static void
+sppp_down_event(const struct cp *cp, struct sppp *sp)
+{
+ STDDCL;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "%s down(%s)\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_state_name(sp->state[cp->protoidx]));
+
+ switch (sp->state[cp->protoidx]) {
+ case STATE_CLOSED:
+ case STATE_CLOSING:
+ sppp_cp_change_state(cp, sp, STATE_INITIAL);
+ break;
+ case STATE_STOPPED:
+ sppp_cp_change_state(cp, sp, STATE_STARTING);
+ (cp->tls)(sp);
+ break;
+ case STATE_STOPPING:
+ case STATE_REQ_SENT:
+ case STATE_ACK_RCVD:
+ case STATE_ACK_SENT:
+ sppp_cp_change_state(cp, sp, STATE_STARTING);
+ break;
+ case STATE_OPENED:
+ (cp->tld)(sp);
+ sppp_cp_change_state(cp, sp, STATE_STARTING);
+ break;
+ default:
+ printf(SPP_FMT "%s illegal down in state %s\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_state_name(sp->state[cp->protoidx]));
+ }
+}
+
+static void
+sppp_open_event(const struct cp *cp, struct sppp *sp)
+{
+ STDDCL;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "%s open(%s)\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_state_name(sp->state[cp->protoidx]));
+
+ switch (sp->state[cp->protoidx]) {
+ case STATE_INITIAL:
+ sppp_cp_change_state(cp, sp, STATE_STARTING);
+ (cp->tls)(sp);
+ break;
+ case STATE_STARTING:
+ break;
+ case STATE_CLOSED:
+ sp->rst_counter[cp->protoidx] = sp->lcp.max_configure;
+ (cp->scr)(sp);
+ sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
+ break;
+ case STATE_STOPPED:
+ /*
+ * Try escaping stopped state. This seems to bite
+ * people occasionally, in particular for IPCP,
+ * presumably following previous IPCP negotiation
+ * aborts. Somehow, we must have missed a Down event
+ * which would have caused a transition into starting
+ * state, so as a bandaid we force the Down event now.
+ * This effectively implements (something like the)
+ * `restart' option mentioned in the state transition
+ * table of RFC 1661.
+ */
+ sppp_cp_change_state(cp, sp, STATE_STARTING);
+ (cp->tls)(sp);
+ break;
+ case STATE_STOPPING:
+ case STATE_REQ_SENT:
+ case STATE_ACK_RCVD:
+ case STATE_ACK_SENT:
+ case STATE_OPENED:
+ break;
+ case STATE_CLOSING:
+ sppp_cp_change_state(cp, sp, STATE_STOPPING);
+ break;
+ }
+}
+
+static void
+sppp_close_event(const struct cp *cp, struct sppp *sp)
+{
+ STDDCL;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "%s close(%s)\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_state_name(sp->state[cp->protoidx]));
+
+ switch (sp->state[cp->protoidx]) {
+ case STATE_INITIAL:
+ case STATE_CLOSED:
+ case STATE_CLOSING:
+ break;
+ case STATE_STARTING:
+ sppp_cp_change_state(cp, sp, STATE_INITIAL);
+ (cp->tlf)(sp);
+ break;
+ case STATE_STOPPED:
+ sppp_cp_change_state(cp, sp, STATE_CLOSED);
+ break;
+ case STATE_STOPPING:
+ sppp_cp_change_state(cp, sp, STATE_CLOSING);
+ break;
+ case STATE_OPENED:
+ (cp->tld)(sp);
+ /* FALLTHROUGH */
+ case STATE_REQ_SENT:
+ case STATE_ACK_RCVD:
+ case STATE_ACK_SENT:
+ sp->rst_counter[cp->protoidx] = sp->lcp.max_terminate;
+ sppp_cp_send(sp, cp->proto, TERM_REQ,
+ ++sp->pp_seq[cp->protoidx], 0, 0);
+ sppp_cp_change_state(cp, sp, STATE_CLOSING);
+ break;
+ }
+}
+
+static void
+sppp_to_event(const struct cp *cp, struct sppp *sp)
+{
+ STDDCL;
+ int s;
+
+ s = splimp();
+ SPPP_LOCK(sp);
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "%s TO(%s) rst_counter = %d\n",
+ SPP_ARGS(ifp), cp->name,
+ sppp_state_name(sp->state[cp->protoidx]),
+ sp->rst_counter[cp->protoidx]);
+
+ if (--sp->rst_counter[cp->protoidx] < 0)
+ /* TO- event */
+ switch (sp->state[cp->protoidx]) {
+ case STATE_CLOSING:
+ sppp_cp_change_state(cp, sp, STATE_CLOSED);
+ (cp->tlf)(sp);
+ break;
+ case STATE_STOPPING:
+ sppp_cp_change_state(cp, sp, STATE_STOPPED);
+ (cp->tlf)(sp);
+ break;
+ case STATE_REQ_SENT:
+ case STATE_ACK_RCVD:
+ case STATE_ACK_SENT:
+ sppp_cp_change_state(cp, sp, STATE_STOPPED);
+ (cp->tlf)(sp);
+ break;
+ }
+ else
+ /* TO+ event */
+ switch (sp->state[cp->protoidx]) {
+ case STATE_CLOSING:
+ case STATE_STOPPING:
+ sppp_cp_send(sp, cp->proto, TERM_REQ,
+ ++sp->pp_seq[cp->protoidx], 0, 0);
+ callout_reset(&sp->ch[cp->protoidx], sp->lcp.timeout,
+ cp->TO, (void *)sp);
+ break;
+ case STATE_REQ_SENT:
+ case STATE_ACK_RCVD:
+ (cp->scr)(sp);
+ /* sppp_cp_change_state() will restart the timer */
+ sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
+ break;
+ case STATE_ACK_SENT:
+ (cp->scr)(sp);
+ callout_reset(&sp->ch[cp->protoidx], sp->lcp.timeout,
+ cp->TO, (void *)sp);
+ break;
+ }
+
+ SPPP_UNLOCK(sp);
+ splx(s);
+}
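+
+/*
+ * Note for sppp_to_event() above: a TO- event (restart counter
+ * exhausted) always ends in a this-layer-finished (tlf) action, while
+ * a TO+ event retransmits the pending request and re-arms the timeout.
+ */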
+
+/*
+ * Change the state of a control protocol in the state automaton.
+ * Takes care of starting/stopping the restart timer.
+ */
+static void
+sppp_cp_change_state(const struct cp *cp, struct sppp *sp, int newstate)
+{
+ sp->state[cp->protoidx] = newstate;
+
+ callout_stop (&sp->ch[cp->protoidx]);
+
+ switch (newstate) {
+ case STATE_INITIAL:
+ case STATE_STARTING:
+ case STATE_CLOSED:
+ case STATE_STOPPED:
+ case STATE_OPENED:
+ break;
+ case STATE_CLOSING:
+ case STATE_STOPPING:
+ case STATE_REQ_SENT:
+ case STATE_ACK_RCVD:
+ case STATE_ACK_SENT:
+ callout_reset(&sp->ch[cp->protoidx], sp->lcp.timeout,
+ cp->TO, (void *)sp);
+ break;
+ }
+}
+
+/*
+ *--------------------------------------------------------------------------*
+ * *
+ * The LCP implementation. *
+ * *
+ *--------------------------------------------------------------------------*
+ */
+static void
+sppp_pp_up(struct sppp *sp)
+{
+ SPPP_LOCK(sp);
+ lcp.Up(sp);
+ SPPP_UNLOCK(sp);
+}
+
+static void
+sppp_pp_down(struct sppp *sp)
+{
+ SPPP_LOCK(sp);
+ lcp.Down(sp);
+ SPPP_UNLOCK(sp);
+}
+
+static void
+sppp_lcp_init(struct sppp *sp)
+{
+ sp->lcp.opts = (1 << LCP_OPT_MAGIC);
+ sp->lcp.magic = 0;
+ sp->state[IDX_LCP] = STATE_INITIAL;
+ sp->fail_counter[IDX_LCP] = 0;
+ sp->pp_seq[IDX_LCP] = 0;
+ sp->pp_rseq[IDX_LCP] = 0;
+ sp->lcp.protos = 0;
+ sp->lcp.mru = sp->lcp.their_mru = PP_MTU;
+
+ /* Note that these values are relevant for all control protocols */
+ sp->lcp.timeout = 3 * hz;
+ sp->lcp.max_terminate = 2;
+ sp->lcp.max_configure = 10;
+ sp->lcp.max_failure = 10;
+ callout_init(&sp->ch[IDX_LCP], CALLOUT_MPSAFE);
+}
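+
+/*
+ * The timeout and counter defaults set in sppp_lcp_init() follow RFC
+ * 1661 section 4.6 (restart timer 3 seconds, Max-Terminate 2,
+ * Max-Configure 10); max_failure deviates from the suggested default
+ * of 5.
+ */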
+
+static void
+sppp_lcp_up(struct sppp *sp)
+{
+ STDDCL;
+
+ sp->pp_alivecnt = 0;
+ sp->lcp.opts = (1 << LCP_OPT_MAGIC);
+ sp->lcp.magic = 0;
+ sp->lcp.protos = 0;
+ sp->lcp.mru = sp->lcp.their_mru = PP_MTU;
+ /*
+ * If we are authenticator, negotiate LCP_AUTH
+ */
+ if (sp->hisauth.proto != 0)
+ sp->lcp.opts |= (1 << LCP_OPT_AUTH_PROTO);
+ else
+ sp->lcp.opts &= ~(1 << LCP_OPT_AUTH_PROTO);
+ sp->pp_flags &= ~PP_NEEDAUTH;
+ /*
+ * If this interface is passive or dial-on-demand, and we are
+ * still in Initial state, it means we've got an incoming
+ * call. Activate the interface.
+ */
+ if ((ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) != 0) {
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "Up event", SPP_ARGS(ifp));
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if (sp->state[IDX_LCP] == STATE_INITIAL) {
+ if (debug)
+ log(-1, "(incoming call)\n");
+ sp->pp_flags |= PP_CALLIN;
+ lcp.Open(sp);
+ } else if (debug)
+ log(-1, "\n");
+ } else if ((ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) == 0 &&
+ (sp->state[IDX_LCP] == STATE_INITIAL)) {
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ lcp.Open(sp);
+ }
+
+ sppp_up_event(&lcp, sp);
+}
+
+static void
+sppp_lcp_down(struct sppp *sp)
+{
+ STDDCL;
+
+ sppp_down_event(&lcp, sp);
+
+ /*
+ * If this is neither a dial-on-demand nor a passive
+ * interface, simulate an ``ifconfig down'' action, so the
+ * administrator can force a redial by another ``ifconfig
+ * up''. XXX For leased line operation, should we immediately
+ * try to reopen the connection here?
+ */
+ if ((ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) == 0) {
+ log(LOG_INFO,
+ SPP_FMT "Down event, taking interface down.\n",
+ SPP_ARGS(ifp));
+ if_down(ifp);
+ } else {
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "Down event (carrier loss)\n",
+ SPP_ARGS(ifp));
+ sp->pp_flags &= ~PP_CALLIN;
+ if (sp->state[IDX_LCP] != STATE_INITIAL)
+ lcp.Close(sp);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ }
+}
+
+static void
+sppp_lcp_open(struct sppp *sp)
+{
+ sppp_open_event(&lcp, sp);
+}
+
+static void
+sppp_lcp_close(struct sppp *sp)
+{
+ sppp_close_event(&lcp, sp);
+}
+
+static void
+sppp_lcp_TO(void *cookie)
+{
+ sppp_to_event(&lcp, (struct sppp *)cookie);
+}
+
+/*
+ * Analyze a configure request. Return true if it was agreeable, and
+ * caused action sca, false if it has been rejected or nak'ed, and
+ * caused action scn. (The return value is used to make the state
+ * transition decision in the state automaton.)
+ */
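+/*
+ * The parse below is done in two passes: pass 1 collects malformed or
+ * unsupported options for a Configure-Reject, pass 2 collects options
+ * whose values we cannot accept for a Configure-Nak.
+ */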
+static int
+sppp_lcp_RCR(struct sppp *sp, struct lcp_header *h, int len)
+{
+ STDDCL;
+ u_char *buf, *r, *p;
+ int origlen, rlen;
+ u_long nmagic;
+ u_short authproto;
+
+ len -= 4;
+ origlen = len;
+ buf = r = malloc (len, M_TEMP, M_NOWAIT);
+ if (! buf)
+ return (0);
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "lcp parse opts: ",
+ SPP_ARGS(ifp));
+
+ /* pass 1: check for things that need to be rejected */
+ p = (void*) (h+1);
+ for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
+ len-=p[1], p+=p[1]) {
+ if (debug)
+ log(-1, " %s ", sppp_lcp_opt_name(*p));
+ switch (*p) {
+ case LCP_OPT_MAGIC:
+ /* Magic number. */
+ if (len >= 6 && p[1] == 6)
+ continue;
+ if (debug)
+ log(-1, "[invalid] ");
+ break;
+ case LCP_OPT_ASYNC_MAP:
+ /* Async control character map. */
+ if (len >= 6 && p[1] == 6)
+ continue;
+ if (debug)
+ log(-1, "[invalid] ");
+ break;
+ case LCP_OPT_MRU:
+ /* Maximum receive unit. */
+ if (len >= 4 && p[1] == 4)
+ continue;
+ if (debug)
+ log(-1, "[invalid] ");
+ break;
+ case LCP_OPT_AUTH_PROTO:
+ if (len < 4) {
+ if (debug)
+ log(-1, "[invalid] ");
+ break;
+ }
+ authproto = (p[2] << 8) + p[3];
+ if (authproto == PPP_CHAP && p[1] != 5) {
+ if (debug)
+ log(-1, "[invalid chap len] ");
+ break;
+ }
+ if (sp->myauth.proto == 0) {
+ /* we are not configured to do auth */
+ if (debug)
+ log(-1, "[not configured] ");
+ break;
+ }
+ /*
+ * The remote wants us to authenticate; remember this
+ * so we stay in PHASE_AUTHENTICATE after LCP has
+ * come up.
+ */
+ sp->pp_flags |= PP_NEEDAUTH;
+ continue;
+ default:
+ /* Others not supported. */
+ if (debug)
+ log(-1, "[rej] ");
+ break;
+ }
+ /* Add the option to rejected list. */
+ bcopy (p, r, p[1]);
+ r += p[1];
+ rlen += p[1];
+ }
+ if (rlen) {
+ if (debug)
+ log(-1, " send conf-rej\n");
+ sppp_cp_send (sp, PPP_LCP, CONF_REJ, h->ident, rlen, buf);
+ free (buf, M_TEMP);
+ return 0;
+ } else if (debug)
+ log(-1, "\n");
+
+ /*
+ * pass 2: check for option values that are unacceptable and
+ * thus require to be nak'ed.
+ */
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "lcp parse opt values: ",
+ SPP_ARGS(ifp));
+
+ p = (void*) (h+1);
+ len = origlen;
+ for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
+ len-=p[1], p+=p[1]) {
+ if (debug)
+ log(-1, " %s ", sppp_lcp_opt_name(*p));
+ switch (*p) {
+ case LCP_OPT_MAGIC:
+ /* Magic number -- extract. */
+ nmagic = (u_long)p[2] << 24 |
+ (u_long)p[3] << 16 | p[4] << 8 | p[5];
+ if (nmagic != sp->lcp.magic) {
+ sp->pp_loopcnt = 0;
+ if (debug)
+ log(-1, "0x%lx ", nmagic);
+ continue;
+ }
+ if (debug && sp->pp_loopcnt < MAXALIVECNT*5)
+ log(-1, "[glitch] ");
+ ++sp->pp_loopcnt;
+ /*
+ * We negate our magic here, and NAK it. If
+ * we see it later in a NAK packet, we
+ * suggest a new one.
+ */
+ nmagic = ~sp->lcp.magic;
+ /* Gonna NAK it. */
+ p[2] = nmagic >> 24;
+ p[3] = nmagic >> 16;
+ p[4] = nmagic >> 8;
+ p[5] = nmagic;
+ break;
+
+ case LCP_OPT_ASYNC_MAP:
+ /*
+ * Async control character map -- just ignore it.
+ *
+ * Quote from RFC 1662, chapter 6:
+ * To enable this functionality, synchronous PPP
+ * implementations MUST always respond to the
+ * Async-Control-Character-Map Configuration
+ * Option with the LCP Configure-Ack. However,
+ * acceptance of the Configuration Option does
+ * not imply that the synchronous implementation
+ * will do any ACCM mapping. Instead, all such
+ * octet mapping will be performed by the
+ * asynchronous-to-synchronous converter.
+ */
+ continue;
+
+ case LCP_OPT_MRU:
+ /*
+ * Maximum receive unit. Always agreeable,
+ * but ignored for now.
+ */
+ sp->lcp.their_mru = p[2] * 256 + p[3];
+ if (debug)
+ log(-1, "%lu ", sp->lcp.their_mru);
+ continue;
+
+ case LCP_OPT_AUTH_PROTO:
+ authproto = (p[2] << 8) + p[3];
+ if (sp->myauth.proto != authproto) {
+ /* not agreed, nak */
+ if (debug)
+ log(-1, "[mine %s != his %s] ",
+ sppp_proto_name(sp->hisauth.proto),
+ sppp_proto_name(authproto));
+ p[2] = sp->myauth.proto >> 8;
+ p[3] = sp->myauth.proto;
+ break;
+ }
+ if (authproto == PPP_CHAP && p[4] != CHAP_MD5) {
+ if (debug)
+ log(-1, "[chap not MD5] ");
+ p[4] = CHAP_MD5;
+ break;
+ }
+ continue;
+ }
+ /* Add the option to nak'ed list. */
+ bcopy (p, r, p[1]);
+ r += p[1];
+ rlen += p[1];
+ }
+ if (rlen) {
+ /*
+ * Local and remote magics equal -- loopback?
+ */
+ if (sp->pp_loopcnt >= MAXALIVECNT*5) {
+ if (sp->pp_loopcnt == MAXALIVECNT*5)
+ printf (SPP_FMT "loopback\n",
+ SPP_ARGS(ifp));
+ if (ifp->if_flags & IFF_UP) {
+ if_down(ifp);
+ sppp_qflush(&sp->pp_cpq);
+ /* XXX ? */
+ lcp.Down(sp);
+ lcp.Up(sp);
+ }
+ } else if (!sp->pp_loopcnt &&
+ ++sp->fail_counter[IDX_LCP] >= sp->lcp.max_failure) {
+ if (debug)
+ log(-1, " max_failure (%d) exceeded, "
+ "send conf-rej\n",
+ sp->lcp.max_failure);
+ sppp_cp_send(sp, PPP_LCP, CONF_REJ, h->ident, rlen, buf);
+ } else {
+ if (debug)
+ log(-1, " send conf-nak\n");
+ sppp_cp_send (sp, PPP_LCP, CONF_NAK, h->ident, rlen, buf);
+ }
+ } else {
+ if (debug)
+ log(-1, " send conf-ack\n");
+ sp->fail_counter[IDX_LCP] = 0;
+ sp->pp_loopcnt = 0;
+ sppp_cp_send (sp, PPP_LCP, CONF_ACK,
+ h->ident, origlen, h+1);
+ }
+
+ free (buf, M_TEMP);
+ return (rlen == 0);
+}
+
+/*
+ * Analyze the LCP Configure-Reject option list, and adjust our
+ * negotiation.
+ */
+static void
+sppp_lcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len)
+{
+ STDDCL;
+ u_char *buf, *p;
+
+ len -= 4;
+ buf = malloc (len, M_TEMP, M_NOWAIT);
+ if (!buf)
+ return;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "lcp rej opts: ",
+ SPP_ARGS(ifp));
+
+ p = (void*) (h+1);
+ for (; len >= 2 && p[1] >= 2 && len >= p[1];
+ len -= p[1], p += p[1]) {
+ if (debug)
+ log(-1, " %s ", sppp_lcp_opt_name(*p));
+ switch (*p) {
+ case LCP_OPT_MAGIC:
+ /* Magic number -- can't use it, use 0 */
+ sp->lcp.opts &= ~(1 << LCP_OPT_MAGIC);
+ sp->lcp.magic = 0;
+ break;
+ case LCP_OPT_MRU:
+ /*
+ * Should not be rejected anyway, since we only
+ * negotiate an MRU if explicitly requested by
+ * peer.
+ */
+ sp->lcp.opts &= ~(1 << LCP_OPT_MRU);
+ break;
+ case LCP_OPT_AUTH_PROTO:
+ /*
+ * The peer doesn't want to authenticate itself;
+ * deny, unless this is a dialout call and
+ * AUTHFLAG_NOCALLOUT is set.
+ */
+ if ((sp->pp_flags & PP_CALLIN) == 0 &&
+ (sp->hisauth.flags & AUTHFLAG_NOCALLOUT) != 0) {
+ if (debug)
+ log(-1, "[don't insist on auth "
+ "for callout]");
+ sp->lcp.opts &= ~(1 << LCP_OPT_AUTH_PROTO);
+ break;
+ }
+ if (debug)
+ log(-1, "[access denied]\n");
+ lcp.Close(sp);
+ break;
+ }
+ }
+ if (debug)
+ log(-1, "\n");
+ free (buf, M_TEMP);
+ return;
+}
+
+/*
+ * Analyze the LCP Configure-NAK option list, and adjust our
+ * negotiation.
+ */
+static void
+sppp_lcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len)
+{
+ STDDCL;
+ u_char *buf, *p;
+ u_long magic;
+
+ len -= 4;
+ buf = malloc (len, M_TEMP, M_NOWAIT);
+ if (!buf)
+ return;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "lcp nak opts: ",
+ SPP_ARGS(ifp));
+
+ p = (void*) (h+1);
+ for (; len >= 2 && p[1] >= 2 && len >= p[1];
+ len -= p[1], p += p[1]) {
+ if (debug)
+ log(-1, " %s ", sppp_lcp_opt_name(*p));
+ switch (*p) {
+ case LCP_OPT_MAGIC:
+ /* Magic number -- renegotiate */
+ if ((sp->lcp.opts & (1 << LCP_OPT_MAGIC)) &&
+ len >= 6 && p[1] == 6) {
+ magic = (u_long)p[2] << 24 |
+ (u_long)p[3] << 16 | p[4] << 8 | p[5];
+ /*
+ * If the remote magic is our negated one,
+ * this looks like a loopback problem.
+ * Suggest a new magic to make sure.
+ */
+ if (magic == ~sp->lcp.magic) {
+ if (debug)
+ log(-1, "magic glitch ");
+ sp->lcp.magic = random();
+ } else {
+ sp->lcp.magic = magic;
+ if (debug)
+ log(-1, "%lu ", magic);
+ }
+ }
+ break;
+ case LCP_OPT_MRU:
+ /*
+ * Peer wants to advise us to negotiate an MRU.
+ * Agree on it if it's reasonable, or use
+ * default otherwise.
+ */
+ if (len >= 4 && p[1] == 4) {
+ u_int mru = p[2] * 256 + p[3];
+ if (debug)
+ log(-1, "%d ", mru);
+ if (mru < PP_MTU || mru > PP_MAX_MRU)
+ mru = PP_MTU;
+ sp->lcp.mru = mru;
+ sp->lcp.opts |= (1 << LCP_OPT_MRU);
+ }
+ break;
+ case LCP_OPT_AUTH_PROTO:
+ /*
+ * Peer doesn't like our authentication method,
+ * deny.
+ */
+ if (debug)
+ log(-1, "[access denied]\n");
+ lcp.Close(sp);
+ break;
+ }
+ }
+ if (debug)
+ log(-1, "\n");
+ free (buf, M_TEMP);
+ return;
+}
+
+static void
+sppp_lcp_tlu(struct sppp *sp)
+{
+ STDDCL;
+ int i;
+ u_long mask;
+
+ /* XXX ? */
+ if (! (ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ /* Coming out of loopback mode. */
+ if_up(ifp);
+ printf (SPP_FMT "up\n", SPP_ARGS(ifp));
+ }
+
+ for (i = 0; i < IDX_COUNT; i++)
+ if ((cps[i])->flags & CP_QUAL)
+ (cps[i])->Open(sp);
+
+ if ((sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) != 0 ||
+ (sp->pp_flags & PP_NEEDAUTH) != 0)
+ sp->pp_phase = PHASE_AUTHENTICATE;
+ else
+ sp->pp_phase = PHASE_NETWORK;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp),
+ sppp_phase_name(sp->pp_phase));
+
+ /*
+ * Open all authentication protocols. This is even required
+ * if we already proceeded to network phase, since it might be
+ * that remote wants us to authenticate, so we might have to
+ * send a PAP request. Undesired authentication protocols
+ * don't do anything when they get an Open event.
+ */
+ for (i = 0; i < IDX_COUNT; i++)
+ if ((cps[i])->flags & CP_AUTH)
+ (cps[i])->Open(sp);
+
+ if (sp->pp_phase == PHASE_NETWORK) {
+ /* Notify all NCPs. */
+ for (i = 0; i < IDX_COUNT; i++)
+ if (((cps[i])->flags & CP_NCP) &&
+ /*
+ * XXX
+ * Hack to administratively disable IPv6 if
+ * not desired. Perhaps we should have another
+ * flag for this, but right now, we can make
+ * all struct cp's read/only.
+ */
+ (cps[i] != &ipv6cp ||
+ (sp->confflags & CONF_ENABLE_IPV6)))
+ (cps[i])->Open(sp);
+ }
+
+ /* Send Up events to all started protos. */
+ for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1)
+ if ((sp->lcp.protos & mask) && ((cps[i])->flags & CP_LCP) == 0)
+ (cps[i])->Up(sp);
+
+ /* notify low-level driver of state change */
+ if (sp->pp_chg)
+ sp->pp_chg(sp, (int)sp->pp_phase);
+
+ if (sp->pp_phase == PHASE_NETWORK)
+ /* if no NCP is starting, close down */
+ sppp_lcp_check_and_close(sp);
+}
+
+static void
+sppp_lcp_tld(struct sppp *sp)
+{
+ STDDCL;
+ int i;
+ u_long mask;
+
+ sp->pp_phase = PHASE_TERMINATE;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp),
+ sppp_phase_name(sp->pp_phase));
+
+ /*
+ * Take upper layers down. We send the Down event first and
+ * the Close second to prevent the upper layers from sending
+ * ``a flurry of terminate-request packets'', as the RFC
+ * describes it.
+ */
+ for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1)
+ if ((sp->lcp.protos & mask) && ((cps[i])->flags & CP_LCP) == 0) {
+ (cps[i])->Down(sp);
+ (cps[i])->Close(sp);
+ }
+}
+
+static void
+sppp_lcp_tls(struct sppp *sp)
+{
+ STDDCL;
+
+ sp->pp_phase = PHASE_ESTABLISH;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp),
+ sppp_phase_name(sp->pp_phase));
+
+ /* Notify lower layer if desired. */
+ if (sp->pp_tls)
+ (sp->pp_tls)(sp);
+ else
+ (sp->pp_up)(sp);
+}
+
+static void
+sppp_lcp_tlf(struct sppp *sp)
+{
+ STDDCL;
+
+ sp->pp_phase = PHASE_DEAD;
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp),
+ sppp_phase_name(sp->pp_phase));
+
+ /* Notify lower layer if desired. */
+ if (sp->pp_tlf)
+ (sp->pp_tlf)(sp);
+ else
+ (sp->pp_down)(sp);
+}
+
+static void
+sppp_lcp_scr(struct sppp *sp)
+{
+ char opt[6 /* magicnum */ + 4 /* mru */ + 5 /* chap */];
+ int i = 0;
+ u_short authproto;
+
+ if (sp->lcp.opts & (1 << LCP_OPT_MAGIC)) {
+ if (! sp->lcp.magic)
+ sp->lcp.magic = random();
+ opt[i++] = LCP_OPT_MAGIC;
+ opt[i++] = 6;
+ opt[i++] = sp->lcp.magic >> 24;
+ opt[i++] = sp->lcp.magic >> 16;
+ opt[i++] = sp->lcp.magic >> 8;
+ opt[i++] = sp->lcp.magic;
+ }
+
+ if (sp->lcp.opts & (1 << LCP_OPT_MRU)) {
+ opt[i++] = LCP_OPT_MRU;
+ opt[i++] = 4;
+ opt[i++] = sp->lcp.mru >> 8;
+ opt[i++] = sp->lcp.mru;
+ }
+
+ if (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) {
+ authproto = sp->hisauth.proto;
+ opt[i++] = LCP_OPT_AUTH_PROTO;
+ opt[i++] = authproto == PPP_CHAP? 5: 4;
+ opt[i++] = authproto >> 8;
+ opt[i++] = authproto;
+ if (authproto == PPP_CHAP)
+ opt[i++] = CHAP_MD5;
+ }
+
+ sp->confid[IDX_LCP] = ++sp->pp_seq[IDX_LCP];
+ sppp_cp_send (sp, PPP_LCP, CONF_REQ, sp->confid[IDX_LCP], i, &opt);
+}
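+
+/*
+ * Illustrative wire layout of the conf-req options built above, e.g.
+ * with magic 0x12345678, an MRU of 1500, and CHAP/MD5 as the auth
+ * protocol (example values only; type codes per RFC 1661):
+ *
+ *   05 06 12 34 56 78   magic-number, length 6
+ *   01 04 05 dc         MRU, length 4 (0x05dc = 1500)
+ *   03 05 c2 23 05      auth-protocol, length 5 (0xc223 = CHAP, 5 = MD5)
+ */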
+
+/*
+ * Check the open NCPs, return true if at least one NCP is open.
+ */
+static int
+sppp_ncp_check(struct sppp *sp)
+{
+ int i, mask;
+
+ for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1)
+ if ((sp->lcp.protos & mask) && (cps[i])->flags & CP_NCP)
+ return 1;
+ return 0;
+}
+
+/*
+ * Re-check the open NCPs and see if we should terminate the link.
+ * Called by the NCPs during their tlf action handling.
+ */
+static void
+sppp_lcp_check_and_close(struct sppp *sp)
+{
+
+ if (sp->pp_phase < PHASE_NETWORK)
+ /* don't bother, we are already going down */
+ return;
+
+ if (sppp_ncp_check(sp))
+ return;
+
+ lcp.Close(sp);
+}
+
+/*
+ *--------------------------------------------------------------------------*
+ * *
+ * The IPCP implementation. *
+ * *
+ *--------------------------------------------------------------------------*
+ */
+
+#ifdef INET
+static void
+sppp_ipcp_init(struct sppp *sp)
+{
+ sp->ipcp.opts = 0;
+ sp->ipcp.flags = 0;
+ sp->state[IDX_IPCP] = STATE_INITIAL;
+ sp->fail_counter[IDX_IPCP] = 0;
+ sp->pp_seq[IDX_IPCP] = 0;
+ sp->pp_rseq[IDX_IPCP] = 0;
+ callout_init(&sp->ch[IDX_IPCP], CALLOUT_MPSAFE);
+}
+
+static void
+sppp_ipcp_up(struct sppp *sp)
+{
+ sppp_up_event(&ipcp, sp);
+}
+
+static void
+sppp_ipcp_down(struct sppp *sp)
+{
+ sppp_down_event(&ipcp, sp);
+}
+
+static void
+sppp_ipcp_open(struct sppp *sp)
+{
+ STDDCL;
+ u_long myaddr, hisaddr;
+
+ sp->ipcp.flags &= ~(IPCP_HISADDR_SEEN | IPCP_MYADDR_SEEN |
+ IPCP_MYADDR_DYN | IPCP_VJ);
+ sp->ipcp.opts = 0;
+
+ sppp_get_ip_addrs(sp, &myaddr, &hisaddr, 0);
+ /*
+ * If we don't have his address, this probably means our
+ * interface doesn't want to talk IP at all. (This could
+ * be the case if somebody wants to speak only IPX, for
+ * example.) Don't open IPCP in this case.
+ */
+ if (hisaddr == 0L) {
+ /* XXX this message should go away */
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "ipcp_open(): no IP interface\n",
+ SPP_ARGS(ifp));
+ return;
+ }
+ if (myaddr == 0L) {
+ /*
+ * I don't have an assigned address, so I need to
+ * negotiate my address.
+ */
+ sp->ipcp.flags |= IPCP_MYADDR_DYN;
+ sp->ipcp.opts |= (1 << IPCP_OPT_ADDRESS);
+ } else
+ sp->ipcp.flags |= IPCP_MYADDR_SEEN;
+ if (sp->confflags & CONF_ENABLE_VJ) {
+ sp->ipcp.opts |= (1 << IPCP_OPT_COMPRESSION);
+ sp->ipcp.max_state = MAX_STATES - 1;
+ sp->ipcp.compress_cid = 1;
+ }
+ sppp_open_event(&ipcp, sp);
+}
+
+static void
+sppp_ipcp_close(struct sppp *sp)
+{
+ sppp_close_event(&ipcp, sp);
+ if (sp->ipcp.flags & IPCP_MYADDR_DYN)
+ /*
+ * My address was dynamic, clear it again.
+ */
+ sppp_set_ip_addr(sp, 0L);
+}
+
+static void
+sppp_ipcp_TO(void *cookie)
+{
+ sppp_to_event(&ipcp, (struct sppp *)cookie);
+}
+
+/*
+ * Analyze a configure request. Return true if it was agreeable, and
+ * caused action sca, false if it has been rejected or nak'ed, and
+ * caused action scn. (The return value is used to make the state
+ * transition decision in the state automaton.)
+ */
+static int
+sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len)
+{
+ u_char *buf, *r, *p;
+ struct ifnet *ifp = SP2IFP(sp);
+ int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
+ u_long hisaddr, desiredaddr;
+ int gotmyaddr = 0;
+ int desiredcomp;
+
+ len -= 4;
+ origlen = len;
+ /*
+ * Make sure to allocate a buf that can at least hold a
+ * conf-nak with an `address' option. We might need it below.
+ */
+ buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
+ if (! buf)
+ return (0);
+
+ /* pass 1: see if we can recognize them */
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "ipcp parse opts: ",
+ SPP_ARGS(ifp));
+ p = (void*) (h+1);
+ for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
+ len-=p[1], p+=p[1]) {
+ if (debug)
+ log(-1, " %s ", sppp_ipcp_opt_name(*p));
+ switch (*p) {
+ case IPCP_OPT_COMPRESSION:
+ if (!(sp->confflags & CONF_ENABLE_VJ)) {
+ /* VJ compression administratively disabled */
+ if (debug)
+ log(-1, "[locally disabled] ");
+ break;
+ }
+ /*
+ * In theory, we should only conf-rej an
+ * option that is shorter than RFC 1618
+ * requires (i.e. < 4), and should conf-nak
+ * anything else that is not VJ. However,
+ * since our algorithm always uses the
+ * original option to NAK it with new values,
+ * things would become more complicated. In
+ * practice, the only commonly implemented IP
+ * compression option is VJ anyway, so the
+ * difference is negligible.
+ */
+ if (len >= 6 && p[1] == 6) {
+ /*
+ * correctly formed compression option
+ * that could be VJ compression
+ */
+ continue;
+ }
+ if (debug)
+ log(-1,
+ "optlen %d [invalid/unsupported] ",
+ p[1]);
+ break;
+ case IPCP_OPT_ADDRESS:
+ if (len >= 6 && p[1] == 6) {
+ /* correctly formed address option */
+ continue;
+ }
+ if (debug)
+ log(-1, "[invalid] ");
+ break;
+ default:
+ /* Others not supported. */
+ if (debug)
+ log(-1, "[rej] ");
+ break;
+ }
+ /* Add the option to rejected list. */
+ bcopy (p, r, p[1]);
+ r += p[1];
+ rlen += p[1];
+ }
+ if (rlen) {
+ if (debug)
+ log(-1, " send conf-rej\n");
+ sppp_cp_send (sp, PPP_IPCP, CONF_REJ, h->ident, rlen, buf);
+ free (buf, M_TEMP);
+ return 0;
+ } else if (debug)
+ log(-1, "\n");
+
+ /* pass 2: parse option values */
+ sppp_get_ip_addrs(sp, 0, &hisaddr, 0);
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "ipcp parse opt values: ",
+ SPP_ARGS(ifp));
+ p = (void*) (h+1);
+ len = origlen;
+ for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
+ len-=p[1], p+=p[1]) {
+ if (debug)
+ log(-1, " %s ", sppp_ipcp_opt_name(*p));
+ switch (*p) {
+ case IPCP_OPT_COMPRESSION:
+ desiredcomp = p[2] << 8 | p[3];
+ /* We only support VJ */
+ if (desiredcomp == IPCP_COMP_VJ) {
+ if (debug)
+ log(-1, "VJ [ack] ");
+ sp->ipcp.flags |= IPCP_VJ;
+ sl_compress_init(sp->pp_comp, p[4]);
+ sp->ipcp.max_state = p[4];
+ sp->ipcp.compress_cid = p[5];
+ continue;
+ }
+ if (debug)
+ log(-1,
+ "compproto %#04x [not supported] ",
+ desiredcomp);
+ p[2] = IPCP_COMP_VJ >> 8;
+ p[3] = IPCP_COMP_VJ;
+ p[4] = sp->ipcp.max_state;
+ p[5] = sp->ipcp.compress_cid;
+ break;
+ case IPCP_OPT_ADDRESS:
+ /* This is the address he wants on his end */
+ desiredaddr = p[2] << 24 | p[3] << 16 |
+ p[4] << 8 | p[5];
+ if (desiredaddr == hisaddr ||
+ (hisaddr >= 1 && hisaddr <= 254 && desiredaddr != 0)) {
+ /*
+ * The peer's address is the same as our
+ * value, or we have set it to 0.0.0.* to
+ * indicate that we do not really care;
+ * either way this is agreeable. Conf-ack
+ * it.
+ */
+ if (debug)
+ log(-1, "%s [ack] ",
+ sppp_dotted_quad(hisaddr));
+ /* record that we've seen it already */
+ sp->ipcp.flags |= IPCP_HISADDR_SEEN;
+ continue;
+ }
+ /*
+ * The address wasn't agreeable. Either he sent
+ * us 0.0.0.0, asking us to assign him an
+ * address, or he sent us another address that
+ * doesn't match our value. In either case, we
+ * conf-nak it with our value.
+ * XXX: we should "rej" if hisaddr == 0
+ */
+ if (debug) {
+ if (desiredaddr == 0)
+ log(-1, "[addr requested] ");
+ else
+ log(-1, "%s [not agreed] ",
+ sppp_dotted_quad(desiredaddr));
+
+ }
+ p[2] = hisaddr >> 24;
+ p[3] = hisaddr >> 16;
+ p[4] = hisaddr >> 8;
+ p[5] = hisaddr;
+ break;
+ }
+ /* Add the option to nak'ed list. */
+ bcopy (p, r, p[1]);
+ r += p[1];
+ rlen += p[1];
+ }
+
+ /*
+ * If we are about to conf-ack the request, but haven't seen
+ * his address so far, conf-nak it instead, with the
+ * `address' option present and our idea of his address being
+ * filled in there, to request negotiation of both addresses.
+ *
+ * XXX This can result in an endless req - nak loop if peer
+ * doesn't want to send us his address. Q: What should we do
+ * about it? XXX A: implement the max-failure counter.
+ */
+ if (rlen == 0 && !(sp->ipcp.flags & IPCP_HISADDR_SEEN) && !gotmyaddr) {
+ buf[0] = IPCP_OPT_ADDRESS;
+ buf[1] = 6;
+ buf[2] = hisaddr >> 24;
+ buf[3] = hisaddr >> 16;
+ buf[4] = hisaddr >> 8;
+ buf[5] = hisaddr;
+ rlen = 6;
+ if (debug)
+ log(-1, "still need hisaddr ");
+ }
+
+ if (rlen) {
+ if (debug)
+ log(-1, " send conf-nak\n");
+ sppp_cp_send (sp, PPP_IPCP, CONF_NAK, h->ident, rlen, buf);
+ } else {
+ if (debug)
+ log(-1, " send conf-ack\n");
+ sppp_cp_send (sp, PPP_IPCP, CONF_ACK,
+ h->ident, origlen, h+1);
+ }
+
+ free (buf, M_TEMP);
+ return (rlen == 0);
+}
+
+/*
+ * Analyze the IPCP Configure-Reject option list, and adjust our
+ * negotiation.
+ */
+static void
+sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len)
+{
+ u_char *buf, *p;
+ struct ifnet *ifp = SP2IFP(sp);
+ int debug = ifp->if_flags & IFF_DEBUG;
+
+ len -= 4;
+ buf = malloc (len, M_TEMP, M_NOWAIT);
+ if (!buf)
+ return;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "ipcp rej opts: ",
+ SPP_ARGS(ifp));
+
+ p = (void*) (h+1);
+ for (; len >= 2 && p[1] >= 2 && len >= p[1];
+ len -= p[1], p += p[1]) {
+ if (debug)
+ log(-1, " %s ", sppp_ipcp_opt_name(*p));
+ switch (*p) {
+ case IPCP_OPT_COMPRESSION:
+ sp->ipcp.opts &= ~(1 << IPCP_OPT_COMPRESSION);
+ break;
+ case IPCP_OPT_ADDRESS:
+ /*
+ * Peer doesn't grok address option. This is
+ * bad. XXX Should we better give up here?
+ * XXX We could try old "addresses" option...
+ */
+ sp->ipcp.opts &= ~(1 << IPCP_OPT_ADDRESS);
+ break;
+ }
+ }
+ if (debug)
+ log(-1, "\n");
+ free (buf, M_TEMP);
+ return;
+}
+
+/*
+ * Analyze the IPCP Configure-NAK option list, and adjust our
+ * negotiation.
+ */
+static void
+sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len)
+{
+ u_char *buf, *p;
+ struct ifnet *ifp = SP2IFP(sp);
+ int debug = ifp->if_flags & IFF_DEBUG;
+ int desiredcomp;
+ u_long wantaddr;
+
+ len -= 4;
+ buf = malloc (len, M_TEMP, M_NOWAIT);
+ if (!buf)
+ return;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "ipcp nak opts: ",
+ SPP_ARGS(ifp));
+
+ p = (void*) (h+1);
+ for (; len >= 2 && p[1] >= 2 && len >= p[1];
+ len -= p[1], p += p[1]) {
+ if (debug)
+ log(-1, " %s ", sppp_ipcp_opt_name(*p));
+ switch (*p) {
+ case IPCP_OPT_COMPRESSION:
+ if (len >= 6 && p[1] == 6) {
+ desiredcomp = p[2] << 8 | p[3];
+ if (debug)
+ log(-1, "[wantcomp %#04x] ",
+ desiredcomp);
+ if (desiredcomp == IPCP_COMP_VJ) {
+ sl_compress_init(sp->pp_comp, p[4]);
+ sp->ipcp.max_state = p[4];
+ sp->ipcp.compress_cid = p[5];
+ if (debug)
+ log(-1, "[agree] ");
+ } else
+ sp->ipcp.opts &=
+ ~(1 << IPCP_OPT_COMPRESSION);
+ }
+ break;
+ case IPCP_OPT_ADDRESS:
+ /*
+ * Peer doesn't like our local IP address and
+ * suggests one of his own instead. See if we
+ * can accept it.
+ */
+ if (len >= 6 && p[1] == 6) {
+ wantaddr = p[2] << 24 | p[3] << 16 |
+ p[4] << 8 | p[5];
+ sp->ipcp.opts |= (1 << IPCP_OPT_ADDRESS);
+ if (debug)
+ log(-1, "[wantaddr %s] ",
+ sppp_dotted_quad(wantaddr));
+ /*
+ * When doing dynamic address assignment,
+ * we accept his offer. Otherwise, we
+ * ignore it and thus continue to negotiate
+ * our already existing value.
+ * XXX: Bogus, if he said no once, he'll
+ * just say no again, might as well die.
+ */
+ if (sp->ipcp.flags & IPCP_MYADDR_DYN) {
+ sppp_set_ip_addr(sp, wantaddr);
+ if (debug)
+ log(-1, "[agree] ");
+ sp->ipcp.flags |= IPCP_MYADDR_SEEN;
+ }
+ }
+ break;
+ }
+ }
+ if (debug)
+ log(-1, "\n");
+ free (buf, M_TEMP);
+ return;
+}
+
+static void
+sppp_ipcp_tlu(struct sppp *sp)
+{
+ /* we are up - notify isdn daemon */
+ if (sp->pp_con)
+ sp->pp_con(sp);
+}
+
+static void
+sppp_ipcp_tld(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipcp_tls(struct sppp *sp)
+{
+ /* indicate to LCP that it must stay alive */
+ sp->lcp.protos |= (1 << IDX_IPCP);
+}
+
+static void
+sppp_ipcp_tlf(struct sppp *sp)
+{
+ /* we no longer need LCP */
+ sp->lcp.protos &= ~(1 << IDX_IPCP);
+ sppp_lcp_check_and_close(sp);
+}
+
+static void
+sppp_ipcp_scr(struct sppp *sp)
+{
+ char opt[6 /* compression */ + 6 /* address */];
+ u_long ouraddr;
+ int i = 0;
+
+ if (sp->ipcp.opts & (1 << IPCP_OPT_COMPRESSION)) {
+ opt[i++] = IPCP_OPT_COMPRESSION;
+ opt[i++] = 6;
+ opt[i++] = IPCP_COMP_VJ >> 8;
+ opt[i++] = IPCP_COMP_VJ;
+ opt[i++] = sp->ipcp.max_state;
+ opt[i++] = sp->ipcp.compress_cid;
+ }
+ if (sp->ipcp.opts & (1 << IPCP_OPT_ADDRESS)) {
+ sppp_get_ip_addrs(sp, &ouraddr, 0, 0);
+ opt[i++] = IPCP_OPT_ADDRESS;
+ opt[i++] = 6;
+ opt[i++] = ouraddr >> 24;
+ opt[i++] = ouraddr >> 16;
+ opt[i++] = ouraddr >> 8;
+ opt[i++] = ouraddr;
+ }
+
+ sp->confid[IDX_IPCP] = ++sp->pp_seq[IDX_IPCP];
+ sppp_cp_send(sp, PPP_IPCP, CONF_REQ, sp->confid[IDX_IPCP], i, &opt);
+}
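+
+/*
+ * Illustrative wire layout of the IPCP conf-req built above, e.g. with
+ * VJ compression and a local address of 10.0.0.1 (example values only;
+ * type codes per RFC 1332):
+ *
+ *   02 06 00 2d 0f 01   IP-compression-protocol, length 6 (0x002d = VJ)
+ *   03 06 0a 00 00 01   IP-address, length 6 (10.0.0.1)
+ */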
+#else /* !INET */
+static void
+sppp_ipcp_init(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipcp_up(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipcp_down(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipcp_open(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipcp_close(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipcp_TO(void *cookie)
+{
+}
+
+static int
+sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len)
+{
+ return (0);
+}
+
+static void
+sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len)
+{
+}
+
+static void
+sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len)
+{
+}
+
+static void
+sppp_ipcp_tlu(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipcp_tld(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipcp_tls(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipcp_tlf(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipcp_scr(struct sppp *sp)
+{
+}
+#endif
+
+/*
+ *--------------------------------------------------------------------------*
+ * *
+ * The IPv6CP implementation. *
+ * *
+ *--------------------------------------------------------------------------*
+ */
+
+#ifdef INET6
+static void
+sppp_ipv6cp_init(struct sppp *sp)
+{
+ sp->ipv6cp.opts = 0;
+ sp->ipv6cp.flags = 0;
+ sp->state[IDX_IPV6CP] = STATE_INITIAL;
+ sp->fail_counter[IDX_IPV6CP] = 0;
+ sp->pp_seq[IDX_IPV6CP] = 0;
+ sp->pp_rseq[IDX_IPV6CP] = 0;
+ callout_init(&sp->ch[IDX_IPV6CP], CALLOUT_MPSAFE);
+}
+
+static void
+sppp_ipv6cp_up(struct sppp *sp)
+{
+ sppp_up_event(&ipv6cp, sp);
+}
+
+static void
+sppp_ipv6cp_down(struct sppp *sp)
+{
+ sppp_down_event(&ipv6cp, sp);
+}
+
+static void
+sppp_ipv6cp_open(struct sppp *sp)
+{
+ STDDCL;
+ struct in6_addr myaddr, hisaddr;
+
+#ifdef IPV6CP_MYIFID_DYN
+ sp->ipv6cp.flags &= ~(IPV6CP_MYIFID_SEEN|IPV6CP_MYIFID_DYN);
+#else
+ sp->ipv6cp.flags &= ~IPV6CP_MYIFID_SEEN;
+#endif
+
+ sppp_get_ip6_addrs(sp, &myaddr, &hisaddr, 0);
+ /*
+ * If we don't have our address, this probably means our
+ * interface doesn't want to talk IPv6 at all. (This could
+ * be the case if somebody wants to speak only IPX, for
+ * example.) Don't open IPv6CP in this case.
+ */
+ if (IN6_IS_ADDR_UNSPECIFIED(&myaddr)) {
+ /* XXX this message should go away */
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "ipv6cp_open(): no IPv6 interface\n",
+ SPP_ARGS(ifp));
+ return;
+ }
+
+ sp->ipv6cp.flags |= IPV6CP_MYIFID_SEEN;
+ sp->ipv6cp.opts |= (1 << IPV6CP_OPT_IFID);
+ sppp_open_event(&ipv6cp, sp);
+}
+
+static void
+sppp_ipv6cp_close(struct sppp *sp)
+{
+ sppp_close_event(&ipv6cp, sp);
+}
+
+static void
+sppp_ipv6cp_TO(void *cookie)
+{
+ sppp_to_event(&ipv6cp, (struct sppp *)cookie);
+}
+
+/*
+ * Analyze a configure request. Return true if it was agreeable, and
+ * caused action sca, false if it has been rejected or nak'ed, and
+ * caused action scn. (The return value is used to make the state
+ * transition decision in the state automaton.)
+ */
+static int
+sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len)
+{
+ u_char *buf, *r, *p;
+ struct ifnet *ifp = SP2IFP(sp);
+ int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
+ struct in6_addr myaddr, desiredaddr, suggestaddr;
+ int ifidcount;
+ int type;
+ int collision, nohisaddr;
+ char ip6buf[INET6_ADDRSTRLEN];
+
+ len -= 4;
+ origlen = len;
+ /*
+ * Make sure to allocate a buf that can at least hold a
+ * conf-nak with an `address' option. We might need it below.
+ */
+ buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
+ if (! buf)
+ return (0);
+
+ /* pass 1: see if we can recognize them */
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "ipv6cp parse opts:",
+ SPP_ARGS(ifp));
+ p = (void*) (h+1);
+ ifidcount = 0;
+ for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
+ len-=p[1], p+=p[1]) {
+ if (debug)
+ log(-1, " %s", sppp_ipv6cp_opt_name(*p));
+ switch (*p) {
+ case IPV6CP_OPT_IFID:
+ if (len >= 10 && p[1] == 10 && ifidcount == 0) {
+ /* correctly formed address option */
+ ifidcount++;
+ continue;
+ }
+ if (debug)
+ log(-1, " [invalid]");
+ break;
+#ifdef notyet
+ case IPV6CP_OPT_COMPRESSION:
+ if (len >= 4 && p[1] >= 4) {
+ /* correctly formed compress option */
+ continue;
+ }
+ if (debug)
+ log(-1, " [invalid]");
+ break;
+#endif
+ default:
+ /* Others not supported. */
+ if (debug)
+ log(-1, " [rej]");
+ break;
+ }
+ /* Add the option to rejected list. */
+ bcopy (p, r, p[1]);
+ r += p[1];
+ rlen += p[1];
+ }
+ if (rlen) {
+ if (debug)
+ log(-1, " send conf-rej\n");
+ sppp_cp_send (sp, PPP_IPV6CP, CONF_REJ, h->ident, rlen, buf);
+ goto end;
+ } else if (debug)
+ log(-1, "\n");
+
+ /* pass 2: parse option values */
+ sppp_get_ip6_addrs(sp, &myaddr, 0, 0);
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "ipv6cp parse opt values: ",
+ SPP_ARGS(ifp));
+ p = (void*) (h+1);
+ len = origlen;
+ type = CONF_ACK;
+ for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
+ len-=p[1], p+=p[1]) {
+ if (debug)
+ log(-1, " %s", sppp_ipv6cp_opt_name(*p));
+ switch (*p) {
+#ifdef notyet
+ case IPV6CP_OPT_COMPRESSION:
+ continue;
+#endif
+ case IPV6CP_OPT_IFID:
+ bzero(&desiredaddr, sizeof(desiredaddr));
+ bcopy(&p[2], &desiredaddr.s6_addr[8], 8);
+ collision = (bcmp(&desiredaddr.s6_addr[8],
+ &myaddr.s6_addr[8], 8) == 0);
+ nohisaddr = IN6_IS_ADDR_UNSPECIFIED(&desiredaddr);
+
+ desiredaddr.s6_addr16[0] = htons(0xfe80);
+ (void)in6_setscope(&desiredaddr, SP2IFP(sp), NULL);
+
+ if (!collision && !nohisaddr) {
+ /* no collision, hisaddr known - Conf-Ack */
+ type = CONF_ACK;
+
+ if (debug) {
+ log(-1, " %s [%s]",
+ ip6_sprintf(ip6buf, &desiredaddr),
+ sppp_cp_type_name(type));
+ }
+ continue;
+ }
+
+ bzero(&suggestaddr, sizeof(suggestaddr));
+ if (collision && nohisaddr) {
+ /* collision, hisaddr unknown - Conf-Rej */
+ type = CONF_REJ;
+ bzero(&p[2], 8);
+ } else {
+ /*
+ * - no collision, hisaddr unknown, or
+ * - collision, hisaddr known
+ * Conf-Nak, suggest hisaddr
+ */
+ type = CONF_NAK;
+ sppp_suggest_ip6_addr(sp, &suggestaddr);
+ bcopy(&suggestaddr.s6_addr[8], &p[2], 8);
+ }
+ if (debug)
+ log(-1, " %s [%s]",
+ ip6_sprintf(ip6buf, &desiredaddr),
+ sppp_cp_type_name(type));
+ break;
+ }
+ /* Add the option to nak'ed list. */
+ bcopy (p, r, p[1]);
+ r += p[1];
+ rlen += p[1];
+ }
+
+ if (rlen == 0 && type == CONF_ACK) {
+ if (debug)
+ log(-1, " send %s\n", sppp_cp_type_name(type));
+ sppp_cp_send (sp, PPP_IPV6CP, type, h->ident, origlen, h+1);
+ } else {
+#ifdef DIAGNOSTIC
+ if (type == CONF_ACK)
+ panic("IPv6CP RCR: CONF_ACK with non-zero rlen");
+#endif
+
+ if (debug) {
+ log(-1, " send %s suggest %s\n",
+ sppp_cp_type_name(type),
+ ip6_sprintf(ip6buf, &suggestaddr));
+ }
+ sppp_cp_send (sp, PPP_IPV6CP, type, h->ident, rlen, buf);
+ }
+
+ end:
+ free (buf, M_TEMP);
+ return (rlen == 0);
+}
+
+/*
+ * Analyze the IPv6CP Configure-Reject option list, and adjust our
+ * negotiation.
+ */
+static void
+sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len)
+{
+ u_char *buf, *p;
+ struct ifnet *ifp = SP2IFP(sp);
+ int debug = ifp->if_flags & IFF_DEBUG;
+
+ len -= 4;
+ buf = malloc (len, M_TEMP, M_NOWAIT);
+ if (!buf)
+ return;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "ipv6cp rej opts:",
+ SPP_ARGS(ifp));
+
+ p = (void*) (h+1);
+ for (; len >= 2 && p[1] >= 2 && len >= p[1];
+ len -= p[1], p += p[1]) {
+ if (debug)
+ log(-1, " %s", sppp_ipv6cp_opt_name(*p));
+ switch (*p) {
+ case IPV6CP_OPT_IFID:
+ /*
+ * Peer doesn't grok address option. This is
+ * bad. XXX Should we better give up here?
+ */
+ sp->ipv6cp.opts &= ~(1 << IPV6CP_OPT_IFID);
+ break;
+#ifdef notyet
+ case IPV6CP_OPT_COMPRESS:
+ sp->ipv6cp.opts &= ~(1 << IPV6CP_OPT_COMPRESS);
+ break;
+#endif
+ }
+ }
+ if (debug)
+ log(-1, "\n");
+ free (buf, M_TEMP);
+ return;
+}
+
+/*
+ * Analyze the IPv6CP Configure-NAK option list, and adjust our
+ * negotiation.
+ */
+static void
+sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len)
+{
+ u_char *buf, *p;
+ struct ifnet *ifp = SP2IFP(sp);
+ int debug = ifp->if_flags & IFF_DEBUG;
+ struct in6_addr suggestaddr;
+ char ip6buf[INET6_ADDRSTRLEN];
+
+ len -= 4;
+ buf = malloc (len, M_TEMP, M_NOWAIT);
+ if (!buf)
+ return;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "ipv6cp nak opts:",
+ SPP_ARGS(ifp));
+
+ p = (void*) (h+1);
+ for (; len >= 2 && p[1] >= 2 && len >= p[1];
+ len -= p[1], p += p[1]) {
+ if (debug)
+ log(-1, " %s", sppp_ipv6cp_opt_name(*p));
+ switch (*p) {
+ case IPV6CP_OPT_IFID:
+ /*
+ * Peer doesn't like our local ifid and
+ * suggests one of his own instead. See
+ * if we can accept it.
+ */
+ if (len < 10 || p[1] != 10)
+ break;
+ bzero(&suggestaddr, sizeof(suggestaddr));
+ suggestaddr.s6_addr16[0] = htons(0xfe80);
+ (void)in6_setscope(&suggestaddr, SP2IFP(sp), NULL);
+ bcopy(&p[2], &suggestaddr.s6_addr[8], 8);
+
+ sp->ipv6cp.opts |= (1 << IPV6CP_OPT_IFID);
+ if (debug)
+ log(-1, " [suggestaddr %s]",
+ ip6_sprintf(ip6buf, &suggestaddr));
+#ifdef IPV6CP_MYIFID_DYN
+ /*
+ * When doing dynamic address assignment,
+ * we accept his offer.
+ */
+ if (sp->ipv6cp.flags & IPV6CP_MYIFID_DYN) {
+ struct in6_addr lastsuggest;
+ /*
+ * If the myaddr suggested by the peer equals
+ * the hisaddr we suggested last time, we
+ * have a collision; generate a new random
+ * ifid.
+ */
+ sppp_suggest_ip6_addr(sp, &lastsuggest);
+ if (IN6_ARE_ADDR_EQUAL(&suggestaddr,
+ &lastsuggest)) {
+ if (debug)
+ log(-1, " [random]");
+ sppp_gen_ip6_addr(sp, &suggestaddr);
+ }
+ sppp_set_ip6_addr(sp, &suggestaddr, 0);
+ if (debug)
+ log(-1, " [agree]");
+ sp->ipv6cp.flags |= IPV6CP_MYIFID_SEEN;
+ }
+#else
+ /*
+ * Since we do not do dynamic address assignment,
+ * we ignore it and thus continue to negotiate
+ * our already existing value. This can possibly
+ * go into infinite request-reject loop.
+ *
+ * This is not likely because we normally use
+ * ifid based on MAC-address.
+ * If you have no ethernet card on the node, too bad.
+ * XXX should we use fail_counter?
+ */
+#endif
+ break;
+#ifdef notyet
+ case IPV6CP_OPT_COMPRESS:
+ /*
+ * Peer wants different compression parameters.
+ */
+ break;
+#endif
+ }
+ }
+ if (debug)
+ log(-1, "\n");
+ free (buf, M_TEMP);
+ return;
+}
+
+static void
+sppp_ipv6cp_tlu(struct sppp *sp)
+{
+ /* we are up - notify isdn daemon */
+ if (sp->pp_con)
+ sp->pp_con(sp);
+}
+
+static void
+sppp_ipv6cp_tld(struct sppp *sp)
+{
+}
+
+static void
+sppp_ipv6cp_tls(struct sppp *sp)
+{
+ /* indicate to LCP that it must stay alive */
+ sp->lcp.protos |= (1 << IDX_IPV6CP);
+}
+
+static void
+sppp_ipv6cp_tlf(struct sppp *sp)
+{
+
+#if 0 /* need #if 0 to close IPv6CP properly */
+ /* we no longer need LCP */
+ sp->lcp.protos &= ~(1 << IDX_IPV6CP);
+ sppp_lcp_check_and_close(sp);
+#endif
+}
+
+static void
+sppp_ipv6cp_scr(struct sppp *sp)
+{
+ char opt[10 /* ifid */ + 4 /* compression, minimum */];
+ struct in6_addr ouraddr;
+ int i = 0;
+
+ if (sp->ipv6cp.opts & (1 << IPV6CP_OPT_IFID)) {
+ sppp_get_ip6_addrs(sp, &ouraddr, 0, 0);
+ opt[i++] = IPV6CP_OPT_IFID;
+ opt[i++] = 10;
+ bcopy(&ouraddr.s6_addr[8], &opt[i], 8);
+ i += 8;
+ }
+
+#ifdef notyet
+ if (sp->ipv6cp.opts & (1 << IPV6CP_OPT_COMPRESSION)) {
+ opt[i++] = IPV6CP_OPT_COMPRESSION;
+ opt[i++] = 4;
+ opt[i++] = 0; /* TBD */
+ opt[i++] = 0; /* TBD */
+ /* variable length data may follow */
+ }
+#endif
+
+ sp->confid[IDX_IPV6CP] = ++sp->pp_seq[IDX_IPV6CP];
+ sppp_cp_send(sp, PPP_IPV6CP, CONF_REQ, sp->confid[IDX_IPV6CP], i, &opt);
+}
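+
+/*
+ * The IPv6CP conf-req built above carries a single interface-identifier
+ * option (type 1, length 10) holding the lower 8 bytes of our
+ * link-local address, per RFC 2472.
+ */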
+#else /*INET6*/
+static void sppp_ipv6cp_init(struct sppp *sp)
+{
+}
+
+static void sppp_ipv6cp_up(struct sppp *sp)
+{
+}
+
+static void sppp_ipv6cp_down(struct sppp *sp)
+{
+}
+
+static void sppp_ipv6cp_open(struct sppp *sp)
+{
+}
+
+static void sppp_ipv6cp_close(struct sppp *sp)
+{
+}
+
+static void sppp_ipv6cp_TO(void *sp)
+{
+}
+
+static int sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len)
+{
+ return 0;
+}
+
+static void sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len)
+{
+}
+
+static void sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len)
+{
+}
+
+static void sppp_ipv6cp_tlu(struct sppp *sp)
+{
+}
+
+static void sppp_ipv6cp_tld(struct sppp *sp)
+{
+}
+
+static void sppp_ipv6cp_tls(struct sppp *sp)
+{
+}
+
+static void sppp_ipv6cp_tlf(struct sppp *sp)
+{
+}
+
+static void sppp_ipv6cp_scr(struct sppp *sp)
+{
+}
+#endif /*INET6*/
+
+/*
+ *--------------------------------------------------------------------------*
+ * *
+ * The CHAP implementation. *
+ * *
+ *--------------------------------------------------------------------------*
+ */
+
+/*
+ * The authentication protocols don't employ a full-fledged state machine as
+ * the control protocols do, since they do have Open and Close events, but
+ * not Up and Down, nor are they explicitly terminated. Also, use of the
+ * authentication protocols may be different in the two directions (this makes
+ * sense: think of a machine that never accepts incoming calls but only
+ * calls out; it doesn't require the called party to authenticate itself).
+ *
+ * Our state machine for the local authentication protocol (we are requesting
+ * the peer to authenticate) looks like:
+ *
+ * RCA-
+ * +--------------------------------------------+
+ * V scn,tld|
+ * +--------+ Close +---------+ RCA+
+ * | |<----------------------------------| |------+
+ * +--->| Closed | TO* | Opened | sca |
+ * | | |-----+ +-------| |<-----+
+ * | +--------+ irc | | +---------+
+ * | ^ | | ^
+ * | | | | |
+ * | | | | |
+ * | TO-| | | |
+ * | |tld TO+ V | |
+ * | | +------->+ | |
+ * | | | | | |
+ * | +--------+ V | |
+ * | | |<----+<--------------------+ |
+ * | | Req- | scr |
+ * | | Sent | |
+ * | | | |
+ * | +--------+ |
+ * | RCA- | | RCA+ |
+ * +------+ +------------------------------------------+
+ * scn,tld sca,irc,ict,tlu
+ *
+ *
+ * with:
+ *
+ * Open: LCP reached authentication phase
+ * Close: LCP reached terminate phase
+ *
+ * RCA+: received reply (pap-req, chap-response), acceptable
+ * RCN: received reply (pap-req, chap-response), not acceptable
+ * TO+: timeout with restart counter >= 0
+ * TO-: timeout with restart counter < 0
+ * TO*: reschedule timeout for CHAP
+ *
+ * scr: send request packet (none for PAP, chap-challenge)
+ * sca: send ack packet (pap-ack, chap-success)
+ * scn: send nak packet (pap-nak, chap-failure)
+ * ict: initialize re-challenge timer (CHAP only)
+ *
+ * tlu: this-layer-up, LCP reaches network phase
+ * tld: this-layer-down, LCP enters terminate phase
+ *
+ * Note that in CHAP mode, after sending a new challenge, while the state
+ * automaton falls back into Req-Sent state, it doesn't signal a tld
+ * event to LCP, so LCP remains in network phase. Only after not getting
+ * any response (or after getting an unacceptable response), CHAP closes,
+ * causing LCP to enter terminate phase.
+ *
+ * With PAP, there is no initial request that can be sent. The peer is
+ * expected to send one based on the successful negotiation of PAP as
+ * the authentication protocol during the LCP option negotiation.
+ *
+ * Incoming authentication protocol requests (remote requests
+ * authentication, we are peer) don't employ a state machine at all,
+ * they are simply answered. Some peers [Ascend P50 firmware rev
+ * 4.50] react allergically when we send IPCP requests while they are
+ * still in the authentication phase (thereby violating the standard,
+ * which demands that such NCP packets be discarded), so we keep
+ * track of the peer demanding us to authenticate, and only proceed to
+ * phase network once we've seen a positive acknowledge for the
+ * authentication.
+ */
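+
+/*
+ * In the CHAP code below, sppp_chap_input() implements the RCA+/RCA-
+ * events of the diagram above, sppp_chap_TO() the TO events, and
+ * sppp_chap_scr() the scr action (sending a fresh challenge).
+ */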
+
+/*
+ * Handle incoming CHAP packets.
+ */
+static void
+sppp_chap_input(struct sppp *sp, struct mbuf *m)
+{
+ STDDCL;
+ struct lcp_header *h;
+ int len, x;
+ u_char *value, *name, digest[AUTHKEYLEN], dsize;
+ int value_len, name_len;
+ MD5_CTX ctx;
+
+ len = m->m_pkthdr.len;
+ if (len < 4) {
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "chap invalid packet length: %d bytes\n",
+ SPP_ARGS(ifp), len);
+ return;
+ }
+ h = mtod (m, struct lcp_header*);
+ if (len > ntohs (h->len))
+ len = ntohs (h->len);
+
+ switch (h->type) {
+ /* challenge, failure and success are his authproto */
+ case CHAP_CHALLENGE:
+ value = 1 + (u_char*)(h+1);
+ value_len = value[-1];
+ name = value + value_len;
+ name_len = len - value_len - 5;
+ if (name_len < 0) {
+ if (debug) {
+ log(LOG_DEBUG,
+ SPP_FMT "chap corrupted challenge "
+ "<%s id=0x%x len=%d",
+ SPP_ARGS(ifp),
+ sppp_auth_type_name(PPP_CHAP, h->type),
+ h->ident, ntohs(h->len));
+ sppp_print_bytes((u_char*) (h+1), len-4);
+ log(-1, ">\n");
+ }
+ break;
+ }
+
+ if (debug) {
+ log(LOG_DEBUG,
+ SPP_FMT "chap input <%s id=0x%x len=%d name=",
+ SPP_ARGS(ifp),
+ sppp_auth_type_name(PPP_CHAP, h->type), h->ident,
+ ntohs(h->len));
+ sppp_print_string((char*) name, name_len);
+ log(-1, " value-size=%d value=", value_len);
+ sppp_print_bytes(value, value_len);
+ log(-1, ">\n");
+ }
+
+ /* Compute reply value. */
+ MD5Init(&ctx);
+ MD5Update(&ctx, &h->ident, 1);
+ MD5Update(&ctx, sp->myauth.secret,
+ sppp_strnlen(sp->myauth.secret, AUTHKEYLEN));
+ MD5Update(&ctx, value, value_len);
+ MD5Final(digest, &ctx);
+ dsize = sizeof digest;
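+
+ /*
+ * The response sent below carries: value-size (1 byte), the
+ * 16-byte MD5 digest of (ident || secret || challenge), then
+ * our name -- the CHAP response layout of RFC 1994.
+ */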
+
+ sppp_auth_send(&chap, sp, CHAP_RESPONSE, h->ident,
+ sizeof dsize, (const char *)&dsize,
+ sizeof digest, digest,
+ (size_t)sppp_strnlen(sp->myauth.name, AUTHNAMELEN),
+ sp->myauth.name,
+ 0);
+ break;
+
+ case CHAP_SUCCESS:
+ if (debug) {
+ log(LOG_DEBUG, SPP_FMT "chap success",
+ SPP_ARGS(ifp));
+ if (len > 4) {
+ log(-1, ": ");
+ sppp_print_string((char*)(h + 1), len - 4);
+ }
+ log(-1, "\n");
+ }
+ x = splimp();
+ SPPP_LOCK(sp);
+ sp->pp_flags &= ~PP_NEEDAUTH;
+ if (sp->myauth.proto == PPP_CHAP &&
+ (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) &&
+ (sp->lcp.protos & (1 << IDX_CHAP)) == 0) {
+ /*
+ * We are authenticator for CHAP but didn't
+ * complete yet. Leave it to tlu to proceed
+ * to network phase.
+ */
+ SPPP_UNLOCK(sp);
+ splx(x);
+ break;
+ }
+ SPPP_UNLOCK(sp);
+ splx(x);
+ sppp_phase_network(sp);
+ break;
+
+ case CHAP_FAILURE:
+ if (debug) {
+ log(LOG_INFO, SPP_FMT "chap failure",
+ SPP_ARGS(ifp));
+ if (len > 4) {
+ log(-1, ": ");
+ sppp_print_string((char*)(h + 1), len - 4);
+ }
+ log(-1, "\n");
+ } else
+ log(LOG_INFO, SPP_FMT "chap failure\n",
+ SPP_ARGS(ifp));
+ /* await LCP shutdown by authenticator */
+ break;
+
+ /* response is my authproto */
+ case CHAP_RESPONSE:
+ value = 1 + (u_char*)(h+1);
+ value_len = value[-1];
+ name = value + value_len;
+ name_len = len - value_len - 5;
+ if (name_len < 0) {
+ if (debug) {
+ log(LOG_DEBUG,
+ SPP_FMT "chap corrupted response "
+ "<%s id=0x%x len=%d",
+ SPP_ARGS(ifp),
+ sppp_auth_type_name(PPP_CHAP, h->type),
+ h->ident, ntohs(h->len));
+ sppp_print_bytes((u_char*)(h+1), len-4);
+ log(-1, ">\n");
+ }
+ break;
+ }
+ if (h->ident != sp->confid[IDX_CHAP]) {
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "chap dropping response for old ID "
+ "(got %d, expected %d)\n",
+ SPP_ARGS(ifp),
+ h->ident, sp->confid[IDX_CHAP]);
+ break;
+ }
+ if (name_len != sppp_strnlen(sp->hisauth.name, AUTHNAMELEN)
+ || bcmp(name, sp->hisauth.name, name_len) != 0) {
+ log(LOG_INFO, SPP_FMT "chap response, his name ",
+ SPP_ARGS(ifp));
+ sppp_print_string(name, name_len);
+ log(-1, " != expected ");
+ sppp_print_string(sp->hisauth.name,
+ sppp_strnlen(sp->hisauth.name, AUTHNAMELEN));
+ log(-1, "\n");
+ }
+ if (debug) {
+ log(LOG_DEBUG, SPP_FMT "chap input(%s) "
+ "<%s id=0x%x len=%d name=",
+ SPP_ARGS(ifp),
+ sppp_state_name(sp->state[IDX_CHAP]),
+ sppp_auth_type_name(PPP_CHAP, h->type),
+ h->ident, ntohs (h->len));
+ sppp_print_string((char*)name, name_len);
+ log(-1, " value-size=%d value=", value_len);
+ sppp_print_bytes(value, value_len);
+ log(-1, ">\n");
+ }
+ if (value_len != AUTHKEYLEN) {
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "chap bad hash value length: "
+ "%d bytes, should be %d\n",
+ SPP_ARGS(ifp), value_len,
+ AUTHKEYLEN);
+ break;
+ }
+
+ MD5Init(&ctx);
+ MD5Update(&ctx, &h->ident, 1);
+ MD5Update(&ctx, sp->hisauth.secret,
+ sppp_strnlen(sp->hisauth.secret, AUTHKEYLEN));
+ MD5Update(&ctx, sp->myauth.challenge, AUTHKEYLEN);
+ MD5Final(digest, &ctx);
+
+#define FAILMSG "Failed..."
+#define SUCCMSG "Welcome!"
+
+ if (value_len != sizeof digest ||
+ bcmp(digest, value, value_len) != 0) {
+ /* action scn, tld */
+ sppp_auth_send(&chap, sp, CHAP_FAILURE, h->ident,
+ sizeof(FAILMSG) - 1, (u_char *)FAILMSG,
+ 0);
+ chap.tld(sp);
+ break;
+ }
+ /* action sca, perhaps tlu */
+ if (sp->state[IDX_CHAP] == STATE_REQ_SENT ||
+ sp->state[IDX_CHAP] == STATE_OPENED)
+ sppp_auth_send(&chap, sp, CHAP_SUCCESS, h->ident,
+ sizeof(SUCCMSG) - 1, (u_char *)SUCCMSG,
+ 0);
+ if (sp->state[IDX_CHAP] == STATE_REQ_SENT) {
+ sppp_cp_change_state(&chap, sp, STATE_OPENED);
+ chap.tlu(sp);
+ }
+ break;
+
+ default:
+ /* Unknown CHAP packet type -- ignore. */
+ if (debug) {
+ log(LOG_DEBUG, SPP_FMT "chap unknown input(%s) "
+ "<0x%x id=0x%xh len=%d",
+ SPP_ARGS(ifp),
+ sppp_state_name(sp->state[IDX_CHAP]),
+ h->type, h->ident, ntohs(h->len));
+ sppp_print_bytes((u_char*)(h+1), len-4);
+ log(-1, ">\n");
+ }
+ break;
+
+ }
+}
+
+static void
+sppp_chap_init(struct sppp *sp)
+{
+ /* CHAP doesn't have STATE_INITIAL at all. */
+ sp->state[IDX_CHAP] = STATE_CLOSED;
+ sp->fail_counter[IDX_CHAP] = 0;
+ sp->pp_seq[IDX_CHAP] = 0;
+ sp->pp_rseq[IDX_CHAP] = 0;
+ callout_init(&sp->ch[IDX_CHAP], CALLOUT_MPSAFE);
+}
+
+static void
+sppp_chap_open(struct sppp *sp)
+{
+ if (sp->myauth.proto == PPP_CHAP &&
+ (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) != 0) {
+ /* we are authenticator for CHAP, start it */
+ chap.scr(sp);
+ sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure;
+ sppp_cp_change_state(&chap, sp, STATE_REQ_SENT);
+ }
+ /* nothing to be done if we are peer, await a challenge */
+}
+
+static void
+sppp_chap_close(struct sppp *sp)
+{
+ if (sp->state[IDX_CHAP] != STATE_CLOSED)
+ sppp_cp_change_state(&chap, sp, STATE_CLOSED);
+}
+
+static void
+sppp_chap_TO(void *cookie)
+{
+ struct sppp *sp = (struct sppp *)cookie;
+ STDDCL;
+ int s;
+
+ s = splimp();
+ SPPP_LOCK(sp);
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "chap TO(%s) rst_counter = %d\n",
+ SPP_ARGS(ifp),
+ sppp_state_name(sp->state[IDX_CHAP]),
+ sp->rst_counter[IDX_CHAP]);
+
+ if (--sp->rst_counter[IDX_CHAP] < 0)
+ /* TO- event */
+ switch (sp->state[IDX_CHAP]) {
+ case STATE_REQ_SENT:
+ chap.tld(sp);
+ sppp_cp_change_state(&chap, sp, STATE_CLOSED);
+ break;
+ }
+ else
+ /* TO+ (or TO*) event */
+ switch (sp->state[IDX_CHAP]) {
+ case STATE_OPENED:
+ /* TO* event */
+ sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure;
+ /* FALLTHROUGH */
+ case STATE_REQ_SENT:
+ chap.scr(sp);
+ /* sppp_cp_change_state() will restart the timer */
+ sppp_cp_change_state(&chap, sp, STATE_REQ_SENT);
+ break;
+ }
+
+ SPPP_UNLOCK(sp);
+ splx(s);
+}
+
+static void
+sppp_chap_tlu(struct sppp *sp)
+{
+ STDDCL;
+ int i, x;
+
+ i = 0;
+ sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure;
+
+ /*
+ * Some broken CHAP implementations (Conware CoNet, firmware
+ * 4.0.?) don't want to re-authenticate their CHAP once the
+ * initial challenge-response exchange has taken place.
+ * Provide for an option to avoid rechallenges.
+ */
+ if ((sp->hisauth.flags & AUTHFLAG_NORECHALLENGE) == 0) {
+ /*
+ * Compute the re-challenge timeout. This will yield
+ * a number between 300 and 810 seconds.
+ */
+ i = 300 + ((unsigned)(random() & 0xff00) >> 7);
+ callout_reset(&sp->ch[IDX_CHAP], i * hz, chap.TO, (void *)sp);
+ }
+
+ if (debug) {
+ log(LOG_DEBUG,
+ SPP_FMT "chap %s, ",
+ SPP_ARGS(ifp),
+ sp->pp_phase == PHASE_NETWORK? "reconfirmed": "tlu");
+ if ((sp->hisauth.flags & AUTHFLAG_NORECHALLENGE) == 0)
+ log(-1, "next re-challenge in %d seconds\n", i);
+ else
+ log(-1, "re-challenging supressed\n");
+ }
+
+ x = splimp();
+ SPPP_LOCK(sp);
+ /* indicate to LCP that we need to be closed down */
+ sp->lcp.protos |= (1 << IDX_CHAP);
+
+ if (sp->pp_flags & PP_NEEDAUTH) {
+ /*
+ * Remote is authenticator, but his auth proto didn't
+ * complete yet. Defer the transition to network
+ * phase.
+ */
+ SPPP_UNLOCK(sp);
+ splx(x);
+ return;
+ }
+ SPPP_UNLOCK(sp);
+ splx(x);
+
+ /*
+ * If we are already in phase network, we are done here. This
+ * is the case if this is a dummy tlu event after a re-challenge.
+ */
+ if (sp->pp_phase != PHASE_NETWORK)
+ sppp_phase_network(sp);
+}
+
+static void
+sppp_chap_tld(struct sppp *sp)
+{
+ STDDCL;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "chap tld\n", SPP_ARGS(ifp));
+ callout_stop(&sp->ch[IDX_CHAP]);
+ sp->lcp.protos &= ~(1 << IDX_CHAP);
+
+ lcp.Close(sp);
+}
+
+static void
+sppp_chap_scr(struct sppp *sp)
+{
+ u_long *ch, seed;
+ u_char clen;
+
+ /* Compute random challenge. */
+ ch = (u_long *)sp->myauth.challenge;
+ read_random(&seed, sizeof seed);
+ ch[0] = seed ^ random();
+ ch[1] = seed ^ random();
+ ch[2] = seed ^ random();
+ ch[3] = seed ^ random();
+ clen = AUTHKEYLEN;
+
+ sp->confid[IDX_CHAP] = ++sp->pp_seq[IDX_CHAP];
+
+ sppp_auth_send(&chap, sp, CHAP_CHALLENGE, sp->confid[IDX_CHAP],
+ sizeof clen, (const char *)&clen,
+ (size_t)AUTHKEYLEN, sp->myauth.challenge,
+ (size_t)sppp_strnlen(sp->myauth.name, AUTHNAMELEN),
+ sp->myauth.name,
+ 0);
+}
+
+/*
+ *--------------------------------------------------------------------------*
+ * *
+ * The PAP implementation. *
+ * *
+ *--------------------------------------------------------------------------*
+ */
+/*
+ * For PAP, we need to keep a little state also if we are the peer, not the
+ * authenticator. This is because we don't get a request to authenticate, but
+ * have to repeatedly authenticate ourselves until we get a response (or the
+ * retry counter expires).
+ */
+
+/*
+ * Handle incoming PAP packets. */
+static void
+sppp_pap_input(struct sppp *sp, struct mbuf *m)
+{
+ STDDCL;
+ struct lcp_header *h;
+ int len, x;
+ u_char *name, *passwd, mlen;
+ int name_len, passwd_len;
+
+ len = m->m_pkthdr.len;
+ if (len < 5) {
+ if (debug)
+ log(LOG_DEBUG,
+ SPP_FMT "pap invalid packet length: %d bytes\n",
+ SPP_ARGS(ifp), len);
+ return;
+ }
+ h = mtod (m, struct lcp_header*);
+ if (len > ntohs (h->len))
+ len = ntohs (h->len);
+ switch (h->type) {
+ /* PAP request is my authproto */
+ case PAP_REQ:
+ name = 1 + (u_char*)(h+1);
+ name_len = name[-1];
+ passwd = name + name_len + 1;
+ if (name_len > len - 6 ||
+ (passwd_len = passwd[-1]) > len - 6 - name_len) {
+ if (debug) {
+ log(LOG_DEBUG, SPP_FMT "pap corrupted input "
+ "<%s id=0x%x len=%d",
+ SPP_ARGS(ifp),
+ sppp_auth_type_name(PPP_PAP, h->type),
+ h->ident, ntohs(h->len));
+ sppp_print_bytes((u_char*)(h+1), len-4);
+ log(-1, ">\n");
+ }
+ break;
+ }
+ if (debug) {
+ log(LOG_DEBUG, SPP_FMT "pap input(%s) "
+ "<%s id=0x%x len=%d name=",
+ SPP_ARGS(ifp),
+ sppp_state_name(sp->state[IDX_PAP]),
+ sppp_auth_type_name(PPP_PAP, h->type),
+ h->ident, ntohs(h->len));
+ sppp_print_string((char*)name, name_len);
+ log(-1, " passwd=");
+ sppp_print_string((char*)passwd, passwd_len);
+ log(-1, ">\n");
+ }
+ if (name_len != sppp_strnlen(sp->hisauth.name, AUTHNAMELEN) ||
+ passwd_len != sppp_strnlen(sp->hisauth.secret, AUTHKEYLEN) ||
+ bcmp(name, sp->hisauth.name, name_len) != 0 ||
+ bcmp(passwd, sp->hisauth.secret, passwd_len) != 0) {
+ /* action scn, tld */
+ mlen = sizeof(FAILMSG) - 1;
+ sppp_auth_send(&pap, sp, PAP_NAK, h->ident,
+ sizeof mlen, (const char *)&mlen,
+ sizeof(FAILMSG) - 1, (u_char *)FAILMSG,
+ 0);
+ pap.tld(sp);
+ break;
+ }
+ /* action sca, perhaps tlu */
+ if (sp->state[IDX_PAP] == STATE_REQ_SENT ||
+ sp->state[IDX_PAP] == STATE_OPENED) {
+ mlen = sizeof(SUCCMSG) - 1;
+ sppp_auth_send(&pap, sp, PAP_ACK, h->ident,
+ sizeof mlen, (const char *)&mlen,
+ sizeof(SUCCMSG) - 1, (u_char *)SUCCMSG,
+ 0);
+ }
+ if (sp->state[IDX_PAP] == STATE_REQ_SENT) {
+ sppp_cp_change_state(&pap, sp, STATE_OPENED);
+ pap.tlu(sp);
+ }
+ break;
+
+ /* ack and nak are his authproto */
+ case PAP_ACK:
+ callout_stop(&sp->pap_my_to_ch);
+ if (debug) {
+ log(LOG_DEBUG, SPP_FMT "pap success",
+ SPP_ARGS(ifp));
+ name_len = *((char *)h);
+ if (len > 5 && name_len) {
+ log(-1, ": ");
+ sppp_print_string((char*)(h+1), name_len);
+ }
+ log(-1, "\n");
+ }
+ x = splimp();
+ SPPP_LOCK(sp);
+ sp->pp_flags &= ~PP_NEEDAUTH;
+ if (sp->myauth.proto == PPP_PAP &&
+ (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) &&
+ (sp->lcp.protos & (1 << IDX_PAP)) == 0) {
+ /*
+ * We are authenticator for PAP but didn't
+ * complete yet. Leave it to tlu to proceed
+ * to network phase.
+ */
+ SPPP_UNLOCK(sp);
+ splx(x);
+ break;
+ }
+ SPPP_UNLOCK(sp);
+ splx(x);
+ sppp_phase_network(sp);
+ break;
+
+ case PAP_NAK:
+ callout_stop (&sp->pap_my_to_ch);
+ if (debug) {
+ log(LOG_INFO, SPP_FMT "pap failure",
+ SPP_ARGS(ifp));
+ name_len = *((char *)h);
+ if (len > 5 && name_len) {
+ log(-1, ": ");
+ sppp_print_string((char*)(h+1), name_len);
+ }
+ log(-1, "\n");
+ } else
+ log(LOG_INFO, SPP_FMT "pap failure\n",
+ SPP_ARGS(ifp));
+ /* await LCP shutdown by authenticator */
+ break;
+
+ default:
+ /* Unknown PAP packet type -- ignore. */
+ if (debug) {
+ log(LOG_DEBUG, SPP_FMT "pap corrupted input "
+ "<0x%x id=0x%x len=%d",
+ SPP_ARGS(ifp),
+ h->type, h->ident, ntohs(h->len));
+ sppp_print_bytes((u_char*)(h+1), len-4);
+ log(-1, ">\n");
+ }
+ break;
+
+ }
+}
+
+static void
+sppp_pap_init(struct sppp *sp)
+{
+ /* PAP doesn't have STATE_INITIAL at all. */
+ sp->state[IDX_PAP] = STATE_CLOSED;
+ sp->fail_counter[IDX_PAP] = 0;
+ sp->pp_seq[IDX_PAP] = 0;
+ sp->pp_rseq[IDX_PAP] = 0;
+ callout_init(&sp->ch[IDX_PAP], CALLOUT_MPSAFE);
+ callout_init(&sp->pap_my_to_ch, CALLOUT_MPSAFE);
+}
+
+static void
+sppp_pap_open(struct sppp *sp)
+{
+ if (sp->hisauth.proto == PPP_PAP &&
+ (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) != 0) {
+ /* we are authenticator for PAP, start our timer */
+ sp->rst_counter[IDX_PAP] = sp->lcp.max_configure;
+ sppp_cp_change_state(&pap, sp, STATE_REQ_SENT);
+ }
+ if (sp->myauth.proto == PPP_PAP) {
+ /* we are peer, send a request, and start a timer */
+ pap.scr(sp);
+ callout_reset(&sp->pap_my_to_ch, sp->lcp.timeout,
+ sppp_pap_my_TO, (void *)sp);
+ }
+}
+
+static void
+sppp_pap_close(struct sppp *sp)
+{
+ if (sp->state[IDX_PAP] != STATE_CLOSED)
+ sppp_cp_change_state(&pap, sp, STATE_CLOSED);
+}
+
+/*
+ * That's the timeout routine if we are authenticator. Since the
+ * authenticator is basically passive in PAP, we can't do much here.
+ */
+static void
+sppp_pap_TO(void *cookie)
+{
+ struct sppp *sp = (struct sppp *)cookie;
+ STDDCL;
+ int s;
+
+ s = splimp();
+ SPPP_LOCK(sp);
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "pap TO(%s) rst_counter = %d\n",
+ SPP_ARGS(ifp),
+ sppp_state_name(sp->state[IDX_PAP]),
+ sp->rst_counter[IDX_PAP]);
+
+ if (--sp->rst_counter[IDX_PAP] < 0)
+ /* TO- event */
+ switch (sp->state[IDX_PAP]) {
+ case STATE_REQ_SENT:
+ pap.tld(sp);
+ sppp_cp_change_state(&pap, sp, STATE_CLOSED);
+ break;
+ }
+ else
+ /* TO+ event, not very much we could do */
+ switch (sp->state[IDX_PAP]) {
+ case STATE_REQ_SENT:
+ /* sppp_cp_change_state() will restart the timer */
+ sppp_cp_change_state(&pap, sp, STATE_REQ_SENT);
+ break;
+ }
+
+ SPPP_UNLOCK(sp);
+ splx(s);
+}
+
+/*
+ * That's the timeout handler if we are peer. Since the peer is active,
+ * we need to retransmit our PAP request, as it was apparently lost.
+ * XXX We should impose a max counter.
+ */
+static void
+sppp_pap_my_TO(void *cookie)
+{
+ struct sppp *sp = (struct sppp *)cookie;
+ STDDCL;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "pap peer TO\n",
+ SPP_ARGS(ifp));
+
+ SPPP_LOCK(sp);
+ pap.scr(sp);
+ SPPP_UNLOCK(sp);
+}
+
+static void
+sppp_pap_tlu(struct sppp *sp)
+{
+ STDDCL;
+ int x;
+
+ sp->rst_counter[IDX_PAP] = sp->lcp.max_configure;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "%s tlu\n",
+ SPP_ARGS(ifp), pap.name);
+
+ x = splimp();
+ SPPP_LOCK(sp);
+ /* indicate to LCP that we need to be closed down */
+ sp->lcp.protos |= (1 << IDX_PAP);
+
+ if (sp->pp_flags & PP_NEEDAUTH) {
+ /*
+ * Remote is authenticator, but his auth proto didn't
+ * complete yet. Defer the transition to network
+ * phase.
+ */
+ SPPP_UNLOCK(sp);
+ splx(x);
+ return;
+ }
+ SPPP_UNLOCK(sp);
+ splx(x);
+ sppp_phase_network(sp);
+}
+
+static void
+sppp_pap_tld(struct sppp *sp)
+{
+ STDDCL;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "pap tld\n", SPP_ARGS(ifp));
+ callout_stop (&sp->ch[IDX_PAP]);
+ callout_stop (&sp->pap_my_to_ch);
+ sp->lcp.protos &= ~(1 << IDX_PAP);
+
+ lcp.Close(sp);
+}
+
+static void
+sppp_pap_scr(struct sppp *sp)
+{
+ u_char idlen, pwdlen;
+
+ sp->confid[IDX_PAP] = ++sp->pp_seq[IDX_PAP];
+ pwdlen = sppp_strnlen(sp->myauth.secret, AUTHKEYLEN);
+ idlen = sppp_strnlen(sp->myauth.name, AUTHNAMELEN);
+
+ sppp_auth_send(&pap, sp, PAP_REQ, sp->confid[IDX_PAP],
+ sizeof idlen, (const char *)&idlen,
+ (size_t)idlen, sp->myauth.name,
+ sizeof pwdlen, (const char *)&pwdlen,
+ (size_t)pwdlen, sp->myauth.secret,
+ 0);
+}
+
+/*
+ * Random miscellaneous functions.
+ */
+
+/*
+ * Send a PAP or CHAP proto packet.
+ *
+ * Variadic function; each of the elements for the ellipsis is of type
+ * ``size_t mlen, const u_char *msg''. Processing will stop iff
+ * mlen == 0.
+ * NOTE: never declare variadic functions with types subject to type
+ * promotion (i.e. u_char). This is asking for big trouble depending
+ * on the architecture you are on...
+ */
+
+static void
+sppp_auth_send(const struct cp *cp, struct sppp *sp,
+ unsigned int type, unsigned int id,
+ ...)
+{
+ STDDCL;
+ struct ppp_header *h;
+ struct lcp_header *lh;
+ struct mbuf *m;
+ u_char *p;
+ int len;
+ unsigned int mlen;
+ const char *msg;
+ va_list ap;
+
+ MGETHDR (m, M_DONTWAIT, MT_DATA);
+ if (! m)
+ return;
+ m->m_pkthdr.rcvif = 0;
+
+ h = mtod (m, struct ppp_header*);
+ h->address = PPP_ALLSTATIONS; /* broadcast address */
+ h->control = PPP_UI; /* Unnumbered Info */
+ h->protocol = htons(cp->proto);
+
+ lh = (struct lcp_header*)(h + 1);
+ lh->type = type;
+ lh->ident = id;
+ p = (u_char*) (lh+1);
+
+ va_start(ap, id);
+ len = 0;
+
+ while ((mlen = (unsigned int)va_arg(ap, size_t)) != 0) {
+ msg = va_arg(ap, const char *);
+ len += mlen;
+ if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN) {
+ va_end(ap);
+ m_freem(m);
+ return;
+ }
+
+ bcopy(msg, p, mlen);
+ p += mlen;
+ }
+ va_end(ap);
+
+ m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len;
+ lh->len = htons (LCP_HEADER_LEN + len);
+
+ if (debug) {
+ log(LOG_DEBUG, SPP_FMT "%s output <%s id=0x%x len=%d",
+ SPP_ARGS(ifp), cp->name,
+ sppp_auth_type_name(cp->proto, lh->type),
+ lh->ident, ntohs(lh->len));
+ sppp_print_bytes((u_char*) (lh+1), len);
+ log(-1, ">\n");
+ }
+ if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
+ ifp->if_oerrors++;
+}
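+
+/*
+ * Usage sketch (hypothetical caller, mirroring sppp_pap_scr() above):
+ * each length is passed as a size_t so that the va_arg(ap, size_t)
+ * above reads it back intact, and a length of 0 terminates the list.
+ *
+ *	u_char mlen = sizeof(FAILMSG) - 1;
+ *
+ *	sppp_auth_send(&pap, sp, PAP_NAK, h->ident,
+ *	    sizeof mlen, (const char *)&mlen,
+ *	    (size_t)mlen, (const u_char *)FAILMSG,
+ *	    0);
+ */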
+
+/*
+ * Flush interface queue.
+ */
+static void
+sppp_qflush(struct ifqueue *ifq)
+{
+ struct mbuf *m, *n;
+
+ n = ifq->ifq_head;
+ while ((m = n)) {
+ n = m->m_act;
+ m_freem (m);
+ }
+ ifq->ifq_head = 0;
+ ifq->ifq_tail = 0;
+ ifq->ifq_len = 0;
+}
+
+/*
+ * Send keepalive packets every 10 seconds.
+ */
+static void
+sppp_keepalive(void *dummy)
+{
+ struct sppp *sp = (struct sppp*)dummy;
+ struct ifnet *ifp = SP2IFP(sp);
+ int s;
+
+ s = splimp();
+ SPPP_LOCK(sp);
+ /* Keepalive mode disabled or channel down? */
+ if (! (sp->pp_flags & PP_KEEPALIVE) ||
+ ! (ifp->if_drv_flags & IFF_DRV_RUNNING))
+ goto out;
+
+ if (sp->pp_mode == PP_FR) {
+ sppp_fr_keepalive (sp);
+ goto out;
+ }
+
+ /* No keepalive in PPP mode if LCP not opened yet. */
+ if (sp->pp_mode != IFF_CISCO &&
+ sp->pp_phase < PHASE_AUTHENTICATE)
+ goto out;
+
+ if (sp->pp_alivecnt == MAXALIVECNT) {
+		/* No keepalive packets received. Stop the interface. */
+ printf (SPP_FMT "down\n", SPP_ARGS(ifp));
+ if_down (ifp);
+ sppp_qflush (&sp->pp_cpq);
+ if (sp->pp_mode != IFF_CISCO) {
+ /* XXX */
+ /* Shut down the PPP link. */
+ lcp.Down(sp);
+ /* Initiate negotiation. XXX */
+ lcp.Up(sp);
+ }
+ }
+ if (sp->pp_alivecnt <= MAXALIVECNT)
+ ++sp->pp_alivecnt;
+ if (sp->pp_mode == IFF_CISCO)
+ sppp_cisco_send (sp, CISCO_KEEPALIVE_REQ,
+ ++sp->pp_seq[IDX_LCP], sp->pp_rseq[IDX_LCP]);
+ else if (sp->pp_phase >= PHASE_AUTHENTICATE) {
+ long nmagic = htonl (sp->lcp.magic);
+ sp->lcp.echoid = ++sp->pp_seq[IDX_LCP];
+ sppp_cp_send (sp, PPP_LCP, ECHO_REQ,
+ sp->lcp.echoid, 4, &nmagic);
+ }
+out:
+ SPPP_UNLOCK(sp);
+ splx(s);
+ callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
+ (void *)sp);
+}
+
+/*
+ * Get both IP addresses.
+ */
+void
+sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst, u_long *srcmask)
+{
+ struct ifnet *ifp = SP2IFP(sp);
+ struct ifaddr *ifa;
+ struct sockaddr_in *si, *sm;
+ u_long ssrc, ddst;
+
+ sm = NULL;
+ ssrc = ddst = 0L;
+ /*
+	 * Pick the first AF_INET address from the list;
+	 * aliases don't make any sense on a p2p link anyway.
+ */
+ si = 0;
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ si = (struct sockaddr_in *)ifa->ifa_addr;
+ sm = (struct sockaddr_in *)ifa->ifa_netmask;
+ if (si)
+ break;
+ }
+ if (ifa) {
+ if (si && si->sin_addr.s_addr) {
+ ssrc = si->sin_addr.s_addr;
+ if (srcmask)
+ *srcmask = ntohl(sm->sin_addr.s_addr);
+ }
+
+ si = (struct sockaddr_in *)ifa->ifa_dstaddr;
+ if (si && si->sin_addr.s_addr)
+ ddst = si->sin_addr.s_addr;
+ }
+ if_addr_runlock(ifp);
+
+ if (dst) *dst = ntohl(ddst);
+ if (src) *src = ntohl(ssrc);
+}
+
+#ifdef INET
+/*
+ * Set my IP address. Must be called at splimp.
+ */
+static void
+sppp_set_ip_addr(struct sppp *sp, u_long src)
+{
+ STDDCL;
+ struct ifaddr *ifa;
+ struct sockaddr_in *si;
+ struct in_ifaddr *ia;
+
+ /*
+	 * Pick the first AF_INET address from the list;
+	 * aliases don't make any sense on a p2p link anyway.
+ */
+ si = 0;
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ si = (struct sockaddr_in *)ifa->ifa_addr;
+ if (si != NULL) {
+ ifa_ref(ifa);
+ break;
+ }
+ }
+ }
+ if_addr_runlock(ifp);
+
+ if (ifa != NULL) {
+ int error;
+
+ /* delete old route */
+ error = rtinit(ifa, (int)RTM_DELETE, RTF_HOST);
+ if (debug && error) {
+ log(LOG_DEBUG, SPP_FMT "sppp_set_ip_addr: rtinit DEL failed, error=%d\n",
+ SPP_ARGS(ifp), error);
+ }
+
+ /* set new address */
+ si->sin_addr.s_addr = htonl(src);
+ ia = ifatoia(ifa);
+ IN_IFADDR_WLOCK();
+ LIST_REMOVE(ia, ia_hash);
+ LIST_INSERT_HEAD(INADDR_HASH(si->sin_addr.s_addr), ia, ia_hash);
+ IN_IFADDR_WUNLOCK();
+
+ /* add new route */
+ error = rtinit(ifa, (int)RTM_ADD, RTF_HOST);
+ if (debug && error) {
+ log(LOG_DEBUG, SPP_FMT "sppp_set_ip_addr: rtinit ADD failed, error=%d",
+ SPP_ARGS(ifp), error);
+ }
+ ifa_free(ifa);
+ }
+}
+#endif
+
+#ifdef INET6
+/*
+ * Get both IPv6 addresses.
+ */
+static void
+sppp_get_ip6_addrs(struct sppp *sp, struct in6_addr *src, struct in6_addr *dst,
+ struct in6_addr *srcmask)
+{
+ struct ifnet *ifp = SP2IFP(sp);
+ struct ifaddr *ifa;
+ struct sockaddr_in6 *si, *sm;
+ struct in6_addr ssrc, ddst;
+
+ sm = NULL;
+ bzero(&ssrc, sizeof(ssrc));
+ bzero(&ddst, sizeof(ddst));
+ /*
+	 * Pick the first link-local AF_INET6 address from the list;
+	 * aliases don't make any sense on a p2p link anyway.
+ */
+ si = NULL;
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ si = (struct sockaddr_in6 *)ifa->ifa_addr;
+ sm = (struct sockaddr_in6 *)ifa->ifa_netmask;
+ if (si && IN6_IS_ADDR_LINKLOCAL(&si->sin6_addr))
+ break;
+ }
+ if (ifa) {
+ if (si && !IN6_IS_ADDR_UNSPECIFIED(&si->sin6_addr)) {
+ bcopy(&si->sin6_addr, &ssrc, sizeof(ssrc));
+ if (srcmask) {
+ bcopy(&sm->sin6_addr, srcmask,
+ sizeof(*srcmask));
+ }
+ }
+
+ si = (struct sockaddr_in6 *)ifa->ifa_dstaddr;
+ if (si && !IN6_IS_ADDR_UNSPECIFIED(&si->sin6_addr))
+ bcopy(&si->sin6_addr, &ddst, sizeof(ddst));
+ }
+
+ if (dst)
+ bcopy(&ddst, dst, sizeof(*dst));
+ if (src)
+ bcopy(&ssrc, src, sizeof(*src));
+ if_addr_runlock(ifp);
+}
+
+#ifdef IPV6CP_MYIFID_DYN
+/*
+ * Generate random ifid.
+ */
+static void
+sppp_gen_ip6_addr(struct sppp *sp, struct in6_addr *addr)
+{
+ /* TBD */
+}
+
+/*
+ * Set my IPv6 address. Must be called at splimp.
+ */
+static void
+sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src)
+{
+ STDDCL;
+ struct ifaddr *ifa;
+ struct sockaddr_in6 *sin6;
+
+ /*
+	 * Pick the first link-local AF_INET6 address from the list;
+	 * aliases don't make any sense on a p2p link anyway.
+ */
+
+ sin6 = NULL;
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
+ if (sin6 && IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ ifa_ref(ifa);
+ break;
+ }
+ }
+ }
+ if_addr_runlock(ifp);
+
+ if (ifa != NULL) {
+ int error;
+ struct sockaddr_in6 new_sin6 = *sin6;
+
+ bcopy(src, &new_sin6.sin6_addr, sizeof(new_sin6.sin6_addr));
+ error = in6_ifinit(ifp, ifatoia6(ifa), &new_sin6, 1);
+ if (debug && error) {
+ log(LOG_DEBUG, SPP_FMT "sppp_set_ip6_addr: in6_ifinit "
+ " failed, error=%d\n", SPP_ARGS(ifp), error);
+ }
+ ifa_free(ifa);
+ }
+}
+#endif
+
+/*
+ * Suggest a candidate address to be used by peer.
+ */
+static void
+sppp_suggest_ip6_addr(struct sppp *sp, struct in6_addr *suggest)
+{
+ struct in6_addr myaddr;
+ struct timeval tv;
+
+ sppp_get_ip6_addrs(sp, &myaddr, 0, 0);
+
+ myaddr.s6_addr[8] &= ~0x02; /* u bit to "local" */
+ microtime(&tv);
+ if ((tv.tv_usec & 0xff) == 0 && (tv.tv_sec & 0xff) == 0) {
+ myaddr.s6_addr[14] ^= 0xff;
+ myaddr.s6_addr[15] ^= 0xff;
+ } else {
+ myaddr.s6_addr[14] ^= (tv.tv_usec & 0xff);
+ myaddr.s6_addr[15] ^= (tv.tv_sec & 0xff);
+ }
+ if (suggest)
+ bcopy(&myaddr, suggest, sizeof(myaddr));
+}
+#endif /*INET6*/
+
+static int
+sppp_params(struct sppp *sp, u_long cmd, void *data)
+{
+ u_long subcmd;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct spppreq *spr;
+ int rv = 0;
+
+ if ((spr = malloc(sizeof(struct spppreq), M_TEMP, M_NOWAIT)) == 0)
+ return (EAGAIN);
+ /*
+ * ifr->ifr_data is supposed to point to a struct spppreq.
+ * Check the cmd word first before attempting to fetch all the
+ * data.
+ */
+ if ((subcmd = fuword(ifr->ifr_data)) == -1) {
+ rv = EFAULT;
+ goto quit;
+ }
+
+ if (copyin((caddr_t)ifr->ifr_data, spr, sizeof(struct spppreq)) != 0) {
+ rv = EFAULT;
+ goto quit;
+ }
+
+ switch (subcmd) {
+ case (u_long)SPPPIOGDEFS:
+ if (cmd != SIOCGIFGENERIC) {
+ rv = EINVAL;
+ break;
+ }
+ /*
+ * We copy over the entire current state, but clean
+ * out some of the stuff we don't wanna pass up.
+ * Remember, SIOCGIFGENERIC is unprotected, and can be
+ * called by any user. No need to ever get PAP or
+ * CHAP secrets back to userland anyway.
+ */
+ spr->defs.pp_phase = sp->pp_phase;
+ spr->defs.enable_vj = (sp->confflags & CONF_ENABLE_VJ) != 0;
+ spr->defs.enable_ipv6 = (sp->confflags & CONF_ENABLE_IPV6) != 0;
+ spr->defs.lcp = sp->lcp;
+ spr->defs.ipcp = sp->ipcp;
+ spr->defs.ipv6cp = sp->ipv6cp;
+ spr->defs.myauth = sp->myauth;
+ spr->defs.hisauth = sp->hisauth;
+ bzero(spr->defs.myauth.secret, AUTHKEYLEN);
+ bzero(spr->defs.myauth.challenge, AUTHKEYLEN);
+ bzero(spr->defs.hisauth.secret, AUTHKEYLEN);
+ bzero(spr->defs.hisauth.challenge, AUTHKEYLEN);
+ /*
+ * Fixup the LCP timeout value to milliseconds so
+ * spppcontrol doesn't need to bother about the value
+ * of "hz". We do the reverse calculation below when
+ * setting it.
+ */
+ spr->defs.lcp.timeout = sp->lcp.timeout * 1000 / hz;
+ rv = copyout(spr, (caddr_t)ifr->ifr_data,
+ sizeof(struct spppreq));
+ break;
+
+ case (u_long)SPPPIOSDEFS:
+ if (cmd != SIOCSIFGENERIC) {
+ rv = EINVAL;
+ break;
+ }
+ /*
+		 * We have a very specific idea of which fields we
+		 * allow to be passed back from userland, so as not to
+		 * clobber our current state. For one, we only allow
+		 * setting anything if LCP is in dead or establish
+		 * phase. Once the authentication negotiations have
+		 * started, the authentication settings must not be
+		 * changed again. (The administrator can force an
+		 * ifconfig down in order to get LCP back into dead
+		 * phase.)
+		 *
+		 * Also, we only allow authentication parameters to be
+		 * specified.
+		 *
+		 * XXX Should allow setting or clearing pp_flags.
+		 *
+		 * Finally, if the respective authentication protocol to
+		 * be used is set to a value other than 0, but the secret
+		 * is passed as all zeros, we don't trash the existing secret.
+ * This allows an administrator to change the system name
+ * only without clobbering the secret (which he didn't get
+ * back in a previous SPPPIOGDEFS call). However, the
+ * secrets are cleared if the authentication protocol is
+ * reset to 0. */
+ if (sp->pp_phase != PHASE_DEAD &&
+ sp->pp_phase != PHASE_ESTABLISH) {
+ rv = EBUSY;
+ break;
+ }
+
+ if ((spr->defs.myauth.proto != 0 && spr->defs.myauth.proto != PPP_PAP &&
+ spr->defs.myauth.proto != PPP_CHAP) ||
+ (spr->defs.hisauth.proto != 0 && spr->defs.hisauth.proto != PPP_PAP &&
+ spr->defs.hisauth.proto != PPP_CHAP)) {
+ rv = EINVAL;
+ break;
+ }
+
+ if (spr->defs.myauth.proto == 0)
+ /* resetting myauth */
+ bzero(&sp->myauth, sizeof sp->myauth);
+ else {
+ /* setting/changing myauth */
+ sp->myauth.proto = spr->defs.myauth.proto;
+ bcopy(spr->defs.myauth.name, sp->myauth.name, AUTHNAMELEN);
+ if (spr->defs.myauth.secret[0] != '\0')
+ bcopy(spr->defs.myauth.secret, sp->myauth.secret,
+ AUTHKEYLEN);
+ }
+ if (spr->defs.hisauth.proto == 0)
+ /* resetting hisauth */
+ bzero(&sp->hisauth, sizeof sp->hisauth);
+ else {
+ /* setting/changing hisauth */
+ sp->hisauth.proto = spr->defs.hisauth.proto;
+ sp->hisauth.flags = spr->defs.hisauth.flags;
+ bcopy(spr->defs.hisauth.name, sp->hisauth.name, AUTHNAMELEN);
+ if (spr->defs.hisauth.secret[0] != '\0')
+ bcopy(spr->defs.hisauth.secret, sp->hisauth.secret,
+ AUTHKEYLEN);
+ }
+ /* set LCP restart timer timeout */
+ if (spr->defs.lcp.timeout != 0)
+ sp->lcp.timeout = spr->defs.lcp.timeout * hz / 1000;
+		/* set the VJ and IPv6 enable flags */
+#ifdef INET
+ if (spr->defs.enable_vj)
+ sp->confflags |= CONF_ENABLE_VJ;
+ else
+ sp->confflags &= ~CONF_ENABLE_VJ;
+#endif
+#ifdef INET6
+ if (spr->defs.enable_ipv6)
+ sp->confflags |= CONF_ENABLE_IPV6;
+ else
+ sp->confflags &= ~CONF_ENABLE_IPV6;
+#endif
+ break;
+
+ default:
+ rv = EINVAL;
+ }
+
+ quit:
+ free(spr, M_TEMP);
+
+ return (rv);
+}
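+
+/*
+ * Userland usage sketch (hypothetical; spppcontrol(8) follows this
+ * pattern): the first word of the buffer pointed to by ifr_data
+ * selects the subcommand, and the full struct spppreq is copied in
+ * both directions.
+ *
+ *	struct ifreq ifr;
+ *	struct spppreq spr;
+ *	int s = socket(AF_INET, SOCK_DGRAM, 0);
+ *
+ *	strlcpy(ifr.ifr_name, "ppp0", sizeof(ifr.ifr_name));
+ *	spr.cmd = SPPPIOGDEFS;
+ *	ifr.ifr_data = (caddr_t)&spr;
+ *	if (ioctl(s, SIOCGIFGENERIC, &ifr) == 0)
+ *		printf("phase %d, timeout %d ms\n",
+ *		    spr.defs.pp_phase, spr.defs.lcp.timeout);
+ */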
+
+static void
+sppp_phase_network(struct sppp *sp)
+{
+ STDDCL;
+ int i;
+ u_long mask;
+
+ sp->pp_phase = PHASE_NETWORK;
+
+ if (debug)
+ log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp),
+ sppp_phase_name(sp->pp_phase));
+
+ /* Notify NCPs now. */
+ for (i = 0; i < IDX_COUNT; i++)
+ if ((cps[i])->flags & CP_NCP)
+ (cps[i])->Open(sp);
+
+ /* Send Up events to all NCPs. */
+ for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1)
+ if ((sp->lcp.protos & mask) && ((cps[i])->flags & CP_NCP))
+ (cps[i])->Up(sp);
+
+ /* if no NCP is starting, all this was in vain, close down */
+ sppp_lcp_check_and_close(sp);
+}
+
+
+static const char *
+sppp_cp_type_name(u_char type)
+{
+ static char buf[12];
+ switch (type) {
+ case CONF_REQ: return "conf-req";
+ case CONF_ACK: return "conf-ack";
+ case CONF_NAK: return "conf-nak";
+ case CONF_REJ: return "conf-rej";
+ case TERM_REQ: return "term-req";
+ case TERM_ACK: return "term-ack";
+ case CODE_REJ: return "code-rej";
+ case PROTO_REJ: return "proto-rej";
+ case ECHO_REQ: return "echo-req";
+ case ECHO_REPLY: return "echo-reply";
+ case DISC_REQ: return "discard-req";
+ }
+ snprintf (buf, sizeof(buf), "cp/0x%x", type);
+ return buf;
+}
+
+static const char *
+sppp_auth_type_name(u_short proto, u_char type)
+{
+ static char buf[12];
+ switch (proto) {
+ case PPP_CHAP:
+ switch (type) {
+ case CHAP_CHALLENGE: return "challenge";
+ case CHAP_RESPONSE: return "response";
+ case CHAP_SUCCESS: return "success";
+ case CHAP_FAILURE: return "failure";
+ }
+ case PPP_PAP:
+ switch (type) {
+ case PAP_REQ: return "req";
+ case PAP_ACK: return "ack";
+ case PAP_NAK: return "nak";
+ }
+ }
+ snprintf (buf, sizeof(buf), "auth/0x%x", type);
+ return buf;
+}
+
+static const char *
+sppp_lcp_opt_name(u_char opt)
+{
+ static char buf[12];
+ switch (opt) {
+ case LCP_OPT_MRU: return "mru";
+ case LCP_OPT_ASYNC_MAP: return "async-map";
+ case LCP_OPT_AUTH_PROTO: return "auth-proto";
+ case LCP_OPT_QUAL_PROTO: return "qual-proto";
+ case LCP_OPT_MAGIC: return "magic";
+ case LCP_OPT_PROTO_COMP: return "proto-comp";
+ case LCP_OPT_ADDR_COMP: return "addr-comp";
+ }
+ snprintf (buf, sizeof(buf), "lcp/0x%x", opt);
+ return buf;
+}
+
+#ifdef INET
+static const char *
+sppp_ipcp_opt_name(u_char opt)
+{
+ static char buf[12];
+ switch (opt) {
+ case IPCP_OPT_ADDRESSES: return "addresses";
+ case IPCP_OPT_COMPRESSION: return "compression";
+ case IPCP_OPT_ADDRESS: return "address";
+ }
+ snprintf (buf, sizeof(buf), "ipcp/0x%x", opt);
+ return buf;
+}
+#endif
+
+#ifdef INET6
+static const char *
+sppp_ipv6cp_opt_name(u_char opt)
+{
+ static char buf[12];
+ switch (opt) {
+ case IPV6CP_OPT_IFID: return "ifid";
+ case IPV6CP_OPT_COMPRESSION: return "compression";
+ }
+	snprintf (buf, sizeof(buf), "ipv6cp/0x%x", opt);
+ return buf;
+}
+#endif
+
+static const char *
+sppp_state_name(int state)
+{
+ switch (state) {
+ case STATE_INITIAL: return "initial";
+ case STATE_STARTING: return "starting";
+ case STATE_CLOSED: return "closed";
+ case STATE_STOPPED: return "stopped";
+ case STATE_CLOSING: return "closing";
+ case STATE_STOPPING: return "stopping";
+ case STATE_REQ_SENT: return "req-sent";
+ case STATE_ACK_RCVD: return "ack-rcvd";
+ case STATE_ACK_SENT: return "ack-sent";
+ case STATE_OPENED: return "opened";
+ }
+ return "illegal";
+}
+
+static const char *
+sppp_phase_name(enum ppp_phase phase)
+{
+ switch (phase) {
+ case PHASE_DEAD: return "dead";
+ case PHASE_ESTABLISH: return "establish";
+ case PHASE_TERMINATE: return "terminate";
+ case PHASE_AUTHENTICATE: return "authenticate";
+ case PHASE_NETWORK: return "network";
+ }
+ return "illegal";
+}
+
+static const char *
+sppp_proto_name(u_short proto)
+{
+ static char buf[12];
+ switch (proto) {
+ case PPP_LCP: return "lcp";
+ case PPP_IPCP: return "ipcp";
+ case PPP_PAP: return "pap";
+ case PPP_CHAP: return "chap";
+ case PPP_IPV6CP: return "ipv6cp";
+ }
+ snprintf(buf, sizeof(buf), "proto/0x%x", (unsigned)proto);
+ return buf;
+}
+
+static void
+sppp_print_bytes(const u_char *p, u_short len)
+{
+ if (len)
+ log(-1, " %*D", len, p, "-");
+}
+
+static void
+sppp_print_string(const char *p, u_short len)
+{
+ u_char c;
+
+ while (len-- > 0) {
+ c = *p++;
+ /*
+ * Print only ASCII chars directly. RFC 1994 recommends
+ * using only them, but we don't rely on it. */
+ if (c < ' ' || c > '~')
+ log(-1, "\\x%x", c);
+ else
+ log(-1, "%c", c);
+ }
+}
+
+#ifdef INET
+static const char *
+sppp_dotted_quad(u_long addr)
+{
+ static char s[16];
+ sprintf(s, "%d.%d.%d.%d",
+ (int)((addr >> 24) & 0xff),
+ (int)((addr >> 16) & 0xff),
+ (int)((addr >> 8) & 0xff),
+ (int)(addr & 0xff));
+ return s;
+}
+#endif
+
+static int
+sppp_strnlen(u_char *p, int max)
+{
+ int len;
+
+ for (len = 0; len < max && *p; ++p)
+ ++len;
+ return len;
+}
+
+/* a dummy, used to drop uninteresting events */
+static void
+sppp_null(struct sppp *unused)
+{
+ /* do just nothing */
+}
diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c
new file mode 100644
index 00000000..1cf5c408
--- /dev/null
+++ b/freebsd/sys/net/if_stf.c
@@ -0,0 +1,850 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $ */
+
+/*-
+ * Copyright (C) 2000 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * 6to4 interface, based on RFC3056.
+ *
+ * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting.
+ * There is no address mapping defined from IPv6 multicast address to IPv4
+ * address. Therefore, we do not have IFF_MULTICAST on the interface.
+ *
+ * Due to the lack of address mapping for link-local addresses, we cannot
+ * throw packets toward link-local addresses (fe80::x). Also, we cannot throw
+ * packets to link-local multicast addresses (ff02::x).
+ *
+ * Here are interesting symptoms due to the lack of link-local address:
+ *
+ * Unicast routing exchange:
+ * - RIPng: Impossible. Uses link-local multicast packet toward ff02::9,
+ * and link-local addresses as nexthop.
+ * - OSPFv6: Impossible. OSPFv6 assumes that there's link-local address
+ * assigned to the link, and makes use of them. Also, HELLO packets use
+ * link-local multicast addresses (ff02::5 and ff02::6).
+ * - BGP4+: Maybe. You can only use global address as nexthop, and global
+ * address as TCP endpoint address.
+ *
+ * Multicast routing protocols:
+ * - PIM: Hello packet cannot be used to discover adjacent PIM routers.
+ *   Adjacent PIM routers must be configured manually (is that really the
+ *   spec-wise correct thing to do?).
+ *
+ * ICMPv6:
+ * - Redirects cannot be used due to the lack of link-local address.
+ *
+ * stf interface does not have, and will not need, a link-local address.
+ * It seems to have no real benefit and does not help the above symptoms much.
+ * Even if we assign link-locals to the interface, we cannot really
+ * use link-local unicast/multicast on top of 6to4 cloud (since there's no
+ * encapsulation defined for link-local address), and the above analysis does
+ * not change. RFC3056 does not mandate the assignment of link-local address
+ * either.
+ *
+ * 6to4 interface has security issues. Refer to
+ * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt
+ * for details. The code tries to filter out some of the malicious packets.
+ * Note that there is no way to be 100% secure.
+ */
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/machine/cpu.h>
+
+#include <freebsd/sys/malloc.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_stf.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/in_var.h>
+
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/in6_var.h>
+#include <freebsd/netinet/ip_ecn.h>
+
+#include <freebsd/netinet/ip_encap.h>
+
+#include <freebsd/machine/stdarg.h>
+
+#include <freebsd/net/bpf.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface");
+
+static int stf_route_cache = 1;
+SYSCTL_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW,
+ &stf_route_cache, 0, "Caching of IPv4 routes for 6to4 Output");
+
+#define STFNAME "stf"
+#define STFUNIT 0
+
+#define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002)
+
+/*
+ * XXX: Returns a pointer that is only 16-bit aligned. Don't cast it to
+ * struct in_addr *; use bcopy() instead.
+ */
+#define GET_V4(x) ((caddr_t)(&(x)->s6_addr16[1]))
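+
+/*
+ * Illustration: the 6to4 address 2002:0a01:0101:: embeds the IPv4
+ * address 10.1.1.1 in s6_addr16[1] and s6_addr16[2], which is exactly
+ * where GET_V4() points.
+ */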
+
+struct stf_softc {
+ struct ifnet *sc_ifp;
+ union {
+ struct route __sc_ro4;
+ struct route_in6 __sc_ro6; /* just for safety */
+ } __sc_ro46;
+#define sc_ro __sc_ro46.__sc_ro4
+ struct mtx sc_ro_mtx;
+ u_int sc_fibnum;
+ const struct encaptab *encap_cookie;
+};
+#define STF2IFP(sc) ((sc)->sc_ifp)
+
+/*
+ * Note that mutable fields in the softc are not currently locked.
+ * We do lock sc_ro in stf_output though.
+ */
+static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface");
+static const int ip_stf_ttl = 40;
+
+extern struct domain inetdomain;
+struct protosw in_stf_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_IPV6,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = in_stf_input,
+ .pr_output = (pr_output_t *)rip_output,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+
+static char *stfnames[] = {"stf0", "stf", "6to4", NULL};
+
+static int stfmodevent(module_t, int, void *);
+static int stf_encapcheck(const struct mbuf *, int, int, void *);
+static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *);
+static int stf_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+static int isrfc1918addr(struct in_addr *);
+static int stf_checkaddr4(struct stf_softc *, struct in_addr *,
+ struct ifnet *);
+static int stf_checkaddr6(struct stf_softc *, struct in6_addr *,
+ struct ifnet *);
+static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
+static int stf_ioctl(struct ifnet *, u_long, caddr_t);
+
+static int stf_clone_match(struct if_clone *, const char *);
+static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
+static int stf_clone_destroy(struct if_clone *, struct ifnet *);
+struct if_clone stf_cloner = IFC_CLONE_INITIALIZER(STFNAME, NULL, 0,
+ NULL, stf_clone_match, stf_clone_create, stf_clone_destroy);
+
+static int
+stf_clone_match(struct if_clone *ifc, const char *name)
+{
+ int i;
+
+ for(i = 0; stfnames[i] != NULL; i++) {
+ if (strcmp(stfnames[i], name) == 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
+{
+ int err, unit;
+ struct stf_softc *sc;
+ struct ifnet *ifp;
+
+ /*
+ * We can only have one unit, but since unit allocation is
+ * already locked, we use it to keep from allocating extra
+ * interfaces.
+ */
+ unit = STFUNIT;
+ err = ifc_alloc_unit(ifc, &unit);
+ if (err != 0)
+ return (err);
+
+ sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO);
+ ifp = STF2IFP(sc) = if_alloc(IFT_STF);
+ if (ifp == NULL) {
+ free(sc, M_STF);
+ ifc_free_unit(ifc, unit);
+ return (ENOSPC);
+ }
+ ifp->if_softc = sc;
+ sc->sc_fibnum = curthread->td_proc->p_fibnum;
+
+ /*
+	 * Set the name manually rather than using if_initname because
+ * we don't conform to the default naming convention for interfaces.
+ */
+ strlcpy(ifp->if_xname, name, IFNAMSIZ);
+ ifp->if_dname = ifc->ifc_name;
+ ifp->if_dunit = IF_DUNIT_NONE;
+
+ mtx_init(&(sc)->sc_ro_mtx, "stf ro", NULL, MTX_DEF);
+ sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6,
+ stf_encapcheck, &in_stf_protosw, sc);
+ if (sc->encap_cookie == NULL) {
+ if_printf(ifp, "attach failed\n");
+ free(sc, M_STF);
+ ifc_free_unit(ifc, unit);
+ return (ENOMEM);
+ }
+
+ ifp->if_mtu = IPV6_MMTU;
+ ifp->if_ioctl = stf_ioctl;
+ ifp->if_output = stf_output;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ if_attach(ifp);
+ bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
+ return (0);
+}
+
+static int
+stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
+{
+ struct stf_softc *sc = ifp->if_softc;
+ int err;
+
+ err = encap_detach(sc->encap_cookie);
+ KASSERT(err == 0, ("Unexpected error detaching encap_cookie"));
+ mtx_destroy(&(sc)->sc_ro_mtx);
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+
+ free(sc, M_STF);
+ ifc_free_unit(ifc, STFUNIT);
+
+ return (0);
+}
+
+static int
+stfmodevent(mod, type, data)
+ module_t mod;
+ int type;
+ void *data;
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ if_clone_attach(&stf_cloner);
+ break;
+ case MOD_UNLOAD:
+ if_clone_detach(&stf_cloner);
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+
+ return (0);
+}
+
+static moduledata_t stf_mod = {
+ "if_stf",
+ stfmodevent,
+ 0
+};
+
+DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+
+static int
+stf_encapcheck(m, off, proto, arg)
+ const struct mbuf *m;
+ int off;
+ int proto;
+ void *arg;
+{
+ struct ip ip;
+ struct in6_ifaddr *ia6;
+ struct stf_softc *sc;
+ struct in_addr a, b, mask;
+
+ sc = (struct stf_softc *)arg;
+ if (sc == NULL)
+ return 0;
+
+ if ((STF2IFP(sc)->if_flags & IFF_UP) == 0)
+ return 0;
+
+ /* IFF_LINK0 means "no decapsulation" */
+ if ((STF2IFP(sc)->if_flags & IFF_LINK0) != 0)
+ return 0;
+
+ if (proto != IPPROTO_IPV6)
+ return 0;
+
+ /* LINTED const cast */
+ m_copydata((struct mbuf *)(uintptr_t)m, 0, sizeof(ip), (caddr_t)&ip);
+
+ if (ip.ip_v != 4)
+ return 0;
+
+ ia6 = stf_getsrcifa6(STF2IFP(sc));
+ if (ia6 == NULL)
+ return 0;
+
+ /*
+ * check if IPv4 dst matches the IPv4 address derived from the
+ * local 6to4 address.
+ * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:...
+ */
+ if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst,
+ sizeof(ip.ip_dst)) != 0) {
+ ifa_free(&ia6->ia_ifa);
+ return 0;
+ }
+
+ /*
+ * check if IPv4 src matches the IPv4 address derived from the
+ * local 6to4 address masked by prefixmask.
+ * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24
+ * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24
+ */
+ bzero(&a, sizeof(a));
+ bcopy(GET_V4(&ia6->ia_addr.sin6_addr), &a, sizeof(a));
+ bcopy(GET_V4(&ia6->ia_prefixmask.sin6_addr), &mask, sizeof(mask));
+ ifa_free(&ia6->ia_ifa);
+ a.s_addr &= mask.s_addr;
+ b = ip.ip_src;
+ b.s_addr &= mask.s_addr;
+ if (a.s_addr != b.s_addr)
+ return 0;
+
+	/* stf interface matches a single side only; claim a 32-bit match */
+ return 32;
+}
+
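+/*
+ * Return the first configured 6to4 (2002::/16) address on ifp whose
+ * embedded IPv4 address is also configured on the system, with a
+ * reference held on the ifaddr; return NULL if there is none.
+ */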
+static struct in6_ifaddr *
+stf_getsrcifa6(ifp)
+ struct ifnet *ifp;
+{
+ struct ifaddr *ia;
+ struct in_ifaddr *ia4;
+ struct sockaddr_in6 *sin6;
+ struct in_addr in;
+
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
+ if (ia->ifa_addr->sa_family != AF_INET6)
+ continue;
+ sin6 = (struct sockaddr_in6 *)ia->ifa_addr;
+ if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr))
+ continue;
+
+ bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in));
+ LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash)
+ if (ia4->ia_addr.sin_addr.s_addr == in.s_addr)
+ break;
+ if (ia4 == NULL)
+ continue;
+
+ ifa_ref(ia);
+ if_addr_runlock(ifp);
+ return (struct in6_ifaddr *)ia;
+ }
+ if_addr_runlock(ifp);
+
+ return NULL;
+}
+
+static int
+stf_output(ifp, m, dst, ro)
+ struct ifnet *ifp;
+ struct mbuf *m;
+ struct sockaddr *dst;
+ struct route *ro;
+{
+ struct stf_softc *sc;
+ struct sockaddr_in6 *dst6;
+ struct route *cached_route;
+ struct in_addr in4;
+ caddr_t ptr;
+ struct sockaddr_in *dst4;
+ u_int8_t tos;
+ struct ip *ip;
+ struct ip6_hdr *ip6;
+ struct in6_ifaddr *ia6;
+ u_int32_t af;
+ int error;
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error) {
+ m_freem(m);
+ return (error);
+ }
+#endif
+
+ sc = ifp->if_softc;
+ dst6 = (struct sockaddr_in6 *)dst;
+
+ /* just in case */
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ m_freem(m);
+ ifp->if_oerrors++;
+ return ENETDOWN;
+ }
+
+ /*
+	 * If we don't have an ip4 address that matches my inner ip6 address,
+	 * we shouldn't generate output. Without this check, we'll end up
+	 * using the wrong IPv4 source.
+ */
+ ia6 = stf_getsrcifa6(ifp);
+ if (ia6 == NULL) {
+ m_freem(m);
+ ifp->if_oerrors++;
+ return ENETDOWN;
+ }
+
+ if (m->m_len < sizeof(*ip6)) {
+ m = m_pullup(m, sizeof(*ip6));
+ if (!m) {
+ ifa_free(&ia6->ia_ifa);
+ ifp->if_oerrors++;
+ return ENOBUFS;
+ }
+ }
+ ip6 = mtod(m, struct ip6_hdr *);
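+	/* The traffic class occupies bits 20-27 of ntohl(ip6->ip6_flow). */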
+ tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+
+ /*
+	 * BPF writes need to be handled specially: for them, dst->sa_family
+	 * is AF_UNSPEC and the real address family is carried in sa_data,
+	 * so recover it here.
+ */
+ if (dst->sa_family == AF_UNSPEC) {
+ bcopy(dst->sa_data, &af, sizeof(af));
+ dst->sa_family = af;
+ }
+
+ /*
+	 * Pick the right outer dst addr from the list of candidates.
+ * ip6_dst has priority as it may be able to give us shorter IPv4 hops.
+ */
+ ptr = NULL;
+ if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst))
+ ptr = GET_V4(&ip6->ip6_dst);
+ else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr))
+ ptr = GET_V4(&dst6->sin6_addr);
+ else {
+ ifa_free(&ia6->ia_ifa);
+ m_freem(m);
+ ifp->if_oerrors++;
+ return ENETUNREACH;
+ }
+ bcopy(ptr, &in4, sizeof(in4));
+
+ if (bpf_peers_present(ifp->if_bpf)) {
+ /*
+ * We need to prepend the address family as
+ * a four byte field. Cons up a dummy header
+ * to pacify bpf. This is safe because bpf
+ * will only read from the mbuf (i.e., it won't
+		 * try to free it or keep a pointer to it).
+ */
+ af = AF_INET6;
+ bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
+ }
+
+ M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+ if (m && m->m_len < sizeof(struct ip))
+ m = m_pullup(m, sizeof(struct ip));
+ if (m == NULL) {
+ ifa_free(&ia6->ia_ifa);
+ ifp->if_oerrors++;
+ return ENOBUFS;
+ }
+ ip = mtod(m, struct ip *);
+
+ bzero(ip, sizeof(*ip));
+
+ bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr),
+ &ip->ip_src, sizeof(ip->ip_src));
+ ifa_free(&ia6->ia_ifa);
+ bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst));
+ ip->ip_p = IPPROTO_IPV6;
+ ip->ip_ttl = ip_stf_ttl;
+ ip->ip_len = m->m_pkthdr.len; /*host order*/
+ if (ifp->if_flags & IFF_LINK1)
+ ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
+ else
+ ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
+
+ if (!stf_route_cache) {
+ cached_route = NULL;
+ goto sendit;
+ }
+
+ /*
+ * Do we have a cached route?
+ */
+ mtx_lock(&(sc)->sc_ro_mtx);
+ dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst;
+ if (dst4->sin_family != AF_INET ||
+ bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) {
+		/* cached route doesn't match */
+ dst4->sin_family = AF_INET;
+ dst4->sin_len = sizeof(struct sockaddr_in);
+ bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr));
+ if (sc->sc_ro.ro_rt) {
+ RTFREE(sc->sc_ro.ro_rt);
+ sc->sc_ro.ro_rt = NULL;
+ }
+ }
+
+ if (sc->sc_ro.ro_rt == NULL) {
+ rtalloc_fib(&sc->sc_ro, sc->sc_fibnum);
+ if (sc->sc_ro.ro_rt == NULL) {
+ m_freem(m);
+ mtx_unlock(&(sc)->sc_ro_mtx);
+ ifp->if_oerrors++;
+ return ENETUNREACH;
+ }
+ }
+ cached_route = &sc->sc_ro;
+
+sendit:
+ M_SETFIB(m, sc->sc_fibnum);
+ ifp->if_opackets++;
+ error = ip_output(m, NULL, cached_route, 0, NULL, NULL);
+
+ if (cached_route != NULL)
+ mtx_unlock(&(sc)->sc_ro_mtx);
+ return error;
+}
+
+static int
+isrfc1918addr(in)
+ struct in_addr *in;
+{
+ /*
+	 * returns 1 if the address is in a private range:
+ * 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16
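+	 * (The masks select the /8, /12 and /16 prefixes; e.g. 172*256+16
+	 * == 0xac10 is the first 16 bits of 172.16.0.0.)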
+ */
+ if ((ntohl(in->s_addr) & 0xff000000) >> 24 == 10 ||
+ (ntohl(in->s_addr) & 0xfff00000) >> 16 == 172 * 256 + 16 ||
+ (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168)
+ return 1;
+
+ return 0;
+}
+
+static int
+stf_checkaddr4(sc, in, inifp)
+ struct stf_softc *sc;
+ struct in_addr *in;
+ struct ifnet *inifp; /* incoming interface */
+{
+ struct in_ifaddr *ia4;
+
+ /*
+	 * reject packets with the following addresses:
+ * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8
+ */
+ if (IN_MULTICAST(ntohl(in->s_addr)))
+ return -1;
+ switch ((ntohl(in->s_addr) & 0xff000000) >> 24) {
+ case 0: case 127: case 255:
+ return -1;
+ }
+
+ /*
+	 * reject packets from private address ranges.
+ * (requirement from RFC3056 section 2 1st paragraph)
+ */
+ if (isrfc1918addr(in))
+ return -1;
+
+ /*
+	 * reject packets with a broadcast address
+ */
+ IN_IFADDR_RLOCK();
+ for (ia4 = TAILQ_FIRST(&V_in_ifaddrhead);
+ ia4;
+ ia4 = TAILQ_NEXT(ia4, ia_link))
+ {
+ if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
+ continue;
+ if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
+ IN_IFADDR_RUNLOCK();
+ return -1;
+ }
+ }
+ IN_IFADDR_RUNLOCK();
+
+ /*
+ * perform ingress filter
+ */
+ if (sc && (STF2IFP(sc)->if_flags & IFF_LINK2) == 0 && inifp) {
+ struct sockaddr_in sin;
+ struct rtentry *rt;
+
+ bzero(&sin, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_addr = *in;
+ rt = rtalloc1_fib((struct sockaddr *)&sin, 0,
+ 0UL, sc->sc_fibnum);
+ if (!rt || rt->rt_ifp != inifp) {
+#if 0
+ log(LOG_WARNING, "%s: packet from 0x%x dropped "
+ "due to ingress filter\n", if_name(STF2IFP(sc)),
+ (u_int32_t)ntohl(sin.sin_addr.s_addr));
+#endif
+ if (rt)
+ RTFREE_LOCKED(rt);
+ return -1;
+ }
+ RTFREE_LOCKED(rt);
+ }
+
+ return 0;
+}
+
+static int
+stf_checkaddr6(sc, in6, inifp)
+ struct stf_softc *sc;
+ struct in6_addr *in6;
+ struct ifnet *inifp; /* incoming interface */
+{
+ /*
+ * check 6to4 addresses
+ */
+ if (IN6_IS_ADDR_6TO4(in6)) {
+ struct in_addr in4;
+ bcopy(GET_V4(in6), &in4, sizeof(in4));
+ return stf_checkaddr4(sc, &in4, inifp);
+ }
+
+ /*
+	 * reject anything that looks suspicious. The test is implemented
+	 * in ip6_input too, but we check here as well to
+	 * (1) reject bad packets earlier, and
+	 * (2) be safe against future ip6_input changes.
+ */
+ if (IN6_IS_ADDR_V4COMPAT(in6) || IN6_IS_ADDR_V4MAPPED(in6))
+ return -1;
+
+ return 0;
+}
+
+void
+in_stf_input(m, off)
+ struct mbuf *m;
+ int off;
+{
+ int proto;
+ struct stf_softc *sc;
+ struct ip *ip;
+ struct ip6_hdr *ip6;
+ u_int8_t otos, itos;
+ struct ifnet *ifp;
+
+ proto = mtod(m, struct ip *)->ip_p;
+
+ if (proto != IPPROTO_IPV6) {
+ m_freem(m);
+ return;
+ }
+
+ ip = mtod(m, struct ip *);
+
+ sc = (struct stf_softc *)encap_getarg(m);
+
+ if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) {
+ m_freem(m);
+ return;
+ }
+
+ ifp = STF2IFP(sc);
+
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+ /*
+ * perform sanity check against outer src/dst.
+ * for source, perform ingress filter as well.
+ */
+ if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 ||
+ stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) {
+ m_freem(m);
+ return;
+ }
+
+ otos = ip->ip_tos;
+ m_adj(m, off);
+
+ if (m->m_len < sizeof(*ip6)) {
+ m = m_pullup(m, sizeof(*ip6));
+ if (!m)
+ return;
+ }
+ ip6 = mtod(m, struct ip6_hdr *);
+
+ /*
+ * perform sanity check against inner src/dst.
+ * for source, perform ingress filter as well.
+ */
+ if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 ||
+ stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) {
+ m_freem(m);
+ return;
+ }
+
+ itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+ if ((ifp->if_flags & IFF_LINK1) != 0)
+ ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
+ else
+ ip_ecn_egress(ECN_NOCARE, &otos, &itos);
+ ip6->ip6_flow &= ~htonl(0xff << 20);
+ ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
+
+ m->m_pkthdr.rcvif = ifp;
+
+ if (bpf_peers_present(ifp->if_bpf)) {
+ /*
+ * We need to prepend the address family as
+ * a four byte field. Cons up a dummy header
+ * to pacify bpf. This is safe because bpf
+ * will only read from the mbuf (i.e., it won't
+		 * try to free it or keep a pointer to it).
+ */
+ u_int32_t af = AF_INET6;
+ bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
+ }
+
+ /*
+	 * Put the packet onto the network layer input queue according to the
+	 * specified address family.
+	 * See net/if_gif.c for possible issues with packet processing
+	 * reordering due to extra queueing.
+ */
+ ifp->if_ipackets++;
+ ifp->if_ibytes += m->m_pkthdr.len;
+ netisr_dispatch(NETISR_IPV6, m);
+}
+
+/* ARGSUSED */
+static void
+stf_rtrequest(cmd, rt, info)
+ int cmd;
+ struct rtentry *rt;
+ struct rt_addrinfo *info;
+{
+ RT_LOCK_ASSERT(rt);
+ rt->rt_rmx.rmx_mtu = IPV6_MMTU;
+}
+
+static int
+stf_ioctl(ifp, cmd, data)
+ struct ifnet *ifp;
+ u_long cmd;
+ caddr_t data;
+{
+ struct ifaddr *ifa;
+ struct ifreq *ifr;
+ struct sockaddr_in6 *sin6;
+ struct in_addr addr;
+ int error;
+
+ error = 0;
+ switch (cmd) {
+ case SIOCSIFADDR:
+ ifa = (struct ifaddr *)data;
+ if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
+ if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) {
+ error = EINVAL;
+ break;
+ }
+ bcopy(GET_V4(&sin6->sin6_addr), &addr, sizeof(addr));
+ if (isrfc1918addr(&addr)) {
+ error = EINVAL;
+ break;
+ }
+
+ ifa->ifa_rtrequest = stf_rtrequest;
+ ifp->if_flags |= IFF_UP;
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ ifr = (struct ifreq *)data;
+		if (ifr == NULL || ifr->ifr_addr.sa_family != AF_INET6)
+			error = EAFNOSUPPORT;
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
diff --git a/freebsd/sys/net/if_stf.h b/freebsd/sys/net/if_stf.h
new file mode 100644
index 00000000..64fd30ee
--- /dev/null
+++ b/freebsd/sys/net/if_stf.h
@@ -0,0 +1,38 @@
+/* $FreeBSD$ */
+/* $KAME: if_stf.h,v 1.5 2001/10/12 10:09:17 keiichi Exp $ */
+
+/*-
+ * Copyright (C) 2000 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_STF_HH_
+#define _NET_IF_STF_HH_
+
+void in_stf_input(struct mbuf *, int);
+
+#endif /* _NET_IF_STF_HH_ */
diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c
new file mode 100644
index 00000000..206302bb
--- /dev/null
+++ b/freebsd/sys/net/if_tap.c
@@ -0,0 +1,1086 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * BASED ON:
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
+ * Nottingham University 1987.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
+ */
+
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_inet.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/conf.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/poll.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/selinfo.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/ttycom.h>
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/queue.h>
+
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/if_types.h>
+
+#include <freebsd/netinet/in.h>
+
+#include <freebsd/net/if_tapvar.h>
+#include <freebsd/net/if_tap.h>
+
+
+#define CDEV_NAME "tap"
+#define TAPDEBUG if (tapdebug) printf
+
+#define TAP "tap"
+#define VMNET "vmnet"
+#define TAPMAXUNIT 0x7fff
+#define VMNET_DEV_MASK CLONE_FLAG0
+
+/* module */
+static int tapmodevent(module_t, int, void *);
+
+/* device */
+static void tapclone(void *, struct ucred *, char *, int,
+ struct cdev **);
+static void tapcreate(struct cdev *);
+
+/* network interface */
+static void tapifstart(struct ifnet *);
+static int tapifioctl(struct ifnet *, u_long, caddr_t);
+static void tapifinit(void *);
+
+static int tap_clone_create(struct if_clone *, int, caddr_t);
+static void tap_clone_destroy(struct ifnet *);
+static int vmnet_clone_create(struct if_clone *, int, caddr_t);
+static void vmnet_clone_destroy(struct ifnet *);
+
+IFC_SIMPLE_DECLARE(tap, 0);
+IFC_SIMPLE_DECLARE(vmnet, 0);
+
+/* character device */
+static d_open_t tapopen;
+static d_close_t tapclose;
+static d_read_t tapread;
+static d_write_t tapwrite;
+static d_ioctl_t tapioctl;
+static d_poll_t tappoll;
+static d_kqfilter_t tapkqfilter;
+
+/* kqueue(2) */
+static int tapkqread(struct knote *, long);
+static int tapkqwrite(struct knote *, long);
+static void tapkqdetach(struct knote *);
+
+static struct filterops tap_read_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = tapkqdetach,
+ .f_event = tapkqread,
+};
+
+static struct filterops tap_write_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = tapkqdetach,
+ .f_event = tapkqwrite,
+};
+
+static struct cdevsw tap_cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_PSEUDO | D_NEEDMINOR,
+ .d_open = tapopen,
+ .d_close = tapclose,
+ .d_read = tapread,
+ .d_write = tapwrite,
+ .d_ioctl = tapioctl,
+ .d_poll = tappoll,
+ .d_name = CDEV_NAME,
+ .d_kqfilter = tapkqfilter,
+};
+
+/*
+ * All global variables in if_tap.c are locked with tapmtx, with the
+ * exception of tapdebug, which is accessed unlocked; tapclones is
+ * static at runtime.
+ */
+static struct mtx tapmtx;
+static int tapdebug = 0; /* debug flag */
+static int tapuopen = 0; /* allow user open() */
+static int tapuponopen = 0; /* IFF_UP on open() */
+static int tapdclone = 1; /* enable devfs cloning */
+static SLIST_HEAD(, tap_softc) taphead; /* first device */
+static struct clonedevs *tapclones;
+
+MALLOC_DECLARE(M_TAP);
+MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
+SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
+ "Ethernet tunnel software network interface");
+SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
+ "Allow user to open /dev/tap (based on node permissions)");
+SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
+ "Bring interface up when /dev/tap is opened");
+SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tapdclone, 0,
+ "Enably legacy devfs interface creation");
+SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");
+
+TUNABLE_INT("net.link.tap.devfs_cloning", &tapdclone);
+
+DEV_MODULE(if_tap, tapmodevent, NULL);
+
+static int
+tap_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct cdev *dev;
+ int i;
+ int extra;
+
+ if (strcmp(ifc->ifc_name, VMNET) == 0)
+ extra = VMNET_DEV_MASK;
+ else
+ extra = 0;
+
+ /* find any existing device, or allocate new unit number */
+ i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, extra);
+ if (i) {
+ dev = make_dev(&tap_cdevsw, unit | extra,
+ UID_ROOT, GID_WHEEL, 0600, "%s%d", ifc->ifc_name, unit);
+ }
+
+ tapcreate(dev);
+ return (0);
+}
+
+/* vmnet devices are tap devices in disguise */
+static int
+vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ return tap_clone_create(ifc, unit, params);
+}
+
+static void
+tap_destroy(struct tap_softc *tp)
+{
+ struct ifnet *ifp = tp->tap_ifp;
+
+ /* Unlocked read. */
+ KASSERT(!(tp->tap_flags & TAP_OPEN),
+ ("%s flags is out of sync", ifp->if_xname));
+
+ knlist_destroy(&tp->tap_rsel.si_note);
+ destroy_dev(tp->tap_dev);
+ ether_ifdetach(ifp);
+ if_free_type(ifp, IFT_ETHER);
+
+ mtx_destroy(&tp->tap_mtx);
+ free(tp, M_TAP);
+}
+
+static void
+tap_clone_destroy(struct ifnet *ifp)
+{
+ struct tap_softc *tp = ifp->if_softc;
+
+ mtx_lock(&tapmtx);
+ SLIST_REMOVE(&taphead, tp, tap_softc, tap_next);
+ mtx_unlock(&tapmtx);
+ tap_destroy(tp);
+}
+
+/* vmnet devices are tap devices in disguise */
+static void
+vmnet_clone_destroy(struct ifnet *ifp)
+{
+ tap_clone_destroy(ifp);
+}
+
+/*
+ * tapmodevent
+ *
+ * module event handler
+ */
+static int
+tapmodevent(module_t mod, int type, void *data)
+{
+ static eventhandler_tag eh_tag = NULL;
+ struct tap_softc *tp = NULL;
+ struct ifnet *ifp = NULL;
+
+ switch (type) {
+ case MOD_LOAD:
+
+ /* initialize device */
+
+ mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
+ SLIST_INIT(&taphead);
+
+ clone_setup(&tapclones);
+ eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
+ if (eh_tag == NULL) {
+ clone_cleanup(&tapclones);
+ mtx_destroy(&tapmtx);
+ return (ENOMEM);
+ }
+ if_clone_attach(&tap_cloner);
+ if_clone_attach(&vmnet_cloner);
+ return (0);
+
+ case MOD_UNLOAD:
+ /*
+ * The EBUSY algorithm here can't quite atomically
+ * guarantee that this is race-free since we have to
+ * release the tap mtx to deregister the clone handler.
+ */
+ mtx_lock(&tapmtx);
+ SLIST_FOREACH(tp, &taphead, tap_next) {
+ mtx_lock(&tp->tap_mtx);
+ if (tp->tap_flags & TAP_OPEN) {
+ mtx_unlock(&tp->tap_mtx);
+ mtx_unlock(&tapmtx);
+ return (EBUSY);
+ }
+ mtx_unlock(&tp->tap_mtx);
+ }
+ mtx_unlock(&tapmtx);
+
+ EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
+ if_clone_detach(&tap_cloner);
+ if_clone_detach(&vmnet_cloner);
+ drain_dev_clone_events();
+
+ mtx_lock(&tapmtx);
+ while ((tp = SLIST_FIRST(&taphead)) != NULL) {
+ SLIST_REMOVE_HEAD(&taphead, tap_next);
+ mtx_unlock(&tapmtx);
+
+ ifp = tp->tap_ifp;
+
+ TAPDEBUG("detaching %s\n", ifp->if_xname);
+
+ tap_destroy(tp);
+ mtx_lock(&tapmtx);
+ }
+ mtx_unlock(&tapmtx);
+ clone_cleanup(&tapclones);
+
+ mtx_destroy(&tapmtx);
+
+ break;
+
+ default:
+ return (EOPNOTSUPP);
+ }
+
+ return (0);
+} /* tapmodevent */
+
+
+/*
+ * DEVFS handler
+ *
+ * We need to support two kinds of devices - tap and vmnet
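+ *
+ * Opening "tap" or "vmnet" with no unit number allocates the next
+ * free unit; "tapN"/"vmnetN" name a specific unit. vmnet units are
+ * tagged with VMNET_DEV_MASK in the device minor number.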
+ */
+static void
+tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev)
+{
+ char devname[SPECNAMELEN + 1];
+ int i, unit, append_unit;
+ int extra;
+
+ if (*dev != NULL)
+ return;
+
+ if (!tapdclone ||
+ (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0))
+ return;
+
+ unit = 0;
+ append_unit = 0;
+ extra = 0;
+
+ /* We're interested in only tap/vmnet devices. */
+ if (strcmp(name, TAP) == 0) {
+ unit = -1;
+ } else if (strcmp(name, VMNET) == 0) {
+ unit = -1;
+ extra = VMNET_DEV_MASK;
+ } else if (dev_stdclone(name, NULL, TAP, &unit) != 1) {
+ if (dev_stdclone(name, NULL, VMNET, &unit) != 1) {
+ return;
+ } else {
+ extra = VMNET_DEV_MASK;
+ }
+ }
+
+ if (unit == -1)
+ append_unit = 1;
+
+ /* find any existing device, or allocate new unit number */
+ i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
+ if (i) {
+ if (append_unit) {
+ /*
+ * We were passed 'tap' or 'vmnet' with no unit specified,
+ * so we need to append it now.
+ */
+ namelen = snprintf(devname, sizeof(devname), "%s%d", name,
+ unit);
+ name = devname;
+ }
+
+ *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra,
+ cred, UID_ROOT, GID_WHEEL, 0600, "%s", name);
+ }
+
+ if_clone_create(name, namelen, NULL);
+} /* tapclone */
+
+
+/*
+ * tapcreate
+ *
+ * to create interface
+ */
+static void
+tapcreate(struct cdev *dev)
+{
+ struct ifnet *ifp = NULL;
+ struct tap_softc *tp = NULL;
+ unsigned short macaddr_hi;
+ uint32_t macaddr_mid;
+ int unit;
+ char *name = NULL;
+ u_char eaddr[6];
+
+ dev->si_flags &= ~SI_CHEAPCLONE;
+
+ /* allocate driver storage and create device */
+ tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
+ mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
+ mtx_lock(&tapmtx);
+ SLIST_INSERT_HEAD(&taphead, tp, tap_next);
+ mtx_unlock(&tapmtx);
+
+ unit = dev2unit(dev);
+
+ /* select device: tap or vmnet */
+ if (unit & VMNET_DEV_MASK) {
+ name = VMNET;
+ tp->tap_flags |= TAP_VMNET;
+ } else
+ name = TAP;
+
+ unit &= TAPMAXUNIT;
+
+ TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, dev2unit(dev));
+
+ /* generate fake MAC address: 00 bd xx xx xx unit_no */
+ macaddr_hi = htons(0x00bd);
+ macaddr_mid = (uint32_t) ticks;
+ bcopy(&macaddr_hi, eaddr, sizeof(short));
+ bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t));
+ eaddr[5] = (u_char)unit;
+
+ /* fill the rest and attach interface */
+ ifp = tp->tap_ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL)
+ panic("%s%d: can not if_alloc()", name, unit);
+ ifp->if_softc = tp;
+ if_initname(ifp, name, unit);
+ ifp->if_init = tapifinit;
+ ifp->if_start = tapifstart;
+ ifp->if_ioctl = tapifioctl;
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
+ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
+ ifp->if_capabilities |= IFCAP_LINKSTATE;
+ ifp->if_capenable |= IFCAP_LINKSTATE;
+
+ dev->si_drv1 = tp;
+ tp->tap_dev = dev;
+
+ ether_ifattach(ifp, eaddr);
+
+ mtx_lock(&tp->tap_mtx);
+ tp->tap_flags |= TAP_INITED;
+ mtx_unlock(&tp->tap_mtx);
+
+ knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx);
+
+ TAPDEBUG("interface %s is created. minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+} /* tapcreate */
+
+
+/*
+ * tapopen
+ *
+ * to open tunnel. must be superuser
+ */
+static int
+tapopen(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+ struct tap_softc *tp = NULL;
+ struct ifnet *ifp = NULL;
+ int error;
+
+ if (tapuopen == 0) {
+ error = priv_check(td, PRIV_NET_TAP);
+ if (error)
+ return (error);
+ }
+
+ if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
+ return (ENXIO);
+
+ tp = dev->si_drv1;
+
+ mtx_lock(&tp->tap_mtx);
+ if (tp->tap_flags & TAP_OPEN) {
+ mtx_unlock(&tp->tap_mtx);
+ return (EBUSY);
+ }
+
+ bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
+ tp->tap_pid = td->td_proc->p_pid;
+ tp->tap_flags |= TAP_OPEN;
+ ifp = tp->tap_ifp;
+
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if (tapuponopen)
+ ifp->if_flags |= IFF_UP;
+ if_link_state_change(ifp, LINK_STATE_UP);
+ mtx_unlock(&tp->tap_mtx);
+
+ TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev));
+
+ return (0);
+} /* tapopen */
+
+
+/*
+ * tapclose
+ *
+ * close the device - mark i/f down & delete routing info
+ */
+static int
+tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
+{
+ struct ifaddr *ifa;
+ struct tap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = tp->tap_ifp;
+
+ /* junk all pending output */
+ mtx_lock(&tp->tap_mtx);
+ IF_DRAIN(&ifp->if_snd);
+
+ /*
+ * do not bring the interface down, and do not do anything with the
+ * interface, if we are in VMnet mode; just close the device.
+ */
+
+ if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
+ mtx_unlock(&tp->tap_mtx);
+ if_down(ifp);
+ mtx_lock(&tp->tap_mtx);
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ mtx_unlock(&tp->tap_mtx);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ rtinit(ifa, (int)RTM_DELETE, 0);
+ }
+ if_purgeaddrs(ifp);
+ mtx_lock(&tp->tap_mtx);
+ }
+ }
+
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ funsetown(&tp->tap_sigio);
+ selwakeuppri(&tp->tap_rsel, PZERO+1);
+ KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
+
+ tp->tap_flags &= ~TAP_OPEN;
+ tp->tap_pid = 0;
+ mtx_unlock(&tp->tap_mtx);
+
+ TAPDEBUG("%s is closed. minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+
+ return (0);
+} /* tapclose */
+
+
+/*
+ * tapifinit
+ *
+ * network interface initialization function
+ */
+static void
+tapifinit(void *xtp)
+{
+ struct tap_softc *tp = (struct tap_softc *)xtp;
+ struct ifnet *ifp = tp->tap_ifp;
+
+ TAPDEBUG("initializing %s\n", ifp->if_xname);
+
+ mtx_lock(&tp->tap_mtx);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ mtx_unlock(&tp->tap_mtx);
+
+ /* attempt to start output */
+ tapifstart(ifp);
+} /* tapifinit */
+
+
+/*
+ * tapifioctl
+ *
+ * Process an ioctl request on network interface
+ */
+static int
+tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct tap_softc *tp = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct ifstat *ifs = NULL;
+ int dummy;
+
+ switch (cmd) {
+ case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ break;
+
+ case SIOCSIFMTU:
+ ifp->if_mtu = ifr->ifr_mtu;
+ break;
+
+ case SIOCGIFSTATUS:
+ ifs = (struct ifstat *)data;
+ dummy = strlen(ifs->ascii);
+ mtx_lock(&tp->tap_mtx);
+ if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
+ snprintf(ifs->ascii + dummy,
+ sizeof(ifs->ascii) - dummy,
+ "\tOpened by PID %d\n", tp->tap_pid);
+ mtx_unlock(&tp->tap_mtx);
+ break;
+
+ default:
+ return (ether_ioctl(ifp, cmd, data));
+ /* NOT REACHED */
+ }
+
+ return (0);
+} /* tapifioctl */
+
+
+/*
+ * tapifstart
+ *
+ * queue packets from higher level ready to put out
+ */
+static void
+tapifstart(struct ifnet *ifp)
+{
+ struct tap_softc *tp = ifp->if_softc;
+
+ TAPDEBUG("%s starting\n", ifp->if_xname);
+
+ /*
+ * do not junk pending output if we are in VMnet mode.
+ * XXX: can this do any harm because of queue overflow?
+ */
+
+ mtx_lock(&tp->tap_mtx);
+ if (((tp->tap_flags & TAP_VMNET) == 0) &&
+ ((tp->tap_flags & TAP_READY) != TAP_READY)) {
+ struct mbuf *m;
+
+ TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
+ tp->tap_flags);
+
+ for (;;) {
+ IF_DEQUEUE(&ifp->if_snd, m);
+ if (m != NULL) {
+ m_freem(m);
+ ifp->if_oerrors++;
+ } else
+ break;
+ }
+ mtx_unlock(&tp->tap_mtx);
+
+ return;
+ }
+
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ if (tp->tap_flags & TAP_RWAIT) {
+ tp->tap_flags &= ~TAP_RWAIT;
+ wakeup(tp);
+ }
+
+ if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
+ mtx_unlock(&tp->tap_mtx);
+ pgsigio(&tp->tap_sigio, SIGIO, 0);
+ mtx_lock(&tp->tap_mtx);
+ }
+
+ selwakeuppri(&tp->tap_rsel, PZERO+1);
+ KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
+ ifp->if_opackets ++; /* obytes are counted in ether_output */
+ }
+
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ mtx_unlock(&tp->tap_mtx);
+} /* tapifstart */
+
+
+/*
+ * tapioctl
+ *
+ * the cdevsw interface is now pretty minimal
+ */
+static int
+tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
+{
+ struct tap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = tp->tap_ifp;
+ struct tapinfo *tapp = NULL;
+ int f;
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+ defined(COMPAT_FREEBSD4)
+ int ival;
+#endif
+
+ switch (cmd) {
+ case TAPSIFINFO:
+ tapp = (struct tapinfo *)data;
+ mtx_lock(&tp->tap_mtx);
+ ifp->if_mtu = tapp->mtu;
+ ifp->if_type = tapp->type;
+ ifp->if_baudrate = tapp->baudrate;
+ mtx_unlock(&tp->tap_mtx);
+ break;
+
+ case TAPGIFINFO:
+ tapp = (struct tapinfo *)data;
+ mtx_lock(&tp->tap_mtx);
+ tapp->mtu = ifp->if_mtu;
+ tapp->type = ifp->if_type;
+ tapp->baudrate = ifp->if_baudrate;
+ mtx_unlock(&tp->tap_mtx);
+ break;
+
+ case TAPSDEBUG:
+ tapdebug = *(int *)data;
+ break;
+
+ case TAPGDEBUG:
+ *(int *)data = tapdebug;
+ break;
+
+ case TAPGIFNAME: {
+ struct ifreq *ifr = (struct ifreq *) data;
+
+ strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
+ } break;
+
+ case FIONBIO:
+ break;
+
+ case FIOASYNC:
+ mtx_lock(&tp->tap_mtx);
+ if (*(int *)data)
+ tp->tap_flags |= TAP_ASYNC;
+ else
+ tp->tap_flags &= ~TAP_ASYNC;
+ mtx_unlock(&tp->tap_mtx);
+ break;
+
+ case FIONREAD:
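+ /* Report the byte count of the packet at the head of the send queue. */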
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ struct mbuf *mb;
+
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, mb);
+ for (*(int *)data = 0; mb != NULL;
+ mb = mb->m_next)
+ *(int *)data += mb->m_len;
+ IFQ_UNLOCK(&ifp->if_snd);
+ } else
+ *(int *)data = 0;
+ break;
+
+ case FIOSETOWN:
+ return (fsetown(*(int *)data, &tp->tap_sigio));
+
+ case FIOGETOWN:
+ *(int *)data = fgetown(&tp->tap_sigio);
+ return (0);
+
+ /* this is deprecated, FIOSETOWN should be used instead */
+ case TIOCSPGRP:
+ return (fsetown(-(*(int *)data), &tp->tap_sigio));
+
+ /* this is deprecated, FIOGETOWN should be used instead */
+ case TIOCGPGRP:
+ *(int *)data = -fgetown(&tp->tap_sigio);
+ return (0);
+
+ /* VMware/VMnet port ioctl's */
+
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+ defined(COMPAT_FREEBSD4)
+ case _IO('V', 0):
+ ival = IOCPARM_IVAL(data);
+ data = (caddr_t)&ival;
+ /* FALLTHROUGH */
+#endif
+ case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
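+ /* Force IFF_UP and strip flag bits the user may not change. */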
+ f = *(int *)data;
+ f &= 0x0fff;
+ f &= ~IFF_CANTCHANGE;
+ f |= IFF_UP;
+
+ mtx_lock(&tp->tap_mtx);
+ ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
+ mtx_unlock(&tp->tap_mtx);
+ break;
+
+ case OSIOCGIFADDR: /* get MAC address of the remote side */
+ case SIOCGIFADDR:
+ mtx_lock(&tp->tap_mtx);
+ bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
+ mtx_unlock(&tp->tap_mtx);
+ break;
+
+ case SIOCSIFADDR: /* set MAC address of the remote side */
+ mtx_lock(&tp->tap_mtx);
+ bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
+ mtx_unlock(&tp->tap_mtx);
+ break;
+
+ default:
+ return (ENOTTY);
+ }
+ return (0);
+} /* tapioctl */
+
+
+/*
+ * tapread
+ *
+ * the cdevsw read interface - reads a packet at a time, or at
+ * least as much of a packet as can be read
+ */
+static int
+tapread(struct cdev *dev, struct uio *uio, int flag)
+{
+ struct tap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = tp->tap_ifp;
+ struct mbuf *m = NULL;
+ int error = 0, len;
+
+ TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev));
+
+ mtx_lock(&tp->tap_mtx);
+ if ((tp->tap_flags & TAP_READY) != TAP_READY) {
+ mtx_unlock(&tp->tap_mtx);
+
+ /* Unlocked read. */
+ TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
+ ifp->if_xname, dev2unit(dev), tp->tap_flags);
+
+ return (EHOSTDOWN);
+ }
+
+ tp->tap_flags &= ~TAP_RWAIT;
+
+ /* sleep until we get a packet */
+ do {
+ IF_DEQUEUE(&ifp->if_snd, m);
+
+ if (m == NULL) {
+ if (flag & O_NONBLOCK) {
+ mtx_unlock(&tp->tap_mtx);
+ return (EWOULDBLOCK);
+ }
+
+ tp->tap_flags |= TAP_RWAIT;
+ error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1),
+ "taprd", 0);
+ if (error) {
+ mtx_unlock(&tp->tap_mtx);
+ return (error);
+ }
+ }
+ } while (m == NULL);
+ mtx_unlock(&tp->tap_mtx);
+
+ /* feed packet to bpf */
+ BPF_MTAP(ifp, m);
+
+ /* xfer packet to user space */
+ while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
+ len = min(uio->uio_resid, m->m_len);
+ if (len == 0)
+ break;
+
+ error = uiomove(mtod(m, void *), len, uio);
+ m = m_free(m);
+ }
+
+ if (m != NULL) {
+ TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
+ dev2unit(dev));
+ m_freem(m);
+ }
+
+ return (error);
+} /* tapread */
+
+
+/*
+ * tapwrite
+ *
+ * the cdevsw write interface - an atomic write is a packet - or else!
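+ *
+ * Each write(2) must supply one complete Ethernet frame. A unicast
+ * frame not addressed to the interface is silently dropped unless
+ * IFF_PROMISC is set, mimicking what real hardware would receive.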
+ */
+static int
+tapwrite(struct cdev *dev, struct uio *uio, int flag)
+{
+ struct ether_header *eh;
+ struct tap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = tp->tap_ifp;
+ struct mbuf *m;
+
+ TAPDEBUG("%s writting, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+
+ if (uio->uio_resid == 0)
+ return (0);
+
+ if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
+ TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n",
+ ifp->if_xname, uio->uio_resid, dev2unit(dev));
+
+ return (EIO);
+ }
+
+ if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN,
+ M_PKTHDR)) == NULL) {
+ ifp->if_ierrors ++;
+ return (ENOBUFS);
+ }
+
+ m->m_pkthdr.rcvif = ifp;
+
+ /*
+ * Only pass a unicast frame to ether_input(), if it would actually
+ * have been received by non-virtual hardware.
+ */
+ if (m->m_len < sizeof(struct ether_header)) {
+ m_freem(m);
+ return (0);
+ }
+ eh = mtod(m, struct ether_header *);
+
+ if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
+ !ETHER_IS_MULTICAST(eh->ether_dhost) &&
+ bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
+ m_freem(m);
+ return (0);
+ }
+
+ /* Pass packet up to parent. */
+ (*ifp->if_input)(ifp, m);
+ ifp->if_ipackets ++; /* ibytes are counted in parent */
+
+ return (0);
+} /* tapwrite */
+
+
+/*
+ * tappoll
+ *
+ * the poll interface, this is only useful on reads
+ * really. The write detect always returns true, write never blocks
+ * anyway, it either accepts the packet or drops it
+ */
+static int
+tappoll(struct cdev *dev, int events, struct thread *td)
+{
+ struct tap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = tp->tap_ifp;
+ int revents = 0;
+
+ TAPDEBUG("%s polling, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+
+ if (events & (POLLIN | POLLRDNORM)) {
+ IFQ_LOCK(&ifp->if_snd);
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ TAPDEBUG("%s have data in queue. len = %d, " \
+ "minor = %#x\n", ifp->if_xname,
+ ifp->if_snd.ifq_len, dev2unit(dev));
+
+ revents |= (events & (POLLIN | POLLRDNORM));
+ } else {
+ TAPDEBUG("%s waiting for data, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+
+ selrecord(td, &tp->tap_rsel);
+ }
+ IFQ_UNLOCK(&ifp->if_snd);
+ }
+
+ if (events & (POLLOUT | POLLWRNORM))
+ revents |= (events & (POLLOUT | POLLWRNORM));
+
+ return (revents);
+} /* tappoll */
+
+
+/*
+ * tap_kqfilter
+ *
+ * support for kevent() system call
+ */
+static int
+tapkqfilter(struct cdev *dev, struct knote *kn)
+{
+ struct tap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = tp->tap_ifp;
+
+ switch (kn->kn_filter) {
+ case EVFILT_READ:
+ TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ kn->kn_fop = &tap_read_filterops;
+ break;
+
+ case EVFILT_WRITE:
+ TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ kn->kn_fop = &tap_write_filterops;
+ break;
+
+ default:
+ TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ return (EINVAL);
+ /* NOT REACHED */
+ }
+
+ kn->kn_hook = tp;
+ knlist_add(&tp->tap_rsel.si_note, kn, 0);
+
+ return (0);
+} /* tapkqfilter */
+
+
+/*
+ * tap_kqread
+ *
+ * Return true if there is data in the interface queue
+ */
+static int
+tapkqread(struct knote *kn, long hint)
+{
+ int ret;
+ struct tap_softc *tp = kn->kn_hook;
+ struct cdev *dev = tp->tap_dev;
+ struct ifnet *ifp = tp->tap_ifp;
+
+ if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
+ TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n",
+ ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
+ ret = 1;
+ } else {
+ TAPDEBUG("%s waiting for data, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ ret = 0;
+ }
+
+ return (ret);
+} /* tapkqread */
+
+
+/*
+ * tap_kqwrite
+ *
+ * Always can write. Return the MTU in kn->kn_data
+ */
+static int
+tapkqwrite(struct knote *kn, long hint)
+{
+ struct tap_softc *tp = kn->kn_hook;
+ struct ifnet *ifp = tp->tap_ifp;
+
+ kn->kn_data = ifp->if_mtu;
+
+ return (1);
+} /* tapkqwrite */
+
+
+static void
+tapkqdetach(struct knote *kn)
+{
+ struct tap_softc *tp = kn->kn_hook;
+
+ knlist_remove(&tp->tap_rsel.si_note, kn, 0);
+} /* tapkqdetach */
+
diff --git a/freebsd/sys/net/if_tap.h b/freebsd/sys/net/if_tap.h
new file mode 100644
index 00000000..e611884b
--- /dev/null
+++ b/freebsd/sys/net/if_tap.h
@@ -0,0 +1,74 @@
+/*-
+ * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * BASED ON:
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
+ * Nottingham University 1987.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_tap.h,v 0.7 2000/07/12 04:12:51 max Exp $
+ */
+
+#ifndef _NET_IF_TAP_HH_
+#define _NET_IF_TAP_HH_
+
+/* refer to if_tapvar.h for the softc stuff */
+
+/* maximum receive packet size (hard limit) */
+#define TAPMRU 16384
+
+struct tapinfo {
+ int baudrate; /* linespeed */
+ short mtu; /* maximum transmission unit */
+ u_char type; /* ethernet, tokenring, etc. */
+ u_char dummy; /* place holder */
+};
+
+/* ioctl's for get/set debug */
+#define TAPSDEBUG _IOW('t', 90, int)
+#define TAPGDEBUG _IOR('t', 89, int)
+#define TAPSIFINFO _IOW('t', 91, struct tapinfo)
+#define TAPGIFINFO _IOR('t', 92, struct tapinfo)
+#define TAPGIFNAME _IOR('t', 93, struct ifreq)
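+
+/*
+ * Illustrative userland sketch (not part of the original header);
+ * "/dev/tap0" and the omitted error handling are assumptions.
+ * The first open of /dev/tap0 creates the tap0 interface, and
+ * TAPGIFNAME then reports the attached interface name.
+ *
+ *	#include <sys/ioctl.h>
+ *	#include <net/if.h>
+ *	#include <net/if_tap.h>
+ *	#include <fcntl.h>
+ *	#include <stdio.h>
+ *
+ *	int fd = open("/dev/tap0", O_RDWR);
+ *	struct ifreq ifr;
+ *	if (fd >= 0 && ioctl(fd, TAPGIFNAME, &ifr) == 0)
+ *		printf("attached to %s\n", ifr.ifr_name);
+ */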
+
+/* VMware ioctl's */
+#define VMIO_SIOCSIFFLAGS _IOWINT('V', 0)
+#define VMIO_SIOCSKEEP _IO('V', 1)
+#define VMIO_SIOCSIFBR _IO('V', 2)
+#define VMIO_SIOCSLADRF _IO('V', 3)
+
+/* XXX -- unimplemented */
+#define VMIO_SIOCSETMACADDR _IO('V', 4)
+
+/* XXX -- not used? */
+#define VMIO_SIOCPORT _IO('V', 5)
+#define VMIO_SIOCBRIDGE _IO('V', 6)
+#define VMIO_SIOCNETIF _IO('V', 7)
+
+#endif /* !_NET_IF_TAP_HH_ */
diff --git a/freebsd/sys/net/if_tapvar.h b/freebsd/sys/net/if_tapvar.h
new file mode 100644
index 00000000..4a26fd87
--- /dev/null
+++ b/freebsd/sys/net/if_tapvar.h
@@ -0,0 +1,69 @@
+/*-
+ * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * BASED ON:
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (c) 1998 Brian Somers <brian@Awfulhak.org>
+ * All rights reserved.
+ *
+ * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
+ * Nottingham University 1987.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_tapvar.h,v 0.6 2000/07/11 02:16:08 max Exp $
+ */
+
+#ifndef _NET_IF_TAPVAR_HH_
+#define _NET_IF_TAPVAR_HH_
+
+/*
+ * tap_mtx locks tap_flags and tap_pid; tap_next is locked by the
+ * global tapmtx. Other fields are locked by their owning subsystems.
+ */
+struct tap_softc {
+ struct ifnet *tap_ifp;
+ u_short tap_flags; /* misc flags */
+#define TAP_OPEN (1 << 0)
+#define TAP_INITED (1 << 1)
+#define TAP_RWAIT (1 << 2)
+#define TAP_ASYNC (1 << 3)
+#define TAP_READY (TAP_OPEN|TAP_INITED)
+#define TAP_VMNET (1 << 4)
+
+ u_int8_t ether_addr[ETHER_ADDR_LEN]; /* ether addr of the remote side */
+
+ pid_t tap_pid; /* PID of process to open */
+ struct sigio *tap_sigio; /* information for async I/O */
+ struct selinfo tap_rsel; /* read select */
+
+ SLIST_ENTRY(tap_softc) tap_next; /* next device in chain */
+ struct cdev *tap_dev;
+ struct mtx tap_mtx; /* per-softc mutex */
+};
+
+#endif /* !_NET_IF_TAPVAR_HH_ */
diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c
new file mode 100644
index 00000000..7f90fa51
--- /dev/null
+++ b/freebsd/sys/net/if_tun.c
@@ -0,0 +1,1059 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
+ * Nottingham University 1987.
+ *
+ * This source may be freely distributed, however I would be interested
+ * in any changes that are made.
+ *
+ * This driver takes packets off the IP i/f and hands them up to a
+ * user process to have its wicked way with. This driver has it's
+ * roots in a similar driver written by Phil Cockcroft (formerly) at
+ * UCL. This driver is based much more on read/write/poll mode of
+ * operation though.
+ *
+ * $FreeBSD$
+ */
+
+#include <freebsd/local/opt_atalk.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipx.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/ttycom.h>
+#include <freebsd/sys/poll.h>
+#include <freebsd/sys/selinfo.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/filedesc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/conf.h>
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/random.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+#ifdef INET
+#include <freebsd/netinet/in.h>
+#endif
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/if_tun.h>
+
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/condvar.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+/*
+ * tun_list is protected by global tunmtx. Other mutable fields are
+ * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is
+ * static for the duration of a tunnel interface.
+ */
+struct tun_softc {
+ TAILQ_ENTRY(tun_softc) tun_list;
+ struct cdev *tun_dev;
+ u_short tun_flags; /* misc flags */
+#define TUN_OPEN 0x0001
+#define TUN_INITED 0x0002
+#define TUN_RCOLL 0x0004
+#define TUN_IASET 0x0008
+#define TUN_DSTADDR 0x0010
+#define TUN_LMODE 0x0020
+#define TUN_RWAIT 0x0040
+#define TUN_ASYNC 0x0080
+#define TUN_IFHEAD 0x0100
+
+#define TUN_READY (TUN_OPEN | TUN_INITED)
+
+ /*
+ * XXXRW: tun_pid is used to exclusively lock /dev/tun. Is this
+ * actually needed? Can we just return EBUSY if already open?
+ * Problem is that this involved inherent races when a tun device
+ * is handed off from one process to another, as opposed to just
+ * being slightly stale informationally.
+ */
+ pid_t tun_pid; /* owning pid */
+ struct ifnet *tun_ifp; /* the interface */
+ struct sigio *tun_sigio; /* information for async I/O */
+ struct selinfo tun_rsel; /* read select */
+ struct mtx tun_mtx; /* protect mutable softc fields */
+ struct cv tun_cv; /* protect against ref'd dev destroy */
+};
+#define TUN2IFP(sc) ((sc)->tun_ifp)
+
+#define TUNDEBUG if (tundebug) if_printf
+#define TUNNAME "tun"
+
+/*
+ * All mutable global variables in if_tun are locked using tunmtx, with
+ * the exception of tundebug, which is used unlocked, and tunclones,
+ * which is static after setup.
+ */
+static struct mtx tunmtx;
+static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
+static int tundebug = 0;
+static int tundclone = 1;
+static struct clonedevs *tunclones;
+static TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
+SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
+ "IP tunnel software network interface.");
+SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0,
+ "Enable legacy devfs interface creation.");
+
+TUNABLE_INT("net.link.tun.devfs_cloning", &tundclone);
+
+static void tunclone(void *arg, struct ucred *cred, char *name,
+ int namelen, struct cdev **dev);
+static void tuncreate(const char *name, struct cdev *dev);
+static int tunifioctl(struct ifnet *, u_long, caddr_t);
+static int tuninit(struct ifnet *);
+static int tunmodevent(module_t, int, void *);
+static int tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *ro);
+static void tunstart(struct ifnet *);
+
+static int tun_clone_create(struct if_clone *, int, caddr_t);
+static void tun_clone_destroy(struct ifnet *);
+
+IFC_SIMPLE_DECLARE(tun, 0);
+
+static d_open_t tunopen;
+static d_close_t tunclose;
+static d_read_t tunread;
+static d_write_t tunwrite;
+static d_ioctl_t tunioctl;
+static d_poll_t tunpoll;
+static d_kqfilter_t tunkqfilter;
+
+static int tunkqread(struct knote *, long);
+static int tunkqwrite(struct knote *, long);
+static void tunkqdetach(struct knote *);
+
+static struct filterops tun_read_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = tunkqdetach,
+ .f_event = tunkqread,
+};
+
+static struct filterops tun_write_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = tunkqdetach,
+ .f_event = tunkqwrite,
+};
+
+static struct cdevsw tun_cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_PSEUDO | D_NEEDMINOR,
+ .d_open = tunopen,
+ .d_close = tunclose,
+ .d_read = tunread,
+ .d_write = tunwrite,
+ .d_ioctl = tunioctl,
+ .d_poll = tunpoll,
+ .d_kqfilter = tunkqfilter,
+ .d_name = TUNNAME,
+};
+
+static int
+tun_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct cdev *dev;
+ int i;
+
+ /* find any existing device, or allocate new unit number */
+ i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0);
+ if (i) {
+ /* No preexisting struct cdev *, create one */
+ dev = make_dev(&tun_cdevsw, unit,
+ UID_UUCP, GID_DIALER, 0600, "%s%d", ifc->ifc_name, unit);
+ }
+ tuncreate(ifc->ifc_name, dev);
+
+ return (0);
+}
+
+static void
+tunclone(void *arg, struct ucred *cred, char *name, int namelen,
+ struct cdev **dev)
+{
+ char devname[SPECNAMELEN + 1];
+ int u, i, append_unit;
+
+ if (*dev != NULL)
+ return;
+
+ /*
+ * If tun cloning is enabled, only the superuser can create an
+ * interface.
+ */
+ if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
+ return;
+
+ if (strcmp(name, TUNNAME) == 0) {
+ u = -1;
+ } else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
+ return; /* Don't recognise the name */
+ if (u != -1 && u > IF_MAXUNIT)
+ return; /* Unit number too high */
+
+ if (u == -1)
+ append_unit = 1;
+ else
+ append_unit = 0;
+
+ CURVNET_SET(CRED_TO_VNET(cred));
+ /* find any existing device, or allocate new unit number */
+ i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
+ if (i) {
+ if (append_unit) {
+ namelen = snprintf(devname, sizeof(devname), "%s%d", name,
+ u);
+ name = devname;
+ }
+ /* No preexisting struct cdev *, create one */
+ *dev = make_dev_credf(MAKEDEV_REF, &tun_cdevsw, u, cred,
+ UID_UUCP, GID_DIALER, 0600, "%s", name);
+ }
+
+ if_clone_create(name, namelen, NULL);
+ CURVNET_RESTORE();
+}
+
+static void
+tun_destroy(struct tun_softc *tp)
+{
+ struct cdev *dev;
+
+ /* If the device is still open, wait for tunclose() to broadcast tun_cv. */
+ mtx_lock(&tp->tun_mtx);
+ if ((tp->tun_flags & TUN_OPEN) != 0)
+ cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
+ else
+ mtx_unlock(&tp->tun_mtx);
+
+ CURVNET_SET(TUN2IFP(tp)->if_vnet);
+ dev = tp->tun_dev;
+ bpfdetach(TUN2IFP(tp));
+ if_detach(TUN2IFP(tp));
+ if_free(TUN2IFP(tp));
+ destroy_dev(dev);
+ knlist_destroy(&tp->tun_rsel.si_note);
+ mtx_destroy(&tp->tun_mtx);
+ cv_destroy(&tp->tun_cv);
+ free(tp, M_TUN);
+ CURVNET_RESTORE();
+}
+
+static void
+tun_clone_destroy(struct ifnet *ifp)
+{
+ struct tun_softc *tp = ifp->if_softc;
+
+ mtx_lock(&tunmtx);
+ TAILQ_REMOVE(&tunhead, tp, tun_list);
+ mtx_unlock(&tunmtx);
+ tun_destroy(tp);
+}
+
+static int
+tunmodevent(module_t mod, int type, void *data)
+{
+ static eventhandler_tag tag;
+ struct tun_softc *tp;
+
+ switch (type) {
+ case MOD_LOAD:
+ mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
+ clone_setup(&tunclones);
+ tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
+ if (tag == NULL)
+ return (ENOMEM);
+ if_clone_attach(&tun_cloner);
+ break;
+ case MOD_UNLOAD:
+ if_clone_detach(&tun_cloner);
+ EVENTHANDLER_DEREGISTER(dev_clone, tag);
+ drain_dev_clone_events();
+
+ mtx_lock(&tunmtx);
+ while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
+ TAILQ_REMOVE(&tunhead, tp, tun_list);
+ mtx_unlock(&tunmtx);
+ tun_destroy(tp);
+ mtx_lock(&tunmtx);
+ }
+ mtx_unlock(&tunmtx);
+ clone_cleanup(&tunclones);
+ mtx_destroy(&tunmtx);
+ break;
+ default:
+ return EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static moduledata_t tun_mod = {
+ "if_tun",
+ tunmodevent,
+ 0
+};
+
+DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+
+static void
+tunstart(struct ifnet *ifp)
+{
+ struct tun_softc *tp = ifp->if_softc;
+ struct mbuf *m;
+
+ TUNDEBUG(ifp,"%s starting\n", ifp->if_xname);
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, m);
+ if (m == NULL) {
+ IFQ_UNLOCK(&ifp->if_snd);
+ return;
+ }
+ IFQ_UNLOCK(&ifp->if_snd);
+ }
+
+ mtx_lock(&tp->tun_mtx);
+ if (tp->tun_flags & TUN_RWAIT) {
+ tp->tun_flags &= ~TUN_RWAIT;
+ wakeup(tp);
+ }
+ selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
+ mtx_unlock(&tp->tun_mtx);
+ pgsigio(&tp->tun_sigio, SIGIO, 0);
+ } else
+ mtx_unlock(&tp->tun_mtx);
+}
+
+/* XXX: should return an error code so it can fail. */
+static void
+tuncreate(const char *name, struct cdev *dev)
+{
+ struct tun_softc *sc;
+ struct ifnet *ifp;
+
+ dev->si_flags &= ~SI_CHEAPCLONE;
+
+ sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
+ mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
+ cv_init(&sc->tun_cv, "tun_condvar");
+ sc->tun_flags = TUN_INITED;
+ sc->tun_dev = dev;
+ mtx_lock(&tunmtx);
+ TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
+ mtx_unlock(&tunmtx);
+
+ ifp = sc->tun_ifp = if_alloc(IFT_PPP);
+ if (ifp == NULL)
+ panic("%s%d: failed to if_alloc() interface.\n",
+ name, dev2unit(dev));
+ if_initname(ifp, name, dev2unit(dev));
+ ifp->if_mtu = TUNMTU;
+ ifp->if_ioctl = tunifioctl;
+ ifp->if_output = tunoutput;
+ ifp->if_start = tunstart;
+ ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
+ ifp->if_softc = sc;
+ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
+ ifp->if_snd.ifq_drv_maxlen = 0;
+ IFQ_SET_READY(&ifp->if_snd);
+ knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
+ ifp->if_capabilities |= IFCAP_LINKSTATE;
+ ifp->if_capenable |= IFCAP_LINKSTATE;
+
+ if_attach(ifp);
+ bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
+ dev->si_drv1 = sc;
+ TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+}
+
+static int
+tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+ struct ifnet *ifp;
+ struct tun_softc *tp;
+
+ /*
+ * XXXRW: Non-atomic test and set of dev->si_drv1 requires
+ * synchronization.
+ */
+ tp = dev->si_drv1;
+ if (!tp) {
+ tuncreate(TUNNAME, dev);
+ tp = dev->si_drv1;
+ }
+
+ /*
+ * XXXRW: This use of tun_pid is subject to error due to the
+ * fact that a reference to the tunnel can live beyond the
+ * death of the process that created it. Can we replace this
+ * with a simple busy flag?
+ */
+ mtx_lock(&tp->tun_mtx);
+ if (tp->tun_pid != 0 && tp->tun_pid != td->td_proc->p_pid) {
+ mtx_unlock(&tp->tun_mtx);
+ return (EBUSY);
+ }
+ tp->tun_pid = td->td_proc->p_pid;
+
+ tp->tun_flags |= TUN_OPEN;
+ ifp = TUN2IFP(tp);
+ if_link_state_change(ifp, LINK_STATE_UP);
+ TUNDEBUG(ifp, "open\n");
+ mtx_unlock(&tp->tun_mtx);
+
+ return (0);
+}
+
+/*
+ * tunclose - close the device - mark i/f down & delete
+ * routing info
+ */
+static int
+tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
+{
+ struct tun_softc *tp;
+ struct ifnet *ifp;
+
+ tp = dev->si_drv1;
+ ifp = TUN2IFP(tp);
+
+ mtx_lock(&tp->tun_mtx);
+ tp->tun_flags &= ~TUN_OPEN;
+ tp->tun_pid = 0;
+
+ /*
+ * junk all pending output
+ */
+ CURVNET_SET(ifp->if_vnet);
+ IFQ_PURGE(&ifp->if_snd);
+
+ if (ifp->if_flags & IFF_UP) {
+ mtx_unlock(&tp->tun_mtx);
+ if_down(ifp);
+ mtx_lock(&tp->tun_mtx);
+ }
+
+ /* Delete all addresses and routes which reference this interface. */
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ struct ifaddr *ifa;
+
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ mtx_unlock(&tp->tun_mtx);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ /* deal w/IPv4 PtP destination; unlocked read */
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ rtinit(ifa, (int)RTM_DELETE,
+ tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
+ } else {
+ rtinit(ifa, (int)RTM_DELETE, 0);
+ }
+ }
+ if_purgeaddrs(ifp);
+ mtx_lock(&tp->tun_mtx);
+ }
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ CURVNET_RESTORE();
+
+ funsetown(&tp->tun_sigio);
+ selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ TUNDEBUG (ifp, "closed\n");
+
+ cv_broadcast(&tp->tun_cv);
+ mtx_unlock(&tp->tun_mtx);
+ return (0);
+}
+
+static int
+tuninit(struct ifnet *ifp)
+{
+ struct tun_softc *tp = ifp->if_softc;
+#ifdef INET
+ struct ifaddr *ifa;
+#endif
+ int error = 0;
+
+ TUNDEBUG(ifp, "tuninit\n");
+
+ mtx_lock(&tp->tun_mtx);
+ ifp->if_flags |= IFF_UP;
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ getmicrotime(&ifp->if_lastchange);
+
+#ifdef INET
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ struct sockaddr_in *si;
+
+ si = (struct sockaddr_in *)ifa->ifa_addr;
+ if (si->sin_addr.s_addr)
+ tp->tun_flags |= TUN_IASET;
+
+ si = (struct sockaddr_in *)ifa->ifa_dstaddr;
+ if (si && si->sin_addr.s_addr)
+ tp->tun_flags |= TUN_DSTADDR;
+ }
+ }
+ if_addr_runlock(ifp);
+#endif
+ mtx_unlock(&tp->tun_mtx);
+ return (error);
+}
+
+/*
+ * Process an ioctl request.
+ */
+static int
+tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct tun_softc *tp = ifp->if_softc;
+ struct ifstat *ifs;
+ int error = 0;
+
+ switch(cmd) {
+ case SIOCGIFSTATUS:
+ ifs = (struct ifstat *)data;
+ mtx_lock(&tp->tun_mtx);
+ if (tp->tun_pid)
+ sprintf(ifs->ascii + strlen(ifs->ascii),
+ "\tOpened by PID %d\n", tp->tun_pid);
+ mtx_unlock(&tp->tun_mtx);
+ break;
+ case SIOCSIFADDR:
+ error = tuninit(ifp);
+ TUNDEBUG(ifp, "address set, error=%d\n", error);
+ break;
+ case SIOCSIFDSTADDR:
+ error = tuninit(ifp);
+ TUNDEBUG(ifp, "destination address set, error=%d\n", error);
+ break;
+ case SIOCSIFMTU:
+ ifp->if_mtu = ifr->ifr_mtu;
+ TUNDEBUG(ifp, "mtu set\n");
+ break;
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+/*
+ * tunoutput - queue packets from higher level ready to put out.
+ */
+static int
+tunoutput(
+ struct ifnet *ifp,
+ struct mbuf *m0,
+ struct sockaddr *dst,
+ struct route *ro)
+{
+ struct tun_softc *tp = ifp->if_softc;
+ u_short cached_tun_flags;
+ int error;
+ u_int32_t af;
+
+ TUNDEBUG (ifp, "tunoutput\n");
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m0);
+ if (error) {
+ m_freem(m0);
+ return (error);
+ }
+#endif
+
+ /* Could be unlocked read? */
+ mtx_lock(&tp->tun_mtx);
+ cached_tun_flags = tp->tun_flags;
+ mtx_unlock(&tp->tun_mtx);
+ if ((cached_tun_flags & TUN_READY) != TUN_READY) {
+ TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
+ m_freem (m0);
+ return (EHOSTDOWN);
+ }
+
+ if ((ifp->if_flags & IFF_UP) != IFF_UP) {
+ m_freem (m0);
+ return (EHOSTDOWN);
+ }
+
+ /* BPF writes need to be handled specially. */
+ if (dst->sa_family == AF_UNSPEC) {
+ bcopy(dst->sa_data, &af, sizeof(af));
+ dst->sa_family = af;
+ }
+
+ if (bpf_peers_present(ifp->if_bpf)) {
+ af = dst->sa_family;
+ bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
+ }
+
+ /* prepend sockaddr? this may abort if the mbuf allocation fails */
+ if (cached_tun_flags & TUN_LMODE) {
+ /* allocate space for sockaddr */
+ M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
+
+ /* if allocation failed drop packet */
+ if (m0 == NULL) {
+ ifp->if_iqdrops++;
+ ifp->if_oerrors++;
+ return (ENOBUFS);
+ } else {
+ bcopy(dst, m0->m_data, dst->sa_len);
+ }
+ }
+
+ if (cached_tun_flags & TUN_IFHEAD) {
+ /* Prepend the address family */
+ M_PREPEND(m0, 4, M_DONTWAIT);
+
+ /* if allocation failed drop packet */
+ if (m0 == NULL) {
+ ifp->if_iqdrops++;
+ ifp->if_oerrors++;
+ return (ENOBUFS);
+ } else
+ *(u_int32_t *)m0->m_data = htonl(dst->sa_family);
+ } else {
+#ifdef INET
+ if (dst->sa_family != AF_INET)
+#endif
+ {
+ m_freem(m0);
+ return (EAFNOSUPPORT);
+ }
+ }
+
+ error = (ifp->if_transmit)(ifp, m0);
+ if (error) {
+ ifp->if_collisions++;
+ return (ENOBUFS);
+ }
+ ifp->if_opackets++;
+ return (0);
+}
+
+/*
+ * the cdevsw interface is now pretty minimal.
+ */
+static int
+tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
+{
+ int error;
+ struct tun_softc *tp = dev->si_drv1;
+ struct tuninfo *tunp;
+
+ switch (cmd) {
+ case TUNSIFINFO:
+ tunp = (struct tuninfo *)data;
+ if (tunp->mtu < IF_MINMTU)
+ return (EINVAL);
+ if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
+ error = priv_check(td, PRIV_NET_SETIFMTU);
+ if (error)
+ return (error);
+ }
+ mtx_lock(&tp->tun_mtx);
+ TUN2IFP(tp)->if_mtu = tunp->mtu;
+ TUN2IFP(tp)->if_type = tunp->type;
+ TUN2IFP(tp)->if_baudrate = tunp->baudrate;
+ mtx_unlock(&tp->tun_mtx);
+ break;
+ case TUNGIFINFO:
+ tunp = (struct tuninfo *)data;
+ mtx_lock(&tp->tun_mtx);
+ tunp->mtu = TUN2IFP(tp)->if_mtu;
+ tunp->type = TUN2IFP(tp)->if_type;
+ tunp->baudrate = TUN2IFP(tp)->if_baudrate;
+ mtx_unlock(&tp->tun_mtx);
+ break;
+ case TUNSDEBUG:
+ tundebug = *(int *)data;
+ break;
+ case TUNGDEBUG:
+ *(int *)data = tundebug;
+ break;
+ case TUNSLMODE:
+ mtx_lock(&tp->tun_mtx);
+ if (*(int *)data) {
+ tp->tun_flags |= TUN_LMODE;
+ tp->tun_flags &= ~TUN_IFHEAD;
+ } else
+ tp->tun_flags &= ~TUN_LMODE;
+ mtx_unlock(&tp->tun_mtx);
+ break;
+ case TUNSIFHEAD:
+ mtx_lock(&tp->tun_mtx);
+ if (*(int *)data) {
+ tp->tun_flags |= TUN_IFHEAD;
+ tp->tun_flags &= ~TUN_LMODE;
+ } else
+ tp->tun_flags &= ~TUN_IFHEAD;
+ mtx_unlock(&tp->tun_mtx);
+ break;
+ case TUNGIFHEAD:
+ mtx_lock(&tp->tun_mtx);
+ *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
+ mtx_unlock(&tp->tun_mtx);
+ break;
+ case TUNSIFMODE:
+ /* deny this if UP */
+ if (TUN2IFP(tp)->if_flags & IFF_UP)
+ return(EBUSY);
+
+ switch (*(int *)data & ~IFF_MULTICAST) {
+ case IFF_POINTOPOINT:
+ case IFF_BROADCAST:
+ mtx_lock(&tp->tun_mtx);
+ TUN2IFP(tp)->if_flags &=
+ ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
+ TUN2IFP(tp)->if_flags |= *(int *)data;
+ mtx_unlock(&tp->tun_mtx);
+ break;
+ default:
+ return(EINVAL);
+ }
+ break;
+ case TUNSIFPID:
+ mtx_lock(&tp->tun_mtx);
+ tp->tun_pid = curthread->td_proc->p_pid;
+ mtx_unlock(&tp->tun_mtx);
+ break;
+ case FIONBIO:
+ break;
+ case FIOASYNC:
+ mtx_lock(&tp->tun_mtx);
+ if (*(int *)data)
+ tp->tun_flags |= TUN_ASYNC;
+ else
+ tp->tun_flags &= ~TUN_ASYNC;
+ mtx_unlock(&tp->tun_mtx);
+ break;
+ case FIONREAD:
+ if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
+ struct mbuf *mb;
+ IFQ_LOCK(&TUN2IFP(tp)->if_snd);
+ IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
+ for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
+ *(int *)data += mb->m_len;
+ IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
+ } else
+ *(int *)data = 0;
+ break;
+ case FIOSETOWN:
+ return (fsetown(*(int *)data, &tp->tun_sigio));
+
+ case FIOGETOWN:
+ *(int *)data = fgetown(&tp->tun_sigio);
+ return (0);
+
+ /* This is deprecated, FIOSETOWN should be used instead. */
+ case TIOCSPGRP:
+ return (fsetown(-(*(int *)data), &tp->tun_sigio));
+
+ /* This is deprecated, FIOGETOWN should be used instead. */
+ case TIOCGPGRP:
+ *(int *)data = -fgetown(&tp->tun_sigio);
+ return (0);
+
+ default:
+ return (ENOTTY);
+ }
+ return (0);
+}
+
+/*
+ * The cdevsw read interface - reads a packet at a time, or at
+ * least as much of a packet as can be read.
+ */
+static int
+tunread(struct cdev *dev, struct uio *uio, int flag)
+{
+ struct tun_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+ struct mbuf *m;
+ int error=0, len;
+
+ TUNDEBUG (ifp, "read\n");
+ mtx_lock(&tp->tun_mtx);
+ if ((tp->tun_flags & TUN_READY) != TUN_READY) {
+ mtx_unlock(&tp->tun_mtx);
+ TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
+ return (EHOSTDOWN);
+ }
+
+ tp->tun_flags &= ~TUN_RWAIT;
+
+ do {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ if (m == NULL) {
+ if (flag & O_NONBLOCK) {
+ mtx_unlock(&tp->tun_mtx);
+ return (EWOULDBLOCK);
+ }
+ tp->tun_flags |= TUN_RWAIT;
+ error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
+ "tunread", 0);
+ if (error != 0) {
+ mtx_unlock(&tp->tun_mtx);
+ return (error);
+ }
+ }
+ } while (m == NULL);
+ mtx_unlock(&tp->tun_mtx);
+
+ while (m && uio->uio_resid > 0 && error == 0) {
+ len = min(uio->uio_resid, m->m_len);
+ if (len != 0)
+ error = uiomove(mtod(m, void *), len, uio);
+ m = m_free(m);
+ }
+
+ if (m) {
+ TUNDEBUG(ifp, "Dropping mbuf\n");
+ m_freem(m);
+ }
+ return (error);
+}
+
+/*
+ * the cdevsw write interface - an atomic write is a packet - or else!
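+ *
+ * With TUN_IFHEAD set, each packet written must be prefixed by a
+ * 4-byte address family in network byte order; otherwise the packet
+ * is assumed to be AF_INET.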
+ */
+static int
+tunwrite(struct cdev *dev, struct uio *uio, int flag)
+{
+ struct tun_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+ struct mbuf *m;
+ int error = 0;
+ uint32_t family;
+ int isr;
+
+ TUNDEBUG(ifp, "tunwrite\n");
+
+ if ((ifp->if_flags & IFF_UP) != IFF_UP)
+ /* ignore silently */
+ return (0);
+
+ if (uio->uio_resid == 0)
+ return (0);
+
+ if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
+ TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
+ return (EIO);
+ }
+
+ if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) {
+ ifp->if_ierrors++;
+ return (error);
+ }
+
+ m->m_pkthdr.rcvif = ifp;
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+ /* Could be unlocked read? */
+ mtx_lock(&tp->tun_mtx);
+ if (tp->tun_flags & TUN_IFHEAD) {
+ mtx_unlock(&tp->tun_mtx);
+ if (m->m_len < sizeof(family) &&
+ (m = m_pullup(m, sizeof(family))) == NULL)
+ return (ENOBUFS);
+ family = ntohl(*mtod(m, u_int32_t *));
+ m_adj(m, sizeof(family));
+ } else {
+ mtx_unlock(&tp->tun_mtx);
+ family = AF_INET;
+ }
+
+ BPF_MTAP2(ifp, &family, sizeof(family), m);
+
+ switch (family) {
+#ifdef INET
+ case AF_INET:
+ isr = NETISR_IP;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ isr = NETISR_IPV6;
+ break;
+#endif
+#ifdef IPX
+ case AF_IPX:
+ isr = NETISR_IPX;
+ break;
+#endif
+#ifdef NETATALK
+ case AF_APPLETALK:
+ isr = NETISR_ATALK2;
+ break;
+#endif
+ default:
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+ /* First chunk of an mbuf contains good junk */
+ if (harvest.point_to_point)
+ random_harvest(m, 16, 3, 0, RANDOM_NET);
+ ifp->if_ibytes += m->m_pkthdr.len;
+ ifp->if_ipackets++;
+ CURVNET_SET(ifp->if_vnet);
+ netisr_dispatch(isr, m);
+ CURVNET_RESTORE();
+ return (0);
+}
+
+/*
+ * tunpoll - the poll interface, this is only useful on reads
+ * really. The write detect always returns true, write never blocks
+ * anyway, it either accepts the packet or drops it.
+ */
+static int
+tunpoll(struct cdev *dev, int events, struct thread *td)
+{
+ struct tun_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+ int revents = 0;
+ struct mbuf *m;
+
+ TUNDEBUG(ifp, "tunpoll\n");
+
+ if (events & (POLLIN | POLLRDNORM)) {
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, m);
+ if (m != NULL) {
+ TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
+ revents |= events & (POLLIN | POLLRDNORM);
+ } else {
+ TUNDEBUG(ifp, "tunpoll waiting\n");
+ selrecord(td, &tp->tun_rsel);
+ }
+ IFQ_UNLOCK(&ifp->if_snd);
+ }
+ if (events & (POLLOUT | POLLWRNORM))
+ revents |= events & (POLLOUT | POLLWRNORM);
+
+ return (revents);
+}
+
+/*
+ * tunkqfilter - support for the kevent() system call.
+ */
+static int
+tunkqfilter(struct cdev *dev, struct knote *kn)
+{
+ struct tun_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+
+ switch(kn->kn_filter) {
+ case EVFILT_READ:
+ TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ kn->kn_fop = &tun_read_filterops;
+ break;
+
+ case EVFILT_WRITE:
+ TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ kn->kn_fop = &tun_write_filterops;
+ break;
+
+ default:
+ TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ return(EINVAL);
+ }
+
+ kn->kn_hook = tp;
+ knlist_add(&tp->tun_rsel.si_note, kn, 0);
+
+ return (0);
+}
+
+/*
+ * Return true if there is data in the interface queue.
+ */
+static int
+tunkqread(struct knote *kn, long hint)
+{
+ int ret;
+ struct tun_softc *tp = kn->kn_hook;
+ struct cdev *dev = tp->tun_dev;
+ struct ifnet *ifp = TUN2IFP(tp);
+
+ if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
+ TUNDEBUG(ifp,
+ "%s have data in the queue. Len = %d, minor = %#x\n",
+ ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
+ ret = 1;
+ } else {
+ TUNDEBUG(ifp,
+ "%s waiting for data, minor = %#x\n", ifp->if_xname,
+ dev2unit(dev));
+ ret = 0;
+ }
+
+ return (ret);
+}
+
+/*
+ * Always can write, always return MTU in kn->kn_data.
+ */
+static int
+tunkqwrite(struct knote *kn, long hint)
+{
+ struct tun_softc *tp = kn->kn_hook;
+ struct ifnet *ifp = TUN2IFP(tp);
+
+ kn->kn_data = ifp->if_mtu;
+
+ return (1);
+}
+
+static void
+tunkqdetach(struct knote *kn)
+{
+ struct tun_softc *tp = kn->kn_hook;
+
+ knlist_remove(&tp->tun_rsel.si_note, kn, 0);
+}
diff --git a/freebsd/sys/net/if_tun.h b/freebsd/sys/net/if_tun.h
new file mode 100644
index 00000000..29718cda
--- /dev/null
+++ b/freebsd/sys/net/if_tun.h
@@ -0,0 +1,48 @@
+/* $NetBSD: if_tun.h,v 1.5 1994/06/29 06:36:27 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
+ * Nottingham University 1987.
+ *
+ * This source may be freely distributed, however I would be interested
+ * in any changes that are made.
+ *
+ * This driver takes packets off the IP i/f and hands them up to a
+ * user process to have its wicked way with. This driver has its
+ * roots in a similar driver written by Phil Cockcroft (formerly) at
+ * UCL. This driver is based much more on read/write/select mode of
+ * operation though.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_TUN_HH_
+#define _NET_IF_TUN_HH_
+
+/* Refer to if_tunvar.h for the softc stuff */
+
+/* Maximum transmit packet size (default) */
+#define TUNMTU 1500
+
+/* Maximum receive packet size (hard limit) */
+#define TUNMRU 16384
+
+struct tuninfo {
+ int baudrate; /* linespeed */
+ short mtu; /* maximum transmission unit */
+ u_char type; /* ethernet, tokenring, etc. */
+ u_char dummy; /* place holder */
+};
+
+/* ioctl's for get/set debug */
+#define TUNSDEBUG _IOW('t', 90, int)
+#define TUNGDEBUG _IOR('t', 89, int)
+#define TUNSIFINFO _IOW('t', 91, struct tuninfo)
+#define TUNGIFINFO _IOR('t', 92, struct tuninfo)
+#define TUNSLMODE _IOW('t', 93, int)
+#define TUNSIFMODE _IOW('t', 94, int)
+#define TUNSIFPID _IO('t', 95)
+#define TUNSIFHEAD _IOW('t', 96, int)
+#define TUNGIFHEAD _IOR('t', 97, int)
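+
+/*
+ * Illustrative userland sketch (not part of the original header) of
+ * the TUNSIFHEAD framing; "/dev/tun0", "pkt" and "pktlen" are
+ * assumptions. With the mode enabled, every packet written must be
+ * prefixed by a 4-byte address family in network byte order, and one
+ * atomic write is one packet.
+ *
+ *	#include <sys/ioctl.h>
+ *	#include <sys/uio.h>
+ *	#include <sys/socket.h>
+ *	#include <net/if_tun.h>
+ *	#include <arpa/inet.h>
+ *	#include <fcntl.h>
+ *	#include <stdint.h>
+ *
+ *	int fd = open("/dev/tun0", O_RDWR);
+ *	int on = 1;
+ *	ioctl(fd, TUNSIFHEAD, &on);
+ *	uint32_t af = htonl(AF_INET);
+ *	struct iovec iov[2] = {
+ *		{ &af, sizeof(af) },
+ *		{ pkt, pktlen },
+ *	};
+ *	writev(fd, iov, 2);
+ */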
+
+#endif /* !_NET_IF_TUN_HH_ */
diff --git a/freebsd/sys/net/if_types.h b/freebsd/sys/net/if_types.h
new file mode 100644
index 00000000..1d4f2b2a
--- /dev/null
+++ b/freebsd/sys/net/if_types.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/net/if_types.h>
diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h
new file mode 100644
index 00000000..913d62a9
--- /dev/null
+++ b/freebsd/sys/net/if_var.h
@@ -0,0 +1,904 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From: @(#)if.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_VAR_HH_
+#define _NET_IF_VAR_HH_
+
+/*
+ * Structures defining a network interface, providing a packet
+ * transport mechanism (ala level 0 of the PUP protocols).
+ *
+ * Each interface accepts output datagrams of a specified maximum
+ * length, and provides higher level routines with input datagrams
+ * received from its medium.
+ *
+ * Output occurs when the routine if_output is called, with four parameters:
+ *	(*ifp->if_output)(ifp, m, dst, ro)
+ * Here m is the mbuf chain to be sent, dst is the destination address,
+ * and ro is an optional cached route.
+ * The output routine encapsulates the supplied datagram if necessary,
+ * and then transmits it on its medium.
+ *
+ * On input, each interface unwraps the data received by it, and either
+ * places it on the input queue of an internetwork datagram routine
+ * and posts the associated software interrupt, or passes the datagram to a raw
+ * packet input routine.
+ *
+ * Routines exist for locating interfaces by their addresses
+ * or for locating an interface on a certain network, as well as more general
+ * routing and gateway routines maintaining information used to locate
+ * interfaces. These routines live in the files if.c and route.c
+ */
+
+#ifdef __STDC__
+/*
+ * Forward structure declarations for function prototypes [sic].
+ */
+struct mbuf;
+struct thread;
+struct rtentry;
+struct rt_addrinfo;
+struct socket;
+struct ether_header;
+struct carp_if;
+struct ifvlantrunk;
+struct route;
+struct vnet;
+#endif
+
+#include <freebsd/sys/queue.h> /* get TAILQ macros */
+
+#ifdef _KERNEL
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/eventhandler.h>
+#include <freebsd/sys/buf_ring.h>
+#include <freebsd/net/vnet.h>
+#endif /* _KERNEL */
+#include <freebsd/sys/lock.h> /* XXX */
+#include <freebsd/sys/mutex.h> /* XXX */
+#include <freebsd/sys/rwlock.h> /* XXX */
+#include <freebsd/sys/sx.h> /* XXX */
+#include <freebsd/sys/event.h> /* XXX */
+#include <freebsd/sys/_task.h>
+
+#define IF_DUNIT_NONE -1
+
+#include <freebsd/altq/if_altq.h>
+
+TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */
+TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */
+TAILQ_HEAD(ifprefixhead, ifprefix);
+TAILQ_HEAD(ifmultihead, ifmultiaddr);
+TAILQ_HEAD(ifgrouphead, ifg_group);
+
+/*
+ * Structure defining a queue for a network interface.
+ */
+struct ifqueue {
+ struct mbuf *ifq_head;
+ struct mbuf *ifq_tail;
+ int ifq_len;
+ int ifq_maxlen;
+ int ifq_drops;
+ struct mtx ifq_mtx;
+};
+
+/*
+ * Structure defining a network interface.
+ *
+ * (Would like to call this struct ``if'', but C isn't PL/1.)
+ */
+
+struct ifnet {
+ void *if_softc; /* pointer to driver state */
+ void *if_l2com; /* pointer to protocol bits */
+ struct vnet *if_vnet; /* pointer to network stack instance */
+ TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
+ char if_xname[IFNAMSIZ]; /* external name (name + unit) */
+ const char *if_dname; /* driver name */
+ int if_dunit; /* unit or IF_DUNIT_NONE */
+ u_int if_refcount; /* reference count */
+ struct ifaddrhead if_addrhead; /* linked list of addresses per if */
+ /*
+	 * if_addrhead is the list of all addresses associated with
+	 * an interface.
+	 * Some code in the kernel assumes that the first element
+	 * of the list has type AF_LINK, and contains a sockaddr_dl
+	 * address which stores the link-level address and the name
+	 * of the interface.
+ * However, access to the AF_LINK address through this
+ * field is deprecated. Use if_addr or ifaddr_byindex() instead.
+ */
+ int if_pcount; /* number of promiscuous listeners */
+ struct carp_if *if_carp; /* carp interface structure */
+ struct bpf_if *if_bpf; /* packet filter structure */
+ u_short if_index; /* numeric abbreviation for this if */
+ short if_timer; /* time 'til if_watchdog called */
+ struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
+ int if_flags; /* up/down, broadcast, etc. */
+ int if_capabilities; /* interface features & capabilities */
+ int if_capenable; /* enabled features & capabilities */
+ void *if_linkmib; /* link-type-specific MIB data */
+ size_t if_linkmiblen; /* length of above data */
+ struct if_data if_data;
+ struct ifmultihead if_multiaddrs; /* multicast addresses configured */
+ int if_amcount; /* number of all-multicast requests */
+/* procedure handles */
+ int (*if_output) /* output routine (enqueue) */
+ (struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+ void (*if_input) /* input routine (from h/w driver) */
+ (struct ifnet *, struct mbuf *);
+ void (*if_start) /* initiate output routine */
+ (struct ifnet *);
+ int (*if_ioctl) /* ioctl routine */
+ (struct ifnet *, u_long, caddr_t);
+ void (*if_watchdog) /* timer routine */
+ (struct ifnet *);
+ void (*if_init) /* Init routine */
+ (void *);
+ int (*if_resolvemulti) /* validate/resolve multicast */
+ (struct ifnet *, struct sockaddr **, struct sockaddr *);
+ void (*if_qflush) /* flush any queues */
+ (struct ifnet *);
+ int (*if_transmit) /* initiate output routine */
+ (struct ifnet *, struct mbuf *);
+ void (*if_reassign) /* reassign to vnet routine */
+ (struct ifnet *, struct vnet *, char *);
+ struct vnet *if_home_vnet; /* where this ifnet originates from */
+ struct ifaddr *if_addr; /* pointer to link-level address */
+ void *if_llsoftc; /* link layer softc */
+ int if_drv_flags; /* driver-managed status flags */
+ struct ifaltq if_snd; /* output queue (includes altq) */
+ const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */
+
+ void *if_bridge; /* bridge glue */
+
+ struct label *if_label; /* interface MAC label */
+
+ /* these are only used by IPv6 */
+ struct ifprefixhead if_prefixhead; /* list of prefixes per if */
+ void *if_afdata[AF_MAX];
+ int if_afdata_initialized;
+ struct rwlock if_afdata_lock;
+ struct task if_linktask; /* task for link change events */
+ struct mtx if_addr_mtx; /* mutex to protect address lists */
+
+ LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
+ TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
+ /* protected by if_addr_mtx */
+ void *if_pf_kif;
+ void *if_lagg; /* lagg glue */
+ u_char if_alloctype; /* if_type at time of allocation */
+
+ /*
+ * Spare fields are added so that we can modify sensitive data
+ * structures without changing the kernel binary interface, and must
+ * be used with care where binary compatibility is required.
+ */
+ char if_cspare[3];
+ char *if_description; /* interface description */
+ void *if_pspare[7];
+ int if_ispare[4];
+};
+
+typedef void if_init_f_t(void *);
+
+/*
+ * XXX These aliases are terribly dangerous because they could apply
+ * to anything.
+ */
+#define if_mtu if_data.ifi_mtu
+#define if_type if_data.ifi_type
+#define if_physical if_data.ifi_physical
+#define if_addrlen if_data.ifi_addrlen
+#define if_hdrlen if_data.ifi_hdrlen
+#define if_metric if_data.ifi_metric
+#define if_link_state if_data.ifi_link_state
+#define if_baudrate if_data.ifi_baudrate
+#define if_hwassist if_data.ifi_hwassist
+#define if_ipackets if_data.ifi_ipackets
+#define if_ierrors if_data.ifi_ierrors
+#define if_opackets if_data.ifi_opackets
+#define if_oerrors if_data.ifi_oerrors
+#define if_collisions if_data.ifi_collisions
+#define if_ibytes if_data.ifi_ibytes
+#define if_obytes if_data.ifi_obytes
+#define if_imcasts if_data.ifi_imcasts
+#define if_omcasts if_data.ifi_omcasts
+#define if_iqdrops if_data.ifi_iqdrops
+#define if_noproto if_data.ifi_noproto
+#define if_lastchange if_data.ifi_lastchange
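+
+/*
+ * For example, "ifp->if_mtu" expands to "ifp->if_data.ifi_mtu"; the
+ * hazard noted above is that the rewrite applies to any structure with
+ * a member of the same name, not just struct ifnet.
+ */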
+
+/* for compatibility with other BSDs */
+#define if_addrlist if_addrhead
+#define if_list if_link
+#define if_name(ifp) ((ifp)->if_xname)
+
+/*
+ * Locks for address lists on the network interface.
+ */
+#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_mtx, \
+ "if_addr_mtx", NULL, MTX_DEF)
+#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_mtx)
+#define IF_ADDR_LOCK(if) mtx_lock(&(if)->if_addr_mtx)
+#define IF_ADDR_UNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
+#define IF_ADDR_LOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
+
+/*
+ * Function variations on locking macros intended to be used by loadable
+ * kernel modules in order to divorce them from the internals of address list
+ * locking.
+ */
+void if_addr_rlock(struct ifnet *ifp); /* if_addrhead */
+void if_addr_runlock(struct ifnet *ifp); /* if_addrhead */
+void if_maddr_rlock(struct ifnet *ifp); /* if_multiaddrs */
+void if_maddr_runlock(struct ifnet *ifp); /* if_multiaddrs */
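+
+/*
+ * A minimal sketch (hypothetical module code, not part of this header)
+ * of walking the address list under these accessors:
+ *
+ *	struct ifaddr *ifa;
+ *
+ *	if_addr_rlock(ifp);
+ *	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ *		if (ifa->ifa_addr->sa_family == AF_INET)
+ *			break;
+ *	}
+ *	if_addr_runlock(ifp);
+ */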
+
+/*
+ * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
+ * are queues of messages stored on ifqueue structures
+ * (defined above). Entries are added to and deleted from these structures
+ * by these macros, which should be called with ipl raised to splimp().
+ */
+#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx)
+#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx)
+#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED)
+#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
+#define _IF_DROP(ifq) ((ifq)->ifq_drops++)
+#define _IF_QLEN(ifq) ((ifq)->ifq_len)
+
+#define _IF_ENQUEUE(ifq, m) do { \
+ (m)->m_nextpkt = NULL; \
+ if ((ifq)->ifq_tail == NULL) \
+ (ifq)->ifq_head = m; \
+ else \
+ (ifq)->ifq_tail->m_nextpkt = m; \
+ (ifq)->ifq_tail = m; \
+ (ifq)->ifq_len++; \
+} while (0)
+
+#define IF_ENQUEUE(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_ENQUEUE(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_PREPEND(ifq, m) do { \
+ (m)->m_nextpkt = (ifq)->ifq_head; \
+ if ((ifq)->ifq_tail == NULL) \
+ (ifq)->ifq_tail = (m); \
+ (ifq)->ifq_head = (m); \
+ (ifq)->ifq_len++; \
+} while (0)
+
+#define IF_PREPEND(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_PREPEND(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_DEQUEUE(ifq, m) do { \
+ (m) = (ifq)->ifq_head; \
+ if (m) { \
+ if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \
+ (ifq)->ifq_tail = NULL; \
+ (m)->m_nextpkt = NULL; \
+ (ifq)->ifq_len--; \
+ } \
+} while (0)
+
+#define IF_DEQUEUE(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_DEQUEUE(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head)
+#define IF_POLL(ifq, m) _IF_POLL(ifq, m)
+
+#define _IF_DRAIN(ifq) do { \
+ struct mbuf *m; \
+ for (;;) { \
+ _IF_DEQUEUE(ifq, m); \
+ if (m == NULL) \
+ break; \
+ m_freem(m); \
+ } \
+} while (0)
+
+#define IF_DRAIN(ifq) do { \
+ IF_LOCK(ifq); \
+ _IF_DRAIN(ifq); \
+ IF_UNLOCK(ifq); \
+} while(0)
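+
+/*
+ * A minimal usage sketch (hypothetical driver code, not part of this
+ * header) pairing the locked and unlocked variants above; "sc->foo_q"
+ * is an assumed ifqueue embedded in a driver softc:
+ *
+ *	IF_LOCK(&sc->foo_q);
+ *	if (_IF_QFULL(&sc->foo_q)) {
+ *		_IF_DROP(&sc->foo_q);
+ *		IF_UNLOCK(&sc->foo_q);
+ *		m_freem(m);
+ *	} else {
+ *		_IF_ENQUEUE(&sc->foo_q, m);
+ *		IF_UNLOCK(&sc->foo_q);
+ *	}
+ */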
+
+#ifdef _KERNEL
+/* interface link layer address change event */
+typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);
+EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t);
+/* interface address change event */
+typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *);
+EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t);
+/* new interface arrival event */
+typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *);
+EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t);
+/* interface departure event */
+typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *);
+EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);
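+
+/*
+ * A minimal sketch (hypothetical module code) of subscribing to one of
+ * these events; if_vlan.c below uses the same pattern for the departure
+ * and lladdr events in vlan_modevent():
+ *
+ *	static eventhandler_tag foo_tag;
+ *
+ *	static void
+ *	foo_arrival(void *arg, struct ifnet *ifp)
+ *	{
+ *		if_printf(ifp, "interface arrived\n");
+ *	}
+ *
+ *	foo_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
+ *	    foo_arrival, NULL, EVENTHANDLER_PRI_ANY);
+ */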
+
+/*
+ * interface groups
+ */
+struct ifg_group {
+ char ifg_group[IFNAMSIZ];
+ u_int ifg_refcnt;
+ void *ifg_pf_kif;
+ TAILQ_HEAD(, ifg_member) ifg_members;
+ TAILQ_ENTRY(ifg_group) ifg_next;
+};
+
+struct ifg_member {
+ TAILQ_ENTRY(ifg_member) ifgm_next;
+ struct ifnet *ifgm_ifp;
+};
+
+struct ifg_list {
+ struct ifg_group *ifgl_group;
+ TAILQ_ENTRY(ifg_list) ifgl_next;
+};
+
+/* group attach event */
+typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
+EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
+/* group detach event */
+typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *);
+EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
+/* group change event */
+typedef void (*group_change_event_handler_t)(void *, const char *);
+EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
+
+#define IF_AFDATA_LOCK_INIT(ifp) \
+ rw_init(&(ifp)->if_afdata_lock, "if_afdata")
+
+#define IF_AFDATA_WLOCK(ifp) rw_wlock(&(ifp)->if_afdata_lock)
+#define IF_AFDATA_RLOCK(ifp) rw_rlock(&(ifp)->if_afdata_lock)
+#define IF_AFDATA_WUNLOCK(ifp) rw_wunlock(&(ifp)->if_afdata_lock)
+#define IF_AFDATA_RUNLOCK(ifp) rw_runlock(&(ifp)->if_afdata_lock)
+#define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp)
+#define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp)
+#define IF_AFDATA_TRYLOCK(ifp) rw_try_wlock(&(ifp)->if_afdata_lock)
+#define IF_AFDATA_DESTROY(ifp) rw_destroy(&(ifp)->if_afdata_lock)
+
+#define IF_AFDATA_LOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_LOCKED)
+#define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED)
+
+int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
+ int adjust);
+#define IF_HANDOFF(ifq, m, ifp) \
+ if_handoff((struct ifqueue *)ifq, m, ifp, 0)
+#define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \
+ if_handoff((struct ifqueue *)ifq, m, ifp, adj)
+
+void if_start(struct ifnet *);
+
+#define IFQ_ENQUEUE(ifq, m, err) \
+do { \
+ IF_LOCK(ifq); \
+ if (ALTQ_IS_ENABLED(ifq)) \
+ ALTQ_ENQUEUE(ifq, m, NULL, err); \
+ else { \
+ if (_IF_QFULL(ifq)) { \
+ m_freem(m); \
+ (err) = ENOBUFS; \
+ } else { \
+ _IF_ENQUEUE(ifq, m); \
+ (err) = 0; \
+ } \
+ } \
+ if (err) \
+ (ifq)->ifq_drops++; \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_DEQUEUE_NOLOCK(ifq, m) \
+do { \
+ if (TBR_IS_ENABLED(ifq)) \
+ (m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \
+ else if (ALTQ_IS_ENABLED(ifq)) \
+ ALTQ_DEQUEUE(ifq, m); \
+ else \
+ _IF_DEQUEUE(ifq, m); \
+} while (0)
+
+#define IFQ_DEQUEUE(ifq, m) \
+do { \
+ IF_LOCK(ifq); \
+ IFQ_DEQUEUE_NOLOCK(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_POLL_NOLOCK(ifq, m) \
+do { \
+ if (TBR_IS_ENABLED(ifq)) \
+ (m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \
+ else if (ALTQ_IS_ENABLED(ifq)) \
+ ALTQ_POLL(ifq, m); \
+ else \
+ _IF_POLL(ifq, m); \
+} while (0)
+
+#define IFQ_POLL(ifq, m) \
+do { \
+ IF_LOCK(ifq); \
+ IFQ_POLL_NOLOCK(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_PURGE_NOLOCK(ifq) \
+do { \
+ if (ALTQ_IS_ENABLED(ifq)) { \
+ ALTQ_PURGE(ifq); \
+ } else \
+ _IF_DRAIN(ifq); \
+} while (0)
+
+#define IFQ_PURGE(ifq) \
+do { \
+ IF_LOCK(ifq); \
+ IFQ_PURGE_NOLOCK(ifq); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_SET_READY(ifq) \
+ do { ((ifq)->altq_flags |= ALTQF_READY); } while (0)
+
+#define IFQ_LOCK(ifq) IF_LOCK(ifq)
+#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq)
+#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq)
+#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
+#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++)
+#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len)
+#define IFQ_INC_DROPS(ifq) ((ifq)->ifq_drops++)
+#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len))
+
+/*
+ * The IFF_DRV_OACTIVE test should really occur in the device driver, not in
+ * the handoff logic, as that flag is locked by the device driver.
+ */
+#define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \
+do { \
+ int len; \
+ short mflags; \
+ \
+ len = (m)->m_pkthdr.len; \
+ mflags = (m)->m_flags; \
+ IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \
+ if ((err) == 0) { \
+ (ifp)->if_obytes += len + (adj); \
+ if (mflags & M_MCAST) \
+ (ifp)->if_omcasts++; \
+ if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \
+ if_start(ifp); \
+ } \
+} while (0)
+
+#define IFQ_HANDOFF(ifp, m, err) \
+ IFQ_HANDOFF_ADJ(ifp, m, 0, err)
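+
+/*
+ * A minimal sketch (hypothetical caller, not part of this header) of
+ * handing a packet to an interface send queue; on failure the mbuf has
+ * already been freed by IFQ_ENQUEUE(), so only accounting remains:
+ *
+ *	int err;
+ *
+ *	IFQ_HANDOFF(ifp, m, err);
+ *	if (err != 0)
+ *		ifp->if_oerrors++;
+ */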
+
+#define IFQ_DRV_DEQUEUE(ifq, m) \
+do { \
+ (m) = (ifq)->ifq_drv_head; \
+ if (m) { \
+ if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \
+ (ifq)->ifq_drv_tail = NULL; \
+ (m)->m_nextpkt = NULL; \
+ (ifq)->ifq_drv_len--; \
+ } else { \
+ IFQ_LOCK(ifq); \
+ IFQ_DEQUEUE_NOLOCK(ifq, m); \
+ while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \
+ struct mbuf *m0; \
+ IFQ_DEQUEUE_NOLOCK(ifq, m0); \
+ if (m0 == NULL) \
+ break; \
+ m0->m_nextpkt = NULL; \
+ if ((ifq)->ifq_drv_tail == NULL) \
+ (ifq)->ifq_drv_head = m0; \
+ else \
+ (ifq)->ifq_drv_tail->m_nextpkt = m0; \
+ (ifq)->ifq_drv_tail = m0; \
+ (ifq)->ifq_drv_len++; \
+ } \
+ IFQ_UNLOCK(ifq); \
+ } \
+} while (0)
+
+#define IFQ_DRV_PREPEND(ifq, m) \
+do { \
+ (m)->m_nextpkt = (ifq)->ifq_drv_head; \
+ if ((ifq)->ifq_drv_tail == NULL) \
+ (ifq)->ifq_drv_tail = (m); \
+ (ifq)->ifq_drv_head = (m); \
+ (ifq)->ifq_drv_len++; \
+} while (0)
+
+#define IFQ_DRV_IS_EMPTY(ifq) \
+ (((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0))
+
+#define IFQ_DRV_PURGE(ifq) \
+do { \
+ struct mbuf *m, *n = (ifq)->ifq_drv_head; \
+ while((m = n) != NULL) { \
+ n = m->m_nextpkt; \
+ m_freem(m); \
+ } \
+ (ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \
+ (ifq)->ifq_drv_len = 0; \
+ IFQ_PURGE(ifq); \
+} while (0)
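+
+/*
+ * A minimal sketch (hypothetical driver if_start loop) of the driver
+ * queue macros above; foo_encap() is an assumed helper that leaves the
+ * mbuf untouched when it fails for lack of descriptors:
+ *
+ *	struct mbuf *m;
+ *
+ *	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+ *		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+ *		if (m == NULL)
+ *			break;
+ *		if (foo_encap(sc, m) != 0) {
+ *			IFQ_DRV_PREPEND(&ifp->if_snd, m);
+ *			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ *			break;
+ *		}
+ *		BPF_MTAP(ifp, m);
+ *	}
+ */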
+
+#ifdef _KERNEL
+static __inline void
+drbr_stats_update(struct ifnet *ifp, int len, int mflags)
+{
+#ifndef NO_SLOW_STATS
+ ifp->if_obytes += len;
+ if (mflags & M_MCAST)
+ ifp->if_omcasts++;
+#endif
+}
+
+static __inline int
+drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
+{
+ int error = 0;
+ int len = m->m_pkthdr.len;
+ int mflags = m->m_flags;
+
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_ENQUEUE(&ifp->if_snd, m, error);
+ return (error);
+ }
+#endif
+ if ((error = buf_ring_enqueue_bytes(br, m, len)) == ENOBUFS) {
+ br->br_drops++;
+ m_freem(m);
+ } else
+ drbr_stats_update(ifp, len, mflags);
+
+ return (error);
+}
+
+static __inline void
+drbr_flush(struct ifnet *ifp, struct buf_ring *br)
+{
+ struct mbuf *m;
+
+#ifdef ALTQ
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
+ IFQ_PURGE(&ifp->if_snd);
+#endif
+ while ((m = buf_ring_dequeue_sc(br)) != NULL)
+ m_freem(m);
+}
+
+static __inline void
+drbr_free(struct buf_ring *br, struct malloc_type *type)
+{
+
+ drbr_flush(NULL, br);
+ buf_ring_free(br, type);
+}
+
+static __inline struct mbuf *
+drbr_dequeue(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ struct mbuf *m;
+
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ return (m);
+ }
+#endif
+ return (buf_ring_dequeue_sc(br));
+}
+
+static __inline struct mbuf *
+drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
+ int (*func) (struct mbuf *, void *), void *arg)
+{
+ struct mbuf *m;
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, m);
+ if (m != NULL && func(m, arg) == 0) {
+ IFQ_UNLOCK(&ifp->if_snd);
+ return (NULL);
+ }
+ IFQ_DEQUEUE_NOLOCK(&ifp->if_snd, m);
+ IFQ_UNLOCK(&ifp->if_snd);
+ return (m);
+ }
+#endif
+ m = buf_ring_peek(br);
+ if (m == NULL || func(m, arg) == 0)
+ return (NULL);
+
+ return (buf_ring_dequeue_sc(br));
+}
+
+static __inline int
+drbr_empty(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (IFQ_IS_EMPTY(&ifp->if_snd));
+#endif
+ return (buf_ring_empty(br));
+}
+
+static __inline int
+drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (1);
+#endif
+ return (!buf_ring_empty(br));
+}
+
+static __inline int
+drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (ifp->if_snd.ifq_len);
+#endif
+ return (buf_ring_count(br));
+}
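+
+/*
+ * A minimal sketch (hypothetical driver code) of an if_transmit method
+ * built on the drbr_* wrappers above; "struct foo_softc", "sc->foo_br"
+ * and foo_xmit() are assumed names. Note that drbr_enqueue() has
+ * already freed the mbuf when it returns ENOBUFS:
+ *
+ *	static int
+ *	foo_transmit(struct ifnet *ifp, struct mbuf *m)
+ *	{
+ *		struct foo_softc *sc = ifp->if_softc;
+ *		int error;
+ *
+ *		error = drbr_enqueue(ifp, sc->foo_br, m);
+ *		if (error != 0)
+ *			return (error);
+ *		while ((m = drbr_dequeue(ifp, sc->foo_br)) != NULL)
+ *			foo_xmit(sc, m);
+ *		return (0);
+ *	}
+ */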
+#endif
+/*
+ * 72 was chosen below because it is the size of a TCP/IP
+ * header (40) + the minimum mss (32).
+ */
+#define IF_MINMTU 72
+#define IF_MAXMTU 65535
+
+#endif /* _KERNEL */
+
+/*
+ * The ifaddr structure contains information about one address
+ * of an interface. They are maintained by the different address families,
+ * are allocated and attached when an address is set, and are linked
+ * together so all addresses for an interface can be located.
+ *
+ * NOTE: a 'struct ifaddr' is always at the beginning of a larger
+ * chunk of malloc'ed memory, where we store the three addresses
+ * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here.
+ */
+struct ifaddr {
+ struct sockaddr *ifa_addr; /* address of interface */
+ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */
+#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */
+ struct sockaddr *ifa_netmask; /* used to determine subnet */
+ struct if_data if_data; /* not all members are meaningful */
+ struct ifnet *ifa_ifp; /* back-pointer to interface */
+ TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
+ void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */
+ (int, struct rtentry *, struct rt_addrinfo *);
+ u_short ifa_flags; /* mostly rt_flags for cloning */
+ u_int ifa_refcnt; /* references to this structure */
+ int ifa_metric; /* cost of going out this interface */
+ int (*ifa_claim_addr) /* check if an addr goes to this if */
+ (struct ifaddr *, struct sockaddr *);
+ struct mtx ifa_mtx;
+};
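+
+/*
+ * A sketch of the allocation convention described in the NOTE above,
+ * using hypothetical names: an address family embeds the ifaddr as the
+ * first member of its own per-address structure and stores the
+ * sockaddrs that ifa_addr and friends point at in the same allocation:
+ *
+ *	struct foo_ifaddr {
+ *		struct ifaddr		fia_ifa;
+ *		struct sockaddr_in	fia_addr;
+ *		struct sockaddr_in	fia_netmask;
+ *	};
+ */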
+#define IFA_ROUTE RTF_UP /* route installed */
+#define IFA_RTSELF RTF_HOST /* loopback route to self installed */
+
+/* for compatibility with other BSDs */
+#define ifa_list ifa_link
+
+#ifdef _KERNEL
+#define IFA_LOCK(ifa) mtx_lock(&(ifa)->ifa_mtx)
+#define IFA_UNLOCK(ifa) mtx_unlock(&(ifa)->ifa_mtx)
+
+void ifa_free(struct ifaddr *ifa);
+void ifa_init(struct ifaddr *ifa);
+void ifa_ref(struct ifaddr *ifa);
+#endif
+
+/*
+ * The prefix structure contains information about one prefix
+ * of an interface. They are maintained by the different address families,
+ * are allocated and attached when a prefix or an address is set,
+ * and are linked together so all prefixes for an interface can be located.
+ */
+struct ifprefix {
+ struct sockaddr *ifpr_prefix; /* prefix of interface */
+ struct ifnet *ifpr_ifp; /* back-pointer to interface */
+ TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */
+ u_char ifpr_plen; /* prefix length in bits */
+ u_char ifpr_type; /* protocol dependent prefix type */
+};
+
+/*
+ * Multicast address structure. This is analogous to the ifaddr
+ * structure except that it keeps track of multicast addresses.
+ */
+struct ifmultiaddr {
+ TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */
+ struct sockaddr *ifma_addr; /* address this membership is for */
+ struct sockaddr *ifma_lladdr; /* link-layer translation, if any */
+ struct ifnet *ifma_ifp; /* back-pointer to interface */
+ u_int ifma_refcount; /* reference count */
+ void *ifma_protospec; /* protocol-specific state, if any */
+ struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */
+};
+
+#ifdef _KERNEL
+
+extern struct rwlock ifnet_rwlock;
+extern struct sx ifnet_sxlock;
+
+#define IFNET_LOCK_INIT() do { \
+ rw_init_flags(&ifnet_rwlock, "ifnet_rw", RW_RECURSE); \
+ sx_init_flags(&ifnet_sxlock, "ifnet_sx", SX_RECURSE); \
+} while(0)
+
+#define IFNET_WLOCK() do { \
+ sx_xlock(&ifnet_sxlock); \
+ rw_wlock(&ifnet_rwlock); \
+} while (0)
+
+#define IFNET_WUNLOCK() do { \
+ rw_wunlock(&ifnet_rwlock); \
+ sx_xunlock(&ifnet_sxlock); \
+} while (0)
+
+/*
+ * To assert the ifnet lock, you must know not only whether it's for read or
+ * write, but also whether it was acquired with sleep support or not.
+ */
+#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
+#define IFNET_RLOCK_NOSLEEP_ASSERT() rw_assert(&ifnet_rwlock, RA_RLOCKED)
+#define IFNET_WLOCK_ASSERT() do { \
+ sx_assert(&ifnet_sxlock, SA_XLOCKED); \
+ rw_assert(&ifnet_rwlock, RA_WLOCKED); \
+} while (0)
+
+#define IFNET_RLOCK() sx_slock(&ifnet_sxlock)
+#define IFNET_RLOCK_NOSLEEP() rw_rlock(&ifnet_rwlock)
+#define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock)
+#define IFNET_RUNLOCK_NOSLEEP() rw_runlock(&ifnet_rwlock)
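+
+/*
+ * A minimal sketch (hypothetical, non-sleeping context) of walking the
+ * global interface list under the reader lock; V_ifnet is declared
+ * below, and if_vlan.c uses the same pattern in
+ * vlan_clone_match_ethertag():
+ *
+ *	struct ifnet *ifp;
+ *
+ *	IFNET_RLOCK_NOSLEEP();
+ *	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ *		if (ifp->if_type == IFT_ETHER)
+ *			break;
+ *	}
+ *	IFNET_RUNLOCK_NOSLEEP();
+ */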
+
+/*
+ * Look up an ifnet given its index; the _ref variant also acquires a
+ * reference that must be freed using if_rele(). It is almost always a bug
+ * to call ifnet_byindex() instead of ifnet_byindex_ref().
+ */
+struct ifnet *ifnet_byindex(u_short idx);
+struct ifnet *ifnet_byindex_locked(u_short idx);
+struct ifnet *ifnet_byindex_ref(u_short idx);
+
+/*
+ * Given the index, ifaddr_byindex() returns the one and only
+ * link-level ifaddr for the interface. You are not supposed to use
+ * it to traverse the list of addresses associated with the interface.
+ */
+struct ifaddr *ifaddr_byindex(u_short idx);
+
+VNET_DECLARE(struct ifnethead, ifnet);
+VNET_DECLARE(struct ifgrouphead, ifg_head);
+VNET_DECLARE(int, if_index);
+VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */
+VNET_DECLARE(int, useloopback);
+
+#define V_ifnet VNET(ifnet)
+#define V_ifg_head VNET(ifg_head)
+#define V_if_index VNET(if_index)
+#define V_loif VNET(loif)
+#define V_useloopback VNET(useloopback)
+
+extern int ifqmaxlen;
+
+int if_addgroup(struct ifnet *, const char *);
+int if_delgroup(struct ifnet *, const char *);
+int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **);
+int if_allmulti(struct ifnet *, int);
+struct ifnet* if_alloc(u_char);
+void if_attach(struct ifnet *);
+void if_dead(struct ifnet *);
+int if_delmulti(struct ifnet *, struct sockaddr *);
+void if_delmulti_ifma(struct ifmultiaddr *);
+void if_detach(struct ifnet *);
+void if_vmove(struct ifnet *, struct vnet *);
+void if_purgeaddrs(struct ifnet *);
+void if_delallmulti(struct ifnet *);
+void if_down(struct ifnet *);
+struct ifmultiaddr *
+ if_findmulti(struct ifnet *, struct sockaddr *);
+void if_free(struct ifnet *);
+void if_free_type(struct ifnet *, u_char);
+void if_initname(struct ifnet *, const char *, int);
+void if_link_state_change(struct ifnet *, int);
+int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
+void if_qflush(struct ifnet *);
+void if_ref(struct ifnet *);
+void if_rele(struct ifnet *);
+int if_setlladdr(struct ifnet *, const u_char *, int);
+void if_up(struct ifnet *);
+int ifioctl(struct socket *, u_long, caddr_t, struct thread *);
+int ifpromisc(struct ifnet *, int);
+struct ifnet *ifunit(const char *);
+struct ifnet *ifunit_ref(const char *);
+
+void ifq_init(struct ifaltq *, struct ifnet *ifp);
+void ifq_delete(struct ifaltq *);
+
+int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *);
+int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *);
+
+struct ifaddr *ifa_ifwithaddr(struct sockaddr *);
+int ifa_ifwithaddr_check(struct sockaddr *);
+struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
+struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
+struct ifaddr *ifa_ifwithnet(struct sockaddr *, int);
+struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
+struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int);
+
+struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);
+
+int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen);
+
+typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp);
+typedef void if_com_free_t(void *com, u_char type);
+void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
+void if_deregister_com_alloc(u_char type);
+
+#define IF_LLADDR(ifp) \
+ LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))
+
+#ifdef DEVICE_POLLING
+enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };
+
+typedef int poll_handler_t(struct ifnet *ifp, enum poll_cmd cmd, int count);
+int ether_poll_register(poll_handler_t *h, struct ifnet *ifp);
+int ether_poll_deregister(struct ifnet *ifp);
+#endif /* DEVICE_POLLING */
+
+#endif /* _KERNEL */
+
+#endif /* !_NET_IF_VAR_HH_ */
diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c
new file mode 100644
index 00000000..5ae5efd4
--- /dev/null
+++ b/freebsd/sys/net/if_vlan.c
@@ -0,0 +1,1538 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright 1998 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs.
+ * Might be extended some day to also handle IEEE 802.1p priority
+ * tagging. This is sort of sneaky in the implementation, since
+ * we need to pretend to be enough of an Ethernet implementation
+ * to make arp work. The way we do this is by telling everyone
+ * that we are an Ethernet, and then catch the packets that
+ * ether_output() left on our output queue when it calls
+ * if_start(), rewrite them for use by the real outgoing interface,
+ * and ask it to send them.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_vlan.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_vlan_var.h>
+#include <freebsd/net/vnet.h>
+
+#define VLANNAME "vlan"
+#define VLAN_DEF_HWIDTH 4
+#define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST)
+
+#define UP_AND_RUNNING(ifp) \
+ ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING)
+
+LIST_HEAD(ifvlanhead, ifvlan);
+
+struct ifvlantrunk {
+ struct ifnet *parent; /* parent interface of this trunk */
+ struct rwlock rw;
+#ifdef VLAN_ARRAY
+#define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1)
+ struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */
+#else
+ struct ifvlanhead *hash; /* dynamic hash-list table */
+ uint16_t hmask;
+ uint16_t hwidth;
+#endif
+ int refcnt;
+};
+
+struct vlan_mc_entry {
+ struct ether_addr mc_addr;
+ SLIST_ENTRY(vlan_mc_entry) mc_entries;
+};
+
+struct ifvlan {
+ struct ifvlantrunk *ifv_trunk;
+ struct ifnet *ifv_ifp;
+#define TRUNK(ifv) ((ifv)->ifv_trunk)
+#define PARENT(ifv) ((ifv)->ifv_trunk->parent)
+ int ifv_pflags; /* special flags we have set on parent */
+ struct ifv_linkmib {
+ int ifvm_encaplen; /* encapsulation length */
+ int ifvm_mtufudge; /* MTU fudged by this much */
+ int ifvm_mintu; /* min transmission unit */
+ uint16_t ifvm_proto; /* encapsulation ethertype */
+ uint16_t ifvm_tag; /* tag to apply on packets leaving if */
+ } ifv_mib;
+ SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
+#ifndef VLAN_ARRAY
+ LIST_ENTRY(ifvlan) ifv_list;
+#endif
+};
+#define ifv_proto ifv_mib.ifvm_proto
+#define ifv_tag ifv_mib.ifvm_tag
+#define ifv_encaplen ifv_mib.ifvm_encaplen
+#define ifv_mtufudge ifv_mib.ifvm_mtufudge
+#define ifv_mintu ifv_mib.ifvm_mintu
+
+/* Special flags we should propagate to parent. */
+static struct {
+ int flag;
+ int (*func)(struct ifnet *, int);
+} vlan_pflags[] = {
+ {IFF_PROMISC, ifpromisc},
+ {IFF_ALLMULTI, if_allmulti},
+ {0, NULL}
+};
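+
+/*
+ * For example, setting IFF_PROMISC on a vlan interface walks this table
+ * and calls ifpromisc() on the parent, so promiscuous mode is entered
+ * on the trunk on the vlan's behalf (see vlan_setflag() below).
+ */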
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN");
+SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency");
+
+static int soft_pad = 0;
+SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
+ "pad short frames before tagging");
+
+static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface");
+
+static eventhandler_tag ifdetach_tag;
+static eventhandler_tag iflladdr_tag;
+
+/*
+ * We have a global mutex that serializes configuration
+ * changes and is not used in normal packet delivery.
+ *
+ * We also have a per-trunk rwlock that is locked shared on packet
+ * processing and exclusive when configuration is changed.
+ *
+ * Building with VLAN_ARRAY replaces the dynamic hash with a static
+ * array of 4096 entries. In theory this could give a boost in
+ * processing, but in practice it does not, probably because the array
+ * is too big to fit into the CPU cache.
+ */
+static struct mtx ifv_mtx;
+#define VLAN_LOCK_INIT() mtx_init(&ifv_mtx, "vlan_global", NULL, MTX_DEF)
+#define VLAN_LOCK_DESTROY() mtx_destroy(&ifv_mtx)
+#define VLAN_LOCK_ASSERT() mtx_assert(&ifv_mtx, MA_OWNED)
+#define VLAN_LOCK() mtx_lock(&ifv_mtx)
+#define VLAN_UNLOCK() mtx_unlock(&ifv_mtx)
+#define TRUNK_LOCK_INIT(trunk) rw_init(&(trunk)->rw, VLANNAME)
+#define TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
+#define TRUNK_LOCK(trunk) rw_wlock(&(trunk)->rw)
+#define TRUNK_UNLOCK(trunk) rw_wunlock(&(trunk)->rw)
+#define TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED)
+#define TRUNK_RLOCK(trunk) rw_rlock(&(trunk)->rw)
+#define TRUNK_RUNLOCK(trunk) rw_runlock(&(trunk)->rw)
+#define TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED)
+
+#ifndef VLAN_ARRAY
+static void vlan_inithash(struct ifvlantrunk *trunk);
+static void vlan_freehash(struct ifvlantrunk *trunk);
+static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
+static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
+static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
+static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
+ uint16_t tag);
+#endif
+static void trunk_destroy(struct ifvlantrunk *trunk);
+
+static void vlan_start(struct ifnet *ifp);
+static void vlan_init(void *foo);
+static void vlan_input(struct ifnet *ifp, struct mbuf *m);
+static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
+static int vlan_setflag(struct ifnet *ifp, int flag, int status,
+ int (*func)(struct ifnet *, int));
+static int vlan_setflags(struct ifnet *ifp, int status);
+static int vlan_setmulti(struct ifnet *ifp);
+static void vlan_unconfig(struct ifnet *ifp);
+static void vlan_unconfig_locked(struct ifnet *ifp);
+static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
+static void vlan_link_state(struct ifnet *ifp, int link);
+static void vlan_capabilities(struct ifvlan *ifv);
+static void vlan_trunk_capabilities(struct ifnet *ifp);
+
+static struct ifnet *vlan_clone_match_ethertag(struct if_clone *,
+ const char *, int *);
+static int vlan_clone_match(struct if_clone *, const char *);
+static int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
+static int vlan_clone_destroy(struct if_clone *, struct ifnet *);
+
+static void vlan_ifdetach(void *arg, struct ifnet *ifp);
+static void vlan_iflladdr(void *arg, struct ifnet *ifp);
+
+static struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL,
+ IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy);
+
+#ifdef VIMAGE
+static VNET_DEFINE(struct if_clone, vlan_cloner);
+#define V_vlan_cloner VNET(vlan_cloner)
+#endif
+
+#ifndef VLAN_ARRAY
+#define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m))
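+
+/*
+ * For example, with the default hmask of 15 (hwidth 4), vlan tag 100
+ * (0x064) hashes to bucket (0x00 ^ 0x06 ^ 0x64) & 0xf = 2.
+ */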
+
+static void
+vlan_inithash(struct ifvlantrunk *trunk)
+{
+ int i, n;
+
+ /*
+ * The trunk must not be locked here since we call malloc(M_WAITOK).
+	 * That is OK because this function is called before the trunk struct
+	 * gets hooked up and becomes visible to other threads.
+ */
+
+ KASSERT(trunk->hwidth == 0 && trunk->hash == NULL,
+ ("%s: hash already initialized", __func__));
+
+ trunk->hwidth = VLAN_DEF_HWIDTH;
+ n = 1 << trunk->hwidth;
+ trunk->hmask = n - 1;
+ trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK);
+ for (i = 0; i < n; i++)
+ LIST_INIT(&trunk->hash[i]);
+}
+
+static void
+vlan_freehash(struct ifvlantrunk *trunk)
+{
+#ifdef INVARIANTS
+ int i;
+
+ KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
+ for (i = 0; i < (1 << trunk->hwidth); i++)
+ KASSERT(LIST_EMPTY(&trunk->hash[i]),
+ ("%s: hash table not empty", __func__));
+#endif
+ free(trunk->hash, M_VLAN);
+ trunk->hash = NULL;
+ trunk->hwidth = trunk->hmask = 0;
+}
+
+static int
+vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
+{
+ int i, b;
+ struct ifvlan *ifv2;
+
+ TRUNK_LOCK_ASSERT(trunk);
+ KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
+
+ b = 1 << trunk->hwidth;
+ i = HASH(ifv->ifv_tag, trunk->hmask);
+ LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
+ if (ifv->ifv_tag == ifv2->ifv_tag)
+ return (EEXIST);
+
+ /*
+	 * Grow the hash when the number of vlans exceeds half of the number of
+	 * hash buckets squared. This bounds the average linked-list length at
+	 * buckets/2; e.g., with the default 16 buckets the table first grows
+	 * once more than 16*16/2 = 128 vlans are hashed into it.
+ */
+ if (trunk->refcnt > (b * b) / 2) {
+ vlan_growhash(trunk, 1);
+ i = HASH(ifv->ifv_tag, trunk->hmask);
+ }
+ LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
+ trunk->refcnt++;
+
+ return (0);
+}
+
+static int
+vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
+{
+ int i, b;
+ struct ifvlan *ifv2;
+
+ TRUNK_LOCK_ASSERT(trunk);
+ KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
+
+ b = 1 << trunk->hwidth;
+ i = HASH(ifv->ifv_tag, trunk->hmask);
+ LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
+ if (ifv2 == ifv) {
+ trunk->refcnt--;
+ LIST_REMOVE(ifv2, ifv_list);
+ if (trunk->refcnt < (b * b) / 2)
+ vlan_growhash(trunk, -1);
+ return (0);
+ }
+
+ panic("%s: vlan not found\n", __func__);
+ return (ENOENT); /*NOTREACHED*/
+}
+
+/*
+ * Grow the hash larger or smaller if memory permits.
+ */
+static void
+vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
+{
+ struct ifvlan *ifv;
+ struct ifvlanhead *hash2;
+ int hwidth2, i, j, n, n2;
+
+ TRUNK_LOCK_ASSERT(trunk);
+ KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
+
+ if (howmuch == 0) {
+ /* Harmless yet obvious coding error */
+ printf("%s: howmuch is 0\n", __func__);
+ return;
+ }
+
+ hwidth2 = trunk->hwidth + howmuch;
+ n = 1 << trunk->hwidth;
+ n2 = 1 << hwidth2;
+ /* Do not shrink the table below the default */
+ if (hwidth2 < VLAN_DEF_HWIDTH)
+ return;
+
+ /* M_NOWAIT because we're called with trunk mutex held */
+ hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT);
+ if (hash2 == NULL) {
+ printf("%s: out of memory -- hash size not changed\n",
+ __func__);
+ return; /* We can live with the old hash table */
+ }
+ for (j = 0; j < n2; j++)
+ LIST_INIT(&hash2[j]);
+ for (i = 0; i < n; i++)
+ while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
+ LIST_REMOVE(ifv, ifv_list);
+ j = HASH(ifv->ifv_tag, n2 - 1);
+ LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
+ }
+ free(trunk->hash, M_VLAN);
+ trunk->hash = hash2;
+ trunk->hwidth = hwidth2;
+ trunk->hmask = n2 - 1;
+
+ if (bootverbose)
+ if_printf(trunk->parent,
+ "VLAN hash table resized from %d to %d buckets\n", n, n2);
+}
+
+static __inline struct ifvlan *
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+{
+ struct ifvlan *ifv;
+
+ TRUNK_LOCK_RASSERT(trunk);
+
+ LIST_FOREACH(ifv, &trunk->hash[HASH(tag, trunk->hmask)], ifv_list)
+ if (ifv->ifv_tag == tag)
+ return (ifv);
+ return (NULL);
+}
+
+#if 0
+/* Debugging code to view the hashtables. */
+static void
+vlan_dumphash(struct ifvlantrunk *trunk)
+{
+ int i;
+ struct ifvlan *ifv;
+
+ for (i = 0; i < (1 << trunk->hwidth); i++) {
+ printf("%d: ", i);
+ LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
+ printf("%s ", ifv->ifv_ifp->if_xname);
+ printf("\n");
+ }
+}
+#endif /* 0 */
+#endif /* !VLAN_ARRAY */
+
+static void
+trunk_destroy(struct ifvlantrunk *trunk)
+{
+ VLAN_LOCK_ASSERT();
+
+ TRUNK_LOCK(trunk);
+#ifndef VLAN_ARRAY
+ vlan_freehash(trunk);
+#endif
+ trunk->parent->if_vlantrunk = NULL;
+ TRUNK_UNLOCK(trunk);
+ TRUNK_LOCK_DESTROY(trunk);
+ free(trunk, M_VLAN);
+}
+
+/*
+ * Program our multicast filter. What we're actually doing is
+ * programming the multicast filter of the parent. This has the
+ * side effect of causing the parent interface to receive multicast
+ * traffic that it doesn't really want, which ends up being discarded
+ * later by the upper protocol layers. Unfortunately, there's no way
+ * to avoid this: there really is only one physical interface.
+ *
+ * XXX: There is a possible race here if more than one thread is
+ * modifying the multicast state of the vlan interface at the same time.
+ */
+static int
+vlan_setmulti(struct ifnet *ifp)
+{
+ struct ifnet *ifp_p;
+ struct ifmultiaddr *ifma, *rifma = NULL;
+ struct ifvlan *sc;
+ struct vlan_mc_entry *mc;
+ struct sockaddr_dl sdl;
+ int error;
+
+ /*VLAN_LOCK_ASSERT();*/
+
+ /* Find the parent. */
+ sc = ifp->if_softc;
+ ifp_p = PARENT(sc);
+
+ CURVNET_SET_QUIET(ifp_p->if_vnet);
+
+ bzero((char *)&sdl, sizeof(sdl));
+ sdl.sdl_len = sizeof(sdl);
+ sdl.sdl_family = AF_LINK;
+ sdl.sdl_index = ifp_p->if_index;
+ sdl.sdl_type = IFT_ETHER;
+ sdl.sdl_alen = ETHER_ADDR_LEN;
+
+ /* First, remove any existing filter entries. */
+ while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
+ bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN);
+ error = if_delmulti(ifp_p, (struct sockaddr *)&sdl);
+ if (error)
+ return (error);
+ SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
+ free(mc, M_VLAN);
+ }
+
+ /* Now program new ones. */
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
+ if (mc == NULL)
+ return (ENOMEM);
+ bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
+ (char *)&mc->mc_addr, ETHER_ADDR_LEN);
+ SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
+ bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
+ LLADDR(&sdl), ETHER_ADDR_LEN);
+ error = if_addmulti(ifp_p, (struct sockaddr *)&sdl, &rifma);
+ if (error)
+ return (error);
+ }
+
+ CURVNET_RESTORE();
+ return (0);
+}
+
+/*
+ * A handler for parent interface link layer address changes.
+ * If the parent interface link layer address is changed we
+ * should also change it on all child vlans.
+ */
+static void
+vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
+{
+ struct ifvlan *ifv;
+#ifndef VLAN_ARRAY
+ struct ifvlan *next;
+#endif
+ int i;
+
+ /*
+ * Check if it's a trunk interface first of all
+ * to avoid needless locking.
+ */
+ if (ifp->if_vlantrunk == NULL)
+ return;
+
+ VLAN_LOCK();
+ /*
+ * OK, it's a trunk. Loop over and change all vlan's lladdrs on it.
+ */
+#ifdef VLAN_ARRAY
+ for (i = 0; i < VLAN_ARRAY_SIZE; i++)
+ if ((ifv = ifp->if_vlantrunk->vlans[i])) {
+#else /* VLAN_ARRAY */
+ for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
+ LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) {
+#endif /* VLAN_ARRAY */
+ VLAN_UNLOCK();
+ if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ VLAN_LOCK();
+ }
+ VLAN_UNLOCK();
+}
+
+/*
+ * A handler for network interface departure events.
+ * Track departure of trunks here so that we don't access invalid
+ * pointers if a trunk is ripped out from under us, e.g.,
+ * by ejecting its hot-plug card. However, if an ifnet is simply
+ * being renamed, then there's no need to tear down the state.
+ */
+static void
+vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+ struct ifvlan *ifv;
+ int i;
+
+ /*
+ * Check if it's a trunk interface first of all
+ * to avoid needless locking.
+ */
+ if (ifp->if_vlantrunk == NULL)
+ return;
+
+ /* If the ifnet is just being renamed, don't do anything. */
+ if (ifp->if_flags & IFF_RENAMING)
+ return;
+
+ VLAN_LOCK();
+ /*
+ * OK, it's a trunk. Loop over and detach all vlan's on it.
+ * Check trunk pointer after each vlan_unconfig() as it will
+ * free it and set to NULL after the last vlan was detached.
+ */
+#ifdef VLAN_ARRAY
+ for (i = 0; i < VLAN_ARRAY_SIZE; i++)
+ if ((ifv = ifp->if_vlantrunk->vlans[i])) {
+ vlan_unconfig_locked(ifv->ifv_ifp);
+ if (ifp->if_vlantrunk == NULL)
+ break;
+ }
+#else /* VLAN_ARRAY */
+restart:
+ for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
+ if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
+ vlan_unconfig_locked(ifv->ifv_ifp);
+ if (ifp->if_vlantrunk)
+ goto restart; /* trunk->hwidth can change */
+ else
+ break;
+ }
+#endif /* VLAN_ARRAY */
+ /* Trunk should have been destroyed in vlan_unconfig(). */
+ KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__));
+ VLAN_UNLOCK();
+}
+
+/*
+ * VLAN support can be loaded as a module. The only place in the
+ * system that's intimately aware of this is ether_input. We hook
+ * into this code through vlan_input_p which is defined there and
+ * set here. No one else in the system should be aware of this, so
+ * we use an explicit reference here.
+ */
+extern void (*vlan_input_p)(struct ifnet *, struct mbuf *);
+
+/* For if_link_state_change() eyes only... */
+extern void (*vlan_link_state_p)(struct ifnet *, int);
+
+static int
+vlan_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
+ if (ifdetach_tag == NULL)
+ return (ENOMEM);
+ iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
+ vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+ if (iflladdr_tag == NULL)
+ return (ENOMEM);
+ VLAN_LOCK_INIT();
+ vlan_input_p = vlan_input;
+ vlan_link_state_p = vlan_link_state;
+ vlan_trunk_cap_p = vlan_trunk_capabilities;
+#ifndef VIMAGE
+ if_clone_attach(&vlan_cloner);
+#endif
+ if (bootverbose)
+ printf("vlan: initialized, using "
+#ifdef VLAN_ARRAY
+ "full-size arrays"
+#else
+ "hash tables with chaining"
+#endif
+			"\n");
+ break;
+ case MOD_UNLOAD:
+#ifndef VIMAGE
+ if_clone_detach(&vlan_cloner);
+#endif
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
+ EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
+ vlan_input_p = NULL;
+ vlan_link_state_p = NULL;
+ vlan_trunk_cap_p = NULL;
+ VLAN_LOCK_DESTROY();
+ if (bootverbose)
+ printf("vlan: unloaded\n");
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t vlan_mod = {
+ "if_vlan",
+ vlan_modevent,
+ 0
+};
+
+DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_vlan, 3);
+
+#ifdef VIMAGE
+static void
+vnet_vlan_init(const void *unused __unused)
+{
+
+ V_vlan_cloner = vlan_cloner;
+ if_clone_attach(&V_vlan_cloner);
+}
+VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_vlan_init, NULL);
+
+static void
+vnet_vlan_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(&V_vlan_cloner);
+}
+VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
+ vnet_vlan_uninit, NULL);
+#endif
+
+static struct ifnet *
+vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag)
+{
+ const char *cp;
+ struct ifnet *ifp;
+ int t;
+
+ /* Check for <etherif>.<vlan> style interface names. */
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (ifp->if_type != IFT_ETHER)
+ continue;
+ if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0)
+ continue;
+ cp = name + strlen(ifp->if_xname);
+ if (*cp++ != '.')
+ continue;
+ if (*cp == '\0')
+ continue;
+ t = 0;
+ for(; *cp >= '0' && *cp <= '9'; cp++)
+ t = (t * 10) + (*cp - '0');
+ if (*cp != '\0')
+ continue;
+ if (tag != NULL)
+ *tag = t;
+ break;
+ }
+ IFNET_RUNLOCK_NOSLEEP();
+
+ return (ifp);
+}
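+
+/*
+ * For example (assuming an Ethernet interface named em0 exists), the
+ * name "em0.100" matches em0 above and yields a tag of 100, so
+ * "ifconfig em0.100 create" configures vlan 100 on top of em0.
+ */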
+
+static int
+vlan_clone_match(struct if_clone *ifc, const char *name)
+{
+ const char *cp;
+
+ if (vlan_clone_match_ethertag(ifc, name, NULL) != NULL)
+ return (1);
+
+ if (strncmp(VLANNAME, name, strlen(VLANNAME)) != 0)
+ return (0);
+ for (cp = name + 4; *cp != '\0'; cp++) {
+ if (*cp < '0' || *cp > '9')
+ return (0);
+ }
+
+ return (1);
+}
+
+static int
+vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
+{
+ char *dp;
+ int wildcard;
+ int unit;
+ int error;
+ int tag;
+ int ethertag;
+ struct ifvlan *ifv;
+ struct ifnet *ifp;
+ struct ifnet *p;
+ struct vlanreq vlr;
+ static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
+
+ /*
+ * There are 3 (ugh) ways to specify the cloned device:
+ * o pass a parameter block with the clone request.
+ * o specify parameters in the text of the clone device name
+ * o specify no parameters and get an unattached device that
+ * must be configured separately.
+ * The first technique is preferred; the latter two are
+	 * supported for backwards compatibility.
+ */
+ if (params) {
+ error = copyin(params, &vlr, sizeof(vlr));
+ if (error)
+ return error;
+ p = ifunit(vlr.vlr_parent);
+ if (p == NULL)
+ return ENXIO;
+ /*
+ * Don't let the caller set up a VLAN tag with
+ * anything except VLID bits.
+ */
+ if (vlr.vlr_tag & ~EVL_VLID_MASK)
+ return (EINVAL);
+ error = ifc_name2unit(name, &unit);
+ if (error != 0)
+ return (error);
+
+ ethertag = 1;
+ tag = vlr.vlr_tag;
+ wildcard = (unit < 0);
+ } else if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) {
+ ethertag = 1;
+ unit = -1;
+ wildcard = 0;
+
+ /*
+ * Don't let the caller set up a VLAN tag with
+ * anything except VLID bits.
+ */
+ if (tag & ~EVL_VLID_MASK)
+ return (EINVAL);
+ } else {
+ ethertag = 0;
+
+ error = ifc_name2unit(name, &unit);
+ if (error != 0)
+ return (error);
+
+ wildcard = (unit < 0);
+ }
+
+ error = ifc_alloc_unit(ifc, &unit);
+ if (error != 0)
+ return (error);
+
+ /* In the wildcard case, we need to update the name. */
+ if (wildcard) {
+ for (dp = name; *dp != '\0'; dp++);
+ if (snprintf(dp, len - (dp-name), "%d", unit) >
+ len - (dp-name) - 1) {
+ panic("%s: interface name too long", __func__);
+ }
+ }
+
+ ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO);
+ ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ ifc_free_unit(ifc, unit);
+ free(ifv, M_VLAN);
+ return (ENOSPC);
+ }
+ SLIST_INIT(&ifv->vlan_mc_listhead);
+
+ ifp->if_softc = ifv;
+ /*
+ * Set the name manually rather than using if_initname because
+ * we don't conform to the default naming convention for interfaces.
+ */
+ strlcpy(ifp->if_xname, name, IFNAMSIZ);
+ ifp->if_dname = ifc->ifc_name;
+ ifp->if_dunit = unit;
+ /* NB: flags are not set here */
+ ifp->if_linkmib = &ifv->ifv_mib;
+ ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
+ /* NB: mtu is not set here */
+
+ ifp->if_init = vlan_init;
+ ifp->if_start = vlan_start;
+ ifp->if_ioctl = vlan_ioctl;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ ifp->if_flags = VLAN_IFFLAGS;
+ ether_ifattach(ifp, eaddr);
+ /* Now undo some of the damage... */
+ ifp->if_baudrate = 0;
+ ifp->if_type = IFT_L2VLAN;
+ ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN;
+
+ if (ethertag) {
+ error = vlan_config(ifv, p, tag);
+ if (error != 0) {
+ /*
+			 * Since we've partially failed, we need to back
+ * out all the way, otherwise userland could get
+ * confused. Thus, we destroy the interface.
+ */
+ ether_ifdetach(ifp);
+ vlan_unconfig(ifp);
+ if_free_type(ifp, IFT_ETHER);
+ ifc_free_unit(ifc, unit);
+ free(ifv, M_VLAN);
+
+ return (error);
+ }
+
+ /* Update flags on the parent, if necessary. */
+ vlan_setflags(ifp, 1);
+ }
+
+ return (0);
+}
+
+static int
+vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
+{
+ struct ifvlan *ifv = ifp->if_softc;
+ int unit = ifp->if_dunit;
+
+ ether_ifdetach(ifp); /* first, remove it from system-wide lists */
+ vlan_unconfig(ifp); /* now it can be unconfigured and freed */
+ if_free_type(ifp, IFT_ETHER);
+ free(ifv, M_VLAN);
+ ifc_free_unit(ifc, unit);
+
+ return (0);
+}
+
+/*
+ * The ifp->if_init entry point for vlan(4) is a no-op.
+ */
+static void
+vlan_init(void *foo __unused)
+{
+}
+
+/*
+ * The if_start method for the vlan(4) interface. It doesn't
+ * raise the IFF_DRV_OACTIVE flag, since it is called
+ * only from the IFQ_HANDOFF() macro in ether_output_frame().
+ * If the flag were raised while the interface queue is full,
+ * vlan_start() would never be called again, the queue would
+ * never get emptied, and the interface would stall forever.
+ */
+static void
+vlan_start(struct ifnet *ifp)
+{
+ struct ifvlan *ifv;
+ struct ifnet *p;
+ struct mbuf *m;
+ int error;
+
+ ifv = ifp->if_softc;
+ p = PARENT(ifv);
+
+ for (;;) {
+ IF_DEQUEUE(&ifp->if_snd, m);
+ if (m == NULL)
+ break;
+ BPF_MTAP(ifp, m);
+
+ /*
+		 * Do not transmit through the parent if the parent is not
+		 * up, or the parent's driver may crash the system.
+ */
+ if (!UP_AND_RUNNING(p)) {
+ m_freem(m);
+ ifp->if_collisions++;
+ continue;
+ }
+
+ /*
+ * Pad the frame to the minimum size allowed if told to.
+ * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
+ * paragraph C.4.4.3.b. It can help to work around buggy
+ * bridges that violate paragraph C.4.4.3.a from the same
+ * document, i.e., fail to pad short frames after untagging.
+ * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
+ * untagging it will produce a 62-byte frame, which is a runt
+ * and requires padding. There are VLAN-enabled network
+ * devices that just discard such runts instead or mishandle
+ * them somehow.
+ */
+ if (soft_pad) {
+ static char pad[8]; /* just zeros */
+ int n;
+
+ for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
+ n > 0; n -= sizeof(pad))
+ if (!m_append(m, min(n, sizeof(pad)), pad))
+ break;
+
+ if (n > 0) {
+ if_printf(ifp, "cannot pad short frame\n");
+ ifp->if_oerrors++;
+ m_freem(m);
+ continue;
+ }
+ }
+
+ /*
+		 * If the underlying interface can do VLAN tag insertion
+		 * itself, just pass the packet along. However, we need some
+		 * way to tell the interface which VLAN tag to use, so we
+		 * store the tag in the mbuf packet header and mark the mbuf
+		 * M_VLANTAG.
+ */
+ if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
+ m->m_pkthdr.ether_vtag = ifv->ifv_tag;
+ m->m_flags |= M_VLANTAG;
+ } else {
+ m = ether_vlanencap(m, ifv->ifv_tag);
+ if (m == NULL) {
+ if_printf(ifp,
+ "unable to prepend VLAN header\n");
+ ifp->if_oerrors++;
+ continue;
+ }
+ }
+
+ /*
+ * Send it, precisely as ether_output() would have.
+ * We are already running at splimp.
+ */
+ error = (p->if_transmit)(p, m);
+ if (!error)
+ ifp->if_opackets++;
+ else
+ ifp->if_oerrors++;
+ }
+}
+
+static void
+vlan_input(struct ifnet *ifp, struct mbuf *m)
+{
+ struct ifvlantrunk *trunk = ifp->if_vlantrunk;
+ struct ifvlan *ifv;
+ uint16_t tag;
+
+ KASSERT(trunk != NULL, ("%s: no trunk", __func__));
+
+ if (m->m_flags & M_VLANTAG) {
+ /*
+ * Packet is tagged, but m contains a normal
+ * Ethernet frame; the tag is stored out-of-band.
+ */
+ tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
+ m->m_flags &= ~M_VLANTAG;
+ } else {
+ struct ether_vlan_header *evl;
+
+ /*
+ * Packet is tagged in-band as specified by 802.1q.
+ */
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ if (m->m_len < sizeof(*evl) &&
+ (m = m_pullup(m, sizeof(*evl))) == NULL) {
+ if_printf(ifp, "cannot pullup VLAN header\n");
+ return;
+ }
+ evl = mtod(m, struct ether_vlan_header *);
+ tag = EVL_VLANOFTAG(ntohs(evl->evl_tag));
+
+ /*
+ * Remove the 802.1q header by copying the Ethernet
+ * addresses over it and adjusting the beginning of
+ * the data in the mbuf. The encapsulated Ethernet
+ * type field is already in place.
+ */
+ bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
+ ETHER_HDR_LEN - ETHER_TYPE_LEN);
+ m_adj(m, ETHER_VLAN_ENCAP_LEN);
+ break;
+
+ default:
+#ifdef INVARIANTS
+ panic("%s: %s has unsupported if_type %u",
+ __func__, ifp->if_xname, ifp->if_type);
+#endif
+ m_freem(m);
+ ifp->if_noproto++;
+ return;
+ }
+ }
+
+ TRUNK_RLOCK(trunk);
+#ifdef VLAN_ARRAY
+ ifv = trunk->vlans[tag];
+#else
+ ifv = vlan_gethash(trunk, tag);
+#endif
+ if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
+ TRUNK_RUNLOCK(trunk);
+ m_freem(m);
+ ifp->if_noproto++;
+ return;
+ }
+ TRUNK_RUNLOCK(trunk);
+
+ m->m_pkthdr.rcvif = ifv->ifv_ifp;
+ ifv->ifv_ifp->if_ipackets++;
+
+ /* Pass it back through the parent's input routine. */
+ (*ifp->if_input)(ifv->ifv_ifp, m);
+}
+
+static int
+vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
+{
+ struct ifvlantrunk *trunk;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* VID numbers 0x0 and 0xFFF are reserved */
+ if (tag == 0 || tag == 0xFFF)
+ return (EINVAL);
+ if (p->if_type != IFT_ETHER)
+ return (EPROTONOSUPPORT);
+ if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
+ return (EPROTONOSUPPORT);
+ if (ifv->ifv_trunk)
+ return (EBUSY);
+
+ if (p->if_vlantrunk == NULL) {
+ trunk = malloc(sizeof(struct ifvlantrunk),
+ M_VLAN, M_WAITOK | M_ZERO);
+#ifndef VLAN_ARRAY
+ vlan_inithash(trunk);
+#endif
+ VLAN_LOCK();
+ if (p->if_vlantrunk != NULL) {
+			/* A race that is very unlikely to be hit. */
+#ifndef VLAN_ARRAY
+ vlan_freehash(trunk);
+#endif
+ free(trunk, M_VLAN);
+ goto exists;
+ }
+ TRUNK_LOCK_INIT(trunk);
+ TRUNK_LOCK(trunk);
+ p->if_vlantrunk = trunk;
+ trunk->parent = p;
+ } else {
+ VLAN_LOCK();
+exists:
+ trunk = p->if_vlantrunk;
+ TRUNK_LOCK(trunk);
+ }
+
+ ifv->ifv_tag = tag; /* must set this before vlan_inshash() */
+#ifdef VLAN_ARRAY
+ if (trunk->vlans[tag] != NULL) {
+ error = EEXIST;
+ goto done;
+ }
+ trunk->vlans[tag] = ifv;
+ trunk->refcnt++;
+#else
+ error = vlan_inshash(trunk, ifv);
+ if (error)
+ goto done;
+#endif
+ ifv->ifv_proto = ETHERTYPE_VLAN;
+ ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN;
+ ifv->ifv_mintu = ETHERMIN;
+ ifv->ifv_pflags = 0;
+
+ /*
+ * If the parent supports the VLAN_MTU capability,
+ * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames,
+ * use it.
+ */
+ if (p->if_capenable & IFCAP_VLAN_MTU) {
+ /*
+ * No need to fudge the MTU since the parent can
+ * handle extended frames.
+ */
+ ifv->ifv_mtufudge = 0;
+ } else {
+ /*
+ * Fudge the MTU by the encapsulation size. This
+ * makes us incompatible with strictly compliant
+ * 802.1Q implementations, but allows us to use
+ * the feature with other NetBSD implementations,
+ * which might still be useful.
+ */
+ ifv->ifv_mtufudge = ifv->ifv_encaplen;
+ }
+
+ ifv->ifv_trunk = trunk;
+ ifp = ifv->ifv_ifp;
+ ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
+ ifp->if_baudrate = p->if_baudrate;
+ /*
+ * Copy only a selected subset of flags from the parent.
+ * Other flags are none of our business.
+ */
+#define VLAN_COPY_FLAGS (IFF_SIMPLEX)
+ ifp->if_flags &= ~VLAN_COPY_FLAGS;
+ ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS;
+#undef VLAN_COPY_FLAGS
+
+ ifp->if_link_state = p->if_link_state;
+
+ vlan_capabilities(ifv);
+
+ /*
+ * Set up our ``Ethernet address'' to reflect the underlying
+ * physical interface's.
+ */
+ bcopy(IF_LLADDR(p), IF_LLADDR(ifp), ETHER_ADDR_LEN);
+
+ /*
+ * Configure multicast addresses that may already be
+ * joined on the vlan device.
+ */
+ (void)vlan_setmulti(ifp); /* XXX: VLAN lock held */
+
+ /* We are ready for operation now. */
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+done:
+ TRUNK_UNLOCK(trunk);
+ if (error == 0)
+ EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_tag);
+ VLAN_UNLOCK();
+
+ return (error);
+}
+
+static void
+vlan_unconfig(struct ifnet *ifp)
+{
+
+ VLAN_LOCK();
+ vlan_unconfig_locked(ifp);
+ VLAN_UNLOCK();
+}
+
+static void
+vlan_unconfig_locked(struct ifnet *ifp)
+{
+ struct ifvlantrunk *trunk;
+ struct vlan_mc_entry *mc;
+ struct ifvlan *ifv;
+ struct ifnet *parent;
+
+ VLAN_LOCK_ASSERT();
+
+ ifv = ifp->if_softc;
+ trunk = ifv->ifv_trunk;
+ parent = NULL;
+
+ if (trunk != NULL) {
+ struct sockaddr_dl sdl;
+
+ TRUNK_LOCK(trunk);
+ parent = trunk->parent;
+
+ /*
+ * Since the interface is being unconfigured, we need to
+ * empty the list of multicast groups that we may have joined
+ * while we were alive from the parent's list.
+ */
+ bzero((char *)&sdl, sizeof(sdl));
+ sdl.sdl_len = sizeof(sdl);
+ sdl.sdl_family = AF_LINK;
+ sdl.sdl_index = parent->if_index;
+ sdl.sdl_type = IFT_ETHER;
+ sdl.sdl_alen = ETHER_ADDR_LEN;
+
+ while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
+ bcopy((char *)&mc->mc_addr, LLADDR(&sdl),
+ ETHER_ADDR_LEN);
+
+ /*
+ * This may fail if the parent interface is
+ * being detached.  Regardless, we should make
+ * a best effort to free this interface, as all
+ * callers expect vlan destruction to succeed.
+ */
+ (void)if_delmulti(parent, (struct sockaddr *)&sdl);
+ SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
+ free(mc, M_VLAN);
+ }
+
+ vlan_setflags(ifp, 0); /* clear special flags on parent */
+#ifdef VLAN_ARRAY
+ trunk->vlans[ifv->ifv_tag] = NULL;
+ trunk->refcnt--;
+#else
+ vlan_remhash(trunk, ifv);
+#endif
+ ifv->ifv_trunk = NULL;
+
+ /*
+ * Check if we were the last.
+ */
+ if (trunk->refcnt == 0) {
+ trunk->parent->if_vlantrunk = NULL;
+ /*
+ * XXXGL: If some ithread has already entered
+ * vlan_input() and is now blocked on the trunk
+ * lock, then it should preempt us right after
+ * unlock and finish its work. Then we will acquire
+ * lock again in trunk_destroy().
+ */
+ TRUNK_UNLOCK(trunk);
+ trunk_destroy(trunk);
+ } else
+ TRUNK_UNLOCK(trunk);
+ }
+
+ /* Disconnect from parent. */
+ if (ifv->ifv_pflags)
+ if_printf(ifp, "%s: ifv_pflags unclean\n", __func__);
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_link_state = LINK_STATE_UNKNOWN;
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
+ /*
+ * Only dispatch an event if the vlan was
+ * attached; otherwise there is nothing
+ * to clean up anyway.
+ */
+ if (parent != NULL)
+ EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_tag);
+}
+
+/* Handle a reference counted flag that should be set on the parent as well */
+static int
+vlan_setflag(struct ifnet *ifp, int flag, int status,
+ int (*func)(struct ifnet *, int))
+{
+ struct ifvlan *ifv;
+ int error;
+
+ /* XXX VLAN_LOCK_ASSERT(); */
+
+ ifv = ifp->if_softc;
+ status = status ? (ifp->if_flags & flag) : 0;
+ /* Now "status" contains the flag value or 0 */
+
+ /*
+ * See if the recorded status of the parent differs from
+ * what we want it to be.  If it does, flip it.  We record
+ * the parent's status in ifv_pflags so that we never clear
+ * a flag on the parent that we haven't set ourselves.  In
+ * fact, we don't clear or set the parent's flags directly,
+ * but acquire or release references to them; that is why we
+ * can be sure the recorded flags stay in accord with the
+ * parent's actual flags.
+ */
+ if (status != (ifv->ifv_pflags & flag)) {
+ error = (*func)(PARENT(ifv), status);
+ if (error)
+ return (error);
+ ifv->ifv_pflags &= ~flag;
+ ifv->ifv_pflags |= status;
+ }
+ return (0);
+}
+
+/*
+ * Handle IFF_* flags that require certain changes on the parent:
+ * if "status" is true, update parent's flags respective to our if_flags;
+ * if "status" is false, forcibly clear the flags set on the parent.
+ */
+static int
+vlan_setflags(struct ifnet *ifp, int status)
+{
+ int error, i;
+
+ for (i = 0; vlan_pflags[i].flag; i++) {
+ error = vlan_setflag(ifp, vlan_pflags[i].flag,
+ status, vlan_pflags[i].func);
+ if (error)
+ return (error);
+ }
+ return (0);
+}
+
+/* Inform all vlans that their parent has changed link state */
+static void
+vlan_link_state(struct ifnet *ifp, int link)
+{
+ struct ifvlantrunk *trunk = ifp->if_vlantrunk;
+ struct ifvlan *ifv;
+ int i;
+
+ TRUNK_LOCK(trunk);
+#ifdef VLAN_ARRAY
+ for (i = 0; i < VLAN_ARRAY_SIZE; i++)
+ if (trunk->vlans[i] != NULL) {
+ ifv = trunk->vlans[i];
+#else
+ for (i = 0; i < (1 << trunk->hwidth); i++)
+ LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) {
+#endif
+ ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate;
+ if_link_state_change(ifv->ifv_ifp,
+ trunk->parent->if_link_state);
+ }
+ TRUNK_UNLOCK(trunk);
+}
+
+static void
+vlan_capabilities(struct ifvlan *ifv)
+{
+ struct ifnet *p = PARENT(ifv);
+ struct ifnet *ifp = ifv->ifv_ifp;
+
+ TRUNK_LOCK_ASSERT(TRUNK(ifv));
+
+ /*
+ * If the parent interface can do checksum offloading
+ * on VLANs, then propagate its hardware-assisted
+ * checksumming flags. Also assert that checksum
+ * offloading requires hardware VLAN tagging.
+ */
+ if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
+ ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM;
+
+ if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
+ p->if_capenable & IFCAP_VLAN_HWTAGGING) {
+ ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
+ ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
+ CSUM_UDP | CSUM_SCTP | CSUM_IP_FRAGS | CSUM_FRAGMENT);
+ } else {
+ ifp->if_capenable = 0;
+ ifp->if_hwassist = 0;
+ }
+ /*
+ * If the parent interface can do TSO on VLANs then
+ * propagate the hardware-assisted flag. TSO on VLANs
+ * does not necessarily require hardware VLAN tagging.
+ */
+ if (p->if_capabilities & IFCAP_VLAN_HWTSO)
+ ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
+ if (p->if_capenable & IFCAP_VLAN_HWTSO) {
+ ifp->if_capenable |= p->if_capenable & IFCAP_TSO;
+ ifp->if_hwassist |= p->if_hwassist & CSUM_TSO;
+ } else {
+ ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
+ ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
+ }
+}
+
+static void
+vlan_trunk_capabilities(struct ifnet *ifp)
+{
+ struct ifvlantrunk *trunk = ifp->if_vlantrunk;
+ struct ifvlan *ifv;
+ int i;
+
+ TRUNK_LOCK(trunk);
+#ifdef VLAN_ARRAY
+ for (i = 0; i < VLAN_ARRAY_SIZE; i++)
+ if (trunk->vlans[i] != NULL) {
+ ifv = trunk->vlans[i];
+#else
+ for (i = 0; i < (1 << trunk->hwidth); i++) {
+ LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
+#endif
+ vlan_capabilities(ifv);
+ }
+ TRUNK_UNLOCK(trunk);
+}
+
+static int
+vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct ifnet *p;
+ struct ifreq *ifr;
+ struct ifvlan *ifv;
+ struct vlanreq vlr;
+ int error = 0;
+
+ ifr = (struct ifreq *)data;
+ ifv = ifp->if_softc;
+
+ switch (cmd) {
+ case SIOCGIFMEDIA:
+ VLAN_LOCK();
+ if (TRUNK(ifv) != NULL) {
+ p = PARENT(ifv);
+ VLAN_UNLOCK();
+ error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data);
+ /* Limit the result to the parent's current config. */
+ if (error == 0) {
+ struct ifmediareq *ifmr;
+
+ ifmr = (struct ifmediareq *)data;
+ if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) {
+ ifmr->ifm_count = 1;
+ error = copyout(&ifmr->ifm_current,
+ ifmr->ifm_ulist,
+ sizeof(int));
+ }
+ }
+ } else {
+ VLAN_UNLOCK();
+ error = EINVAL;
+ }
+ break;
+
+ case SIOCSIFMEDIA:
+ error = EINVAL;
+ break;
+
+ case SIOCSIFMTU:
+ /*
+ * Set the interface MTU.
+ */
+ VLAN_LOCK();
+ if (TRUNK(ifv) != NULL) {
+ if (ifr->ifr_mtu >
+ (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) ||
+ ifr->ifr_mtu <
+ (ifv->ifv_mintu - ifv->ifv_mtufudge))
+ error = EINVAL;
+ else
+ ifp->if_mtu = ifr->ifr_mtu;
+ } else
+ error = EINVAL;
+ VLAN_UNLOCK();
+ break;
+
+ case SIOCSETVLAN:
+#ifdef VIMAGE
+ if (ifp->if_vnet != ifp->if_home_vnet) {
+ error = EPERM;
+ break;
+ }
+#endif
+ error = copyin(ifr->ifr_data, &vlr, sizeof(vlr));
+ if (error)
+ break;
+ if (vlr.vlr_parent[0] == '\0') {
+ vlan_unconfig(ifp);
+ break;
+ }
+ p = ifunit(vlr.vlr_parent);
+ if (p == 0) {
+ error = ENOENT;
+ break;
+ }
+ /*
+ * Don't let the caller set up a VLAN tag with
+ * anything except VLID bits.
+ */
+ if (vlr.vlr_tag & ~EVL_VLID_MASK) {
+ error = EINVAL;
+ break;
+ }
+ error = vlan_config(ifv, p, vlr.vlr_tag);
+ if (error)
+ break;
+
+ /* Update flags on the parent, if necessary. */
+ vlan_setflags(ifp, 1);
+ break;
+
+ case SIOCGETVLAN:
+#ifdef VIMAGE
+ if (ifp->if_vnet != ifp->if_home_vnet) {
+ error = EPERM;
+ break;
+ }
+#endif
+ bzero(&vlr, sizeof(vlr));
+ VLAN_LOCK();
+ if (TRUNK(ifv) != NULL) {
+ strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
+ sizeof(vlr.vlr_parent));
+ vlr.vlr_tag = ifv->ifv_tag;
+ }
+ VLAN_UNLOCK();
+ error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
+ break;
+
+ case SIOCSIFFLAGS:
+ /*
+ * We should propagate selected flags to the parent,
+ * e.g., promiscuous mode.
+ */
+ if (TRUNK(ifv) != NULL)
+ error = vlan_setflags(ifp, 1);
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ /*
+ * If we don't have a parent, just remember the membership for
+ * when we do.
+ */
+ if (TRUNK(ifv) != NULL)
+ error = vlan_setmulti(ifp);
+ break;
+
+ default:
+ error = ether_ioctl(ifp, cmd, data);
+ }
+
+ return (error);
+}
diff --git a/freebsd/sys/net/if_vlan_var.h b/freebsd/sys/net/if_vlan_var.h
new file mode 100644
index 00000000..045e2fa1
--- /dev/null
+++ b/freebsd/sys/net/if_vlan_var.h
@@ -0,0 +1,137 @@
+/*-
+ * Copyright 1998 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_VLAN_VAR_HH_
+#define _NET_IF_VLAN_VAR_HH_ 1
+
+struct ether_vlan_header {
+ u_char evl_dhost[ETHER_ADDR_LEN];
+ u_char evl_shost[ETHER_ADDR_LEN];
+ u_int16_t evl_encap_proto;
+ u_int16_t evl_tag;
+ u_int16_t evl_proto;
+};
+
+#define EVL_VLID_MASK 0x0FFF
+#define EVL_PRI_MASK 0xE000
+#define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK)
+#define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7)
+#define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1)
+#define EVL_MAKETAG(vlid, pri, cfi) \
+ ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK))
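+
+/*
+ * For example, a tag value of 0x6123 carries VLAN ID 0x123
+ * (EVL_VLANOFTAG), priority 3 (EVL_PRIOFTAG) and CFI 0
+ * (EVL_CFIOFTAG); EVL_MAKETAG(0x123, 3, 0) reassembles the same
+ * 0x6123 value.
+ */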
+
+/* Set the VLAN ID in an mbuf packet header non-destructively. */
+#define EVL_APPLY_VLID(m, vlid) \
+ do { \
+ if ((m)->m_flags & M_VLANTAG) { \
+ (m)->m_pkthdr.ether_vtag &= EVL_VLID_MASK; \
+ (m)->m_pkthdr.ether_vtag |= (vlid); \
+ } else { \
+ (m)->m_pkthdr.ether_vtag = (vlid); \
+ (m)->m_flags |= M_VLANTAG; \
+ } \
+ } while (0)
+
+/* Set the priority ID in an mbuf packet header non-destructively. */
+#define EVL_APPLY_PRI(m, pri) \
+ do { \
+ if ((m)->m_flags & M_VLANTAG) { \
+ uint16_t __vlantag = (m)->m_pkthdr.ether_vtag; \
+ (m)->m_pkthdr.ether_vtag |= EVL_MAKETAG( \
+ EVL_VLANOFTAG(__vlantag), (pri), \
+ EVL_CFIOFTAG(__vlantag)); \
+ } else { \
+ (m)->m_pkthdr.ether_vtag = \
+ EVL_MAKETAG(0, (pri), 0); \
+ (m)->m_flags |= M_VLANTAG; \
+ } \
+ } while (0)
+
+/* sysctl(3) tags, for compatibility purposes */
+#define VLANCTL_PROTO 1
+#define VLANCTL_MAX 2
+
+/*
+ * Configuration structure for SIOCSETVLAN and SIOCGETVLAN ioctls.
+ */
+struct vlanreq {
+ char vlr_parent[IFNAMSIZ];
+ u_short vlr_tag;
+};
+#define SIOCSETVLAN SIOCSIFGENERIC
+#define SIOCGETVLAN SIOCGIFGENERIC
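+
+/*
+ * A minimal userland sketch of these ioctls (the socket "s" and the
+ * interface names are illustrative placeholders): fill in a struct
+ * vlanreq, point ifr_data at it, and issue SIOCSETVLAN:
+ *
+ *     struct ifreq ifr;
+ *     struct vlanreq vlr;
+ *
+ *     memset(&ifr, 0, sizeof(ifr));
+ *     memset(&vlr, 0, sizeof(vlr));
+ *     strlcpy(ifr.ifr_name, "vlan0", sizeof(ifr.ifr_name));
+ *     strlcpy(vlr.vlr_parent, "em0", sizeof(vlr.vlr_parent));
+ *     vlr.vlr_tag = 42;
+ *     ifr.ifr_data = (caddr_t)&vlr;
+ *     if (ioctl(s, SIOCSETVLAN, &ifr) == -1)
+ *             err(1, "SIOCSETVLAN");
+ */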
+
+#ifdef _KERNEL
+/*
+ * Drivers that are capable of adding and removing the VLAN header
+ * in hardware indicate they support this by marking IFCAP_VLAN_HWTAGGING
+ * in if_capabilities. Drivers for hardware that is capable
+ * of handling larger MTUs, which may include a software-appended
+ * VLAN header without lowering the normal MTU, should mark
+ * IFCAP_VLAN_MTU in if_capabilities; this notifies the VLAN code
+ * that it can leave the MTU on the vlan interface at the normal setting.
+ */
+
+/*
+ * VLAN tags are stored in host byte order. Byte swapping may be
+ * necessary.
+ *
+ * Drivers that support hardware VLAN tag stripping fill in the
+ * received VLAN tag (containing both vlan and priority information)
+ * into the ether_vtag mbuf packet header field:
+ *
+ * m->m_pkthdr.ether_vtag = vlan_id; // ntohs()?
+ * m->m_flags |= M_VLANTAG;
+ *
+ * to mark the packet m with the specified VLAN tag.
+ *
+ * On output the driver should check the mbuf for the M_VLANTAG
+ * flag to see if a VLAN tag is present and valid:
+ *
+ * if (m->m_flags & M_VLANTAG) {
+ * ... = m->m_pkthdr.ether_vtag; // htons()?
+ * ... pass tag to hardware ...
+ * }
+ *
+ * Note that a driver must indicate it supports hardware VLAN
+ * stripping/insertion by marking IFCAP_VLAN_HWTAGGING in
+ * if_capabilities.
+ */
+
+#define VLAN_CAPABILITIES(_ifp) do { \
+ if ((_ifp)->if_vlantrunk != NULL) \
+ (*vlan_trunk_cap_p)(_ifp); \
+} while (0)
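+
+/*
+ * A short usage sketch: a driver whose offload configuration has just
+ * changed (for example in its SIOCSIFCAP handler) can propagate the
+ * new settings to any vlan interfaces stacked on it with
+ *
+ *     VLAN_CAPABILITIES(ifp);
+ *
+ * after updating if_capenable and if_hwassist.
+ */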
+
+extern void (*vlan_trunk_cap_p)(struct ifnet *);
+#endif /* _KERNEL */
+
+#endif /* _NET_IF_VLAN_VAR_HH_ */
diff --git a/freebsd/sys/net/iso88025.h b/freebsd/sys/net/iso88025.h
new file mode 100644
index 00000000..26e3ada6
--- /dev/null
+++ b/freebsd/sys/net/iso88025.h
@@ -0,0 +1,172 @@
+/*-
+ * Copyright (c) 1998, Larry Lile
+ * All rights reserved.
+ *
+ * For latest sources and information on this driver, please
+ * go to http://anarchy.stdio.com.
+ *
+ * Questions, comments or suggestions should be directed to
+ * Larry Lile <lile@stdio.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * Information gathered from tokenring@freebsd, /sys/net/ethernet.h and
+ * the Mach token ring driver.
+ */
+
+/*
+ * Fundamental constants relating to iso 802.5
+ */
+
+#ifndef _NET_ISO88025_HH_
+#define _NET_ISO88025_HH_
+
+/*
+ * General ISO 802.5 definitions
+ */
+#define ISO88025_ADDR_LEN 6
+#define ISO88025_CF_LEN 2
+#define ISO88025_HDR_LEN (ISO88025_CF_LEN + (ISO88025_ADDR_LEN * 2))
+#define RCF_LEN 2
+#define RIF_MAX_RD 14
+#define RIF_MAX_LEN 16
+
+#define TR_AC 0x10
+#define TR_LLC_FRAME 0x40
+
+#define TR_4MBPS 4000000
+#define TR_16MBPS 16000000
+#define TR_100MBPS 100000000
+
+/*
+ * Source routing
+ */
+#define TR_RII 0x80
+#define TR_RCF_BCST_MASK 0xe000
+#define TR_RCF_LEN_MASK 0x1f00
+#define TR_RCF_DIR 0x0080
+#define TR_RCF_LF_MASK 0x0070
+
+#define TR_RCF_RIFLEN(x) ((ntohs(x) & TR_RCF_LEN_MASK) >> 8)
+
+/*
+ * Minimum and maximum packet payload lengths.
+ */
+#define ISO88025_MIN_LEN 0
+#define ISO88025_MAX_LEN_4 4464
+#define ISO88025_MAX_LEN_16 17960
+#define ISO88025_MAX_LEN ISO88025_MAX_LEN_16
+
+/*
+ * A macro to validate a packet payload length.
+ */
+#define ISO88025_IS_VALID_LEN(foo) \
+ ((foo) >= ISO88025_MIN_LEN && (foo) <= ISO88025_MAX_LEN)
+
+/* Access Control field */
+#define AC_PRI_MASK 0xe0 /* Priority bits */
+#define AC_TOKEN 0x10 /* Token bit: 0=Token, 1=Frame */
+#define AC_MONITOR 0x08 /* Monitor */
+#define AC_RESV_MASK 0x07 /* Reservation bits */
+
+/* Frame Control field */
+#define FC_FT_MASK 0xc0 /* Frame Type */
+#define FC_FT_MAC 0x00 /* MAC frame */
+#define FC_FT_LLC 0x40 /* LLC frame */
+#define FC_ATTN_MASK 0x0f /* Attention bits */
+#define FC_ATTN_EB 0x01 /* Express buffer */
+#define FC_ATTN_BE 0x02 /* Beacon */
+#define FC_ATTN_CT 0x03 /* Claim token */
+#define FC_ATTN_RP 0x04 /* Ring purge */
+#define FC_ATTN_AMP 0x05 /* Active monitor present */
+#define FC_ATTN_SMP 0x06 /* Standby monitor present */
+
+/* Token Ring destination address */
+#define DA_IG 0x80 /* Individual/group address. */
+ /* 0=Individual, 1=Group */
+#define DA_UL 0x40 /* Universal/local address. */
+ /* 0=Universal, 1=Local */
+/* Token Ring source address */
+#define SA_RII 0x80 /* Routing information indicator */
+#define SA_IG 0x40 /* Individual/group address */
+ /* 0=Group, 1=Individual */
+
+/*
+ * ISO 802.5 physical header
+ */
+struct iso88025_header {
+ u_int8_t ac; /* access control field */
+ u_int8_t fc; /* frame control field */
+ u_int8_t iso88025_dhost[ISO88025_ADDR_LEN]; /* destination address */
+ u_int8_t iso88025_shost[ISO88025_ADDR_LEN]; /* source address */
+ u_int16_t rcf; /* route control field */
+ u_int16_t rd[RIF_MAX_RD]; /* routing designators */
+} __packed;
+
+struct iso88025_rif {
+ u_int16_t rcf; /* route control field */
+ u_int16_t rd[RIF_MAX_RD]; /* routing designators */
+} __packed;
+
+struct iso88025_sockaddr_data {
+ u_char ether_dhost[ISO88025_ADDR_LEN];
+ u_char ether_shost[ISO88025_ADDR_LEN];
+ u_char ac;
+ u_char fc;
+};
+
+struct iso88025_sockaddr_dl_data {
+ u_short trld_rcf;
+ u_short *trld_route[RIF_MAX_LEN];
+};
+
+#define ISO88025_MAX(a, b) (((a)>(b))?(a):(b))
+#define SDL_ISO88025(s) ((struct iso88025_sockaddr_dl_data *) \
+ ((s)->sdl_data + \
+ ISO88025_MAX((s)->sdl_nlen + (s)->sdl_alen + \
+ (s)->sdl_slen, 12)))
+
+/*
+ * Structure of a 48-bit iso 802.5 address.
+ * (We could also add the 16-bit addresses as a union.)
+ */
+struct iso88025_addr {
+ u_char octet[ISO88025_ADDR_LEN];
+};
+
+#define ISO88025_MAX_MTU 18000
+#define ISO88025_DEFAULT_MTU 1500
+
+#define ISO88025_BPF_UNSUPPORTED 0
+#define ISO88025_BPF_SUPPORTED 1
+
+void iso88025_ifattach (struct ifnet *, const u_int8_t *, int);
+void iso88025_ifdetach (struct ifnet *, int);
+int iso88025_ioctl (struct ifnet *, u_long, caddr_t );
+int iso88025_output (struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+void iso88025_input (struct ifnet *, struct mbuf *);
+
+#endif
diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c
new file mode 100644
index 00000000..4d34953a
--- /dev/null
+++ b/freebsd/sys/net/netisr.c
@@ -0,0 +1,1172 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2007-2009 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * netisr is a packet dispatch service, allowing synchronous (directly
+ * dispatched) and asynchronous (deferred dispatch) processing of packets by
+ * registered protocol handlers. Callers pass a protocol identifier and
+ * packet to netisr, along with a direct dispatch hint, and work will either
+ * be immediately processed with the registered handler, or passed to a
+ * kernel software interrupt (SWI) thread for deferred dispatch. Callers
+ * will generally select one or the other based on:
+ *
+ * - Might directly dispatching a netisr handler lead to code reentrance or
+ * lock recursion, such as entering the socket code from the socket code.
+ * - Might directly dispatching a netisr handler lead to recursive
+ * processing, such as when decapsulating several wrapped layers of tunnel
+ * information (IPSEC within IPSEC within ...).
+ *
+ * Maintaining ordering for protocol streams is a critical design concern.
+ * Enforcing ordering limits the opportunity for concurrency, but maintains
+ * the strong ordering requirements found in some protocols, such as TCP. Of
+ * related concern is CPU affinity--it is desirable to process all data
+ * associated with a particular stream on the same CPU over time in order to
+ * avoid acquiring locks associated with the connection on different CPUs,
+ * keep connection data in one cache, and to generally encourage associated
+ * user threads to live on the same CPU as the stream. It's also desirable
+ * to avoid lock migration and contention where locks are associated with
+ * more than one flow.
+ *
+ * netisr supports several policy variations, represented by the
+ * NETISR_POLICY_* constants, allowing protocols to play a varying role in
+ * identifying flows, assigning work to CPUs, etc. These are described in
+ * detail in netisr.h.
+ */
+
+#include <freebsd/local/opt_ddb.h>
+#include <freebsd/local/opt_device_polling.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/bus.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/kthread.h>
+#include <freebsd/sys/interrupt.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/pcpu.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/rmlock.h>
+#include <freebsd/sys/sched.h>
+#include <freebsd/sys/smp.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/systm.h>
+
+#ifdef DDB
+#include <freebsd/ddb/ddb.h>
+#endif
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/vnet.h>
+
+/*-
+ * Synchronize use and modification of the registered netisr data structures;
+ * acquire a write lock while modifying the set of registered protocols to
+ * prevent partially registered or unregistered protocols from being run.
+ *
+ * The following data structures and fields are protected by this lock:
+ *
+ * - The np array, including all fields of struct netisr_proto.
+ * - The nws array, including all fields of struct netisr_worker.
+ * - The nws_array array.
+ *
+ * Note: the NETISR_LOCKING define controls whether read locks are acquired
+ * in packet processing paths requiring netisr registration stability. This
+ * is disabled by default as it can lead to a measurable performance
+ * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
+ * because netisr registration and unregistration are extremely rare at
+ * runtime. If it becomes more common, this decision should be revisited.
+ *
+ * XXXRW: rmlocks don't support assertions.
+ */
+static struct rmlock netisr_rmlock;
+#define NETISR_LOCK_INIT() rm_init_flags(&netisr_rmlock, "netisr", \
+ RM_NOWITNESS)
+#define NETISR_LOCK_ASSERT()
+#define NETISR_RLOCK(tracker) rm_rlock(&netisr_rmlock, (tracker))
+#define NETISR_RUNLOCK(tracker) rm_runlock(&netisr_rmlock, (tracker))
+#define NETISR_WLOCK() rm_wlock(&netisr_rmlock)
+#define NETISR_WUNLOCK() rm_wunlock(&netisr_rmlock)
+/* #define NETISR_LOCKING */
+
+SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");
+
+/*-
+ * Three direct dispatch policies are supported:
+ *
+ * - Always defer: all work is scheduled for a netisr, regardless of context.
+ * (!direct)
+ *
+ * - Hybrid: if the executing context allows direct dispatch, and we're
+ * running on the CPU the work would be done on, then direct dispatch if it
+ * wouldn't violate ordering constraints on the workstream.
+ * (direct && !direct_force)
+ *
+ * - Always direct: if the executing context allows direct dispatch, always
+ * direct dispatch. (direct && direct_force)
+ *
+ * Notice that changing the global policy could lead to short periods of
+ * misordered processing, but this is considered acceptable as compared to
+ * the complexity of enforcing ordering during policy changes.
+ */
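+/*
+ * For example, the hybrid policy can be selected at runtime through
+ * the sysctls defined below (illustrative invocation):
+ *
+ *     sysctl net.isr.direct=1
+ *     sysctl net.isr.direct_force=0
+ */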
+static int netisr_direct_force = 1; /* Always direct dispatch. */
+TUNABLE_INT("net.isr.direct_force", &netisr_direct_force);
+SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RW,
+ &netisr_direct_force, 0, "Force direct dispatch");
+
+static int netisr_direct = 1; /* Enable direct dispatch. */
+TUNABLE_INT("net.isr.direct", &netisr_direct);
+SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW,
+ &netisr_direct, 0, "Enable direct dispatch");
+
+/*
+ * Allow the administrator to limit the number of threads (CPUs) to use for
+ * netisr. We don't check netisr_maxthreads before creating the thread for
+ * CPU 0, so in practice we ignore values <= 1. This must be set at boot.
+ * We will create at most one thread per CPU.
+ */
+static int netisr_maxthreads = -1; /* Max number of threads. */
+TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
+SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RD,
+ &netisr_maxthreads, 0,
+ "Use at most this many CPUs for netisr processing");
+
+static int netisr_bindthreads = 0; /* Bind threads to CPUs. */
+TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads);
+SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RD,
+ &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");
+
+/*
+ * Limit per-workstream queues to at most net.isr.maxqlimit, both for initial
+ * configuration and later modification using netisr_setqlimit().
+ */
+#define NETISR_DEFAULT_MAXQLIMIT 10240
+static u_int netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
+TUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit);
+SYSCTL_INT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RD,
+ &netisr_maxqlimit, 0,
+ "Maximum netisr per-protocol, per-CPU queue depth.");
+
+/*
+ * The default per-workstream queue limit for protocols that don't initialize
+ * the nh_qlimit field of their struct netisr_handler. If this is set above
+ * netisr_maxqlimit, we truncate it to the maximum during boot.
+ */
+#define NETISR_DEFAULT_DEFAULTQLIMIT 256
+static u_int netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
+TUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit);
+SYSCTL_INT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RD,
+ &netisr_defaultqlimit, 0,
+ "Default netisr per-protocol, per-CPU queue limit if not set by protocol");
+
+/*
+ * Each protocol is described by a struct netisr_proto, which holds all
+ * global per-protocol information. This data structure is set up by
+ * netisr_register(), and derived from the public struct netisr_handler.
+ */
+struct netisr_proto {
+ const char *np_name; /* Character string protocol name. */
+ netisr_handler_t *np_handler; /* Protocol handler. */
+ netisr_m2flow_t *np_m2flow; /* Query flow for untagged packet. */
+ netisr_m2cpuid_t *np_m2cpuid; /* Query CPU to process packet on. */
+ netisr_drainedcpu_t *np_drainedcpu; /* Callback when drained a queue. */
+ u_int np_qlimit; /* Maximum per-CPU queue depth. */
+ u_int np_policy; /* Work placement policy. */
+};
+
+#define NETISR_MAXPROT 16 /* Compile-time limit. */
+
+/*
+ * The np array describes all registered protocols, indexed by protocol
+ * number.
+ */
+static struct netisr_proto np[NETISR_MAXPROT];
+
+/*
+ * Protocol-specific work for each workstream is described by struct
+ * netisr_work. Each work descriptor consists of an mbuf queue and
+ * statistics.
+ */
+struct netisr_work {
+ /*
+ * Packet queue, linked by m_nextpkt.
+ */
+ struct mbuf *nw_head;
+ struct mbuf *nw_tail;
+ u_int nw_len;
+ u_int nw_qlimit;
+ u_int nw_watermark;
+
+ /*
+ * Statistics -- written unlocked, but mostly from curcpu.
+ */
+ u_int64_t nw_dispatched; /* Number of direct dispatches. */
+ u_int64_t nw_hybrid_dispatched; /* "" hybrid dispatches. */
+ u_int64_t nw_qdrops; /* "" drops. */
+ u_int64_t nw_queued; /* "" enqueues. */
+ u_int64_t nw_handled; /* "" handled in worker. */
+};
+
+/*
+ * Workstreams hold a set of ordered work across each protocol, and are
+ * described by netisr_workstream. Each workstream is associated with a
+ * worker thread, which in turn is pinned to a CPU. Work associated with a
+ * workstream can be processed in other threads during direct dispatch;
+ * concurrent processing is prevented by the NWS_RUNNING flag, which
+ * indicates that a thread is already processing the work queue.
+ */
+struct netisr_workstream {
+ struct intr_event *nws_intr_event; /* Handler for stream. */
+ void *nws_swi_cookie; /* swi(9) cookie for stream. */
+ struct mtx nws_mtx; /* Synchronize work. */
+ u_int nws_cpu; /* CPU pinning. */
+ u_int nws_flags; /* Wakeup flags. */
+ u_int nws_pendingbits; /* Scheduled protocols. */
+
+ /*
+ * Each protocol has per-workstream data.
+ */
+ struct netisr_work nws_work[NETISR_MAXPROT];
+} __aligned(CACHE_LINE_SIZE);
+
+/*
+ * Per-CPU workstream data.
+ */
+DPCPU_DEFINE(struct netisr_workstream, nws);
+
+/*
+ * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
+ * accessing workstreams. This allows constructions of the form
+ * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
+ */
+static u_int nws_array[MAXCPU];
+
+/*
+ * Number of registered workstreams. Will be at most the number of running
+ * CPUs once fully started.
+ */
+static u_int nws_count;
+SYSCTL_INT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
+ &nws_count, 0, "Number of extant netisr threads.");
+
+/*
+ * Per-workstream flags.
+ */
+#define NWS_RUNNING 0x00000001 /* Currently running in a thread. */
+#define NWS_DISPATCHING 0x00000002 /* Currently being direct-dispatched. */
+#define NWS_SCHEDULED 0x00000004 /* Signal issued. */
+
+/*
+ * Synchronization for each workstream: a mutex protects all mutable fields
+ * in each stream, including per-protocol state (mbuf queues). The SWI is
+ * woken up if asynchronous dispatch is required.
+ */
+#define NWS_LOCK(s) mtx_lock(&(s)->nws_mtx)
+#define NWS_LOCK_ASSERT(s) mtx_assert(&(s)->nws_mtx, MA_OWNED)
+#define NWS_UNLOCK(s) mtx_unlock(&(s)->nws_mtx)
+#define NWS_SIGNAL(s) swi_sched((s)->nws_swi_cookie, 0)
+
+#ifndef __rtems__
+/*
+ * Utility routines for protocols that implement their own mapping of flows
+ * to CPUs.
+ */
+u_int
+netisr_get_cpucount(void)
+{
+
+ return (nws_count);
+}
+
+u_int
+netisr_get_cpuid(u_int cpunumber)
+{
+
+ KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber,
+ nws_count));
+
+ return (nws_array[cpunumber]);
+}
+#endif /* __rtems__ */
+
+/*
+ * The default implementation of flow -> CPU ID mapping.
+ *
+ * Non-static so that protocols can use it to map their own work to specific
+ * CPUs in a manner consistent with netisr for affinity purposes.
+ */
+u_int
+netisr_default_flow2cpu(u_int flowid)
+{
+
+ return (nws_array[flowid % nws_count]);
+}
+
+/*
+ * Register a new netisr handler, which requires initializing per-protocol
+ * fields for each workstream. All netisr work is briefly suspended while
+ * the protocol is installed.
+ */
+void
+netisr_register(const struct netisr_handler *nhp)
+{
+ struct netisr_work *npwp;
+ const char *name;
+ u_int i, proto;
+
+ proto = nhp->nh_proto;
+ name = nhp->nh_name;
+
+ /*
+ * Test that the requested registration is valid.
+ */
+ KASSERT(nhp->nh_name != NULL,
+ ("%s: nh_name NULL for %u", __func__, proto));
+ KASSERT(nhp->nh_handler != NULL,
+ ("%s: nh_handler NULL for %s", __func__, name));
+ KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
+ nhp->nh_policy == NETISR_POLICY_FLOW ||
+ nhp->nh_policy == NETISR_POLICY_CPU,
+ ("%s: unsupported nh_policy %u for %s", __func__,
+ nhp->nh_policy, name));
+ KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
+ nhp->nh_m2flow == NULL,
+ ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
+ name));
+ KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
+ ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
+ name));
+ KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
+ ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
+ name));
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u, %s): protocol too big", __func__, proto, name));
+
+ /*
+ * Test that no existing registration exists for this protocol.
+ */
+ NETISR_WLOCK();
+ KASSERT(np[proto].np_name == NULL,
+ ("%s(%u, %s): name present", __func__, proto, name));
+ KASSERT(np[proto].np_handler == NULL,
+ ("%s(%u, %s): handler present", __func__, proto, name));
+
+ np[proto].np_name = name;
+ np[proto].np_handler = nhp->nh_handler;
+ np[proto].np_m2flow = nhp->nh_m2flow;
+ np[proto].np_m2cpuid = nhp->nh_m2cpuid;
+ np[proto].np_drainedcpu = nhp->nh_drainedcpu;
+ if (nhp->nh_qlimit == 0)
+ np[proto].np_qlimit = netisr_defaultqlimit;
+ else if (nhp->nh_qlimit > netisr_maxqlimit) {
+ printf("%s: %s requested queue limit %u capped to "
+ "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
+ netisr_maxqlimit);
+ np[proto].np_qlimit = netisr_maxqlimit;
+ } else
+ np[proto].np_qlimit = nhp->nh_qlimit;
+ np[proto].np_policy = nhp->nh_policy;
+ for (i = 0; i <= mp_maxid; i++) {
+ if (CPU_ABSENT(i))
+ continue;
+ npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
+ bzero(npwp, sizeof(*npwp));
+ npwp->nw_qlimit = np[proto].np_qlimit;
+ }
+ NETISR_WUNLOCK();
+}
+
+/*
+ * Clear drop counters across all workstreams for a protocol.
+ */
+void
+netisr_clearqdrops(const struct netisr_handler *nhp)
+{
+ struct netisr_work *npwp;
+#ifdef INVARIANTS
+ const char *name;
+#endif
+ u_int i, proto;
+
+ proto = nhp->nh_proto;
+#ifdef INVARIANTS
+ name = nhp->nh_name;
+#endif
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, name));
+
+ NETISR_WLOCK();
+ KASSERT(np[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ name));
+
+ for (i = 0; i <= mp_maxid; i++) {
+ if (CPU_ABSENT(i))
+ continue;
+ npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
+ npwp->nw_qdrops = 0;
+ }
+ NETISR_WUNLOCK();
+}
+
+/*
+ * Query the current drop counters across all workstreams for a protocol.
+ */
+void
+netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
+{
+ struct netisr_work *npwp;
+ struct rm_priotracker tracker;
+#ifdef INVARIANTS
+ const char *name;
+#endif
+ u_int i, proto;
+
+ *qdropp = 0;
+ proto = nhp->nh_proto;
+#ifdef INVARIANTS
+ name = nhp->nh_name;
+#endif
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, name));
+
+ NETISR_RLOCK(&tracker);
+ KASSERT(np[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ name));
+
+ for (i = 0; i <= mp_maxid; i++) {
+ if (CPU_ABSENT(i))
+ continue;
+ npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
+ *qdropp += npwp->nw_qdrops;
+ }
+ NETISR_RUNLOCK(&tracker);
+}
+
+/*
+ * Query the current queue limit for per-workstream queues for a protocol.
+ */
+void
+netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
+{
+ struct rm_priotracker tracker;
+#ifdef INVARIANTS
+ const char *name;
+#endif
+ u_int proto;
+
+ proto = nhp->nh_proto;
+#ifdef INVARIANTS
+ name = nhp->nh_name;
+#endif
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, name));
+
+ NETISR_RLOCK(&tracker);
+ KASSERT(np[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ name));
+ *qlimitp = np[proto].np_qlimit;
+ NETISR_RUNLOCK(&tracker);
+}
+
+/*
+ * Update the queue limit across per-workstream queues for a protocol. We
+ * simply change the limits, and don't drain overflowed packets as they will
+ * (hopefully) take care of themselves shortly.
+ */
+int
+netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
+{
+ struct netisr_work *npwp;
+#ifdef INVARIANTS
+ const char *name;
+#endif
+ u_int i, proto;
+
+ if (qlimit > netisr_maxqlimit)
+ return (EINVAL);
+
+ proto = nhp->nh_proto;
+#ifdef INVARIANTS
+ name = nhp->nh_name;
+#endif
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, name));
+
+ NETISR_WLOCK();
+ KASSERT(np[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ name));
+
+ np[proto].np_qlimit = qlimit;
+ for (i = 0; i <= mp_maxid; i++) {
+ if (CPU_ABSENT(i))
+ continue;
+ npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
+ npwp->nw_qlimit = qlimit;
+ }
+ NETISR_WUNLOCK();
+ return (0);
+}
+
+/*
+ * Drain all packets currently held in a particular protocol work queue.
+ */
+static void
+netisr_drain_proto(struct netisr_work *npwp)
+{
+ struct mbuf *m;
+
+ /*
+ * We would assert the lock on the workstream but it's not passed in.
+ */
+ while ((m = npwp->nw_head) != NULL) {
+ npwp->nw_head = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ if (npwp->nw_head == NULL)
+ npwp->nw_tail = NULL;
+ npwp->nw_len--;
+ m_freem(m);
+ }
+ KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
+ KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
+}
+
+/*
+ * Remove the registration of a network protocol, which requires clearing
+ * per-protocol fields across all workstreams, including freeing all mbufs in
+ * the queues at time of unregister. All work in netisr is briefly suspended
+ * while this takes place.
+ */
+void
+netisr_unregister(const struct netisr_handler *nhp)
+{
+ struct netisr_work *npwp;
+#ifdef INVARIANTS
+ const char *name;
+#endif
+ u_int i, proto;
+
+ proto = nhp->nh_proto;
+#ifdef INVARIANTS
+ name = nhp->nh_name;
+#endif
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, name));
+
+ NETISR_WLOCK();
+ KASSERT(np[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ name));
+
+ np[proto].np_name = NULL;
+ np[proto].np_handler = NULL;
+ np[proto].np_m2flow = NULL;
+ np[proto].np_m2cpuid = NULL;
+ np[proto].np_qlimit = 0;
+ np[proto].np_policy = 0;
+ for (i = 0; i <= mp_maxid; i++) {
+ if (CPU_ABSENT(i))
+ continue;
+ npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
+ netisr_drain_proto(npwp);
+ bzero(npwp, sizeof(*npwp));
+ }
+ NETISR_WUNLOCK();
+}
+
+/*
+ * Look up the workstream given a packet and source identifier. Do this by
+ * checking the protocol's policy, and optionally call out to the protocol
+ * for assistance if required.
+ */
+static struct mbuf *
+netisr_select_cpuid(struct netisr_proto *npp, uintptr_t source,
+ struct mbuf *m, u_int *cpuidp)
+{
+ struct ifnet *ifp;
+
+ NETISR_LOCK_ASSERT();
+
+ /*
+ * In the event we have only one worker, shortcut and deliver to it
+ * without further ado.
+ */
+ if (nws_count == 1) {
+ *cpuidp = nws_array[0];
+ return (m);
+ }
+
+ /*
+ * What happens next depends on the policy selected by the protocol.
+ * If we want to support per-interface policies, we should do that
+ * here first.
+ */
+ switch (npp->np_policy) {
+ case NETISR_POLICY_CPU:
+ return (npp->np_m2cpuid(m, source, cpuidp));
+
+ case NETISR_POLICY_FLOW:
+ if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) {
+ m = npp->np_m2flow(m, source);
+ if (m == NULL)
+ return (NULL);
+ }
+ if (m->m_flags & M_FLOWID) {
+ *cpuidp =
+ netisr_default_flow2cpu(m->m_pkthdr.flowid);
+ return (m);
+ }
+ /* FALLTHROUGH */
+
+ case NETISR_POLICY_SOURCE:
+ ifp = m->m_pkthdr.rcvif;
+ if (ifp != NULL)
+ *cpuidp = nws_array[(ifp->if_index + source) %
+ nws_count];
+ else
+ *cpuidp = nws_array[source % nws_count];
+ return (m);
+
+ default:
+ panic("%s: invalid policy %u for %s", __func__,
+ npp->np_policy, npp->np_name);
+ }
+}
+
+/*
+ * Process packets associated with a workstream and protocol. For reasons of
+ * fairness, we process up to one complete netisr queue at a time, moving the
+ * queue to a stack-local queue for processing, but do not loop refreshing
+ * from the global queue. The caller is responsible for deciding whether to
+ * loop, and for setting the NWS_RUNNING flag. The passed workstream will be
+ * locked on entry and relocked before return, but will be released while
+ * processing. The number of packets processed is returned.
+ */
+static u_int
+netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
+{
+ struct netisr_work local_npw, *npwp;
+ u_int handled;
+ struct mbuf *m;
+
+ NETISR_LOCK_ASSERT();
+ NWS_LOCK_ASSERT(nwsp);
+
+ KASSERT(nwsp->nws_flags & NWS_RUNNING,
+ ("%s(%u): not running", __func__, proto));
+ KASSERT(proto >= 0 && proto < NETISR_MAXPROT,
+ ("%s(%u): invalid proto\n", __func__, proto));
+
+ npwp = &nwsp->nws_work[proto];
+ if (npwp->nw_len == 0)
+ return (0);
+
+ /*
+ * Move the global work queue to a thread-local work queue.
+ *
+ * Notice that this means the effective maximum length of the queue
+ * is actually twice that of the maximum queue length specified in
+ * the protocol registration call.
+ */
+ handled = npwp->nw_len;
+ local_npw = *npwp;
+ npwp->nw_head = NULL;
+ npwp->nw_tail = NULL;
+ npwp->nw_len = 0;
+ nwsp->nws_pendingbits &= ~(1 << proto);
+ NWS_UNLOCK(nwsp);
+ while ((m = local_npw.nw_head) != NULL) {
+ local_npw.nw_head = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ if (local_npw.nw_head == NULL)
+ local_npw.nw_tail = NULL;
+ local_npw.nw_len--;
+ VNET_ASSERT(m->m_pkthdr.rcvif != NULL);
+ CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
+ np[proto].np_handler(m);
+ CURVNET_RESTORE();
+ }
+ KASSERT(local_npw.nw_len == 0,
+ ("%s(%u): len %u", __func__, proto, local_npw.nw_len));
+ if (np[proto].np_drainedcpu)
+ np[proto].np_drainedcpu(nwsp->nws_cpu);
+ NWS_LOCK(nwsp);
+ npwp->nw_handled += handled;
+ return (handled);
+}
+
+/*
+ * SWI handler for netisr -- processes packets in a set of workstreams that
+ * it owns, woken up by calls to NWS_SIGNAL(). If this workstream is already
+ * being direct dispatched, go back to sleep and wait for the dispatching
+ * thread to wake us up again.
+ */
+static void
+swi_net(void *arg)
+{
+#ifdef NETISR_LOCKING
+ struct rm_priotracker tracker;
+#endif
+ struct netisr_workstream *nwsp;
+ u_int bits, prot;
+
+ nwsp = arg;
+
+#ifdef DEVICE_POLLING
+ KASSERT(nws_count == 1,
+ ("%s: device_polling but nws_count != 1", __func__));
+ netisr_poll();
+#endif
+#ifdef NETISR_LOCKING
+ NETISR_RLOCK(&tracker);
+#endif
+ NWS_LOCK(nwsp);
+ KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
+ if (nwsp->nws_flags & NWS_DISPATCHING)
+ goto out;
+ nwsp->nws_flags |= NWS_RUNNING;
+ nwsp->nws_flags &= ~NWS_SCHEDULED;
+ while ((bits = nwsp->nws_pendingbits) != 0) {
+ while ((prot = ffs(bits)) != 0) {
+ prot--;
+ bits &= ~(1 << prot);
+ (void)netisr_process_workstream_proto(nwsp, prot);
+ }
+ }
+ nwsp->nws_flags &= ~NWS_RUNNING;
+out:
+ NWS_UNLOCK(nwsp);
+#ifdef NETISR_LOCKING
+ NETISR_RUNLOCK(&tracker);
+#endif
+#ifdef DEVICE_POLLING
+ netisr_pollmore();
+#endif
+}
+
+static int
+netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
+ struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
+{
+
+ NWS_LOCK_ASSERT(nwsp);
+
+ *dosignalp = 0;
+ if (npwp->nw_len < npwp->nw_qlimit) {
+ m->m_nextpkt = NULL;
+ if (npwp->nw_head == NULL) {
+ npwp->nw_head = m;
+ npwp->nw_tail = m;
+ } else {
+ npwp->nw_tail->m_nextpkt = m;
+ npwp->nw_tail = m;
+ }
+ npwp->nw_len++;
+ if (npwp->nw_len > npwp->nw_watermark)
+ npwp->nw_watermark = npwp->nw_len;
+ nwsp->nws_pendingbits |= (1 << proto);
+ if (!(nwsp->nws_flags &
+ (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
+ nwsp->nws_flags |= NWS_SCHEDULED;
+ *dosignalp = 1; /* Defer until unlocked. */
+ }
+ npwp->nw_queued++;
+ return (0);
+ } else {
+ m_freem(m);
+ npwp->nw_qdrops++;
+ return (ENOBUFS);
+ }
+}
+
+static int
+netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
+{
+ struct netisr_workstream *nwsp;
+ struct netisr_work *npwp;
+ int dosignal, error;
+
+#ifdef NETISR_LOCKING
+ NETISR_LOCK_ASSERT();
+#endif
+ KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__,
+ cpuid, mp_maxid));
+ KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
+
+ dosignal = 0;
+ error = 0;
+ nwsp = DPCPU_ID_PTR(cpuid, nws);
+ npwp = &nwsp->nws_work[proto];
+ NWS_LOCK(nwsp);
+ error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
+ NWS_UNLOCK(nwsp);
+ if (dosignal)
+ NWS_SIGNAL(nwsp);
+ return (error);
+}
+
+int
+netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
+{
+#ifdef NETISR_LOCKING
+ struct rm_priotracker tracker;
+#endif
+ u_int cpuid;
+ int error;
+
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s: invalid proto %u", __func__, proto));
+
+#ifdef NETISR_LOCKING
+ NETISR_RLOCK(&tracker);
+#endif
+ KASSERT(np[proto].np_handler != NULL,
+ ("%s: invalid proto %u", __func__, proto));
+
+ m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
+ if (m != NULL) {
+ KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
+ cpuid));
+ error = netisr_queue_internal(proto, m, cpuid);
+ } else
+ error = ENOBUFS;
+#ifdef NETISR_LOCKING
+ NETISR_RUNLOCK(&tracker);
+#endif
+ return (error);
+}
+
+int
+netisr_queue(u_int proto, struct mbuf *m)
+{
+
+ return (netisr_queue_src(proto, 0, m));
+}
+
+/*
+ * Dispatch a packet for netisr processing, direct dispatch permitted by
+ * calling context.
+ */
+int
+netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
+{
+#ifdef NETISR_LOCKING
+ struct rm_priotracker tracker;
+#endif
+ struct netisr_workstream *nwsp;
+ struct netisr_work *npwp;
+ int dosignal, error;
+ u_int cpuid;
+
+ /*
+ * If direct dispatch is entirely disabled, fall back on queueing.
+ */
+ if (!netisr_direct)
+ return (netisr_queue_src(proto, source, m));
+
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s: invalid proto %u", __func__, proto));
+#ifdef NETISR_LOCKING
+ NETISR_RLOCK(&tracker);
+#endif
+ KASSERT(np[proto].np_handler != NULL,
+ ("%s: invalid proto %u", __func__, proto));
+
+ /*
+ * If direct dispatch is forced, then unconditionally dispatch
+ * without a formal CPU selection. Borrow the current CPU's stats,
+ * even if there's no worker on it. In this case we don't update
+ * nws_flags because all netisr processing will be source ordered due
+ * to always being forced to directly dispatch.
+ */
+ if (netisr_direct_force) {
+ nwsp = DPCPU_PTR(nws);
+ npwp = &nwsp->nws_work[proto];
+ npwp->nw_dispatched++;
+ npwp->nw_handled++;
+ np[proto].np_handler(m);
+ error = 0;
+ goto out_unlock;
+ }
+
+ /*
+ * Otherwise, we execute in a hybrid mode where we will try to direct
+ * dispatch if we're on the right CPU and the netisr worker isn't
+ * already running.
+ */
+ m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto out_unlock;
+ }
+ KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
+#ifndef __rtems__
+ sched_pin();
+#endif /* __rtems__ */
+ if (cpuid != curcpu)
+ goto queue_fallback;
+ nwsp = DPCPU_PTR(nws);
+ npwp = &nwsp->nws_work[proto];
+
+ /*-
+ * We are willing to direct dispatch only if three conditions hold:
+ *
+ * (1) The netisr worker isn't already running,
+ * (2) Another thread isn't already directly dispatching, and
+ * (3) The netisr hasn't already been woken up.
+ */
+ NWS_LOCK(nwsp);
+ if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
+ error = netisr_queue_workstream(nwsp, proto, npwp, m,
+ &dosignal);
+ NWS_UNLOCK(nwsp);
+ if (dosignal)
+ NWS_SIGNAL(nwsp);
+ goto out_unpin;
+ }
+
+ /*
+ * The current thread is now effectively the netisr worker, so set
+ * the dispatching flag to prevent concurrent processing of the
+ * stream from another thread (even the netisr worker), which could
+ * otherwise lead to effective misordering of the stream.
+ */
+ nwsp->nws_flags |= NWS_DISPATCHING;
+ NWS_UNLOCK(nwsp);
+ np[proto].np_handler(m);
+ NWS_LOCK(nwsp);
+ nwsp->nws_flags &= ~NWS_DISPATCHING;
+ npwp->nw_handled++;
+ npwp->nw_hybrid_dispatched++;
+
+ /*
+ * If other work was enqueued by another thread while we were direct
+ * dispatching, we need to signal the netisr worker to do that work.
+ * In the future, we might want to do some of that work in the
+ * current thread, rather than trigger further context switches. If
+ * so, we'll want to establish a reasonable bound on the work done in
+ * the "borrowed" context.
+ */
+ if (nwsp->nws_pendingbits != 0) {
+ nwsp->nws_flags |= NWS_SCHEDULED;
+ dosignal = 1;
+ } else
+ dosignal = 0;
+ NWS_UNLOCK(nwsp);
+ if (dosignal)
+ NWS_SIGNAL(nwsp);
+ error = 0;
+ goto out_unpin;
+
+queue_fallback:
+ error = netisr_queue_internal(proto, m, cpuid);
+out_unpin:
+#ifndef __rtems__
+ sched_unpin();
+#endif /* __rtems__ */
+out_unlock:
+#ifdef NETISR_LOCKING
+ NETISR_RUNLOCK(&tracker);
+#endif
+ return (error);
+}
+
+int
+netisr_dispatch(u_int proto, struct mbuf *m)
+{
+
+ return (netisr_dispatch_src(proto, 0, m));
+}
+
+#ifdef DEVICE_POLLING
+/*
+ * Kernel polling borrows a netisr thread to run interface polling in; this
+ * function allows kernel polling to request that the netisr thread be
+ * scheduled even if no packets are pending for protocols.
+ */
+void
+netisr_sched_poll(void)
+{
+ struct netisr_workstream *nwsp;
+
+ nwsp = DPCPU_ID_PTR(nws_array[0], nws);
+ NWS_SIGNAL(nwsp);
+}
+#endif
+
+static void
+netisr_start_swi(u_int cpuid, struct pcpu *pc)
+{
+ char swiname[12];
+ struct netisr_workstream *nwsp;
+ int error;
+
+ KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
+
+ nwsp = DPCPU_ID_PTR(cpuid, nws);
+ mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF);
+ nwsp->nws_cpu = cpuid;
+ snprintf(swiname, sizeof(swiname), "netisr %u", cpuid);
+ error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
+ SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie);
+ if (error)
+ panic("%s: swi_add %d", __func__, error);
+#ifndef __rtems__
+ pc->pc_netisr = nwsp->nws_intr_event;
+ if (netisr_bindthreads) {
+ error = intr_event_bind(nwsp->nws_intr_event, cpuid);
+ if (error != 0)
+ printf("%s: cpu %u: intr_event_bind: %d", __func__,
+ cpuid, error);
+ }
+#endif
+ NETISR_WLOCK();
+ nws_array[nws_count] = nwsp->nws_cpu;
+ nws_count++;
+ NETISR_WUNLOCK();
+}
+
+/*
+ * Initialize the netisr subsystem. We rely on BSS and static initialization
+ * of most fields in global data structures.
+ *
+ * Start a worker thread for the boot CPU so that we can support network
+ * traffic immediately in case the network stack is used before additional
+ * CPUs are started (for example, diskless boot).
+ */
+static void
+netisr_init(void *arg)
+{
+
+ KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));
+
+ NETISR_LOCK_INIT();
+ if (netisr_maxthreads < 1)
+ netisr_maxthreads = 1;
+ if (netisr_maxthreads > mp_ncpus) {
+ printf("netisr_init: forcing maxthreads from %d to %d\n",
+ netisr_maxthreads, mp_ncpus);
+ netisr_maxthreads = mp_ncpus;
+ }
+ if (netisr_defaultqlimit > netisr_maxqlimit) {
+ printf("netisr_init: forcing defaultqlimit from %d to %d\n",
+ netisr_defaultqlimit, netisr_maxqlimit);
+ netisr_defaultqlimit = netisr_maxqlimit;
+ }
+#ifdef DEVICE_POLLING
+ /*
+ * The device polling code is not yet aware of how to deal with
+ * multiple netisr threads, so for the time being compiling in device
+ * polling disables parallel netisr workers.
+ */
+ if (netisr_maxthreads != 1 || netisr_bindthreads != 0) {
+ printf("netisr_init: forcing maxthreads to 1 and "
+ "bindthreads to 0 for device polling\n");
+ netisr_maxthreads = 1;
+ netisr_bindthreads = 0;
+ }
+#endif
+
+#ifndef __rtems__
+ netisr_start_swi(curcpu, pcpu_find(curcpu));
+#else /* __rtems__ */
+ netisr_start_swi(0, NULL);
+#endif /* __rtems__ */
+}
+SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);
+
+#ifndef __rtems__
+/*
+ * Start worker threads for additional CPUs. No attempt is made to gracefully
+ * handle work reassignment; we don't yet support dynamic reconfiguration.
+ */
+static void
+netisr_start(void *arg)
+{
+ struct pcpu *pc;
+
+ SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+ if (nws_count >= netisr_maxthreads)
+ break;
+ /* XXXRW: Is skipping absent CPUs still required here? */
+ if (CPU_ABSENT(pc->pc_cpuid))
+ continue;
+ /* Worker will already be present for boot CPU. */
+ if (pc->pc_netisr != NULL)
+ continue;
+ netisr_start_swi(pc->pc_cpuid, pc);
+ }
+}
+SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
+#endif /* __rtems__ */
+
+#ifdef DDB
+DB_SHOW_COMMAND(netisr, db_show_netisr)
+{
+ struct netisr_workstream *nwsp;
+ struct netisr_work *nwp;
+ int first, proto;
+ u_int cpuid;
+
+ db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
+ "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
+ for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
+ if (CPU_ABSENT(cpuid))
+ continue;
+ nwsp = DPCPU_ID_PTR(cpuid, nws);
+ if (nwsp->nws_intr_event == NULL)
+ continue;
+ first = 1;
+ for (proto = 0; proto < NETISR_MAXPROT; proto++) {
+ if (np[proto].np_handler == NULL)
+ continue;
+ nwp = &nwsp->nws_work[proto];
+ if (first) {
+ db_printf("%3d ", cpuid);
+ first = 0;
+ } else
+ db_printf("%3s ", "");
+ db_printf(
+ "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
+ np[proto].np_name, nwp->nw_len,
+ nwp->nw_watermark, nwp->nw_qlimit,
+ nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
+ nwp->nw_qdrops, nwp->nw_queued);
+ }
+ }
+}
+#endif
diff --git a/freebsd/sys/net/netisr.h b/freebsd/sys/net/netisr.h
new file mode 100644
index 00000000..b755332a
--- /dev/null
+++ b/freebsd/sys/net/netisr.h
@@ -0,0 +1,156 @@
+/*-
+ * Copyright (c) 2007-2009 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_NETISR_HH_
+#define _NET_NETISR_HH_
+#ifdef _KERNEL
+
+/*
+ * The netisr (network interrupt service routine) provides a deferred
+ * execution environment in which (generally inbound) network processing can
+ * take place. Protocols register handlers which will be executed directly,
+ * or via deferred dispatch, depending on the circumstances.
+ *
+ * Historically, this was implemented by the BSD software ISR facility; it is
+ * now implemented via a software ithread (SWI).
+ */
+#define NETISR_IP 1
+#define NETISR_IGMP 2 /* IGMPv3 output queue */
+#define NETISR_ROUTE 3 /* routing socket */
+#define NETISR_AARP 4 /* Appletalk ARP */
+#define NETISR_ATALK2 5 /* Appletalk phase 2 */
+#define NETISR_ATALK1 6 /* Appletalk phase 1 */
+#define NETISR_ARP 7 /* same as AF_LINK */
+#define NETISR_IPX 8 /* same as AF_IPX */
+#define NETISR_ETHER 9 /* ethernet input */
+#define NETISR_IPV6 10
+#define NETISR_NATM 11
+#define NETISR_EPAIR 12 /* if_epair(4) */
+
+/*-
+ * Protocols express ordering constraints and affinity preferences by
+ * implementing one or neither of nh_m2flow and nh_m2cpuid, which are used by
+ * netisr to determine which per-CPU workstream to assign mbufs to.
+ *
+ * The following policies may be used by protocols:
+ *
+ * NETISR_POLICY_SOURCE - netisr should maintain source ordering without
+ * advice from the protocol. netisr will ignore any
+ * flow IDs present on the mbuf for the purposes of
+ * work placement.
+ *
+ * NETISR_POLICY_FLOW - netisr should maintain flow ordering as defined by
+ * the mbuf header flow ID field. If the protocol
+ * implements nh_m2flow, then netisr will query the
+ * protocol in the event that the mbuf doesn't have a
+ * flow ID, falling back on source ordering.
+ *
+ * NETISR_POLICY_CPU - netisr will delegate all work placement decisions to
+ * the protocol, querying nh_m2cpuid for each packet.
+ *
+ * Protocols may base work placement decisions on an existing flow ID
+ * calculated for the mbuf, such as one provided in hardware, on the
+ * receive interface pointed to by the mbuf (if any), or on the optional
+ * source identifier passed at some dispatch points, or they may parse
+ * packet headers to calculate a flow themselves. Either protocol
+ * callback may return a new mbuf pointer for the chain, or NULL if the
+ * packet proves invalid or m_pullup() fails.
+ *
+ * XXXRW: If we eventually support dynamic reconfiguration, there should
+ * be a way to notify protocol handlers of CPU configuration changes so
+ * that they can rebalance work.
+ */
+struct mbuf;
+typedef void netisr_handler_t(struct mbuf *m);
+typedef struct mbuf *netisr_m2cpuid_t(struct mbuf *m, uintptr_t source,
+ u_int *cpuid);
+typedef struct mbuf *netisr_m2flow_t(struct mbuf *m, uintptr_t source);
+typedef void netisr_drainedcpu_t(u_int cpuid);
+
+#define NETISR_POLICY_SOURCE 1 /* Maintain source ordering. */
+#define NETISR_POLICY_FLOW 2 /* Maintain flow ordering. */
+#define NETISR_POLICY_CPU 3 /* Protocol determines CPU placement. */
+
+/*
+ * Data structure describing a protocol handler.
+ */
+struct netisr_handler {
+ const char *nh_name; /* Character string protocol name. */
+ netisr_handler_t *nh_handler; /* Protocol handler. */
+ netisr_m2flow_t *nh_m2flow; /* Query flow for untagged packet. */
+ netisr_m2cpuid_t *nh_m2cpuid; /* Query CPU to process mbuf on. */
+ netisr_drainedcpu_t *nh_drainedcpu; /* Callback when drained a queue. */
+ u_int nh_proto; /* Integer protocol ID. */
+ u_int nh_qlimit; /* Maximum per-CPU queue depth. */
+ u_int nh_policy; /* Work placement policy. */
+ u_int nh_ispare[5]; /* For future use. */
+ void *nh_pspare[4]; /* For future use. */
+};
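+
+/*
+ * Illustrative sketch, not part of the original header: a hypothetical
+ * protocol "foo" could describe itself to netisr as follows, using only
+ * the interfaces declared here. The names foo_input and NETISR_FOO are
+ * assumptions for the example.
+ *
+ *     static void foo_input(struct mbuf *m);
+ *
+ *     static const struct netisr_handler foo_nh = {
+ *             .nh_name = "foo",
+ *             .nh_handler = foo_input,
+ *             .nh_proto = NETISR_FOO,
+ *             .nh_qlimit = 256,
+ *             .nh_policy = NETISR_POLICY_FLOW,
+ *     };
+ *
+ *     netisr_register(&foo_nh);
+ */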
+
+/*
+ * Register, unregister, and other netisr handler management functions.
+ */
+void netisr_clearqdrops(const struct netisr_handler *nhp);
+void netisr_getqdrops(const struct netisr_handler *nhp,
+ u_int64_t *qdropsp);
+void netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp);
+void netisr_register(const struct netisr_handler *nhp);
+int netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit);
+void netisr_unregister(const struct netisr_handler *nhp);
+
+/*
+ * Process a packet destined for a protocol, and attempt direct dispatch.
+ * Supplemental source ordering information can be passed using the _src
+ * variant.
+ */
+int netisr_dispatch(u_int proto, struct mbuf *m);
+int netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m);
+int netisr_queue(u_int proto, struct mbuf *m);
+int netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m);
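+
+/*
+ * Illustrative sketch, not part of the original header: an input path
+ * would typically hand a received mbuf to a protocol via, e.g.,
+ *
+ *     error = netisr_dispatch(NETISR_IP, m);
+ *
+ * which may run the handler directly or defer to a per-CPU queue
+ * depending on policy and configuration, while netisr_queue(NETISR_IP, m)
+ * always defers. Both consume the mbuf; a non-zero errno return means
+ * it was dropped.
+ */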
+
+/*
+ * Provide a default implementation of "map an ID to a CPU ID".
+ */
+u_int netisr_default_flow2cpu(u_int flowid);
+
+/*
+ * Utility routines to return the number of CPUs participating in netisr, and
+ * to return a mapping from a number to a CPU ID that can be used with the
+ * scheduler.
+ */
+u_int netisr_get_cpucount(void);
+u_int netisr_get_cpuid(u_int cpunumber);
+
+/*
+ * Interfaces between DEVICE_POLLING and netisr.
+ */
+void netisr_sched_poll(void);
+void netisr_poll(void);
+void netisr_pollmore(void);
+
+#endif /* _KERNEL */
+#endif /* !_NET_NETISR_HH_ */
diff --git a/freebsd/sys/net/pfil.c b/freebsd/sys/net/pfil.c
new file mode 100644
index 00000000..3a382bc5
--- /dev/null
+++ b/freebsd/sys/net/pfil.c
@@ -0,0 +1,331 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $FreeBSD$ */
+/* $NetBSD: pfil.c,v 1.20 2001/11/12 23:49:46 lukem Exp $ */
+
+/*-
+ * Copyright (c) 1996 Matthew R. Green
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/rmlock.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/condvar.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/queue.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/pfil.h>
+
+static struct mtx pfil_global_lock;
+
+MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock",
+ MTX_DEF);
+
+static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int);
+static int pfil_list_remove(pfil_list_t *,
+ int (*)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
+ void *);
+
+LIST_HEAD(pfilheadhead, pfil_head);
+VNET_DEFINE(struct pfilheadhead, pfil_head_list);
+#define V_pfil_head_list VNET(pfil_head_list)
+
+/*
+ * pfil_run_hooks() runs the specified packet filter hooks.
+ */
+int
+pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
+ int dir, struct inpcb *inp)
+{
+ struct rm_priotracker rmpt;
+ struct packet_filter_hook *pfh;
+ struct mbuf *m = *mp;
+ int rv = 0;
+
+ PFIL_RLOCK(ph, &rmpt);
+ KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0"));
+ for (pfh = pfil_hook_get(dir, ph); pfh != NULL;
+ pfh = TAILQ_NEXT(pfh, pfil_link)) {
+ if (pfh->pfil_func != NULL) {
+ rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir,
+ inp);
+ if (rv != 0 || m == NULL)
+ break;
+ }
+ }
+ PFIL_RUNLOCK(ph, &rmpt);
+ *mp = m;
+ return (rv);
+}
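+
+/*
+ * Illustrative note, not part of the original sources: a caller treats a
+ * non-zero return or a NULL *mp as "packet consumed by a filter", e.g.:
+ *
+ *     if (pfil_run_hooks(ph, &m, ifp, PFIL_IN, NULL) != 0 || m == NULL)
+ *             return;         packet filtered; nothing left to process
+ */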
+
+/*
+ * pfil_head_register() registers a pfil_head with the packet filter hook
+ * mechanism.
+ */
+int
+pfil_head_register(struct pfil_head *ph)
+{
+ struct pfil_head *lph;
+
+ PFIL_LIST_LOCK();
+ LIST_FOREACH(lph, &V_pfil_head_list, ph_list) {
+ if (ph->ph_type == lph->ph_type &&
+ ph->ph_un.phu_val == lph->ph_un.phu_val) {
+ PFIL_LIST_UNLOCK();
+ return (EEXIST);
+ }
+ }
+ PFIL_LOCK_INIT(ph);
+ ph->ph_nhooks = 0;
+ TAILQ_INIT(&ph->ph_in);
+ TAILQ_INIT(&ph->ph_out);
+ LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list);
+ PFIL_LIST_UNLOCK();
+ return (0);
+}
+
+/*
+ * pfil_head_unregister() removes a pfil_head from the packet filter hook
+ * mechanism. The producer of the hook promises that all outstanding
+ * invocations of the hook have completed before it unregisters the hook.
+ */
+int
+pfil_head_unregister(struct pfil_head *ph)
+{
+ struct packet_filter_hook *pfh, *pfnext;
+
+ PFIL_LIST_LOCK();
+ LIST_REMOVE(ph, ph_list);
+ PFIL_LIST_UNLOCK();
+ TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_link, pfnext)
+ free(pfh, M_IFADDR);
+ TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_link, pfnext)
+ free(pfh, M_IFADDR);
+ PFIL_LOCK_DESTROY(ph);
+ return (0);
+}
+
+/*
+ * pfil_head_get() returns the pfil_head for a given key/dlt.
+ */
+struct pfil_head *
+pfil_head_get(int type, u_long val)
+{
+ struct pfil_head *ph;
+
+ PFIL_LIST_LOCK();
+ LIST_FOREACH(ph, &V_pfil_head_list, ph_list)
+ if (ph->ph_type == type && ph->ph_un.phu_val == val)
+ break;
+ PFIL_LIST_UNLOCK();
+ return (ph);
+}
+
+/*
+ * pfil_add_hook() adds a function to the packet filter hook list. The
+ * flags are:
+ * PFIL_IN call me on incoming packets
+ * PFIL_OUT call me on outgoing packets
+ * PFIL_ALL call me on all of the above
+ * PFIL_WAITOK OK to call malloc with M_WAITOK.
+ */
+int
+pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+{
+ struct packet_filter_hook *pfh1 = NULL;
+ struct packet_filter_hook *pfh2 = NULL;
+ int err;
+
+ if (flags & PFIL_IN) {
+ pfh1 = (struct packet_filter_hook *)malloc(sizeof(*pfh1),
+ M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT);
+ if (pfh1 == NULL) {
+ err = ENOMEM;
+ goto error;
+ }
+ }
+ if (flags & PFIL_OUT) {
+ pfh2 = (struct packet_filter_hook *)malloc(sizeof(*pfh2),
+ M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT);
+ if (pfh2 == NULL) {
+ err = ENOMEM;
+ goto error;
+ }
+ }
+ PFIL_WLOCK(ph);
+ if (flags & PFIL_IN) {
+ pfh1->pfil_func = func;
+ pfh1->pfil_arg = arg;
+ err = pfil_list_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
+ if (err)
+ goto locked_error;
+ ph->ph_nhooks++;
+ }
+ if (flags & PFIL_OUT) {
+ pfh2->pfil_func = func;
+ pfh2->pfil_arg = arg;
+ err = pfil_list_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
+ if (err) {
+ if (flags & PFIL_IN)
+ pfil_list_remove(&ph->ph_in, func, arg);
+ goto locked_error;
+ }
+ ph->ph_nhooks++;
+ }
+ PFIL_WUNLOCK(ph);
+ return (0);
+locked_error:
+ PFIL_WUNLOCK(ph);
+error:
+ if (pfh1 != NULL)
+ free(pfh1, M_IFADDR);
+ if (pfh2 != NULL)
+ free(pfh2, M_IFADDR);
+ return (err);
+}
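+
+/*
+ * Illustrative sketch, not part of the original sources: a minimal hook
+ * that drops every outbound packet could look like the following and be
+ * attached with pfil_add_hook(). The name example_hook is an assumption
+ * for the sketch.
+ *
+ *     static int
+ *     example_hook(void *arg, struct mbuf **mp, struct ifnet *ifp,
+ *         int dir, struct inpcb *inp)
+ *     {
+ *
+ *             m_freem(*mp);
+ *             *mp = NULL;     signals pfil_run_hooks() to stop
+ *             return (0);
+ *     }
+ *
+ *     error = pfil_add_hook(example_hook, NULL, PFIL_OUT | PFIL_WAITOK, ph);
+ */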
+
+/*
+ * pfil_remove_hook removes a specific function from the packet filter hook
+ * list.
+ */
+int
+pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+{
+ int err = 0;
+
+ PFIL_WLOCK(ph);
+ if (flags & PFIL_IN) {
+ err = pfil_list_remove(&ph->ph_in, func, arg);
+ if (err == 0)
+ ph->ph_nhooks--;
+ }
+ if ((err == 0) && (flags & PFIL_OUT)) {
+ err = pfil_list_remove(&ph->ph_out, func, arg);
+ if (err == 0)
+ ph->ph_nhooks--;
+ }
+ PFIL_WUNLOCK(ph);
+ return (err);
+}
+
+static int
+pfil_list_add(pfil_list_t *list, struct packet_filter_hook *pfh1, int flags)
+{
+ struct packet_filter_hook *pfh;
+
+ /*
+ * First make sure the hook is not already there.
+ */
+ TAILQ_FOREACH(pfh, list, pfil_link)
+ if (pfh->pfil_func == pfh1->pfil_func &&
+ pfh->pfil_arg == pfh1->pfil_arg)
+ return (EEXIST);
+
+ /*
+ * Insert the input list in reverse order of the output list so that
+ * the same path is followed in or out of the kernel.
+ */
+ if (flags & PFIL_IN)
+ TAILQ_INSERT_HEAD(list, pfh1, pfil_link);
+ else
+ TAILQ_INSERT_TAIL(list, pfh1, pfil_link);
+ return (0);
+}
+
+/*
+ * pfil_list_remove is an internal function that takes a function off the
+ * specified list.
+ */
+static int
+pfil_list_remove(pfil_list_t *list,
+ int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
+ void *arg)
+{
+ struct packet_filter_hook *pfh;
+
+ TAILQ_FOREACH(pfh, list, pfil_link)
+ if (pfh->pfil_func == func && pfh->pfil_arg == arg) {
+ TAILQ_REMOVE(list, pfh, pfil_link);
+ free(pfh, M_IFADDR);
+ return (0);
+ }
+ return (ENOENT);
+}
+
+/****************
+ * State that must be initialized for every instance
+ * (including the first, of course).
+ */
+static int
+vnet_pfil_init(const void *unused)
+{
+ LIST_INIT(&V_pfil_head_list);
+ return (0);
+}
+
+/***********************
+ * Called for the removal of each instance.
+ */
+static int
+vnet_pfil_uninit(const void *unused)
+{
+ /* XXX should panic if list is not empty */
+ return (0);
+}
+
+/* Define startup order. */
+#define PFIL_SYSINIT_ORDER SI_SUB_PROTO_BEGIN
+#define PFIL_MODEVENT_ORDER (SI_ORDER_FIRST) /* On boot slot in here. */
+#define PFIL_VNET_ORDER (PFIL_MODEVENT_ORDER + 2) /* Later still. */
+
+/*
+ * Starting up.
+ * VNET_SYSINIT is called for each existing vnet and each new vnet.
+ */
+VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER,
+ vnet_pfil_init, NULL);
+
+/*
+ * Closing up shop. These are done in REVERSE ORDER and are
+ * not called on reboot.
+ * VNET_SYSUNINIT is called for each exiting vnet as it exits.
+ */
+VNET_SYSUNINIT(vnet_pfil_uninit, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER,
+ vnet_pfil_uninit, NULL);
+
diff --git a/freebsd/sys/net/pfil.h b/freebsd/sys/net/pfil.h
new file mode 100644
index 00000000..78ab0518
--- /dev/null
+++ b/freebsd/sys/net/pfil.h
@@ -0,0 +1,117 @@
+/* $FreeBSD$ */
+/* $NetBSD: pfil.h,v 1.22 2003/06/23 12:57:08 martin Exp $ */
+
+/*-
+ * Copyright (c) 1996 Matthew R. Green
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_PFIL_HH_
+#define _NET_PFIL_HH_
+
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/_lock.h>
+#include <freebsd/sys/_mutex.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/rmlock.h>
+
+struct mbuf;
+struct ifnet;
+struct inpcb;
+
+/*
+ * The packet filter hooks are designed so that anything can call them
+ * to possibly intercept the packet.
+ */
+struct packet_filter_hook {
+ TAILQ_ENTRY(packet_filter_hook) pfil_link;
+ int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *);
+ void *pfil_arg;
+};
+
+#define PFIL_IN 0x00000001
+#define PFIL_OUT 0x00000002
+#define PFIL_WAITOK 0x00000004
+#define PFIL_ALL (PFIL_IN|PFIL_OUT)
+
+typedef TAILQ_HEAD(pfil_list, packet_filter_hook) pfil_list_t;
+
+#define PFIL_TYPE_AF 1 /* key is AF_* type */
+#define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */
+
+struct pfil_head {
+ pfil_list_t ph_in;
+ pfil_list_t ph_out;
+ int ph_type;
+ int ph_nhooks;
+ struct rmlock ph_lock;
+ union {
+ u_long phu_val;
+ void *phu_ptr;
+ } ph_un;
+#define ph_af ph_un.phu_val
+#define ph_ifnet ph_un.phu_ptr
+ LIST_ENTRY(pfil_head) ph_list;
+};
+
+int pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
+ int, struct inpcb *), void *, int, struct pfil_head *);
+int pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
+ int, struct inpcb *), void *, int, struct pfil_head *);
+int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *,
+ int, struct inpcb *inp);
+
+int pfil_head_register(struct pfil_head *);
+int pfil_head_unregister(struct pfil_head *);
+
+struct pfil_head *pfil_head_get(int, u_long);
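+
+/*
+ * Illustrative sketch, not part of the original header: a protocol looks
+ * up its head by key and only pays for filtering when hooks are present:
+ *
+ *     struct pfil_head *ph = pfil_head_get(PFIL_TYPE_AF, AF_INET);
+ *
+ *     if (ph != NULL && PFIL_HOOKED(ph))
+ *             (void)pfil_run_hooks(ph, &m, ifp, PFIL_IN, NULL);
+ */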
+
+#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
+#define PFIL_LOCK_INIT(p) \
+ rm_init_flags(&(p)->ph_lock, "PFil hook read/write mutex", RM_RECURSE)
+#define PFIL_LOCK_DESTROY(p) rm_destroy(&(p)->ph_lock)
+#define PFIL_RLOCK(p, t) rm_rlock(&(p)->ph_lock, (t))
+#define PFIL_WLOCK(p) rm_wlock(&(p)->ph_lock)
+#define PFIL_RUNLOCK(p, t) rm_runlock(&(p)->ph_lock, (t))
+#define PFIL_WUNLOCK(p) rm_wunlock(&(p)->ph_lock)
+#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
+#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)
+
+static __inline struct packet_filter_hook *
+pfil_hook_get(int dir, struct pfil_head *ph)
+{
+
+ if (dir == PFIL_IN)
+ return (TAILQ_FIRST(&ph->ph_in));
+ else if (dir == PFIL_OUT)
+ return (TAILQ_FIRST(&ph->ph_out));
+ else
+ return (NULL);
+}
+
+#endif /* _NET_PFIL_HH_ */
diff --git a/freebsd/sys/net/pfkeyv2.h b/freebsd/sys/net/pfkeyv2.h
new file mode 100644
index 00000000..f8e088e1
--- /dev/null
+++ b/freebsd/sys/net/pfkeyv2.h
@@ -0,0 +1,432 @@
+/* $FreeBSD$ */
+/* $KAME: pfkeyv2.h,v 1.37 2003/09/06 05:15:43 itojun Exp $ */
+
+/*-
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This file has been derived from RFC 2367, with some SADB_KEY_FLAGS_
+ * flags added as SADB_X_EXT_.
+ * sakane@ydc.co.jp
+ */
+
+#ifndef _NET_PFKEYV2_HH_
+#define _NET_PFKEYV2_HH_
+
+/*
+This file defines structures and symbols for the PF_KEY Version 2
+key management interface. It was written at the U.S. Naval Research
+Laboratory. This file is in the public domain. The authors ask that
+you leave this credit intact on any copies of this file.
+*/
+#ifndef __PFKEY_V2_H
+#define __PFKEY_V2_H 1
+
+#define PF_KEY_V2 2
+#define PFKEYV2_REVISION 199806L
+
+#define SADB_RESERVED 0
+#define SADB_GETSPI 1
+#define SADB_UPDATE 2
+#define SADB_ADD 3
+#define SADB_DELETE 4
+#define SADB_GET 5
+#define SADB_ACQUIRE 6
+#define SADB_REGISTER 7
+#define SADB_EXPIRE 8
+#define SADB_FLUSH 9
+#define SADB_DUMP 10
+#define SADB_X_PROMISC 11
+#define SADB_X_PCHANGE 12
+
+#define SADB_X_SPDUPDATE 13
+#define SADB_X_SPDADD 14
+#define SADB_X_SPDDELETE 15 /* by policy index */
+#define SADB_X_SPDGET 16
+#define SADB_X_SPDACQUIRE 17
+#define SADB_X_SPDDUMP 18
+#define SADB_X_SPDFLUSH 19
+#define SADB_X_SPDSETIDX 20
+#define SADB_X_SPDEXPIRE 21
+#define SADB_X_SPDDELETE2 22 /* by policy id */
+#define SADB_MAX 22
+
+struct sadb_msg {
+ u_int8_t sadb_msg_version;
+ u_int8_t sadb_msg_type;
+ u_int8_t sadb_msg_errno;
+ u_int8_t sadb_msg_satype;
+ u_int16_t sadb_msg_len;
+ u_int16_t sadb_msg_reserved;
+ u_int32_t sadb_msg_seq;
+ u_int32_t sadb_msg_pid;
+};
+
+struct sadb_ext {
+ u_int16_t sadb_ext_len;
+ u_int16_t sadb_ext_type;
+};
+
+struct sadb_sa {
+ u_int16_t sadb_sa_len;
+ u_int16_t sadb_sa_exttype;
+ u_int32_t sadb_sa_spi;
+ u_int8_t sadb_sa_replay;
+ u_int8_t sadb_sa_state;
+ u_int8_t sadb_sa_auth;
+ u_int8_t sadb_sa_encrypt;
+ u_int32_t sadb_sa_flags;
+};
+
+struct sadb_lifetime {
+ u_int16_t sadb_lifetime_len;
+ u_int16_t sadb_lifetime_exttype;
+ u_int32_t sadb_lifetime_allocations;
+ u_int64_t sadb_lifetime_bytes;
+ u_int64_t sadb_lifetime_addtime;
+ u_int64_t sadb_lifetime_usetime;
+};
+
+struct sadb_address {
+ u_int16_t sadb_address_len;
+ u_int16_t sadb_address_exttype;
+ u_int8_t sadb_address_proto;
+ u_int8_t sadb_address_prefixlen;
+ u_int16_t sadb_address_reserved;
+};
+
+struct sadb_key {
+ u_int16_t sadb_key_len;
+ u_int16_t sadb_key_exttype;
+ u_int16_t sadb_key_bits;
+ u_int16_t sadb_key_reserved;
+};
+
+struct sadb_ident {
+ u_int16_t sadb_ident_len;
+ u_int16_t sadb_ident_exttype;
+ u_int16_t sadb_ident_type;
+ u_int16_t sadb_ident_reserved;
+ u_int64_t sadb_ident_id;
+};
+
+struct sadb_sens {
+ u_int16_t sadb_sens_len;
+ u_int16_t sadb_sens_exttype;
+ u_int32_t sadb_sens_dpd;
+ u_int8_t sadb_sens_sens_level;
+ u_int8_t sadb_sens_sens_len;
+ u_int8_t sadb_sens_integ_level;
+ u_int8_t sadb_sens_integ_len;
+ u_int32_t sadb_sens_reserved;
+};
+
+struct sadb_prop {
+ u_int16_t sadb_prop_len;
+ u_int16_t sadb_prop_exttype;
+ u_int8_t sadb_prop_replay;
+ u_int8_t sadb_prop_reserved[3];
+};
+
+struct sadb_comb {
+ u_int8_t sadb_comb_auth;
+ u_int8_t sadb_comb_encrypt;
+ u_int16_t sadb_comb_flags;
+ u_int16_t sadb_comb_auth_minbits;
+ u_int16_t sadb_comb_auth_maxbits;
+ u_int16_t sadb_comb_encrypt_minbits;
+ u_int16_t sadb_comb_encrypt_maxbits;
+ u_int32_t sadb_comb_reserved;
+ u_int32_t sadb_comb_soft_allocations;
+ u_int32_t sadb_comb_hard_allocations;
+ u_int64_t sadb_comb_soft_bytes;
+ u_int64_t sadb_comb_hard_bytes;
+ u_int64_t sadb_comb_soft_addtime;
+ u_int64_t sadb_comb_hard_addtime;
+ u_int64_t sadb_comb_soft_usetime;
+ u_int64_t sadb_comb_hard_usetime;
+};
+
+struct sadb_supported {
+ u_int16_t sadb_supported_len;
+ u_int16_t sadb_supported_exttype;
+ u_int32_t sadb_supported_reserved;
+};
+
+struct sadb_alg {
+ u_int8_t sadb_alg_id;
+ u_int8_t sadb_alg_ivlen;
+ u_int16_t sadb_alg_minbits;
+ u_int16_t sadb_alg_maxbits;
+ u_int16_t sadb_alg_reserved;
+};
+
+struct sadb_spirange {
+ u_int16_t sadb_spirange_len;
+ u_int16_t sadb_spirange_exttype;
+ u_int32_t sadb_spirange_min;
+ u_int32_t sadb_spirange_max;
+ u_int32_t sadb_spirange_reserved;
+};
+
+struct sadb_x_kmprivate {
+ u_int16_t sadb_x_kmprivate_len;
+ u_int16_t sadb_x_kmprivate_exttype;
+ u_int32_t sadb_x_kmprivate_reserved;
+};
+
+/*
+ * XXX Additional SA Extension.
+ * mode: tunnel or transport
+ * reqid: makes the SA unique even when the address pair of the SA is
+ * the same. Mainly used for VPN.
+ */
+struct sadb_x_sa2 {
+ u_int16_t sadb_x_sa2_len;
+ u_int16_t sadb_x_sa2_exttype;
+ u_int8_t sadb_x_sa2_mode;
+ u_int8_t sadb_x_sa2_reserved1;
+ u_int16_t sadb_x_sa2_reserved2;
+ u_int32_t sadb_x_sa2_sequence; /* lowermost 32bit of sequence number */
+ u_int32_t sadb_x_sa2_reqid;
+};
+
+/* XXX Policy Extension */
+/* sizeof(struct sadb_x_policy) == 16 */
+struct sadb_x_policy {
+ u_int16_t sadb_x_policy_len;
+ u_int16_t sadb_x_policy_exttype;
+ u_int16_t sadb_x_policy_type; /* See policy type of ipsec.h */
+ u_int8_t sadb_x_policy_dir; /* direction, see ipsec.h */
+ u_int8_t sadb_x_policy_reserved;
+ u_int32_t sadb_x_policy_id;
+ u_int32_t sadb_x_policy_reserved2;
+};
+/*
+ * When policy_type == IPSEC, it is followed by some of
+ * the ipsec policy request.
+ * [total length of ipsec policy requests]
+ * = (sadb_x_policy_len * sizeof(uint64_t) - sizeof(struct sadb_x_policy))
+ */
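+
+/*
+ * Illustrative example, not part of the original header: a 16-byte
+ * sadb_x_policy followed by a single 40-byte ipsec policy request gives
+ * sadb_x_policy_len = PFKEY_UNIT64(16 + 40) = 7, and the formula above
+ * recovers the request length as 7 * sizeof(uint64_t) - 16 = 40.
+ */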
+
+/* XXX IPsec Policy Request Extension */
+/*
+ * This structure is aligned to 8 bytes.
+ */
+struct sadb_x_ipsecrequest {
+ u_int16_t sadb_x_ipsecrequest_len; /* structure length in 64 bits. */
+ u_int16_t sadb_x_ipsecrequest_proto; /* See ipsec.h */
+ u_int8_t sadb_x_ipsecrequest_mode; /* See IPSEC_MODE_XX in ipsec.h. */
+ u_int8_t sadb_x_ipsecrequest_level; /* See IPSEC_LEVEL_XX in ipsec.h */
+ u_int16_t sadb_x_ipsecrequest_reqid; /* See ipsec.h */
+
+ /*
+ * Followed by the source IP address of the SA, immediately followed by
+ * the destination IP address of the SA. These are encoded as two
+ * sockaddr structures without any padding; each sa_len must be set
+ * exactly. The length of an individual sockaddr structure is not
+ * aligned to 64 bits, but the sum of the request and the addresses is.
+ */
+};
+
+/* NAT-Traversal type, see RFC 3948 (and drafts). */
+/* sizeof(struct sadb_x_nat_t_type) == 8 */
+struct sadb_x_nat_t_type {
+ u_int16_t sadb_x_nat_t_type_len;
+ u_int16_t sadb_x_nat_t_type_exttype;
+ u_int8_t sadb_x_nat_t_type_type;
+ u_int8_t sadb_x_nat_t_type_reserved[3];
+};
+
+/* NAT-Traversal source or destination port. */
+/* sizeof(struct sadb_x_nat_t_port) == 8 */
+struct sadb_x_nat_t_port {
+ u_int16_t sadb_x_nat_t_port_len;
+ u_int16_t sadb_x_nat_t_port_exttype;
+ u_int16_t sadb_x_nat_t_port_port;
+ u_int16_t sadb_x_nat_t_port_reserved;
+};
+
+/* ESP fragmentation size. */
+/* sizeof(struct sadb_x_nat_t_frag) == 8 */
+struct sadb_x_nat_t_frag {
+ u_int16_t sadb_x_nat_t_frag_len;
+ u_int16_t sadb_x_nat_t_frag_exttype;
+ u_int16_t sadb_x_nat_t_frag_fraglen;
+ u_int16_t sadb_x_nat_t_frag_reserved;
+};
+
+
+#define SADB_EXT_RESERVED 0
+#define SADB_EXT_SA 1
+#define SADB_EXT_LIFETIME_CURRENT 2
+#define SADB_EXT_LIFETIME_HARD 3
+#define SADB_EXT_LIFETIME_SOFT 4
+#define SADB_EXT_ADDRESS_SRC 5
+#define SADB_EXT_ADDRESS_DST 6
+#define SADB_EXT_ADDRESS_PROXY 7
+#define SADB_EXT_KEY_AUTH 8
+#define SADB_EXT_KEY_ENCRYPT 9
+#define SADB_EXT_IDENTITY_SRC 10
+#define SADB_EXT_IDENTITY_DST 11
+#define SADB_EXT_SENSITIVITY 12
+#define SADB_EXT_PROPOSAL 13
+#define SADB_EXT_SUPPORTED_AUTH 14
+#define SADB_EXT_SUPPORTED_ENCRYPT 15
+#define SADB_EXT_SPIRANGE 16
+#define SADB_X_EXT_KMPRIVATE 17
+#define SADB_X_EXT_POLICY 18
+#define SADB_X_EXT_SA2 19
+#define SADB_X_EXT_NAT_T_TYPE 20
+#define SADB_X_EXT_NAT_T_SPORT 21
+#define SADB_X_EXT_NAT_T_DPORT 22
+#define SADB_X_EXT_NAT_T_OA 23 /* Deprecated. */
+#define SADB_X_EXT_NAT_T_OAI 23 /* Peer's NAT_OA for src of SA. */
+#define SADB_X_EXT_NAT_T_OAR 24 /* Peer's NAT_OA for dst of SA. */
+#define SADB_X_EXT_NAT_T_FRAG 25 /* Manual MTU override. */
+#define SADB_EXT_MAX 25
+
+#define SADB_SATYPE_UNSPEC 0
+#define SADB_SATYPE_AH 2
+#define SADB_SATYPE_ESP 3
+#define SADB_SATYPE_RSVP 5
+#define SADB_SATYPE_OSPFV2 6
+#define SADB_SATYPE_RIPV2 7
+#define SADB_SATYPE_MIP 8
+#define SADB_X_SATYPE_IPCOMP 9
+/*#define SADB_X_SATYPE_POLICY 10 obsolete, do not reuse */
+#define SADB_X_SATYPE_TCPSIGNATURE 11
+#define SADB_SATYPE_MAX 12
+
+#define SADB_SASTATE_LARVAL 0
+#define SADB_SASTATE_MATURE 1
+#define SADB_SASTATE_DYING 2
+#define SADB_SASTATE_DEAD 3
+#define SADB_SASTATE_MAX 3
+
+#define SADB_SAFLAGS_PFS 1
+
+/* RFC2367 numbers - meets RFC2407 */
+#define SADB_AALG_NONE 0
+#define SADB_AALG_MD5HMAC 2
+#define SADB_AALG_SHA1HMAC 3
+#define SADB_AALG_MAX 252
+/* private allocations - based on RFC2407/IANA assignment */
+#define SADB_X_AALG_SHA2_256 5
+#define SADB_X_AALG_SHA2_384 6
+#define SADB_X_AALG_SHA2_512 7
+#define SADB_X_AALG_RIPEMD160HMAC 8
+#define SADB_X_AALG_AES_XCBC_MAC 9 /* draft-ietf-ipsec-ciph-aes-xcbc-mac-04 */
+/* private allocations should use 249-255 (RFC2407) */
+#define SADB_X_AALG_MD5 249 /* Keyed MD5 */
+#define SADB_X_AALG_SHA 250 /* Keyed SHA */
+#define SADB_X_AALG_NULL 251 /* null authentication */
+#define SADB_X_AALG_TCP_MD5 252 /* Keyed TCP-MD5 (RFC2385) */
+
+/* RFC2367 numbers - meets RFC2407 */
+#define SADB_EALG_NONE 0
+#define SADB_EALG_DESCBC 2
+#define SADB_EALG_3DESCBC 3
+#define SADB_EALG_NULL 11
+#define SADB_EALG_MAX 250
+/* private allocations - based on RFC2407/IANA assignment */
+#define SADB_X_EALG_CAST128CBC 6
+#define SADB_X_EALG_BLOWFISHCBC 7
+#define SADB_X_EALG_RIJNDAELCBC 12
+#define SADB_X_EALG_AES 12
+/* private allocations - based on RFC4312/IANA assignment */
+#define SADB_X_EALG_CAMELLIACBC 22
+/* private allocations should use 249-255 (RFC2407) */
+#define SADB_X_EALG_SKIPJACK 249 /*250*/ /* for IPSEC */
+#define SADB_X_EALG_AESCTR 250 /*249*/ /* draft-ietf-ipsec-ciph-aes-ctr-03 */
+
+/* private allocations - based on RFC2407/IANA assignment */
+#define SADB_X_CALG_NONE 0
+#define SADB_X_CALG_OUI 1
+#define SADB_X_CALG_DEFLATE 2
+#define SADB_X_CALG_LZS 3
+#define SADB_X_CALG_MAX 4
+
+#define SADB_IDENTTYPE_RESERVED 0
+#define SADB_IDENTTYPE_PREFIX 1
+#define SADB_IDENTTYPE_FQDN 2
+#define SADB_IDENTTYPE_USERFQDN 3
+#define SADB_X_IDENTTYPE_ADDR 4
+#define SADB_IDENTTYPE_MAX 4
+
+/* `flags' in the sadb_sa structure holds the following */
+#define SADB_X_EXT_NONE 0x0000 /* i.e. new format. */
+#define SADB_X_EXT_OLD 0x0001 /* old format. */
+
+#define SADB_X_EXT_IV4B 0x0010 /* IV length of 4 bytes in use */
+#define SADB_X_EXT_DERIV 0x0020 /* DES derived */
+#define SADB_X_EXT_CYCSEQ 0x0040 /* allow cyclic sequence numbers. */
+
+ /* the following three are mutually exclusive flags */
+#define SADB_X_EXT_PSEQ 0x0000 /* sequential padding for ESP */
+#define SADB_X_EXT_PRAND 0x0100 /* random padding for ESP */
+#define SADB_X_EXT_PZERO 0x0200 /* zero padding for ESP */
+#define SADB_X_EXT_PMASK 0x0300 /* mask for padding flag */
+
+#if 1
+#define SADB_X_EXT_RAWCPI 0x0080 /* use well known CPI (IPComp) */
+#endif
+
+#define SADB_KEY_FLAGS_MAX 0x0fff
+
+/* SPI size for PF_KEYv2 */
+#define PFKEY_SPI_SIZE sizeof(u_int32_t)
+
+/* Identifier for a member of the lifetime structure */
+#define SADB_X_LIFETIME_ALLOCATIONS 0
+#define SADB_X_LIFETIME_BYTES 1
+#define SADB_X_LIFETIME_ADDTIME 2
+#define SADB_X_LIFETIME_USETIME 3
+
+/* The ratio of the SOFT lifetime to the HARD one, in percent. */
+#define PFKEY_SOFT_LIFETIME_RATE 80
+
+/* Utilities */
+#define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1)))
+#define PFKEY_EXTLEN(msg) \
+ PFKEY_UNUNIT64(((struct sadb_ext *)(msg))->sadb_ext_len)
+#define PFKEY_ADDR_PREFIX(ext) \
+ (((struct sadb_address *)(ext))->sadb_address_prefixlen)
+#define PFKEY_ADDR_PROTO(ext) \
+ (((struct sadb_address *)(ext))->sadb_address_proto)
+#define PFKEY_ADDR_SADDR(ext) \
+ ((struct sockaddr *)((caddr_t)(ext) + sizeof(struct sadb_address)))
+
+/* conversions between bytes and 64-bit units */
+#define PFKEY_UNUNIT64(a) ((a) << 3)
+#define PFKEY_UNIT64(a) ((a) >> 3)
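+
+/*
+ * Illustrative example, not part of the original header: PF_KEY lengths
+ * are carried in 64-bit units, so a message made of the base header plus
+ * one SA extension would be sized as
+ *
+ *     msg.sadb_msg_len = PFKEY_UNIT64(sizeof(struct sadb_msg) +
+ *         sizeof(struct sadb_sa));
+ *
+ * while variable-length payloads are first rounded up with PFKEY_ALIGN8().
+ */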
+
+#endif /* __PFKEY_V2_H */
+
+#endif /* _NET_PFKEYV2_HH_ */
diff --git a/freebsd/sys/net/ppp_defs.h b/freebsd/sys/net/ppp_defs.h
new file mode 100644
index 00000000..e0690e94
--- /dev/null
+++ b/freebsd/sys/net/ppp_defs.h
@@ -0,0 +1,158 @@
+/*
+ * ppp_defs.h - PPP definitions.
+ */
+/*-
+ * Copyright (c) 1994 The Australian National University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation is hereby granted, provided that the above copyright
+ * notice appears in all copies. This software is provided without any
+ * warranty, express or implied. The Australian National University
+ * makes no representations about the suitability of this software for
+ * any purpose.
+ *
+ * IN NO EVENT SHALL THE AUSTRALIAN NATIONAL UNIVERSITY BE LIABLE TO ANY
+ * PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
+ * THE AUSTRALIAN NATIONAL UNIVERSITY HAVE BEEN ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * THE AUSTRALIAN NATIONAL UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE AUSTRALIAN NATIONAL UNIVERSITY HAS NO
+ * OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+ * OR MODIFICATIONS.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PPP_DEFS_HH_
+#define _PPP_DEFS_HH_
+
+/*
+ * The basic PPP frame.
+ */
+#define PPP_HDRLEN 4 /* octets for standard ppp header */
+#define PPP_FCSLEN 2 /* octets for FCS */
+#define PPP_MRU 1500 /* default MRU = max length of info field */
+
+#define PPP_ADDRESS(p) (((u_char *)(p))[0])
+#define PPP_CONTROL(p) (((u_char *)(p))[1])
+#define PPP_PROTOCOL(p) ((((u_char *)(p))[2] << 8) + ((u_char *)(p))[3])
+
+/*
+ * Significant octet values.
+ */
+#define PPP_ALLSTATIONS 0xff /* All-Stations broadcast address */
+#define PPP_UI 0x03 /* Unnumbered Information */
+#define PPP_FLAG 0x7e /* Flag Sequence */
+#define PPP_ESCAPE 0x7d /* Asynchronous Control Escape */
+#define PPP_TRANS 0x20 /* Asynchronous transparency modifier */
+
+/*
+ * Protocol field values.
+ */
+#define PPP_IP 0x21 /* Internet Protocol */
+#define PPP_XNS 0x25 /* Xerox NS */
+#define PPP_AT 0x29 /* AppleTalk Protocol */
+#define PPP_IPX 0x2b /* IPX Datagram (RFC1552) */
+#define PPP_VJC_COMP 0x2d /* VJ compressed TCP */
+#define PPP_VJC_UNCOMP 0x2f /* VJ uncompressed TCP */
+#define PPP_COMP 0xfd /* compressed packet */
+#define PPP_IPCP 0x8021 /* IP Control Protocol */
+#define PPP_ATCP 0x8029 /* AppleTalk Control Protocol */
+#define PPP_IPXCP 0x802b /* IPX Control Protocol (RFC1552) */
+#define PPP_CCP 0x80fd /* Compression Control Protocol */
+#define PPP_LCP 0xc021 /* Link Control Protocol */
+#define PPP_PAP 0xc023 /* Password Authentication Protocol */
+#define PPP_LQR 0xc025 /* Link Quality Report protocol */
+#define PPP_CHAP 0xc223 /* Cryptographic Handshake Auth. Protocol */
+#define PPP_CBCP 0xc029 /* Callback Control Protocol */
+#define PPP_IPV6 0x57 /* Internet Protocol version 6 */
+#define PPP_IPV6CP 0x8057 /* IPv6 Control Protocol */
+
+/*
+ * Values for FCS calculations.
+ */
+#define PPP_INITFCS 0xffff /* Initial FCS value */
+#define PPP_GOODFCS 0xf0b8 /* Good final FCS value */
+#define PPP_FCS(fcs, c) (((fcs) >> 8) ^ fcstab[((fcs) ^ (c)) & 0xff])
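+
+/*
+ * Illustrative sketch, not part of the original header: given the usual
+ * 256-entry fcstab lookup table (defined elsewhere), a receiver folds
+ * every octet of the frame, FCS included, into a running value and
+ * checks it against PPP_GOODFCS:
+ *
+ *     u_int16_t fcs = PPP_INITFCS;
+ *     for (i = 0; i < len; i++)
+ *             fcs = PPP_FCS(fcs, buf[i]);
+ *     ok = (fcs == PPP_GOODFCS);
+ */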
+
+/*
+ * Extended asyncmap - allows any character to be escaped.
+ */
+typedef u_int32_t ext_accm[8];
+
+/*
+ * What to do with network protocol (NP) packets.
+ */
+enum NPmode {
+ NPMODE_PASS, /* pass the packet through */
+ NPMODE_DROP, /* silently drop the packet */
+ NPMODE_ERROR, /* return an error */
+ NPMODE_QUEUE /* save it up for later. */
+};
+
+/*
+ * Statistics.
+ */
+struct pppstat {
+ unsigned int ppp_ibytes; /* bytes received */
+ unsigned int ppp_ipackets; /* packets received */
+ unsigned int ppp_ierrors; /* receive errors */
+ unsigned int ppp_obytes; /* bytes sent */
+ unsigned int ppp_opackets; /* packets sent */
+ unsigned int ppp_oerrors; /* transmit errors */
+};
+
+struct vjstat {
+ unsigned int vjs_packets; /* outbound packets */
+ unsigned int vjs_compressed; /* outbound compressed packets */
+ unsigned int vjs_searches; /* searches for connection state */
+ unsigned int vjs_misses; /* times couldn't find conn. state */
+ unsigned int vjs_uncompressedin; /* inbound uncompressed packets */
+ unsigned int vjs_compressedin; /* inbound compressed packets */
+ unsigned int vjs_errorin; /* inbound unknown type packets */
+ unsigned int vjs_tossed; /* inbound packets tossed because of error */
+};
+
+struct ppp_stats {
+ struct pppstat p; /* basic PPP statistics */
+ struct vjstat vj; /* VJ header compression statistics */
+};
+
+struct compstat {
+ unsigned int unc_bytes; /* total uncompressed bytes */
+ unsigned int unc_packets; /* total uncompressed packets */
+ unsigned int comp_bytes; /* compressed bytes */
+ unsigned int comp_packets; /* compressed packets */
+ unsigned int inc_bytes; /* incompressible bytes */
+ unsigned int inc_packets; /* incompressible packets */
+ unsigned int ratio; /* recent compression ratio << 8 */
+};
+
+struct ppp_comp_stats {
+ struct compstat c; /* packet compression statistics */
+ struct compstat d; /* packet decompression statistics */
+};
+
+/*
+ * The following structure records the time in seconds since
+ * the last NP packet was sent or received.
+ */
+struct ppp_idle {
+ time_t xmit_idle; /* time since last NP packet sent */
+ time_t recv_idle; /* time since last NP packet received */
+};
+
+#ifndef __P
+#ifdef __STDC__
+#define __P(x) x
+#else
+#define __P(x) ()
+#endif
+#endif
+
+#endif /* _PPP_DEFS_HH_ */
diff --git a/freebsd/sys/net/radix.c b/freebsd/sys/net/radix.c
new file mode 100644
index 00000000..c1881acb
--- /dev/null
+++ b/freebsd/sys/net/radix.c
@@ -0,0 +1,1205 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1988, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)radix.c 8.5 (Berkeley) 5/19/95
+ * $FreeBSD$
+ */
+
+/*
+ * Routines to build and maintain radix trees for routing lookups.
+ */
+#include <freebsd/sys/param.h>
+#ifdef _KERNEL
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/net/radix.h>
+#include <freebsd/local/opt_mpath.h>
+#ifdef RADIX_MPATH
+#include <freebsd/net/radix_mpath.h>
+#endif
+#else /* !_KERNEL */
+#include <freebsd/stdio.h>
+#include <freebsd/strings.h>
+#include <freebsd/stdlib.h>
+#define log(x, arg...) fprintf(stderr, ## arg)
+#define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1)
+#define min(a, b) ((a) < (b) ? (a) : (b) )
+#include <freebsd/net/radix.h>
+#endif /* !_KERNEL */
+
+static int rn_walktree_from(struct radix_node_head *h, void *a, void *m,
+ walktree_f_t *f, void *w);
+static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
+static struct radix_node
+ *rn_insert(void *, struct radix_node_head *, int *,
+ struct radix_node [2]),
+ *rn_newpair(void *, int, struct radix_node[2]),
+ *rn_search(void *, struct radix_node *),
+ *rn_search_m(void *, struct radix_node *, void *);
+
+static int max_keylen;
+static struct radix_mask *rn_mkfreelist;
+static struct radix_node_head *mask_rnhead;
+/*
+ * Work area -- the following point to 3 buffers of size max_keylen,
+ * allocated in this order in a block of memory malloc'ed by rn_init.
+ * rn_zeros and rn_ones are set in rn_init and used read-only afterwards.
+ * addmask_key is used read-write in rn_addmask and is not thread-safe.
+ */
+static char *rn_zeros, *rn_ones, *addmask_key;
+
+#define MKGet(m) { \
+ if (rn_mkfreelist) { \
+ m = rn_mkfreelist; \
+ rn_mkfreelist = (m)->rm_mklist; \
+ } else \
+ R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask)); }
+
+#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);}
+
+#define rn_masktop (mask_rnhead->rnh_treetop)
+
+static int rn_lexobetter(void *m_arg, void *n_arg);
+static struct radix_mask *
+ rn_new_radix_mask(struct radix_node *tt,
+ struct radix_mask *next);
+static int rn_satisfies_leaf(char *trial, struct radix_node *leaf,
+ int skip);
+
+/*
+ * The data structure for the keys is a radix tree with one way
+ * branching removed. The index rn_bit at an internal node n represents a bit
+ * position to be tested. The tree is arranged so that all descendants
+ * of a node n have keys whose bits all agree up to position rn_bit - 1.
+ * (We say the index of n is rn_bit.)
+ *
+ * There is at least one descendant which has a one bit at position rn_bit,
+ * and at least one with a zero there.
+ *
+ * A route is determined by a pair of key and mask. We require that the
+ * bit-wise logical and of the key and mask be the key.
+ * We define the index of a route associated with a mask to be
+ * the first bit number in the mask where 0 occurs (with bit number 0
+ * representing the highest order bit).
+ *
+ * We say a mask is normal if every bit past the index of the mask is 0.
+ * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
+ * and m is a normal mask, then the route applies to every descendant of n.
+ * If index(m) < rn_bit, this implies that the last few bits of k before
+ * bit rn_bit are all 0 (and hence the same is true of every descendant
+ * of n), so the route applies to all descendants of the node as well.
+ *
+ * Similar logic shows that a non-normal mask m such that
+ * index(m) <= index(n) could potentially apply to many children of n.
+ * Thus, for each non-host route, we attach its mask to a list at an internal
+ * node as high in the tree as we can go.
+ *
+ * The present version of the code makes use of normal routes in short-
+ * circuiting an explicit mask and compare operation when testing whether
+ * a key satisfies a normal route, and also in remembering the unique leaf
+ * that governs a subtree.
+ */
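+
+/*
+ * Illustrative example, not part of the original sources: for an IPv4
+ * netmask of 255.255.255.0 the first 0 occurs at bit number 24 (bit 0
+ * being the highest order bit), so the index of the mask is 24; the
+ * mask is normal because every bit past bit 24 is also 0. A mask such
+ * as 255.255.0.255 is non-contiguous and therefore not normal.
+ */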
+
+/*
+ * Most of the functions in this code assume that the key/mask arguments
+ * are sockaddr-like structures, where the first byte is a u_char
+ * indicating the size of the entire structure.
+ *
+ * To make the assumption more explicit, we use the LEN() macro to access
+ * this field. It is safe to pass an expression with side effects
+ * to LEN() as the argument is evaluated only once.
+ * We cast the result to int as this is the dominant usage.
+ */
+#define LEN(x) ( (int) (*(const u_char *)(x)) )
+
+/*
+ * XXX THIS NEEDS TO BE FIXED
+ * In the code, pointers to keys and masks are passed as either
+ * 'void *' (because callers pass pointers of various kinds), or
+ * 'caddr_t' (which is fine for pointer arithmetic, but not very
+ * clean when you dereference it to access data). Furthermore, caddr_t
+ * is really 'char *', while the natural type to operate on keys and
+ * masks would be 'u_char'. This mismatch requires a lot of casts and
+ * intermediate variables to adapt types, which clutters the code.
+ */
+
+/*
+ * Search a node in the tree matching the key.
+ */
+static struct radix_node *
+rn_search(v_arg, head)
+ void *v_arg;
+ struct radix_node *head;
+{
+ register struct radix_node *x;
+ register caddr_t v;
+
+ for (x = head, v = v_arg; x->rn_bit >= 0;) {
+ if (x->rn_bmask & v[x->rn_offset])
+ x = x->rn_right;
+ else
+ x = x->rn_left;
+ }
+ return (x);
+}
+
+/*
+ * Same as above, but with an additional mask.
+ * XXX note this function is used only once.
+ */
+static struct radix_node *
+rn_search_m(v_arg, head, m_arg)
+ struct radix_node *head;
+ void *v_arg, *m_arg;
+{
+ register struct radix_node *x;
+ register caddr_t v = v_arg, m = m_arg;
+
+ for (x = head; x->rn_bit >= 0;) {
+ if ((x->rn_bmask & m[x->rn_offset]) &&
+ (x->rn_bmask & v[x->rn_offset]))
+ x = x->rn_right;
+ else
+ x = x->rn_left;
+ }
+ return x;
+}
+
+int
+rn_refines(m_arg, n_arg)
+ void *m_arg, *n_arg;
+{
+ register caddr_t m = m_arg, n = n_arg;
+ register caddr_t lim, lim2 = lim = n + LEN(n);
+ int longer = LEN(n++) - LEN(m++);
+ int masks_are_equal = 1;
+
+ if (longer > 0)
+ lim -= longer;
+ while (n < lim) {
+ if (*n & ~(*m))
+ return 0;
+ if (*n++ != *m++)
+ masks_are_equal = 0;
+ }
+ while (n < lim2)
+ if (*n++)
+ return 0;
+ if (masks_are_equal && (longer < 0))
+ for (lim2 = m - longer; m < lim2; )
+ if (*m++)
+ return 1;
+ return (!masks_are_equal);
+}
+
+struct radix_node *
+rn_lookup(v_arg, m_arg, head)
+ void *v_arg, *m_arg;
+ struct radix_node_head *head;
+{
+ register struct radix_node *x;
+ caddr_t netmask = 0;
+
+ if (m_arg) {
+ x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
+ if (x == 0)
+ return (0);
+ netmask = x->rn_key;
+ }
+ x = rn_match(v_arg, head);
+ if (x && netmask) {
+ while (x && x->rn_mask != netmask)
+ x = x->rn_dupedkey;
+ }
+ return x;
+}
+
+static int
+rn_satisfies_leaf(trial, leaf, skip)
+ char *trial;
+ register struct radix_node *leaf;
+ int skip;
+{
+ register char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
+ char *cplim;
+ int length = min(LEN(cp), LEN(cp2));
+
+ if (cp3 == NULL)
+ cp3 = rn_ones;
+ else
+ length = min(length, LEN(cp3));
+ cplim = cp + length; cp3 += skip; cp2 += skip;
+ for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
+ if ((*cp ^ *cp2) & *cp3)
+ return 0;
+ return 1;
+}
+
+struct radix_node *
+rn_match(v_arg, head)
+ void *v_arg;
+ struct radix_node_head *head;
+{
+ caddr_t v = v_arg;
+ register struct radix_node *t = head->rnh_treetop, *x;
+ register caddr_t cp = v, cp2;
+ caddr_t cplim;
+ struct radix_node *saved_t, *top = t;
+ int off = t->rn_offset, vlen = LEN(cp), matched_off;
+ register int test, b, rn_bit;
+
+ /*
+ * Open code rn_search(v, top) to avoid overhead of extra
+ * subroutine call.
+ */
+ for (; t->rn_bit >= 0; ) {
+ if (t->rn_bmask & cp[t->rn_offset])
+ t = t->rn_right;
+ else
+ t = t->rn_left;
+ }
+ /*
+ * See if we match exactly as a host destination
+ * or at least learn how many bits match, for normal mask finesse.
+ *
+ * It doesn't hurt us to limit how many bytes to check
+ * to the length of the mask, since if it matches we had a genuine
+ * match and the leaf we have is the most specific one anyway;
+ * if it didn't match with a shorter length it would fail
+ * with a long one. This wins big for class B&C netmasks which
+ * are probably the most common case...
+ */
+ if (t->rn_mask)
+ vlen = *(u_char *)t->rn_mask;
+ cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
+ for (; cp < cplim; cp++, cp2++)
+ if (*cp != *cp2)
+ goto on1;
+ /*
+ * This extra grot is in case we are explicitly asked
+ * to look up the default. Ugh!
+ *
+ * Never return the root node itself, it seems to cause a
+ * lot of confusion.
+ */
+ if (t->rn_flags & RNF_ROOT)
+ t = t->rn_dupedkey;
+ return t;
+on1:
+ test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
+ for (b = 7; (test >>= 1) > 0;)
+ b--;
+ matched_off = cp - v;
+ b += matched_off << 3;
+ rn_bit = -1 - b;
+ /*
+ * If there is a host route in a duped-key chain, it will be first.
+ */
+ if ((saved_t = t)->rn_mask == 0)
+ t = t->rn_dupedkey;
+ for (; t; t = t->rn_dupedkey)
+ /*
+ * Even if we don't match exactly as a host,
+ * we may match if the leaf we wound up at is
+ * a route to a net.
+ */
+ if (t->rn_flags & RNF_NORMAL) {
+ if (rn_bit <= t->rn_bit)
+ return t;
+ } else if (rn_satisfies_leaf(v, t, matched_off))
+ return t;
+ t = saved_t;
+ /* start searching up the tree */
+ do {
+ register struct radix_mask *m;
+ t = t->rn_parent;
+ m = t->rn_mklist;
+ /*
+ * If non-contiguous masks ever become important
+ * we can restore the masking and open coding of
+ * the search and satisfaction test and put the
+ * calculation of "off" back before the "do".
+ */
+ while (m) {
+ if (m->rm_flags & RNF_NORMAL) {
+ if (rn_bit <= m->rm_bit)
+ return (m->rm_leaf);
+ } else {
+ off = min(t->rn_offset, matched_off);
+ x = rn_search_m(v, t, m->rm_mask);
+ while (x && x->rn_mask != m->rm_mask)
+ x = x->rn_dupedkey;
+ if (x && rn_satisfies_leaf(v, x, off))
+ return x;
+ }
+ m = m->rm_mklist;
+ }
+ } while (t != top);
+ return 0;
+}
+
+#ifdef RN_DEBUG
+int rn_nodenum;
+struct radix_node *rn_clist;
+int rn_saveinfo;
+int rn_debug = 1;
+#endif
+
+/*
+ * Whenever we add a new leaf to the tree, we also add a parent node,
+ * so we allocate them as an array of two elements: the first one must be
+ * the leaf (see RNTORT() in route.c), the second one is the parent.
+ * This routine initializes the relevant fields of the nodes, so that
+ * the leaf is the left child of the parent node, and both nodes have
+ * (almost) all fields filled as appropriate.
+ * (XXX some fields are left unset, see the '#if 0' section).
+ * The function returns a pointer to the parent node.
+ */
+
+static struct radix_node *
+rn_newpair(v, b, nodes)
+ void *v;
+ int b;
+ struct radix_node nodes[2];
+{
+ register struct radix_node *tt = nodes, *t = tt + 1;
+ t->rn_bit = b;
+ t->rn_bmask = 0x80 >> (b & 7);
+ t->rn_left = tt;
+ t->rn_offset = b >> 3;
+
+#if 0 /* XXX perhaps we should fill these fields as well. */
+ t->rn_parent = t->rn_right = NULL;
+
+ tt->rn_mask = NULL;
+ tt->rn_dupedkey = NULL;
+ tt->rn_bmask = 0;
+#endif
+ tt->rn_bit = -1;
+ tt->rn_key = (caddr_t)v;
+ tt->rn_parent = t;
+ tt->rn_flags = t->rn_flags = RNF_ACTIVE;
+ tt->rn_mklist = t->rn_mklist = 0;
+#ifdef RN_DEBUG
+ tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
+ tt->rn_twin = t;
+ tt->rn_ybro = rn_clist;
+ rn_clist = tt;
+#endif
+ return t;
+}
+
+static struct radix_node *
+rn_insert(v_arg, head, dupentry, nodes)
+ void *v_arg;
+ struct radix_node_head *head;
+ int *dupentry;
+ struct radix_node nodes[2];
+{
+ caddr_t v = v_arg;
+ struct radix_node *top = head->rnh_treetop;
+ int head_off = top->rn_offset, vlen = LEN(v);
+ register struct radix_node *t = rn_search(v_arg, top);
+ register caddr_t cp = v + head_off;
+ register int b;
+ struct radix_node *tt;
+ /*
+ * Find first bit at which v and t->rn_key differ
+ */
+ {
+ register caddr_t cp2 = t->rn_key + head_off;
+ register int cmp_res;
+ caddr_t cplim = v + vlen;
+
+ while (cp < cplim)
+ if (*cp2++ != *cp++)
+ goto on1;
+ *dupentry = 1;
+ return t;
+on1:
+ *dupentry = 0;
+ cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
+ for (b = (cp - v) << 3; cmp_res; b--)
+ cmp_res >>= 1;
+ }
+ {
+ register struct radix_node *p, *x = top;
+ cp = v;
+ do {
+ p = x;
+ if (cp[x->rn_offset] & x->rn_bmask)
+ x = x->rn_right;
+ else
+ x = x->rn_left;
+ } while (b > (unsigned) x->rn_bit);
+ /* x->rn_bit < b && x->rn_bit >= 0 */
+#ifdef RN_DEBUG
+ if (rn_debug)
+ log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
+#endif
+ t = rn_newpair(v_arg, b, nodes);
+ tt = t->rn_left;
+ if ((cp[p->rn_offset] & p->rn_bmask) == 0)
+ p->rn_left = t;
+ else
+ p->rn_right = t;
+ x->rn_parent = t;
+ t->rn_parent = p; /* frees x, p as temp vars below */
+ if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
+ t->rn_right = x;
+ } else {
+ t->rn_right = tt;
+ t->rn_left = x;
+ }
+#ifdef RN_DEBUG
+ if (rn_debug)
+ log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
+#endif
+ }
+ return (tt);
+}
+
+struct radix_node *
+rn_addmask(n_arg, search, skip)
+ int search, skip;
+ void *n_arg;
+{
+ caddr_t netmask = (caddr_t)n_arg;
+ register struct radix_node *x;
+ register caddr_t cp, cplim;
+ register int b = 0, mlen, j;
+ int maskduplicated, m0, isnormal;
+ struct radix_node *saved_x;
+ static int last_zeroed = 0;
+
+ if ((mlen = LEN(netmask)) > max_keylen)
+ mlen = max_keylen;
+ if (skip == 0)
+ skip = 1;
+ if (mlen <= skip)
+ return (mask_rnhead->rnh_nodes);
+ if (skip > 1)
+ bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
+ if ((m0 = mlen) > skip)
+ bcopy(netmask + skip, addmask_key + skip, mlen - skip);
+ /*
+ * Trim trailing zeroes.
+ */
+ for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
+ cp--;
+ mlen = cp - addmask_key;
+ if (mlen <= skip) {
+ if (m0 >= last_zeroed)
+ last_zeroed = mlen;
+ return (mask_rnhead->rnh_nodes);
+ }
+ if (m0 < last_zeroed)
+ bzero(addmask_key + m0, last_zeroed - m0);
+ *addmask_key = last_zeroed = mlen;
+ x = rn_search(addmask_key, rn_masktop);
+ if (bcmp(addmask_key, x->rn_key, mlen) != 0)
+ x = 0;
+ if (x || search)
+ return (x);
+ R_Zalloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
+ if ((saved_x = x) == 0)
+ return (0);
+ netmask = cp = (caddr_t)(x + 2);
+ bcopy(addmask_key, cp, mlen);
+ x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
+ if (maskduplicated) {
+ log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
+ Free(saved_x);
+ return (x);
+ }
+ /*
+ * Calculate index of mask, and check for normalcy.
+ * First find the first byte with a 0 bit, then if there are
+ * more bits left (remember we already trimmed the trailing 0's),
+ * the pattern must be one of those in normal_chars[], or we have
+ * a non-contiguous mask.
+ */
+ cplim = netmask + mlen;
+ isnormal = 1;
+ for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
+ cp++;
+ if (cp != cplim) {
+ static char normal_chars[] = {
+ 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
+
+ for (j = 0x80; (j & *cp) != 0; j >>= 1)
+ b++;
+ if (*cp != normal_chars[b] || cp != (cplim - 1))
+ isnormal = 0;
+ }
+ b += (cp - netmask) << 3;
+ x->rn_bit = -1 - b;
+ if (isnormal)
+ x->rn_flags |= RNF_NORMAL;
+ return (x);
+}
+
+static int /* XXX: arbitrary ordering for non-contiguous masks */
+rn_lexobetter(m_arg, n_arg)
+ void *m_arg, *n_arg;
+{
+ register u_char *mp = m_arg, *np = n_arg, *lim;
+
+ if (LEN(mp) > LEN(np))
+ return 1; /* not really, but need to check longer one first */
+ if (LEN(mp) == LEN(np))
+ for (lim = mp + LEN(mp); mp < lim;)
+ if (*mp++ > *np++)
+ return 1;
+ return 0;
+}
+
+static struct radix_mask *
+rn_new_radix_mask(tt, next)
+ register struct radix_node *tt;
+ register struct radix_mask *next;
+{
+ register struct radix_mask *m;
+
+ MKGet(m);
+ if (m == 0) {
+ log(LOG_ERR, "Mask for route not entered\n");
+ return (0);
+ }
+ bzero(m, sizeof *m);
+ m->rm_bit = tt->rn_bit;
+ m->rm_flags = tt->rn_flags;
+ if (tt->rn_flags & RNF_NORMAL)
+ m->rm_leaf = tt;
+ else
+ m->rm_mask = tt->rn_mask;
+ m->rm_mklist = next;
+ tt->rn_mklist = m;
+ return m;
+}
+
+struct radix_node *
+rn_addroute(v_arg, n_arg, head, treenodes)
+ void *v_arg, *n_arg;
+ struct radix_node_head *head;
+ struct radix_node treenodes[2];
+{
+ caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
+ register struct radix_node *t, *x = 0, *tt;
+ struct radix_node *saved_tt, *top = head->rnh_treetop;
+ short b = 0, b_leaf = 0;
+ int keyduplicated;
+ caddr_t mmask;
+ struct radix_mask *m, **mp;
+
+ /*
+ * In dealing with non-contiguous masks, there may be
+ * many different routes which have the same mask.
+ * We will find it useful to have a unique pointer to
+ * the mask to speed avoiding duplicate references at
+ * nodes and possibly save time in calculating indices.
+ */
+ if (netmask) {
+ if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
+ return (0);
+ b_leaf = x->rn_bit;
+ b = -1 - x->rn_bit;
+ netmask = x->rn_key;
+ }
+ /*
+ * Deal with duplicated keys: attach node to previous instance
+ */
+ saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
+ if (keyduplicated) {
+ for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
+#ifdef RADIX_MPATH
+ /* permit multipath, if enabled for the family */
+ if (rn_mpath_capable(head) && netmask == tt->rn_mask) {
+ /*
+ * go down to the end of multipaths, so that
+ * new entry goes into the end of rn_dupedkey
+ * chain.
+ */
+ do {
+ t = tt;
+ tt = tt->rn_dupedkey;
+ } while (tt && t->rn_mask == tt->rn_mask);
+ break;
+ }
+#endif
+ if (tt->rn_mask == netmask)
+ return (0);
+ if (netmask == 0 ||
+ (tt->rn_mask &&
+ ((b_leaf < tt->rn_bit) /* index(netmask) > node */
+ || rn_refines(netmask, tt->rn_mask)
+ || rn_lexobetter(netmask, tt->rn_mask))))
+ break;
+ }
+ /*
+ * If the mask is not duplicated, we wouldn't
+ * find it among possible duplicate key entries
+ * anyway, so the above test doesn't hurt.
+ *
+ * We sort the masks for a duplicated key the same way as
+ * in a masklist -- most specific to least specific.
+ * This may require the unfortunate nuisance of relocating
+ * the head of the list.
+ *
+ * We also reverse, or doubly link the list through the
+ * parent pointer.
+ */
+ if (tt == saved_tt) {
+ struct radix_node *xx = x;
+ /* link in at head of list */
+ (tt = treenodes)->rn_dupedkey = t;
+ tt->rn_flags = t->rn_flags;
+ tt->rn_parent = x = t->rn_parent;
+ t->rn_parent = tt; /* parent */
+ if (x->rn_left == t)
+ x->rn_left = tt;
+ else
+ x->rn_right = tt;
+ saved_tt = tt; x = xx;
+ } else {
+ (tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
+ t->rn_dupedkey = tt;
+ tt->rn_parent = t; /* parent */
+ if (tt->rn_dupedkey) /* parent */
+ tt->rn_dupedkey->rn_parent = tt; /* parent */
+ }
+#ifdef RN_DEBUG
+ t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
+ tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
+#endif
+ tt->rn_key = (caddr_t) v;
+ tt->rn_bit = -1;
+ tt->rn_flags = RNF_ACTIVE;
+ }
+ /*
+ * Put mask in tree.
+ */
+ if (netmask) {
+ tt->rn_mask = netmask;
+ tt->rn_bit = x->rn_bit;
+ tt->rn_flags |= x->rn_flags & RNF_NORMAL;
+ }
+ t = saved_tt->rn_parent;
+ if (keyduplicated)
+ goto on2;
+ b_leaf = -1 - t->rn_bit;
+ if (t->rn_right == saved_tt)
+ x = t->rn_left;
+ else
+ x = t->rn_right;
+ /* Promote general routes from below */
+ if (x->rn_bit < 0) {
+ for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
+ if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
+ *mp = m = rn_new_radix_mask(x, 0);
+ if (m)
+ mp = &m->rm_mklist;
+ }
+ } else if (x->rn_mklist) {
+ /*
+ * Skip over masks whose index is > that of new node
+ */
+ for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
+ if (m->rm_bit >= b_leaf)
+ break;
+ t->rn_mklist = m; *mp = 0;
+ }
+on2:
+ /* Add new route to highest possible ancestor's list */
+ if ((netmask == 0) || (b > t->rn_bit ))
+ return tt; /* can't lift at all */
+ b_leaf = tt->rn_bit;
+ do {
+ x = t;
+ t = t->rn_parent;
+ } while (b <= t->rn_bit && x != top);
+ /*
+ * Search through routes associated with node to
+ * insert new route according to index.
+ * Need same criteria as when sorting dupedkeys to avoid
+ * double loop on deletion.
+ */
+ for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
+ if (m->rm_bit < b_leaf)
+ continue;
+ if (m->rm_bit > b_leaf)
+ break;
+ if (m->rm_flags & RNF_NORMAL) {
+ mmask = m->rm_leaf->rn_mask;
+ if (tt->rn_flags & RNF_NORMAL) {
+#if !defined(RADIX_MPATH)
+ log(LOG_ERR,
+ "Non-unique normal route, mask not entered\n");
+#endif
+ return tt;
+ }
+ } else
+ mmask = m->rm_mask;
+ if (mmask == netmask) {
+ m->rm_refs++;
+ tt->rn_mklist = m;
+ return tt;
+ }
+ if (rn_refines(netmask, mmask)
+ || rn_lexobetter(netmask, mmask))
+ break;
+ }
+ *mp = rn_new_radix_mask(tt, *mp);
+ return tt;
+}
+
+struct radix_node *
+rn_delete(v_arg, netmask_arg, head)
+ void *v_arg, *netmask_arg;
+ struct radix_node_head *head;
+{
+ register struct radix_node *t, *p, *x, *tt;
+ struct radix_mask *m, *saved_m, **mp;
+ struct radix_node *dupedkey, *saved_tt, *top;
+ caddr_t v, netmask;
+ int b, head_off, vlen;
+
+ v = v_arg;
+ netmask = netmask_arg;
+ x = head->rnh_treetop;
+ tt = rn_search(v, x);
+ head_off = x->rn_offset;
+ vlen = LEN(v);
+ saved_tt = tt;
+ top = x;
+ if (tt == 0 ||
+ bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
+ return (0);
+ /*
+ * Delete our route from mask lists.
+ */
+ if (netmask) {
+ if ((x = rn_addmask(netmask, 1, head_off)) == 0)
+ return (0);
+ netmask = x->rn_key;
+ while (tt->rn_mask != netmask)
+ if ((tt = tt->rn_dupedkey) == 0)
+ return (0);
+ }
+ if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
+ goto on1;
+ if (tt->rn_flags & RNF_NORMAL) {
+ if (m->rm_leaf != tt || m->rm_refs > 0) {
+ log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+ return 0; /* dangling ref could cause disaster */
+ }
+ } else {
+ if (m->rm_mask != tt->rn_mask) {
+ log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+ goto on1;
+ }
+ if (--m->rm_refs >= 0)
+ goto on1;
+ }
+ b = -1 - tt->rn_bit;
+ t = saved_tt->rn_parent;
+ if (b > t->rn_bit)
+ goto on1; /* Wasn't lifted at all */
+ do {
+ x = t;
+ t = t->rn_parent;
+ } while (b <= t->rn_bit && x != top);
+ for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
+ if (m == saved_m) {
+ *mp = m->rm_mklist;
+ MKFree(m);
+ break;
+ }
+ if (m == 0) {
+ log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
+ if (tt->rn_flags & RNF_NORMAL)
+ return (0); /* Dangling ref to us */
+ }
+on1:
+ /*
+ * Eliminate us from tree
+ */
+ if (tt->rn_flags & RNF_ROOT)
+ return (0);
+#ifdef RN_DEBUG
+ /* Get us out of the creation list */
+ for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {}
+ if (t) t->rn_ybro = tt->rn_ybro;
+#endif
+ t = tt->rn_parent;
+ dupedkey = saved_tt->rn_dupedkey;
+ if (dupedkey) {
+ /*
+ * Here, tt is the deletion target and
+ * saved_tt is the head of the dupekey chain.
+ */
+ if (tt == saved_tt) {
+ /* remove from head of chain */
+ x = dupedkey; x->rn_parent = t;
+ if (t->rn_left == tt)
+ t->rn_left = x;
+ else
+ t->rn_right = x;
+ } else {
+ /* find node in front of tt on the chain */
+ for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
+ p = p->rn_dupedkey;
+ if (p) {
+ p->rn_dupedkey = tt->rn_dupedkey;
+ if (tt->rn_dupedkey) /* parent */
+ tt->rn_dupedkey->rn_parent = p;
+ /* parent */
+ } else log(LOG_ERR, "rn_delete: couldn't find us\n");
+ }
+ t = tt + 1;
+ if (t->rn_flags & RNF_ACTIVE) {
+#ifndef RN_DEBUG
+ *++x = *t;
+ p = t->rn_parent;
+#else
+ b = t->rn_info;
+ *++x = *t;
+ t->rn_info = b;
+ p = t->rn_parent;
+#endif
+ if (p->rn_left == t)
+ p->rn_left = x;
+ else
+ p->rn_right = x;
+ x->rn_left->rn_parent = x;
+ x->rn_right->rn_parent = x;
+ }
+ goto out;
+ }
+ if (t->rn_left == tt)
+ x = t->rn_right;
+ else
+ x = t->rn_left;
+ p = t->rn_parent;
+ if (p->rn_right == t)
+ p->rn_right = x;
+ else
+ p->rn_left = x;
+ x->rn_parent = p;
+ /*
+ * Demote routes attached to us.
+ */
+ if (t->rn_mklist) {
+ if (x->rn_bit >= 0) {
+ for (mp = &x->rn_mklist; (m = *mp);)
+ mp = &m->rm_mklist;
+ *mp = t->rn_mklist;
+ } else {
+		/*
+		 * If there are any (key, mask) pairs in a sibling
+		 * duped-key chain, some subset will appear sorted
+		 * in the same order attached to our mklist.
+		 */
+ for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
+ if (m == x->rn_mklist) {
+ struct radix_mask *mm = m->rm_mklist;
+ x->rn_mklist = 0;
+ if (--(m->rm_refs) < 0)
+ MKFree(m);
+ m = mm;
+ }
+ if (m)
+ log(LOG_ERR,
+ "rn_delete: Orphaned Mask %p at %p\n",
+ m, x);
+ }
+ }
+ /*
+ * We may be holding an active internal node in the tree.
+ */
+ x = tt + 1;
+ if (t != x) {
+#ifndef RN_DEBUG
+ *t = *x;
+#else
+ b = t->rn_info;
+ *t = *x;
+ t->rn_info = b;
+#endif
+ t->rn_left->rn_parent = t;
+ t->rn_right->rn_parent = t;
+ p = x->rn_parent;
+ if (p->rn_left == x)
+ p->rn_left = t;
+ else
+ p->rn_right = t;
+ }
+out:
+ tt->rn_flags &= ~RNF_ACTIVE;
+ tt[1].rn_flags &= ~RNF_ACTIVE;
+ return (tt);
+}
+
+/*
+ * This is the same as rn_walktree() except for the parameters and the
+ * exit.
+ */
+static int
+rn_walktree_from(h, a, m, f, w)
+ struct radix_node_head *h;
+ void *a, *m;
+ walktree_f_t *f;
+ void *w;
+{
+ int error;
+ struct radix_node *base, *next;
+ u_char *xa = (u_char *)a;
+ u_char *xm = (u_char *)m;
+ register struct radix_node *rn, *last = 0 /* shut up gcc */;
+ int stopping = 0;
+ int lastb;
+
+ /*
+ * rn_search_m is sort-of-open-coded here. We cannot use the
+ * function because we need to keep track of the last node seen.
+ */
+ /* printf("about to search\n"); */
+ for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
+ last = rn;
+ /* printf("rn_bit %d, rn_bmask %x, xm[rn_offset] %x\n",
+ rn->rn_bit, rn->rn_bmask, xm[rn->rn_offset]); */
+ if (!(rn->rn_bmask & xm[rn->rn_offset])) {
+ break;
+ }
+ if (rn->rn_bmask & xa[rn->rn_offset]) {
+ rn = rn->rn_right;
+ } else {
+ rn = rn->rn_left;
+ }
+ }
+ /* printf("done searching\n"); */
+
+ /*
+ * Two cases: either we stepped off the end of our mask,
+ * in which case last == rn, or we reached a leaf, in which
+ * case we want to start from the last node we looked at.
+ * Either way, last is the node we want to start from.
+ */
+ rn = last;
+ lastb = rn->rn_bit;
+
+ /* printf("rn %p, lastb %d\n", rn, lastb);*/
+
+ /*
+ * This gets complicated because we may delete the node
+ * while applying the function f to it, so we need to calculate
+ * the successor node in advance.
+ */
+ while (rn->rn_bit >= 0)
+ rn = rn->rn_left;
+
+ while (!stopping) {
+ /* printf("node %p (%d)\n", rn, rn->rn_bit); */
+ base = rn;
+ /* If at right child go back up, otherwise, go right */
+ while (rn->rn_parent->rn_right == rn
+ && !(rn->rn_flags & RNF_ROOT)) {
+ rn = rn->rn_parent;
+
+ /* if went up beyond last, stop */
+ if (rn->rn_bit <= lastb) {
+ stopping = 1;
+ /* printf("up too far\n"); */
+ /*
+ * XXX we should jump to the 'Process leaves'
+ * part, because the values of 'rn' and 'next'
+ * we compute will not be used. Not a big deal
+ * because this loop will terminate, but it is
+ * inefficient and hard to understand!
+ */
+ }
+ }
+
+ /*
+	 * At the top of the tree there is no need to traverse the right
+	 * half; this prevents traversal of the entire tree in the
+	 * case of a default route.
+ */
+ if (rn->rn_parent->rn_flags & RNF_ROOT)
+ stopping = 1;
+
+ /* Find the next *leaf* since next node might vanish, too */
+ for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
+ rn = rn->rn_left;
+ next = rn;
+ /* Process leaves */
+ while ((rn = base) != 0) {
+ base = rn->rn_dupedkey;
+ /* printf("leaf %p\n", rn); */
+ if (!(rn->rn_flags & RNF_ROOT)
+ && (error = (*f)(rn, w)))
+ return (error);
+ }
+ rn = next;
+
+ if (rn->rn_flags & RNF_ROOT) {
+ /* printf("root, stopping"); */
+ stopping = 1;
+ }
+
+ }
+ return 0;
+}
+
+static int
+rn_walktree(h, f, w)
+ struct radix_node_head *h;
+ walktree_f_t *f;
+ void *w;
+{
+ int error;
+ struct radix_node *base, *next;
+ register struct radix_node *rn = h->rnh_treetop;
+ /*
+ * This gets complicated because we may delete the node
+ * while applying the function f to it, so we need to calculate
+ * the successor node in advance.
+ */
+
+ /* First time through node, go left */
+ while (rn->rn_bit >= 0)
+ rn = rn->rn_left;
+ for (;;) {
+ base = rn;
+ /* If at right child go back up, otherwise, go right */
+ while (rn->rn_parent->rn_right == rn
+ && (rn->rn_flags & RNF_ROOT) == 0)
+ rn = rn->rn_parent;
+ /* Find the next *leaf* since next node might vanish, too */
+ for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
+ rn = rn->rn_left;
+ next = rn;
+ /* Process leaves */
+ while ((rn = base)) {
+ base = rn->rn_dupedkey;
+ if (!(rn->rn_flags & RNF_ROOT)
+ && (error = (*f)(rn, w)))
+ return (error);
+ }
+ rn = next;
+ if (rn->rn_flags & RNF_ROOT)
+ return (0);
+ }
+ /* NOTREACHED */
+}
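
Each walker delivers every leaf, including its whole dupedkey chain, to the walktree_f_t callback, and computes the successor first so the callback may delete the very node it is handed. A minimal consumer sketch, assuming a populated head 'rnh' and eliding locking:

static int
count_leaf(struct radix_node *rn, void *arg)
{
	int *count = arg;	/* the cookie passed as 'w' */

	(*count)++;
	return (0);		/* non-zero aborts the walk and is returned */
}

static int
count_routes(struct radix_node_head *rnh)
{
	int count = 0;

	rnh->rnh_walktree(rnh, count_leaf, &count);
	return (count);
}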
+
+/*
+ * Allocate and initialize an empty tree. This has 3 nodes, which are
+ * part of the radix_node_head (in the order <left,root,right>) and are
+ * marked RNF_ROOT so they cannot be freed.
+ * The leaves have all-zero and all-one keys, with significant
+ * bits starting at 'off'.
+ * Return 1 on success, 0 on error.
+ */
+int
+rn_inithead(head, off)
+ void **head;
+ int off;
+{
+ register struct radix_node_head *rnh;
+ register struct radix_node *t, *tt, *ttt;
+ if (*head)
+ return (1);
+ R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
+ if (rnh == 0)
+ return (0);
+#ifdef _KERNEL
+ RADIX_NODE_HEAD_LOCK_INIT(rnh);
+#endif
+ *head = rnh;
+ t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
+ ttt = rnh->rnh_nodes + 2;
+ t->rn_right = ttt;
+ t->rn_parent = t;
+ tt = t->rn_left; /* ... which in turn is rnh->rnh_nodes */
+ tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
+ tt->rn_bit = -1 - off;
+ *ttt = *tt;
+ ttt->rn_key = rn_ones;
+ rnh->rnh_addaddr = rn_addroute;
+ rnh->rnh_deladdr = rn_delete;
+ rnh->rnh_matchaddr = rn_match;
+ rnh->rnh_lookup = rn_lookup;
+ rnh->rnh_walktree = rn_walktree;
+ rnh->rnh_walktree_from = rn_walktree_from;
+ rnh->rnh_treetop = t;
+ return (1);
+}
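
The empty tree built above, sketched (rnh_nodes[1] is the interior node produced by rn_newpair(), with its parent pointer aimed at itself since it is the top):

              rnh_nodes[1]    interior node, tests bit 'off'
              /           \
    rnh_nodes[0]         rnh_nodes[2]
    leaf, key rn_zeros   leaf, key rn_ones

All three carry RNF_ROOT | RNF_ACTIVE, and rn_delete() refuses to remove any node marked RNF_ROOT, so the skeleton can never be freed piecemeal.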
+
+int
+rn_detachhead(void **head)
+{
+ struct radix_node_head *rnh;
+
+ KASSERT((head != NULL && *head != NULL),
+ ("%s: head already freed", __func__));
+ rnh = *head;
+
+ /* Free <left,root,right> nodes. */
+ Free(rnh);
+
+ *head = NULL;
+ return (1);
+}
+
+void
+rn_init(int maxk)
+{
+ char *cp, *cplim;
+
+ max_keylen = maxk;
+ if (max_keylen == 0) {
+ log(LOG_ERR,
+ "rn_init: radix functions require max_keylen be set\n");
+ return;
+ }
+ R_Malloc(rn_zeros, char *, 3 * max_keylen);
+ if (rn_zeros == NULL)
+ panic("rn_init");
+ bzero(rn_zeros, 3 * max_keylen);
+ rn_ones = cp = rn_zeros + max_keylen;
+ addmask_key = cplim = rn_ones + max_keylen;
+ while (cp < cplim)
+ *cp++ = -1;
+ if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0)
+ panic("rn_init 2");
+}
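
The pointer arithmetic above carves the single 3 * max_keylen allocation into three adjacent regions:

  rn_zeros                rn_ones                 addmask_key
  |<--- max_keylen ---->|<--- max_keylen ---->|<--- max_keylen ---->|
  all 0x00 bytes         all 0xff bytes         scratch for rn_addmask()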
diff --git a/freebsd/sys/net/radix.h b/freebsd/sys/net/radix.h
new file mode 100644
index 00000000..ccd5f491
--- /dev/null
+++ b/freebsd/sys/net/radix.h
@@ -0,0 +1,176 @@
+/*-
+ * Copyright (c) 1988, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)radix.h 8.2 (Berkeley) 10/31/94
+ * $FreeBSD$
+ */
+
+#ifndef _RADIX_HH_
+#define _RADIX_HH_
+
+#ifdef _KERNEL
+#include <freebsd/sys/_lock.h>
+#include <freebsd/sys/_mutex.h>
+#include <freebsd/sys/_rwlock.h>
+#endif
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_RTABLE);
+#endif
+
+/*
+ * Radix search tree node layout.
+ */
+
+struct radix_node {
+ struct radix_mask *rn_mklist; /* list of masks contained in subtree */
+ struct radix_node *rn_parent; /* parent */
+ short rn_bit; /* bit offset; -1-index(netmask) */
+ char rn_bmask; /* node: mask for bit test*/
+ u_char rn_flags; /* enumerated next */
+#define RNF_NORMAL 1 /* leaf contains normal route */
+#define RNF_ROOT 2 /* leaf is root leaf for tree */
+#define RNF_ACTIVE 4 /* This node is alive (for rtfree) */
+ union {
+ struct { /* leaf only data: */
+ caddr_t rn_Key; /* object of search */
+ caddr_t rn_Mask; /* netmask, if present */
+ struct radix_node *rn_Dupedkey;
+ } rn_leaf;
+ struct { /* node only data: */
+ int rn_Off; /* where to start compare */
+ struct radix_node *rn_L;/* progeny */
+ struct radix_node *rn_R;/* progeny */
+ } rn_node;
+ } rn_u;
+#ifdef RN_DEBUG
+ int rn_info;
+ struct radix_node *rn_twin;
+ struct radix_node *rn_ybro;
+#endif
+};
+
+#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey
+#define rn_key rn_u.rn_leaf.rn_Key
+#define rn_mask rn_u.rn_leaf.rn_Mask
+#define rn_offset rn_u.rn_node.rn_Off
+#define rn_left rn_u.rn_node.rn_L
+#define rn_right rn_u.rn_node.rn_R
+
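The rn_u union lets one structure act as either an interior node or a leaf, and the sign of rn_bit distinguishes the two roles throughout radix.c. A hypothetical helper, not part of this header, that makes the convention explicit:

static __inline int
rn_is_leaf(const struct radix_node *rn)
{

	return (rn->rn_bit < 0);	/* leaves store -1 - index(netmask) */
}
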
+/*
+ * Annotations to tree concerning potential routes applying to subtrees.
+ */
+
+struct radix_mask {
+ short rm_bit; /* bit offset; -1-index(netmask) */
+ char rm_unused; /* cf. rn_bmask */
+ u_char rm_flags; /* cf. rn_flags */
+ struct radix_mask *rm_mklist; /* more masks to try */
+ union {
+ caddr_t rmu_mask; /* the mask */
+ struct radix_node *rmu_leaf; /* for normal routes */
+ } rm_rmu;
+ int rm_refs; /* # of references to this struct */
+};
+
+#define rm_mask rm_rmu.rmu_mask
+#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */
+
+typedef int walktree_f_t(struct radix_node *, void *);
+
+struct radix_node_head {
+ struct radix_node *rnh_treetop;
+ int rnh_addrsize; /* permit, but not require fixed keys */
+ int rnh_pktsize; /* permit, but not require fixed keys */
+ struct radix_node *(*rnh_addaddr) /* add based on sockaddr */
+ (void *v, void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]);
+ struct radix_node *(*rnh_addpkt) /* add based on packet hdr */
+ (void *v, void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]);
+ struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */
+ (void *v, void *mask, struct radix_node_head *head);
+ struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
+ (void *v, void *mask, struct radix_node_head *head);
+ struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */
+ (void *v, struct radix_node_head *head);
+ struct radix_node *(*rnh_lookup) /* locate based on sockaddr */
+ (void *v, void *mask, struct radix_node_head *head);
+ struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
+ (void *v, struct radix_node_head *head);
+ int (*rnh_walktree) /* traverse tree */
+ (struct radix_node_head *head, walktree_f_t *f, void *w);
+ int (*rnh_walktree_from) /* traverse tree below a */
+ (struct radix_node_head *head, void *a, void *m,
+ walktree_f_t *f, void *w);
+ void (*rnh_close) /* do something when the last ref drops */
+ (struct radix_node *rn, struct radix_node_head *head);
+ struct radix_node rnh_nodes[3]; /* empty tree for common case */
+ int rnh_multipath; /* multipath capable ? */
+#ifdef _KERNEL
+ struct rwlock rnh_lock; /* locks entire radix tree */
+#endif
+};
+
+#ifndef _KERNEL
+#define R_Malloc(p, t, n) (p = (t) malloc((unsigned int)(n)))
+#define R_Zalloc(p, t, n) (p = (t) calloc(1,(unsigned int)(n)))
+#define Free(p) free((char *)p);
+#else
+#define R_Malloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT))
+#define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO))
+#define Free(p) free((caddr_t)p, M_RTABLE);
+
+#define RADIX_NODE_HEAD_LOCK_INIT(rnh) \
+ rw_init_flags(&(rnh)->rnh_lock, "radix node head", 0)
+#define RADIX_NODE_HEAD_LOCK(rnh) rw_wlock(&(rnh)->rnh_lock)
+#define RADIX_NODE_HEAD_UNLOCK(rnh) rw_wunlock(&(rnh)->rnh_lock)
+#define RADIX_NODE_HEAD_RLOCK(rnh) rw_rlock(&(rnh)->rnh_lock)
+#define RADIX_NODE_HEAD_RUNLOCK(rnh) rw_runlock(&(rnh)->rnh_lock)
+#define RADIX_NODE_HEAD_LOCK_TRY_UPGRADE(rnh) rw_try_upgrade(&(rnh)->rnh_lock)
+
+
+#define RADIX_NODE_HEAD_DESTROY(rnh) rw_destroy(&(rnh)->rnh_lock)
+#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_LOCKED)
+#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED)
+#endif /* _KERNEL */
+
+void rn_init(int);
+int rn_inithead(void **, int);
+int rn_detachhead(void **);
+int rn_refines(void *, void *);
+struct radix_node
+ *rn_addmask(void *, int, int),
+ *rn_addroute (void *, void *, struct radix_node_head *,
+ struct radix_node [2]),
+ *rn_delete(void *, void *, struct radix_node_head *),
+ *rn_lookup (void *v_arg, void *m_arg,
+ struct radix_node_head *head),
+ *rn_match(void *, struct radix_node_head *);
+
+#endif /* _RADIX_HH_ */
diff --git a/freebsd/sys/net/radix_mpath.c b/freebsd/sys/net/radix_mpath.c
new file mode 100644
index 00000000..3c348249
--- /dev/null
+++ b/freebsd/sys/net/radix_mpath.c
@@ -0,0 +1,365 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $KAME: radix_mpath.c,v 1.17 2004/11/08 10:29:39 itojun Exp $ */
+
+/*
+ * Copyright (C) 2001 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
+ * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
+ * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
+ * PROPERTIES.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/net/radix.h>
+#include <freebsd/net/radix_mpath.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_var.h>
+
+/*
+ * give some jitter to hash, to avoid synchronization between routers
+ */
+static uint32_t hashjitter;
+
+int
+rn_mpath_capable(struct radix_node_head *rnh)
+{
+
+ return rnh->rnh_multipath;
+}
+
+struct radix_node *
+rn_mpath_next(struct radix_node *rn)
+{
+ struct radix_node *next;
+
+ if (!rn->rn_dupedkey)
+ return NULL;
+ next = rn->rn_dupedkey;
+ if (rn->rn_mask == next->rn_mask)
+ return next;
+ else
+ return NULL;
+}
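
Since rn_mpath_next() returns NULL as soon as the mask changes, a multipath group is the leading run of a dupedkey chain that shares one mask. A traversal sketch, where 'first' and 'handle' are hypothetical placeholders:

struct radix_node *rn;

for (rn = first; rn != NULL; rn = rn_mpath_next(rn))
	handle((struct rtentry *)rn);	/* visit each route in the group */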
+
+#ifdef __rtems__
+u_int32_t
+#else
+uint32_t
+#endif
+rn_mpath_count(struct radix_node *rn)
+{
+ uint32_t i = 0;
+ struct rtentry *rt;
+
+ while (rn != NULL) {
+ rt = (struct rtentry *)rn;
+ i += rt->rt_rmx.rmx_weight;
+ rn = rn_mpath_next(rn);
+ }
+ return (i);
+}
+
+struct rtentry *
+rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
+{
+ struct radix_node *rn;
+
+ if (!rn_mpath_next((struct radix_node *)rt))
+ return rt;
+
+ if (!gate)
+ return NULL;
+
+ /* beyond here, we use rn as the master copy */
+ rn = (struct radix_node *)rt;
+ do {
+ rt = (struct rtentry *)rn;
+ /*
+		 * we are removing an address alias that has
+		 * the same prefix as another address.
+		 * we need to compare the interface address because
+		 * rt_gateway is a special sockaddr_dl structure.
+ */
+ if (rt->rt_gateway->sa_family == AF_LINK) {
+ if (!memcmp(rt->rt_ifa->ifa_addr, gate, gate->sa_len))
+ break;
+ } else {
+ if (rt->rt_gateway->sa_len == gate->sa_len &&
+ !memcmp(rt->rt_gateway, gate, gate->sa_len))
+ break;
+ }
+ } while ((rn = rn_mpath_next(rn)) != NULL);
+
+ return (struct rtentry *)rn;
+}
+
+/*
+ * go through the chain and unlink "rt" from the list
+ * the caller will free "rt"
+ */
+int
+rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt)
+{
+ struct radix_node *t, *tt;
+
+ if (!headrt || !rt)
+ return (0);
+ t = (struct radix_node *)headrt;
+ tt = rn_mpath_next(t);
+ while (tt) {
+ if (tt == (struct radix_node *)rt) {
+ t->rn_dupedkey = tt->rn_dupedkey;
+ tt->rn_dupedkey = NULL;
+ tt->rn_flags &= ~RNF_ACTIVE;
+ tt[1].rn_flags &= ~RNF_ACTIVE;
+ return (1);
+ }
+ t = tt;
+ tt = rn_mpath_next((struct radix_node *)t);
+ }
+ return (0);
+}
+
+/*
+ * check if we have the same key/mask/gateway on the table already.
+ */
+int
+rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
+ struct sockaddr *netmask)
+{
+ struct radix_node *rn, *rn1;
+ struct rtentry *rt1;
+ char *p, *q, *eq;
+ int same, l, skip;
+
+ rn = (struct radix_node *)rt;
+ rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
+ if (!rn1 || rn1->rn_flags & RNF_ROOT)
+ return 0;
+
+ /*
+ * unlike other functions we have in this file, we have to check
+	 * all key/mask/gateway as rnh_lookup can match a less specific entry.
+ */
+ rt1 = (struct rtentry *)rn1;
+
+ /* compare key. */
+ if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
+ bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
+ goto different;
+
+ /* key was the same. compare netmask. hairy... */
+ if (rt_mask(rt1) && netmask) {
+ skip = rnh->rnh_treetop->rn_offset;
+ if (rt_mask(rt1)->sa_len > netmask->sa_len) {
+ /*
+ * as rt_mask(rt1) is made optimal by radix.c,
+ * there must be some 1-bits on rt_mask(rt1)
+ * after netmask->sa_len. therefore, in
+ * this case, the entries are different.
+ */
+ if (rt_mask(rt1)->sa_len > skip)
+ goto different;
+ else {
+ /* no bits to compare, i.e. same*/
+ goto maskmatched;
+ }
+ }
+
+ l = rt_mask(rt1)->sa_len;
+ if (skip > l) {
+ /* no bits to compare, i.e. same */
+ goto maskmatched;
+ }
+ p = (char *)rt_mask(rt1);
+ q = (char *)netmask;
+ if (bcmp(p + skip, q + skip, l - skip))
+ goto different;
+ /*
+		 * need to go through all the bits, as the netmask is not
+		 * optimal and can contain trailing 0s
+ */
+ eq = (char *)netmask + netmask->sa_len;
+ q += l;
+ same = 1;
+ while (eq > q)
+ if (*q++) {
+ same = 0;
+ break;
+ }
+ if (!same)
+ goto different;
+ } else if (!rt_mask(rt1) && !netmask)
+ ; /* no mask to compare, i.e. same */
+ else {
+ /* one has mask and the other does not, different */
+ goto different;
+ }
+
+maskmatched:
+
+ /* key/mask were the same. compare gateway for all multipaths */
+ do {
+ rt1 = (struct rtentry *)rn1;
+
+ /* sanity: no use in comparing the same thing */
+ if (rn1 == rn)
+ continue;
+
+ if (rt1->rt_gateway->sa_family == AF_LINK) {
+ if (rt1->rt_ifa->ifa_addr->sa_len != rt->rt_ifa->ifa_addr->sa_len ||
+ bcmp(rt1->rt_ifa->ifa_addr, rt->rt_ifa->ifa_addr,
+ rt1->rt_ifa->ifa_addr->sa_len))
+ continue;
+ } else {
+ if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
+ bcmp(rt1->rt_gateway, rt->rt_gateway,
+ rt1->rt_gateway->sa_len))
+ continue;
+ }
+
+ /* all key/mask/gateway are the same. conflicting entry. */
+ return EEXIST;
+ } while ((rn1 = rn_mpath_next(rn1)) != NULL);
+
+different:
+ return 0;
+}
+
+void
+#ifdef __rtems__
+rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum)
+#else
+rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
+#endif
+{
+ struct radix_node *rn0, *rn;
+ u_int32_t n;
+ struct rtentry *rt;
+ int64_t weight;
+
+ /*
+	 * XXX we don't attempt to look up the cached route again; what should
+	 * be done for the sendto(3) case?
+ */
+ if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
+ && RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
+ return;
+ ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);
+
+ /* if the route does not exist or it is not multipath, don't care */
+ if (ro->ro_rt == NULL)
+ return;
+ if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
+ RT_UNLOCK(ro->ro_rt);
+ return;
+ }
+
+ /* beyond here, we use rn as the master copy */
+ rn0 = rn = (struct radix_node *)ro->ro_rt;
+ n = rn_mpath_count(rn0);
+
+ /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
+ hash += hashjitter;
+ hash %= n;
+ for (weight = abs((int32_t)hash), rt = ro->ro_rt;
+ weight >= rt->rt_rmx.rmx_weight && rn;
+ weight -= rt->rt_rmx.rmx_weight) {
+
+ /* stay within the multipath routes */
+ if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
+ break;
+ rn = rn->rn_dupedkey;
+ rt = (struct rtentry *)rn;
+ }
+ /* XXX try filling rt_gwroute and avoid unreachable gw */
+
+ /* gw selection has failed - there must be only zero weight routes */
+ if (!rn) {
+ RT_UNLOCK(ro->ro_rt);
+ ro->ro_rt = NULL;
+ return;
+ }
+ if (ro->ro_rt != rt) {
+ RTFREE_LOCKED(ro->ro_rt);
+ ro->ro_rt = (struct rtentry *)rn;
+ RT_LOCK(ro->ro_rt);
+ RT_ADDREF(ro->ro_rt);
+
+ }
+ RT_UNLOCK(ro->ro_rt);
+}
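
The selection loop above implements weighted modulo-N hashing (RFC 2991): the jittered hash is reduced modulo the total weight, then walked down the group until the running weight is exhausted. A standalone sketch with assumed types ('struct path' here stands in for the rtentry chain):

#include <stdint.h>
#include <stdio.h>

struct path { const char *gw; int64_t weight; };

static const char *
pick_path(const struct path *p, int npaths, uint32_t hash, uint32_t jitter)
{
	int64_t total = 0, weight;
	int i;

	for (i = 0; i < npaths; i++)
		total += p[i].weight;	/* rn_mpath_count() analogue */
	weight = (hash + jitter) % (uint32_t)total;
	for (i = 0; i < npaths - 1 && weight >= p[i].weight; i++)
		weight -= p[i].weight;	/* consume this hop's share */
	return (p[i].gw);
}

int
main(void)
{
	const struct path paths[] = { { "10.0.0.1", 1 }, { "10.0.0.2", 3 } };
	uint32_t h;

	for (h = 0; h < 8; h++)		/* flows spread 1:3 over the gateways */
		printf("flow %u -> %s\n", h, pick_path(paths, 2, h, 0));
	return (0);
}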
+
+extern int in6_inithead(void **head, int off);
+extern int in_inithead(void **head, int off);
+
+#ifdef INET
+int
+rn4_mpath_inithead(void **head, int off)
+{
+ struct radix_node_head *rnh;
+
+ hashjitter = arc4random();
+ if (in_inithead(head, off) == 1) {
+ rnh = (struct radix_node_head *)*head;
+ rnh->rnh_multipath = 1;
+ return 1;
+ } else
+ return 0;
+}
+#endif
+
+#ifdef INET6
+int
+rn6_mpath_inithead(void **head, int off)
+{
+ struct radix_node_head *rnh;
+
+ hashjitter = arc4random();
+ if (in6_inithead(head, off) == 1) {
+ rnh = (struct radix_node_head *)*head;
+ rnh->rnh_multipath = 1;
+ return 1;
+ } else
+ return 0;
+}
+
+#endif
diff --git a/freebsd/sys/net/radix_mpath.h b/freebsd/sys/net/radix_mpath.h
new file mode 100644
index 00000000..b6d8c16a
--- /dev/null
+++ b/freebsd/sys/net/radix_mpath.h
@@ -0,0 +1,63 @@
+/* $KAME: radix_mpath.h,v 1.10 2004/11/06 15:44:28 itojun Exp $ */
+
+/*
+ * Copyright (C) 2001 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
+ * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
+ * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
+ * PROPERTIES.
+ */
+/* $FreeBSD$ */
+
+#ifndef _NET_RADIX_MPATH_HH_
+#define _NET_RADIX_MPATH_HH_
+
+#ifdef _KERNEL
+/*
+ * Radix tree API with multipath support
+ */
+struct route;
+struct rtentry;
+struct sockaddr;
+int rn_mpath_capable(struct radix_node_head *);
+struct radix_node *rn_mpath_next(struct radix_node *);
+u_int32_t rn_mpath_count(struct radix_node *);
+struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *);
+int rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
+ struct sockaddr *);
+void rtalloc_mpath_fib(struct route *, u_int32_t, u_int);
+#define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0)
+struct radix_node *rn_mpath_lookup(void *, void *,
+ struct radix_node_head *);
+int rt_mpath_deldup(struct rtentry *, struct rtentry *);
+int rn4_mpath_inithead(void **, int);
+int rn6_mpath_inithead(void **, int);
+
+#endif
+
+#endif /* _NET_RADIX_MPATH_HH_ */
diff --git a/freebsd/sys/net/raw_cb.c b/freebsd/sys/net/raw_cb.c
new file mode 100644
index 00000000..2fd73dac
--- /dev/null
+++ b/freebsd/sys/net/raw_cb.c
@@ -0,0 +1,119 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1980, 1986, 1993
+ * The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)raw_cb.c 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/raw_cb.h>
+#include <freebsd/net/vnet.h>
+
+/*
+ * Routines to manage the raw protocol control blocks.
+ *
+ * TODO:
+ * hash lookups by protocol family/protocol + address family
+ * take care of unique address problems per AF?
+ * redo address binding to allow wildcards
+ */
+
+struct mtx rawcb_mtx;
+VNET_DEFINE(struct rawcb_list_head, rawcb_list);
+
+SYSCTL_NODE(_net, OID_AUTO, raw, CTLFLAG_RW, 0, "Raw socket infrastructure");
+
+static u_long raw_sendspace = RAWSNDQ;
+SYSCTL_ULONG(_net_raw, OID_AUTO, sendspace, CTLFLAG_RW, &raw_sendspace, 0,
+ "Default raw socket send space");
+
+static u_long raw_recvspace = RAWRCVQ;
+SYSCTL_ULONG(_net_raw, OID_AUTO, recvspace, CTLFLAG_RW, &raw_recvspace, 0,
+ "Default raw socket receive space");
+
+/*
+ * Allocate a control block and a nominal amount of buffer space for the
+ * socket.
+ */
+int
+raw_attach(struct socket *so, int proto)
+{
+ struct rawcb *rp = sotorawcb(so);
+ int error;
+
+ /*
+ * It is assumed that raw_attach is called after space has been
+ * allocated for the rawcb; consumer protocols may simply allocate
+ * type struct rawcb, or a wrapper data structure that begins with a
+ * struct rawcb.
+ */
+ KASSERT(rp != NULL, ("raw_attach: rp == NULL"));
+
+ error = soreserve(so, raw_sendspace, raw_recvspace);
+ if (error)
+ return (error);
+ rp->rcb_socket = so;
+ rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family;
+ rp->rcb_proto.sp_protocol = proto;
+ mtx_lock(&rawcb_mtx);
+ LIST_INSERT_HEAD(&V_rawcb_list, rp, list);
+ mtx_unlock(&rawcb_mtx);
+ return (0);
+}
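
As the comment above notes, raw_attach() assumes the PCB already exists; consumers embed struct rawcb as the first member of their own control block so sotorawcb() yields either view. A hypothetical consumer layout (illustrative only):

struct myproto_rawcb {			/* hypothetical example */
	struct rawcb	mrc_rawcb;	/* must be first: sotorawcb() casts */
	int		mrc_state;	/* protocol-private state follows */
};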
+
+/*
+ * Detach the raw connection block and discard socket resources.
+ */
+void
+raw_detach(struct rawcb *rp)
+{
+ struct socket *so = rp->rcb_socket;
+
+ KASSERT(so->so_pcb == rp, ("raw_detach: so_pcb != rp"));
+
+ so->so_pcb = NULL;
+ mtx_lock(&rawcb_mtx);
+ LIST_REMOVE(rp, list);
+ mtx_unlock(&rawcb_mtx);
+ free((caddr_t)(rp), M_PCB);
+}
diff --git a/freebsd/sys/net/raw_cb.h b/freebsd/sys/net/raw_cb.h
new file mode 100644
index 00000000..278b4235
--- /dev/null
+++ b/freebsd/sys/net/raw_cb.h
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 1980, 1986, 1993
+ * The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)raw_cb.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NET_RAW_CB_HH_
+#define _NET_RAW_CB_HH_
+
+#include <freebsd/sys/queue.h>
+
+/*
+ * Raw protocol interface control block. Used to tie a socket to the generic
+ * raw interface.
+ */
+struct rawcb {
+ LIST_ENTRY(rawcb) list;
+ struct socket *rcb_socket; /* back pointer to socket */
+ struct sockproto rcb_proto; /* protocol family, protocol */
+};
+
+#define sotorawcb(so) ((struct rawcb *)(so)->so_pcb)
+
+/*
+ * Nominal space allocated to a raw socket.
+ */
+#define RAWSNDQ 8192
+#define RAWRCVQ 8192
+
+#ifdef _KERNEL
+VNET_DECLARE(LIST_HEAD(rawcb_list_head, rawcb), rawcb_list);
+#define V_rawcb_list VNET(rawcb_list)
+
+extern struct mtx rawcb_mtx;
+
+/*
+ * Generic protosw entries for raw socket protocols.
+ */
+pr_ctlinput_t raw_ctlinput;
+pr_init_t raw_init;
+
+/*
+ * Library routines for raw socket usrreq functions; will always be wrapped
+ * so that protocol-specific functions can be handled.
+ */
+int raw_attach(struct socket *, int);
+void raw_detach(struct rawcb *);
+void raw_input(struct mbuf *, struct sockproto *, struct sockaddr *);
+
+/*
+ * Generic pr_usrreqs entries for raw socket protocols, usually wrapped so
+ * that protocol-specific functions can be handled.
+ */
+extern struct pr_usrreqs raw_usrreqs;
+#endif
+
+#endif
diff --git a/freebsd/sys/net/raw_usrreq.c b/freebsd/sys/net/raw_usrreq.c
new file mode 100644
index 00000000..bdf3369e
--- /dev/null
+++ b/freebsd/sys/net/raw_usrreq.c
@@ -0,0 +1,266 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1980, 1986, 1993
+ * The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)raw_usrreq.c 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sx.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/raw_cb.h>
+#include <freebsd/net/vnet.h>
+
+MTX_SYSINIT(rawcb_mtx, &rawcb_mtx, "rawcb", MTX_DEF);
+
+/*
+ * Initialize raw connection block q.
+ */
+void
+raw_init(void)
+{
+
+ LIST_INIT(&V_rawcb_list);
+}
+
+/*
+ * Raw protocol input routine. Find the socket associated with the packet(s)
+ * and move them over. If nothing exists for this packet, drop it.
+ */
+void
+raw_input(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src)
+{
+ struct rawcb *rp;
+ struct mbuf *m = m0;
+ struct socket *last;
+
+ last = 0;
+ mtx_lock(&rawcb_mtx);
+ LIST_FOREACH(rp, &V_rawcb_list, list) {
+ if (rp->rcb_proto.sp_family != proto->sp_family)
+ continue;
+ if (rp->rcb_proto.sp_protocol &&
+ rp->rcb_proto.sp_protocol != proto->sp_protocol)
+ continue;
+ if (last) {
+ struct mbuf *n;
+ n = m_copy(m, 0, (int)M_COPYALL);
+ if (n) {
+ if (sbappendaddr(&last->so_rcv, src,
+ n, (struct mbuf *)0) == 0)
+ /* should notify about lost packet */
+ m_freem(n);
+ else
+ sorwakeup(last);
+ }
+ }
+ last = rp->rcb_socket;
+ }
+ if (last) {
+ if (sbappendaddr(&last->so_rcv, src,
+ m, (struct mbuf *)0) == 0)
+ m_freem(m);
+ else
+ sorwakeup(last);
+ } else
+ m_freem(m);
+ mtx_unlock(&rawcb_mtx);
+}
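
The loop hands a copy of the mbuf chain to every matching socket except the last, which receives the original, saving one m_copy(). The match rule it applies, restated as a standalone sketch:

/* family must match exactly; protocol 0 in the rawcb acts as a wildcard */
static int
rawcb_matches(const struct rawcb *rp, const struct sockproto *proto)
{

	if (rp->rcb_proto.sp_family != proto->sp_family)
		return (0);
	if (rp->rcb_proto.sp_protocol != 0 &&
	    rp->rcb_proto.sp_protocol != proto->sp_protocol)
		return (0);
	return (1);
}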
+
+/*ARGSUSED*/
+void
+raw_ctlinput(int cmd, struct sockaddr *arg, void *dummy)
+{
+
+ if (cmd < 0 || cmd >= PRC_NCMDS)
+ return;
+ /* INCOMPLETE */
+}
+
+static void
+raw_uabort(struct socket *so)
+{
+
+ KASSERT(sotorawcb(so) != NULL, ("raw_uabort: rp == NULL"));
+
+ soisdisconnected(so);
+}
+
+static void
+raw_uclose(struct socket *so)
+{
+
+	KASSERT(sotorawcb(so) != NULL, ("raw_uclose: rp == NULL"));
+
+ soisdisconnected(so);
+}
+
+/* pru_accept is EOPNOTSUPP */
+
+static int
+raw_uattach(struct socket *so, int proto, struct thread *td)
+{
+ int error;
+
+ /*
+ * Implementors of raw sockets will already have allocated the PCB,
+ * so it must be non-NULL here.
+ */
+ KASSERT(sotorawcb(so) != NULL, ("raw_uattach: so_pcb == NULL"));
+
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_RAW);
+ if (error)
+ return (error);
+ }
+ return (raw_attach(so, proto));
+}
+
+static int
+raw_ubind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+
+ return (EINVAL);
+}
+
+static int
+raw_uconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+
+ return (EINVAL);
+}
+
+/* pru_connect2 is EOPNOTSUPP */
+/* pru_control is EOPNOTSUPP */
+
+static void
+raw_udetach(struct socket *so)
+{
+ struct rawcb *rp = sotorawcb(so);
+
+ KASSERT(rp != NULL, ("raw_udetach: rp == NULL"));
+
+ raw_detach(rp);
+}
+
+static int
+raw_udisconnect(struct socket *so)
+{
+
+ KASSERT(sotorawcb(so) != NULL, ("raw_udisconnect: rp == NULL"));
+
+ return (ENOTCONN);
+}
+
+/* pru_listen is EOPNOTSUPP */
+
+static int
+raw_upeeraddr(struct socket *so, struct sockaddr **nam)
+{
+
+ KASSERT(sotorawcb(so) != NULL, ("raw_upeeraddr: rp == NULL"));
+
+ return (ENOTCONN);
+}
+
+/* pru_rcvd is EOPNOTSUPP */
+/* pru_rcvoob is EOPNOTSUPP */
+
+static int
+raw_usend(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
+ struct mbuf *control, struct thread *td)
+{
+
+ KASSERT(sotorawcb(so) != NULL, ("raw_usend: rp == NULL"));
+
+ if ((flags & PRUS_OOB) || (control && control->m_len)) {
+ /* XXXRW: Should control also be freed here? */
+ if (m != NULL)
+ m_freem(m);
+ return (EOPNOTSUPP);
+ }
+
+ /*
+ * For historical (bad?) reasons, we effectively ignore the address
+ * argument to sendto(2). Perhaps we should return an error instead?
+ */
+ return ((*so->so_proto->pr_output)(m, so));
+}
+
+/* pru_sense is null */
+
+static int
+raw_ushutdown(struct socket *so)
+{
+
+ KASSERT(sotorawcb(so) != NULL, ("raw_ushutdown: rp == NULL"));
+
+ socantsendmore(so);
+ return (0);
+}
+
+static int
+raw_usockaddr(struct socket *so, struct sockaddr **nam)
+{
+
+ KASSERT(sotorawcb(so) != NULL, ("raw_usockaddr: rp == NULL"));
+
+ return (EINVAL);
+}
+
+struct pr_usrreqs raw_usrreqs = {
+ .pru_abort = raw_uabort,
+ .pru_attach = raw_uattach,
+ .pru_bind = raw_ubind,
+ .pru_connect = raw_uconnect,
+ .pru_detach = raw_udetach,
+ .pru_disconnect = raw_udisconnect,
+ .pru_peeraddr = raw_upeeraddr,
+ .pru_send = raw_usend,
+ .pru_shutdown = raw_ushutdown,
+ .pru_sockaddr = raw_usockaddr,
+ .pru_close = raw_uclose,
+};
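
A protocol reuses this table from its protosw instead of duplicating the stubs; raw_usend() then dispatches to that protocol's pr_output. A hypothetical entry, with illustrative field values rather than any specific FreeBSD protocol:

static int	myproto_output(struct mbuf *, struct socket *);

static struct protosw myproto_protosw = {
	.pr_type =	SOCK_RAW,
	.pr_flags =	PR_ATOMIC | PR_ADDR,
	.pr_output =	myproto_output,	/* called via raw_usend() */
	.pr_ctlinput =	raw_ctlinput,
	.pr_init =	raw_init,
	.pr_usrreqs =	&raw_usrreqs,
};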
diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c
new file mode 100644
index 00000000..ee43c843
--- /dev/null
+++ b/freebsd/sys/net/route.c
@@ -0,0 +1,1601 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1980, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95
+ * $FreeBSD$
+ */
+/************************************************************************
+ * Note: In this file a 'fib' is a "forwarding information base",	*
+ * which is the new name for an in-kernel routing (next hop) table.	*
+ ***********************************************************************/
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_route.h>
+#include <freebsd/local/opt_mrouting.h>
+#include <freebsd/local/opt_mpath.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/sysproto.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/kernel.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+#include <freebsd/net/flowtable.h>
+
+#ifdef RADIX_MPATH
+#include <freebsd/net/radix_mpath.h>
+#endif
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_mroute.h>
+
+#include <freebsd/vm/uma.h>
+
+u_int rt_numfibs = RT_NUMFIBS;
+SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
+/*
+ * Allow the boot code to use FEWER than RT_MAXFIBS fibs.
+ * We can't do more because storage is statically allocated for now
+ * (for compatibility reasons; this will change).
+ */
+TUNABLE_INT("net.fibs", &rt_numfibs);
+
+/*
+ * By default add routes to all fibs for new interfaces.
+ * Once this is set to 0, only allocate routes on interface
+ * changes for the FIB of the caller when adding a new set of addresses
+ * to an interface.  XXX this is a shotgun approach to a problem that needs
+ * a more fine-grained solution; that will come.
+ */
+u_int rt_add_addr_allfibs = 1;
+SYSCTL_INT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
+ &rt_add_addr_allfibs, 0, "");
+TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);
+
+VNET_DEFINE(struct rtstat, rtstat);
+#define V_rtstat VNET(rtstat)
+
+VNET_DEFINE(struct radix_node_head *, rt_tables);
+#define V_rt_tables VNET(rt_tables)
+
+VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
+#define V_rttrash VNET(rttrash)
+
+
+/* compare two sockaddr structures */
+#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
+
+/*
+ * Convert a 'struct radix_node *' to a 'struct rtentry *'.
+ * The operation can be done safely (in this code) because a
+ * 'struct rtentry' starts with two 'struct radix_node''s, the first
+ * one representing leaf nodes in the routing tree, which is
+ * what the code in radix.c passes us as a 'struct radix_node'.
+ *
+ * But because there are a lot of assumptions in this conversion,
+ * do not cast explicitly, but always use the macro below.
+ */
+#define RNTORT(p) ((struct rtentry *)(p))
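
The layout the macro depends on, sketched; the real declaration lives in net/route.h, where rt_nodes[2] is the first member (the leaf node first, hence the direct cast from the leaf pointer radix.c hands back):

struct rtentry_layout {			/* sketch, not the real declaration */
	struct radix_node rt_nodes[2];	/* leaf, then its interior twin */
	/* ... remaining struct rtentry fields ... */
};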
+
+static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */
+#define V_rtzone VNET(rtzone)
+
+#if 0
+/* default fib for tunnels to use */
+u_int tunnel_fib = 0;
+SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, "");
+#endif
+
+/*
+ * handler for net.my_fibnum
+ */
+static int
+sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
+{
+ int fibnum;
+ int error;
+
+ fibnum = curthread->td_proc->p_fibnum;
+ error = sysctl_handle_int(oidp, &fibnum, 0, req);
+ return (error);
+}
+
+SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
+ NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
+
+static __inline struct radix_node_head **
+rt_tables_get_rnh_ptr(int table, int fam)
+{
+ struct radix_node_head **rnh;
+
+ KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.",
+ __func__));
+ KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.",
+ __func__));
+
+ /* rnh is [fib=0][af=0]. */
+ rnh = (struct radix_node_head **)V_rt_tables;
+ /* Get the offset to the requested table and fam. */
+ rnh += table * (AF_MAX+1) + fam;
+
+ return (rnh);
+}
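
The arithmetic treats V_rt_tables as a flat, fib-major array of rt_numfibs * (AF_MAX + 1) head pointers. The slot computation, isolated as a hypothetical helper:

static __inline size_t
rnh_slot(int fib, int af)
{

	return ((size_t)fib * (AF_MAX + 1) + af);	/* fib-major order */
}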
+
+struct radix_node_head *
+rt_tables_get_rnh(int table, int fam)
+{
+
+ return (*rt_tables_get_rnh_ptr(table, fam));
+}
+
+/*
+ * route initialization must occur before ip6_init2(), which happens at
+ * SI_ORDER_MIDDLE.
+ */
+static void
+route_init(void)
+{
+ struct domain *dom;
+ int max_keylen = 0;
+
+ /* whack the tunable ints into line. */
+ if (rt_numfibs > RT_MAXFIBS)
+ rt_numfibs = RT_MAXFIBS;
+ if (rt_numfibs == 0)
+ rt_numfibs = 1;
+
+ for (dom = domains; dom; dom = dom->dom_next)
+ if (dom->dom_maxrtkey > max_keylen)
+ max_keylen = dom->dom_maxrtkey;
+
+ rn_init(max_keylen); /* init all zeroes, all ones, mask table */
+}
+SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
+
+static void
+vnet_route_init(const void *unused __unused)
+{
+ struct domain *dom;
+ struct radix_node_head **rnh;
+ int table;
+ int fam;
+
+ V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
+ sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO);
+
+ V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, 0);
+ for (dom = domains; dom; dom = dom->dom_next) {
+ if (dom->dom_rtattach) {
+ for (table = 0; table < rt_numfibs; table++) {
+ if ( (fam = dom->dom_family) == AF_INET ||
+ table == 0) {
+ /* for now only AF_INET has > 1 table */
+ /* XXX MRT
+ * rtattach will be also called
+ * from vfs_export.c but the
+ * offset will be 0
+ * (only for AF_INET and AF_INET6
+ * which don't need it anyhow)
+ */
+ rnh = rt_tables_get_rnh_ptr(table, fam);
+ if (rnh == NULL)
+ panic("%s: rnh NULL", __func__);
+ dom->dom_rtattach((void **)rnh,
+ dom->dom_rtoffset);
+ } else {
+ break;
+ }
+ }
+ }
+ }
+}
+VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
+ vnet_route_init, 0);
+
+#ifdef VIMAGE
+static void
+vnet_route_uninit(const void *unused __unused)
+{
+ int table;
+ int fam;
+ struct domain *dom;
+ struct radix_node_head **rnh;
+
+ for (dom = domains; dom; dom = dom->dom_next) {
+ if (dom->dom_rtdetach) {
+ for (table = 0; table < rt_numfibs; table++) {
+ if ( (fam = dom->dom_family) == AF_INET ||
+ table == 0) {
+ /* For now only AF_INET has > 1 tbl. */
+ rnh = rt_tables_get_rnh_ptr(table, fam);
+ if (rnh == NULL)
+ panic("%s: rnh NULL", __func__);
+ dom->dom_rtdetach((void **)rnh,
+ dom->dom_rtoffset);
+ } else {
+ break;
+ }
+ }
+ }
+ }
+}
+VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_route_uninit, 0);
+#endif
+
+#ifndef _SYS_SYSPROTO_HH_
+struct setfib_args {
+ int fibnum;
+};
+#endif
+int
+setfib(struct thread *td, struct setfib_args *uap)
+{
+ if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
+ return EINVAL;
+ td->td_proc->p_fibnum = uap->fibnum;
+ return (0);
+}
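
From userland this is reached as the setfib(2) system call. A minimal caller sketch (on FreeBSD the prototype comes from <sys/socket.h>):

#include <sys/socket.h>

int
main(void)
{
	if (setfib(1) != 0)	/* fails with EINVAL if 1 >= net.fibs */
		return (1);
	/* route lookups for this process now default to FIB 1 */
	return (0);
}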
+
+/*
+ * Packet routing routines.
+ */
+void
+rtalloc(struct route *ro)
+{
+ rtalloc_ign_fib(ro, 0UL, 0);
+}
+
+void
+rtalloc_fib(struct route *ro, u_int fibnum)
+{
+ rtalloc_ign_fib(ro, 0UL, fibnum);
+}
+
+void
+rtalloc_ign(struct route *ro, u_long ignore)
+{
+ struct rtentry *rt;
+
+ if ((rt = ro->ro_rt) != NULL) {
+ if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
+ return;
+ RTFREE(rt);
+ ro->ro_rt = NULL;
+ }
+ ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0);
+ if (ro->ro_rt)
+ RT_UNLOCK(ro->ro_rt);
+}
+
+void
+rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
+{
+ struct rtentry *rt;
+
+ if ((rt = ro->ro_rt) != NULL) {
+ if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
+ return;
+ RTFREE(rt);
+ ro->ro_rt = NULL;
+ }
+ ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
+ if (ro->ro_rt)
+ RT_UNLOCK(ro->ro_rt);
+}
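
A typical caller pattern for these helpers, sketched: the route returned in ro_rt is referenced but unlocked, and the caller drops the reference with RTFREE() when finished.

struct route ro;

bzero(&ro, sizeof(ro));
/* ... fill in ro.ro_dst with the destination sockaddr ... */
rtalloc_ign(&ro, 0UL);		/* 0UL: no flags to ignore */
if (ro.ro_rt != NULL) {
	/* ... use ro.ro_rt ... */
	RTFREE(ro.ro_rt);
	ro.ro_rt = NULL;
}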
+
+/*
+ * Look up the route that matches the address given,
+ * or at least try to.  Create a cloned route if needed.
+ *
+ * The returned route, if any, is locked.
+ */
+struct rtentry *
+rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
+{
+ return (rtalloc1_fib(dst, report, ignflags, 0));
+}
+
+struct rtentry *
+rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
+ u_int fibnum)
+{
+ struct radix_node_head *rnh;
+ struct rtentry *rt;
+ struct radix_node *rn;
+ struct rtentry *newrt;
+ struct rt_addrinfo info;
+ int err = 0, msgtype = RTM_MISS;
+ int needlock;
+
+ KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
+ if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */
+ fibnum = 0;
+ rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
+ newrt = NULL;
+ /*
+ * Look up the address in the table for that Address Family
+ */
+ if (rnh == NULL) {
+ V_rtstat.rts_unreach++;
+ goto miss;
+ }
+ needlock = !(ignflags & RTF_RNH_LOCKED);
+ if (needlock)
+ RADIX_NODE_HEAD_RLOCK(rnh);
+#ifdef INVARIANTS
+ else
+ RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
+#endif
+ rn = rnh->rnh_matchaddr(dst, rnh);
+ if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ newrt = rt = RNTORT(rn);
+ RT_LOCK(newrt);
+ RT_ADDREF(newrt);
+ if (needlock)
+ RADIX_NODE_HEAD_RUNLOCK(rnh);
+ goto done;
+
+ } else if (needlock)
+ RADIX_NODE_HEAD_RUNLOCK(rnh);
+
+ /*
+ * Either we hit the root or couldn't find any match,
+ * which basically means "can't get there from here".
+ */
+ V_rtstat.rts_unreach++;
+miss:
+ if (report) {
+ /*
+ * If required, report the failure to the supervising
+ * authorities. For a delete this is not an error
+ * (report == 0).
+ */
+ bzero(&info, sizeof(info));
+ info.rti_info[RTAX_DST] = dst;
+ rt_missmsg(msgtype, &info, 0, err);
+ }
+done:
+ if (newrt)
+ RT_LOCK_ASSERT(newrt);
+ return (newrt);
+}
+
+/*
+ * Drop a reference on an rtentry.
+ * If the count reaches zero, take it out of the routing table.
+ */
+void
+rtfree(struct rtentry *rt)
+{
+ struct radix_node_head *rnh;
+
+ KASSERT(rt != NULL,("%s: NULL rt", __func__));
+ rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
+ KASSERT(rnh != NULL,("%s: NULL rnh", __func__));
+
+ RT_LOCK_ASSERT(rt);
+
+ /*
+ * The callers should use RTFREE_LOCKED() or RTFREE(), so
+ * we should come here exactly with the last reference.
+ */
+ RT_REMREF(rt);
+ if (rt->rt_refcnt > 0) {
+ log(LOG_DEBUG, "%s: %p has %ld refs\n", __func__, rt, rt->rt_refcnt);
+ goto done;
+ }
+
+ /*
+ * On last reference give the "close method" a chance
+ * to cleanup private state. This also permits (for
+ * IPv4 and IPv6) a chance to decide if the routing table
+ * entry should be purged immediately or at a later time.
+ * When an immediate purge is to happen the close routine
+ * typically calls rtexpunge which clears the RTF_UP flag
+ * on the entry so that the code below reclaims the storage.
+ */
+ if (rt->rt_refcnt == 0 && rnh->rnh_close)
+ rnh->rnh_close((struct radix_node *)rt, rnh);
+
+ /*
+ * If we are no longer "up" (and ref == 0)
+ * then we can free the resources associated
+ * with the route.
+ */
+ if ((rt->rt_flags & RTF_UP) == 0) {
+ if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
+ panic("rtfree 2");
+ /*
+ * The rtentry must have been removed from the routing table,
+ * so it is accounted for in rttrash; remove it from that count now.
+ */
+ V_rttrash--;
+#ifdef DIAGNOSTIC
+ if (rt->rt_refcnt < 0) {
+ printf("rtfree: %p not freed (neg refs)\n", rt);
+ goto done;
+ }
+#endif
+ /*
+ * Release the references we hold on other items,
+ * e.g. other routes and ifaddrs.
+ */
+ if (rt->rt_ifa)
+ ifa_free(rt->rt_ifa);
+ /*
+ * The key is separately alloc'd so free it (see rt_setgate()).
+ * This also frees the gateway, as they are always malloc'd
+ * together.
+ */
+ Free(rt_key(rt));
+
+ /*
+ * and the rtentry itself of course
+ */
+ RT_LOCK_DESTROY(rt);
+ uma_zfree(V_rtzone, rt);
+ return;
+ }
+done:
+ RT_UNLOCK(rt);
+}
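+/*
+ * Usage sketch: callers normally reach rtfree() through the route.h
+ * macros rather than calling it directly:
+ *
+ *	RTFREE_LOCKED(rt);	drops one reference; rt must be locked
+ *	RTFREE(rt);		takes the lock, then drops the reference
+ */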
+
+
+/*
+ * Force a routing table entry to the specified
+ * destination to go through the given gateway.
+ * Normally called as a result of a routing redirect
+ * message from the network layer.
+ */
+void
+rtredirect(struct sockaddr *dst,
+ struct sockaddr *gateway,
+ struct sockaddr *netmask,
+ int flags,
+ struct sockaddr *src)
+{
+ rtredirect_fib(dst, gateway, netmask, flags, src, 0);
+}
+
+void
+rtredirect_fib(struct sockaddr *dst,
+ struct sockaddr *gateway,
+ struct sockaddr *netmask,
+ int flags,
+ struct sockaddr *src,
+ u_int fibnum)
+{
+ struct rtentry *rt, *rt0 = NULL;
+ int error = 0;
+ short *stat = NULL;
+ struct rt_addrinfo info;
+ struct ifaddr *ifa;
+ struct radix_node_head *rnh;
+
+ ifa = NULL;
+ rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
+ if (rnh == NULL) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+
+ /* verify the gateway is directly reachable */
+ if ((ifa = ifa_ifwithnet(gateway, 0)) == NULL) {
+ error = ENETUNREACH;
+ goto out;
+ }
+ rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */
+ /*
+ * If the redirect isn't from our current router for this dst,
+ * it's either old or wrong. If it redirects us to ourselves,
+ * we have a routing loop, perhaps as a result of an interface
+ * going down recently.
+ */
+ if (!(flags & RTF_DONE) && rt &&
+ (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
+ error = EINVAL;
+ else if (ifa_ifwithaddr_check(gateway))
+ error = EHOSTUNREACH;
+ if (error)
+ goto done;
+ /*
+ * Create a new entry if we just got back a wildcard entry
+ * or the lookup failed. This is necessary for hosts
+ * which use routing redirects generated by smart gateways
+ * to dynamically build the routing tables.
+ */
+ if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
+ goto create;
+ /*
+ * Don't listen to the redirect if it's
+ * for a route to an interface.
+ */
+ if (rt->rt_flags & RTF_GATEWAY) {
+ if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
+ /*
+ * Changing from route to net => route to host.
+ * Create new route, rather than smashing route to net.
+ */
+ create:
+ rt0 = rt;
+ rt = NULL;
+
+ flags |= RTF_GATEWAY | RTF_DYNAMIC;
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_info[RTAX_DST] = dst;
+ info.rti_info[RTAX_GATEWAY] = gateway;
+ info.rti_info[RTAX_NETMASK] = netmask;
+ info.rti_ifa = ifa;
+ info.rti_flags = flags;
+ if (rt0 != NULL)
+ RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */
+ error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
+ if (rt != NULL) {
+ RT_LOCK(rt);
+ if (rt0 != NULL)
+ EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
+ flags = rt->rt_flags;
+ }
+ if (rt0 != NULL)
+ RTFREE(rt0);
+
+ stat = &V_rtstat.rts_dynamic;
+ } else {
+ struct rtentry *gwrt;
+
+ /*
+ * Smash the current notion of the gateway to
+ * this destination. Should check about netmask!!!
+ */
+ rt->rt_flags |= RTF_MODIFIED;
+ flags |= RTF_MODIFIED;
+ stat = &V_rtstat.rts_newgateway;
+ /*
+ * add the key and gateway (in one malloc'd chunk).
+ */
+ RT_UNLOCK(rt);
+ RADIX_NODE_HEAD_LOCK(rnh);
+ RT_LOCK(rt);
+ rt_setgate(rt, rt_key(rt), gateway);
+ gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED);
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
+ RTFREE_LOCKED(gwrt);
+ }
+ } else
+ error = EHOSTUNREACH;
+done:
+ if (rt)
+ RTFREE_LOCKED(rt);
+out:
+ if (error)
+ V_rtstat.rts_badredirect++;
+ else if (stat != NULL)
+ (*stat)++;
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_info[RTAX_DST] = dst;
+ info.rti_info[RTAX_GATEWAY] = gateway;
+ info.rti_info[RTAX_NETMASK] = netmask;
+ info.rti_info[RTAX_AUTHOR] = src;
+ rt_missmsg(RTM_REDIRECT, &info, flags, error);
+ if (ifa != NULL)
+ ifa_free(ifa);
+}
+
+int
+rtioctl(u_long req, caddr_t data)
+{
+ return (rtioctl_fib(req, data, 0));
+}
+
+/*
+ * Routing table ioctl interface.
+ */
+int
+rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
+{
+
+ /*
+ * If more ioctl commands are added here, make sure the proper
+ * super-user checks are being performed because it is possible for
+ * prison-root to make it this far if raw sockets have been enabled
+ * in jails.
+ */
+#ifdef INET
+ /* Multicast goop, grrr... */
+ return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
+#else /* INET */
+ return ENXIO;
+#endif /* INET */
+}
+
+/*
+ * For both ifa_ifwithroute() routines, 'ifa' is returned referenced.
+ */
+struct ifaddr *
+ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
+{
+ return (ifa_ifwithroute_fib(flags, dst, gateway, 0));
+}
+
+struct ifaddr *
+ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
+ u_int fibnum)
+{
+ register struct ifaddr *ifa;
+ int not_found = 0;
+
+ if ((flags & RTF_GATEWAY) == 0) {
+ /*
+ * If we are adding a route to an interface,
+ * and the interface is a pt to pt link
+ * we should search for the destination
+ * as our clue to the interface. Otherwise
+ * we can use the local address.
+ */
+ ifa = NULL;
+ if (flags & RTF_HOST)
+ ifa = ifa_ifwithdstaddr(dst);
+ if (ifa == NULL)
+ ifa = ifa_ifwithaddr(gateway);
+ } else {
+ /*
+ * If we are adding a route to a remote net
+ * or host, the gateway may still be on the
+ * other end of a pt to pt link.
+ */
+ ifa = ifa_ifwithdstaddr(gateway);
+ }
+ if (ifa == NULL)
+ ifa = ifa_ifwithnet(gateway, 0);
+ if (ifa == NULL) {
+ struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum);
+ if (rt == NULL)
+ return (NULL);
+ /*
+ * dismiss a gateway that is reachable only
+ * through the default router
+ */
+ switch (gateway->sa_family) {
+ case AF_INET:
+ if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY)
+ not_found = 1;
+ break;
+ case AF_INET6:
+ if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr))
+ not_found = 1;
+ break;
+ default:
+ break;
+ }
+ if (!not_found && rt->rt_ifa != NULL) {
+ ifa = rt->rt_ifa;
+ ifa_ref(ifa);
+ }
+ RT_REMREF(rt);
+ RT_UNLOCK(rt);
+ if (not_found || ifa == NULL)
+ return (NULL);
+ }
+ if (ifa->ifa_addr->sa_family != dst->sa_family) {
+ struct ifaddr *oifa = ifa;
+ ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
+ if (ifa == NULL)
+ ifa = oifa;
+ else
+ ifa_free(oifa);
+ }
+ return (ifa);
+}
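+/*
+ * Caller-side sketch: because 'ifa' is returned referenced, the
+ * caller must drop the reference once it is done with it:
+ *
+ *	ifa = ifa_ifwithroute(flags, dst, gateway);
+ *	if (ifa != NULL) {
+ *		... use ifa ...
+ *		ifa_free(ifa);
+ *	}
+ */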
+
+/*
+ * Do appropriate manipulations of a routing tree given
+ * all the bits of info needed
+ */
+int
+rtrequest(int req,
+ struct sockaddr *dst,
+ struct sockaddr *gateway,
+ struct sockaddr *netmask,
+ int flags,
+ struct rtentry **ret_nrt)
+{
+ return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0));
+}
+
+int
+rtrequest_fib(int req,
+ struct sockaddr *dst,
+ struct sockaddr *gateway,
+ struct sockaddr *netmask,
+ int flags,
+ struct rtentry **ret_nrt,
+ u_int fibnum)
+{
+ struct rt_addrinfo info;
+
+ if (dst->sa_len == 0)
+ return(EINVAL);
+
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_flags = flags;
+ info.rti_info[RTAX_DST] = dst;
+ info.rti_info[RTAX_GATEWAY] = gateway;
+ info.rti_info[RTAX_NETMASK] = netmask;
+ return rtrequest1_fib(req, &info, ret_nrt, fibnum);
+}
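+/*
+ * Example (sketch): deleting a host route through the legacy wrapper,
+ * assuming 'dst' is a filled-in destination sockaddr:
+ *
+ *	error = rtrequest(RTM_DELETE, dst, NULL, NULL, RTF_HOST, NULL);
+ */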
+
+/*
+ * These (questionable) definitions of apparent local variables apply
+ * to the next two functions. XXXXXX!!!
+ */
+#define dst info->rti_info[RTAX_DST]
+#define gateway info->rti_info[RTAX_GATEWAY]
+#define netmask info->rti_info[RTAX_NETMASK]
+#define ifaaddr info->rti_info[RTAX_IFA]
+#define ifpaddr info->rti_info[RTAX_IFP]
+#define flags info->rti_flags
+
+int
+rt_getifa(struct rt_addrinfo *info)
+{
+ return (rt_getifa_fib(info, 0));
+}
+
+/*
+ * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined,
+ * it will be referenced so the caller must free it.
+ */
+int
+rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
+{
+ struct ifaddr *ifa;
+ int error = 0;
+
+ /*
+ * ifp may be specified by sockaddr_dl
+ * when protocol address is ambiguous.
+ */
+ if (info->rti_ifp == NULL && ifpaddr != NULL &&
+ ifpaddr->sa_family == AF_LINK &&
+ (ifa = ifa_ifwithnet(ifpaddr, 0)) != NULL) {
+ info->rti_ifp = ifa->ifa_ifp;
+ ifa_free(ifa);
+ }
+ if (info->rti_ifa == NULL && ifaaddr != NULL)
+ info->rti_ifa = ifa_ifwithaddr(ifaaddr);
+ if (info->rti_ifa == NULL) {
+ struct sockaddr *sa;
+
+ sa = ifaaddr != NULL ? ifaaddr :
+ (gateway != NULL ? gateway : dst);
+ if (sa != NULL && info->rti_ifp != NULL)
+ info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
+ else if (dst != NULL && gateway != NULL)
+ info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway,
+ fibnum);
+ else if (sa != NULL)
+ info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa,
+ fibnum);
+ }
+ if ((ifa = info->rti_ifa) != NULL) {
+ if (info->rti_ifp == NULL)
+ info->rti_ifp = ifa->ifa_ifp;
+ } else
+ error = ENETUNREACH;
+ return (error);
+}
+
+/*
+ * Expunges references to a route that's about to be reclaimed.
+ * The route must be locked.
+ */
+int
+rtexpunge(struct rtentry *rt)
+{
+#if !defined(RADIX_MPATH)
+ struct radix_node *rn;
+#else
+ struct rt_addrinfo info;
+ int fib;
+ struct rtentry *rt0;
+#endif
+ struct radix_node_head *rnh;
+ struct ifaddr *ifa;
+ int error = 0;
+
+ /*
+ * Find the correct routing tree to use for this Address Family
+ */
+ rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
+ RT_LOCK_ASSERT(rt);
+ if (rnh == NULL)
+ return (EAFNOSUPPORT);
+ RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
+
+#ifdef RADIX_MPATH
+ fib = rt->rt_fibnum;
+ bzero(&info, sizeof(info));
+ info.rti_ifp = rt->rt_ifp;
+ info.rti_flags = RTF_RNH_LOCKED;
+ info.rti_info[RTAX_DST] = rt_key(rt);
+ info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr;
+
+ RT_UNLOCK(rt);
+ error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib);
+
+ if (error == 0 && rt0 != NULL) {
+ rt = rt0;
+ RT_LOCK(rt);
+ } else if (error != 0) {
+ RT_LOCK(rt);
+ return (error);
+ }
+#else
+ /*
+ * Remove the item from the tree; it should be there,
+ * but when callers invoke us blindly it may not (sigh).
+ */
+ rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
+ if (rn == NULL) {
+ error = ESRCH;
+ goto bad;
+ }
+ KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
+ ("unexpected flags 0x%x", rn->rn_flags));
+ KASSERT(rt == RNTORT(rn),
+ ("lookup mismatch, rt %p rn %p", rt, rn));
+#endif /* RADIX_MPATH */
+
+ rt->rt_flags &= ~RTF_UP;
+
+ /*
+ * Give the protocol a chance to keep things in sync.
+ */
+ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
+ struct rt_addrinfo info;
+
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_flags = rt->rt_flags;
+ info.rti_info[RTAX_DST] = rt_key(rt);
+ info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+ ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
+ }
+
+ /*
+ * one more rtentry floating around that is not
+ * linked to the routing table.
+ */
+ V_rttrash++;
+#if !defined(RADIX_MPATH)
+bad:
+#endif
+ return (error);
+}
+
+#ifdef RADIX_MPATH
+static int
+rn_mpath_update(int req, struct rt_addrinfo *info,
+ struct radix_node_head *rnh, struct rtentry **ret_nrt)
+{
+ /*
+ * if we got multipath routes, we require users to specify
+ * a matching RTAX_GATEWAY.
+ */
+ struct rtentry *rt, *rto = NULL;
+ register struct radix_node *rn;
+ int error = 0;
+
+ rn = rnh->rnh_matchaddr(dst, rnh);
+ if (rn == NULL)
+ return (ESRCH);
+ rto = rt = RNTORT(rn);
+ rt = rt_mpath_matchgate(rt, gateway);
+ if (rt == NULL)
+ return (ESRCH);
+ /*
+ * this is the first entry in the chain
+ */
+ if (rto == rt) {
+ rn = rn_mpath_next((struct radix_node *)rt);
+ /*
+ * there is another entry, now it's active
+ */
+ if (rn) {
+ rto = RNTORT(rn);
+ RT_LOCK(rto);
+ rto->rt_flags |= RTF_UP;
+ RT_UNLOCK(rto);
+ } else if (rt->rt_flags & RTF_GATEWAY) {
+ /*
+ * For gateway routes, we need to
+ * make sure that we are deleting
+ * the correct gateway.
+ * rt_mpath_matchgate() does not
+ * check the case when there is only
+ * one route in the chain.
+ */
+ if (gateway &&
+ (rt->rt_gateway->sa_len != gateway->sa_len ||
+ memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
+ error = ESRCH;
+ else {
+ /*
+ * remove from tree before returning it
+ * to the caller
+ */
+ rn = rnh->rnh_deladdr(dst, netmask, rnh);
+ KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
+ goto gwdelete;
+ }
+
+ }
+ /*
+ * use the normal delete code to remove
+ * the first entry
+ */
+ if (req != RTM_DELETE)
+ goto nondelete;
+
+ error = ENOENT;
+ goto done;
+ }
+
+ /*
+ * if the entry is 2nd and on up
+ */
+ if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
+ panic ("rtrequest1: rt_mpath_deldup");
+gwdelete:
+ RT_LOCK(rt);
+ RT_ADDREF(rt);
+ if (req == RTM_DELETE) {
+ rt->rt_flags &= ~RTF_UP;
+ /*
+ * One more rtentry floating around that is not
+ * linked to the routing table. rttrash will be decremented
+ * when RTFREE(rt) is eventually called.
+ */
+ V_rttrash++;
+ }
+
+nondelete:
+ if (req != RTM_DELETE)
+ panic("unrecognized request %d", req);
+
+
+ /*
+ * If the caller wants it, then it can have it,
+ * but it's up to it to free the rtentry as we won't be
+ * doing it.
+ */
+ if (ret_nrt) {
+ *ret_nrt = rt;
+ RT_UNLOCK(rt);
+ } else
+ RTFREE_LOCKED(rt);
+done:
+ return (error);
+}
+#endif
+
+int
+rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
+ u_int fibnum)
+{
+ int error = 0, needlock = 0;
+ register struct rtentry *rt;
+#ifdef FLOWTABLE
+ register struct rtentry *rt0;
+#endif
+ register struct radix_node *rn;
+ register struct radix_node_head *rnh;
+ struct ifaddr *ifa;
+ struct sockaddr *ndst;
+#define senderr(x) { error = x ; goto bad; }
+
+ KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
+ if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */
+ fibnum = 0;
+ /*
+ * Find the correct routing tree to use for this Address Family
+ */
+ rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
+ if (rnh == NULL)
+ return (EAFNOSUPPORT);
+ needlock = ((flags & RTF_RNH_LOCKED) == 0);
+ flags &= ~RTF_RNH_LOCKED;
+ if (needlock)
+ RADIX_NODE_HEAD_LOCK(rnh);
+ else
+ RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
+ /*
+ * If we are adding a host route then we don't want to put
+ * a netmask in the tree, nor do we want to clone it.
+ */
+ if (flags & RTF_HOST)
+ netmask = NULL;
+
+ switch (req) {
+ case RTM_DELETE:
+#ifdef RADIX_MPATH
+ if (rn_mpath_capable(rnh)) {
+ error = rn_mpath_update(req, info, rnh, ret_nrt);
+ /*
+ * "bad" holds true for the success case
+ * as well
+ */
+ if (error != ENOENT)
+ goto bad;
+ error = 0;
+ }
+#endif
+ /*
+ * Remove the item from the tree and return it.
+ * Complain if it is not there and do no more processing.
+ */
+ rn = rnh->rnh_deladdr(dst, netmask, rnh);
+ if (rn == NULL)
+ senderr(ESRCH);
+ if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
+ panic ("rtrequest delete");
+ rt = RNTORT(rn);
+ RT_LOCK(rt);
+ RT_ADDREF(rt);
+ rt->rt_flags &= ~RTF_UP;
+
+ /*
+ * give the protocol a chance to keep things in sync.
+ */
+ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
+ ifa->ifa_rtrequest(RTM_DELETE, rt, info);
+
+ /*
+ * One more rtentry floating around that is not
+ * linked to the routing table. rttrash will be decremented
+ * when RTFREE(rt) is eventually called.
+ */
+ V_rttrash++;
+
+ /*
+ * If the caller wants it, then it can have it,
+ * but it's up to it to free the rtentry as we won't be
+ * doing it.
+ */
+ if (ret_nrt) {
+ *ret_nrt = rt;
+ RT_UNLOCK(rt);
+ } else
+ RTFREE_LOCKED(rt);
+ break;
+ case RTM_RESOLVE:
+ /*
+ * RTM_RESOLVE was only used for route cloning;
+ * it is kept here for compatibility.
+ */
+ break;
+ case RTM_ADD:
+ if ((flags & RTF_GATEWAY) && !gateway)
+ senderr(EINVAL);
+ if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
+ (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
+ senderr(EINVAL);
+
+ if (info->rti_ifa == NULL) {
+ error = rt_getifa_fib(info, fibnum);
+ if (error)
+ senderr(error);
+ } else
+ ifa_ref(info->rti_ifa);
+ ifa = info->rti_ifa;
+ rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
+ if (rt == NULL) {
+ if (ifa != NULL)
+ ifa_free(ifa);
+ senderr(ENOBUFS);
+ }
+ RT_LOCK_INIT(rt);
+ rt->rt_flags = RTF_UP | flags;
+ rt->rt_fibnum = fibnum;
+ /*
+ * Add the gateway. Possibly re-malloc-ing the storage for it
+ *
+ */
+ RT_LOCK(rt);
+ if ((error = rt_setgate(rt, dst, gateway)) != 0) {
+ RT_LOCK_DESTROY(rt);
+ if (ifa != NULL)
+ ifa_free(ifa);
+ uma_zfree(V_rtzone, rt);
+ senderr(error);
+ }
+
+ /*
+ * point to the (possibly newly malloc'd) dest address.
+ */
+ ndst = (struct sockaddr *)rt_key(rt);
+
+ /*
+ * make sure it contains the value we want (masked if needed).
+ */
+ if (netmask) {
+ rt_maskedcopy(dst, ndst, netmask);
+ } else
+ bcopy(dst, ndst, dst->sa_len);
+
+ /*
+ * We use the ifa reference returned by rt_getifa_fib().
+ * This moved from below so that rnh->rnh_addaddr() can
+ * examine the ifa and ifa->ifa_ifp if it so desires.
+ */
+ rt->rt_ifa = ifa;
+ rt->rt_ifp = ifa->ifa_ifp;
+ rt->rt_rmx.rmx_weight = 1;
+
+#ifdef RADIX_MPATH
+ /* do not permit exactly the same dst/mask/gw pair */
+ if (rn_mpath_capable(rnh) &&
+ rt_mpath_conflict(rnh, rt, netmask)) {
+ if (rt->rt_ifa) {
+ ifa_free(rt->rt_ifa);
+ }
+ Free(rt_key(rt));
+ RT_LOCK_DESTROY(rt);
+ uma_zfree(V_rtzone, rt);
+ senderr(EEXIST);
+ }
+#endif
+
+#ifdef FLOWTABLE
+ rt0 = NULL;
+ /* XXX
+ * "flow-table" only support IPv4 at the moment.
+ */
+#ifdef INET
+ if (dst->sa_family == AF_INET) {
+ rn = rnh->rnh_matchaddr(dst, rnh);
+ if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ struct sockaddr *mask;
+ u_char *m, *n;
+ int len;
+
+ /*
+ * compare mask to see if the new route is
+ * more specific than the existing one
+ */
+ rt0 = RNTORT(rn);
+ RT_LOCK(rt0);
+ RT_ADDREF(rt0);
+ RT_UNLOCK(rt0);
+ /*
+ * A host route is already present, so
+ * leave the flow-table entries as is.
+ */
+ if (rt0->rt_flags & RTF_HOST) {
+ RTFREE(rt0);
+ rt0 = NULL;
+ } else if (!(flags & RTF_HOST) && netmask) {
+ mask = rt_mask(rt0);
+ len = mask->sa_len;
+ m = (u_char *)mask;
+ n = (u_char *)netmask;
+ while (len-- > 0) {
+ if (*n != *m)
+ break;
+ n++;
+ m++;
+ }
+ if (len == 0 || (*n < *m)) {
+ RTFREE(rt0);
+ rt0 = NULL;
+ }
+ }
+ }
+ }
+#endif
+#endif
+
+ /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
+ rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
+ /*
+ * If it still failed to go into the tree,
+ * then un-make it (this should be a function)
+ */
+ if (rn == NULL) {
+ if (rt->rt_ifa)
+ ifa_free(rt->rt_ifa);
+ Free(rt_key(rt));
+ RT_LOCK_DESTROY(rt);
+ uma_zfree(V_rtzone, rt);
+#ifdef FLOWTABLE
+ if (rt0 != NULL)
+ RTFREE(rt0);
+#endif
+ senderr(EEXIST);
+ }
+#ifdef FLOWTABLE
+ else if (rt0 != NULL) {
+#ifdef INET
+ flowtable_route_flush(V_ip_ft, rt0);
+#endif
+ RTFREE(rt0);
+ }
+#endif
+
+ /*
+ * If this protocol has something to add to this then
+ * allow it to do that as well.
+ */
+ if (ifa->ifa_rtrequest)
+ ifa->ifa_rtrequest(req, rt, info);
+
+ /*
+ * actually return a resultant rtentry and
+ * give the caller a single reference.
+ */
+ if (ret_nrt) {
+ *ret_nrt = rt;
+ RT_ADDREF(rt);
+ }
+ RT_UNLOCK(rt);
+ break;
+ default:
+ error = EOPNOTSUPP;
+ }
+bad:
+ if (needlock)
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ return (error);
+#undef senderr
+}
+
+#undef dst
+#undef gateway
+#undef netmask
+#undef ifaaddr
+#undef ifpaddr
+#undef flags
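+/*
+ * Example (sketch): adding a static gateway route directly through
+ * the rt_addrinfo interface, assuming dst/gw/mask point at valid
+ * sockaddrs and the default FIB is wanted:
+ *
+ *	struct rt_addrinfo info;
+ *
+ *	bzero(&info, sizeof(info));
+ *	info.rti_flags = RTF_GATEWAY | RTF_STATIC;
+ *	info.rti_info[RTAX_DST] = dst;
+ *	info.rti_info[RTAX_GATEWAY] = gw;
+ *	info.rti_info[RTAX_NETMASK] = mask;
+ *	error = rtrequest1_fib(RTM_ADD, &info, NULL, 0);
+ */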
+
+int
+rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
+{
+ /* XXX dst may be overwritten, can we move this to below */
+ int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
+#ifdef INVARIANTS
+ struct radix_node_head *rnh;
+
+ rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family);
+#endif
+
+ RT_LOCK_ASSERT(rt);
+ RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
+
+ /*
+ * Prepare to store the gateway in rt->rt_gateway.
+ * Both dst and gateway are stored one after the other in the same
+ * malloc'd chunk. If we have room, we can reuse the old buffer,
+ * rt_gateway already points to the right place.
+ * Otherwise, malloc a new block and update the 'dst' address.
+ */
+ if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
+ caddr_t new;
+
+ R_Malloc(new, caddr_t, dlen + glen);
+ if (new == NULL)
+ return ENOBUFS;
+ /*
+ * XXX note, we copy from *dst and not *rt_key(rt) because
+ * rt_setgate() can be called to initialize a newly
+ * allocated route entry, in which case rt_key(rt) == NULL
+ * (and also rt->rt_gateway == NULL).
+ * Free()/free() handle a NULL argument just fine.
+ */
+ bcopy(dst, new, dlen);
+ Free(rt_key(rt)); /* free old block, if any */
+ rt_key(rt) = (struct sockaddr *)new;
+ rt->rt_gateway = (struct sockaddr *)(new + dlen);
+ }
+
+ /*
+ * Copy the new gateway value into the memory chunk.
+ */
+ bcopy(gate, rt->rt_gateway, glen);
+
+ return (0);
+}
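+/*
+ * Resulting layout of the single malloc'd chunk (sketch):
+ *
+ *	rt_key(rt) ------> +---------------------+
+ *	                   | dst  (dlen bytes)   |
+ *	rt->rt_gateway --> +---------------------+
+ *	                   | gate (glen bytes)   |
+ *	                   +---------------------+
+ */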
+
+void
+rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
+{
+ register u_char *cp1 = (u_char *)src;
+ register u_char *cp2 = (u_char *)dst;
+ register u_char *cp3 = (u_char *)netmask;
+ u_char *cplim = cp2 + *cp3;
+ u_char *cplim2 = cp2 + *cp1;
+
+ *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
+ cp3 += 2;
+ if (cplim > cplim2)
+ cplim = cplim2;
+ while (cp2 < cplim)
+ *cp2++ = *cp1++ & *cp3++;
+ if (cp2 < cplim2)
+ bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
+}
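+/*
+ * Worked example (sketch): src = 192.103.54.247 with netmask
+ * 255.255.255.0 yields dst = 192.103.54.0; destination bytes beyond
+ * the mask length are zeroed by the trailing bzero().
+ */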
+
+/*
+ * Set up a routing table entry, normally
+ * for an interface.
+ */
+#define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
+static inline int
+rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
+{
+ struct sockaddr *dst;
+ struct sockaddr *netmask;
+ struct rtentry *rt = NULL;
+ struct rt_addrinfo info;
+ int error = 0;
+ int startfib, endfib;
+ char tempbuf[_SOCKADDR_TMPSIZE];
+ int didwork = 0;
+ int a_failure = 0;
+ static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+
+ if (flags & RTF_HOST) {
+ dst = ifa->ifa_dstaddr;
+ netmask = NULL;
+ } else {
+ dst = ifa->ifa_addr;
+ netmask = ifa->ifa_netmask;
+ }
+ if ( dst->sa_family != AF_INET)
+ fibnum = 0;
+ if (fibnum == -1) {
+ if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
+ startfib = endfib = curthread->td_proc->p_fibnum;
+ } else {
+ startfib = 0;
+ endfib = rt_numfibs - 1;
+ }
+ } else {
+ KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
+ startfib = fibnum;
+ endfib = fibnum;
+ }
+ if (dst->sa_len == 0)
+ return(EINVAL);
+
+ /*
+ * If it's a delete, check that it exists and that
+ * it's on the correct interface, or we might scrub
+ * a route to another ifa, which would be confusing
+ * at best and possibly worse.
+ */
+ if (cmd == RTM_DELETE) {
+ /*
+ * It's a delete, so it should already exist..
+ * If it's a net, mask off the host bits
+ * (Assuming we have a mask)
+ * XXX this is kinda inet specific..
+ */
+ if (netmask != NULL) {
+ rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
+ dst = (struct sockaddr *)tempbuf;
+ }
+ }
+ /*
+ * Now go through all the requested tables (fibs) and do the
+ * requested action. Realistically, this will either be fib 0
+ * for protocols that don't do multiple tables or all the
+ * tables for those that do. XXX For this version only AF_INET.
+ * When that changes code should be refactored to protocol
+ * independent parts and protocol dependent parts.
+ */
+ for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
+ if (cmd == RTM_DELETE) {
+ struct radix_node_head *rnh;
+ struct radix_node *rn;
+ /*
+ * Look up an rtentry that is in the routing tree and
+ * contains the correct info.
+ */
+ rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
+ if (rnh == NULL)
+ /* this table doesn't exist but others might */
+ continue;
+ RADIX_NODE_HEAD_LOCK(rnh);
+#ifdef RADIX_MPATH
+ if (rn_mpath_capable(rnh)) {
+
+ rn = rnh->rnh_matchaddr(dst, rnh);
+ if (rn == NULL)
+ error = ESRCH;
+ else {
+ rt = RNTORT(rn);
+ /*
+ * For an interface route the
+ * rt->rt_gateway is sockaddr_intf
+ * for cloning ARP entries, so
+ * rt_mpath_matchgate must use the
+ * interface address.
+ */
+ rt = rt_mpath_matchgate(rt,
+ ifa->ifa_addr);
+ if (!rt)
+ error = ESRCH;
+ }
+ }
+ else
+#endif
+ rn = rnh->rnh_lookup(dst, netmask, rnh);
+ error = (rn == NULL ||
+ (rn->rn_flags & RNF_ROOT) ||
+ RNTORT(rn)->rt_ifa != ifa ||
+ !sa_equal((struct sockaddr *)rn->rn_key, dst));
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ if (error) {
+ /* this is only an error if bad on ALL tables */
+ continue;
+ }
+ }
+ /*
+ * Do the actual request
+ */
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_ifa = ifa;
+ info.rti_flags = flags | ifa->ifa_flags;
+ info.rti_info[RTAX_DST] = dst;
+ /*
+ * doing this for compatibility reasons
+ */
+ if (cmd == RTM_ADD)
+ info.rti_info[RTAX_GATEWAY] =
+ (struct sockaddr *)&null_sdl;
+ else
+ info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
+ info.rti_info[RTAX_NETMASK] = netmask;
+ error = rtrequest1_fib(cmd, &info, &rt, fibnum);
+ if (error == 0 && rt != NULL) {
+ /*
+ * notify any listening routing agents of the change
+ */
+ RT_LOCK(rt);
+#ifdef RADIX_MPATH
+ /*
+ * In case an address alias matches the first address,
+ * e.g. after
+ *	ifconfig bge0 192.103.54.246/24
+ *	ifconfig bge0 192.103.54.247/24
+ * the address set in the route is 192.103.54.246,
+ * so we need to replace it with 192.103.54.247.
+ */
+ if (memcmp(rt->rt_ifa->ifa_addr,
+ ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
+ ifa_free(rt->rt_ifa);
+ ifa_ref(ifa);
+ rt->rt_ifp = ifa->ifa_ifp;
+ rt->rt_ifa = ifa;
+ }
+#endif
+ /*
+ * doing this for compatibility reasons
+ */
+ if (cmd == RTM_ADD) {
+ ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
+ rt->rt_ifp->if_type;
+ ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
+ rt->rt_ifp->if_index;
+ }
+ RT_ADDREF(rt);
+ RT_UNLOCK(rt);
+ rt_newaddrmsg(cmd, ifa, error, rt);
+ RT_LOCK(rt);
+ RT_REMREF(rt);
+ if (cmd == RTM_DELETE) {
+ /*
+ * If we are deleting, and we found an entry,
+ * then it's been removed from the tree..
+ * now throw it away.
+ */
+ RTFREE_LOCKED(rt);
+ } else {
+ if (cmd == RTM_ADD) {
+ /*
+ * We just wanted to add it..
+ * we don't actually need a reference.
+ */
+ RT_REMREF(rt);
+ }
+ RT_UNLOCK(rt);
+ }
+ didwork = 1;
+ }
+ if (error)
+ a_failure = error;
+ }
+ if (cmd == RTM_DELETE) {
+ if (didwork) {
+ error = 0;
+ } else {
+ /* we only give an error if it wasn't in any table */
+ error = ((flags & RTF_HOST) ?
+ EHOSTUNREACH : ENETUNREACH);
+ }
+ } else {
+ if (a_failure) {
+ /* return an error if any of them failed */
+ error = a_failure;
+ }
+ }
+ return (error);
+}
+
+/* Special one for inet internal use; others may not use it. */
+int
+rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
+{
+ return (rtinit1(ifa, cmd, flags, -1));
+}
+
+/*
+ * Set up a routing table entry, normally
+ * for an interface.
+ */
+int
+rtinit(struct ifaddr *ifa, int cmd, int flags)
+{
+ struct sockaddr *dst;
+ int fib = 0;
+
+ if (flags & RTF_HOST) {
+ dst = ifa->ifa_dstaddr;
+ } else {
+ dst = ifa->ifa_addr;
+ }
+
+ if (dst->sa_family == AF_INET)
+ fib = -1;
+ return (rtinit1(ifa, cmd, flags, fib));
+}
diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h
new file mode 100644
index 00000000..4375020f
--- /dev/null
+++ b/freebsd/sys/net/route.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/net/route.h>
diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c
new file mode 100644
index 00000000..287dd74d
--- /dev/null
+++ b/freebsd/sys/net/rtsock.c
@@ -0,0 +1,1702 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1988, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)rtsock.c 8.7 (Berkeley) 10/12/95
+ * $FreeBSD$
+ */
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_sctp.h>
+#include <freebsd/local/opt_mpath.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_llatbl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/raw_cb.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/if_ether.h>
+#ifdef INET6
+#include <freebsd/netinet6/scope6_var.h>
+#endif
+
+#if defined(INET) || defined(INET6)
+#ifdef SCTP
+extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
+#endif /* SCTP */
+#endif
+
+#ifdef COMPAT_FREEBSD32
+#include <freebsd/sys/mount.h>
+#include <freebsd/compat/freebsd32/freebsd32.h>
+
+struct if_data32 {
+ uint8_t ifi_type;
+ uint8_t ifi_physical;
+ uint8_t ifi_addrlen;
+ uint8_t ifi_hdrlen;
+ uint8_t ifi_link_state;
+ uint8_t ifi_spare_char1;
+ uint8_t ifi_spare_char2;
+ uint8_t ifi_datalen;
+ uint32_t ifi_mtu;
+ uint32_t ifi_metric;
+ uint32_t ifi_baudrate;
+ uint32_t ifi_ipackets;
+ uint32_t ifi_ierrors;
+ uint32_t ifi_opackets;
+ uint32_t ifi_oerrors;
+ uint32_t ifi_collisions;
+ uint32_t ifi_ibytes;
+ uint32_t ifi_obytes;
+ uint32_t ifi_imcasts;
+ uint32_t ifi_omcasts;
+ uint32_t ifi_iqdrops;
+ uint32_t ifi_noproto;
+ uint32_t ifi_hwassist;
+ int32_t ifi_epoch;
+ struct timeval32 ifi_lastchange;
+};
+
+struct if_msghdr32 {
+ uint16_t ifm_msglen;
+ uint8_t ifm_version;
+ uint8_t ifm_type;
+ int32_t ifm_addrs;
+ int32_t ifm_flags;
+ uint16_t ifm_index;
+ struct if_data32 ifm_data;
+};
+#endif
+
+MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
+
+/* NB: these are not modified */
+static struct sockaddr route_src = { 2, PF_ROUTE, };
+static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
+
+static struct {
+ int ip_count; /* attached w/ AF_INET */
+ int ip6_count; /* attached w/ AF_INET6 */
+ int ipx_count; /* attached w/ AF_IPX */
+ int any_count; /* total attached */
+} route_cb;
+
+struct mtx rtsock_mtx;
+MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
+
+#define RTSOCK_LOCK() mtx_lock(&rtsock_mtx)
+#define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
+#define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED)
+
+SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
+
+struct walkarg {
+ int w_tmemsize;
+ int w_op, w_arg;
+ caddr_t w_tmem;
+ struct sysctl_req *w_req;
+};
+
+static void rts_input(struct mbuf *m);
+static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
+static int rt_msg2(int type, struct rt_addrinfo *rtinfo,
+ caddr_t cp, struct walkarg *w);
+static int rt_xaddrs(caddr_t cp, caddr_t cplim,
+ struct rt_addrinfo *rtinfo);
+static int sysctl_dumpentry(struct radix_node *rn, void *vw);
+static int sysctl_iflist(int af, struct walkarg *w);
+static int sysctl_ifmalist(int af, struct walkarg *w);
+static int route_output(struct mbuf *m, struct socket *so);
+static void rt_setmetrics(u_long which, const struct rt_metrics *in,
+ struct rt_metrics_lite *out);
+static void rt_getmetrics(const struct rt_metrics_lite *in,
+ struct rt_metrics *out);
+static void rt_dispatch(struct mbuf *, const struct sockaddr *);
+
+static struct netisr_handler rtsock_nh = {
+ .nh_name = "rtsock",
+ .nh_handler = rts_input,
+ .nh_proto = NETISR_ROUTE,
+ .nh_policy = NETISR_POLICY_SOURCE,
+};
+
+static int
+sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
+{
+ int error, qlimit;
+
+ netisr_getqlimit(&rtsock_nh, &qlimit);
+ error = sysctl_handle_int(oidp, &qlimit, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (qlimit < 1)
+ return (EINVAL);
+ return (netisr_setqlimit(&rtsock_nh, qlimit));
+}
+SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
+ 0, 0, sysctl_route_netisr_maxqlen, "I",
+ "maximum routing socket dispatch queue length");
+
+static void
+rts_init(void)
+{
+ int tmp;
+
+#ifndef __rtems__
+ if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
+ rtsock_nh.nh_qlimit = tmp;
+#endif
+ netisr_register(&rtsock_nh);
+}
+SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
+
+static void
+rts_input(struct mbuf *m)
+{
+ struct sockproto route_proto;
+ unsigned short *family;
+ struct m_tag *tag;
+
+ route_proto.sp_family = PF_ROUTE;
+ tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
+ if (tag != NULL) {
+ family = (unsigned short *)(tag + 1);
+ route_proto.sp_protocol = *family;
+ m_tag_delete(m, tag);
+ } else
+ route_proto.sp_protocol = 0;
+
+ raw_input(m, &route_proto, &route_src);
+}
+
+/*
+ * It really doesn't make any sense at all for this code to share much
+ * with raw_usrreq.c, since its functionality is so restricted. XXX
+ */
+static void
+rts_abort(struct socket *so)
+{
+
+ raw_usrreqs.pru_abort(so);
+}
+
+static void
+rts_close(struct socket *so)
+{
+
+ raw_usrreqs.pru_close(so);
+}
+
+/* pru_accept is EOPNOTSUPP */
+
+static int
+rts_attach(struct socket *so, int proto, struct thread *td)
+{
+ struct rawcb *rp;
+ int s, error;
+
+ KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
+
+ /* XXX */
+ rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
+ if (rp == NULL)
+ return ENOBUFS;
+
+ /*
+ * The splnet() is necessary to block protocols from sending
+ * error notifications (like RTM_REDIRECT or RTM_LOSING) while
+ * this PCB is extant but incompletely initialized.
+ * Probably we should try to do more of this work beforehand and
+ * eliminate the spl.
+ */
+ s = splnet();
+ so->so_pcb = (caddr_t)rp;
+ so->so_fibnum = td->td_proc->p_fibnum;
+ error = raw_attach(so, proto);
+ rp = sotorawcb(so);
+ if (error) {
+ splx(s);
+ so->so_pcb = NULL;
+ free(rp, M_PCB);
+ return error;
+ }
+ RTSOCK_LOCK();
+ switch(rp->rcb_proto.sp_protocol) {
+ case AF_INET:
+ route_cb.ip_count++;
+ break;
+ case AF_INET6:
+ route_cb.ip6_count++;
+ break;
+ case AF_IPX:
+ route_cb.ipx_count++;
+ break;
+ }
+ route_cb.any_count++;
+ RTSOCK_UNLOCK();
+ soisconnected(so);
+ so->so_options |= SO_USELOOPBACK;
+ splx(s);
+ return 0;
+}
+
+static int
+rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+
+ return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
+}
+
+static int
+rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+
+ return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
+}
+
+/* pru_connect2 is EOPNOTSUPP */
+/* pru_control is EOPNOTSUPP */
+
+static void
+rts_detach(struct socket *so)
+{
+ struct rawcb *rp = sotorawcb(so);
+
+ KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
+
+ RTSOCK_LOCK();
+ switch(rp->rcb_proto.sp_protocol) {
+ case AF_INET:
+ route_cb.ip_count--;
+ break;
+ case AF_INET6:
+ route_cb.ip6_count--;
+ break;
+ case AF_IPX:
+ route_cb.ipx_count--;
+ break;
+ }
+ route_cb.any_count--;
+ RTSOCK_UNLOCK();
+ raw_usrreqs.pru_detach(so);
+}
+
+static int
+rts_disconnect(struct socket *so)
+{
+
+ return (raw_usrreqs.pru_disconnect(so));
+}
+
+/* pru_listen is EOPNOTSUPP */
+
+static int
+rts_peeraddr(struct socket *so, struct sockaddr **nam)
+{
+
+ return (raw_usrreqs.pru_peeraddr(so, nam));
+}
+
+/* pru_rcvd is EOPNOTSUPP */
+/* pru_rcvoob is EOPNOTSUPP */
+
+static int
+rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
+ struct mbuf *control, struct thread *td)
+{
+
+ return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
+}
+
+/* pru_sense is null */
+
+static int
+rts_shutdown(struct socket *so)
+{
+
+ return (raw_usrreqs.pru_shutdown(so));
+}
+
+static int
+rts_sockaddr(struct socket *so, struct sockaddr **nam)
+{
+
+ return (raw_usrreqs.pru_sockaddr(so, nam));
+}
+
+static struct pr_usrreqs route_usrreqs = {
+ .pru_abort = rts_abort,
+ .pru_attach = rts_attach,
+ .pru_bind = rts_bind,
+ .pru_connect = rts_connect,
+ .pru_detach = rts_detach,
+ .pru_disconnect = rts_disconnect,
+ .pru_peeraddr = rts_peeraddr,
+ .pru_send = rts_send,
+ .pru_shutdown = rts_shutdown,
+ .pru_sockaddr = rts_sockaddr,
+ .pru_close = rts_close,
+};
+
+#ifndef _SOCKADDR_UNION_DEFINED
+#define _SOCKADDR_UNION_DEFINED
+/*
+ * The union of all possible address formats we handle.
+ */
+union sockaddr_union {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+};
+#endif /* _SOCKADDR_UNION_DEFINED */
+
+static int
+rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
+ struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
+{
+
+ /* First, see if the returned address is part of the jail. */
+ if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
+ info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
+ return (0);
+ }
+
+ switch (info->rti_info[RTAX_DST]->sa_family) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct in_addr ia;
+ struct ifaddr *ifa;
+ int found;
+
+ found = 0;
+ /*
+ * Try to find an address on the given outgoing interface
+ * that belongs to the jail.
+ */
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ struct sockaddr *sa;
+ sa = ifa->ifa_addr;
+ if (sa->sa_family != AF_INET)
+ continue;
+ ia = ((struct sockaddr_in *)sa)->sin_addr;
+ if (prison_check_ip4(cred, &ia) == 0) {
+ found = 1;
+ break;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+ if (!found) {
+ /*
+ * As a last resort return the 'default' jail address.
+ */
+ ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
+ sin_addr;
+ if (prison_get_ip4(cred, &ia) != 0)
+ return (ESRCH);
+ }
+ bzero(&saun->sin, sizeof(struct sockaddr_in));
+ saun->sin.sin_len = sizeof(struct sockaddr_in);
+ saun->sin.sin_family = AF_INET;
+ saun->sin.sin_addr.s_addr = ia.s_addr;
+ info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct in6_addr ia6;
+ struct ifaddr *ifa;
+ int found;
+
+ found = 0;
+ /*
+ * Try to find an address on the given outgoing interface
+ * that belongs to the jail.
+ */
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ struct sockaddr *sa;
+ sa = ifa->ifa_addr;
+ if (sa->sa_family != AF_INET6)
+ continue;
+ bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
+ &ia6, sizeof(struct in6_addr));
+ if (prison_check_ip6(cred, &ia6) == 0) {
+ found = 1;
+ break;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+ if (!found) {
+ /*
+ * As a last resort return the 'default' jail address.
+ */
+ ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
+ sin6_addr;
+ if (prison_get_ip6(cred, &ia6) != 0)
+ return (ESRCH);
+ }
+ bzero(&saun->sin6, sizeof(struct sockaddr_in6));
+ saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
+ saun->sin6.sin6_family = AF_INET6;
+ bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
+ if (sa6_recoverscope(&saun->sin6) != 0)
+ return (ESRCH);
+ info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
+ break;
+ }
+#endif
+ default:
+ return (ESRCH);
+ }
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+route_output(struct mbuf *m, struct socket *so)
+{
+#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
+ struct rt_msghdr *rtm = NULL;
+ struct rtentry *rt = NULL;
+ struct radix_node_head *rnh;
+ struct rt_addrinfo info;
+ int len, error = 0;
+ struct ifnet *ifp = NULL;
+ union sockaddr_union saun;
+
+#define senderr(e) { error = e; goto flush;}
+ if (m == NULL || ((m->m_len < sizeof(long)) &&
+ (m = m_pullup(m, sizeof(long))) == NULL))
+ return (ENOBUFS);
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("route_output");
+ len = m->m_pkthdr.len;
+ if (len < sizeof(*rtm) ||
+ len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
+ info.rti_info[RTAX_DST] = NULL;
+ senderr(EINVAL);
+ }
+ R_Malloc(rtm, struct rt_msghdr *, len);
+ if (rtm == NULL) {
+ info.rti_info[RTAX_DST] = NULL;
+ senderr(ENOBUFS);
+ }
+ m_copydata(m, 0, len, (caddr_t)rtm);
+ if (rtm->rtm_version != RTM_VERSION) {
+ info.rti_info[RTAX_DST] = NULL;
+ senderr(EPROTONOSUPPORT);
+ }
+ rtm->rtm_pid = curproc->p_pid;
+ bzero(&info, sizeof(info));
+ info.rti_addrs = rtm->rtm_addrs;
+ if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
+ info.rti_info[RTAX_DST] = NULL;
+ senderr(EINVAL);
+ }
+ info.rti_flags = rtm->rtm_flags;
+ if (info.rti_info[RTAX_DST] == NULL ||
+ info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
+ (info.rti_info[RTAX_GATEWAY] != NULL &&
+ info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
+ senderr(EINVAL);
+ /*
+ * Verify that the caller has the appropriate privilege; RTM_GET
+ * is the only operation the non-superuser is allowed.
+ */
+ if (rtm->rtm_type != RTM_GET) {
+ error = priv_check(curthread, PRIV_NET_ROUTE);
+ if (error)
+ senderr(error);
+ }
+
+ /*
+ * The given gateway address may be an interface address.
+ * For example, issuing a "route change" command on a route
+ * entry that was created from a tunnel, and the gateway
+ * address given is the local end point. In this case the
+ * RTF_GATEWAY flag must be cleared or the destination will
+ * not be reachable even though there is no error message.
+ */
+ if (info.rti_info[RTAX_GATEWAY] != NULL &&
+ info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
+ struct route gw_ro;
+
+ bzero(&gw_ro, sizeof(gw_ro));
+ gw_ro.ro_dst = *info.rti_info[RTAX_GATEWAY];
+ rtalloc_ign_fib(&gw_ro, 0, so->so_fibnum);
+ /*
+ * A host route through the loopback interface is
+ * installed for each interface address. In pre-8.0
+ * releases the interface address of a PPP link type
+ * is not reachable locally. This behavior is fixed as
+ * part of the new L2/L3 redesign and rewrite work. The
+ * signature of this interface address route is the
+ * AF_LINK sa_family type of the rt_gateway, and the
+ * rt_ifp has the IFF_LOOPBACK flag set.
+ */
+ if (gw_ro.ro_rt != NULL &&
+ gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK &&
+ gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)
+ info.rti_flags &= ~RTF_GATEWAY;
+ if (gw_ro.ro_rt != NULL)
+ RTFREE(gw_ro.ro_rt);
+ }
+
+ switch (rtm->rtm_type) {
+ struct rtentry *saved_nrt;
+
+ case RTM_ADD:
+ if (info.rti_info[RTAX_GATEWAY] == NULL)
+ senderr(EINVAL);
+ saved_nrt = NULL;
+
+ /* support for new ARP code */
+ if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
+ (rtm->rtm_flags & RTF_LLDATA) != 0) {
+ error = lla_rt_output(rtm, &info);
+ break;
+ }
+ error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
+ so->so_fibnum);
+ if (error == 0 && saved_nrt) {
+ RT_LOCK(saved_nrt);
+ rt_setmetrics(rtm->rtm_inits,
+ &rtm->rtm_rmx, &saved_nrt->rt_rmx);
+ rtm->rtm_index = saved_nrt->rt_ifp->if_index;
+ RT_REMREF(saved_nrt);
+ RT_UNLOCK(saved_nrt);
+ }
+ break;
+
+ case RTM_DELETE:
+ saved_nrt = NULL;
+ /* support for new ARP code */
+ if (info.rti_info[RTAX_GATEWAY] &&
+ (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
+ (rtm->rtm_flags & RTF_LLDATA) != 0) {
+ error = lla_rt_output(rtm, &info);
+ break;
+ }
+ error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
+ so->so_fibnum);
+ if (error == 0) {
+ RT_LOCK(saved_nrt);
+ rt = saved_nrt;
+ goto report;
+ }
+ break;
+
+ case RTM_GET:
+ case RTM_CHANGE:
+ case RTM_LOCK:
+ rnh = rt_tables_get_rnh(so->so_fibnum,
+ info.rti_info[RTAX_DST]->sa_family);
+ if (rnh == NULL)
+ senderr(EAFNOSUPPORT);
+ RADIX_NODE_HEAD_RLOCK(rnh);
+ rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
+ info.rti_info[RTAX_NETMASK], rnh);
+ if (rt == NULL) { /* XXX looks bogus */
+ RADIX_NODE_HEAD_RUNLOCK(rnh);
+ senderr(ESRCH);
+ }
+#ifdef RADIX_MPATH
+ /*
+ * for RTM_CHANGE/LOCK, if we got multipath routes,
+ * we require users to specify a matching RTAX_GATEWAY.
+ *
+ * for RTM_GET, gate is optional even with multipath.
+ * if gate == NULL the first match is returned.
+ * (no need to call rt_mpath_matchgate if gate == NULL)
+ */
+ if (rn_mpath_capable(rnh) &&
+ (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
+ rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
+ if (!rt) {
+ RADIX_NODE_HEAD_RUNLOCK(rnh);
+ senderr(ESRCH);
+ }
+ }
+#endif
+ /*
+ * If performing proxied L2 entry insertion, and
+ * the actual PPP host entry is found, perform
+ * another search to retrieve the prefix route of
+ * the local end point of the PPP link.
+ */
+ if (rtm->rtm_flags & RTF_ANNOUNCE) {
+ struct sockaddr laddr;
+
+ if (rt->rt_ifp != NULL &&
+ rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
+ struct ifaddr *ifa;
+
+ ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1);
+ if (ifa != NULL)
+ rt_maskedcopy(ifa->ifa_addr,
+ &laddr,
+ ifa->ifa_netmask);
+ } else
+ rt_maskedcopy(rt->rt_ifa->ifa_addr,
+ &laddr,
+ rt->rt_ifa->ifa_netmask);
+ /*
+ * Re-fetch rt; no lock operation is necessary.
+ */
+ rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, rnh);
+ if (rt == NULL) {
+ RADIX_NODE_HEAD_RUNLOCK(rnh);
+ senderr(ESRCH);
+ }
+ }
+ RT_LOCK(rt);
+ RT_ADDREF(rt);
+ RADIX_NODE_HEAD_RUNLOCK(rnh);
+
+ /*
+ * Fix for PR: 82974
+ *
+ * RTM_CHANGE/LOCK need a perfect match; rnh_lookup()
+ * returns a perfect match in case a netmask is
+ * specified. For host routes only a longest-prefix
+ * match is returned, so it is necessary to compare the
+ * existence of the netmask. If both have a netmask,
+ * rnh_lookup() did a perfect match, and if neither of
+ * them has a netmask, both are host routes, which is
+ * also a perfect match.
+ */
+
+ if (rtm->rtm_type != RTM_GET &&
+ (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
+ RT_UNLOCK(rt);
+ senderr(ESRCH);
+ }
+
+ switch(rtm->rtm_type) {
+
+ case RTM_GET:
+ report:
+ RT_LOCK_ASSERT(rt);
+ if ((rt->rt_flags & RTF_HOST) == 0
+ ? jailed_without_vnet(curthread->td_ucred)
+ : prison_if(curthread->td_ucred,
+ rt_key(rt)) != 0) {
+ RT_UNLOCK(rt);
+ senderr(ESRCH);
+ }
+ info.rti_info[RTAX_DST] = rt_key(rt);
+ info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+ info.rti_info[RTAX_GENMASK] = 0;
+ if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
+ ifp = rt->rt_ifp;
+ if (ifp) {
+ info.rti_info[RTAX_IFP] =
+ ifp->if_addr->ifa_addr;
+ error = rtm_get_jailed(&info, ifp, rt,
+ &saun, curthread->td_ucred);
+ if (error != 0) {
+ RT_UNLOCK(rt);
+ senderr(error);
+ }
+ if (ifp->if_flags & IFF_POINTOPOINT)
+ info.rti_info[RTAX_BRD] =
+ rt->rt_ifa->ifa_dstaddr;
+ rtm->rtm_index = ifp->if_index;
+ } else {
+ info.rti_info[RTAX_IFP] = NULL;
+ info.rti_info[RTAX_IFA] = NULL;
+ }
+ } else if ((ifp = rt->rt_ifp) != NULL) {
+ rtm->rtm_index = ifp->if_index;
+ }
+ len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
+ if (len > rtm->rtm_msglen) {
+ struct rt_msghdr *new_rtm;
+ R_Malloc(new_rtm, struct rt_msghdr *, len);
+ if (new_rtm == NULL) {
+ RT_UNLOCK(rt);
+ senderr(ENOBUFS);
+ }
+ bcopy(rtm, new_rtm, rtm->rtm_msglen);
+ Free(rtm); rtm = new_rtm;
+ }
+ (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
+ rtm->rtm_flags = rt->rt_flags;
+ rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+ rtm->rtm_addrs = info.rti_addrs;
+ break;
+
+ case RTM_CHANGE:
+ /*
+ * New gateway could require new ifaddr, ifp;
+ * flags may also be different; ifp may be specified
+ * by ll sockaddr when protocol address is ambiguous
+ */
+ if (((rt->rt_flags & RTF_GATEWAY) &&
+ info.rti_info[RTAX_GATEWAY] != NULL) ||
+ info.rti_info[RTAX_IFP] != NULL ||
+ (info.rti_info[RTAX_IFA] != NULL &&
+ !sa_equal(info.rti_info[RTAX_IFA],
+ rt->rt_ifa->ifa_addr))) {
+ RT_UNLOCK(rt);
+ RADIX_NODE_HEAD_LOCK(rnh);
+ error = rt_getifa_fib(&info, rt->rt_fibnum);
+ /*
+ * XXXRW: Really we should release this
+ * reference later, but this maintains
+ * historical behavior.
+ */
+ if (info.rti_ifa != NULL)
+ ifa_free(info.rti_ifa);
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ if (error != 0)
+ senderr(error);
+ RT_LOCK(rt);
+ }
+ if (info.rti_ifa != NULL &&
+ info.rti_ifa != rt->rt_ifa &&
+ rt->rt_ifa != NULL &&
+ rt->rt_ifa->ifa_rtrequest != NULL) {
+ rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
+ &info);
+ ifa_free(rt->rt_ifa);
+ }
+ if (info.rti_info[RTAX_GATEWAY] != NULL) {
+ RT_UNLOCK(rt);
+ RADIX_NODE_HEAD_LOCK(rnh);
+ RT_LOCK(rt);
+
+ error = rt_setgate(rt, rt_key(rt),
+ info.rti_info[RTAX_GATEWAY]);
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ if (error != 0) {
+ RT_UNLOCK(rt);
+ senderr(error);
+ }
+ rt->rt_flags |= (RTF_GATEWAY & info.rti_flags);
+ }
+ if (info.rti_ifa != NULL &&
+ info.rti_ifa != rt->rt_ifa) {
+ ifa_ref(info.rti_ifa);
+ rt->rt_ifa = info.rti_ifa;
+ rt->rt_ifp = info.rti_ifp;
+ }
+ /* Allow some flags to be toggled on change. */
+ rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
+ (rtm->rtm_flags & RTF_FMASK);
+ rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
+ &rt->rt_rmx);
+ rtm->rtm_index = rt->rt_ifp->if_index;
+ if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
+ rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
+ /* FALLTHROUGH */
+ case RTM_LOCK:
+ /* We don't support locks anymore */
+ break;
+ }
+ RT_UNLOCK(rt);
+ break;
+
+ default:
+ senderr(EOPNOTSUPP);
+ }
+
+flush:
+ if (rtm) {
+ if (error)
+ rtm->rtm_errno = error;
+ else
+ rtm->rtm_flags |= RTF_DONE;
+ }
+ if (rt) /* XXX can this be true? */
+ RTFREE(rt);
+ {
+ struct rawcb *rp = NULL;
+ /*
+ * Check whether the sender wants its own messages looped back.
+ */
+ if ((so->so_options & SO_USELOOPBACK) == 0) {
+ if (route_cb.any_count <= 1) {
+ if (rtm)
+ Free(rtm);
+ m_freem(m);
+ return (error);
+ }
+ /* There is another listener, so construct message */
+ rp = sotorawcb(so);
+ }
+ if (rtm) {
+ m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
+ if (m->m_pkthdr.len < rtm->rtm_msglen) {
+ m_freem(m);
+ m = NULL;
+ } else if (m->m_pkthdr.len > rtm->rtm_msglen)
+ m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
+ Free(rtm);
+ }
+ if (m) {
+ if (rp) {
+ /*
+ * XXX ensure we don't get a copy by
+ * invalidating our protocol
+ */
+ unsigned short family = rp->rcb_proto.sp_family;
+ rp->rcb_proto.sp_family = 0;
+ rt_dispatch(m, info.rti_info[RTAX_DST]);
+ rp->rcb_proto.sp_family = family;
+ } else
+ rt_dispatch(m, info.rti_info[RTAX_DST]);
+ }
+ }
+ return (error);
+#undef sa_equal
+}
+
+static void
+rt_setmetrics(u_long which, const struct rt_metrics *in,
+ struct rt_metrics_lite *out)
+{
+#define metric(f, e) if (which & (f)) out->e = in->e;
+ /*
+ * Only these are stored in the routing entry since introduction
+ * of tcp hostcache. The rest is ignored.
+ */
+ metric(RTV_MTU, rmx_mtu);
+ metric(RTV_WEIGHT, rmx_weight);
+ /* Userland -> kernel timebase conversion. */
+ if (which & RTV_EXPIRE)
+ out->rmx_expire = in->rmx_expire ?
+ in->rmx_expire - time_second + time_uptime : 0;
+#undef metric
+}
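+/*
+ * Worked example (sketch): with time_second = 1000000 and
+ * time_uptime = 500, a userland rmx_expire of 1000060 (60 seconds
+ * from now in wall-clock time) is stored as 560 in the kernel's
+ * uptime-based timebase.
+ */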
+
+static void
+rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
+{
+#define metric(e) out->e = in->e;
+ bzero(out, sizeof(*out));
+ metric(rmx_mtu);
+ metric(rmx_weight);
+ /* Kernel -> userland timebase conversion. */
+ out->rmx_expire = in->rmx_expire ?
+ in->rmx_expire - time_uptime + time_second : 0;
+#undef metric
+}
+
+/*
+ * Extract the addresses of the passed sockaddrs.
+ * Do a little sanity checking so as to avoid bad memory references.
+ * This data is derived straight from userland.
+ */
+static int
+rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
+{
+ struct sockaddr *sa;
+ int i;
+
+ for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
+ if ((rtinfo->rti_addrs & (1 << i)) == 0)
+ continue;
+ sa = (struct sockaddr *)cp;
+ /*
+ * It won't fit.
+ */
+ if (cp + sa->sa_len > cplim)
+ return (EINVAL);
+ /*
+ * There are no more.. quit now.
+ * If there are more bits set, they are in error.
+ * I've seen this; route(1) can evidently generate these,
+ * which would cause the kernel to core dump.
+ * For compatibility, if we see this, point to a safe address.
+ */
+ if (sa->sa_len == 0) {
+ rtinfo->rti_info[i] = &sa_zero;
+ return (0); /* should be EINVAL but for compat */
+ }
+ /* accept it */
+ rtinfo->rti_info[i] = sa;
+ cp += SA_SIZE(sa);
+ }
+ return (0);
+}
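+/*
+ * Example (sketch): a message with rtm_addrs = RTA_DST | RTA_GATEWAY
+ * carries two sockaddrs back to back after the rt_msghdr; the loop
+ * above assigns the first to rti_info[RTAX_DST] and the second to
+ * rti_info[RTAX_GATEWAY], advancing cp by SA_SIZE() each time.
+ */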
+
+static struct mbuf *
+rt_msg1(int type, struct rt_addrinfo *rtinfo)
+{
+ struct rt_msghdr *rtm;
+ struct mbuf *m;
+ int i;
+ struct sockaddr *sa;
+ int len, dlen;
+
+ switch (type) {
+
+ case RTM_DELADDR:
+ case RTM_NEWADDR:
+ len = sizeof(struct ifa_msghdr);
+ break;
+
+ case RTM_DELMADDR:
+ case RTM_NEWMADDR:
+ len = sizeof(struct ifma_msghdr);
+ break;
+
+ case RTM_IFINFO:
+ len = sizeof(struct if_msghdr);
+ break;
+
+ case RTM_IFANNOUNCE:
+ case RTM_IEEE80211:
+ len = sizeof(struct if_announcemsghdr);
+ break;
+
+ default:
+ len = sizeof(struct rt_msghdr);
+ }
+ if (len > MCLBYTES)
+ panic("rt_msg1");
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m && len > MHLEN) {
+ MCLGET(m, M_DONTWAIT);
+ if ((m->m_flags & M_EXT) == 0) {
+ m_free(m);
+ m = NULL;
+ }
+ }
+ if (m == NULL)
+ return (m);
+ m->m_pkthdr.len = m->m_len = len;
+ m->m_pkthdr.rcvif = NULL;
+ rtm = mtod(m, struct rt_msghdr *);
+ bzero((caddr_t)rtm, len);
+ for (i = 0; i < RTAX_MAX; i++) {
+ if ((sa = rtinfo->rti_info[i]) == NULL)
+ continue;
+ rtinfo->rti_addrs |= (1 << i);
+ dlen = SA_SIZE(sa);
+ m_copyback(m, len, dlen, (caddr_t)sa);
+ len += dlen;
+ }
+ if (m->m_pkthdr.len != len) {
+ m_freem(m);
+ return (NULL);
+ }
+ rtm->rtm_msglen = len;
+ rtm->rtm_version = RTM_VERSION;
+ rtm->rtm_type = type;
+ return (m);
+}
+
+static int
+rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
+{
+ int i;
+ int len, dlen, second_time = 0;
+ caddr_t cp0;
+
+ rtinfo->rti_addrs = 0;
+again:
+ switch (type) {
+
+ case RTM_DELADDR:
+ case RTM_NEWADDR:
+ len = sizeof(struct ifa_msghdr);
+ break;
+
+ case RTM_IFINFO:
+#ifdef COMPAT_FREEBSD32
+ if (w != NULL && w->w_req->flags & SCTL_MASK32) {
+ len = sizeof(struct if_msghdr32);
+ break;
+ }
+#endif
+ len = sizeof(struct if_msghdr);
+ break;
+
+ case RTM_NEWMADDR:
+ len = sizeof(struct ifma_msghdr);
+ break;
+
+ default:
+ len = sizeof(struct rt_msghdr);
+ }
+ cp0 = cp;
+ if (cp0)
+ cp += len;
+ for (i = 0; i < RTAX_MAX; i++) {
+ struct sockaddr *sa;
+
+ if ((sa = rtinfo->rti_info[i]) == NULL)
+ continue;
+ rtinfo->rti_addrs |= (1 << i);
+ dlen = SA_SIZE(sa);
+ if (cp) {
+ bcopy((caddr_t)sa, cp, (unsigned)dlen);
+ cp += dlen;
+ }
+ len += dlen;
+ }
+ len = ALIGN(len);
+ if (cp == NULL && w != NULL && !second_time) {
+ struct walkarg *rw = w;
+
+ if (rw->w_req) {
+ if (rw->w_tmemsize < len) {
+ if (rw->w_tmem)
+ free(rw->w_tmem, M_RTABLE);
+ rw->w_tmem = (caddr_t)
+ malloc(len, M_RTABLE, M_NOWAIT);
+ if (rw->w_tmem)
+ rw->w_tmemsize = len;
+ }
+ if (rw->w_tmem) {
+ cp = rw->w_tmem;
+ second_time = 1;
+ goto again;
+ }
+ }
+ }
+ if (cp) {
+ struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
+
+ rtm->rtm_version = RTM_VERSION;
+ rtm->rtm_type = type;
+ rtm->rtm_msglen = len;
+ }
+ return (len);
+}
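+
+/*
+ * A minimal sketch of the two-pass use of rt_msg2(), mirroring
+ * sysctl_dumpentry() below: a first call with cp == NULL sizes the
+ * message and grows w->w_tmem via the "again" path, after which the
+ * built message can be copied out --
+ *
+ *	len = rt_msg2(RTM_GET, &info, NULL, w);
+ *	if (w->w_req && w->w_tmem)
+ *		error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
+ */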
+
+/*
+ * This routine is called to generate a message from the routing
+ * socket indicating that a redirect has occurred, a routing lookup
+ * has failed, or that a protocol has detected timeouts to a particular
+ * destination.
+ */
+void
+rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
+{
+ struct rt_msghdr *rtm;
+ struct mbuf *m;
+ struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
+
+ if (route_cb.any_count == 0)
+ return;
+ m = rt_msg1(type, rtinfo);
+ if (m == NULL)
+ return;
+ rtm = mtod(m, struct rt_msghdr *);
+ rtm->rtm_flags = RTF_DONE | flags;
+ rtm->rtm_errno = error;
+ rtm->rtm_addrs = rtinfo->rti_addrs;
+ rt_dispatch(m, sa);
+}
+
+/*
+ * This routine is called to generate a message from the routing
+ * socket indicating that the status of a network interface has changed.
+ */
+void
+rt_ifmsg(struct ifnet *ifp)
+{
+ struct if_msghdr *ifm;
+ struct mbuf *m;
+ struct rt_addrinfo info;
+
+ if (route_cb.any_count == 0)
+ return;
+ bzero((caddr_t)&info, sizeof(info));
+ m = rt_msg1(RTM_IFINFO, &info);
+ if (m == NULL)
+ return;
+ ifm = mtod(m, struct if_msghdr *);
+ ifm->ifm_index = ifp->if_index;
+ ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm->ifm_data = ifp->if_data;
+ ifm->ifm_addrs = 0;
+ rt_dispatch(m, NULL);
+}
+
+/*
+ * This is called to generate messages from the routing socket
+ * indicating a network interface has had addresses associated with it.
+ * If we ever reverse the logic so that messages TO the routing
+ * socket indicate a request to configure interfaces, then this will
+ * be unnecessary as the routing socket will automatically generate
+ * copies of it.
+ */
+void
+rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
+{
+ struct rt_addrinfo info;
+ struct sockaddr *sa = NULL;
+ int pass;
+ struct mbuf *m = NULL;
+ struct ifnet *ifp = ifa->ifa_ifp;
+
+ KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
+ ("unexpected cmd %u", cmd));
+#if defined(INET) || defined(INET6)
+#ifdef SCTP
+ /*
+	 * Notify the SCTP stack; this will only get called when an
+	 * address is added or deleted.
+	 * XXX pass the ifaddr struct instead of ifa->ifa_addr...
+ */
+ sctp_addr_change(ifa, cmd);
+#endif /* SCTP */
+#endif
+ if (route_cb.any_count == 0)
+ return;
+ for (pass = 1; pass < 3; pass++) {
+ bzero((caddr_t)&info, sizeof(info));
+ if ((cmd == RTM_ADD && pass == 1) ||
+ (cmd == RTM_DELETE && pass == 2)) {
+ struct ifa_msghdr *ifam;
+ int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
+
+ info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
+ info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
+ info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+ info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
+ if ((m = rt_msg1(ncmd, &info)) == NULL)
+ continue;
+ ifam = mtod(m, struct ifa_msghdr *);
+ ifam->ifam_index = ifp->if_index;
+ ifam->ifam_metric = ifa->ifa_metric;
+ ifam->ifam_flags = ifa->ifa_flags;
+ ifam->ifam_addrs = info.rti_addrs;
+ }
+ if ((cmd == RTM_ADD && pass == 2) ||
+ (cmd == RTM_DELETE && pass == 1)) {
+ struct rt_msghdr *rtm;
+
+ if (rt == NULL)
+ continue;
+ info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+ info.rti_info[RTAX_DST] = sa = rt_key(rt);
+ info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ if ((m = rt_msg1(cmd, &info)) == NULL)
+ continue;
+ rtm = mtod(m, struct rt_msghdr *);
+ rtm->rtm_index = ifp->if_index;
+ rtm->rtm_flags |= rt->rt_flags;
+ rtm->rtm_errno = error;
+ rtm->rtm_addrs = info.rti_addrs;
+ }
+ rt_dispatch(m, sa);
+ }
+}
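+
+/*
+ * Note on the pass ordering above: for RTM_ADD the address message
+ * (RTM_NEWADDR) goes out on pass 1 and the route message on pass 2,
+ * while RTM_DELETE reverses this, so listeners always see the address
+ * as present for as long as its route is.
+ */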
+
+/*
+ * This is the analogue of rt_newaddrmsg(), performing the same
+ * function but for multicast group memberships.  This is easier since
+ * there is no route state to worry about.
+ */
+void
+rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
+{
+ struct rt_addrinfo info;
+ struct mbuf *m = NULL;
+ struct ifnet *ifp = ifma->ifma_ifp;
+ struct ifma_msghdr *ifmam;
+
+ if (route_cb.any_count == 0)
+ return;
+
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_info[RTAX_IFA] = ifma->ifma_addr;
+ info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
+ /*
+ * If a link-layer address is present, present it as a ``gateway''
+	 * (similar to how ARP entries, for example, are presented).
+ */
+ info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
+ m = rt_msg1(cmd, &info);
+ if (m == NULL)
+ return;
+ ifmam = mtod(m, struct ifma_msghdr *);
+ KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
+ __func__));
+ ifmam->ifmam_index = ifp->if_index;
+ ifmam->ifmam_addrs = info.rti_addrs;
+ rt_dispatch(m, ifma->ifma_addr);
+}
+
+static struct mbuf *
+rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
+ struct rt_addrinfo *info)
+{
+ struct if_announcemsghdr *ifan;
+ struct mbuf *m;
+
+ if (route_cb.any_count == 0)
+ return NULL;
+ bzero((caddr_t)info, sizeof(*info));
+ m = rt_msg1(type, info);
+ if (m != NULL) {
+ ifan = mtod(m, struct if_announcemsghdr *);
+ ifan->ifan_index = ifp->if_index;
+ strlcpy(ifan->ifan_name, ifp->if_xname,
+ sizeof(ifan->ifan_name));
+ ifan->ifan_what = what;
+ }
+ return m;
+}
+
+/*
+ * This is called to generate routing socket messages indicating
+ * IEEE80211 wireless events.
+ * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
+ */
+void
+rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
+{
+ struct mbuf *m;
+ struct rt_addrinfo info;
+
+ m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
+ if (m != NULL) {
+ /*
+ * Append the ieee80211 data. Try to stick it in the
+ * mbuf containing the ifannounce msg; otherwise allocate
+ * a new mbuf and append.
+ *
+ * NB: we assume m is a single mbuf.
+ */
+ if (data_len > M_TRAILINGSPACE(m)) {
+ struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
+ if (n == NULL) {
+ m_freem(m);
+ return;
+ }
+ bcopy(data, mtod(n, void *), data_len);
+ n->m_len = data_len;
+ m->m_next = n;
+ } else if (data_len > 0) {
+ bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
+ m->m_len += data_len;
+ }
+ if (m->m_flags & M_PKTHDR)
+ m->m_pkthdr.len += data_len;
+ mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
+ rt_dispatch(m, NULL);
+ }
+}
+
+/*
+ * This is called to generate routing socket messages indicating
+ * network interface arrival and departure.
+ */
+void
+rt_ifannouncemsg(struct ifnet *ifp, int what)
+{
+ struct mbuf *m;
+ struct rt_addrinfo info;
+
+ m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
+ if (m != NULL)
+ rt_dispatch(m, NULL);
+}
+
+static void
+rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
+{
+ struct m_tag *tag;
+
+ /*
+ * Preserve the family from the sockaddr, if any, in an m_tag for
+ * use when injecting the mbuf into the routing socket buffer from
+ * the netisr.
+ */
+ if (sa != NULL) {
+ tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
+ M_NOWAIT);
+ if (tag == NULL) {
+ m_freem(m);
+ return;
+ }
+ *(unsigned short *)(tag + 1) = sa->sa_family;
+ m_tag_prepend(m, tag);
+ }
+#ifdef VIMAGE
+ if (V_loif)
+ m->m_pkthdr.rcvif = V_loif;
+ else {
+ m_freem(m);
+ return;
+ }
+#endif
+ netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */
+}
+
+/*
+ * This is used in dumping the kernel table via sysctl().
+ */
+static int
+sysctl_dumpentry(struct radix_node *rn, void *vw)
+{
+ struct walkarg *w = vw;
+ struct rtentry *rt = (struct rtentry *)rn;
+ int error = 0, size;
+ struct rt_addrinfo info;
+
+ if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
+ return 0;
+ if ((rt->rt_flags & RTF_HOST) == 0
+ ? jailed_without_vnet(w->w_req->td->td_ucred)
+ : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0)
+ return (0);
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_info[RTAX_DST] = rt_key(rt);
+ info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+ info.rti_info[RTAX_GENMASK] = 0;
+ if (rt->rt_ifp) {
+ info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
+ info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
+ if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
+ info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
+ }
+ size = rt_msg2(RTM_GET, &info, NULL, w);
+ if (w->w_req && w->w_tmem) {
+ struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
+
+ rtm->rtm_flags = rt->rt_flags;
+ /*
+		 * XXX: admittedly an ugly hack -- rtm_fmask is abused
+		 * here to export rmx_pksent.
+ */
+ rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
+ rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+ rtm->rtm_index = rt->rt_ifp->if_index;
+ rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
+ rtm->rtm_addrs = info.rti_addrs;
+ error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
+ return (error);
+ }
+ return (error);
+}
+
+#ifdef COMPAT_FREEBSD32
+static void
+copy_ifdata32(struct if_data *src, struct if_data32 *dst)
+{
+
+ bzero(dst, sizeof(*dst));
+ CP(*src, *dst, ifi_type);
+ CP(*src, *dst, ifi_physical);
+ CP(*src, *dst, ifi_addrlen);
+ CP(*src, *dst, ifi_hdrlen);
+ CP(*src, *dst, ifi_link_state);
+ dst->ifi_datalen = sizeof(struct if_data32);
+ CP(*src, *dst, ifi_mtu);
+ CP(*src, *dst, ifi_metric);
+ CP(*src, *dst, ifi_baudrate);
+ CP(*src, *dst, ifi_ipackets);
+ CP(*src, *dst, ifi_ierrors);
+ CP(*src, *dst, ifi_opackets);
+ CP(*src, *dst, ifi_oerrors);
+ CP(*src, *dst, ifi_collisions);
+ CP(*src, *dst, ifi_ibytes);
+ CP(*src, *dst, ifi_obytes);
+ CP(*src, *dst, ifi_imcasts);
+ CP(*src, *dst, ifi_omcasts);
+ CP(*src, *dst, ifi_iqdrops);
+ CP(*src, *dst, ifi_noproto);
+ CP(*src, *dst, ifi_hwassist);
+ CP(*src, *dst, ifi_epoch);
+ TV_CP(*src, *dst, ifi_lastchange);
+}
+#endif
+
+static int
+sysctl_iflist(int af, struct walkarg *w)
+{
+ struct ifnet *ifp;
+ struct ifaddr *ifa;
+ struct rt_addrinfo info;
+ int len, error = 0;
+
+ bzero((caddr_t)&info, sizeof(info));
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (w->w_arg && w->w_arg != ifp->if_index)
+ continue;
+ IF_ADDR_LOCK(ifp);
+ ifa = ifp->if_addr;
+ info.rti_info[RTAX_IFP] = ifa->ifa_addr;
+ len = rt_msg2(RTM_IFINFO, &info, NULL, w);
+ info.rti_info[RTAX_IFP] = NULL;
+ if (w->w_req && w->w_tmem) {
+ struct if_msghdr *ifm;
+
+#ifdef COMPAT_FREEBSD32
+ if (w->w_req->flags & SCTL_MASK32) {
+ struct if_msghdr32 *ifm32;
+
+ ifm32 = (struct if_msghdr32 *)w->w_tmem;
+ ifm32->ifm_index = ifp->if_index;
+ ifm32->ifm_flags = ifp->if_flags |
+ ifp->if_drv_flags;
+ copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
+ ifm32->ifm_addrs = info.rti_addrs;
+ error = SYSCTL_OUT(w->w_req, (caddr_t)ifm32,
+ len);
+ goto sysctl_out;
+ }
+#endif
+ ifm = (struct if_msghdr *)w->w_tmem;
+ ifm->ifm_index = ifp->if_index;
+ ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm->ifm_data = ifp->if_data;
+ ifm->ifm_addrs = info.rti_addrs;
+ error = SYSCTL_OUT(w->w_req, (caddr_t)ifm, len);
+#ifdef COMPAT_FREEBSD32
+ sysctl_out:
+#endif
+ if (error)
+ goto done;
+ }
+ while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
+ if (af && af != ifa->ifa_addr->sa_family)
+ continue;
+ if (prison_if(w->w_req->td->td_ucred,
+ ifa->ifa_addr) != 0)
+ continue;
+ info.rti_info[RTAX_IFA] = ifa->ifa_addr;
+ info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+ info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
+ len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
+ if (w->w_req && w->w_tmem) {
+ struct ifa_msghdr *ifam;
+
+ ifam = (struct ifa_msghdr *)w->w_tmem;
+ ifam->ifam_index = ifa->ifa_ifp->if_index;
+ ifam->ifam_flags = ifa->ifa_flags;
+ ifam->ifam_metric = ifa->ifa_metric;
+ ifam->ifam_addrs = info.rti_addrs;
+ error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
+ if (error)
+ goto done;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+ info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
+ info.rti_info[RTAX_BRD] = NULL;
+ }
+done:
+ if (ifp != NULL)
+ IF_ADDR_UNLOCK(ifp);
+ IFNET_RUNLOCK();
+ return (error);
+}
+
+static int
+sysctl_ifmalist(int af, struct walkarg *w)
+{
+ struct ifnet *ifp;
+ struct ifmultiaddr *ifma;
+ struct rt_addrinfo info;
+ int len, error = 0;
+ struct ifaddr *ifa;
+
+ bzero((caddr_t)&info, sizeof(info));
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (w->w_arg && w->w_arg != ifp->if_index)
+ continue;
+ ifa = ifp->if_addr;
+ info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (af && af != ifma->ifma_addr->sa_family)
+ continue;
+ if (prison_if(w->w_req->td->td_ucred,
+ ifma->ifma_addr) != 0)
+ continue;
+ info.rti_info[RTAX_IFA] = ifma->ifma_addr;
+ info.rti_info[RTAX_GATEWAY] =
+ (ifma->ifma_addr->sa_family != AF_LINK) ?
+ ifma->ifma_lladdr : NULL;
+ len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
+ if (w->w_req && w->w_tmem) {
+ struct ifma_msghdr *ifmam;
+
+ ifmam = (struct ifma_msghdr *)w->w_tmem;
+ ifmam->ifmam_index = ifma->ifma_ifp->if_index;
+ ifmam->ifmam_flags = 0;
+ ifmam->ifmam_addrs = info.rti_addrs;
+ error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
+ if (error) {
+ IF_ADDR_UNLOCK(ifp);
+ goto done;
+ }
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+ }
+done:
+ IFNET_RUNLOCK();
+ return (error);
+}
+
+static int
+sysctl_rtsock(SYSCTL_HANDLER_ARGS)
+{
+ int *name = (int *)arg1;
+ u_int namelen = arg2;
+ struct radix_node_head *rnh = NULL; /* silence compiler. */
+ int i, lim, error = EINVAL;
+ u_char af;
+ struct walkarg w;
+
+	name++;
+ namelen--;
+ if (req->newptr)
+ return (EPERM);
+ if (namelen != 3)
+ return ((namelen < 3) ? EISDIR : ENOTDIR);
+ af = name[0];
+ if (af > AF_MAX)
+ return (EINVAL);
+ bzero(&w, sizeof(w));
+ w.w_op = name[1];
+ w.w_arg = name[2];
+ w.w_req = req;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error)
+ return (error);
+ switch (w.w_op) {
+
+ case NET_RT_DUMP:
+ case NET_RT_FLAGS:
+ if (af == 0) { /* dump all tables */
+ i = 1;
+ lim = AF_MAX;
+ } else /* dump only one table */
+ i = lim = af;
+
+ /*
+		 * Take care of llinfo entries; the caller must
+		 * specify an AF.
+ */
+ if (w.w_op == NET_RT_FLAGS &&
+ (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
+ if (af != 0)
+ error = lltable_sysctl_dumparp(af, w.w_req);
+ else
+ error = EINVAL;
+ break;
+ }
+ /*
+		 * Take care of routing entries.
+ */
+ for (error = 0; error == 0 && i <= lim; i++) {
+ rnh = rt_tables_get_rnh(req->td->td_proc->p_fibnum, i);
+ if (rnh != NULL) {
+ RADIX_NODE_HEAD_LOCK(rnh);
+ error = rnh->rnh_walktree(rnh,
+ sysctl_dumpentry, &w);
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ } else if (af != 0)
+ error = EAFNOSUPPORT;
+ }
+ break;
+
+ case NET_RT_IFLIST:
+ error = sysctl_iflist(af, &w);
+ break;
+
+ case NET_RT_IFMALIST:
+ error = sysctl_ifmalist(af, &w);
+ break;
+ }
+ if (w.w_tmem)
+ free(w.w_tmem, M_RTABLE);
+ return (error);
+}
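+
+/*
+ * A userland sketch of driving this handler via the conventional
+ * route(4) MIB; the last three names are the address family, the
+ * operation and its argument, matching the namelen == 3 check above:
+ *
+ *	int mib[6] = { CTL_NET, PF_ROUTE, 0, AF_INET, NET_RT_DUMP, 0 };
+ *	size_t needed;
+ *
+ *	if (sysctl(mib, 6, NULL, &needed, NULL, 0) == 0) {
+ *		char *buf = malloc(needed);
+ *		if (buf != NULL)
+ *			sysctl(mib, 6, buf, &needed, NULL, 0);
+ *	}
+ */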
+
+SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
+
+/*
+ * Definitions of protocols supported in the ROUTE domain.
+ */
+
+static struct domain routedomain; /* or at least forward */
+
+static struct protosw routesw[] = {
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &routedomain,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_output = route_output,
+ .pr_ctlinput = raw_ctlinput,
+ .pr_init = raw_init,
+ .pr_usrreqs = &route_usrreqs
+}
+};
+
+static struct domain routedomain = {
+ .dom_family = PF_ROUTE,
+ .dom_name = "route",
+ .dom_protosw = routesw,
+ .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])]
+};
+
+VNET_DOMAIN_SET(route);
diff --git a/freebsd/sys/net/slcompress.c b/freebsd/sys/net/slcompress.c
new file mode 100644
index 00000000..be337c1f
--- /dev/null
+++ b/freebsd/sys/net/slcompress.c
@@ -0,0 +1,609 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)slcompress.c 8.2 (Berkeley) 4/16/94
+ * $FreeBSD$
+ */
+
+/*
+ * Routines to compress and uncompress TCP packets (for transmission
+ * over low-speed serial lines).
+ *
+ * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989:
+ * - Initial distribution.
+ *
+ */
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+
+#include <freebsd/net/slcompress.h>
+
+#ifndef SL_NO_STATS
+#define INCR(counter) ++comp->counter;
+#else
+#define INCR(counter)
+#endif
+
+#define BCMP(p1, p2, n) bcmp((void *)(p1), (void *)(p2), (int)(n))
+#define BCOPY(p1, p2, n) bcopy((void *)(p1), (void *)(p2), (int)(n))
+
+void
+sl_compress_init(comp, max_state)
+ struct slcompress *comp;
+ int max_state;
+{
+ register u_int i;
+ register struct cstate *tstate = comp->tstate;
+
+ if (max_state == -1) {
+ max_state = MAX_STATES - 1;
+ bzero((char *)comp, sizeof(*comp));
+ } else {
+ /* Don't reset statistics */
+ bzero((char *)comp->tstate, sizeof(comp->tstate));
+ bzero((char *)comp->rstate, sizeof(comp->rstate));
+ }
+ for (i = max_state; i > 0; --i) {
+ tstate[i].cs_id = i;
+ tstate[i].cs_next = &tstate[i - 1];
+ }
+ tstate[0].cs_next = &tstate[max_state];
+ tstate[0].cs_id = 0;
+ comp->last_cs = &tstate[0];
+ comp->last_recv = 255;
+ comp->last_xmit = 255;
+ comp->flags = SLF_TOSS;
+}
+
+
+/* ENCODE encodes a number that is known to be non-zero. ENCODEZ
+ * checks for zero (since zero has to be encoded in the long, 3 byte
+ * form).
+ */
+#define ENCODE(n) { \
+ if ((u_int16_t)(n) >= 256) { \
+ *cp++ = 0; \
+ cp[1] = (n); \
+ cp[0] = (n) >> 8; \
+ cp += 2; \
+ } else { \
+ *cp++ = (n); \
+ } \
+}
+#define ENCODEZ(n) { \
+ if ((u_int16_t)(n) >= 256 || (u_int16_t)(n) == 0) { \
+ *cp++ = 0; \
+ cp[1] = (n); \
+ cp[0] = (n) >> 8; \
+ cp += 2; \
+ } else { \
+ *cp++ = (n); \
+ } \
+}
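+
+/*
+ * Worked examples of the encoding: a delta of 5 is emitted as the
+ * single octet 0x05, while a delta of 300 does not fit in one octet
+ * and is emitted in the long form 0x00 0x01 0x2c (a zero marker
+ * followed by the 16-bit value, MSB first).  ENCODEZ emits an actual
+ * zero the same way (0x00 0x00 0x00), since a leading zero octet is
+ * reserved as the long-form marker.
+ */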
+
+#define DECODEL(f) { \
+ if (*cp == 0) {\
+ (f) = htonl(ntohl(f) + ((cp[1] << 8) | cp[2])); \
+ cp += 3; \
+ } else { \
+ (f) = htonl(ntohl(f) + (u_int32_t)*cp++); \
+ } \
+}
+
+#define DECODES(f) { \
+ if (*cp == 0) {\
+ (f) = htons(ntohs(f) + ((cp[1] << 8) | cp[2])); \
+ cp += 3; \
+ } else { \
+ (f) = htons(ntohs(f) + (u_int32_t)*cp++); \
+ } \
+}
+
+#define DECODEU(f) { \
+ if (*cp == 0) {\
+ (f) = htons((cp[1] << 8) | cp[2]); \
+ cp += 3; \
+ } else { \
+ (f) = htons((u_int32_t)*cp++); \
+ } \
+}
+
+/*
+ * Attempt to compress an outgoing TCP packet and return the type of
+ * the result. The caller must have already verified that the protocol
+ * is TCP. The first mbuf must contain the complete IP and TCP headers,
+ * and "ip" must be == mtod(m, struct ip *). "comp" supplies the
+ * compression state, and "compress_cid" tells us whether it is OK
+ * to leave out the CID field when feasible.
+ *
+ * The caller is responsible for adjusting m->m_pkthdr.len upon return,
+ * if m is an M_PKTHDR mbuf.
+ */
+u_int
+sl_compress_tcp(m, ip, comp, compress_cid)
+ struct mbuf *m;
+ register struct ip *ip;
+ struct slcompress *comp;
+ int compress_cid;
+{
+ register struct cstate *cs = comp->last_cs->cs_next;
+ register u_int hlen = ip->ip_hl;
+ register struct tcphdr *oth;
+ register struct tcphdr *th;
+ register u_int deltaS, deltaA;
+ register u_int changes = 0;
+ u_char new_seq[16];
+ register u_char *cp = new_seq;
+
+ /*
+ * Bail if this is an IP fragment or if the TCP packet isn't
+ * `compressible' (i.e., ACK isn't set or some other control bit is
+ * set). (We assume that the caller has already made sure the
+ * packet is IP proto TCP).
+ */
+ if ((ip->ip_off & htons(0x3fff)) || m->m_len < 40)
+ return (TYPE_IP);
+
+ th = (struct tcphdr *)&((int32_t *)ip)[hlen];
+ if ((th->th_flags & (TH_SYN|TH_FIN|TH_RST|TH_ACK)) != TH_ACK)
+ return (TYPE_IP);
+ /*
+ * Packet is compressible -- we're going to send either a
+ * COMPRESSED_TCP or UNCOMPRESSED_TCP packet. Either way we need
+ * to locate (or create) the connection state. Special case the
+ * most recently used connection since it's most likely to be used
+ * again & we don't have to do any reordering if it's used.
+ */
+ INCR(sls_packets)
+ if (ip->ip_src.s_addr != cs->cs_ip.ip_src.s_addr ||
+ ip->ip_dst.s_addr != cs->cs_ip.ip_dst.s_addr ||
+ *(int32_t *)th != ((int32_t *)&cs->cs_ip)[cs->cs_ip.ip_hl]) {
+ /*
+ * Wasn't the first -- search for it.
+ *
+ * States are kept in a circularly linked list with
+ * last_cs pointing to the end of the list. The
+ * list is kept in lru order by moving a state to the
+ * head of the list whenever it is referenced. Since
+ * the list is short and, empirically, the connection
+ * we want is almost always near the front, we locate
+ * states via linear search. If we don't find a state
+ * for the datagram, the oldest state is (re-)used.
+ */
+ register struct cstate *lcs;
+ register struct cstate *lastcs = comp->last_cs;
+
+ do {
+ lcs = cs; cs = cs->cs_next;
+ INCR(sls_searches)
+ if (ip->ip_src.s_addr == cs->cs_ip.ip_src.s_addr
+ && ip->ip_dst.s_addr == cs->cs_ip.ip_dst.s_addr
+ && *(int32_t *)th ==
+ ((int32_t *)&cs->cs_ip)[cs->cs_ip.ip_hl])
+ goto found;
+ } while (cs != lastcs);
+
+ /*
+ * Didn't find it -- re-use oldest cstate. Send an
+ * uncompressed packet that tells the other side what
+ * connection number we're using for this conversation.
+ * Note that since the state list is circular, the oldest
+ * state points to the newest and we only need to set
+ * last_cs to update the lru linkage.
+ */
+ INCR(sls_misses)
+ comp->last_cs = lcs;
+ hlen += th->th_off;
+ hlen <<= 2;
+ if (hlen > m->m_len)
+ return TYPE_IP;
+ goto uncompressed;
+
+ found:
+ /*
+ * Found it -- move to the front on the connection list.
+ */
+ if (cs == lastcs)
+ comp->last_cs = lcs;
+ else {
+ lcs->cs_next = cs->cs_next;
+ cs->cs_next = lastcs->cs_next;
+ lastcs->cs_next = cs;
+ }
+ }
+
+ /*
+ * Make sure that only what we expect to change changed. The first
+ * line of the `if' checks the IP protocol version, header length &
+ * type of service. The 2nd line checks the "Don't fragment" bit.
+ * The 3rd line checks the time-to-live and protocol (the protocol
+ * check is unnecessary but costless). The 4th line checks the TCP
+ * header length. The 5th line checks IP options, if any. The 6th
+ * line checks TCP options, if any. If any of these things are
+ * different between the previous & current datagram, we send the
+ * current datagram `uncompressed'.
+ */
+ oth = (struct tcphdr *)&((int32_t *)&cs->cs_ip)[hlen];
+ deltaS = hlen;
+ hlen += th->th_off;
+ hlen <<= 2;
+ if (hlen > m->m_len)
+ return TYPE_IP;
+
+ if (((u_int16_t *)ip)[0] != ((u_int16_t *)&cs->cs_ip)[0] ||
+ ((u_int16_t *)ip)[3] != ((u_int16_t *)&cs->cs_ip)[3] ||
+ ((u_int16_t *)ip)[4] != ((u_int16_t *)&cs->cs_ip)[4] ||
+ th->th_off != oth->th_off ||
+ (deltaS > 5 &&
+ BCMP(ip + 1, &cs->cs_ip + 1, (deltaS - 5) << 2)) ||
+ (th->th_off > 5 &&
+ BCMP(th + 1, oth + 1, (th->th_off - 5) << 2)))
+ goto uncompressed;
+
+ /*
+ * Figure out which of the changing fields changed. The
+ * receiver expects changes in the order: urgent, window,
+ * ack, seq (the order minimizes the number of temporaries
+ * needed in this section of code).
+ */
+ if (th->th_flags & TH_URG) {
+ deltaS = ntohs(th->th_urp);
+ ENCODEZ(deltaS);
+ changes |= NEW_U;
+ } else if (th->th_urp != oth->th_urp)
+ /* argh! URG not set but urp changed -- a sensible
+ * implementation should never do this but RFC793
+ * doesn't prohibit the change so we have to deal
+ * with it. */
+ goto uncompressed;
+
+ deltaS = (u_int16_t)(ntohs(th->th_win) - ntohs(oth->th_win));
+ if (deltaS) {
+ ENCODE(deltaS);
+ changes |= NEW_W;
+ }
+
+ deltaA = ntohl(th->th_ack) - ntohl(oth->th_ack);
+ if (deltaA) {
+ if (deltaA > 0xffff)
+ goto uncompressed;
+ ENCODE(deltaA);
+ changes |= NEW_A;
+ }
+
+ deltaS = ntohl(th->th_seq) - ntohl(oth->th_seq);
+ if (deltaS) {
+ if (deltaS > 0xffff)
+ goto uncompressed;
+ ENCODE(deltaS);
+ changes |= NEW_S;
+ }
+
+ switch(changes) {
+
+ case 0:
+ /*
+ * Nothing changed. If this packet contains data and the
+ * last one didn't, this is probably a data packet following
+ * an ack (normal on an interactive connection) and we send
+ * it compressed. Otherwise it's probably a retransmit,
+ * retransmitted ack or window probe. Send it uncompressed
+ * in case the other side missed the compressed version.
+ */
+ if (ip->ip_len != cs->cs_ip.ip_len &&
+ ntohs(cs->cs_ip.ip_len) == hlen)
+ break;
+
+ /* FALLTHROUGH */
+
+ case SPECIAL_I:
+ case SPECIAL_D:
+ /*
+ * actual changes match one of our special case encodings --
+ * send packet uncompressed.
+ */
+ goto uncompressed;
+
+ case NEW_S|NEW_A:
+ if (deltaS == deltaA &&
+ deltaS == ntohs(cs->cs_ip.ip_len) - hlen) {
+ /* special case for echoed terminal traffic */
+ changes = SPECIAL_I;
+ cp = new_seq;
+ }
+ break;
+
+ case NEW_S:
+ if (deltaS == ntohs(cs->cs_ip.ip_len) - hlen) {
+ /* special case for data xfer */
+ changes = SPECIAL_D;
+ cp = new_seq;
+ }
+ break;
+ }
+
+ deltaS = ntohs(ip->ip_id) - ntohs(cs->cs_ip.ip_id);
+ if (deltaS != 1) {
+ ENCODEZ(deltaS);
+ changes |= NEW_I;
+ }
+ if (th->th_flags & TH_PUSH)
+ changes |= TCP_PUSH_BIT;
+ /*
+ * Grab the cksum before we overwrite it below. Then update our
+ * state with this packet's header.
+ */
+ deltaA = ntohs(th->th_sum);
+ BCOPY(ip, &cs->cs_ip, hlen);
+
+ /*
+ * We want to use the original packet as our compressed packet.
+ * (cp - new_seq) is the number of bytes we need for compressed
+ * sequence numbers. In addition we need one byte for the change
+ * mask, one for the connection id and two for the tcp checksum.
+ * So, (cp - new_seq) + 4 bytes of header are needed. hlen is how
+ * many bytes of the original packet to toss so subtract the two to
+ * get the new packet size.
+ */
+ deltaS = cp - new_seq;
+ cp = (u_char *)ip;
+ if (compress_cid == 0 || comp->last_xmit != cs->cs_id) {
+ comp->last_xmit = cs->cs_id;
+ hlen -= deltaS + 4;
+ cp += hlen;
+ *cp++ = changes | NEW_C;
+ *cp++ = cs->cs_id;
+ } else {
+ hlen -= deltaS + 3;
+ cp += hlen;
+ *cp++ = changes;
+ }
+ m->m_len -= hlen;
+ m->m_data += hlen;
+ *cp++ = deltaA >> 8;
+ *cp++ = deltaA;
+ BCOPY(new_seq, cp, deltaS);
+ INCR(sls_compressed)
+ return (TYPE_COMPRESSED_TCP);
+
+ /*
+ * Update connection state cs & send uncompressed packet ('uncompressed'
+ * means a regular ip/tcp packet but with the 'conversation id' we hope
+ * to use on future compressed packets in the protocol field).
+ */
+uncompressed:
+ BCOPY(ip, &cs->cs_ip, hlen);
+ ip->ip_p = cs->cs_id;
+ comp->last_xmit = cs->cs_id;
+ return (TYPE_UNCOMPRESSED_TCP);
+}
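+
+/*
+ * A worked size example, assuming a typical interactive segment whose
+ * only change is a small sequence-number delta: the 40 octets of
+ * IP/TCP header collapse to the change mask (1 octet), an optional
+ * connection id (1 octet, elided when compress_cid allows), the TCP
+ * checksum (2 octets) and a one-octet delta -- 4 or 5 octets in
+ * place of 40.
+ */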
+
+
+int
+sl_uncompress_tcp(bufp, len, type, comp)
+ u_char **bufp;
+ int len;
+ u_int type;
+ struct slcompress *comp;
+{
+ u_char *hdr, *cp;
+ int hlen, vjlen;
+
+ cp = bufp? *bufp: NULL;
+ vjlen = sl_uncompress_tcp_core(cp, len, len, type, comp, &hdr, &hlen);
+ if (vjlen < 0)
+ return (0); /* error */
+ if (vjlen == 0)
+ return (len); /* was uncompressed already */
+
+ cp += vjlen;
+ len -= vjlen;
+
+ /*
+ * At this point, cp points to the first byte of data in the
+ * packet. If we're not aligned on a 4-byte boundary, copy the
+ * data down so the ip & tcp headers will be aligned. Then back up
+ * cp by the tcp/ip header length to make room for the reconstructed
+ * header (we assume the packet we were handed has enough space to
+ * prepend 128 bytes of header).
+ */
+ if ((intptr_t)cp & 3) {
+ if (len > 0)
+ BCOPY(cp, ((intptr_t)cp &~ 3), len);
+ cp = (u_char *)((intptr_t)cp &~ 3);
+ }
+ cp -= hlen;
+ len += hlen;
+ BCOPY(hdr, cp, hlen);
+
+ *bufp = cp;
+ return (len);
+}
+
+/*
+ * Uncompress a packet of total length total_len. The first buflen
+ * bytes are at buf; this must include the entire (compressed or
+ * uncompressed) TCP/IP header. This procedure returns the length
+ * of the VJ header, with a pointer to the uncompressed IP header
+ * in *hdrp and its length in *hlenp.
+ */
+int
+sl_uncompress_tcp_core(buf, buflen, total_len, type, comp, hdrp, hlenp)
+ u_char *buf;
+ int buflen, total_len;
+ u_int type;
+ struct slcompress *comp;
+ u_char **hdrp;
+ u_int *hlenp;
+{
+ register u_char *cp;
+ register u_int hlen, changes;
+ register struct tcphdr *th;
+ register struct cstate *cs;
+ register struct ip *ip;
+ register u_int16_t *bp;
+ register u_int vjlen;
+
+ switch (type) {
+
+ case TYPE_UNCOMPRESSED_TCP:
+ ip = (struct ip *) buf;
+ if (ip->ip_p >= MAX_STATES)
+ goto bad;
+ cs = &comp->rstate[comp->last_recv = ip->ip_p];
+ comp->flags &=~ SLF_TOSS;
+ ip->ip_p = IPPROTO_TCP;
+ /*
+ * Calculate the size of the TCP/IP header and make sure that
+ * we don't overflow the space we have available for it.
+ */
+ hlen = ip->ip_hl << 2;
+ if (hlen + sizeof(struct tcphdr) > buflen)
+ goto bad;
+ hlen += ((struct tcphdr *)&((char *)ip)[hlen])->th_off << 2;
+ if (hlen > MAX_HDR || hlen > buflen)
+ goto bad;
+ BCOPY(ip, &cs->cs_ip, hlen);
+ cs->cs_hlen = hlen;
+ INCR(sls_uncompressedin)
+ *hdrp = (u_char *) &cs->cs_ip;
+ *hlenp = hlen;
+ return (0);
+
+ default:
+ goto bad;
+
+ case TYPE_COMPRESSED_TCP:
+ break;
+ }
+ /* We've got a compressed packet. */
+ INCR(sls_compressedin)
+ cp = buf;
+ changes = *cp++;
+ if (changes & NEW_C) {
+ /* Make sure the state index is in range, then grab the state.
+ * If we have a good state index, clear the 'discard' flag. */
+ if (*cp >= MAX_STATES)
+ goto bad;
+
+ comp->flags &=~ SLF_TOSS;
+ comp->last_recv = *cp++;
+ } else {
+ /* this packet has an implicit state index. If we've
+ * had a line error since the last time we got an
+ * explicit state index, we have to toss the packet. */
+ if (comp->flags & SLF_TOSS) {
+ INCR(sls_tossed)
+ return (-1);
+ }
+ }
+ cs = &comp->rstate[comp->last_recv];
+ hlen = cs->cs_ip.ip_hl << 2;
+ th = (struct tcphdr *)&((u_char *)&cs->cs_ip)[hlen];
+ th->th_sum = htons((*cp << 8) | cp[1]);
+ cp += 2;
+ if (changes & TCP_PUSH_BIT)
+ th->th_flags |= TH_PUSH;
+ else
+ th->th_flags &=~ TH_PUSH;
+
+ switch (changes & SPECIALS_MASK) {
+ case SPECIAL_I:
+ {
+ register u_int i = ntohs(cs->cs_ip.ip_len) - cs->cs_hlen;
+ th->th_ack = htonl(ntohl(th->th_ack) + i);
+ th->th_seq = htonl(ntohl(th->th_seq) + i);
+ }
+ break;
+
+ case SPECIAL_D:
+ th->th_seq = htonl(ntohl(th->th_seq) + ntohs(cs->cs_ip.ip_len)
+ - cs->cs_hlen);
+ break;
+
+ default:
+ if (changes & NEW_U) {
+ th->th_flags |= TH_URG;
+ DECODEU(th->th_urp)
+ } else
+ th->th_flags &=~ TH_URG;
+ if (changes & NEW_W)
+ DECODES(th->th_win)
+ if (changes & NEW_A)
+ DECODEL(th->th_ack)
+ if (changes & NEW_S)
+ DECODEL(th->th_seq)
+ break;
+ }
+ if (changes & NEW_I) {
+ DECODES(cs->cs_ip.ip_id)
+ } else
+ cs->cs_ip.ip_id = htons(ntohs(cs->cs_ip.ip_id) + 1);
+
+ /*
+ * At this point, cp points to the first byte of data in the
+ * packet. Fill in the IP total length and update the IP
+ * header checksum.
+ */
+ vjlen = cp - buf;
+ buflen -= vjlen;
+ if (buflen < 0)
+ /* we must have dropped some characters (crc should detect
+ * this but the old slip framing won't) */
+ goto bad;
+
+ total_len += cs->cs_hlen - vjlen;
+ cs->cs_ip.ip_len = htons(total_len);
+
+ /* recompute the ip header checksum */
+ bp = (u_int16_t *) &cs->cs_ip;
+ cs->cs_ip.ip_sum = 0;
+ for (changes = 0; hlen > 0; hlen -= 2)
+ changes += *bp++;
+ changes = (changes & 0xffff) + (changes >> 16);
+ changes = (changes & 0xffff) + (changes >> 16);
+ cs->cs_ip.ip_sum = ~ changes;
+
+ *hdrp = (u_char *) &cs->cs_ip;
+ *hlenp = cs->cs_hlen;
+ return vjlen;
+
+bad:
+ comp->flags |= SLF_TOSS;
+ INCR(sls_errorin)
+ return (-1);
+}
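+
+/*
+ * The checksum loop above is the standard Internet checksum: 16-bit
+ * words are summed, the carries are folded back in twice via
+ * (x & 0xffff) + (x >> 16), and the result is complemented.  For
+ * example, a running sum of 0xffff + 0x0002 = 0x10001 folds to
+ * 0x0002 and is stored as ~0x0002 = 0xfffd.
+ */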
diff --git a/freebsd/sys/net/slcompress.h b/freebsd/sys/net/slcompress.h
new file mode 100644
index 00000000..08c9042e
--- /dev/null
+++ b/freebsd/sys/net/slcompress.h
@@ -0,0 +1,158 @@
+/*
+ * Definitions for tcp compression routines.
+ */
+/*-
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989:
+ * - Initial distribution.
+ * $FreeBSD$
+ */
+
+#ifndef _NET_SLCOMPRESS_HH_
+#define _NET_SLCOMPRESS_HH_
+
+#define MAX_STATES 16 /* must be > 2 and < 256 */
+#define MAX_HDR 128
+
+/*
+ * Compressed packet format:
+ *
+ * The first octet contains the packet type (top 3 bits), TCP
+ * 'push' bit, and flags that indicate which of the 4 TCP sequence
+ * numbers have changed (bottom 5 bits). The next octet is a
+ * conversation number that associates a saved IP/TCP header with
+ * the compressed packet. The next two octets are the TCP checksum
+ * from the original datagram. The next 0 to 15 octets are
+ * sequence number changes, one change per bit set in the header
+ * (there may be no changes and there are two special cases where
+ * the receiver implicitly knows what changed -- see below).
+ *
+ * There are 5 numbers which can change (they are always inserted
+ * in the following order): TCP urgent pointer, window,
+ * acknowledgement, sequence number and IP ID. (The urgent pointer
+ * is different from the others in that its value is sent, not the
+ * change in value.) Since typical use of SLIP links is biased
+ * toward small packets (see comments on MTU/MSS below), changes
+ * use a variable length coding with one octet for numbers in the
+ * range 1 - 255 and 3 octets (0, MSB, LSB) for numbers in the
+ * range 256 - 65535 or 0. (If the change in sequence number or
+ * ack is more than 65535, an uncompressed packet is sent.)
+ */
+
+/*
+ * Packet types (must not conflict with IP protocol version)
+ *
+ * The top nibble of the first octet is the packet type. There are
+ * three possible types: IP (not proto TCP or tcp with one of the
+ * control flags set); uncompressed TCP (a normal IP/TCP packet but
+ * with the 8-bit protocol field replaced by an 8-bit connection id --
+ * this type of packet syncs the sender & receiver); and compressed
+ * TCP (described above).
+ *
+ * LSB of 4-bit field is TCP "PUSH" bit (a worthless anachronism) and
+ * is logically part of the 4-bit "changes" field that follows. Top
+ * three bits are actual packet type. For backward compatibility
+ * and in the interest of conserving bits, numbers are chosen so the
+ * IP protocol version number (4) which normally appears in this nibble
+ * means "IP packet".
+ */
+
+/* packet types */
+#define TYPE_IP 0x40
+#define TYPE_UNCOMPRESSED_TCP 0x70
+#define TYPE_COMPRESSED_TCP 0x80
+#define TYPE_ERROR 0x00
+
+/* Bits in first octet of compressed packet */
+#define NEW_C 0x40 /* flag bits for what changed in a packet */
+#define NEW_I 0x20
+#define NEW_S 0x08
+#define NEW_A 0x04
+#define NEW_W 0x02
+#define NEW_U 0x01
+
+/* reserved, special-case values of above */
+#define SPECIAL_I (NEW_S|NEW_W|NEW_U) /* echoed interactive traffic */
+#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U) /* unidirectional data */
+#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U)
+
+#define TCP_PUSH_BIT 0x10
+
+
+/*
+ * "state" data for each active tcp conversation on the wire. This is
+ * basically a copy of the entire IP/TCP header from the last packet
+ * we saw from the conversation together with a small identifier
+ * the transmit & receive ends of the line use to locate the saved header.
+ */
+struct cstate {
+ struct cstate *cs_next; /* next most recently used cstate (xmit only) */
+ u_int16_t cs_hlen; /* size of hdr (receive only) */
+ u_char cs_id; /* connection # associated with this state */
+ u_char cs_filler;
+ union {
+ char csu_hdr[MAX_HDR];
+ struct ip csu_ip; /* ip/tcp hdr from most recent packet */
+ } slcs_u;
+};
+#define cs_ip slcs_u.csu_ip
+#define cs_hdr slcs_u.csu_hdr
+
+/*
+ * all the state data for one serial line (we need one of these
+ * per line).
+ */
+struct slcompress {
+ struct cstate *last_cs; /* most recently used tstate */
+ u_char last_recv; /* last rcvd conn. id */
+ u_char last_xmit; /* last sent conn. id */
+ u_int16_t flags;
+#ifndef SL_NO_STATS
+ int sls_packets; /* outbound packets */
+ int sls_compressed; /* outbound compressed packets */
+ int sls_searches; /* searches for connection state */
+ int sls_misses; /* times couldn't find conn. state */
+ int sls_uncompressedin; /* inbound uncompressed packets */
+ int sls_compressedin; /* inbound compressed packets */
+ int sls_errorin; /* inbound unknown type packets */
+ int sls_tossed; /* inbound packets tossed because of error */
+#endif
+ struct cstate tstate[MAX_STATES]; /* xmit connection states */
+ struct cstate rstate[MAX_STATES]; /* receive connection states */
+};
+/* flag values */
+#define SLF_TOSS 1 /* tossing rcvd frames because of input err */
+
+void sl_compress_init(struct slcompress *, int);
+u_int sl_compress_tcp(struct mbuf *, struct ip *, struct slcompress *, int);
+int sl_uncompress_tcp(u_char **, int, u_int, struct slcompress *);
+int sl_uncompress_tcp_core(u_char *, int, int, u_int,
+ struct slcompress *, u_char **, u_int *);
+
+#endif /* !_NET_SLCOMPRESS_HH_ */
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h
new file mode 100644
index 00000000..7f6326fe
--- /dev/null
+++ b/freebsd/sys/net/vnet.h
@@ -0,0 +1,437 @@
+/*-
+ * Copyright (c) 2006-2009 University of Zagreb
+ * Copyright (c) 2006-2009 FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by the University of Zagreb and the
+ * FreeBSD Foundation under sponsorship by the Stichting NLnet and the
+ * FreeBSD Foundation.
+ *
+ * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2009 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*-
+ * This header file defines several sets of interfaces supporting virtualized
+ * network stacks:
+ *
+ * - Definition of 'struct vnet' and functions and macros to allocate/free/
+ * manipulate it.
+ *
+ * - A virtual network stack memory allocator, which provides support for
+ * virtualized global variables via a special linker set, set_vnet.
+ *
+ * - Virtualized sysinits/sysuninits, which allow constructors and
+ * destructors to be run for each network stack subsystem as virtual
+ * instances are created and destroyed.
+ *
+ * If VIMAGE isn't compiled into the kernel, virtualized global variables
+ * compile to normal global variables, and virtualized sysinits to regular
+ * sysinits.
+ */
+
+#ifndef _NET_VNET_HH_
+#define _NET_VNET_HH_
+
+/*
+ * struct vnet describes a virtualized network stack, and is primarily a
+ * pointer to storage for virtualized global variables. Expose to userspace
+ * as required for libkvm.
+ */
+#if defined(_KERNEL) || defined(_WANT_VNET)
+#include <freebsd/sys/queue.h>
+
+struct vnet {
+ LIST_ENTRY(vnet) vnet_le; /* all vnets list */
+ u_int vnet_magic_n;
+ u_int vnet_ifcnt;
+ u_int vnet_sockcnt;
+ void *vnet_data_mem;
+ uintptr_t vnet_data_base;
+};
+#define VNET_MAGIC_N 0x3e0d8f29
+
+/*
+ * These two virtual network stack allocator definitions are also required
+ * for libkvm so that it can evaluate virtualized global variables.
+ */
+#define VNET_SETNAME "set_vnet"
+#define VNET_SYMPREFIX "vnet_entry_"
+#endif
+
+#ifdef _KERNEL
+
+#ifdef VIMAGE
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/proc.h> /* for struct thread */
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/sx.h>
+
+/*
+ * Location of the kernel's 'set_vnet' linker set.
+ */
+extern uintptr_t *__start_set_vnet;
+extern uintptr_t *__stop_set_vnet;
+
+#define VNET_START (uintptr_t)&__start_set_vnet
+#define VNET_STOP (uintptr_t)&__stop_set_vnet
+
+/*
+ * Functions to allocate and destroy virtual network stacks.
+ */
+struct vnet *vnet_alloc(void);
+void vnet_destroy(struct vnet *vnet);
+
+/*
+ * The current virtual network stack -- we may wish to move this to struct
+ * pcpu in the future.
+ */
+#define curvnet curthread->td_vnet
+
+/*
+ * Various macros -- get and set the current network stack, but also
+ * assertions.
+ */
+#ifdef VNET_DEBUG
+void vnet_log_recursion(struct vnet *, const char *, int);
+
+#define VNET_ASSERT(condition) \
+ if (!(condition)) { \
+ printf("VNET_ASSERT @ %s:%d %s():\n", \
+ __FILE__, __LINE__, __FUNCTION__); \
+ panic(#condition); \
+ }
+
+#define CURVNET_SET_QUIET(arg) \
+ VNET_ASSERT((arg)->vnet_magic_n == VNET_MAGIC_N); \
+ struct vnet *saved_vnet = curvnet; \
+ const char *saved_vnet_lpush = curthread->td_vnet_lpush; \
+ curvnet = arg; \
+ curthread->td_vnet_lpush = __FUNCTION__;
+
+#define CURVNET_SET_VERBOSE(arg) \
+ CURVNET_SET_QUIET(arg) \
+ if (saved_vnet) \
+ vnet_log_recursion(saved_vnet, saved_vnet_lpush, __LINE__);
+
+#define CURVNET_SET(arg) CURVNET_SET_VERBOSE(arg)
+
+#define CURVNET_RESTORE() \
+ VNET_ASSERT(saved_vnet == NULL || \
+ saved_vnet->vnet_magic_n == VNET_MAGIC_N); \
+ curvnet = saved_vnet; \
+ curthread->td_vnet_lpush = saved_vnet_lpush;
+#else /* !VNET_DEBUG */
+#define VNET_ASSERT(condition)
+
+#define CURVNET_SET(arg) \
+ struct vnet *saved_vnet = curvnet; \
+ curvnet = arg;
+
+#define CURVNET_SET_VERBOSE(arg) CURVNET_SET(arg)
+#define CURVNET_SET_QUIET(arg) CURVNET_SET(arg)
+
+#define CURVNET_RESTORE() \
+ curvnet = saved_vnet;
+#endif /* VNET_DEBUG */
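+
+/*
+ * A minimal usage sketch, assuming code running without an implicit
+ * vnet context (e.g., a callout) that must operate on an interface's
+ * stack; if_vnet is the owning vnet recorded in struct ifnet:
+ *
+ *	CURVNET_SET(ifp->if_vnet);
+ *	... access V_-prefixed virtualized globals here ...
+ *	CURVNET_RESTORE();
+ */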
+
+extern struct vnet *vnet0;
+#define IS_DEFAULT_VNET(arg) ((arg) == vnet0)
+
+#define CRED_TO_VNET(cr) (cr)->cr_prison->pr_vnet
+#define TD_TO_VNET(td) CRED_TO_VNET((td)->td_ucred)
+#define P_TO_VNET(p) CRED_TO_VNET((p)->p_ucred)
+
+/*
+ * Global linked list of all virtual network stacks, along with read locks to
+ * access it. If a caller may sleep while accessing the list, it must use
+ * the sleepable lock macros.
+ */
+LIST_HEAD(vnet_list_head, vnet);
+extern struct vnet_list_head vnet_head;
+extern struct rwlock vnet_rwlock;
+extern struct sx vnet_sxlock;
+
+#define VNET_LIST_RLOCK() sx_slock(&vnet_sxlock)
+#define VNET_LIST_RLOCK_NOSLEEP() rw_rlock(&vnet_rwlock)
+#define VNET_LIST_RUNLOCK() sx_sunlock(&vnet_sxlock)
+#define VNET_LIST_RUNLOCK_NOSLEEP() rw_runlock(&vnet_rwlock)
+
+/*
+ * Iteration macros to walk the global list of virtual network stacks.
+ */
+#define VNET_ITERATOR_DECL(arg) struct vnet *arg
+#define VNET_FOREACH(arg) LIST_FOREACH((arg), &vnet_head, vnet_le)
+
+/*
+ * Virtual network stack memory allocator, which allows global variables to
+ * be automatically instantiated for each network stack instance.
+ */
+__asm__(
+#if defined(__arm__)
+ ".section " VNET_SETNAME ", \"aw\", %progbits\n"
+#else
+ ".section " VNET_SETNAME ", \"aw\", @progbits\n"
+#endif
+ "\t.p2align " __XSTRING(CACHE_LINE_SHIFT) "\n"
+ "\t.previous");
+
+#define VNET_NAME(n) vnet_entry_##n
+#define VNET_DECLARE(t, n) extern t VNET_NAME(n)
+#define VNET_DEFINE(t, n) t VNET_NAME(n) __section(VNET_SETNAME) __used
+#define _VNET_PTR(b, n) (__typeof(VNET_NAME(n))*) \
+ ((b) + (uintptr_t)&VNET_NAME(n))
+
+#define _VNET(b, n) (*_VNET_PTR(b, n))
+
+/*
+ * Virtualized global variable accessor macros.
+ */
+#define VNET_VNET_PTR(vnet, n) _VNET_PTR((vnet)->vnet_data_base, n)
+#define VNET_VNET(vnet, n) (*VNET_VNET_PTR((vnet), n))
+
+#define VNET_PTR(n) VNET_VNET_PTR(curvnet, n)
+#define VNET(n) VNET_VNET(curvnet, n)
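+
+/*
+ * A typical usage sketch, following the V_ convention used throughout
+ * this tree (e.g., V_ifnet): define the virtualized global once and
+ * hide the accessor behind a V_-prefixed alias; sample_limit is a
+ * hypothetical name:
+ *
+ *	VNET_DEFINE(int, sample_limit) = 16;
+ *	#define	V_sample_limit	VNET(sample_limit)
+ */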
+
+/*
+ * Virtual network stack allocator interfaces from the kernel linker.
+ */
+void *vnet_data_alloc(int size);
+void vnet_data_copy(void *start, int size);
+void vnet_data_free(void *start_arg, int size);
+
+/*
+ * Sysctl variants for vnet-virtualized global variables. Include
+ * <sys/sysctl.h> to expose these definitions.
+ *
+ * Note: SYSCTL_PROC() handler functions will need to resolve pointer
+ * arguments themselves, if required.
+ */
+#ifdef SYSCTL_OID
+int vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS);
+int vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS);
+int vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS);
+int vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS);
+
+#define SYSCTL_VNET_INT(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access), \
+ ptr, val, vnet_sysctl_handle_int, "I", descr)
+#define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \
+ fmt, descr) \
+ SYSCTL_OID(parent, nbr, name, CTLFLAG_VNET|(access), ptr, arg, \
+ handler, fmt, descr)
+#define SYSCTL_VNET_OPAQUE(parent, nbr, name, access, ptr, len, fmt, \
+ descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, len, \
+ vnet_sysctl_handle_opaque, fmt, descr)
+#define SYSCTL_VNET_STRING(parent, nbr, name, access, arg, len, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_STRING|CTLFLAG_VNET|(access), \
+ arg, len, vnet_sysctl_handle_string, "A", descr)
+#define SYSCTL_VNET_STRUCT(parent, nbr, name, access, ptr, type, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, \
+ sizeof(struct type), vnet_sysctl_handle_opaque, "S," #type, \
+ descr)
+#define SYSCTL_VNET_UINT(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_UINT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access), \
+ ptr, val, vnet_sysctl_handle_uint, "IU", descr)
+#define VNET_SYSCTL_ARG(req, arg1) do { \
+ if (arg1 != NULL) \
+ arg1 = (void *)(TD_TO_VNET((req)->td)->vnet_data_base + \
+ (uintptr_t)(arg1)); \
+} while (0)
+#endif /* SYSCTL_OID */
+
+/*
+ * Virtual sysinit mechanism, allowing network stack components to declare
+ * startup and shutdown methods to be run when virtual network stack
+ * instances are created and destroyed.
+ */
+#include <freebsd/sys/kernel.h>
+
+/*
+ * SYSINIT/SYSUNINIT variants that provide per-vnet constructors and
+ * destructors.
+ */
+struct vnet_sysinit {
+ enum sysinit_sub_id subsystem;
+ enum sysinit_elem_order order;
+ sysinit_cfunc_t func;
+ const void *arg;
+ TAILQ_ENTRY(vnet_sysinit) link;
+};
+
+#define VNET_SYSINIT(ident, subsystem, order, func, arg) \
+ static struct vnet_sysinit ident ## _vnet_init = { \
+ subsystem, \
+ order, \
+ (sysinit_cfunc_t)(sysinit_nfunc_t)func, \
+ (arg) \
+ }; \
+ SYSINIT(vnet_init_ ## ident, subsystem, order, \
+ vnet_register_sysinit, &ident ## _vnet_init); \
+ SYSUNINIT(vnet_init_ ## ident, subsystem, order, \
+ vnet_deregister_sysinit, &ident ## _vnet_init)
+
+#define VNET_SYSUNINIT(ident, subsystem, order, func, arg) \
+ static struct vnet_sysinit ident ## _vnet_uninit = { \
+ subsystem, \
+ order, \
+ (sysinit_cfunc_t)(sysinit_nfunc_t)func, \
+ (arg) \
+ }; \
+ SYSINIT(vnet_uninit_ ## ident, subsystem, order, \
+ vnet_register_sysuninit, &ident ## _vnet_uninit); \
+ SYSUNINIT(vnet_uninit_ ## ident, subsystem, order, \
+ vnet_deregister_sysuninit, &ident ## _vnet_uninit)
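+
+/*
+ * An illustrative sketch of registering a per-vnet constructor; the
+ * subsystem and order values follow the usual SYSINIT(9) conventions
+ * and sample_init is a hypothetical initializer function:
+ *
+ *	VNET_SYSINIT(sample, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY,
+ *	    sample_init, NULL);
+ */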
+
+/*
+ * Run per-vnet sysinits or sysuninits during vnet creation/destruction.
+ */
+void vnet_sysinit(void);
+void vnet_sysuninit(void);
+
+/*
+ * Interfaces for managing per-vnet constructors and destructors.
+ */
+void vnet_register_sysinit(void *arg);
+void vnet_register_sysuninit(void *arg);
+void vnet_deregister_sysinit(void *arg);
+void vnet_deregister_sysuninit(void *arg);
+
+/*
+ * EVENTHANDLER(9) extensions.
+ */
+#include <freebsd/sys/eventhandler.h>
+
+void vnet_global_eventhandler_iterator_func(void *, ...);
+#define VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG(tag, name, func, arg, priority) \
+do { \
+ if (IS_DEFAULT_VNET(curvnet)) { \
+ (tag) = vimage_eventhandler_register(NULL, #name, func, \
+ arg, priority, \
+ vnet_global_eventhandler_iterator_func); \
+ } \
+} while(0)
+#define VNET_GLOBAL_EVENTHANDLER_REGISTER(name, func, arg, priority) \
+do { \
+ if (IS_DEFAULT_VNET(curvnet)) { \
+ vimage_eventhandler_register(NULL, #name, func, \
+ arg, priority, \
+ vnet_global_eventhandler_iterator_func); \
+ } \
+} while(0)
+
+#else /* !VIMAGE */
+
+/*
+ * Various virtual network stack macros compile to no-ops without VIMAGE.
+ */
+#define curvnet NULL
+
+#define VNET_ASSERT(condition)
+#define CURVNET_SET(arg)
+#define CURVNET_SET_QUIET(arg)
+#define CURVNET_RESTORE()
+
+#define VNET_LIST_RLOCK()
+#define VNET_LIST_RLOCK_NOSLEEP()
+#define VNET_LIST_RUNLOCK()
+#define VNET_LIST_RUNLOCK_NOSLEEP()
+#define VNET_ITERATOR_DECL(arg)
+#define VNET_FOREACH(arg)
+
+#define IS_DEFAULT_VNET(arg) 1
+#define CRED_TO_VNET(cr) NULL
+#define TD_TO_VNET(td) NULL
+#define P_TO_VNET(p) NULL
+
+/*
+ * Versions of the VNET macros that compile to normal global variables and
+ * standard sysctl definitions.
+ */
+#define VNET_NAME(n) n
+#define VNET_DECLARE(t, n) extern t n
+#define VNET_DEFINE(t, n) t n
+#define _VNET_PTR(b, n) &VNET_NAME(n)
+
+/*
+ * Virtualized global variable accessor macros.
+ */
+#define VNET_VNET_PTR(vnet, n) (&(n))
+#define VNET_VNET(vnet, n) (n)
+
+#define VNET_PTR(n) (&(n))
+#define VNET(n) (n)
+
+/*
+ * When VIMAGE isn't compiled into the kernel, virtualized SYSCTLs simply
+ * become normal SYSCTLs.
+ */
+#ifdef SYSCTL_OID
+#define SYSCTL_VNET_INT(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_INT(parent, nbr, name, access, ptr, val, descr)
+#define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \
+ fmt, descr) \
+ SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, \
+ descr)
+#define SYSCTL_VNET_OPAQUE(parent, nbr, name, access, ptr, len, fmt, \
+ descr) \
+ SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr)
+#define SYSCTL_VNET_STRING(parent, nbr, name, access, arg, len, descr) \
+ SYSCTL_STRING(parent, nbr, name, access, arg, len, descr)
+#define SYSCTL_VNET_STRUCT(parent, nbr, name, access, ptr, type, descr) \
+ SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr)
+#define SYSCTL_VNET_UINT(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr)
+#define VNET_SYSCTL_ARG(req, arg1)
+#endif /* SYSCTL_OID */
+
+/*
+ * When VIMAGE isn't compiled into the kernel, VNET_SYSINIT/VNET_SYSUNINIT
+ * map into normal sysinits, which have the same ordering properties.
+ */
+#define VNET_SYSINIT(ident, subsystem, order, func, arg) \
+ SYSINIT(ident, subsystem, order, func, arg)
+#define VNET_SYSUNINIT(ident, subsystem, order, func, arg) \
+ SYSUNINIT(ident, subsystem, order, func, arg)
+
+/*
+ * Without VIMAGE revert to the default implementation.
+ */
+#define VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG(tag, name, func, arg, priority) \
+ (tag) = eventhandler_register(NULL, #name, func, arg, priority)
+#define VNET_GLOBAL_EVENTHANDLER_REGISTER(name, func, arg, priority) \
+ eventhandler_register(NULL, #name, func, arg, priority)
+#endif /* VIMAGE */
+#endif /* _KERNEL */
+
+#endif /* !_NET_VNET_HH_ */
diff --git a/freebsd/sys/net/zlib.c b/freebsd/sys/net/zlib.c
new file mode 100644
index 00000000..a7a54740
--- /dev/null
+++ b/freebsd/sys/net/zlib.c
@@ -0,0 +1,5409 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * This file is derived from various .h and .c files from the zlib-1.0.4
+ * distribution by Jean-loup Gailly and Mark Adler, with some additions
+ * by Paul Mackerras to aid in implementing Deflate compression and
+ * decompression for PPP packets. See zlib.h for conditions of
+ * distribution and use.
+ *
+ * Changes that have been made include:
+ * - added Z_PACKET_FLUSH (see zlib.h for details)
+ * - added inflateIncomp and deflateOutputPending
+ * - allow strm->next_out to be NULL, meaning discard the output
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * ==FILEVERSION 971210==
+ *
+ * This marker is used by the Linux installation script to determine
+ * whether an up-to-date version of this file is already installed.
+ */
+
+#define NO_DUMMY_DECL
+#define NO_ZCFUNCS
+#define MY_ZCALLOC
+
+#if defined(__FreeBSD__) && defined(_KERNEL)
+#define inflate inflate_ppp /* FreeBSD already has an inflate :-( */
+#endif
+
+
+/* +++ zutil.h */
+/*-
+ * zutil.h -- internal interface and configuration of the compression library
+ * Copyright (C) 1995-1996 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* From: zutil.h,v 1.16 1996/07/24 13:41:13 me Exp $ */
+
+#ifndef _Z_UTIL_H
+#define _Z_UTIL_H
+
+#ifdef _KERNEL
+#include <freebsd/net/zlib.h>
+#else
+#include <freebsd/local/zlib.h>
+#endif
+
+#ifdef _KERNEL
+/* Assume this is a *BSD or SVR4 kernel */
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+# define HAVE_MEMCPY
+
+#else
+#if defined(__KERNEL__)
+/* Assume this is a Linux kernel */
+#include <freebsd/linux/string.h>
+#define HAVE_MEMCPY
+
+#else /* not kernel */
+
+#if defined(MSDOS)||defined(VMS)||defined(CRAY)||defined(WIN32)||defined(RISCOS)
+# include <freebsd/stddef.h>
+# include <freebsd/errno.h>
+#else
+ extern int errno;
+#endif
+#ifdef STDC
+# include <freebsd/string.h>
+# include <freebsd/stdlib.h>
+#endif
+#endif /* __KERNEL__ */
+#endif /* _KERNEL */
+
+#ifndef local
+# define local static
+#endif
+/* compile with -Dlocal if your debugger can't find static symbols */
+
+typedef unsigned char uch;
+typedef uch FAR uchf;
+typedef unsigned short ush;
+typedef ush FAR ushf;
+typedef unsigned long ulg;
+
+static const char *z_errmsg[10]; /* indexed by 2-zlib_error */
+/* (size given to avoid silly warnings with Visual C++) */
+
+#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
+
+#define ERR_RETURN(strm,err) \
+ return (strm->msg = (const char*)ERR_MSG(err), (err))
+/* To be used only when the state is known to be valid */
+
+ /* common constants */
+
+#ifndef DEF_WBITS
+# define DEF_WBITS MAX_WBITS
+#endif
+/* default windowBits for decompression. MAX_WBITS is for compression only */
+
+#if MAX_MEM_LEVEL >= 8
+# define DEF_MEM_LEVEL 8
+#else
+# define DEF_MEM_LEVEL MAX_MEM_LEVEL
+#endif
+/* default memLevel */
+
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES 2
+/* The three kinds of block type */
+
+#define MIN_MATCH 3
+#define MAX_MATCH 258
+/* The minimum and maximum match lengths */
+
+#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
+
+ /* target dependencies */
+
+#ifdef MSDOS
+# define OS_CODE 0x00
+# ifdef __TURBOC__
+# include <freebsd/alloc.h>
+# else /* MSC or DJGPP */
+# include <freebsd/malloc.h>
+# endif
+#endif
+
+#ifdef OS2
+# define OS_CODE 0x06
+#endif
+
+#ifdef WIN32 /* Windows 95 & Windows NT */
+# define OS_CODE 0x0b
+#endif
+
+#if defined(VAXC) || defined(VMS)
+# define OS_CODE 0x02
+# define FOPEN(name, mode) \
+ fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
+#endif
+
+#ifdef AMIGA
+# define OS_CODE 0x01
+#endif
+
+#if defined(ATARI) || defined(atarist)
+# define OS_CODE 0x05
+#endif
+
+#ifdef MACOS
+# define OS_CODE 0x07
+#endif
+
+#ifdef __50SERIES /* Prime/PRIMOS */
+# define OS_CODE 0x0F
+#endif
+
+#ifdef TOPS20
+# define OS_CODE 0x0a
+#endif
+
+#if defined(_BEOS_) || defined(RISCOS)
+# define fdopen(fd,mode) NULL /* No fdopen() */
+#endif
+
+ /* Common defaults */
+
+#ifndef OS_CODE
+# define OS_CODE 0x03 /* assume Unix */
+#endif
+
+#ifndef FOPEN
+# define FOPEN(name, mode) fopen((name), (mode))
+#endif
+
+ /* functions */
+
+#ifdef HAVE_STRERROR
+ extern char *strerror OF((int));
+# define zstrerror(errnum) strerror(errnum)
+#else
+# define zstrerror(errnum) ""
+#endif
+
+#if defined(pyr)
+# define NO_MEMCPY
+#endif
+#if (defined(M_I86SM) || defined(M_I86MM)) && !defined(_MSC_VER)
+ /* Use our own functions for small and medium model with MSC <= 5.0.
+ * You may have to use the same strategy for Borland C (untested).
+ */
+# define NO_MEMCPY
+#endif
+#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
+# define HAVE_MEMCPY
+#endif
+#ifdef HAVE_MEMCPY
+# ifdef SMALL_MEDIUM /* MSDOS small or medium model */
+# define zmemcpy _fmemcpy
+# define zmemcmp _fmemcmp
+# define zmemzero(dest, len) _fmemset(dest, 0, len)
+# else
+# define zmemcpy memcpy
+# define zmemcmp memcmp
+# define zmemzero(dest, len) memset(dest, 0, len)
+# endif
+#else
+ extern void zmemcpy OF((Bytef* dest, Bytef* source, uInt len));
+ extern int zmemcmp OF((Bytef* s1, Bytef* s2, uInt len));
+ extern void zmemzero OF((Bytef* dest, uInt len));
+#endif
+
+/* Diagnostic functions */
+#ifdef DEBUG_ZLIB
+# include <freebsd/stdio.h>
+# ifndef verbose
+# define verbose 0
+# endif
+ extern void z_error OF((char *m));
+# define Assert(cond,msg) {if(!(cond)) z_error(msg);}
+# define Trace(x) fprintf x
+# define Tracev(x) {if (verbose) fprintf x ;}
+# define Tracevv(x) {if (verbose>1) fprintf x ;}
+# define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
+# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
+#else
+# define Assert(cond,msg)
+# define Trace(x)
+# define Tracev(x)
+# define Tracevv(x)
+# define Tracec(c,x)
+# define Tracecv(c,x)
+#endif
+
+
+typedef uLong (*check_func) OF((uLong check, const Bytef *buf, uInt len));
+
+voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size));
+void zcfree OF((voidpf opaque, voidpf ptr));
+
+#define ZALLOC(strm, items, size) \
+ (*((strm)->zalloc))((strm)->opaque, (items), (size))
+#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
+#define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
+
+#endif /* _Z_UTIL_H */
+/* --- zutil.h */
+
+/* +++ deflate.h */
+/* deflate.h -- internal compression state
+ * Copyright (C) 1995-1996 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* From: deflate.h,v 1.10 1996/07/02 12:41:00 me Exp $ */
+
+#ifndef _DEFLATE_H
+#define _DEFLATE_H
+
+/* #include <freebsd/local/zutil.h> */
+
+/* ===========================================================================
+ * Internal compression state.
+ */
+
+#define LENGTH_CODES 29
+/* number of length codes, not counting the special END_BLOCK code */
+
+#define LITERALS 256
+/* number of literal bytes 0..255 */
+
+#define L_CODES (LITERALS+1+LENGTH_CODES)
+/* number of Literal or Length codes, including the END_BLOCK code */
+
+#define D_CODES 30
+/* number of distance codes */
+
+#define BL_CODES 19
+/* number of codes used to transfer the bit lengths */
+
+#define HEAP_SIZE (2*L_CODES+1)
+/* maximum heap size */
+
+#define MAX_BITS 15
+/* All codes must not exceed MAX_BITS bits */
+
+#define INIT_STATE 42
+#define BUSY_STATE 113
+#define FINISH_STATE 666
+/* Stream status */
+
+
+/* Data structure describing a single value and its code string. */
+typedef struct ct_data_s {
+ union {
+ ush freq; /* frequency count */
+ ush code; /* bit string */
+ } fc;
+ union {
+ ush dad; /* father node in Huffman tree */
+ ush len; /* length of bit string */
+ } dl;
+} FAR ct_data;
+
+#define Freq fc.freq
+#define Code fc.code
+#define Dad dl.dad
+#define Len dl.len
+
+typedef struct static_tree_desc_s static_tree_desc;
+
+typedef struct tree_desc_s {
+ ct_data *dyn_tree; /* the dynamic tree */
+ int max_code; /* largest code with non zero frequency */
+ static_tree_desc *stat_desc; /* the corresponding static tree */
+} FAR tree_desc;
+
+typedef ush Pos;
+typedef Pos FAR Posf;
+typedef unsigned IPos;
+
+/* A Pos is an index in the character window. We use short instead of int to
+ * save space in the various tables. IPos is used only for parameter passing.
+ */
+
+typedef struct deflate_state {
+ z_streamp strm; /* pointer back to this zlib stream */
+ int status; /* as the name implies */
+ Bytef *pending_buf; /* output still pending */
+ ulg pending_buf_size; /* size of pending_buf */
+ Bytef *pending_out; /* next pending byte to output to the stream */
+ int pending; /* nb of bytes in the pending buffer */
+ int noheader; /* suppress zlib header and adler32 */
+ Byte data_type; /* UNKNOWN, BINARY or ASCII */
+ Byte method; /* STORED (for zip only) or DEFLATED */
+ int last_flush; /* value of flush param for previous deflate call */
+
+ /* used by deflate.c: */
+
+ uInt w_size; /* LZ77 window size (32K by default) */
+ uInt w_bits; /* log2(w_size) (8..16) */
+ uInt w_mask; /* w_size - 1 */
+
+ Bytef *window;
+ /* Sliding window. Input bytes are read into the second half of the window,
+ * and move to the first half later to keep a dictionary of at least wSize
+ * bytes. With this organization, matches are limited to a distance of
+ * wSize-MAX_MATCH bytes, but this ensures that IO is always
+ * performed with a length multiple of the block size. Also, it limits
+ * the window size to 64K, which is quite useful on MSDOS.
+ * To do: use the user input buffer as sliding window.
+ */
+
+ ulg window_size;
+ /* Actual size of window: 2*wSize, except when the user input buffer
+ * is directly used as sliding window.
+ */
+
+ Posf *prev;
+ /* Link to older string with same hash index. To limit the size of this
+ * array to 64K, this link is maintained only for the last 32K strings.
+ * An index in this array is thus a window index modulo 32K.
+ */
+
+ Posf *head; /* Heads of the hash chains or NIL. */
+
+ uInt ins_h; /* hash index of string to be inserted */
+ uInt hash_size; /* number of elements in hash table */
+ uInt hash_bits; /* log2(hash_size) */
+ uInt hash_mask; /* hash_size-1 */
+
+ uInt hash_shift;
+ /* Number of bits by which ins_h must be shifted at each input
+ * step. It must be such that after MIN_MATCH steps, the oldest
+ * byte no longer takes part in the hash key, that is:
+ * hash_shift * MIN_MATCH >= hash_bits
+ */
+
+ long block_start;
+ /* Window position at the beginning of the current output block. Gets
+ * negative when the window is moved backwards.
+ */
+
+ uInt match_length; /* length of best match */
+ IPos prev_match; /* previous match */
+ int match_available; /* set if previous match exists */
+ uInt strstart; /* start of string to insert */
+ uInt match_start; /* start of matching string */
+ uInt lookahead; /* number of valid bytes ahead in window */
+
+ uInt prev_length;
+ /* Length of the best match at previous step. Matches not greater than this
+ * are discarded. This is used in the lazy match evaluation.
+ */
+
+ uInt max_chain_length;
+ /* To speed up deflation, hash chains are never searched beyond this
+ * length. A higher limit improves compression ratio but degrades the
+ * speed.
+ */
+
+ uInt max_lazy_match;
+ /* Attempt to find a better match only when the current match is strictly
+ * smaller than this value. This mechanism is used only for compression
+ * levels >= 4.
+ */
+# define max_insert_length max_lazy_match
+ /* Insert new strings in the hash table only if the match length is not
+ * greater than this length. This saves time but degrades compression.
+ * max_insert_length is used only for compression levels <= 3.
+ */
+
+ int level; /* compression level (1..9) */
+ int strategy; /* favor or force Huffman coding*/
+
+ uInt good_match;
+ /* Use a faster search when the previous match is longer than this */
+
+ int nice_match; /* Stop searching when current match exceeds this */
+
+ /* used by trees.c: */
+    /* Didn't use ct_data typedef below to suppress compiler warning */
+ struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
+ struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
+ struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
+
+ struct tree_desc_s l_desc; /* desc. for literal tree */
+ struct tree_desc_s d_desc; /* desc. for distance tree */
+ struct tree_desc_s bl_desc; /* desc. for bit length tree */
+
+ ush bl_count[MAX_BITS+1];
+ /* number of codes at each bit length for an optimal tree */
+
+ int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
+ int heap_len; /* number of elements in the heap */
+ int heap_max; /* element of largest frequency */
+ /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
+ * The same heap array is used to build all trees.
+ */
+
+ uch depth[2*L_CODES+1];
+ /* Depth of each subtree used as tie breaker for trees of equal frequency
+ */
+
+ uchf *l_buf; /* buffer for literals or lengths */
+
+ uInt lit_bufsize;
+ /* Size of match buffer for literals/lengths. There are 4 reasons for
+ * limiting lit_bufsize to 64K:
+ * - frequencies can be kept in 16 bit counters
+ * - if compression is not successful for the first block, all input
+ * data is still in the window so we can still emit a stored block even
+ * when input comes from standard input. (This can also be done for
+ * all blocks if lit_bufsize is not greater than 32K.)
+ * - if compression is not successful for a file smaller than 64K, we can
+ * even emit a stored file instead of a stored block (saving 5 bytes).
+ * This is applicable only for zip (not gzip or zlib).
+ * - creating new Huffman trees less frequently may not provide fast
+ * adaptation to changes in the input data statistics. (Take for
+ * example a binary file with poorly compressible code followed by
+ * a highly compressible string table.) Smaller buffer sizes give
+ * fast adaptation but have of course the overhead of transmitting
+ * trees more frequently.
+ * - I can't count above 4
+ */
+
+ uInt last_lit; /* running index in l_buf */
+
+ ushf *d_buf;
+ /* Buffer for distances. To simplify the code, d_buf and l_buf have
+ * the same number of elements. To use different lengths, an extra flag
+ * array would be necessary.
+ */
+
+ ulg opt_len; /* bit length of current block with optimal trees */
+ ulg static_len; /* bit length of current block with static trees */
+ ulg compressed_len; /* total bit length of compressed file */
+ uInt matches; /* number of string matches in current block */
+ int last_eob_len; /* bit length of EOB code for last block */
+
+#ifdef DEBUG_ZLIB
+ ulg bits_sent; /* bit length of the compressed data */
+#endif
+
+ ush bi_buf;
+ /* Output buffer. bits are inserted starting at the bottom (least
+ * significant bits).
+ */
+ int bi_valid;
+ /* Number of valid bits in bi_buf. All bits above the last valid bit
+ * are always zero.
+ */
+
+} FAR deflate_state;
+
+/* Output a byte on the stream.
+ * IN assertion: there is enough room in pending_buf.
+ */
+#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
+
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD)
+/* In order to simplify the code, particularly on 16 bit machines, match
+ * distances are limited to MAX_DIST instead of WSIZE.
+ */
+
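+/*
+ * Worked example (default parameters): with a 32K window,
+ * MIN_LOOKAHEAD = 258 + 3 + 1 = 262, so MAX_DIST(s) = 32768 - 262 = 32506;
+ * match distances therefore never span the entire window.
+ */
+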
+ /* in trees.c */
+void _tr_init OF((deflate_state *s));
+int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
+ulg _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len,
+ int eof));
+void _tr_align OF((deflate_state *s));
+void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
+ int eof));
+void _tr_stored_type_only OF((deflate_state *));
+
+#endif
+/* --- deflate.h */
+
+/* +++ deflate.c */
+/* deflate.c -- compress data using the deflation algorithm
+ * Copyright (C) 1995-1996 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ * ALGORITHM
+ *
+ * The "deflation" process depends on being able to identify portions
+ * of the input text which are identical to earlier input (within a
+ * sliding window trailing behind the input currently being processed).
+ *
+ * The most straightforward technique turns out to be the fastest for
+ * most input files: try all possible matches and select the longest.
+ * The key feature of this algorithm is that insertions into the string
+ * dictionary are very simple and thus fast, and deletions are avoided
+ * completely. Insertions are performed at each input character, whereas
+ * string matches are performed only when the previous match ends. So it
+ * is preferable to spend more time in matches to allow very fast string
+ * insertions and avoid deletions. The matching algorithm for small
+ * strings is inspired from that of Rabin & Karp. A brute force approach
+ * is used to find longer strings when a small match has been found.
+ * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
+ * (by Leonid Broukhis).
+ * A previous version of this file used a more sophisticated algorithm
+ * (by Fiala and Greene) which is guaranteed to run in linear amortized
+ * time, but has a larger average cost, uses more memory and is patented.
+ * However the F&G algorithm may be faster for some highly redundant
+ * files if the parameter max_chain_length (described below) is too large.
+ *
+ * ACKNOWLEDGEMENTS
+ *
+ * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
+ * I found it in 'freeze' written by Leonid Broukhis.
+ * Thanks to many people for bug reports and testing.
+ *
+ * REFERENCES
+ *
+ * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
+ * Available in ftp://ds.internic.net/rfc/rfc1951.txt
+ *
+ * A description of the Rabin and Karp algorithm is given in the book
+ * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
+ *
+ * Fiala,E.R., and Greene,D.H.
+ * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
+ *
+ */
+
+/* From: deflate.c,v 1.15 1996/07/24 13:40:58 me Exp $ */
+
+/* #include <freebsd/local/deflate.h> */
+
+char deflate_copyright[] = " deflate 1.0.4 Copyright 1995-1996 Jean-loup Gailly ";
+/*
+ If you use the zlib library in a product, an acknowledgment is welcome
+ in the documentation of your product. If for some reason you cannot
+ include such an acknowledgment, I would appreciate that you keep this
+ copyright string in the executable of your product.
+ */
+
+/* ===========================================================================
+ * Function prototypes.
+ */
+typedef enum {
+ need_more, /* block not completed, need more input or more output */
+ block_done, /* block flush performed */
+ finish_started, /* finish started, need only more output at next deflate */
+ finish_done /* finish done, accept no more input or output */
+} block_state;
+
+typedef block_state (*compress_func) OF((deflate_state *s, int flush));
+/* Compression function. Returns the block state after the call. */
+
+local void fill_window OF((deflate_state *s));
+local block_state deflate_stored OF((deflate_state *s, int flush));
+local block_state deflate_fast OF((deflate_state *s, int flush));
+local block_state deflate_slow OF((deflate_state *s, int flush));
+local void lm_init OF((deflate_state *s));
+local void putShortMSB OF((deflate_state *s, uInt b));
+local void flush_pending OF((z_streamp strm));
+local int read_buf OF((z_streamp strm, charf *buf, unsigned size));
+#ifdef ASMV
+ void match_init OF((void)); /* asm code initialization */
+ uInt longest_match OF((deflate_state *s, IPos cur_match));
+#else
+local uInt longest_match OF((deflate_state *s, IPos cur_match));
+#endif
+
+#ifdef DEBUG_ZLIB
+local void check_match OF((deflate_state *s, IPos start, IPos match,
+ int length));
+#endif
+
+/* ===========================================================================
+ * Local data
+ */
+
+#define NIL 0
+/* Tail of hash chains */
+
+#ifndef TOO_FAR
+# define TOO_FAR 4096
+#endif
+/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+/* Values for max_lazy_match, good_match and max_chain_length, depending on
+ * the desired pack level (0..9). The values given below have been tuned to
+ * exclude worst case performance for pathological files. Better values may be
+ * found for specific files.
+ */
+typedef struct config_s {
+ ush good_length; /* reduce lazy search above this match length */
+ ush max_lazy; /* do not perform lazy search above this match length */
+ ush nice_length; /* quit search above this match length */
+ ush max_chain;
+ compress_func func;
+} config;
+
+local config configuration_table[10] = {
+/* good lazy nice chain */
+/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */
+/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */
+/* 2 */ {4, 5, 16, 8, deflate_fast},
+/* 3 */ {4, 6, 32, 32, deflate_fast},
+
+/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */
+/* 5 */ {8, 16, 32, 32, deflate_slow},
+/* 6 */ {8, 16, 128, 128, deflate_slow},
+/* 7 */ {8, 32, 128, 256, deflate_slow},
+/* 8 */ {32, 128, 258, 1024, deflate_slow},
+/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */
+
+/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
+ * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
+ * meaning.
+ */
+
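+/*
+ * Reading the table above: at level 1 a hash chain is abandoned after 4
+ * links and any match of 8 or more bytes stops the search, while level 9
+ * follows up to 4096 links and only a maximal 258-byte match stops early.
+ * deflateParams() reloads these four fields when the level changes.
+ */
+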
+#define EQUAL 0
+/* result of memcmp for equal strings */
+
+#ifndef NO_DUMMY_DECL
+struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
+#endif
+
+/* ===========================================================================
+ * Update a hash value with the given input byte
+ * IN assertion: all calls to UPDATE_HASH are made with consecutive
+ * input characters, so that a running hash key can be computed from the
+ * previous key instead of complete recalculation each time.
+ */
+#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+
+
+/* ===========================================================================
+ * Insert string str in the dictionary and set match_head to the previous head
+ * of the hash chain (the most recent string with same hash key). Return
+ * the previous length of the hash chain.
+ * IN assertion: all calls to INSERT_STRING are made with consecutive
+ * input characters and the first MIN_MATCH bytes of str are valid
+ * (except for the last MIN_MATCH-1 bytes of the input file).
+ */
+#define INSERT_STRING(s, str, match_head) \
+ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+ s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \
+ s->head[s->ins_h] = (Pos)(str))
+
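+/*
+ * Worked example: with MIN_MATCH == 3 the hash key covers the next three
+ * input bytes, and inserting position "str" pushes it onto the front of
+ * its chain:
+ *
+ *	prev[str & w_mask] = head[h];	remember the older occurrence
+ *	head[h] = str;			the newest occurrence is the head
+ *
+ * longest_match() then walks prev[] from head[h] toward older strings.
+ */
+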
+/* ===========================================================================
+ * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
+ * prev[] will be initialized on the fly.
+ */
+#define CLEAR_HASH(s) \
+ s->head[s->hash_size-1] = NIL; \
+ zmemzero((charf *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+
+/* ========================================================================= */
+int deflateInit_(strm, level, version, stream_size)
+ z_streamp strm;
+ int level;
+ const char *version;
+ int stream_size;
+{
+ return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
+ Z_DEFAULT_STRATEGY, version, stream_size);
+ /* To do: ignore strm->next_in if we use it as window */
+}
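+
+/*
+ * A minimal single-shot usage sketch of this API (compiled out; the
+ * function name and buffer handling are illustrative only).  NO_ZCFUNCS
+ * is defined in this file, so the allocator hooks are set explicitly.
+ */
+#if 0
+static int
+example_compress(Bytef *in, uInt in_len, Bytef *out, uInt out_len)
+{
+	z_stream zs;
+	int err;
+
+	zs.zalloc = zcalloc;		/* no defaults with NO_ZCFUNCS */
+	zs.zfree = zcfree;
+	zs.opaque = (voidpf)0;
+	err = deflateInit(&zs, Z_DEFAULT_COMPRESSION);
+	if (err != Z_OK)
+		return (err);
+	zs.next_in = in;
+	zs.avail_in = in_len;
+	zs.next_out = out;
+	zs.avail_out = out_len;
+	err = deflate(&zs, Z_FINISH);	/* expect Z_STREAM_END when done */
+	deflateEnd(&zs);
+	return (err == Z_STREAM_END ? Z_OK : err);
+}
+#endif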
+
+/* ========================================================================= */
+int deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
+ version, stream_size)
+ z_streamp strm;
+ int level;
+ int method;
+ int windowBits;
+ int memLevel;
+ int strategy;
+ const char *version;
+ int stream_size;
+{
+ deflate_state *s;
+ int noheader = 0;
+ static char* my_version = ZLIB_VERSION;
+
+ ushf *overlay;
+ /* We overlay pending_buf and d_buf+l_buf. This works since the average
+ * output size for (length,distance) codes is <= 24 bits.
+ */
+
+ if (version == Z_NULL || version[0] != my_version[0] ||
+ stream_size != sizeof(z_stream)) {
+ return Z_VERSION_ERROR;
+ }
+ if (strm == Z_NULL) return Z_STREAM_ERROR;
+
+ strm->msg = Z_NULL;
+#ifndef NO_ZCFUNCS
+ if (strm->zalloc == Z_NULL) {
+ strm->zalloc = zcalloc;
+ strm->opaque = (voidpf)0;
+ }
+ if (strm->zfree == Z_NULL) strm->zfree = zcfree;
+#endif
+
+ if (level == Z_DEFAULT_COMPRESSION) level = 6;
+
+ if (windowBits < 0) { /* undocumented feature: suppress zlib header */
+ noheader = 1;
+ windowBits = -windowBits;
+ }
+ if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
+ windowBits < 9 || windowBits > 15 || level < 0 || level > 9 ||
+ strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
+ return Z_STREAM_ERROR;
+ }
+ s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
+ if (s == Z_NULL) return Z_MEM_ERROR;
+ strm->state = (struct internal_state FAR *)s;
+ s->strm = strm;
+
+ s->noheader = noheader;
+ s->w_bits = windowBits;
+ s->w_size = 1 << s->w_bits;
+ s->w_mask = s->w_size - 1;
+
+ s->hash_bits = memLevel + 7;
+ s->hash_size = 1 << s->hash_bits;
+ s->hash_mask = s->hash_size - 1;
+ s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
+
+ s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
+ s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos));
+ s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos));
+
+ s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
+
+ overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
+ s->pending_buf = (uchf *) overlay;
+ s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
+
+ if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
+ s->pending_buf == Z_NULL) {
+ strm->msg = (const char*)ERR_MSG(Z_MEM_ERROR);
+ deflateEnd (strm);
+ return Z_MEM_ERROR;
+ }
+ s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
+ s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
+
+ s->level = level;
+ s->strategy = strategy;
+ s->method = (Byte)method;
+
+ return deflateReset(strm);
+}
+
+/* ========================================================================= */
+int deflateSetDictionary (strm, dictionary, dictLength)
+ z_streamp strm;
+ const Bytef *dictionary;
+ uInt dictLength;
+{
+ deflate_state *s;
+ uInt length = dictLength;
+ uInt n;
+ IPos hash_head = 0;
+
+ if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL)
+ return Z_STREAM_ERROR;
+
+ s = (deflate_state *) strm->state;
+ if (s->status != INIT_STATE) return Z_STREAM_ERROR;
+
+ strm->adler = adler32(strm->adler, dictionary, dictLength);
+
+ if (length < MIN_MATCH) return Z_OK;
+ if (length > MAX_DIST(s)) {
+ length = MAX_DIST(s);
+#ifndef USE_DICT_HEAD
+ dictionary += dictLength - length; /* use the tail of the dictionary */
+#endif
+ }
+ zmemcpy((charf *)s->window, dictionary, length);
+ s->strstart = length;
+ s->block_start = (long)length;
+
+ /* Insert all strings in the hash table (except for the last two bytes).
+ * s->lookahead stays null, so s->ins_h will be recomputed at the next
+ * call of fill_window.
+ */
+ s->ins_h = s->window[0];
+ UPDATE_HASH(s, s->ins_h, s->window[1]);
+ for (n = 0; n <= length - MIN_MATCH; n++) {
+ INSERT_STRING(s, n, hash_head);
+ }
+ if (hash_head) hash_head = 0; /* to make compiler happy */
+ return Z_OK;
+}
+
+/* ========================================================================= */
+int deflateReset (strm)
+ z_streamp strm;
+{
+ deflate_state *s;
+
+ if (strm == Z_NULL || strm->state == Z_NULL ||
+ strm->zalloc == Z_NULL || strm->zfree == Z_NULL) return Z_STREAM_ERROR;
+
+ strm->total_in = strm->total_out = 0;
+ strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
+ strm->data_type = Z_UNKNOWN;
+
+ s = (deflate_state *)strm->state;
+ s->pending = 0;
+ s->pending_out = s->pending_buf;
+
+ if (s->noheader < 0) {
+ s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */
+ }
+ s->status = s->noheader ? BUSY_STATE : INIT_STATE;
+ strm->adler = 1;
+ s->last_flush = Z_NO_FLUSH;
+
+ _tr_init(s);
+ lm_init(s);
+
+ return Z_OK;
+}
+
+/* ========================================================================= */
+int deflateParams(strm, level, strategy)
+ z_streamp strm;
+ int level;
+ int strategy;
+{
+ deflate_state *s;
+ compress_func func;
+ int err = Z_OK;
+
+ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+ s = (deflate_state *) strm->state;
+
+ if (level == Z_DEFAULT_COMPRESSION) {
+ level = 6;
+ }
+ if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
+ return Z_STREAM_ERROR;
+ }
+ func = configuration_table[s->level].func;
+
+ if (func != configuration_table[level].func && strm->total_in != 0) {
+ /* Flush the last buffer: */
+ err = deflate(strm, Z_PARTIAL_FLUSH);
+ }
+ if (s->level != level) {
+ s->level = level;
+ s->max_lazy_match = configuration_table[level].max_lazy;
+ s->good_match = configuration_table[level].good_length;
+ s->nice_match = configuration_table[level].nice_length;
+ s->max_chain_length = configuration_table[level].max_chain;
+ }
+ s->strategy = strategy;
+ return err;
+}
+
+/* =========================================================================
+ * Put a short in the pending buffer. The 16-bit value is put in MSB order.
+ * IN assertion: the stream state is correct and there is enough room in
+ * pending_buf.
+ */
+local void putShortMSB (s, b)
+ deflate_state *s;
+ uInt b;
+{
+ put_byte(s, (Byte)(b >> 8));
+ put_byte(s, (Byte)(b & 0xff));
+}
+
+/* =========================================================================
+ * Flush as much pending output as possible. All deflate() output goes
+ * through this function so some applications may wish to modify it
+ * to avoid allocating a large strm->next_out buffer and copying into it.
+ * (See also read_buf()).
+ */
+local void flush_pending(strm)
+ z_streamp strm;
+{
+ deflate_state *s = (deflate_state *) strm->state;
+ unsigned len = s->pending;
+
+ if (len > strm->avail_out) len = strm->avail_out;
+ if (len == 0) return;
+
+ if (strm->next_out != Z_NULL) {
+ zmemcpy(strm->next_out, s->pending_out, len);
+ strm->next_out += len;
+ }
+ s->pending_out += len;
+ strm->total_out += len;
+ strm->avail_out -= len;
+ s->pending -= len;
+ if (s->pending == 0) {
+ s->pending_out = s->pending_buf;
+ }
+}
+
+/* ========================================================================= */
+int deflate (strm, flush)
+ z_streamp strm;
+ int flush;
+{
+ int old_flush; /* value of flush param for previous deflate call */
+ deflate_state *s;
+
+ if (strm == Z_NULL || strm->state == Z_NULL ||
+ flush > Z_FINISH || flush < 0) {
+ return Z_STREAM_ERROR;
+ }
+ s = (deflate_state *) strm->state;
+
+ if ((strm->next_in == Z_NULL && strm->avail_in != 0) ||
+ (s->status == FINISH_STATE && flush != Z_FINISH)) {
+ ERR_RETURN(strm, Z_STREAM_ERROR);
+ }
+ if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
+
+ s->strm = strm; /* just in case */
+ old_flush = s->last_flush;
+ s->last_flush = flush;
+
+ /* Write the zlib header */
+ if (s->status == INIT_STATE) {
+
+ uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
+ uInt level_flags = (s->level-1) >> 1;
+
+ if (level_flags > 3) level_flags = 3;
+ header |= (level_flags << 6);
+ if (s->strstart != 0) header |= PRESET_DICT;
+ header += 31 - (header % 31);
+
+ s->status = BUSY_STATE;
+ putShortMSB(s, header);
+
+ /* Save the adler32 of the preset dictionary: */
+ if (s->strstart != 0) {
+ putShortMSB(s, (uInt)(strm->adler >> 16));
+ putShortMSB(s, (uInt)(strm->adler & 0xffff));
+ }
+ strm->adler = 1L;
+ }
+
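+    /*
+     * Worked example: with the defaults (Z_DEFLATED, windowBits 15,
+     * level 6) the header written above is 0x78 0x9c -- CMF 0x78
+     * (CM = 8, CINFO = 7) plus an FLG chosen so that the 16-bit value
+     * 0x789c is divisible by 31, as the zlib format requires.
+     */
+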
+ /* Flush as much pending output as possible */
+ if (s->pending != 0) {
+ flush_pending(strm);
+ if (strm->avail_out == 0) {
+ /* Since avail_out is 0, deflate will be called again with
+ * more output space, but possibly with both pending and
+ * avail_in equal to zero. There won't be anything to do,
+ * but this is not an error situation so make sure we
+ * return OK instead of BUF_ERROR at next call of deflate:
+ */
+ s->last_flush = -1;
+ return Z_OK;
+ }
+
+ /* Make sure there is something to do and avoid duplicate consecutive
+ * flushes. For repeated and useless calls with Z_FINISH, we keep
+     * returning Z_STREAM_END instead of Z_BUF_ERROR.
+ */
+ } else if (strm->avail_in == 0 && flush <= old_flush &&
+ flush != Z_FINISH) {
+ ERR_RETURN(strm, Z_BUF_ERROR);
+ }
+
+ /* User must not provide more input after the first FINISH: */
+ if (s->status == FINISH_STATE && strm->avail_in != 0) {
+ ERR_RETURN(strm, Z_BUF_ERROR);
+ }
+
+ /* Start a new block or continue the current one.
+ */
+ if (strm->avail_in != 0 || s->lookahead != 0 ||
+ (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
+ block_state bstate;
+
+ bstate = (*(configuration_table[s->level].func))(s, flush);
+
+ if (bstate == finish_started || bstate == finish_done) {
+ s->status = FINISH_STATE;
+ }
+ if (bstate == need_more || bstate == finish_started) {
+ if (strm->avail_out == 0) {
+ s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
+ }
+ return Z_OK;
+ /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
+ * of deflate should use the same flush parameter to make sure
+ * that the flush is complete. So we don't have to output an
+ * empty block here, this will be done at next call. This also
+ * ensures that for a very small output buffer, we emit at most
+ * one empty block.
+ */
+ }
+ if (bstate == block_done) {
+ if (flush == Z_PARTIAL_FLUSH) {
+ _tr_align(s);
+ } else if (flush == Z_PACKET_FLUSH) {
+ /* Output just the 3-bit `stored' block type value,
+ but not a zero length. */
+ _tr_stored_type_only(s);
+ } else { /* FULL_FLUSH or SYNC_FLUSH */
+ _tr_stored_block(s, (char*)0, 0L, 0);
+ /* For a full flush, this empty block will be recognized
+ * as a special marker by inflate_sync().
+ */
+ if (flush == Z_FULL_FLUSH) {
+ CLEAR_HASH(s); /* forget history */
+ }
+ }
+ flush_pending(strm);
+ if (strm->avail_out == 0) {
+ s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
+ return Z_OK;
+ }
+ }
+ }
+ Assert(strm->avail_out > 0, "bug2");
+
+ if (flush != Z_FINISH) return Z_OK;
+ if (s->noheader) return Z_STREAM_END;
+
+ /* Write the zlib trailer (adler32) */
+ putShortMSB(s, (uInt)(strm->adler >> 16));
+ putShortMSB(s, (uInt)(strm->adler & 0xffff));
+ flush_pending(strm);
+ /* If avail_out is zero, the application will call deflate again
+ * to flush the rest.
+ */
+ s->noheader = -1; /* write the trailer only once! */
+ return s->pending != 0 ? Z_OK : Z_STREAM_END;
+}
+
+/* ========================================================================= */
+int deflateEnd (strm)
+ z_streamp strm;
+{
+ int status;
+ deflate_state *s;
+
+ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+ s = (deflate_state *) strm->state;
+
+ status = s->status;
+ if (status != INIT_STATE && status != BUSY_STATE &&
+ status != FINISH_STATE) {
+ return Z_STREAM_ERROR;
+ }
+
+ /* Deallocate in reverse order of allocations: */
+ TRY_FREE(strm, s->pending_buf);
+ TRY_FREE(strm, s->head);
+ TRY_FREE(strm, s->prev);
+ TRY_FREE(strm, s->window);
+
+ ZFREE(strm, s);
+ strm->state = Z_NULL;
+
+ return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
+}
+
+/* =========================================================================
+ * Copy the source state to the destination state.
+ */
+int deflateCopy (dest, source)
+ z_streamp dest;
+ z_streamp source;
+{
+ deflate_state *ds;
+ deflate_state *ss;
+ ushf *overlay;
+
+ if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL)
+ return Z_STREAM_ERROR;
+ ss = (deflate_state *) source->state;
+
+ zmemcpy(dest, source, sizeof(*dest));
+
+ ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
+ if (ds == Z_NULL) return Z_MEM_ERROR;
+ dest->state = (struct internal_state FAR *) ds;
+ zmemcpy(ds, ss, sizeof(*ds));
+ ds->strm = dest;
+
+ ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
+ ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos));
+ ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos));
+ overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
+ ds->pending_buf = (uchf *) overlay;
+
+ if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
+ ds->pending_buf == Z_NULL) {
+ deflateEnd (dest);
+ return Z_MEM_ERROR;
+ }
+ /* ??? following zmemcpy doesn't work for 16-bit MSDOS */
+ zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
+ zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
+ zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
+ zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
+
+ ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
+ ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
+ ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+
+ ds->l_desc.dyn_tree = ds->dyn_ltree;
+ ds->d_desc.dyn_tree = ds->dyn_dtree;
+ ds->bl_desc.dyn_tree = ds->bl_tree;
+
+ return Z_OK;
+}
+
+/* ===========================================================================
+ * Return the number of bytes of output which are immediately available
+ * for output from the compressor.
+ */
+int deflateOutputPending (strm)
+ z_streamp strm;
+{
+ if (strm == Z_NULL || strm->state == Z_NULL) return 0;
+
+ return ((deflate_state *)(strm->state))->pending;
+}
+
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read. All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+local int read_buf(strm, buf, size)
+ z_streamp strm;
+ charf *buf;
+ unsigned size;
+{
+ unsigned len = strm->avail_in;
+
+ if (len > size) len = size;
+ if (len == 0) return 0;
+
+ strm->avail_in -= len;
+
+ if (!((deflate_state *)(strm->state))->noheader) {
+ strm->adler = adler32(strm->adler, strm->next_in, len);
+ }
+ zmemcpy(buf, strm->next_in, len);
+ strm->next_in += len;
+ strm->total_in += len;
+
+ return (int)len;
+}
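+
+/*
+ * The adler32() routine used above is defined elsewhere in the library;
+ * it maintains two running sums modulo 65521 (the largest prime below
+ * 2^16) over the input bytes D1..Dn:
+ *
+ *	s1 = 1 + D1 + D2 + ... + Dn
+ *	s2 = running sum of s1 after each byte
+ *	adler = (s2 << 16) | s1
+ */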
+
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ */
+local void lm_init (s)
+ deflate_state *s;
+{
+ s->window_size = (ulg)2L*s->w_size;
+
+ CLEAR_HASH(s);
+
+ /* Set the default configuration parameters:
+ */
+ s->max_lazy_match = configuration_table[s->level].max_lazy;
+ s->good_match = configuration_table[s->level].good_length;
+ s->nice_match = configuration_table[s->level].nice_length;
+ s->max_chain_length = configuration_table[s->level].max_chain;
+
+ s->strstart = 0;
+ s->block_start = 0L;
+ s->lookahead = 0;
+ s->match_length = s->prev_length = MIN_MATCH-1;
+ s->match_available = 0;
+ s->ins_h = 0;
+#ifdef ASMV
+ match_init(); /* initialize the asm code */
+#endif
+}
+
+/* ===========================================================================
+ * Set match_start to the longest match starting at the given string and
+ * return its length. Matches shorter or equal to prev_length are discarded,
+ * in which case the result is equal to prev_length and match_start is
+ * garbage.
+ * IN assertions: cur_match is the head of the hash chain for the current
+ * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
+ * OUT assertion: the match length is not greater than s->lookahead.
+ */
+#ifndef ASMV
+/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+ * match.S. The code will be functionally equivalent.
+ */
+local uInt longest_match(s, cur_match)
+ deflate_state *s;
+ IPos cur_match; /* current match */
+{
+ unsigned chain_length = s->max_chain_length;/* max hash chain length */
+ register Bytef *scan = s->window + s->strstart; /* current string */
+ register Bytef *match; /* matched string */
+ register int len; /* length of current match */
+ int best_len = s->prev_length; /* best match length so far */
+ int nice_match = s->nice_match; /* stop if match long enough */
+ IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+ s->strstart - (IPos)MAX_DIST(s) : NIL;
+ /* Stop when cur_match becomes <= limit. To simplify the code,
+ * we prevent matches with the string of window index 0.
+ */
+ Posf *prev = s->prev;
+ uInt wmask = s->w_mask;
+
+#ifdef UNALIGNED_OK
+ /* Compare two bytes at a time. Note: this is not always beneficial.
+ * Try with and without -DUNALIGNED_OK to check.
+ */
+ register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
+ register ush scan_start = *(ushf*)scan;
+ register ush scan_end = *(ushf*)(scan+best_len-1);
+#else
+ register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+ register Byte scan_end1 = scan[best_len-1];
+ register Byte scan_end = scan[best_len];
+#endif
+
+ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+ * It is easy to get rid of this optimization if necessary.
+ */
+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+ /* Do not waste too much time if we already have a good match: */
+ if (s->prev_length >= s->good_match) {
+ chain_length >>= 2;
+ }
+ /* Do not look for matches beyond the end of the input. This is necessary
+ * to make deflate deterministic.
+ */
+ if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+ Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+ do {
+ Assert(cur_match < s->strstart, "no future");
+ match = s->window + cur_match;
+
+ /* Skip to next match if the match length cannot increase
+ * or if the match length is less than 2:
+ */
+#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
+ /* This code assumes sizeof(unsigned short) == 2. Do not use
+ * UNALIGNED_OK if your compiler uses a different size.
+ */
+ if (*(ushf*)(match+best_len-1) != scan_end ||
+ *(ushf*)match != scan_start) continue;
+
+ /* It is not necessary to compare scan[2] and match[2] since they are
+ * always equal when the other bytes match, given that the hash keys
+ * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
+ * strstart+3, +5, ... up to strstart+257. We check for insufficient
+ * lookahead only every 4th comparison; the 128th check will be made
+ * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+ * necessary to put more guard bytes at the end of the window, or
+ * to check more often for insufficient lookahead.
+ */
+ Assert(scan[2] == match[2], "scan[2]?");
+ scan++, match++;
+ do {
+ } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+ *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+ *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+ *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+ scan < strend);
+ /* The funny "do {}" generates better code on most compilers */
+
+ /* Here, scan <= window+strstart+257 */
+ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+ if (*scan == *match) scan++;
+
+ len = (MAX_MATCH - 1) - (int)(strend-scan);
+ scan = strend - (MAX_MATCH-1);
+
+#else /* UNALIGNED_OK */
+
+ if (match[best_len] != scan_end ||
+ match[best_len-1] != scan_end1 ||
+ *match != *scan ||
+ *++match != scan[1]) continue;
+
+ /* The check at best_len-1 can be removed because it will be made
+ * again later. (This heuristic is not always a win.)
+ * It is not necessary to compare scan[2] and match[2] since they
+ * are always equal when the other bytes match, given that
+ * the hash keys are equal and that HASH_BITS >= 8.
+ */
+ scan += 2, match++;
+ Assert(*scan == *match, "match[2]?");
+
+ /* We check for insufficient lookahead only every 8th comparison;
+ * the 256th check will be made at strstart+258.
+ */
+ do {
+ } while (*++scan == *++match && *++scan == *++match &&
+ *++scan == *++match && *++scan == *++match &&
+ *++scan == *++match && *++scan == *++match &&
+ *++scan == *++match && *++scan == *++match &&
+ scan < strend);
+
+ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+ len = MAX_MATCH - (int)(strend - scan);
+ scan = strend - MAX_MATCH;
+
+#endif /* UNALIGNED_OK */
+
+ if (len > best_len) {
+ s->match_start = cur_match;
+ best_len = len;
+ if (len >= nice_match) break;
+#ifdef UNALIGNED_OK
+ scan_end = *(ushf*)(scan+best_len-1);
+#else
+ scan_end1 = scan[best_len-1];
+ scan_end = scan[best_len];
+#endif
+ }
+ } while ((cur_match = prev[cur_match & wmask]) > limit
+ && --chain_length != 0);
+
+ if ((uInt)best_len <= s->lookahead) return best_len;
+ return s->lookahead;
+}
+#endif /* ASMV */
+
+#ifdef DEBUG_ZLIB
+/* ===========================================================================
+ * Check that the match at match_start is indeed a match.
+ */
+local void check_match(s, start, match, length)
+ deflate_state *s;
+ IPos start, match;
+ int length;
+{
+ /* check that the match is indeed a match */
+ if (zmemcmp((charf *)s->window + match,
+ (charf *)s->window + start, length) != EQUAL) {
+ fprintf(stderr, " start %u, match %u, length %d\n",
+ start, match, length);
+ do {
+ fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
+ } while (--length != 0);
+ z_error("invalid match");
+ }
+ if (z_verbose > 1) {
+ fprintf(stderr,"\\[%d,%d]", start-match, length);
+ do { putc(s->window[start++], stderr); } while (--length != 0);
+ }
+}
+#else
+# define check_match(s, start, match, length)
+#endif
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ * At least one byte has been read, or avail_in == 0; reads are
+ * performed for at least two bytes (required for the zip translate_eol
+ * option -- not supported here).
+ */
+local void fill_window(s)
+ deflate_state *s;
+{
+ register unsigned n, m;
+ register Posf *p;
+ unsigned more; /* Amount of free space at the end of the window. */
+ uInt wsize = s->w_size;
+
+ do {
+ more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+ /* Deal with !@#$% 64K limit: */
+ if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+ more = wsize;
+
+ } else if (more == (unsigned)(-1)) {
+ /* Very unlikely, but possible on 16 bit machine if strstart == 0
+ * and lookahead == 1 (input done one byte at time)
+ */
+ more--;
+
+ /* If the window is almost full and there is insufficient lookahead,
+ * move the upper half to the lower one to make room in the upper half.
+ */
+ } else if (s->strstart >= wsize+MAX_DIST(s)) {
+
+ zmemcpy((charf *)s->window, (charf *)s->window+wsize,
+ (unsigned)wsize);
+ s->match_start -= wsize;
+ s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
+ s->block_start -= (long) wsize;
+
+ /* Slide the hash table (could be avoided with 32 bit values
+ at the expense of memory usage). We slide even when level == 0
+ to keep the hash table consistent if we switch back to level > 0
+ later. (Using level 0 permanently is not an optimal usage of
+ zlib, so we don't care about this pathological case.)
+ */
+ n = s->hash_size;
+ p = &s->head[n];
+ do {
+ m = *--p;
+ *p = (Pos)(m >= wsize ? m-wsize : NIL);
+ } while (--n);
+
+ n = wsize;
+ p = &s->prev[n];
+ do {
+ m = *--p;
+ *p = (Pos)(m >= wsize ? m-wsize : NIL);
+ /* If n is not on any hash chain, prev[n] is garbage but
+ * its value will never be used.
+ */
+ } while (--n);
+ more += wsize;
+ }
+ if (s->strm->avail_in == 0) return;
+
+ /* If there was no sliding:
+ * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+ * more == window_size - lookahead - strstart
+ * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+ * => more >= window_size - 2*WSIZE + 2
+ * In the BIG_MEM or MMAP case (not yet supported),
+ * window_size == input_size + MIN_LOOKAHEAD &&
+ * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+ * Otherwise, window_size == 2*WSIZE so more >= 2.
+ * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+ */
+ Assert(more >= 2, "more < 2");
+
+ n = read_buf(s->strm, (charf *)s->window + s->strstart + s->lookahead,
+ more);
+ s->lookahead += n;
+
+ /* Initialize the hash value now that we have some input: */
+ if (s->lookahead >= MIN_MATCH) {
+ s->ins_h = s->window[s->strstart];
+ UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+ Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+ }
+ /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+ * but this is not important since only literal bytes will be emitted.
+ */
+
+ } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+}
+
+/* ===========================================================================
+ * Flush the current block, with given end-of-file flag.
+ * IN assertion: strstart is set to the end of the current match.
+ */
+#define FLUSH_BLOCK_ONLY(s, eof) { \
+ _tr_flush_block(s, (s->block_start >= 0L ? \
+ (charf *)&s->window[(unsigned)s->block_start] : \
+ (charf *)Z_NULL), \
+ (ulg)((long)s->strstart - s->block_start), \
+ (eof)); \
+ s->block_start = s->strstart; \
+ flush_pending(s->strm); \
+ Tracev((stderr,"[FLUSH]")); \
+}
+
+/* Same but force premature exit if necessary. */
+#define FLUSH_BLOCK(s, eof) { \
+ FLUSH_BLOCK_ONLY(s, eof); \
+ if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
+}
+
+/* ===========================================================================
+ * Copy without compression as much as possible from the input stream, return
+ * the current block state.
+ * This function does not insert new strings in the dictionary since
+ * uncompressible data is probably not useful. This function is used
+ * only for the level=0 compression option.
+ * NOTE: this function should be optimized to avoid extra copying from
+ * window to pending_buf.
+ */
+local block_state deflate_stored(s, flush)
+ deflate_state *s;
+ int flush;
+{
+ /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
+ * to pending_buf_size, and each stored block has a 5 byte header:
+ */
+ ulg max_block_size = 0xffff;
+ ulg max_start;
+
+ if (max_block_size > s->pending_buf_size - 5) {
+ max_block_size = s->pending_buf_size - 5;
+ }
+
+ /* Copy as much as possible from input to output: */
+ for (;;) {
+ /* Fill the window as much as possible: */
+ if (s->lookahead <= 1) {
+
+ Assert(s->strstart < s->w_size+MAX_DIST(s) ||
+ s->block_start >= (long)s->w_size, "slide too late");
+
+ fill_window(s);
+ if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more;
+
+ if (s->lookahead == 0) break; /* flush the current block */
+ }
+ Assert(s->block_start >= 0L, "block gone");
+
+ s->strstart += s->lookahead;
+ s->lookahead = 0;
+
+ /* Emit a stored block if pending_buf will be full: */
+ max_start = s->block_start + max_block_size;
+ if (s->strstart == 0 || (ulg)s->strstart >= max_start) {
+ /* strstart == 0 is possible when wraparound on 16-bit machine */
+ s->lookahead = (uInt)(s->strstart - max_start);
+ s->strstart = (uInt)max_start;
+ FLUSH_BLOCK(s, 0);
+ }
+ /* Flush if we may have to slide, otherwise block_start may become
+ * negative and the data will be gone:
+ */
+ if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
+ FLUSH_BLOCK(s, 0);
+ }
+ }
+ FLUSH_BLOCK(s, flush == Z_FINISH);
+ return flush == Z_FINISH ? finish_done : block_done;
+}
+
+/* ===========================================================================
+ * Compress as much as possible from the input stream, return the current
+ * block state.
+ * This function does not perform lazy evaluation of matches and inserts
+ * new strings in the dictionary only for unmatched strings or for short
+ * matches. It is used only for the fast compression options.
+ */
+local block_state deflate_fast(s, flush)
+ deflate_state *s;
+ int flush;
+{
+ IPos hash_head = NIL; /* head of the hash chain */
+ int bflush; /* set if current block must be flushed */
+
+ for (;;) {
+ /* Make sure that we always have enough lookahead, except
+ * at the end of the input file. We need MAX_MATCH bytes
+ * for the next match, plus MIN_MATCH bytes to insert the
+ * string following the next match.
+ */
+ if (s->lookahead < MIN_LOOKAHEAD) {
+ fill_window(s);
+ if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+ return need_more;
+ }
+ if (s->lookahead == 0) break; /* flush the current block */
+ }
+
+ /* Insert the string window[strstart .. strstart+2] in the
+ * dictionary, and set hash_head to the head of the hash chain:
+ */
+ if (s->lookahead >= MIN_MATCH) {
+ INSERT_STRING(s, s->strstart, hash_head);
+ }
+
+ /* Find the longest match, discarding those <= prev_length.
+ * At this point we have always match_length < MIN_MATCH
+ */
+ if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
+ /* To simplify the code, we prevent matches with the string
+ * of window index 0 (in particular we have to avoid a match
+ * of the string with itself at the start of the input file).
+ */
+ if (s->strategy != Z_HUFFMAN_ONLY) {
+ s->match_length = longest_match (s, hash_head);
+ }
+ /* longest_match() sets match_start */
+ }
+ if (s->match_length >= MIN_MATCH) {
+ check_match(s, s->strstart, s->match_start, s->match_length);
+
+ bflush = _tr_tally(s, s->strstart - s->match_start,
+ s->match_length - MIN_MATCH);
+
+ s->lookahead -= s->match_length;
+
+ /* Insert new strings in the hash table only if the match length
+ * is not too large. This saves time but degrades compression.
+ */
+ if (s->match_length <= s->max_insert_length &&
+ s->lookahead >= MIN_MATCH) {
+ s->match_length--; /* string at strstart already in hash table */
+ do {
+ s->strstart++;
+ INSERT_STRING(s, s->strstart, hash_head);
+ /* strstart never exceeds WSIZE-MAX_MATCH, so there are
+ * always MIN_MATCH bytes ahead.
+ */
+ } while (--s->match_length != 0);
+ s->strstart++;
+ } else {
+ s->strstart += s->match_length;
+ s->match_length = 0;
+ s->ins_h = s->window[s->strstart];
+ UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+ Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+ /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
+ * matter since it will be recomputed at next deflate call.
+ */
+ }
+ } else {
+ /* No match, output a literal byte */
+ Tracevv((stderr,"%c", s->window[s->strstart]));
+ bflush = _tr_tally (s, 0, s->window[s->strstart]);
+ s->lookahead--;
+ s->strstart++;
+ }
+ if (bflush) FLUSH_BLOCK(s, 0);
+ }
+ FLUSH_BLOCK(s, flush == Z_FINISH);
+ return flush == Z_FINISH ? finish_done : block_done;
+}
+
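+/*
+ * Worked example of the lazy heuristic below: if position p matched "abc"
+ * (length 3) and the very next position matches "abcd" (length 4), the
+ * shorter match is demoted to a single literal for window[p] and the
+ * longer match is emitted instead; a match is only adopted when the next
+ * position cannot better it.
+ */
+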
+/* ===========================================================================
+ * Same as above, but achieves better compression. We use a lazy
+ * evaluation for matches: a match is finally adopted only if there is
+ * no better match at the next window position.
+ */
+local block_state deflate_slow(s, flush)
+ deflate_state *s;
+ int flush;
+{
+ IPos hash_head = NIL; /* head of hash chain */
+ int bflush; /* set if current block must be flushed */
+
+ /* Process the input block. */
+ for (;;) {
+ /* Make sure that we always have enough lookahead, except
+ * at the end of the input file. We need MAX_MATCH bytes
+ * for the next match, plus MIN_MATCH bytes to insert the
+ * string following the next match.
+ */
+ if (s->lookahead < MIN_LOOKAHEAD) {
+ fill_window(s);
+ if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+ return need_more;
+ }
+ if (s->lookahead == 0) break; /* flush the current block */
+ }
+
+ /* Insert the string window[strstart .. strstart+2] in the
+ * dictionary, and set hash_head to the head of the hash chain:
+ */
+ if (s->lookahead >= MIN_MATCH) {
+ INSERT_STRING(s, s->strstart, hash_head);
+ }
+
+ /* Find the longest match, discarding those <= prev_length.
+ */
+ s->prev_length = s->match_length, s->prev_match = s->match_start;
+ s->match_length = MIN_MATCH-1;
+
+ if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
+ s->strstart - hash_head <= MAX_DIST(s)) {
+ /* To simplify the code, we prevent matches with the string
+ * of window index 0 (in particular we have to avoid a match
+ * of the string with itself at the start of the input file).
+ */
+ if (s->strategy != Z_HUFFMAN_ONLY) {
+ s->match_length = longest_match (s, hash_head);
+ }
+ /* longest_match() sets match_start */
+
+ if (s->match_length <= 5 && (s->strategy == Z_FILTERED ||
+ (s->match_length == MIN_MATCH &&
+ s->strstart - s->match_start > TOO_FAR))) {
+
+ /* If prev_match is also MIN_MATCH, match_start is garbage
+ * but we will ignore the current match anyway.
+ */
+ s->match_length = MIN_MATCH-1;
+ }
+ }
+ /* If there was a match at the previous step and the current
+ * match is not better, output the previous match:
+ */
+ if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
+ uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
+ /* Do not insert strings in hash table beyond this. */
+
+ check_match(s, s->strstart-1, s->prev_match, s->prev_length);
+
+ bflush = _tr_tally(s, s->strstart -1 - s->prev_match,
+ s->prev_length - MIN_MATCH);
+
+ /* Insert in hash table all strings up to the end of the match.
+ * strstart-1 and strstart are already inserted. If there is not
+ * enough lookahead, the last two strings are not inserted in
+ * the hash table.
+ */
+ s->lookahead -= s->prev_length-1;
+ s->prev_length -= 2;
+ do {
+ if (++s->strstart <= max_insert) {
+ INSERT_STRING(s, s->strstart, hash_head);
+ }
+ } while (--s->prev_length != 0);
+ s->match_available = 0;
+ s->match_length = MIN_MATCH-1;
+ s->strstart++;
+
+ if (bflush) FLUSH_BLOCK(s, 0);
+
+ } else if (s->match_available) {
+ /* If there was no match at the previous position, output a
+ * single literal. If there was a match but the current match
+ * is longer, truncate the previous match to a single literal.
+ */
+ Tracevv((stderr,"%c", s->window[s->strstart-1]));
+ if (_tr_tally (s, 0, s->window[s->strstart-1])) {
+ FLUSH_BLOCK_ONLY(s, 0);
+ }
+ s->strstart++;
+ s->lookahead--;
+ if (s->strm->avail_out == 0) return need_more;
+ } else {
+ /* There is no previous match to compare with, wait for
+ * the next step to decide.
+ */
+ s->match_available = 1;
+ s->strstart++;
+ s->lookahead--;
+ }
+ }
+ Assert (flush != Z_NO_FLUSH, "no flush?");
+ if (s->match_available) {
+ Tracevv((stderr,"%c", s->window[s->strstart-1]));
+ _tr_tally (s, 0, s->window[s->strstart-1]);
+ s->match_available = 0;
+ }
+ FLUSH_BLOCK(s, flush == Z_FINISH);
+ return flush == Z_FINISH ? finish_done : block_done;
+}
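+
+/* Illustrative sketch (not part of zlib): the lazy-evaluation rule used
+ * by deflate_slow() above, reduced to a pure predicate. The previous
+ * match is emitted only when the match found one position later is no
+ * longer. Hypothetical helper, compiled out.
+ */
+#if 0
+local int emit_previous_match(uInt prev_length, uInt match_length)
+{
+    /* MIN_MATCH is 3 in deflate; anything shorter counts as no match. */
+    return prev_length >= MIN_MATCH && match_length <= prev_length;
+}
+#endif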
+/* --- deflate.c */
+
+/* +++ trees.c */
+/* trees.c -- output deflated data using Huffman coding
+ * Copyright (C) 1995-1996 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ * ALGORITHM
+ *
+ * The "deflation" process uses several Huffman trees. The more
+ * common source values are represented by shorter bit sequences.
+ *
+ * Each code tree is stored in a compressed form which is itself
+ * a Huffman encoding of the lengths of all the code strings (in
+ * ascending order by source values). The actual code strings are
+ * reconstructed from the lengths in the inflate process, as described
+ * in the deflate specification.
+ *
+ * REFERENCES
+ *
+ * Deutsch, L.P., "'Deflate' Compressed Data Format Specification".
+ * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
+ *
+ * Storer, James A.
+ * Data Compression: Methods and Theory, pp. 49-50.
+ * Computer Science Press, 1988. ISBN 0-7167-8156-5.
+ *
+ * Sedgewick, R.
+ * Algorithms, p290.
+ * Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ */
+
+/* From: trees.c,v 1.11 1996/07/24 13:41:06 me Exp $ */
+
+/* #include <freebsd/local/deflate.h> */
+
+#ifdef DEBUG_ZLIB
+# include <freebsd/ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6 16
+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
+
+#define REPZ_3_10 17
+/* repeat a zero length 3-10 times (3 bits of repeat count) */
+
+#define REPZ_11_138 18
+/* repeat a zero length 11-138 times (7 bits of repeat count) */
+
+local int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+local int extra_dbits[D_CODES] /* extra bits for each distance code */
+ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+local int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
+
+local uch bl_order[BL_CODES]
+ = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+#define Buf_size (8 * 2*sizeof(char))
+/* Number of bits used within bi_buf. (bi_buf might be implemented on
+ * more than 16 bits on some systems.)
+ */
+
+/* ===========================================================================
+ * Local data. These are initialized only once.
+ */
+
+local ct_data static_ltree[L_CODES+2];
+/* The static literal tree. Since the bit lengths are imposed, there is no
+ * need for the L_CODES extra codes used during heap construction. However,
+ * the codes 286 and 287 are needed to build a canonical tree (see _tr_init
+ * below).
+ */
+
+local ct_data static_dtree[D_CODES];
+/* The static distance tree. (Actually a trivial tree since all codes use
+ * 5 bits.)
+ */
+
+local uch dist_code[512];
+/* distance codes. The first 256 values correspond to the distances
+ * 3 .. 258, the last 256 values correspond to the top 8 bits of
+ * the 15 bit distances.
+ */
+
+local uch length_code[MAX_MATCH-MIN_MATCH+1];
+/* length code for each normalized match length (0 == MIN_MATCH) */
+
+local int base_length[LENGTH_CODES];
+/* First normalized length for each code (0 = MIN_MATCH) */
+
+local int base_dist[D_CODES];
+/* First normalized distance for each code (0 = distance of 1) */
+
+struct static_tree_desc_s {
+ ct_data *static_tree; /* static tree or NULL */
+ intf *extra_bits; /* extra bits for each code or NULL */
+ int extra_base; /* base index for extra_bits */
+ int elems; /* max number of elements in the tree */
+ int max_length; /* max bit length for the codes */
+};
+
+local static_tree_desc static_l_desc =
+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
+
+local static_tree_desc static_d_desc =
+{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};
+
+local static_tree_desc static_bl_desc =
+{(ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
+
+/* ===========================================================================
+ * Local (static) routines in this file.
+ */
+
+local void tr_static_init OF((void));
+local void init_block OF((deflate_state *s));
+local void pqdownheap OF((deflate_state *s, ct_data *tree, int k));
+local void gen_bitlen OF((deflate_state *s, tree_desc *desc));
+local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count));
+local void build_tree OF((deflate_state *s, tree_desc *desc));
+local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code));
+local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
+local int build_bl_tree OF((deflate_state *s));
+local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
+ int blcodes));
+local void compress_block OF((deflate_state *s, ct_data *ltree,
+ ct_data *dtree));
+local void set_data_type OF((deflate_state *s));
+local unsigned bi_reverse OF((unsigned value, int length));
+local void bi_windup OF((deflate_state *s));
+local void bi_flush OF((deflate_state *s));
+local void copy_block OF((deflate_state *s, charf *buf, unsigned len,
+ int header));
+
+#ifndef DEBUG_ZLIB
+# define send_code(s, c, tree) send_bits(s, tree[(c)].Code, tree[(c)].Len)
+ /* Send a code of the given tree. c and tree must not have side effects */
+
+#else /* DEBUG_ZLIB */
+# define send_code(s, c, tree) \
+ { if (verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
+ send_bits(s, tree[c].Code, tree[c].Len); }
+#endif
+
+#define d_code(dist) \
+ ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)])
+/* Mapping from a distance to a distance code. dist is the distance - 1 and
+ * must not have side effects. dist_code[256] and dist_code[257] are never
+ * used.
+ */
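+
+/* Worked example (illustrative): for a match distance of 1500, dist is
+ * 1499; since 1499 >= 256, d_code(1499) is dist_code[256 + (1499 >> 7)]
+ * = dist_code[267], i.e. the code is selected by the top bits of the
+ * 15-bit distance.
+ */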
+
+/* ===========================================================================
+ * Output a short LSB first on the stream.
+ * IN assertion: there is enough room in pendingBuf.
+ */
+#define put_short(s, w) { \
+ put_byte(s, (uch)((w) & 0xff)); \
+ put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Send a value on a given number of bits.
+ * IN assertion: length <= 16 and value fits in length bits.
+ */
+#ifdef DEBUG_ZLIB
+local void send_bits OF((deflate_state *s, int value, int length));
+
+local void send_bits(s, value, length)
+ deflate_state *s;
+ int value; /* value to send */
+ int length; /* number of bits */
+{
+ Tracevv((stderr," l %2d v %4x ", length, value));
+ Assert(length > 0 && length <= 15, "invalid length");
+ s->bits_sent += (ulg)length;
+
+ /* If not enough room in bi_buf, use (valid) bits from bi_buf and
+ * (16 - bi_valid) bits from value, leaving (length - (16-bi_valid))
+ * unused bits in value.
+ */
+ if (s->bi_valid > (int)Buf_size - length) {
+ s->bi_buf |= (value << s->bi_valid);
+ put_short(s, s->bi_buf);
+ s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
+ s->bi_valid += length - Buf_size;
+ } else {
+ s->bi_buf |= value << s->bi_valid;
+ s->bi_valid += length;
+ }
+}
+#else /* !DEBUG_ZLIB */
+
+#define send_bits(s, value, length) \
+{ int len = (length);\
+ if ((s)->bi_valid > (int)Buf_size - len) {\
+ int val = (value);\
+ (s)->bi_buf |= (val << (s)->bi_valid);\
+ put_short((s), (s)->bi_buf);\
+ (s)->bi_buf = (ush)val >> (Buf_size - (s)->bi_valid);\
+ (s)->bi_valid += len - Buf_size;\
+ } else {\
+ (s)->bi_buf |= (value) << (s)->bi_valid;\
+ (s)->bi_valid += len;\
+ }\
+}
+#endif /* DEBUG_ZLIB */
+
+/* the arguments must not have side effects */
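+
+/* Worked example (illustrative): with bi_valid == 14 and Buf_size == 16,
+ * send_bits(s, v, 5) does not fit: the low 2 bits of v complete bi_buf,
+ * which is flushed LSB first with put_short(), the high 3 bits of v seed
+ * the new buffer, and bi_valid becomes 14 + 5 - 16 == 3.
+ */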
+
+/* ===========================================================================
+ * Initialize the various 'constant' tables. In a multi-threaded environment,
+ * this function may be called by two threads concurrently, but this is
+ * harmless since both invocations do exactly the same thing.
+ */
+local void tr_static_init()
+{
+ static int static_init_done = 0;
+ int n; /* iterates over tree elements */
+ int bits; /* bit counter */
+ int length; /* length value */
+ int code; /* code value */
+ int dist; /* distance index */
+ ush bl_count[MAX_BITS+1];
+ /* number of codes at each bit length for an optimal tree */
+
+ if (static_init_done) return;
+
+ /* Initialize the mapping length (0..255) -> length code (0..28) */
+ length = 0;
+ for (code = 0; code < LENGTH_CODES-1; code++) {
+ base_length[code] = length;
+ for (n = 0; n < (1<<extra_lbits[code]); n++) {
+ length_code[length++] = (uch)code;
+ }
+ }
+ Assert (length == 256, "tr_static_init: length != 256");
+ /* Note that the length 255 (match length 258) can be represented
+ * in two different ways: code 284 + 5 bits or code 285, so we
+ * overwrite length_code[255] to use the best encoding:
+ */
+ length_code[length-1] = (uch)code;
+
+ /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
+ dist = 0;
+ for (code = 0 ; code < 16; code++) {
+ base_dist[code] = dist;
+ for (n = 0; n < (1<<extra_dbits[code]); n++) {
+ dist_code[dist++] = (uch)code;
+ }
+ }
+ Assert (dist == 256, "tr_static_init: dist != 256");
+ dist >>= 7; /* from now on, all distances are divided by 128 */
+ for ( ; code < D_CODES; code++) {
+ base_dist[code] = dist << 7;
+ for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
+ dist_code[256 + dist++] = (uch)code;
+ }
+ }
+ Assert (dist == 256, "tr_static_init: 256+dist != 512");
+
+ /* Construct the codes of the static literal tree */
+ for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
+ n = 0;
+ while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
+ while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
+ while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
+ while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
+ /* Codes 286 and 287 do not exist, but we must include them in the
+ * tree construction to get a canonical Huffman tree (longest code
+ * all ones)
+ */
+ gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
+
+ /* The static distance tree is trivial: */
+ for (n = 0; n < D_CODES; n++) {
+ static_dtree[n].Len = 5;
+ static_dtree[n].Code = bi_reverse((unsigned)n, 5);
+ }
+ static_init_done = 1;
+}
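+
+/* Worked example (illustrative): a match length of 18 is normalized to
+ * 18 - MIN_MATCH == 15; length_code[15] == 11, base_length[11] == 14 and
+ * extra_lbits[11] == 1, so the match is coded as literal/length symbol
+ * 257 + 11 == 268 plus one extra bit of value 15 - 14 == 1, matching the
+ * deflate specification (RFC 1951).
+ */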
+
+/* ===========================================================================
+ * Initialize the tree data structures for a new zlib stream.
+ */
+void _tr_init(s)
+ deflate_state *s;
+{
+ tr_static_init();
+
+ s->compressed_len = 0L;
+
+ s->l_desc.dyn_tree = s->dyn_ltree;
+ s->l_desc.stat_desc = &static_l_desc;
+
+ s->d_desc.dyn_tree = s->dyn_dtree;
+ s->d_desc.stat_desc = &static_d_desc;
+
+ s->bl_desc.dyn_tree = s->bl_tree;
+ s->bl_desc.stat_desc = &static_bl_desc;
+
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+ s->last_eob_len = 8; /* enough lookahead for inflate */
+#ifdef DEBUG_ZLIB
+ s->bits_sent = 0L;
+#endif
+
+ /* Initialize the first block of the first file: */
+ init_block(s);
+}
+
+/* ===========================================================================
+ * Initialize a new block.
+ */
+local void init_block(s)
+ deflate_state *s;
+{
+ int n; /* iterates over tree elements */
+
+ /* Initialize the trees. */
+ for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
+ for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
+ for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
+
+ s->dyn_ltree[END_BLOCK].Freq = 1;
+ s->opt_len = s->static_len = 0L;
+ s->last_lit = s->matches = 0;
+}
+
+#define SMALLEST 1
+/* Index within the heap array of the least frequent node in the Huffman tree */
+
+
+/* ===========================================================================
+ * Remove the smallest element from the heap and recreate the heap with
+ * one less element. Updates heap and heap_len.
+ */
+#define pqremove(s, tree, top) \
+{\
+ top = s->heap[SMALLEST]; \
+ s->heap[SMALLEST] = s->heap[s->heap_len--]; \
+ pqdownheap(s, tree, SMALLEST); \
+}
+
+/* ===========================================================================
+ * Compares two subtrees, using the tree depth as tie breaker when
+ * the subtrees have equal frequency. This minimizes the worst case length.
+ */
+#define smaller(tree, n, m, depth) \
+ (tree[n].Freq < tree[m].Freq || \
+ (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
+
+/* ===========================================================================
+ * Restore the heap property by moving down the tree starting at node k,
+ * exchanging a node with the smallest of its two sons if necessary, stopping
+ * when the heap property is re-established (each father smaller than its
+ * two sons).
+ */
+local void pqdownheap(s, tree, k)
+ deflate_state *s;
+ ct_data *tree; /* the tree to restore */
+ int k; /* node to move down */
+{
+ int v = s->heap[k];
+ int j = k << 1; /* left son of k */
+ while (j <= s->heap_len) {
+ /* Set j to the smallest of the two sons: */
+ if (j < s->heap_len &&
+ smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
+ j++;
+ }
+ /* Exit if v is smaller than both sons */
+ if (smaller(tree, v, s->heap[j], s->depth)) break;
+
+ /* Exchange v with the smallest son */
+ s->heap[k] = s->heap[j]; k = j;
+
+ /* And continue down the tree, setting j to the left son of k */
+ j <<= 1;
+ }
+ s->heap[k] = v;
+}
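+
+/* Worked example (illustrative): in a heap whose root, left and right
+ * sons have frequencies 9, 2 and 5, pqdownheap(s, tree, SMALLEST) swaps
+ * the root with the smaller son (frequency 2), restoring the property
+ * that each father is smaller than its two sons.
+ */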
+
+/* ===========================================================================
+ * Compute the optimal bit lengths for a tree and update the total bit length
+ * for the current block.
+ * IN assertion: the fields freq and dad are set, heap[heap_max] and
+ * above are the tree nodes sorted by increasing frequency.
+ * OUT assertions: the field len is set to the optimal bit length, the
+ * array bl_count contains the frequencies for each bit length.
+ * The length opt_len is updated; static_len is also updated if stree is
+ * not null.
+ */
+local void gen_bitlen(s, desc)
+ deflate_state *s;
+ tree_desc *desc; /* the tree descriptor */
+{
+ ct_data *tree = desc->dyn_tree;
+ int max_code = desc->max_code;
+ ct_data *stree = desc->stat_desc->static_tree;
+ intf *extra = desc->stat_desc->extra_bits;
+ int base = desc->stat_desc->extra_base;
+ int max_length = desc->stat_desc->max_length;
+ int h; /* heap index */
+ int n, m; /* iterate over the tree elements */
+ int bits; /* bit length */
+ int xbits; /* extra bits */
+ ush f; /* frequency */
+ int overflow = 0; /* number of elements with bit length too large */
+
+ for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
+
+ /* In a first pass, compute the optimal bit lengths (which may
+ * overflow in the case of the bit length tree).
+ */
+ tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
+
+ for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
+ n = s->heap[h];
+ bits = tree[tree[n].Dad].Len + 1;
+ if (bits > max_length) bits = max_length, overflow++;
+ tree[n].Len = (ush)bits;
+ /* We overwrite tree[n].Dad which is no longer needed */
+
+ if (n > max_code) continue; /* not a leaf node */
+
+ s->bl_count[bits]++;
+ xbits = 0;
+ if (n >= base) xbits = extra[n-base];
+ f = tree[n].Freq;
+ s->opt_len += (ulg)f * (bits + xbits);
+ if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
+ }
+ if (overflow == 0) return;
+
+ Trace((stderr,"\nbit length overflow\n"));
+ /* This happens for example on obj2 and pic of the Calgary corpus */
+
+ /* Find the first bit length which could increase: */
+ do {
+ bits = max_length-1;
+ while (s->bl_count[bits] == 0) bits--;
+ s->bl_count[bits]--; /* move one leaf down the tree */
+ s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
+ s->bl_count[max_length]--;
+ /* The brother of the overflow item also moves one step up,
+ * but this does not affect bl_count[max_length]
+ */
+ overflow -= 2;
+ } while (overflow > 0);
+
+ /* Now recompute all bit lengths, scanning in increasing frequency.
+ * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
+ * lengths instead of fixing only the wrong ones. This idea is taken
+ * from 'ar' written by Haruhiko Okumura.)
+ */
+ for (bits = max_length; bits != 0; bits--) {
+ n = s->bl_count[bits];
+ while (n != 0) {
+ m = s->heap[--h];
+ if (m > max_code) continue;
+ if (tree[m].Len != (unsigned) bits) {
+ Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+ s->opt_len += ((long)bits - (long)tree[m].Len)
+ *(long)tree[m].Freq;
+ tree[m].Len = (ush)bits;
+ }
+ n--;
+ }
+ }
+}
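+
+/* Illustrative note on the overflow loop above: demoting one leaf from a
+ * depth below max_length to the next depth creates room for two leaves
+ * there, so each pass accommodates one overflowed leaf while its brother
+ * moves one step up, repairing two overflow items (overflow -= 2).
+ */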
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal).
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ * zero code length.
+ */
+local void gen_codes (tree, max_code, bl_count)
+ ct_data *tree; /* the tree to decorate */
+ int max_code; /* largest code with non zero frequency */
+ ushf *bl_count; /* number of codes at each bit length */
+{
+ ush next_code[MAX_BITS+1]; /* next code value for each bit length */
+ ush code = 0; /* running code value */
+ int bits; /* bit index */
+ int n; /* code index */
+
+ /* The distribution counts are first used to generate the code values
+ * without bit reversal.
+ */
+ for (bits = 1; bits <= MAX_BITS; bits++) {
+ next_code[bits] = code = (code + bl_count[bits-1]) << 1;
+ }
+ /* Check that the bit counts in bl_count are consistent. The last code
+ * must be all ones.
+ */
+ Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
+ "inconsistent bit counts");
+ Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+ for (n = 0; n <= max_code; n++) {
+ int len = tree[n].Len;
+ if (len == 0) continue;
+ /* Now reverse the bits */
+ tree[n].Code = bi_reverse(next_code[len]++, len);
+
+ Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
+ n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
+ }
+}
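+
+/* Worked example (illustrative, cf. RFC 1951): for four symbols with bit
+ * lengths {2,1,3,3}, bl_count[1..3] == {1,1,2} and next_code[1..3] ==
+ * {0,2,6}; assigning in symbol order yields the canonical codes 10, 0,
+ * 110 and 111, each emitted bit-reversed because deflate sends codes
+ * LSB first.
+ */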
+
+/* ===========================================================================
+ * Construct one Huffman tree and assign the code bit strings and lengths.
+ * Update the total bit length for the current block.
+ * IN assertion: the field freq is set for all tree elements.
+ * OUT assertions: the fields len and code are set to the optimal bit length
+ * and corresponding code. The length opt_len is updated; static_len is
+ * also updated if stree is not null. The field max_code is set.
+ */
+local void build_tree(s, desc)
+ deflate_state *s;
+ tree_desc *desc; /* the tree descriptor */
+{
+ ct_data *tree = desc->dyn_tree;
+ ct_data *stree = desc->stat_desc->static_tree;
+ int elems = desc->stat_desc->elems;
+ int n, m; /* iterate over heap elements */
+ int max_code = -1; /* largest code with non zero frequency */
+ int node; /* new node being created */
+
+ /* Construct the initial heap, with least frequent element in
+ * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
+ * heap[0] is not used.
+ */
+ s->heap_len = 0, s->heap_max = HEAP_SIZE;
+
+ for (n = 0; n < elems; n++) {
+ if (tree[n].Freq != 0) {
+ s->heap[++(s->heap_len)] = max_code = n;
+ s->depth[n] = 0;
+ } else {
+ tree[n].Len = 0;
+ }
+ }
+
+ /* The pkzip format requires that at least one distance code exists,
+ * and that at least one bit should be sent even if there is only one
+ * possible code. So to avoid special checks later on we force at least
+ * two codes of non zero frequency.
+ */
+ while (s->heap_len < 2) {
+ node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
+ tree[node].Freq = 1;
+ s->depth[node] = 0;
+ s->opt_len--; if (stree) s->static_len -= stree[node].Len;
+ /* node is 0 or 1 so it does not have extra bits */
+ }
+ desc->max_code = max_code;
+
+    /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree;
+     * establish sub-heaps of increasing lengths:
+ */
+ for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
+
+ /* Construct the Huffman tree by repeatedly combining the least two
+ * frequent nodes.
+ */
+ node = elems; /* next internal node of the tree */
+ do {
+ pqremove(s, tree, n); /* n = node of least frequency */
+ m = s->heap[SMALLEST]; /* m = node of next least frequency */
+
+ s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
+ s->heap[--(s->heap_max)] = m;
+
+ /* Create a new node father of n and m */
+ tree[node].Freq = tree[n].Freq + tree[m].Freq;
+ s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1);
+ tree[n].Dad = tree[m].Dad = (ush)node;
+#ifdef DUMP_BL_TREE
+ if (tree == s->bl_tree) {
+ fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
+ node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
+ }
+#endif
+ /* and insert the new node in the heap */
+ s->heap[SMALLEST] = node++;
+ pqdownheap(s, tree, SMALLEST);
+
+ } while (s->heap_len >= 2);
+
+ s->heap[--(s->heap_max)] = s->heap[SMALLEST];
+
+ /* At this point, the fields freq and dad are set. We can now
+ * generate the bit lengths.
+ */
+ gen_bitlen(s, (tree_desc *)desc);
+
+ /* The field len is now set, we can generate the bit codes */
+ gen_codes ((ct_data *)tree, max_code, s->bl_count);
+}
+
+/* ===========================================================================
+ * Scan a literal or distance tree to determine the frequencies of the codes
+ * in the bit length tree.
+ */
+local void scan_tree (s, tree, max_code)
+ deflate_state *s;
+ ct_data *tree; /* the tree to be scanned */
+ int max_code; /* and its largest code of non zero frequency */
+{
+ int n; /* iterates over all tree elements */
+ int prevlen = -1; /* last emitted length */
+ int curlen; /* length of current code */
+ int nextlen = tree[0].Len; /* length of next code */
+ int count = 0; /* repeat count of the current code */
+ int max_count = 7; /* max repeat count */
+ int min_count = 4; /* min repeat count */
+
+ if (nextlen == 0) max_count = 138, min_count = 3;
+ tree[max_code+1].Len = (ush)0xffff; /* guard */
+
+ for (n = 0; n <= max_code; n++) {
+ curlen = nextlen; nextlen = tree[n+1].Len;
+ if (++count < max_count && curlen == nextlen) {
+ continue;
+ } else if (count < min_count) {
+ s->bl_tree[curlen].Freq += count;
+ } else if (curlen != 0) {
+ if (curlen != prevlen) s->bl_tree[curlen].Freq++;
+ s->bl_tree[REP_3_6].Freq++;
+ } else if (count <= 10) {
+ s->bl_tree[REPZ_3_10].Freq++;
+ } else {
+ s->bl_tree[REPZ_11_138].Freq++;
+ }
+ count = 0; prevlen = curlen;
+ if (nextlen == 0) {
+ max_count = 138, min_count = 3;
+ } else if (curlen == nextlen) {
+ max_count = 6, min_count = 3;
+ } else {
+ max_count = 7, min_count = 4;
+ }
+ }
+}
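+
+/* Worked example (illustrative): the code-length run 4,4,4,4,4 followed
+ * by 0,0,0 is tallied as one literal "4", one REP_3_6 covering the
+ * remaining four repeats, and one REPZ_3_10 for the three zero lengths;
+ * send_tree() below emits the repeat counts in 2 and 3 extra bits
+ * respectively.
+ */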
+
+/* ===========================================================================
+ * Send a literal or distance tree in compressed form, using the codes in
+ * bl_tree.
+ */
+local void send_tree (s, tree, max_code)
+ deflate_state *s;
+ ct_data *tree; /* the tree to be scanned */
+ int max_code; /* and its largest code of non zero frequency */
+{
+ int n; /* iterates over all tree elements */
+ int prevlen = -1; /* last emitted length */
+ int curlen; /* length of current code */
+ int nextlen = tree[0].Len; /* length of next code */
+ int count = 0; /* repeat count of the current code */
+ int max_count = 7; /* max repeat count */
+ int min_count = 4; /* min repeat count */
+
+ /* tree[max_code+1].Len = -1; */ /* guard already set */
+ if (nextlen == 0) max_count = 138, min_count = 3;
+
+ for (n = 0; n <= max_code; n++) {
+ curlen = nextlen; nextlen = tree[n+1].Len;
+ if (++count < max_count && curlen == nextlen) {
+ continue;
+ } else if (count < min_count) {
+ do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
+
+ } else if (curlen != 0) {
+ if (curlen != prevlen) {
+ send_code(s, curlen, s->bl_tree); count--;
+ }
+ Assert(count >= 3 && count <= 6, " 3_6?");
+ send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
+
+ } else if (count <= 10) {
+ send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
+
+ } else {
+ send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
+ }
+ count = 0; prevlen = curlen;
+ if (nextlen == 0) {
+ max_count = 138, min_count = 3;
+ } else if (curlen == nextlen) {
+ max_count = 6, min_count = 3;
+ } else {
+ max_count = 7, min_count = 4;
+ }
+ }
+}
+
+/* ===========================================================================
+ * Construct the Huffman tree for the bit lengths and return the index in
+ * bl_order of the last bit length code to send.
+ */
+local int build_bl_tree(s)
+ deflate_state *s;
+{
+ int max_blindex; /* index of last bit length code of non zero freq */
+
+ /* Determine the bit length frequencies for literal and distance trees */
+ scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
+ scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
+
+ /* Build the bit length tree: */
+ build_tree(s, (tree_desc *)(&(s->bl_desc)));
+ /* opt_len now includes the length of the tree representations, except
+     * the lengths of the bit length codes and the 5+5+4 bits for the counts.
+ */
+
+ /* Determine the number of bit length codes to send. The pkzip format
+ * requires that at least 4 bit length codes be sent. (appnote.txt says
+ * 3 but the actual value used is 4.)
+ */
+ for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
+ if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
+ }
+ /* Update opt_len to include the bit length tree and counts */
+ s->opt_len += 3*(max_blindex+1) + 5+5+4;
+ Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
+ s->opt_len, s->static_len));
+
+ return max_blindex;
+}
+
+/* ===========================================================================
+ * Send the header for a block using dynamic Huffman trees: the counts, the
+ * lengths of the bit length codes, the literal tree and the distance tree.
+ * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
+ */
+local void send_all_trees(s, lcodes, dcodes, blcodes)
+ deflate_state *s;
+ int lcodes, dcodes, blcodes; /* number of codes for each tree */
+{
+ int rank; /* index in bl_order */
+
+ Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
+ Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
+ "too many codes");
+ Tracev((stderr, "\nbl counts: "));
+ send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
+ send_bits(s, dcodes-1, 5);
+ send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */
+ for (rank = 0; rank < blcodes; rank++) {
+ Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
+ send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
+ }
+ Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
+
+ send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
+ Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
+
+ send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
+ Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
+}
+
+/* ===========================================================================
+ * Send a stored block
+ */
+void _tr_stored_block(s, buf, stored_len, eof)
+ deflate_state *s;
+ charf *buf; /* input block */
+ ulg stored_len; /* length of input block */
+ int eof; /* true if this is the last block for a file */
+{
+ send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */
+ s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
+ s->compressed_len += (stored_len + 4) << 3;
+
+ copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
+}
+
+/* Send just the `stored block' type code without any length bytes or data.
+ */
+void _tr_stored_type_only(s)
+ deflate_state *s;
+{
+ send_bits(s, (STORED_BLOCK << 1), 3);
+ bi_windup(s);
+ s->compressed_len = (s->compressed_len + 3) & ~7L;
+}
+
+
+/* ===========================================================================
+ * Send one empty static block to give enough lookahead for inflate.
+ * This takes 10 bits, of which 7 may remain in the bit buffer.
+ * The current inflate code requires 9 bits of lookahead. If the
+ * last two codes for the previous block (real code plus EOB) were coded
+ * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
+ * the last real code. In this case we send two empty static blocks instead
+ * of one. (There are no problems if the previous block is stored or fixed.)
+ * To simplify the code, we assume the worst case of last real code encoded
+ * on one bit only.
+ */
+void _tr_align(s)
+ deflate_state *s;
+{
+ send_bits(s, STATIC_TREES<<1, 3);
+ send_code(s, END_BLOCK, static_ltree);
+ s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
+ bi_flush(s);
+ /* Of the 10 bits for the empty block, we have already sent
+ * (10 - bi_valid) bits. The lookahead for the last real code (before
+ * the EOB of the previous block) was thus at least one plus the length
+ * of the EOB plus what we have just sent of the empty static block.
+ */
+ if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
+ send_bits(s, STATIC_TREES<<1, 3);
+ send_code(s, END_BLOCK, static_ltree);
+ s->compressed_len += 10L;
+ bi_flush(s);
+ }
+ s->last_eob_len = 7;
+}
+
+/* ===========================================================================
+ * Determine the best encoding for the current block: dynamic trees, static
+ * trees or store, and output the encoded block to the zip file. This function
+ * returns the total compressed length for the file so far.
+ */
+ulg _tr_flush_block(s, buf, stored_len, eof)
+ deflate_state *s;
+ charf *buf; /* input block, or NULL if too old */
+ ulg stored_len; /* length of input block */
+ int eof; /* true if this is the last block for a file */
+{
+ ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
+ int max_blindex = 0; /* index of last bit length code of non zero freq */
+
+ /* Build the Huffman trees unless a stored block is forced */
+ if (s->level > 0) {
+
+ /* Check if the file is ascii or binary */
+ if (s->data_type == Z_UNKNOWN) set_data_type(s);
+
+ /* Construct the literal and distance trees */
+ build_tree(s, (tree_desc *)(&(s->l_desc)));
+ Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
+ s->static_len));
+
+ build_tree(s, (tree_desc *)(&(s->d_desc)));
+ Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
+ s->static_len));
+ /* At this point, opt_len and static_len are the total bit lengths of
+ * the compressed block data, excluding the tree representations.
+ */
+
+ /* Build the bit length tree for the above two trees, and get the index
+ * in bl_order of the last bit length code to send.
+ */
+ max_blindex = build_bl_tree(s);
+
+ /* Determine the best encoding. Compute first the block length in bytes*/
+ opt_lenb = (s->opt_len+3+7)>>3;
+ static_lenb = (s->static_len+3+7)>>3;
+
+ Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
+ opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
+ s->last_lit));
+
+ if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
+
+ } else {
+ Assert(buf != (char*)0, "lost buf");
+ opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
+ }
+
+ /* If compression failed and this is the first and last block,
+ * and if the .zip file is seekable (to rewrite the local header),
+ * the whole file is transformed into a stored file:
+ */
+#ifdef STORED_FILE_OK
+# ifdef FORCE_STORED_FILE
+ if (eof && s->compressed_len == 0L) { /* force stored file */
+# else
+ if (stored_len <= opt_lenb && eof && s->compressed_len==0L && seekable()) {
+# endif
+ /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */
+ if (buf == (charf*)0) error ("block vanished");
+
+ copy_block(s, buf, (unsigned)stored_len, 0); /* without header */
+ s->compressed_len = stored_len << 3;
+ s->method = STORED;
+ } else
+#endif /* STORED_FILE_OK */
+
+#ifdef FORCE_STORED
+ if (buf != (char*)0) { /* force stored block */
+#else
+ if (stored_len+4 <= opt_lenb && buf != (char*)0) {
+ /* 4: two words for the lengths */
+#endif
+ /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
+ * Otherwise we can't have processed more than WSIZE input bytes since
+ * the last block flush, because compression would have been
+ * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
+ * transform a block into a stored block.
+ */
+ _tr_stored_block(s, buf, stored_len, eof);
+
+#ifdef FORCE_STATIC
+ } else if (static_lenb >= 0) { /* force static trees */
+#else
+ } else if (static_lenb == opt_lenb) {
+#endif
+ send_bits(s, (STATIC_TREES<<1)+eof, 3);
+ compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
+ s->compressed_len += 3 + s->static_len;
+ } else {
+ send_bits(s, (DYN_TREES<<1)+eof, 3);
+ send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
+ max_blindex+1);
+ compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
+ s->compressed_len += 3 + s->opt_len;
+ }
+ Assert (s->compressed_len == s->bits_sent, "bad compressed size");
+ init_block(s);
+
+ if (eof) {
+ bi_windup(s);
+ s->compressed_len += 7; /* align on byte boundary */
+ }
+ Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
+ s->compressed_len-7*eof));
+
+ return s->compressed_len >> 3;
+}
+
+/* ===========================================================================
+ * Save the match info and tally the frequency counts. Return true if
+ * the current block must be flushed.
+ */
+int _tr_tally (s, dist, lc)
+ deflate_state *s;
+ unsigned dist; /* distance of matched string */
+ unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
+{
+ s->d_buf[s->last_lit] = (ush)dist;
+ s->l_buf[s->last_lit++] = (uch)lc;
+ if (dist == 0) {
+ /* lc is the unmatched char */
+ s->dyn_ltree[lc].Freq++;
+ } else {
+ s->matches++;
+ /* Here, lc is the match length - MIN_MATCH */
+ dist--; /* dist = match distance - 1 */
+ Assert((ush)dist < (ush)MAX_DIST(s) &&
+ (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
+ (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");
+
+ s->dyn_ltree[length_code[lc]+LITERALS+1].Freq++;
+ s->dyn_dtree[d_code(dist)].Freq++;
+ }
+
+ /* Try to guess if it is profitable to stop the current block here */
+ if (s->level > 2 && (s->last_lit & 0xfff) == 0) {
+ /* Compute an upper bound for the compressed length */
+ ulg out_length = (ulg)s->last_lit*8L;
+ ulg in_length = (ulg)((long)s->strstart - s->block_start);
+ int dcode;
+ for (dcode = 0; dcode < D_CODES; dcode++) {
+ out_length += (ulg)s->dyn_dtree[dcode].Freq *
+ (5L+extra_dbits[dcode]);
+ }
+ out_length >>= 3;
+ Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
+ s->last_lit, in_length, out_length,
+ 100L - out_length*100L/in_length));
+ if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
+ }
+ return (s->last_lit == s->lit_bufsize-1);
+ /* We avoid equality with lit_bufsize because of wraparound at 64K
+ * on 16 bit machines and because stored blocks are restricted to
+ * 64K-1 bytes.
+ */
+}
+
+/* ===========================================================================
+ * Send the block data compressed using the given Huffman trees
+ */
+local void compress_block(s, ltree, dtree)
+ deflate_state *s;
+ ct_data *ltree; /* literal tree */
+ ct_data *dtree; /* distance tree */
+{
+ unsigned dist; /* distance of matched string */
+ int lc; /* match length or unmatched char (if dist == 0) */
+ unsigned lx = 0; /* running index in l_buf */
+ unsigned code; /* the code to send */
+ int extra; /* number of extra bits to send */
+
+ if (s->last_lit != 0) do {
+ dist = s->d_buf[lx];
+ lc = s->l_buf[lx++];
+ if (dist == 0) {
+ send_code(s, lc, ltree); /* send a literal byte */
+ Tracecv(isgraph(lc), (stderr," '%c' ", lc));
+ } else {
+ /* Here, lc is the match length - MIN_MATCH */
+ code = length_code[lc];
+ send_code(s, code+LITERALS+1, ltree); /* send the length code */
+ extra = extra_lbits[code];
+ if (extra != 0) {
+ lc -= base_length[code];
+ send_bits(s, lc, extra); /* send the extra length bits */
+ }
+ dist--; /* dist is now the match distance - 1 */
+ code = d_code(dist);
+ Assert (code < D_CODES, "bad d_code");
+
+ send_code(s, code, dtree); /* send the distance code */
+ extra = extra_dbits[code];
+ if (extra != 0) {
+ dist -= base_dist[code];
+ send_bits(s, dist, extra); /* send the extra distance bits */
+ }
+ } /* literal or match pair ? */
+
+ /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
+ Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow");
+
+ } while (lx < s->last_lit);
+
+ send_code(s, END_BLOCK, ltree);
+ s->last_eob_len = ltree[END_BLOCK].Len;
+}
+
+/* ===========================================================================
+ * Set the data type to ASCII or BINARY, using a crude approximation:
+ * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
+ * IN assertion: the fields freq of dyn_ltree are set and the total of all
+ * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
+ */
+local void set_data_type(s)
+ deflate_state *s;
+{
+ int n = 0;
+ unsigned ascii_freq = 0;
+ unsigned bin_freq = 0;
+ while (n < 7) bin_freq += s->dyn_ltree[n++].Freq;
+ while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq;
+ while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq;
+ s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII);
+}
+
+/* ===========================================================================
+ * Reverse the first len bits of a code, using straightforward code (a faster
+ * method would use a table)
+ * IN assertion: 1 <= len <= 15
+ */
+local unsigned bi_reverse(code, len)
+ unsigned code; /* the value to invert */
+ int len; /* its bit length */
+{
+ register unsigned res = 0;
+ do {
+ res |= code & 1;
+ code >>= 1, res <<= 1;
+ } while (--len > 0);
+ return res >> 1;
+}
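+
+/* Worked example (illustrative): bi_reverse(0x13, 5) turns the five bits
+ * 10011 into 11001 and returns 0x19.
+ */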
+
+/* ===========================================================================
+ * Flush the bit buffer, keeping at most 7 bits in it.
+ */
+local void bi_flush(s)
+ deflate_state *s;
+{
+ if (s->bi_valid == 16) {
+ put_short(s, s->bi_buf);
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+ } else if (s->bi_valid >= 8) {
+ put_byte(s, (Byte)s->bi_buf);
+ s->bi_buf >>= 8;
+ s->bi_valid -= 8;
+ }
+}
+
+/* ===========================================================================
+ * Flush the bit buffer and align the output on a byte boundary
+ */
+local void bi_windup(s)
+ deflate_state *s;
+{
+ if (s->bi_valid > 8) {
+ put_short(s, s->bi_buf);
+ } else if (s->bi_valid > 0) {
+ put_byte(s, (Byte)s->bi_buf);
+ }
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+#ifdef DEBUG_ZLIB
+ s->bits_sent = (s->bits_sent+7) & ~7;
+#endif
+}
+
+/* ===========================================================================
+ * Copy a stored block, storing first the length and its
+ * one's complement if requested.
+ */
+local void copy_block(s, buf, len, header)
+ deflate_state *s;
+ charf *buf; /* the input data */
+ unsigned len; /* its length */
+ int header; /* true if block header must be written */
+{
+ bi_windup(s); /* align on byte boundary */
+ s->last_eob_len = 8; /* enough lookahead for inflate */
+
+ if (header) {
+ put_short(s, (ush)len);
+ put_short(s, (ush)~len);
+#ifdef DEBUG_ZLIB
+ s->bits_sent += 2*16;
+#endif
+ }
+#ifdef DEBUG_ZLIB
+ s->bits_sent += (ulg)len<<3;
+#endif
+ /* bundle up the put_byte(s, *buf++) calls */
+ zmemcpy(&s->pending_buf[s->pending], buf, len);
+ s->pending += len;
+}
+/* --- trees.c */
+
+/* +++ inflate.c */
+/* inflate.c -- zlib interface to inflate modules
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* #include <freebsd/local/zutil.h> */
+
+/* +++ infblock.h */
+/* infblock.h -- header to use infblock.c
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+struct inflate_blocks_state;
+typedef struct inflate_blocks_state FAR inflate_blocks_statef;
+
+extern inflate_blocks_statef * inflate_blocks_new OF((
+ z_streamp z,
+ check_func c, /* check function */
+ uInt w)); /* window size */
+
+extern int inflate_blocks OF((
+ inflate_blocks_statef *,
+ z_streamp ,
+ int)); /* initial return code */
+
+extern void inflate_blocks_reset OF((
+ inflate_blocks_statef *,
+ z_streamp ,
+ uLongf *)); /* check value on output */
+
+extern int inflate_blocks_free OF((
+ inflate_blocks_statef *,
+ z_streamp ,
+ uLongf *)); /* check value on output */
+
+extern void inflate_set_dictionary OF((
+ inflate_blocks_statef *s,
+ const Bytef *d, /* dictionary */
+ uInt n)); /* dictionary length */
+
+extern int inflate_addhistory OF((
+ inflate_blocks_statef *,
+ z_streamp));
+
+extern int inflate_packet_flush OF((
+ inflate_blocks_statef *));
+/* --- infblock.h */
+
+#ifndef NO_DUMMY_DECL
+struct inflate_blocks_state {int dummy;}; /* for buggy compilers */
+#endif
+
+/* inflate private state */
+struct internal_state {
+
+ /* mode */
+ enum {
+ METHOD, /* waiting for method byte */
+ FLAG, /* waiting for flag byte */
+ DICT4, /* four dictionary check bytes to go */
+ DICT3, /* three dictionary check bytes to go */
+ DICT2, /* two dictionary check bytes to go */
+ DICT1, /* one dictionary check byte to go */
+ DICT0, /* waiting for inflateSetDictionary */
+ BLOCKS, /* decompressing blocks */
+ CHECK4, /* four check bytes to go */
+ CHECK3, /* three check bytes to go */
+ CHECK2, /* two check bytes to go */
+ CHECK1, /* one check byte to go */
+ DONE, /* finished check, done */
+ BAD} /* got an error--stay here */
+ mode; /* current inflate mode */
+
+ /* mode dependent information */
+ union {
+ uInt method; /* if FLAGS, method byte */
+ struct {
+ uLong was; /* computed check value */
+ uLong need; /* stream check value */
+ } check; /* if CHECK, check values to compare */
+ uInt marker; /* if BAD, inflateSync's marker bytes count */
+ } sub; /* submode */
+
+ /* mode independent information */
+ int nowrap; /* flag for no wrapper */
+ uInt wbits; /* log2(window size) (8..15, defaults to 15) */
+ inflate_blocks_statef
+ *blocks; /* current inflate_blocks state */
+
+};
+
+
+int inflateReset(z)
+z_streamp z;
+{
+ uLong c;
+
+ if (z == Z_NULL || z->state == Z_NULL)
+ return Z_STREAM_ERROR;
+ z->total_in = z->total_out = 0;
+ z->msg = Z_NULL;
+ z->state->mode = z->state->nowrap ? BLOCKS : METHOD;
+ inflate_blocks_reset(z->state->blocks, z, &c);
+ Trace((stderr, "inflate: reset\n"));
+ return Z_OK;
+}
+
+
+int inflateEnd(z)
+z_streamp z;
+{
+ uLong c;
+
+ if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL)
+ return Z_STREAM_ERROR;
+ if (z->state->blocks != Z_NULL)
+ inflate_blocks_free(z->state->blocks, z, &c);
+ ZFREE(z, z->state);
+ z->state = Z_NULL;
+ Trace((stderr, "inflate: end\n"));
+ return Z_OK;
+}
+
+
+int inflateInit2_(z, w, version, stream_size)
+z_streamp z;
+int w;
+const char *version;
+int stream_size;
+{
+ if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
+ stream_size != sizeof(z_stream))
+ return Z_VERSION_ERROR;
+
+ /* initialize state */
+ if (z == Z_NULL)
+ return Z_STREAM_ERROR;
+ z->msg = Z_NULL;
+#ifndef NO_ZCFUNCS
+ if (z->zalloc == Z_NULL)
+ {
+ z->zalloc = zcalloc;
+ z->opaque = (voidpf)0;
+ }
+ if (z->zfree == Z_NULL) z->zfree = zcfree;
+#endif
+ if ((z->state = (struct internal_state FAR *)
+ ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL)
+ return Z_MEM_ERROR;
+ z->state->blocks = Z_NULL;
+
+ /* handle undocumented nowrap option (no zlib header or check) */
+ z->state->nowrap = 0;
+ if (w < 0)
+ {
+ w = - w;
+ z->state->nowrap = 1;
+ }
+
+ /* set window size */
+ if (w < 8 || w > 15)
+ {
+ inflateEnd(z);
+ return Z_STREAM_ERROR;
+ }
+ z->state->wbits = (uInt)w;
+
+ /* create inflate_blocks state */
+ if ((z->state->blocks =
+ inflate_blocks_new(z, z->state->nowrap ? Z_NULL : adler32, (uInt)1 << w))
+ == Z_NULL)
+ {
+ inflateEnd(z);
+ return Z_MEM_ERROR;
+ }
+ Trace((stderr, "inflate: allocated\n"));
+
+ /* reset state */
+ inflateReset(z);
+ return Z_OK;
+}
+
+
+int inflateInit_(z, version, stream_size)
+z_streamp z;
+const char *version;
+int stream_size;
+{
+ return inflateInit2_(z, DEF_WBITS, version, stream_size);
+}
+
+
+#define NEEDBYTE {if(z->avail_in==0)goto empty;r=Z_OK;}
+#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++)
+
+int inflate(z, f)
+z_streamp z;
+int f;
+{
+ int r;
+ uInt b;
+
+ if (z == Z_NULL || z->state == Z_NULL || z->next_in == Z_NULL || f < 0)
+ return Z_STREAM_ERROR;
+ r = Z_BUF_ERROR;
+ while (1) switch (z->state->mode)
+ {
+ case METHOD:
+ NEEDBYTE
+ if (((z->state->sub.method = NEXTBYTE) & 0xf) != Z_DEFLATED)
+ {
+ z->state->mode = BAD;
+ z->msg = (char*)"unknown compression method";
+ z->state->sub.marker = 5; /* can't try inflateSync */
+ break;
+ }
+ if ((z->state->sub.method >> 4) + 8 > z->state->wbits)
+ {
+ z->state->mode = BAD;
+ z->msg = (char*)"invalid window size";
+ z->state->sub.marker = 5; /* can't try inflateSync */
+ break;
+ }
+ z->state->mode = FLAG;
+ case FLAG:
+ NEEDBYTE
+ b = NEXTBYTE;
+ if (((z->state->sub.method << 8) + b) % 31)
+ {
+ z->state->mode = BAD;
+ z->msg = (char*)"incorrect header check";
+ z->state->sub.marker = 5; /* can't try inflateSync */
+ break;
+ }
+ Trace((stderr, "inflate: zlib header ok\n"));
+ if (!(b & PRESET_DICT))
+ {
+ z->state->mode = BLOCKS;
+ break;
+ }
+ z->state->mode = DICT4;
+ case DICT4:
+ NEEDBYTE
+ z->state->sub.check.need = (uLong)NEXTBYTE << 24;
+ z->state->mode = DICT3;
+ case DICT3:
+ NEEDBYTE
+ z->state->sub.check.need += (uLong)NEXTBYTE << 16;
+ z->state->mode = DICT2;
+ case DICT2:
+ NEEDBYTE
+ z->state->sub.check.need += (uLong)NEXTBYTE << 8;
+ z->state->mode = DICT1;
+ case DICT1:
+ NEEDBYTE
+ z->state->sub.check.need += (uLong)NEXTBYTE;
+ z->adler = z->state->sub.check.need;
+ z->state->mode = DICT0;
+ return Z_NEED_DICT;
+ case DICT0:
+ z->state->mode = BAD;
+ z->msg = (char*)"need dictionary";
+ z->state->sub.marker = 0; /* can try inflateSync */
+ return Z_STREAM_ERROR;
+ case BLOCKS:
+ r = inflate_blocks(z->state->blocks, z, r);
+ if (f == Z_PACKET_FLUSH && z->avail_in == 0 && z->avail_out != 0)
+ r = inflate_packet_flush(z->state->blocks);
+ if (r == Z_DATA_ERROR)
+ {
+ z->state->mode = BAD;
+ z->state->sub.marker = 0; /* can try inflateSync */
+ break;
+ }
+ if (r != Z_STREAM_END)
+ return r;
+ r = Z_OK;
+ inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was);
+ if (z->state->nowrap)
+ {
+ z->state->mode = DONE;
+ break;
+ }
+ z->state->mode = CHECK4;
+ case CHECK4:
+ NEEDBYTE
+ z->state->sub.check.need = (uLong)NEXTBYTE << 24;
+ z->state->mode = CHECK3;
+ case CHECK3:
+ NEEDBYTE
+ z->state->sub.check.need += (uLong)NEXTBYTE << 16;
+ z->state->mode = CHECK2;
+ case CHECK2:
+ NEEDBYTE
+ z->state->sub.check.need += (uLong)NEXTBYTE << 8;
+ z->state->mode = CHECK1;
+ case CHECK1:
+ NEEDBYTE
+ z->state->sub.check.need += (uLong)NEXTBYTE;
+
+ if (z->state->sub.check.was != z->state->sub.check.need)
+ {
+ z->state->mode = BAD;
+ z->msg = (char*)"incorrect data check";
+ z->state->sub.marker = 5; /* can't try inflateSync */
+ break;
+ }
+ Trace((stderr, "inflate: zlib check ok\n"));
+ z->state->mode = DONE;
+ case DONE:
+ return Z_STREAM_END;
+ case BAD:
+ return Z_DATA_ERROR;
+ default:
+ return Z_STREAM_ERROR;
+ }
+
+ empty:
+ if (f != Z_PACKET_FLUSH)
+ return r;
+ z->state->mode = BAD;
+ z->msg = (char *)"need more for packet flush";
+ z->state->sub.marker = 0; /* can try inflateSync */
+ return Z_DATA_ERROR;
+}
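+
+/* Usage sketch (illustrative, not part of this file): a minimal caller
+ * of the inflate API above for a single in-memory buffer. Compiled out;
+ * the function name is hypothetical.
+ */
+#if 0
+local int inflate_buffer(Bytef *dst, uInt dlen, Bytef *src, uInt slen)
+{
+    z_stream z;
+    int r;
+
+    z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL;
+    z.next_in = src;   z.avail_in = slen;
+    z.next_out = dst;  z.avail_out = dlen;
+    if (inflateInit(&z) != Z_OK)
+        return Z_MEM_ERROR;
+    do {
+        r = inflate(&z, Z_NO_FLUSH); /* decompress as much as possible */
+    } while (r == Z_OK && z.avail_in != 0 && z.avail_out != 0);
+    inflateEnd(&z);
+    return r; /* Z_STREAM_END on a complete, checksum-verified stream */
+}
+#endif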
+
+
+int inflateSetDictionary(z, dictionary, dictLength)
+z_streamp z;
+const Bytef *dictionary;
+uInt dictLength;
+{
+ uInt length = dictLength;
+
+ if (z == Z_NULL || z->state == Z_NULL || z->state->mode != DICT0)
+ return Z_STREAM_ERROR;
+
+ if (adler32(1L, dictionary, dictLength) != z->adler) return Z_DATA_ERROR;
+ z->adler = 1L;
+
+ if (length >= ((uInt)1<<z->state->wbits))
+ {
+ length = (1<<z->state->wbits)-1;
+ dictionary += dictLength - length;
+ }
+ inflate_set_dictionary(z->state->blocks, dictionary, length);
+ z->state->mode = BLOCKS;
+ return Z_OK;
+}
+
+/*
+ * This subroutine adds the data at next_in/avail_in to the output history
+ * without performing any output. The output buffer must be "caught up";
+ * i.e. no pending output (hence s->read equals s->write), and the state must
+ * be BLOCKS (i.e. we should be willing to see the start of a series of
+ * BLOCKS). On exit, the output will also be caught up, and the checksum
+ * will have been updated if need be.
+ */
+
+int inflateIncomp(z)
+z_stream *z;
+{
+ if (z->state->mode != BLOCKS)
+ return Z_DATA_ERROR;
+ return inflate_addhistory(z->state->blocks, z);
+}
+
+
+int inflateSync(z)
+z_streamp z;
+{
+ uInt n; /* number of bytes to look at */
+ Bytef *p; /* pointer to bytes */
+ uInt m; /* number of marker bytes found in a row */
+ uLong r, w; /* temporaries to save total_in and total_out */
+
+ /* set up */
+ if (z == Z_NULL || z->state == Z_NULL)
+ return Z_STREAM_ERROR;
+ if (z->state->mode != BAD)
+ {
+ z->state->mode = BAD;
+ z->state->sub.marker = 0;
+ }
+ if ((n = z->avail_in) == 0)
+ return Z_BUF_ERROR;
+ p = z->next_in;
+ m = z->state->sub.marker;
+
+ /* search */
+ while (n && m < 4)
+ {
+ if (*p == (Byte)(m < 2 ? 0 : 0xff))
+ m++;
+ else if (*p)
+ m = 0;
+ else
+ m = 4 - m;
+ p++, n--;
+ }
+
+ /* restore */
+ z->total_in += p - z->next_in;
+ z->next_in = p;
+ z->avail_in = n;
+ z->state->sub.marker = m;
+
+ /* return no joy or set up to restart on a new block */
+ if (m != 4)
+ return Z_DATA_ERROR;
+ r = z->total_in; w = z->total_out;
+ inflateReset(z);
+ z->total_in = r; z->total_out = w;
+ z->state->mode = BLOCKS;
+ return Z_OK;
+}
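+
+/* Illustrative note: the pattern searched for above is 00 00 FF FF, the
+ * length and one's-complement length of an empty stored block, which a
+ * deflater emits on a full flush and which therefore marks a safe point
+ * to resume decompression.
+ */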
+
+#undef NEEDBYTE
+#undef NEXTBYTE
+/* --- inflate.c */
+
+/* +++ infblock.c */
+/* infblock.c -- interpret and process block types to last block
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* #include <freebsd/local/zutil.h> */
+/* #include <freebsd/local/infblock.h> */
+
+/* +++ inftrees.h */
+/* inftrees.h -- header to use inftrees.c
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* Huffman code lookup table entry--this entry is four bytes for machines
+ that have 16-bit pointers (e.g. PC's in the small or medium model). */
+
+typedef struct inflate_huft_s FAR inflate_huft;
+
+struct inflate_huft_s {
+ union {
+ struct {
+ Byte Exop; /* number of extra bits or operation */
+ Byte Bits; /* number of bits in this code or subcode */
+ } what;
+ Bytef *pad; /* pad structure to a power of 2 (4 bytes for */
+ } word; /* 16-bit, 8 bytes for 32-bit machines) */
+ union {
+ uInt Base; /* literal, length base, or distance base */
+ inflate_huft *Next; /* pointer to next level of table */
+ } more;
+};
+
+#ifdef DEBUG_ZLIB
+ extern uInt inflate_hufts;
+#endif
+
+extern int inflate_trees_bits OF((
+ uIntf *, /* 19 code lengths */
+ uIntf *, /* bits tree desired/actual depth */
+ inflate_huft * FAR *, /* bits tree result */
+ z_streamp )); /* for zalloc, zfree functions */
+
+extern int inflate_trees_dynamic OF((
+ uInt, /* number of literal/length codes */
+ uInt, /* number of distance codes */
+ uIntf *, /* that many (total) code lengths */
+ uIntf *, /* literal desired/actual bit depth */
+ uIntf *, /* distance desired/actual bit depth */
+ inflate_huft * FAR *, /* literal/length tree result */
+ inflate_huft * FAR *, /* distance tree result */
+ z_streamp )); /* for zalloc, zfree functions */
+
+extern int inflate_trees_fixed OF((
+ uIntf *, /* literal desired/actual bit depth */
+ uIntf *, /* distance desired/actual bit depth */
+ inflate_huft * FAR *, /* literal/length tree result */
+ inflate_huft * FAR *)); /* distance tree result */
+
+extern int inflate_trees_free OF((
+ inflate_huft *, /* tables to free */
+ z_streamp )); /* for zfree function */
+
+/* --- inftrees.h */
+
+/* +++ infcodes.h */
+/* infcodes.h -- header to use infcodes.c
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+struct inflate_codes_state;
+typedef struct inflate_codes_state FAR inflate_codes_statef;
+
+extern inflate_codes_statef *inflate_codes_new OF((
+ uInt, uInt,
+ inflate_huft *, inflate_huft *,
+ z_streamp ));
+
+extern int inflate_codes OF((
+ inflate_blocks_statef *,
+ z_streamp ,
+ int));
+
+extern void inflate_codes_free OF((
+ inflate_codes_statef *,
+ z_streamp ));
+
+/* --- infcodes.h */
+
+/* +++ infutil.h */
+/* infutil.h -- types and macros common to blocks and codes
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+#ifndef _INFUTIL_H
+#define _INFUTIL_H
+
+typedef enum {
+ TYPE, /* get type bits (3, including end bit) */
+ LENS, /* get lengths for stored */
+ STORED, /* processing stored block */
+ TABLE, /* get table lengths */
+ BTREE, /* get bit lengths tree for a dynamic block */
+ DTREE, /* get length, distance trees for a dynamic block */
+ CODES, /* processing fixed or dynamic block */
+ DRY, /* output remaining window bytes */
+ DONEB, /* finished last block, done */
+ BADB} /* got a data error--stuck here */
+inflate_block_mode;
+
+/* inflate blocks semi-private state */
+struct inflate_blocks_state {
+
+ /* mode */
+ inflate_block_mode mode; /* current inflate_block mode */
+
+ /* mode dependent information */
+ union {
+ uInt left; /* if STORED, bytes left to copy */
+ struct {
+ uInt table; /* table lengths (14 bits) */
+ uInt index; /* index into blens (or border) */
+ uIntf *blens; /* bit lengths of codes */
+ uInt bb; /* bit length tree depth */
+ inflate_huft *tb; /* bit length decoding tree */
+ } trees; /* if DTREE, decoding info for trees */
+ struct {
+ inflate_huft *tl;
+ inflate_huft *td; /* trees to free */
+ inflate_codes_statef
+ *codes;
+ } decode; /* if CODES, current state */
+ } sub; /* submode */
+ uInt last; /* true if this block is the last block */
+
+ /* mode independent information */
+ uInt bitk; /* bits in bit buffer */
+ uLong bitb; /* bit buffer */
+ Bytef *window; /* sliding window */
+ Bytef *end; /* one byte after sliding window */
+ Bytef *read; /* window read pointer */
+ Bytef *write; /* window write pointer */
+ check_func checkfn; /* check function */
+ uLong check; /* check on output */
+
+};
+
+
+/* defines for inflate input/output */
+/* update pointers and return */
+#define UPDBITS {s->bitb=b;s->bitk=k;}
+#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;}
+#define UPDOUT {s->write=q;}
+#define UPDATE {UPDBITS UPDIN UPDOUT}
+#define LEAVE {UPDATE return inflate_flush(s,z,r);}
+/* get bytes and bits */
+#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;}
+#define NEEDBYTE {if(n)r=Z_OK;else LEAVE}
+#define NEXTBYTE (n--,*p++)
+#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}}
+#define DUMPBITS(j) {b>>=(j);k-=(j);}
+/* output bytes */
+#define WAVAIL (uInt)(q<s->read?s->read-q-1:s->end-q)
+#define LOADOUT {q=s->write;m=(uInt)WAVAIL;}
+#define WWRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}}
+#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT}
+#define NEEDOUT {if(m==0){WWRAP if(m==0){FLUSH WWRAP if(m==0) LEAVE}}r=Z_OK;}
+#define OUTBYTE(a) {*q++=(Byte)(a);m--;}
+/* load local pointers */
+#define LOAD {LOADIN LOADOUT}
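+
+/*
+ A short sketch of the protocol behind these macros, using only the
+ locals and fields defined in this file: LOAD copies z->next_in,
+ z->avail_in and the blocks state into p, n, b, k, q and m; NEEDBITS(j)
+ fills the bit buffer b a byte at a time and suspends via LEAVE when
+ input runs out; ((uInt)b & inflate_mask[j]) then peeks at j bits and
+ DUMPBITS(j) consumes them. NEEDOUT and OUTBYTE play the same game for
+ the output window, and LEAVE routes every exit through UPDATE and
+ inflate_flush() so the z_stream stays consistent between calls.
+ */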
+
+/* masks for lower bits (size given to avoid silly warnings with Visual C++) */
+extern uInt inflate_mask[17];
+
+/* copy as much as possible from the sliding window to the output area */
+extern int inflate_flush OF((
+ inflate_blocks_statef *,
+ z_streamp ,
+ int));
+
+#ifndef NO_DUMMY_DECL
+struct internal_state {int dummy;}; /* for buggy compilers */
+#endif
+
+#endif
+/* --- infutil.h */
+
+#ifndef NO_DUMMY_DECL
+struct inflate_codes_state {int dummy;}; /* for buggy compilers */
+#endif
+
+/* Table for deflate from PKZIP's appnote.txt. */
+local const uInt border[] = { /* Order of the bit length code lengths */
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
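+
+/* The BTREE case below reads code lengths for only the first 4 + HCLEN
+ of these, in this order (rarely used symbols come last), and
+ zero-fills the remaining slots of the 19. */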
+
+/*
+ Notes beyond the 1.93a appnote.txt:
+
+ 1. Distance pointers never point before the beginning of the output
+ stream.
+ 2. Distance pointers can point back across blocks, up to 32k away.
+ 3. There is an implied maximum of 7 bits for the bit length table and
+ 15 bits for the actual data.
+ 4. If only one code exists, then it is encoded using one bit. (Zero
+ would be more efficient, but perhaps a little confusing.) If two
+ codes exist, they are coded using one bit each (0 and 1).
+ 5. There is no way of sending zero distance codes--a dummy must be
+ sent if there are none. (History: a pre 2.0 version of PKZIP would
+ store blocks with no distance codes, but this was discovered to be
+ too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
+ zero distance codes, which are sent as one code of zero bits in
+ length.
+ 6. There are up to 286 literal/length codes. Code 256 represents the
+ end-of-block. Note however that the static length tree defines
+ 288 codes just to fill out the Huffman codes. Codes 286 and 287
+ cannot be used though, since there is no length base or extra bits
+ defined for them. Similarly, there are up to 30 distance codes.
+ However, static trees define 32 codes (all 5 bits) to fill out the
+ Huffman codes, but the last two had better not show up in the data.
+ 7. Unzip can check dynamic Huffman blocks for complete code sets.
+ The exception is that a single code would not be complete (see #4).
+ 8. The five bits following the block type are really the number of
+ literal codes sent minus 257.
+ 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
+ (1+6+6). Therefore, to output three times the length, you output
+ three codes (1+1+1), whereas to output four times the same length,
+ you only need two codes (1+3). Hmm.
+ 10. In the tree reconstruction algorithm, Code = Code + Increment
+ only if BitLength(i) is not zero. (Pretty obvious.)
+ 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
+ 12. Note: length code 284 can represent 227-258, but length code 285
+ really is 258. The last length deserves its own, short code
+ since it gets used a lot in very redundant files. The length
+ 258 is special since 258 - 3 (the min match length) is 255.
+ 13. The literal/length and distance code bit lengths are read as a
+ single stream of lengths. It is possible (and advantageous) for
+ a repeat code (16, 17, or 18) to go across the boundary between
+ the two sets of lengths.
+ */
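+/*
+ A worked instance of note 9: to send thirteen code lengths of 8, the
+ compressor emits the bit length code 8 once, then repeat code 16
+ (copy the previous length 3-6 times, two extra bits) twice with
+ extra value 3: 1 + 6 + 6 = 13 lengths from three codes. The DTREE
+ case below expands exactly this, using i = (c == 18 ? 7 : c - 14)
+ extra bits and a repeat base of j = (c == 18 ? 11 : 3) for codes
+ 16, 17 and 18.
+ */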
+
+
+void inflate_blocks_reset(s, z, c)
+inflate_blocks_statef *s;
+z_streamp z;
+uLongf *c;
+{
+ if (s->checkfn != Z_NULL)
+ *c = s->check;
+ if (s->mode == BTREE || s->mode == DTREE)
+ ZFREE(z, s->sub.trees.blens);
+ if (s->mode == CODES)
+ {
+ inflate_codes_free(s->sub.decode.codes, z);
+ inflate_trees_free(s->sub.decode.td, z);
+ inflate_trees_free(s->sub.decode.tl, z);
+ }
+ s->mode = TYPE;
+ s->bitk = 0;
+ s->bitb = 0;
+ s->read = s->write = s->window;
+ if (s->checkfn != Z_NULL)
+ z->adler = s->check = (*s->checkfn)(0L, Z_NULL, 0);
+ Trace((stderr, "inflate: blocks reset\n"));
+}
+
+
+inflate_blocks_statef *inflate_blocks_new(z, c, w)
+z_streamp z;
+check_func c;
+uInt w;
+{
+ inflate_blocks_statef *s;
+
+ if ((s = (inflate_blocks_statef *)ZALLOC
+ (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL)
+ return s;
+ if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL)
+ {
+ ZFREE(z, s);
+ return Z_NULL;
+ }
+ s->end = s->window + w;
+ s->checkfn = c;
+ s->mode = TYPE;
+ Trace((stderr, "inflate: blocks allocated\n"));
+ inflate_blocks_reset(s, z, &s->check);
+ return s;
+}
+
+
+#ifdef DEBUG_ZLIB
+ extern uInt inflate_hufts;
+#endif
+int inflate_blocks(s, z, r)
+inflate_blocks_statef *s;
+z_streamp z;
+int r;
+{
+ uInt t; /* temporary storage */
+ uLong b; /* bit buffer */
+ uInt k; /* bits in bit buffer */
+ Bytef *p; /* input data pointer */
+ uInt n; /* bytes available there */
+ Bytef *q; /* output window write pointer */
+ uInt m; /* bytes to end of window or read pointer */
+
+ /* copy input/output information to locals (UPDATE macro restores) */
+ LOAD
+
+ /* process input based on current state */
+ while (1) switch (s->mode)
+ {
+ case TYPE:
+ NEEDBITS(3)
+ t = (uInt)b & 7;
+ s->last = t & 1;
+ switch (t >> 1)
+ {
+ case 0: /* stored */
+ Trace((stderr, "inflate: stored block%s\n",
+ s->last ? " (last)" : ""));
+ DUMPBITS(3)
+ t = k & 7; /* go to byte boundary */
+ DUMPBITS(t)
+ s->mode = LENS; /* get length of stored block */
+ break;
+ case 1: /* fixed */
+ Trace((stderr, "inflate: fixed codes block%s\n",
+ s->last ? " (last)" : ""));
+ {
+ uInt bl, bd;
+ inflate_huft *tl, *td;
+
+ inflate_trees_fixed(&bl, &bd, &tl, &td);
+ s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z);
+ if (s->sub.decode.codes == Z_NULL)
+ {
+ r = Z_MEM_ERROR;
+ LEAVE
+ }
+ s->sub.decode.tl = Z_NULL; /* don't try to free these */
+ s->sub.decode.td = Z_NULL;
+ }
+ DUMPBITS(3)
+ s->mode = CODES;
+ break;
+ case 2: /* dynamic */
+ Trace((stderr, "inflate: dynamic codes block%s\n",
+ s->last ? " (last)" : ""));
+ DUMPBITS(3)
+ s->mode = TABLE;
+ break;
+ case 3: /* illegal */
+ DUMPBITS(3)
+ s->mode = BADB;
+ z->msg = (char*)"invalid block type";
+ r = Z_DATA_ERROR;
+ LEAVE
+ }
+ break;
+ case LENS:
+ NEEDBITS(32)
+ if ((((~b) >> 16) & 0xffff) != (b & 0xffff))
+ {
+ s->mode = BADB;
+ z->msg = (char*)"invalid stored block lengths";
+ r = Z_DATA_ERROR;
+ LEAVE
+ }
+ s->sub.left = (uInt)b & 0xffff;
+ b = k = 0; /* dump bits */
+ Tracev((stderr, "inflate: stored length %u\n", s->sub.left));
+ s->mode = s->sub.left ? STORED : (s->last ? DRY : TYPE);
+ break;
+ case STORED:
+ if (n == 0)
+ LEAVE
+ NEEDOUT
+ t = s->sub.left;
+ if (t > n) t = n;
+ if (t > m) t = m;
+ zmemcpy(q, p, t);
+ p += t; n -= t;
+ q += t; m -= t;
+ if ((s->sub.left -= t) != 0)
+ break;
+ Tracev((stderr, "inflate: stored end, %lu total out\n",
+ z->total_out + (q >= s->read ? q - s->read :
+ (s->end - s->read) + (q - s->window))));
+ s->mode = s->last ? DRY : TYPE;
+ break;
+ case TABLE:
+ NEEDBITS(14)
+ s->sub.trees.table = t = (uInt)b & 0x3fff;
+#ifndef PKZIP_BUG_WORKAROUND
+ if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29)
+ {
+ s->mode = BADB;
+ z->msg = (char*)"too many length or distance symbols";
+ r = Z_DATA_ERROR;
+ LEAVE
+ }
+#endif
+ t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f);
+ if (t < 19)
+ t = 19;
+ if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL)
+ {
+ r = Z_MEM_ERROR;
+ LEAVE
+ }
+ DUMPBITS(14)
+ s->sub.trees.index = 0;
+ Tracev((stderr, "inflate: table sizes ok\n"));
+ s->mode = BTREE;
+ case BTREE:
+ while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10))
+ {
+ NEEDBITS(3)
+ s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7;
+ DUMPBITS(3)
+ }
+ while (s->sub.trees.index < 19)
+ s->sub.trees.blens[border[s->sub.trees.index++]] = 0;
+ s->sub.trees.bb = 7;
+ t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb,
+ &s->sub.trees.tb, z);
+ if (t != Z_OK)
+ {
+ r = t;
+ if (r == Z_DATA_ERROR) {
+ ZFREE(z, s->sub.trees.blens);
+ s->mode = BADB;
+ }
+ LEAVE
+ }
+ s->sub.trees.index = 0;
+ Tracev((stderr, "inflate: bits tree ok\n"));
+ s->mode = DTREE;
+ case DTREE:
+ while (t = s->sub.trees.table,
+ s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f))
+ {
+ inflate_huft *h;
+ uInt i, j, c;
+
+ t = s->sub.trees.bb;
+ NEEDBITS(t)
+ h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]);
+ t = h->word.what.Bits;
+ c = h->more.Base;
+ if (c < 16)
+ {
+ DUMPBITS(t)
+ s->sub.trees.blens[s->sub.trees.index++] = c;
+ }
+ else /* c == 16..18 */
+ {
+ i = c == 18 ? 7 : c - 14;
+ j = c == 18 ? 11 : 3;
+ NEEDBITS(t + i)
+ DUMPBITS(t)
+ j += (uInt)b & inflate_mask[i];
+ DUMPBITS(i)
+ i = s->sub.trees.index;
+ t = s->sub.trees.table;
+ if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) ||
+ (c == 16 && i < 1))
+ {
+ inflate_trees_free(s->sub.trees.tb, z);
+ ZFREE(z, s->sub.trees.blens);
+ s->mode = BADB;
+ z->msg = (char*)"invalid bit length repeat";
+ r = Z_DATA_ERROR;
+ LEAVE
+ }
+ c = c == 16 ? s->sub.trees.blens[i - 1] : 0;
+ do {
+ s->sub.trees.blens[i++] = c;
+ } while (--j);
+ s->sub.trees.index = i;
+ }
+ }
+ inflate_trees_free(s->sub.trees.tb, z);
+ s->sub.trees.tb = Z_NULL;
+ {
+ uInt bl, bd;
+ inflate_huft *tl, *td;
+ inflate_codes_statef *c;
+
+ bl = 9; /* must be <= 9 for lookahead assumptions */
+ bd = 6; /* must be <= 9 for lookahead assumptions */
+ t = s->sub.trees.table;
+#ifdef DEBUG_ZLIB
+ inflate_hufts = 0;
+#endif
+ t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f),
+ s->sub.trees.blens, &bl, &bd, &tl, &td, z);
+ if (t != Z_OK)
+ {
+ if (t == (uInt)Z_DATA_ERROR) {
+ ZFREE(z, s->sub.trees.blens);
+ s->mode = BADB;
+ }
+ r = t;
+ LEAVE
+ }
+ Tracev((stderr, "inflate: trees ok, %d * %d bytes used\n",
+ inflate_hufts, sizeof(inflate_huft)));
+ if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL)
+ {
+ inflate_trees_free(td, z);
+ inflate_trees_free(tl, z);
+ r = Z_MEM_ERROR;
+ LEAVE
+ }
+ /*
+ * this ZFREE must occur *BEFORE* we mess with sub.decode, because
+ * sub.trees is union'd with sub.decode.
+ */
+ ZFREE(z, s->sub.trees.blens);
+ s->sub.decode.codes = c;
+ s->sub.decode.tl = tl;
+ s->sub.decode.td = td;
+ }
+ s->mode = CODES;
+ case CODES:
+ UPDATE
+ if ((r = inflate_codes(s, z, r)) != Z_STREAM_END)
+ return inflate_flush(s, z, r);
+ r = Z_OK;
+ inflate_codes_free(s->sub.decode.codes, z);
+ inflate_trees_free(s->sub.decode.td, z);
+ inflate_trees_free(s->sub.decode.tl, z);
+ LOAD
+ Tracev((stderr, "inflate: codes end, %lu total out\n",
+ z->total_out + (q >= s->read ? q - s->read :
+ (s->end - s->read) + (q - s->window))));
+ if (!s->last)
+ {
+ s->mode = TYPE;
+ break;
+ }
+ if (k > 7) /* return unused byte, if any */
+ {
+ Assert(k < 16, "inflate_codes grabbed too many bytes")
+ k -= 8;
+ n++;
+ p--; /* can always return one */
+ }
+ s->mode = DRY;
+ case DRY:
+ FLUSH
+ if (s->read != s->write)
+ LEAVE
+ s->mode = DONEB;
+ case DONEB:
+ r = Z_STREAM_END;
+ LEAVE
+ case BADB:
+ r = Z_DATA_ERROR;
+ LEAVE
+ default:
+ r = Z_STREAM_ERROR;
+ LEAVE
+ }
+}
+
+
+int inflate_blocks_free(s, z, c)
+inflate_blocks_statef *s;
+z_streamp z;
+uLongf *c;
+{
+ inflate_blocks_reset(s, z, c);
+ ZFREE(z, s->window);
+ ZFREE(z, s);
+ Trace((stderr, "inflate: blocks freed\n"));
+ return Z_OK;
+}
+
+
+void inflate_set_dictionary(s, d, n)
+inflate_blocks_statef *s;
+const Bytef *d;
+uInt n;
+{
+ zmemcpy((charf *)s->window, d, n);
+ s->read = s->write = s->window + n;
+}
+
+/*
+ * This subroutine adds the data at next_in/avail_in to the output history
+ * without performing any output. The output buffer must be "caught up";
+ * i.e. no pending output (hence s->read equals s->write), and the state
+ * must be TYPE (i.e. we should be willing to see the start of a new
+ * series of blocks, which is what the mode check below enforces). On
+ * exit, the output will also be caught up, and the checksum
+ * will have been updated if need be.
+ */
+int inflate_addhistory(s, z)
+inflate_blocks_statef *s;
+z_stream *z;
+{
+ uLong b; /* bit buffer */ /* NOT USED HERE */
+ uInt k; /* bits in bit buffer */ /* NOT USED HERE */
+ uInt t; /* temporary storage */
+ Bytef *p; /* input data pointer */
+ uInt n; /* bytes available there */
+ Bytef *q; /* output window write pointer */
+ uInt m; /* bytes to end of window or read pointer */
+
+ if (s->read != s->write)
+ return Z_STREAM_ERROR;
+ if (s->mode != TYPE)
+ return Z_DATA_ERROR;
+
+ /* we're ready to rock */
+ LOAD
+ /* while there is input ready, copy to output buffer, moving
+ * pointers as needed.
+ */
+ while (n) {
+ t = n; /* how many to do */
+ /* is there room until end of buffer? */
+ if (t > m) t = m;
+ /* update check information */
+ if (s->checkfn != Z_NULL)
+ s->check = (*s->checkfn)(s->check, q, t);
+ zmemcpy(q, p, t);
+ q += t;
+ p += t;
+ n -= t;
+ z->total_out += t;
+ s->read = q; /* drag read pointer forward */
+/* WWRAP */ /* expand WWRAP macro by hand to handle s->read */
+ if (q == s->end) {
+ s->read = q = s->window;
+ m = WAVAIL;
+ }
+ }
+ UPDATE
+ return Z_OK;
+}
+
+
+/*
+ * At the end of a Deflate-compressed PPP packet, we expect to have seen
+ * a `stored' block type value but not the (zero) length bytes.
+ */
+int inflate_packet_flush(s)
+ inflate_blocks_statef *s;
+{
+ if (s->mode != LENS)
+ return Z_DATA_ERROR;
+ s->mode = TYPE;
+ return Z_OK;
+}
+/* --- infblock.c */
+
+/* +++ inftrees.c */
+/* inftrees.c -- generate Huffman trees for efficient decoding
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* #include <freebsd/local/zutil.h> */
+/* #include <freebsd/local/inftrees.h> */
+
+char inflate_copyright[] = " inflate 1.0.4 Copyright 1995-1996 Mark Adler ";
+/*
+ If you use the zlib library in a product, an acknowledgment is welcome
+ in the documentation of your product. If for some reason you cannot
+ include such an acknowledgment, I would appreciate that you keep this
+ copyright string in the executable of your product.
+ */
+
+#ifndef NO_DUMMY_DECL
+struct internal_state {int dummy;}; /* for buggy compilers */
+#endif
+
+/* simplify the use of the inflate_huft type with some defines */
+#define base more.Base
+#define next more.Next
+#define exop word.what.Exop
+#define bits word.what.Bits
+
+
+local int huft_build OF((
+ uIntf *, /* code lengths in bits */
+ uInt, /* number of codes */
+ uInt, /* number of "simple" codes */
+ const uIntf *, /* list of base values for non-simple codes */
+ const uIntf *, /* list of extra bits for non-simple codes */
+ inflate_huft * FAR*,/* result: starting table */
+ uIntf *, /* maximum lookup bits (returns actual) */
+ z_streamp )); /* for zalloc function */
+
+local voidpf falloc OF((
+ voidpf, /* opaque pointer (not used) */
+ uInt, /* number of items */
+ uInt)); /* size of item */
+
+/* Tables for deflate from PKZIP's appnote.txt. */
+local const uInt cplens[31] = { /* Copy lengths for literal codes 257..285 */
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+ /* see note #13 above about 258 */
+local const uInt cplext[31] = { /* Extra bits for literal codes 257..285 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+ 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */
+local const uInt cpdist[30] = { /* Copy offsets for distance codes 0..29 */
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+ 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+ 8193, 12289, 16385, 24577};
+local const uInt cpdext[30] = { /* Extra bits for distance codes */
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+ 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+ 12, 12, 13, 13};
+
+/*
+ Huffman code decoding is performed using a multi-level table lookup.
+ The fastest way to decode is to simply build a lookup table whose
+ size is determined by the longest code. However, the time it takes
+ to build this table can also be a factor if the data being decoded
+ is not very long. The most common codes are necessarily the
+ shortest codes, so those codes dominate the decoding time, and hence
+ the speed. The idea is you can have a shorter table that decodes the
+ shorter, more probable codes, and then point to subsidiary tables for
+ the longer codes. The time it costs to decode the longer codes is
+ then traded against the time it takes to make longer tables.
+
+ The results of this trade are in the variables lbits and dbits
+ below. lbits is the number of bits the first level table for literal/
+ length codes can decode in one step, and dbits is the same thing for
+ the distance codes. Subsequent tables are also less than or equal to
+ those sizes. These values may be adjusted either when all of the
+ codes are shorter than that, in which case the longest code length in
+ bits is used, or when the shortest code is *longer* than the requested
+ table size, in which case the length of the shortest code in bits is
+ used.
+
+ There are two different values for the two tables, since they code a
+ different number of possibilities each. The literal/length table
+ codes 286 possible values, or in a flat code, a little over eight
+ bits. The distance table codes 30 possible values, or a little less
+ than five bits, flat. The optimum values for speed end up being
+ about one bit more than those, so lbits is 8+1 and dbits is 5+1.
+ The optimum values may differ though from machine to machine, and
+ possibly even between compilers. Your mileage may vary.
+ */
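+/*
+ For example, with lbits == 9 a 13-bit literal/length code resolves in
+ two probes: the low 9 bits of the bit buffer index the root table,
+ whose entry flags "next table" and supplies the sub-table pointer and
+ the number of further bits (4 here) to index it with. Codes of 9 bits
+ or fewer -- the common case -- resolve in a single probe.
+ */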
+
+
+/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */
+#define BMAX 15 /* maximum bit length of any code */
+#define N_MAX 288 /* maximum number of codes in any set */
+
+#ifdef DEBUG_ZLIB
+ uInt inflate_hufts;
+#endif
+
+local int huft_build(b, n, s, d, e, t, m, zs)
+uIntf *b; /* code lengths in bits (all assumed <= BMAX) */
+uInt n; /* number of codes (assumed <= N_MAX) */
+uInt s; /* number of simple-valued codes (0..s-1) */
+const uIntf *d; /* list of base values for non-simple codes */
+const uIntf *e; /* list of extra bits for non-simple codes */
+inflate_huft * FAR *t; /* result: starting table */
+uIntf *m; /* maximum lookup bits, returns actual */
+z_streamp zs; /* for zalloc function */
+/* Given a list of code lengths and a maximum table size, make a set of
+ tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR
+ if the given code set is incomplete (the tables are still built in this
+ case), Z_DATA_ERROR if the input is invalid (an over-subscribed set of
+ lengths), or Z_MEM_ERROR if not enough memory. */
+{
+
+ uInt a; /* counter for codes of length k */
+ uInt c[BMAX+1]; /* bit length count table */
+ uInt f; /* i repeats in table every f entries */
+ int g; /* maximum code length */
+ int h; /* table level */
+ register uInt i; /* counter, current code */
+ register uInt j; /* counter */
+ register int k; /* number of bits in current code */
+ int l; /* bits per table (returned in m) */
+ register uIntf *p; /* pointer into c[], b[], or v[] */
+ inflate_huft *q; /* points to current table */
+ struct inflate_huft_s r; /* table entry for structure assignment */
+ inflate_huft *u[BMAX]; /* table stack */
+ uInt v[N_MAX]; /* values in order of bit length */
+ register int w; /* bits before this table == (l * h) */
+ uInt x[BMAX+1]; /* bit offsets, then code stack */
+ uIntf *xp; /* pointer into x */
+ int y; /* number of dummy codes added */
+ uInt z; /* number of entries in current table */
+
+
+ /* Generate counts for each bit length */
+ p = c;
+#define C0 *p++ = 0;
+#define C2 C0 C0 C0 C0
+#define C4 C2 C2 C2 C2
+ C4 /* clear c[]--assume BMAX+1 is 16 */
+ p = b; i = n;
+ do {
+ c[*p++]++; /* assume all entries <= BMAX */
+ } while (--i);
+ if (c[0] == n) /* null input--all zero length codes */
+ {
+ *t = (inflate_huft *)Z_NULL;
+ *m = 0;
+ return Z_OK;
+ }
+
+
+ /* Find minimum and maximum length, bound *m by those */
+ l = *m;
+ for (j = 1; j <= BMAX; j++)
+ if (c[j])
+ break;
+ k = j; /* minimum code length */
+ if ((uInt)l < j)
+ l = j;
+ for (i = BMAX; i; i--)
+ if (c[i])
+ break;
+ g = i; /* maximum code length */
+ if ((uInt)l > i)
+ l = i;
+ *m = l;
+
+
+ /* Adjust last length count to fill out codes, if needed */
+ for (y = 1 << j; j < i; j++, y <<= 1)
+ if ((y -= c[j]) < 0)
+ return Z_DATA_ERROR;
+ if ((y -= c[i]) < 0)
+ return Z_DATA_ERROR;
+ c[i] += y;
+
+
+ /* Generate starting offsets into the value table for each length */
+ x[1] = j = 0;
+ p = c + 1; xp = x + 2;
+ while (--i) { /* note that i == g from above */
+ *xp++ = (j += *p++);
+ }
+
+
+ /* Make a table of values in order of bit lengths */
+ p = b; i = 0;
+ do {
+ if ((j = *p++) != 0)
+ v[x[j]++] = i;
+ } while (++i < n);
+ n = x[g]; /* set n to length of v */
+
+
+ /* Generate the Huffman codes and for each, make the table entries */
+ x[0] = i = 0; /* first Huffman code is zero */
+ p = v; /* grab values in bit order */
+ h = -1; /* no tables yet--level -1 */
+ w = -l; /* bits decoded == (l * h) */
+ u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */
+ q = (inflate_huft *)Z_NULL; /* ditto */
+ z = 0; /* ditto */
+
+ /* go through the bit lengths (k already is bits in shortest code) */
+ for (; k <= g; k++)
+ {
+ a = c[k];
+ while (a--)
+ {
+ /* here i is the Huffman code of length k bits for value *p */
+ /* make tables up to required level */
+ while (k > w + l)
+ {
+ h++;
+ w += l; /* previous table always l bits */
+
+ /* compute minimum size table less than or equal to l bits */
+ z = g - w;
+ z = z > (uInt)l ? l : z; /* table size upper limit */
+ if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */
+ { /* too few codes for k-w bit table */
+ f -= a + 1; /* deduct codes from patterns left */
+ xp = c + k;
+ if (j < z)
+ while (++j < z) /* try smaller tables up to z bits */
+ {
+ if ((f <<= 1) <= *++xp)
+ break; /* enough codes to use up j bits */
+ f -= *xp; /* else deduct codes from patterns */
+ }
+ }
+ z = 1 << j; /* table entries for j-bit table */
+
+ /* allocate and link in new table */
+ if ((q = (inflate_huft *)ZALLOC
+ (zs,z + 1,sizeof(inflate_huft))) == Z_NULL)
+ {
+ if (h)
+ inflate_trees_free(u[0], zs);
+ return Z_MEM_ERROR; /* not enough memory */
+ }
+#ifdef DEBUG_ZLIB
+ inflate_hufts += z + 1;
+#endif
+ *t = q + 1; /* link to list for huft_free() */
+ *(t = &(q->next)) = Z_NULL;
+ u[h] = ++q; /* table starts after link */
+
+ /* connect to last table, if there is one */
+ if (h)
+ {
+ x[h] = i; /* save pattern for backing up */
+ r.bits = (Byte)l; /* bits to dump before this table */
+ r.exop = (Byte)j; /* bits in this table */
+ r.next = q; /* pointer to this table */
+ j = i >> (w - l); /* (get around Turbo C bug) */
+ u[h-1][j] = r; /* connect to last table */
+ }
+ }
+
+ /* set up table entry in r */
+ r.bits = (Byte)(k - w);
+ if (p >= v + n)
+ r.exop = 128 + 64; /* out of values--invalid code */
+ else if (*p < s)
+ {
+ r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */
+ r.base = *p++; /* simple code is just the value */
+ }
+ else
+ {
+ r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */
+ r.base = d[*p++ - s];
+ }
+
+ /* fill code-like entries with r */
+ f = 1 << (k - w);
+ for (j = i >> w; j < z; j += f)
+ q[j] = r;
+
+ /* backwards increment the k-bit code i */
+ for (j = 1 << (k - 1); i & j; j >>= 1)
+ i ^= j;
+ i ^= j;
+
+ /* backup over finished tables */
+ while ((i & ((1 << w) - 1)) != x[h])
+ {
+ h--; /* don't need to update q */
+ w -= l;
+ }
+ }
+ }
+
+
+ /* Return Z_BUF_ERROR if we were given an incomplete table */
+ return y != 0 && g != 1 ? Z_BUF_ERROR : Z_OK;
+}
+
+
+int inflate_trees_bits(c, bb, tb, z)
+uIntf *c; /* 19 code lengths */
+uIntf *bb; /* bits tree desired/actual depth */
+inflate_huft * FAR *tb; /* bits tree result */
+z_streamp z; /* for zfree function */
+{
+ int r;
+
+ r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, tb, bb, z);
+ if (r == Z_DATA_ERROR)
+ z->msg = (char*)"oversubscribed dynamic bit lengths tree";
+ else if (r == Z_BUF_ERROR || *bb == 0)
+ {
+ inflate_trees_free(*tb, z);
+ z->msg = (char*)"incomplete dynamic bit lengths tree";
+ r = Z_DATA_ERROR;
+ }
+ return r;
+}
+
+
+int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, z)
+uInt nl; /* number of literal/length codes */
+uInt nd; /* number of distance codes */
+uIntf *c; /* that many (total) code lengths */
+uIntf *bl; /* literal desired/actual bit depth */
+uIntf *bd; /* distance desired/actual bit depth */
+inflate_huft * FAR *tl; /* literal/length tree result */
+inflate_huft * FAR *td; /* distance tree result */
+z_streamp z; /* for zfree function */
+{
+ int r;
+
+ /* build literal/length tree */
+ r = huft_build(c, nl, 257, cplens, cplext, tl, bl, z);
+ if (r != Z_OK || *bl == 0)
+ {
+ if (r == Z_DATA_ERROR)
+ z->msg = (char*)"oversubscribed literal/length tree";
+ else if (r != Z_MEM_ERROR)
+ {
+ inflate_trees_free(*tl, z);
+ z->msg = (char*)"incomplete literal/length tree";
+ r = Z_DATA_ERROR;
+ }
+ return r;
+ }
+
+ /* build distance tree */
+ r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, z);
+ if (r != Z_OK || (*bd == 0 && nl > 257))
+ {
+ if (r == Z_DATA_ERROR)
+ z->msg = (char*)"oversubscribed distance tree";
+ else if (r == Z_BUF_ERROR) {
+#ifdef PKZIP_BUG_WORKAROUND
+ r = Z_OK;
+ }
+#else
+ inflate_trees_free(*td, z);
+ z->msg = (char*)"incomplete distance tree";
+ r = Z_DATA_ERROR;
+ }
+ else if (r != Z_MEM_ERROR)
+ {
+ z->msg = (char*)"empty distance tree with lengths";
+ r = Z_DATA_ERROR;
+ }
+ inflate_trees_free(*tl, z);
+ return r;
+#endif
+ }
+
+ /* done */
+ return Z_OK;
+}
+
+
+/* build fixed tables only once--keep them here */
+local int fixed_built = 0;
+#define FIXEDH 530 /* number of hufts used by fixed tables */
+local inflate_huft fixed_mem[FIXEDH];
+local uInt fixed_bl;
+local uInt fixed_bd;
+local inflate_huft *fixed_tl;
+local inflate_huft *fixed_td;
+
+
+local voidpf falloc(q, n, s)
+voidpf q; /* opaque pointer */
+uInt n; /* number of items */
+uInt s; /* size of item */
+{
+ Assert(s == sizeof(inflate_huft) && n <= *(intf *)q,
+ "inflate_trees falloc overflow");
+ *(intf *)q -= n+s-s; /* s-s to avoid warning */
+ return (voidpf)(fixed_mem + *(intf *)q);
+}
+
+
+int inflate_trees_fixed(bl, bd, tl, td)
+uIntf *bl; /* literal desired/actual bit depth */
+uIntf *bd; /* distance desired/actual bit depth */
+inflate_huft * FAR *tl; /* literal/length tree result */
+inflate_huft * FAR *td; /* distance tree result */
+{
+ /* build fixed tables if not already (multiple overlapped executions ok) */
+ if (!fixed_built)
+ {
+ int k; /* temporary variable */
+ unsigned c[288]; /* length list for huft_build */
+ z_stream z; /* for falloc function */
+ int f = FIXEDH; /* number of hufts left in fixed_mem */
+
+ /* set up fake z_stream for memory routines */
+ z.zalloc = falloc;
+ z.zfree = Z_NULL;
+ z.opaque = (voidpf)&f;
+
+ /* literal table */
+ for (k = 0; k < 144; k++)
+ c[k] = 8;
+ for (; k < 256; k++)
+ c[k] = 9;
+ for (; k < 280; k++)
+ c[k] = 7;
+ for (; k < 288; k++)
+ c[k] = 8;
+ fixed_bl = 7;
+ huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, &z);
+
+ /* distance table */
+ for (k = 0; k < 30; k++)
+ c[k] = 5;
+ fixed_bd = 5;
+ huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, &z);
+
+ /* done */
+ Assert(f == 0, "invalid build of fixed tables");
+ fixed_built = 1;
+ }
+ *bl = fixed_bl;
+ *bd = fixed_bd;
+ *tl = fixed_tl;
+ *td = fixed_td;
+ return Z_OK;
+}
+
+
+int inflate_trees_free(t, z)
+inflate_huft *t; /* table to free */
+z_streamp z; /* for zfree function */
+/* Free the malloc'ed tables built by huft_build(), which makes a linked
+ list of the tables it made, with the links in a dummy first entry of
+ each table. */
+{
+ register inflate_huft *p, *q, *r;
+
+ /* Reverse linked list */
+ p = Z_NULL;
+ q = t;
+ while (q != Z_NULL)
+ {
+ r = (q - 1)->next;
+ (q - 1)->next = p;
+ p = q;
+ q = r;
+ }
+ /* Go through linked list, freeing from the malloced (t[-1]) address. */
+ while (p != Z_NULL)
+ {
+ q = (--p)->next;
+ ZFREE(z,p);
+ p = q;
+ }
+ return Z_OK;
+}
+/* --- inftrees.c */
+
+/* +++ infcodes.c */
+/* infcodes.c -- process literals and length/distance pairs
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* #include <freebsd/local/zutil.h> */
+/* #include <freebsd/local/inftrees.h> */
+/* #include <freebsd/local/infblock.h> */
+/* #include <freebsd/local/infcodes.h> */
+/* #include <freebsd/local/infutil.h> */
+
+/* +++ inffast.h */
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+extern int inflate_fast OF((
+ uInt,
+ uInt,
+ inflate_huft *,
+ inflate_huft *,
+ inflate_blocks_statef *,
+ z_streamp ));
+/* --- inffast.h */
+
+/* simplify the use of the inflate_huft type with some defines */
+#define base more.Base
+#define next more.Next
+#define exop word.what.Exop
+#define bits word.what.Bits
+
+/* inflate codes private state */
+struct inflate_codes_state {
+
+ /* mode */
+ enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
+ START, /* x: set up for LEN */
+ LEN, /* i: get length/literal/eob next */
+ LENEXT, /* i: getting length extra (have base) */
+ DIST, /* i: get distance next */
+ DISTEXT, /* i: getting distance extra */
+ COPY, /* o: copying bytes in window, waiting for space */
+ LIT, /* o: got literal, waiting for output space */
+ WASH, /* o: got eob, possibly still output waiting */
+ END, /* x: got eob and all data flushed */
+ BADCODE} /* x: got error */
+ mode; /* current inflate_codes mode */
+
+ /* mode dependent information */
+ uInt len;
+ union {
+ struct {
+ inflate_huft *tree; /* pointer into tree */
+ uInt need; /* bits needed */
+ } code; /* if LEN or DIST, where in tree */
+ uInt lit; /* if LIT, literal */
+ struct {
+ uInt get; /* bits to get for extra */
+ uInt dist; /* distance back to copy from */
+ } copy; /* if EXT or COPY, where and how much */
+ } sub; /* submode */
+
+ /* mode independent information */
+ Byte lbits; /* ltree bits decoded per branch */
+ Byte dbits; /* dtree bits decoded per branch */
+ inflate_huft *ltree; /* literal/length/eob tree */
+ inflate_huft *dtree; /* distance tree */
+
+};
+
+
+inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z)
+uInt bl, bd;
+inflate_huft *tl;
+inflate_huft *td; /* need separate declaration for Borland C++ */
+z_streamp z;
+{
+ inflate_codes_statef *c;
+
+ if ((c = (inflate_codes_statef *)
+ ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL)
+ {
+ c->mode = START;
+ c->lbits = (Byte)bl;
+ c->dbits = (Byte)bd;
+ c->ltree = tl;
+ c->dtree = td;
+ Tracev((stderr, "inflate: codes new\n"));
+ }
+ return c;
+}
+
+
+int inflate_codes(s, z, r)
+inflate_blocks_statef *s;
+z_streamp z;
+int r;
+{
+ uInt j; /* temporary storage */
+ inflate_huft *t; /* temporary pointer */
+ uInt e; /* extra bits or operation */
+ uLong b; /* bit buffer */
+ uInt k; /* bits in bit buffer */
+ Bytef *p; /* input data pointer */
+ uInt n; /* bytes available there */
+ Bytef *q; /* output window write pointer */
+ uInt m; /* bytes to end of window or read pointer */
+ Bytef *f; /* pointer to copy strings from */
+ inflate_codes_statef *c = s->sub.decode.codes; /* codes state */
+
+ /* copy input/output information to locals (UPDATE macro restores) */
+ LOAD
+
+ /* process input and output based on current state */
+ while (1) switch (c->mode)
+ { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
+ case START: /* x: set up for LEN */
+#ifndef SLOW
+ if (m >= 258 && n >= 10)
+ {
+ UPDATE
+ r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z);
+ LOAD
+ if (r != Z_OK)
+ {
+ c->mode = r == Z_STREAM_END ? WASH : BADCODE;
+ break;
+ }
+ }
+#endif /* !SLOW */
+ c->sub.code.need = c->lbits;
+ c->sub.code.tree = c->ltree;
+ c->mode = LEN;
+ case LEN: /* i: get length/literal/eob next */
+ j = c->sub.code.need;
+ NEEDBITS(j)
+ t = c->sub.code.tree + ((uInt)b & inflate_mask[j]);
+ DUMPBITS(t->bits)
+ e = (uInt)(t->exop);
+ if (e == 0) /* literal */
+ {
+ c->sub.lit = t->base;
+ Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
+ "inflate: literal '%c'\n" :
+ "inflate: literal 0x%02x\n", t->base));
+ c->mode = LIT;
+ break;
+ }
+ if (e & 16) /* length */
+ {
+ c->sub.copy.get = e & 15;
+ c->len = t->base;
+ c->mode = LENEXT;
+ break;
+ }
+ if ((e & 64) == 0) /* next table */
+ {
+ c->sub.code.need = e;
+ c->sub.code.tree = t->next;
+ break;
+ }
+ if (e & 32) /* end of block */
+ {
+ Tracevv((stderr, "inflate: end of block\n"));
+ c->mode = WASH;
+ break;
+ }
+ c->mode = BADCODE; /* invalid code */
+ z->msg = (char*)"invalid literal/length code";
+ r = Z_DATA_ERROR;
+ LEAVE
+ case LENEXT: /* i: getting length extra (have base) */
+ j = c->sub.copy.get;
+ NEEDBITS(j)
+ c->len += (uInt)b & inflate_mask[j];
+ DUMPBITS(j)
+ c->sub.code.need = c->dbits;
+ c->sub.code.tree = c->dtree;
+ Tracevv((stderr, "inflate: length %u\n", c->len));
+ c->mode = DIST;
+ case DIST: /* i: get distance next */
+ j = c->sub.code.need;
+ NEEDBITS(j)
+ t = c->sub.code.tree + ((uInt)b & inflate_mask[j]);
+ DUMPBITS(t->bits)
+ e = (uInt)(t->exop);
+ if (e & 16) /* distance */
+ {
+ c->sub.copy.get = e & 15;
+ c->sub.copy.dist = t->base;
+ c->mode = DISTEXT;
+ break;
+ }
+ if ((e & 64) == 0) /* next table */
+ {
+ c->sub.code.need = e;
+ c->sub.code.tree = t->next;
+ break;
+ }
+ c->mode = BADCODE; /* invalid code */
+ z->msg = (char*)"invalid distance code";
+ r = Z_DATA_ERROR;
+ LEAVE
+ case DISTEXT: /* i: getting distance extra */
+ j = c->sub.copy.get;
+ NEEDBITS(j)
+ c->sub.copy.dist += (uInt)b & inflate_mask[j];
+ DUMPBITS(j)
+ Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist));
+ c->mode = COPY;
+ case COPY: /* o: copying bytes in window, waiting for space */
+#ifndef __TURBOC__ /* Turbo C bug for following expression */
+ f = (uInt)(q - s->window) < c->sub.copy.dist ?
+ s->end - (c->sub.copy.dist - (q - s->window)) :
+ q - c->sub.copy.dist;
+#else
+ f = q - c->sub.copy.dist;
+ if ((uInt)(q - s->window) < c->sub.copy.dist)
+ f = s->end - (c->sub.copy.dist - (uInt)(q - s->window));
+#endif
+ while (c->len)
+ {
+ NEEDOUT
+ OUTBYTE(*f++)
+ if (f == s->end)
+ f = s->window;
+ c->len--;
+ }
+ c->mode = START;
+ break;
+ case LIT: /* o: got literal, waiting for output space */
+ NEEDOUT
+ OUTBYTE(c->sub.lit)
+ c->mode = START;
+ break;
+ case WASH: /* o: got eob, possibly more output */
+ FLUSH
+ if (s->read != s->write)
+ LEAVE
+ c->mode = END;
+ case END:
+ r = Z_STREAM_END;
+ LEAVE
+ case BADCODE: /* x: got error */
+ r = Z_DATA_ERROR;
+ LEAVE
+ default:
+ r = Z_STREAM_ERROR;
+ LEAVE
+ }
+}
+
+
+void inflate_codes_free(c, z)
+inflate_codes_statef *c;
+z_streamp z;
+{
+ ZFREE(z, c);
+ Tracev((stderr, "inflate: codes free\n"));
+}
+/* --- infcodes.c */
+
+/* +++ infutil.c */
+/* inflate_util.c -- data and routines common to blocks and codes
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* #include <freebsd/local/zutil.h> */
+/* #include <freebsd/local/infblock.h> */
+/* #include <freebsd/local/inftrees.h> */
+/* #include <freebsd/local/infcodes.h> */
+/* #include <freebsd/local/infutil.h> */
+
+#ifndef NO_DUMMY_DECL
+struct inflate_codes_state {int dummy;}; /* for buggy compilers */
+#endif
+
+/* And'ing with mask[n] masks the lower n bits */
+uInt inflate_mask[17] = {
+ 0x0000,
+ 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
+ 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
+};
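+
+/* Example: inflate_mask[3] == 0x0007, so ((uInt)b & inflate_mask[3])
+ peeks at the low three bits of the bit buffer; b == 0x2d (binary
+ 101101) yields 5. The table stands in for (1 << n) - 1 at every
+ lookup site. */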
+
+
+/* copy as much as possible from the sliding window to the output area */
+int inflate_flush(s, z, r)
+inflate_blocks_statef *s;
+z_streamp z;
+int r;
+{
+ uInt n;
+ Bytef *p;
+ Bytef *q;
+
+ /* local copies of source and destination pointers */
+ p = z->next_out;
+ q = s->read;
+
+ /* compute number of bytes to copy as far as end of window */
+ n = (uInt)((q <= s->write ? s->write : s->end) - q);
+ if (n > z->avail_out) n = z->avail_out;
+ if (n && r == Z_BUF_ERROR) r = Z_OK;
+
+ /* update counters */
+ z->avail_out -= n;
+ z->total_out += n;
+
+ /* update check information */
+ if (s->checkfn != Z_NULL)
+ z->adler = s->check = (*s->checkfn)(s->check, q, n);
+
+ /* copy as far as end of window */
+ if (p != Z_NULL) {
+ zmemcpy(p, q, n);
+ p += n;
+ }
+ q += n;
+
+ /* see if more to copy at beginning of window */
+ if (q == s->end)
+ {
+ /* wrap pointers */
+ q = s->window;
+ if (s->write == s->end)
+ s->write = s->window;
+
+ /* compute bytes to copy */
+ n = (uInt)(s->write - q);
+ if (n > z->avail_out) n = z->avail_out;
+ if (n && r == Z_BUF_ERROR) r = Z_OK;
+
+ /* update counters */
+ z->avail_out -= n;
+ z->total_out += n;
+
+ /* update check information */
+ if (s->checkfn != Z_NULL)
+ z->adler = s->check = (*s->checkfn)(s->check, q, n);
+
+ /* copy */
+ if (p != Z_NULL) {
+ zmemcpy(p, q, n);
+ p += n;
+ }
+ q += n;
+ }
+
+ /* update pointers */
+ z->next_out = p;
+ s->read = q;
+
+ /* done */
+ return r;
+}
+/* --- infutil.c */
+
+/* +++ inffast.c */
+/* inffast.c -- process literals and length/distance pairs fast
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* #include <freebsd/local/zutil.h> */
+/* #include <freebsd/local/inftrees.h> */
+/* #include <freebsd/local/infblock.h> */
+/* #include <freebsd/local/infcodes.h> */
+/* #include <freebsd/local/infutil.h> */
+/* #include <freebsd/local/inffast.h> */
+
+#ifndef NO_DUMMY_DECL
+struct inflate_codes_state {int dummy;}; /* for buggy compilers */
+#endif
+
+/* simplify the use of the inflate_huft type with some defines */
+#define base more.Base
+#define next more.Next
+#define exop word.what.Exop
+#define bits word.what.Bits
+
+/* macros for bit input with no checking and for returning unused bytes */
+#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}}
+#define UNGRAB {n+=(c=k>>3);p-=c;k&=7;}
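+
+/* UNGRAB returns the c = k >> 3 whole unused bytes still held in the
+ bit buffer to the input (n += c, p -= c), keeping only the k & 7
+ stray bits. */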
+
+/* Called with number of bytes left to write in window at least 258
+ (the maximum string length) and number of input bytes available
+ at least ten. The ten bytes are six bytes for the longest length/
+ distance pair plus four bytes for overloading the bit buffer. */
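+/* In detail: a pair costs at most 15 bits of literal/length code, 5
+ extra length bits, 15 bits of distance code and 13 extra distance
+ bits -- 48 bits, or six bytes -- while GRABBITS loads whole bytes,
+ so up to four more bytes can end up in the bit buffer than the pair
+ consumes. */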
+
+int inflate_fast(bl, bd, tl, td, s, z)
+uInt bl, bd;
+inflate_huft *tl;
+inflate_huft *td; /* need separate declaration for Borland C++ */
+inflate_blocks_statef *s;
+z_streamp z;
+{
+ inflate_huft *t; /* temporary pointer */
+ uInt e; /* extra bits or operation */
+ uLong b; /* bit buffer */
+ uInt k; /* bits in bit buffer */
+ Bytef *p; /* input data pointer */
+ uInt n; /* bytes available there */
+ Bytef *q; /* output window write pointer */
+ uInt m; /* bytes to end of window or read pointer */
+ uInt ml; /* mask for literal/length tree */
+ uInt md; /* mask for distance tree */
+ uInt c; /* bytes to copy */
+ uInt d; /* distance back to copy from */
+ Bytef *r; /* copy source pointer */
+
+ /* load input, output, bit values */
+ LOAD
+
+ /* initialize masks */
+ ml = inflate_mask[bl];
+ md = inflate_mask[bd];
+
+ /* do until not enough input or output space for fast loop */
+ do { /* assume called with m >= 258 && n >= 10 */
+ /* get literal/length code */
+ GRABBITS(20) /* max bits for literal/length code */
+ if ((e = (t = tl + ((uInt)b & ml))->exop) == 0)
+ {
+ DUMPBITS(t->bits)
+ Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
+ "inflate: * literal '%c'\n" :
+ "inflate: * literal 0x%02x\n", t->base));
+ *q++ = (Byte)t->base;
+ m--;
+ continue;
+ }
+ do {
+ DUMPBITS(t->bits)
+ if (e & 16)
+ {
+ /* get extra bits for length */
+ e &= 15;
+ c = t->base + ((uInt)b & inflate_mask[e]);
+ DUMPBITS(e)
+ Tracevv((stderr, "inflate: * length %u\n", c));
+
+ /* decode distance base of block to copy */
+ GRABBITS(15); /* max bits for distance code */
+ e = (t = td + ((uInt)b & md))->exop;
+ do {
+ DUMPBITS(t->bits)
+ if (e & 16)
+ {
+ /* get extra bits to add to distance base */
+ e &= 15;
+ GRABBITS(e) /* get extra bits (up to 13) */
+ d = t->base + ((uInt)b & inflate_mask[e]);
+ DUMPBITS(e)
+ Tracevv((stderr, "inflate: * distance %u\n", d));
+
+ /* do the copy */
+ m -= c;
+ if ((uInt)(q - s->window) >= d) /* offset before dest */
+ { /* just copy */
+ r = q - d;
+ *q++ = *r++; c--; /* minimum count is three, */
+ *q++ = *r++; c--; /* so unroll loop a little */
+ }
+ else /* else offset after destination */
+ {
+ e = d - (uInt)(q - s->window); /* bytes from offset to end */
+ r = s->end - e; /* pointer to offset */
+ if (c > e) /* if source crosses, */
+ {
+ c -= e; /* copy to end of window */
+ do {
+ *q++ = *r++;
+ } while (--e);
+ r = s->window; /* copy rest from start of window */
+ }
+ }
+ do { /* copy all or what's left */
+ *q++ = *r++;
+ } while (--c);
+ break;
+ }
+ else if ((e & 64) == 0)
+ e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop;
+ else
+ {
+ z->msg = (char*)"invalid distance code";
+ UNGRAB
+ UPDATE
+ return Z_DATA_ERROR;
+ }
+ } while (1);
+ break;
+ }
+ if ((e & 64) == 0)
+ {
+ if ((e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop) == 0)
+ {
+ DUMPBITS(t->bits)
+ Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
+ "inflate: * literal '%c'\n" :
+ "inflate: * literal 0x%02x\n", t->base));
+ *q++ = (Byte)t->base;
+ m--;
+ break;
+ }
+ }
+ else if (e & 32)
+ {
+ Tracevv((stderr, "inflate: * end of block\n"));
+ UNGRAB
+ UPDATE
+ return Z_STREAM_END;
+ }
+ else
+ {
+ z->msg = (char*)"invalid literal/length code";
+ UNGRAB
+ UPDATE
+ return Z_DATA_ERROR;
+ }
+ } while (1);
+ } while (m >= 258 && n >= 10);
+
+ /* not enough input or output--restore pointers and return */
+ UNGRAB
+ UPDATE
+ return Z_OK;
+}
+/* --- inffast.c */
+
+/* +++ zutil.c */
+/* zutil.c -- target dependent utility functions for the compression library
+ * Copyright (C) 1995-1996 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* From: zutil.c,v 1.17 1996/07/24 13:41:12 me Exp $ */
+
+#ifdef DEBUG_ZLIB
+#include <freebsd/stdio.h>
+#endif
+
+/* #include <freebsd/local/zutil.h> */
+
+#ifndef NO_DUMMY_DECL
+struct internal_state {int dummy;}; /* for buggy compilers */
+#endif
+
+#ifndef STDC
+extern void exit OF((int));
+#endif
+
+static const char *z_errmsg[10] = {
+"need dictionary", /* Z_NEED_DICT 2 */
+"stream end", /* Z_STREAM_END 1 */
+"", /* Z_OK 0 */
+"file error", /* Z_ERRNO (-1) */
+"stream error", /* Z_STREAM_ERROR (-2) */
+"data error", /* Z_DATA_ERROR (-3) */
+"insufficient memory", /* Z_MEM_ERROR (-4) */
+"buffer error", /* Z_BUF_ERROR (-5) */
+"incompatible version",/* Z_VERSION_ERROR (-6) */
+""};
+
+
+const char *zlibVersion()
+{
+ return ZLIB_VERSION;
+}
+
+#ifdef DEBUG_ZLIB
+void z_error (m)
+ char *m;
+{
+ fprintf(stderr, "%s\n", m);
+ exit(1);
+}
+#endif
+
+#ifndef HAVE_MEMCPY
+
+void zmemcpy(dest, source, len)
+ Bytef* dest;
+ Bytef* source;
+ uInt len;
+{
+ if (len == 0) return;
+ do {
+ *dest++ = *source++; /* ??? to be unrolled */
+ } while (--len != 0);
+}
+
+int zmemcmp(s1, s2, len)
+ Bytef* s1;
+ Bytef* s2;
+ uInt len;
+{
+ uInt j;
+
+ for (j = 0; j < len; j++) {
+ if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1;
+ }
+ return 0;
+}
+
+void zmemzero(dest, len)
+ Bytef* dest;
+ uInt len;
+{
+ if (len == 0) return;
+ do {
+ *dest++ = 0; /* ??? to be unrolled */
+ } while (--len != 0);
+}
+#endif
+
+#ifdef __TURBOC__
+#if (defined( __BORLANDC__) || !defined(SMALL_MEDIUM)) && !defined(__32BIT__)
+/* Small and medium models in Turbo C are for now limited to near allocation
+ * with reduced MAX_WBITS and MAX_MEM_LEVEL
+ */
+# define MY_ZCALLOC
+
+/* Turbo C malloc() does not allow dynamic allocation of 64K bytes
+ * and farmalloc(64K) returns a pointer with an offset of 8, so we
+ * must fix the pointer. Warning: the pointer must be put back to its
+ * original form in order to free it, use zcfree().
+ */
+
+#define MAX_PTR 10
+/* 10*64K = 640K */
+
+local int next_ptr = 0;
+
+typedef struct ptr_table_s {
+ voidpf org_ptr;
+ voidpf new_ptr;
+} ptr_table;
+
+local ptr_table table[MAX_PTR];
+/* This table is used to remember the original form of pointers
+ * to large buffers (64K). Such pointers are normalized with a zero offset.
+ * Since MSDOS is not a preemptive multitasking OS, this table is not
+ * protected from concurrent access. This hack doesn't work anyway on
+ * a protected system like OS/2. Use Microsoft C instead.
+ */
+
+voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
+{
+ voidpf buf = opaque; /* just to make some compilers happy */
+ ulg bsize = (ulg)items*size;
+
+ /* If we allocate less than 65520 bytes, we assume that farmalloc
+ * will return a usable pointer which doesn't have to be normalized.
+ */
+ if (bsize < 65520L) {
+ buf = farmalloc(bsize);
+ if (*(ush*)&buf != 0) return buf;
+ } else {
+ buf = farmalloc(bsize + 16L);
+ }
+ if (buf == NULL || next_ptr >= MAX_PTR) return NULL;
+ table[next_ptr].org_ptr = buf;
+
+ /* Normalize the pointer to seg:0 */
+ *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4;
+ *(ush*)&buf = 0;
+ table[next_ptr++].new_ptr = buf;
+ return buf;
+}
+
+void zcfree (voidpf opaque, voidpf ptr)
+{
+ int n;
+ if (*(ush*)&ptr != 0) { /* object < 64K */
+ farfree(ptr);
+ return;
+ }
+ /* Find the original pointer */
+ for (n = 0; n < next_ptr; n++) {
+ if (ptr != table[n].new_ptr) continue;
+
+ farfree(table[n].org_ptr);
+ while (++n < next_ptr) {
+ table[n-1] = table[n];
+ }
+ next_ptr--;
+ return;
+ }
+ ptr = opaque; /* just to make some compilers happy */
+ Assert(0, "zcfree: ptr not found");
+}
+#endif
+#endif /* __TURBOC__ */
+
+
+#if defined(M_I86) && !defined(__32BIT__)
+/* Microsoft C in 16-bit mode */
+
+# define MY_ZCALLOC
+
+#if (!defined(_MSC_VER) || (_MSC_VER < 600))
+# define _halloc halloc
+# define _hfree hfree
+#endif
+
+voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
+{
+ if (opaque) opaque = 0; /* to make compiler happy */
+ return _halloc((long)items, size);
+}
+
+void zcfree (voidpf opaque, voidpf ptr)
+{
+ if (opaque) opaque = 0; /* to make compiler happy */
+ _hfree(ptr);
+}
+
+#endif /* MSC */
+
+
+#ifndef MY_ZCALLOC /* Any system without a special alloc function */
+
+#ifndef STDC
+extern voidp calloc OF((uInt items, uInt size));
+extern void free OF((voidpf ptr));
+#endif
+
+voidpf zcalloc (opaque, items, size)
+ voidpf opaque;
+ unsigned items;
+ unsigned size;
+{
+ if (opaque) items += size - size; /* make compiler happy */
+ return (voidpf)calloc(items, size);
+}
+
+void zcfree (opaque, ptr)
+ voidpf opaque;
+ voidpf ptr;
+{
+ free(ptr);
+ if (opaque) return; /* make compiler happy */
+}
+
+#endif /* MY_ZCALLOC */
+/* --- zutil.c */
+
+/* +++ adler32.c */
+/* adler32.c -- compute the Adler-32 checksum of a data stream
+ * Copyright (C) 1995-1996 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* From: adler32.c,v 1.10 1996/05/22 11:52:18 me Exp $ */
+
+/* #include <freebsd/local/zlib.h> */
+
+#define BASE 65521L /* largest prime smaller than 65536 */
+#define NMAX 5552
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
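+/* Check: for n == 5552 the worst-case sum is 255*5552*5553/2 +
+ 5553*65520 = 4294690200 <= 4294967295, while n == 5553 gives
+ 4296171735 and would overflow the 32-bit accumulator. */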
+
+#define DO1(buf,i) {s1 += buf[(i)]; s2 += s1;}
+#define DO2(buf,i) DO1(buf,i); DO1(buf,(i)+1);
+#define DO4(buf,i) DO2(buf,i); DO2(buf,(i)+2);
+#define DO8(buf,i) DO4(buf,i); DO4(buf,(i)+4);
+#define DO16(buf) DO8(buf,0); DO8(buf,8);
+
+/* ========================================================================= */
+uLong adler32(adler, buf, len)
+ uLong adler;
+ const Bytef *buf;
+ uInt len;
+{
+ unsigned long s1 = adler & 0xffff;
+ unsigned long s2 = (adler >> 16) & 0xffff;
+ int k;
+
+ if (buf == Z_NULL) return 1L;
+
+ while (len > 0) {
+ k = len < NMAX ? len : NMAX;
+ len -= k;
+ while (k >= 16) {
+ DO16(buf);
+ buf += 16;
+ k -= 16;
+ }
+ if (k != 0) do {
+ s1 += *buf++;
+ s2 += s1;
+ } while (--k);
+ s1 %= BASE;
+ s2 %= BASE;
+ }
+ return (s2 << 16) | s1;
+}
+/* --- adler32.c */
+
+#ifdef _KERNEL
+static int
+zlib_modevent(module_t mod, int type, void *unused)
+{
+ switch (type) {
+ case MOD_LOAD:
+ return 0;
+ case MOD_UNLOAD:
+ return 0;
+ }
+ return EINVAL;
+}
+
+static moduledata_t zlib_mod = {
+ "zlib",
+ zlib_modevent,
+ 0
+};
+DECLARE_MODULE(zlib, zlib_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
+MODULE_VERSION(zlib, 1);
+#endif /* _KERNEL */
diff --git a/freebsd/sys/net/zlib.h b/freebsd/sys/net/zlib.h
new file mode 100644
index 00000000..3da670fd
--- /dev/null
+++ b/freebsd/sys/net/zlib.h
@@ -0,0 +1,1018 @@
+/* $FreeBSD$ */
+
+/*
+ * This file is derived from zlib.h and zconf.h from the zlib-1.0.4
+ * distribution by Jean-loup Gailly and Mark Adler, with some additions
+ * by Paul Mackerras to aid in implementing Deflate compression and
+ * decompression for PPP packets.
+ */
+
+/*
+ * ==FILEVERSION 971127==
+ *
+ * This marker is used by the Linux installation script to determine
+ * whether an up-to-date version of this file is already installed.
+ */
+
+
+/* +++ zlib.h */
+/*-
+ zlib.h -- interface of the 'zlib' general purpose compression library
+ version 1.0.4, Jul 24th, 1996.
+
+ Copyright (C) 1995-1996 Jean-loup Gailly and Mark Adler
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Jean-loup Gailly Mark Adler
+ gzip@prep.ai.mit.edu madler@alumni.caltech.edu
+*/
+/*
+ The data format used by the zlib library is described by RFCs (Request for
+ Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
+ (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
+*/
+
+#ifndef _ZLIB_H
+#define _ZLIB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* +++ zconf.h */
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-1996 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* From: zconf.h,v 1.20 1996/07/02 15:09:28 me Exp $ */
+
+#ifndef _ZCONF_H
+#define _ZCONF_H
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ */
+#ifdef Z_PREFIX
+# define deflateInit_ z_deflateInit_
+# define deflate z_deflate
+# define deflateEnd z_deflateEnd
+# define inflateInit_ z_inflateInit_
+# define inflate z_inflate
+# define inflateEnd z_inflateEnd
+# define deflateInit2_ z_deflateInit2_
+# define deflateSetDictionary z_deflateSetDictionary
+# define deflateCopy z_deflateCopy
+# define deflateReset z_deflateReset
+# define deflateParams z_deflateParams
+# define inflateInit2_ z_inflateInit2_
+# define inflateSetDictionary z_inflateSetDictionary
+# define inflateSync z_inflateSync
+# define inflateReset z_inflateReset
+# define compress z_compress
+# define uncompress z_uncompress
+# define adler32 z_adler32
+#if 0
+# define crc32 z_crc32
+# define get_crc_table z_get_crc_table
+#endif
+
+# define Byte z_Byte
+# define uInt z_uInt
+# define uLong z_uLong
+# define Bytef z_Bytef
+# define charf z_charf
+# define intf z_intf
+# define uIntf z_uIntf
+# define uLongf z_uLongf
+# define voidpf z_voidpf
+# define voidp z_voidp
+#endif
+
+#if (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32)
+# define WIN32
+#endif
+#if defined(__GNUC__) || defined(WIN32) || defined(__386__) || defined(i386)
+# ifndef __32BIT__
+# define __32BIT__
+# endif
+#endif
+#if defined(__MSDOS__) && !defined(MSDOS)
+# define MSDOS
+#endif
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ */
+#if defined(MSDOS) && !defined(__32BIT__)
+# define MAXSEG_64K
+#endif
+#ifdef MSDOS
+# define UNALIGNED_OK
+#endif
+
+#if (defined(MSDOS) || defined(_WINDOWS) || defined(WIN32)) && !defined(STDC)
+# define STDC
+#endif
+#if (defined(__STDC__) || defined(__cplusplus)) && !defined(STDC)
+# define STDC
+#endif
+
+#ifndef STDC
+# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+# define const
+# endif
+#endif
+
+/* Some Mac compilers merge all .h files incorrectly: */
+#if defined(__MWERKS__) || defined(applec) ||defined(THINK_C) ||defined(__SC__)
+# define NO_DUMMY_DECL
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+# ifdef MAXSEG_64K
+# define MAX_MEM_LEVEL 8
+# else
+# define MAX_MEM_LEVEL 9
+# endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2 */
+#ifndef MAX_WBITS
+# define MAX_WBITS 15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+ 1 << (windowBits+2) + 1 << (memLevel+9)
+ that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+ make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+ The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus a few kilobytes
+ for small objects.
+*/
+
+ /* Type declarations */
+
+#ifndef OF /* function prototypes */
+# ifdef STDC
+# define OF(args) args
+# else
+# define OF(args) ()
+# endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h. If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#if (defined(M_I86SM) || defined(M_I86MM)) && !defined(__32BIT__)
+ /* MSC small or medium model */
+# define SMALL_MEDIUM
+# ifdef _MSC_VER
+# define FAR __far
+# else
+# define FAR far
+# endif
+#endif
+#if defined(__BORLANDC__) && (defined(__SMALL__) || defined(__MEDIUM__))
+# ifndef __32BIT__
+# define SMALL_MEDIUM
+# define FAR __far
+# endif
+#endif
+#ifndef FAR
+# define FAR
+#endif
+
+typedef unsigned char Byte; /* 8 bits */
+typedef unsigned int uInt; /* 16 bits or more */
+typedef unsigned long uLong; /* 32 bits or more */
+
+#if defined(__BORLANDC__) && defined(SMALL_MEDIUM)
+ /* Borland C/C++ ignores FAR inside typedef */
+# define Bytef Byte FAR
+#else
+ typedef Byte FAR Bytef;
+#endif
+typedef char FAR charf;
+typedef int FAR intf;
+typedef uInt FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+ typedef void FAR *voidpf;
+ typedef void *voidp;
+#else
+ typedef Byte FAR *voidpf;
+ typedef Byte *voidp;
+#endif
+
+
+/* Compile with -DZLIB_DLL for Windows DLL support */
+#if (defined(_WINDOWS) || defined(WINDOWS)) && defined(ZLIB_DLL)
+# include <freebsd/windows.h>
+# define EXPORT WINAPI
+#else
+# define EXPORT
+#endif
+
+#endif /* _ZCONF_H */
+/* --- zconf.h */
+
+#define ZLIB_VERSION "1.0.4P"
+
+/*
+ The 'zlib' compression library provides in-memory compression and
+ decompression functions, including integrity checks of the uncompressed
+ data. This version of the library supports only one compression method
+ (deflation) but other algorithms may be added later and will have the same
+ stream interface.
+
+ For compression the application must provide the output buffer and
+ may optionally provide the input buffer for optimization. For decompression,
+ the application must provide the input buffer and may optionally provide
+ the output buffer for optimization.
+
+ Compression can be done in a single step if the buffers are large
+ enough (for example if an input file is mmap'ed), or can be done by
+ repeated calls of the compression function. In the latter case, the
+ application must provide more input and/or consume the output
+ (providing more output space) before each call.
+
+ The library does not install any signal handler. It is recommended to
+ add at least a handler for SIGSEGV when decompressing; the library checks
+ the consistency of the input data whenever possible but may go nuts
+ for some forms of corrupted input.
+*/
+
+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
+typedef void (*free_func) OF((voidpf opaque, voidpf address));
+
+struct internal_state;
+
+typedef struct z_stream_s {
+ Bytef *next_in; /* next input byte */
+ uInt avail_in; /* number of bytes available at next_in */
+ uLong total_in; /* total nb of input bytes read so far */
+
+ Bytef *next_out; /* next output byte should be put there */
+ uInt avail_out; /* remaining free space at next_out */
+ uLong total_out; /* total nb of bytes output so far */
+
+ const char *msg; /* last error message, NULL if no error */
+ struct internal_state FAR *state; /* not visible by applications */
+
+ alloc_func zalloc; /* used to allocate the internal state */
+ free_func zfree; /* used to free the internal state */
+ voidpf opaque; /* private data object passed to zalloc and zfree */
+
+ int data_type; /* best guess about the data type: ascii or binary */
+ uLong adler; /* adler32 value of the uncompressed data */
+ uLong reserved; /* reserved for future use */
+} z_stream;
+
+typedef z_stream FAR *z_streamp;
+
+/*
+ The application must update next_in and avail_in when avail_in has
+ dropped to zero. It must update next_out and avail_out when avail_out
+ has dropped to zero. The application must initialize zalloc, zfree and
+ opaque before calling the init function. All other fields are set by the
+ compression library and must not be updated by the application.
+
+ The opaque value provided by the application will be passed as the first
+ parameter for calls of zalloc and zfree. This can be useful for custom
+ memory management. The compression library attaches no meaning to the
+ opaque value.
+
+ zalloc must return Z_NULL if there is not enough memory for the object.
+ On 16-bit systems, the functions zalloc and zfree must be able to allocate
+ exactly 65536 bytes, but will not be required to allocate more than this
+ if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS,
+ pointers returned by zalloc for objects of exactly 65536 bytes *must*
+ have their offset normalized to zero. The default allocation function
+ provided by this library ensures this (see zutil.c). To reduce memory
+ requirements and avoid any allocation of 64K objects, at the expense of
+ compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h).
+
+ The fields total_in and total_out can be used for statistics or
+ progress reports. After compression, total_in holds the total size of
+ the uncompressed data and may be saved for use in the decompressor
+ (particularly if the decompressor wants to decompress everything in
+ a single step).
+*/
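+
+/*
+   A minimal preparation sketch for the rules above.  in_buf and in_len
+   are placeholders supplied by the caller; setting the callbacks to
+   Z_NULL selects the default allocation functions:
+
+     z_stream strm;
+
+     strm.zalloc = Z_NULL;
+     strm.zfree  = Z_NULL;
+     strm.opaque = Z_NULL;
+     strm.next_in  = in_buf;
+     strm.avail_in = in_len;
+*/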
+
+ /* constants */
+
+#define Z_NO_FLUSH 0
+#define Z_PARTIAL_FLUSH 1
+#define Z_PACKET_FLUSH 2
+#define Z_SYNC_FLUSH 3
+#define Z_FULL_FLUSH 4
+#define Z_FINISH 5
+/* Allowed flush values; see deflate() below for details */
+
+#define Z_OK 0
+#define Z_STREAM_END 1
+#define Z_NEED_DICT 2
+#define Z_ERRNO (-1)
+#define Z_STREAM_ERROR (-2)
+#define Z_DATA_ERROR (-3)
+#define Z_MEM_ERROR (-4)
+#define Z_BUF_ERROR (-5)
+#define Z_VERSION_ERROR (-6)
+/* Return codes for the compression/decompression functions. Negative
+ * values are errors, positive values are used for special but normal events.
+ */
+
+#define Z_NO_COMPRESSION 0
+#define Z_BEST_SPEED 1
+#define Z_BEST_COMPRESSION 9
+#define Z_DEFAULT_COMPRESSION (-1)
+/* compression levels */
+
+#define Z_FILTERED 1
+#define Z_HUFFMAN_ONLY 2
+#define Z_DEFAULT_STRATEGY 0
+/* compression strategy; see deflateInit2() below for details */
+
+#define Z_BINARY 0
+#define Z_ASCII 1
+#define Z_UNKNOWN 2
+/* Possible values of the data_type field */
+
+#define Z_DEFLATED 8
+/* The deflate compression method (the only one supported in this version) */
+
+#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */
+
+#define zlib_version zlibVersion()
+/* for compatibility with versions < 1.0.2 */
+
+ /* basic functions */
+
+extern const char * EXPORT zlibVersion OF((void));
+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
+ If the first character differs, the library code actually used is
+ not compatible with the zlib.h header file used by the application.
+ This check is automatically made by deflateInit and inflateInit.
+ */
+
+/*
+extern int EXPORT deflateInit OF((z_streamp strm, int level));
+
+ Initializes the internal stream state for compression. The fields
+ zalloc, zfree and opaque must be initialized before by the caller.
+ If zalloc and zfree are set to Z_NULL, deflateInit updates them to
+ use default allocation functions.
+
+ The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
+ 1 gives best speed, 9 gives best compression, 0 gives no compression at
+ all (the input data is simply copied a block at a time).
+ Z_DEFAULT_COMPRESSION requests a default compromise between speed and
+ compression (currently equivalent to level 6).
+
+ deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_STREAM_ERROR if level is not a valid compression level,
+ Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
+ with the version assumed by the caller (ZLIB_VERSION).
+ msg is set to null if there is no error message. deflateInit does not
+ perform any compression: this will be done by deflate().
+*/
+
+
+extern int EXPORT deflate OF((z_streamp strm, int flush));
+/*
+ Performs one or both of the following actions:
+
+ - Compress more input starting at next_in and update next_in and avail_in
+ accordingly. If not all input can be processed (because there is not
+ enough room in the output buffer), next_in and avail_in are updated and
+ processing will resume at this point for the next call of deflate().
+
+ - Provide more output starting at next_out and update next_out and avail_out
+ accordingly. This action is forced if the parameter flush is non zero.
+ Forcing flush frequently degrades the compression ratio, so this parameter
+ should be set only when necessary (in interactive applications).
+ Some output may be provided even if flush is not set.
+
+ Before the call of deflate(), the application should ensure that at least
+ one of the actions is possible, by providing more input and/or consuming
+ more output, and updating avail_in or avail_out accordingly; avail_out
+ should never be zero before the call. The application can consume the
+ compressed output when it wants, for example when the output buffer is full
+ (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK
+ and with zero avail_out, it must be called again after making room in the
+ output buffer because there might be more output pending.
+
+ If the parameter flush is set to Z_PARTIAL_FLUSH, the current compression
+ block is terminated and flushed to the output buffer so that the
+ decompressor can get all input data available so far. For method 9, a future
+ variant on method 8, the current block will be flushed but not terminated.
+ Z_SYNC_FLUSH has the same effect as partial flush except that the compressed
+ output is byte aligned (the compressor can clear its internal bit buffer)
+ and the current block is always terminated; this can be useful if the
+ compressor has to be restarted from scratch after an interruption (in which
+ case the internal state of the compressor may be lost).
+ If flush is set to Z_FULL_FLUSH, the compression block is terminated, a
+ special marker is output and the compression dictionary is discarded; this
+ is useful to allow the decompressor to synchronize if one compressed block
+ has been damaged (see inflateSync below). Flushing degrades compression and
+ so should be used only when necessary. Using Z_FULL_FLUSH too often can
+ seriously degrade the compression. If deflate returns with avail_out == 0,
+ this function must be called again with the same value of the flush
+ parameter and more output space (updated avail_out), until the flush is
+ complete (deflate returns with non-zero avail_out).
+
+ If the parameter flush is set to Z_PACKET_FLUSH, the compression
+ block is terminated, and a zero-length stored block is output,
+ omitting the length bytes (the effect of this is that the 3-bit type
+ code 000 for a stored block is output, and the output is then
+ byte-aligned). This is designed for use at the end of a PPP packet.
+
+ If the parameter flush is set to Z_FINISH, pending input is processed,
+ pending output is flushed and deflate returns with Z_STREAM_END if there
+ was enough output space; if deflate returns with Z_OK, this function must be
+ called again with Z_FINISH and more output space (updated avail_out) but no
+ more input data, until it returns with Z_STREAM_END or an error. After
+ deflate has returned Z_STREAM_END, the only possible operations on the
+ stream are deflateReset or deflateEnd.
+
+ Z_FINISH can be used immediately after deflateInit if all the compression
+ is to be done in a single step. In this case, avail_out must be at least
+ 0.1% larger than avail_in plus 12 bytes. If deflate does not return
+ Z_STREAM_END, then it must be called again as described above.
+
+ deflate() may update data_type if it can make a good guess about
+ the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered
+ binary. This field is only for information purposes and does not affect
+ the compression algorithm in any manner.
+
+ deflate() returns Z_OK if some progress has been made (more input
+ processed or more output produced), Z_STREAM_END if all input has been
+ consumed and all output has been produced (only when flush is set to
+ Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
+ if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible.
+*/
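+
+/*
+   A minimal single-shot compression loop following the rules above.
+   in_buf, in_len, out_buf, OUT_SIZE and consume_output() are
+   placeholders the caller must supply; since all input is present up
+   front, Z_FINISH is used on every call:
+
+     z_stream s;
+     int err;
+
+     s.zalloc = Z_NULL;
+     s.zfree  = Z_NULL;
+     s.opaque = Z_NULL;
+     s.next_in  = in_buf;
+     s.avail_in = in_len;
+     if ((err = deflateInit(&s, Z_DEFAULT_COMPRESSION)) != Z_OK)
+         return err;
+     do {
+         s.next_out  = out_buf;
+         s.avail_out = OUT_SIZE;
+         err = deflate(&s, Z_FINISH);
+         consume_output(out_buf, OUT_SIZE - s.avail_out);
+     } while (err == Z_OK);
+     (void) deflateEnd(&s);
+     return (err == Z_STREAM_END) ? Z_OK : err;
+*/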
+
+
+extern int EXPORT deflateEnd OF((z_streamp strm));
+/*
+ All dynamically allocated data structures for this stream are freed.
+ This function discards any unprocessed input and does not flush any
+ pending output.
+
+ deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
+ stream state was inconsistent, Z_DATA_ERROR if the stream was freed
+ prematurely (some input or output was discarded). In the error case,
+ msg may be set but then points to a static string (which must not be
+ deallocated).
+*/
+
+
+/*
+extern int EXPORT inflateInit OF((z_streamp strm));
+
+ Initializes the internal stream state for decompression. The fields
+ zalloc, zfree and opaque must be initialized before by the caller. If
+ zalloc and zfree are set to Z_NULL, inflateInit updates them to use default
+ allocation functions.
+
+ inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_VERSION_ERROR if the zlib library version is incompatible
+ with the version assumed by the caller. msg is set to null if there is no
+ error message. inflateInit does not perform any decompression: this will be
+ done by inflate().
+*/
+
+#if defined(__FreeBSD__) && defined(_KERNEL)
+#define inflate inflate_ppp /* FreeBSD already has an inflate :-( */
+#endif
+
+extern int EXPORT inflate OF((z_streamp strm, int flush));
+/*
+ Performs one or both of the following actions:
+
+ - Decompress more input starting at next_in and update next_in and avail_in
+ accordingly. If not all input can be processed (because there is not
+ enough room in the output buffer), next_in is updated and processing
+ will resume at this point for the next call of inflate().
+
+ - Provide more output starting at next_out and update next_out and avail_out
+ accordingly. inflate() provides as much output as possible, until there
+ is no more input data or no more space in the output buffer (see below
+ about the flush parameter).
+
+ Before the call of inflate(), the application should ensure that at least
+ one of the actions is possible, by providing more input and/or consuming
+ more output, and updating the next_* and avail_* values accordingly.
+ The application can consume the uncompressed output when it wants, for
+ example when the output buffer is full (avail_out == 0), or after each
+ call of inflate(). If inflate returns Z_OK and with zero avail_out, it
+ must be called again after making room in the output buffer because there
+ might be more output pending.
+
+ If the parameter flush is set to Z_PARTIAL_FLUSH or Z_PACKET_FLUSH,
+ inflate flushes as much output as possible to the output buffer. The
+ flushing behavior of inflate is not specified for values of the flush
+ parameter other than Z_PARTIAL_FLUSH, Z_PACKET_FLUSH or Z_FINISH, but the
+ current implementation actually flushes as much output as possible
+ anyway. For Z_PACKET_FLUSH, inflate checks that once all the input data
+ has been consumed, it is expecting to see the length field of a stored
+ block; if not, it returns Z_DATA_ERROR.
+
+ inflate() should normally be called until it returns Z_STREAM_END or an
+ error. However if all decompression is to be performed in a single step
+ (a single call of inflate), the parameter flush should be set to
+ Z_FINISH. In this case all pending input is processed and all pending
+ output is flushed; avail_out must be large enough to hold all the
+ uncompressed data. (The size of the uncompressed data may have been saved
+ by the compressor for this purpose.) The next operation on this stream must
+ be inflateEnd to deallocate the decompression state. The use of Z_FINISH
+ is never required, but can be used to inform inflate that a faster routine
+ may be used for the single inflate() call.
+
+ inflate() returns Z_OK if some progress has been made (more input
+ processed or more output produced), Z_STREAM_END if the end of the
+ compressed data has been reached and all uncompressed output has been
+ produced, Z_NEED_DICT if a preset dictionary is needed at this point (see
+ inflateSetDictionary below), Z_DATA_ERROR if the input data was corrupted,
+ Z_STREAM_ERROR if the stream structure was inconsistent (for example if
+ next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory,
+ Z_BUF_ERROR if no progress is possible or if there was not enough room in
+ the output buffer when Z_FINISH is used. In the Z_DATA_ERROR case, the
+ application may then call inflateSync to look for a good compression block.
+ In the Z_NEED_DICT case, strm->adler is set to the Adler32 value of the
+ dictionary chosen by the compressor.
+*/
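+
+/*
+   The matching decompression loop; comp_buf, comp_len, out_buf,
+   OUT_SIZE and consume_output() are placeholders.  Z_PARTIAL_FLUSH is
+   used because this variant of the library specifies inflate's
+   flushing behavior only for Z_PARTIAL_FLUSH, Z_PACKET_FLUSH and
+   Z_FINISH (see above):
+
+     z_stream s;
+     int err;
+
+     s.zalloc = Z_NULL;
+     s.zfree  = Z_NULL;
+     s.opaque = Z_NULL;
+     s.next_in  = comp_buf;
+     s.avail_in = comp_len;
+     if ((err = inflateInit(&s)) != Z_OK)
+         return err;
+     do {
+         s.next_out  = out_buf;
+         s.avail_out = OUT_SIZE;
+         err = inflate(&s, Z_PARTIAL_FLUSH);
+         consume_output(out_buf, OUT_SIZE - s.avail_out);
+     } while (err == Z_OK);
+     (void) inflateEnd(&s);
+     return (err == Z_STREAM_END) ? Z_OK : err;
+*/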
+
+
+extern int EXPORT inflateEnd OF((z_streamp strm));
+/*
+ All dynamically allocated data structures for this stream are freed.
+ This function discards any unprocessed input and does not flush any
+ pending output.
+
+ inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
+ was inconsistent. In the error case, msg may be set but then points to a
+ static string (which must not be deallocated).
+*/
+
+ /* Advanced functions */
+
+/*
+ The following functions are needed only in some special applications.
+*/
+
+/*
+extern int EXPORT deflateInit2 OF((z_streamp strm,
+ int level,
+ int method,
+ int windowBits,
+ int memLevel,
+ int strategy));
+
+ This is another version of deflateInit with more compression options. The
+ fields next_in, zalloc, zfree and opaque must be initialized before by
+ the caller.
+
+ The method parameter is the compression method. It must be Z_DEFLATED in
+ this version of the library. (Method 9 will allow a 64K history buffer and
+ partial block flushes.)
+
+ The windowBits parameter is the base two logarithm of the window size
+ (the size of the history buffer). It should be in the range 8..15 for this
+ version of the library (the value 16 will be allowed for method 9). Larger
+ values of this parameter result in better compression at the expense of
+ memory usage. The default value is 15 if deflateInit is used instead.
+
+ The memLevel parameter specifies how much memory should be allocated
+ for the internal compression state. memLevel=1 uses minimum memory but
+ is slow and reduces compression ratio; memLevel=9 uses maximum memory
+ for optimal speed. The default value is 8. See zconf.h for total memory
+ usage as a function of windowBits and memLevel.
+
+ The strategy parameter is used to tune the compression algorithm. Use the
+ value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+ filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no
+ string match). Filtered data consists mostly of small values with a
+ somewhat random distribution. In this case, the compression algorithm is
+ tuned to compress them better. The effect of Z_FILTERED is to force more
+ Huffman coding and less string matching; it is somewhat intermediate
+ between Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. The strategy parameter only affects
+ the compression ratio but not the correctness of the compressed output even
+ if it is not set appropriately.
+
+ If next_in is not null, the library will use this buffer to hold also
+ some history information; the buffer must either hold the entire input
+ data, or have at least 1<<(windowBits+1) bytes and be writable. If next_in
+ is null, the library will allocate its own history buffer (and leave next_in
+ null). next_out need not be provided here but must be provided by the
+ application for the next call of deflate().
+
+ If the history buffer is provided by the application, next_in must
+ never be changed by the application since the compressor maintains
+ information inside this buffer from call to call; the application
+ must provide more input only by increasing avail_in. next_in is always
+ reset by the library in this case.
+
+ deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was
+ not enough memory, Z_STREAM_ERROR if a parameter is invalid (such as
+ an invalid method). msg is set to null if there is no error message.
+ deflateInit2 does not perform any compression: this will be done by
+ deflate().
+*/
+
+extern int EXPORT deflateSetDictionary OF((z_streamp strm,
+ const Bytef *dictionary,
+ uInt dictLength));
+/*
+ Initializes the compression dictionary (history buffer) from the given
+ byte sequence without producing any compressed output. This function must
+ be called immediately after deflateInit or deflateInit2, before any call
+ of deflate. The compressor and decompressor must use exactly the same
+ dictionary (see inflateSetDictionary).
+ The dictionary should consist of strings (byte sequences) that are likely
+ to be encountered later in the data to be compressed, with the most commonly
+ used strings preferably put towards the end of the dictionary. Using a
+ dictionary is most useful when the data to be compressed is short and
+ can be predicted with good accuracy; the data can then be compressed better
+ than with the default empty dictionary. In this version of the library,
+ only the last 32K bytes of the dictionary are used.
+ Upon return of this function, strm->adler is set to the Adler32 value
+ of the dictionary; the decompressor may later use this value to determine
+ which dictionary has been used by the compressor. (The Adler32 value
+ applies to the whole dictionary even if only a subset of the dictionary is
+ actually used by the compressor.)
+
+ deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
+ parameter is invalid (such as NULL dictionary) or the stream state
+ is inconsistent (for example if deflate has already been called for this
+ stream). deflateSetDictionary does not perform any compression: this will
+ be done by deflate().
+*/
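+
+/*
+   A short dictionary sketch; dict and dict_len are placeholders.  The
+   decompressor must later be handed the same bytes via
+   inflateSetDictionary when inflate returns Z_NEED_DICT:
+
+     if ((err = deflateInit(&s, Z_DEFAULT_COMPRESSION)) == Z_OK)
+         err = deflateSetDictionary(&s, dict, dict_len);
+*/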
+
+extern int EXPORT deflateCopy OF((z_streamp dest,
+ z_streamp source));
+/*
+ Sets the destination stream as a complete copy of the source stream. If
+ the source stream is using an application-supplied history buffer, a new
+ buffer is allocated for the destination stream. The compressed output
+ buffer is always application-supplied. It's the responsibility of the
+ application to provide the correct values of next_out and avail_out for the
+ next call of deflate.
+
+ This function can be useful when several compression strategies will be
+ tried, for example when there are several ways of pre-processing the input
+ data with a filter. The streams that will be discarded should then be freed
+ by calling deflateEnd. Note that deflateCopy duplicates the internal
+ compression state which can be quite large, so this strategy is slow and
+ can consume lots of memory.
+
+ deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+ (such as zalloc being NULL). msg is left unchanged in both source and
+ destination.
+*/
+
+extern int EXPORT deflateReset OF((z_streamp strm));
+/*
+ This function is equivalent to deflateEnd followed by deflateInit,
+ but does not free and reallocate all the internal compression state.
+ The stream will keep the same compression level and any other attributes
+ that may have been set by deflateInit2.
+
+ deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent (such as zalloc or state being NULL).
+*/
+
+extern int EXPORT deflateParams OF((z_streamp strm, int level, int strategy));
+/*
+ Dynamically update the compression level and compression strategy.
+ This can be used to switch between compression and straight copy of
+ the input data, or to switch to a different kind of input data requiring
+ a different strategy. If the compression level is changed, the input
+ available so far is compressed with the old level (and may be flushed);
+ the new level will take effect only at the next call of deflate().
+
+ Before the call of deflateParams, the stream state must be set as for
+ a call of deflate(), since the currently available input may have to
+ be compressed and flushed. In particular, strm->avail_out must be non-zero.
+
+ deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
+ stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
+ if strm->avail_out was zero.
+*/
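+
+/*
+   Illustrative use of deflateParams: switch to a straight stored copy
+   for a known-incompressible region, then restore the default level.
+   The stream s is assumed to be mid-compression with avail_out
+   non-zero, as required above:
+
+     err = deflateParams(&s, Z_NO_COMPRESSION, Z_DEFAULT_STRATEGY);
+     (pass the incompressible region through deflate() here)
+     err = deflateParams(&s, Z_DEFAULT_COMPRESSION, Z_DEFAULT_STRATEGY);
+*/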
+
+extern int EXPORT deflateOutputPending OF((z_streamp strm));
+/*
+ Returns the number of bytes of output which are immediately
+ available from the compressor (i.e. without any further input
+ or flush).
+*/
+
+/*
+extern int EXPORT inflateInit2 OF((z_streamp strm,
+ int windowBits));
+
+ This is another version of inflateInit with more compression options. The
+ fields next_out, zalloc, zfree and opaque must be initialized before by
+ the caller.
+
+ The windowBits parameter is the base two logarithm of the maximum window
+ size (the size of the history buffer). It should be in the range 8..15 for
+ this version of the library (the value 16 will be allowed soon). The
+ default value is 15 if inflateInit is used instead. If a compressed stream
+ with a larger window size is given as input, inflate() will return with
+ the error code Z_DATA_ERROR instead of trying to allocate a larger window.
+
+ If next_out is not null, the library will use this buffer for the history
+ buffer; the buffer must either be large enough to hold the entire output
+ data, or have at least 1<<windowBits bytes. If next_out is null, the
+ library will allocate its own buffer (and leave next_out null). next_in
+ need not be provided here but must be provided by the application for the
+ next call of inflate().
+
+ If the history buffer is provided by the application, next_out must
+ never be changed by the application since the decompressor maintains
+ history information inside this buffer from call to call; the application
+ can only reset next_out to the beginning of the history buffer when
+ avail_out is zero and all output has been consumed.
+
+ inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was
+ not enough memory, Z_STREAM_ERROR if a parameter is invalid (such as
+ windowBits < 8). msg is set to null if there is no error message.
+ inflateInit2 does not perform any decompression: this will be done by
+ inflate().
+*/
+
+extern int EXPORT inflateSetDictionary OF((z_streamp strm,
+ const Bytef *dictionary,
+ uInt dictLength));
+/*
+ Initializes the decompression dictionary (history buffer) from the given
+ uncompressed byte sequence. This function must be called immediately after
+ a call of inflate if this call returned Z_NEED_DICT. The dictionary chosen
+ by the compressor can be determined from the Adler32 value returned by this
+ call of inflate. The compressor and decompressor must use exactly the same
+ dictionary (see deflateSetDictionary).
+
+ inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
+ parameter is invalid (such as NULL dictionary) or the stream state is
+ inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
+ expected one (incorrect Adler32 value). inflateSetDictionary does not
+ perform any decompression: this will be done by subsequent calls of
+ inflate().
+*/
+
+extern int EXPORT inflateSync OF((z_streamp strm));
+/*
+ Skips invalid compressed data until the special marker (see deflate()
+ above) can be found, or until all available input is skipped. No output
+ is provided.
+
+ inflateSync returns Z_OK if the special marker has been found, Z_BUF_ERROR
+ if no more input was provided, Z_DATA_ERROR if no marker has been found,
+ or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
+ case, the application may save the current value of total_in which
+ indicates where valid compressed data was found. In the error case, the
+ application may repeatedly call inflateSync, providing more input each time,
+ until success or end of the input data.
+*/
+
+extern int EXPORT inflateReset OF((z_streamp strm));
+/*
+ This function is equivalent to inflateEnd followed by inflateInit,
+ but does not free and reallocate all the internal decompression state.
+ The stream will keep attributes that may have been set by inflateInit2.
+
+ inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent (such as zalloc or state being NULL).
+*/
+
+extern int inflateIncomp OF((z_stream *strm));
+/*
+ This function adds the data at next_in (avail_in bytes) to the output
+ history without performing any output. There must be no pending output,
+ and the decompressor must be expecting to see the start of a block.
+ Calling this function is equivalent to decompressing a stored block
+ containing the data at next_in (except that the data is not output).
+*/
+
+ /* utility functions */
+
+/*
+ The following utility functions are implemented on top of the
+ basic stream-oriented functions. To simplify the interface, some
+ default options are assumed (compression level, window size,
+ standard memory allocation functions). The source code of these
+ utility functions can easily be modified if you need special options.
+*/
+
+extern int EXPORT compress OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen));
+/*
+ Compresses the source buffer into the destination buffer. sourceLen is
+ the byte length of the source buffer. Upon entry, destLen is the total
+ size of the destination buffer, which must be at least 0.1% larger than
+ sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the
+ compressed buffer.
+ This function can be used to compress a whole file at once if the
+ input file is mmap'ed.
+ compress returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_BUF_ERROR if there was not enough room in the output
+ buffer.
+*/
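+
+/*
+   One-shot sketch showing the sizing rule above; src, src_len and
+   allocate() are placeholders (use whatever allocator is appropriate
+   for the environment):
+
+     uLong dest_len = src_len + src_len / 1000 + 12;
+     Bytef *dest = allocate(dest_len);
+     int err = compress(dest, &dest_len, src, src_len);
+
+   On success, dest holds dest_len compressed bytes.
+*/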
+
+extern int EXPORT uncompress OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen));
+/*
+ Decompresses the source buffer into the destination buffer. sourceLen is
+ the byte length of the source buffer. Upon entry, destLen is the total
+ size of the destination buffer, which must be large enough to hold the
+ entire uncompressed data. (The size of the uncompressed data must have
+ been saved previously by the compressor and transmitted to the decompressor
+ by some mechanism outside the scope of this compression library.)
+ Upon exit, destLen is the actual size of the uncompressed data.
+ This function can be used to decompress a whole file at once if the
+ input file is mmap'ed.
+
+ uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_BUF_ERROR if there was not enough room in the output
+ buffer, or Z_DATA_ERROR if the input data was corrupted.
+*/
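+
+/*
+   The matching one-shot decompression sketch; orig_len must be the
+   uncompressed size recorded by the producer, and all names are
+   placeholders:
+
+     uLong dest_len = orig_len;
+     int err = uncompress(dest, &dest_len, comp, comp_len);
+*/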
+
+
+typedef voidp gzFile;
+
+extern gzFile EXPORT gzopen OF((const char *path, const char *mode));
+/*
+ Opens a gzip (.gz) file for reading or writing. The mode parameter
+ is as in fopen ("rb" or "wb") but can also include a compression level
+ ("wb9"). gzopen can be used to read a file which is not in gzip format;
+ in this case gzread will directly read from the file without decompression.
+ gzopen returns NULL if the file could not be opened or if there was
+ insufficient memory to allocate the (de)compression state; errno
+ can be checked to distinguish the two cases (if errno is zero, the
+ zlib error is Z_MEM_ERROR).
+*/
+
+extern gzFile EXPORT gzdopen OF((int fd, const char *mode));
+/*
+ gzdopen() associates a gzFile with the file descriptor fd. File
+ descriptors are obtained from calls like open, dup, creat, pipe or
+ fileno (if the file has been previously opened with fopen).
+ The mode parameter is as in gzopen.
+ The next call of gzclose on the returned gzFile will also close the
+ file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file
+ descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode).
+ gzdopen returns NULL if there was insufficient memory to allocate
+ the (de)compression state.
+*/
+
+extern int EXPORT gzread OF((gzFile file, voidp buf, unsigned len));
+/*
+ Reads the given number of uncompressed bytes from the compressed file.
+ If the input file was not in gzip format, gzread copies the given number
+ of bytes into the buffer.
+ gzread returns the number of uncompressed bytes actually read (0 for
+ end of file, -1 for error). */
+
+extern int EXPORT gzwrite OF((gzFile file, const voidp buf, unsigned len));
+/*
+ Writes the given number of uncompressed bytes into the compressed file.
+ gzwrite returns the number of uncompressed bytes actually written
+ (0 in case of error).
+*/
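+
+/*
+   A brief sketch writing and re-reading a .gz file with the calls
+   above; path, buf and len are placeholders, and error handling is
+   abbreviated (gzread returns -1 on error and 0 at end of file):
+
+     gzFile g = gzopen(path, "wb9");
+     if (g != NULL) {
+         (void) gzwrite(g, buf, len);
+         (void) gzclose(g);
+     }
+     g = gzopen(path, "rb");
+     if (g != NULL) {
+         int n = gzread(g, buf, len);
+         (void) gzclose(g);
+     }
+*/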
+
+extern int EXPORT gzflush OF((gzFile file, int flush));
+/*
+ Flushes all pending output into the compressed file. The parameter
+ flush is as in the deflate() function. The return value is the zlib
+ error number (see function gzerror below). gzflush returns Z_OK if
+ the flush parameter is Z_FINISH and all output could be flushed.
+ gzflush should be called only when strictly necessary because it can
+ degrade compression.
+*/
+
+extern int EXPORT gzclose OF((gzFile file));
+/*
+ Flushes all pending output if necessary, closes the compressed file
+ and deallocates all the (de)compression state. The return value is the zlib
+ error number (see function gzerror below).
+*/
+
+extern const char * EXPORT gzerror OF((gzFile file, int *errnum));
+/*
+ Returns the error message for the last error which occurred on the
+ given compressed file. errnum is set to zlib error number. If an
+ error occurred in the filesystem and not in the compression library,
+ errnum is set to Z_ERRNO and the application may consult errno
+ to get the exact error code.
+*/
+
+ /* checksum functions */
+
+/*
+ These functions are not related to compression but are exported
+ anyway because they might be useful in applications using the
+ compression library.
+*/
+
+extern uLong EXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+
+/*
+ Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+ return the updated checksum. If buf is NULL, this function returns
+ the required initial value for the checksum.
+ An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
+ much faster. Usage example:
+
+ uLong adler = adler32(0L, Z_NULL, 0);
+
+ while (read_buffer(buffer, length) != EOF) {
+ adler = adler32(adler, buffer, length);
+ }
+ if (adler != original_adler) error();
+*/
+
+#if 0
+extern uLong EXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
+/*
+ Update a running crc with the bytes buf[0..len-1] and return the updated
+ crc. If buf is NULL, this function returns the required initial value
+ for the crc. Pre- and post-conditioning (one's complement) is performed
+ within this function so it shouldn't be done by the application.
+ Usage example:
+
+ uLong crc = crc32(0L, Z_NULL, 0);
+
+ while (read_buffer(buffer, length) != EOF) {
+ crc = crc32(crc, buffer, length);
+ }
+ if (crc != original_crc) error();
+*/
+#endif
+
+
+ /* various hacks, don't look :) */
+
+/* deflateInit and inflateInit are macros to allow checking the zlib version
+ * and the compiler's view of z_stream:
+ */
+extern int EXPORT deflateInit_ OF((z_streamp strm, int level,
+ const char *version, int stream_size));
+extern int EXPORT inflateInit_ OF((z_streamp strm,
+ const char *version, int stream_size));
+extern int EXPORT deflateInit2_ OF((z_streamp strm, int level, int method,
+ int windowBits, int memLevel, int strategy,
+ const char *version, int stream_size));
+extern int EXPORT inflateInit2_ OF((z_streamp strm, int windowBits,
+ const char *version, int stream_size));
+#define deflateInit(strm, level) \
+ deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream))
+#define inflateInit(strm) \
+ inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream))
+#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+ deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+ (strategy), ZLIB_VERSION, sizeof(z_stream))
+#define inflateInit2(strm, windowBits) \
+ inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream))
+
+#if !defined(_Z_UTIL_H) && !defined(NO_DUMMY_DECL)
+ struct internal_state {int dummy;}; /* hack for buggy compilers */
+#endif
+
+uLongf *get_crc_table OF((void)); /* can be used by asm versions of crc32() */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZLIB_H */
+/* --- zlib.h */