author		Sebastian Huber <sebastian.huber@embedded-brains.de>	2016-10-07 15:10:20 +0200
committer	Sebastian Huber <sebastian.huber@embedded-brains.de>	2017-01-10 09:53:31 +0100
commit		c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f (patch)
tree		ad4f2519067709f00ab98b3c591186c26dc3a21f /freebsd/sys/net
parent		userspace-header-gen.py: Simplify program ports (diff)
download	rtems-libbsd-c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f.tar.bz2
Update to FreeBSD head 2016-08-23
Git mirror commit 9fe7c416e6abb28b1398fd3e5687099846800cfd.
Diffstat (limited to 'freebsd/sys/net')
-rw-r--r--  freebsd/sys/net/altq/altq.h  206
-rw-r--r--  freebsd/sys/net/altq/altq_cbq.c  1171
-rw-r--r--  freebsd/sys/net/altq/altq_cbq.h  225
-rw-r--r--  freebsd/sys/net/altq/altq_cdnr.c  1384
-rw-r--r--  freebsd/sys/net/altq/altq_cdnr.h  336
-rw-r--r--  freebsd/sys/net/altq/altq_classq.h  213
-rw-r--r--  freebsd/sys/net/altq/altq_codel.c  479
-rw-r--r--  freebsd/sys/net/altq/altq_codel.h  129
-rw-r--r--  freebsd/sys/net/altq/altq_fairq.c  911
-rw-r--r--  freebsd/sys/net/altq/altq_fairq.h  145
-rw-r--r--  freebsd/sys/net/altq/altq_hfsc.c  2240
-rw-r--r--  freebsd/sys/net/altq/altq_hfsc.h  319
-rw-r--r--  freebsd/sys/net/altq/altq_priq.c  1072
-rw-r--r--  freebsd/sys/net/altq/altq_priq.h  180
-rw-r--r--  freebsd/sys/net/altq/altq_red.c  1494
-rw-r--r--  freebsd/sys/net/altq/altq_red.h  199
-rw-r--r--  freebsd/sys/net/altq/altq_rio.c  846
-rw-r--r--  freebsd/sys/net/altq/altq_rio.h  145
-rw-r--r--  freebsd/sys/net/altq/altq_rmclass.c  1841
-rw-r--r--  freebsd/sys/net/altq/altq_rmclass.h  273
-rw-r--r--  freebsd/sys/net/altq/altq_rmclass_debug.h  113
-rw-r--r--  freebsd/sys/net/altq/altq_subr.c  1978
-rw-r--r--  freebsd/sys/net/altq/altq_var.h  243
-rw-r--r--  freebsd/sys/net/altq/if_altq.h  182
-rw-r--r--  freebsd/sys/net/bpf.c  390
-rw-r--r--  freebsd/sys/net/bpf.h  246
-rw-r--r--  freebsd/sys/net/bpf_buffer.c  5
-rw-r--r--  freebsd/sys/net/bpf_filter.c  11
-rw-r--r--  freebsd/sys/net/bridgestp.c  8
-rw-r--r--  freebsd/sys/net/ethernet.h  27
-rw-r--r--  freebsd/sys/net/flowtable.h  119
-rw-r--r--  freebsd/sys/net/ieee8023ad_lacp.c  201
-rw-r--r--  freebsd/sys/net/ieee8023ad_lacp.h  16
-rw-r--r--  freebsd/sys/net/if.c  1411
-rw-r--r--  freebsd/sys/net/if.h  117
-rw-r--r--  freebsd/sys/net/if_arc.h  2
-rw-r--r--  freebsd/sys/net/if_arcsubr.c  139
-rw-r--r--  freebsd/sys/net/if_arp.h  44
-rw-r--r--  freebsd/sys/net/if_atm.h  4
-rw-r--r--  freebsd/sys/net/if_atmsubr.c  15
-rw-r--r--  freebsd/sys/net/if_bridge.c  476
-rw-r--r--  freebsd/sys/net/if_clone.c  389
-rw-r--r--  freebsd/sys/net/if_clone.h  95
-rw-r--r--  freebsd/sys/net/if_dead.c  10
-rw-r--r--  freebsd/sys/net/if_disc.c  75
-rw-r--r--  freebsd/sys/net/if_dl.h  8
-rw-r--r--  freebsd/sys/net/if_edsc.c  69
-rw-r--r--  freebsd/sys/net/if_ef.c  610
-rw-r--r--  freebsd/sys/net/if_enc.c  458
-rw-r--r--  freebsd/sys/net/if_enc.h  9
-rw-r--r--  freebsd/sys/net/if_epair.c  125
-rw-r--r--  freebsd/sys/net/if_ethersubr.c  894
-rw-r--r--  freebsd/sys/net/if_faith.c  353
-rw-r--r--  freebsd/sys/net/if_fddisubr.c  213
-rw-r--r--  freebsd/sys/net/if_fwsubr.c  44
-rw-r--r--  freebsd/sys/net/if_gif.c  1111
-rw-r--r--  freebsd/sys/net/if_gif.h  82
-rw-r--r--  freebsd/sys/net/if_gre.c  1541
-rw-r--r--  freebsd/sys/net/if_gre.h  215
-rw-r--r--  freebsd/sys/net/if_iso88025subr.c  207
-rw-r--r--  freebsd/sys/net/if_lagg.c  1253
-rw-r--r--  freebsd/sys/net/if_lagg.h  127
-rw-r--r--  freebsd/sys/net/if_llatbl.c  745
-rw-r--r--  freebsd/sys/net/if_llatbl.h  147
-rw-r--r--  freebsd/sys/net/if_loop.c  96
-rw-r--r--  freebsd/sys/net/if_media.c  105
-rw-r--r--  freebsd/sys/net/if_media.h  123
-rw-r--r--  freebsd/sys/net/if_mib.c  37
-rw-r--r--  freebsd/sys/net/if_pflog.h  66
-rw-r--r--  freebsd/sys/net/if_pfsync.h  265
-rw-r--r--  freebsd/sys/net/if_sppp.h  2
-rw-r--r--  freebsd/sys/net/if_spppfr.c  49
-rw-r--r--  freebsd/sys/net/if_spppsubr.c  186
-rw-r--r--  freebsd/sys/net/if_stf.c  279
-rw-r--r--  freebsd/sys/net/if_tap.c  103
-rw-r--r--  freebsd/sys/net/if_tun.c  108
-rw-r--r--  freebsd/sys/net/if_tun.h  4
-rw-r--r--  freebsd/sys/net/if_types.h  434
-rw-r--r--  freebsd/sys/net/if_var.h  925
-rw-r--r--  freebsd/sys/net/if_vlan.c  401
-rw-r--r--  freebsd/sys/net/if_vlan_var.h  61
-rw-r--r--  freebsd/sys/net/ifq.h  484
-rw-r--r--  freebsd/sys/net/iso88025.h  8
-rw-r--r--  freebsd/sys/net/netisr.c  276
-rw-r--r--  freebsd/sys/net/netisr.h  20
-rw-r--r--  freebsd/sys/net/pfil.c  175
-rw-r--r--  freebsd/sys/net/pfil.h  115
-rw-r--r--  freebsd/sys/net/pfkeyv2.h  45
-rw-r--r--  freebsd/sys/net/pfvar.h  1757
-rw-r--r--  freebsd/sys/net/ppp_defs.h  8
-rw-r--r--  freebsd/sys/net/radix.c  225
-rw-r--r--  freebsd/sys/net/radix.h  90
-rw-r--r--  freebsd/sys/net/radix_mpath.c  92
-rw-r--r--  freebsd/sys/net/radix_mpath.h  10
-rw-r--r--  freebsd/sys/net/raw_cb.c  2
-rw-r--r--  freebsd/sys/net/raw_usrreq.c  4
-rw-r--r--  freebsd/sys/net/route.c  1519
-rw-r--r--  freebsd/sys/net/route.h  234
-rw-r--r--  freebsd/sys/net/route_var.h  76
-rw-r--r--  freebsd/sys/net/rss_config.h  138
-rw-r--r--  freebsd/sys/net/rtsock.c  912
-rw-r--r--  freebsd/sys/net/sff8436.h  213
-rw-r--r--  freebsd/sys/net/sff8472.h  508
-rw-r--r--  freebsd/sys/net/vnet.h  126
104 files changed, 29683 insertions, 8933 deletions
diff --git a/freebsd/sys/net/altq/altq.h b/freebsd/sys/net/altq/altq.h
new file mode 100644
index 00000000..5d7eab8a
--- /dev/null
+++ b/freebsd/sys/net/altq/altq.h
@@ -0,0 +1,206 @@
+/*-
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $
+ * $FreeBSD$
+ */
+#ifndef _ALTQ_ALTQ_H_
+#define _ALTQ_ALTQ_H_
+
+#if 0
+/*
+ * allow altq-3 (altqd(8) and /dev/altq) to coexist with the new pf-based altq.
+ * altq3 is mainly for research experiments. pf-based altq is for daily use.
+ */
+#define ALTQ3_COMPAT /* for compatibility with altq-3 */
+#define ALTQ3_CLFIER_COMPAT /* for compatibility with altq-3 classifier */
+#endif
+
+#ifdef ALTQ3_COMPAT
+#include <rtems/bsd/sys/param.h>
+#include <sys/ioccom.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+#endif /* ALTQ3_COMPAT */
+
+/* altq discipline type */
+#define ALTQT_NONE 0 /* reserved */
+#define ALTQT_CBQ 1 /* cbq */
+#define ALTQT_WFQ 2 /* wfq */
+#define ALTQT_AFMAP 3 /* afmap */
+#define ALTQT_FIFOQ 4 /* fifoq */
+#define ALTQT_RED 5 /* red */
+#define ALTQT_RIO 6 /* rio */
+#define ALTQT_LOCALQ 7 /* local use */
+#define ALTQT_HFSC 8 /* hfsc */
+#define ALTQT_CDNR 9 /* traffic conditioner */
+#define ALTQT_BLUE 10 /* blue */
+#define ALTQT_PRIQ 11 /* priority queue */
+#define ALTQT_JOBS 12 /* JoBS */
+#define ALTQT_FAIRQ 13 /* fairq */
+#define ALTQT_CODEL 14 /* CoDel */
+#define ALTQT_MAX 15 /* should be max discipline type + 1 */
+
+#ifdef ALTQ3_COMPAT
+struct altqreq {
+ char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ u_long arg; /* request-specific argument */
+};
+#endif
+
+/* simple token bucket meter profile */
+struct tb_profile {
+ u_int rate; /* rate in bit-per-sec */
+ u_int depth; /* depth in bytes */
+};
+
+#ifdef ALTQ3_COMPAT
+struct tbrreq {
+ char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct tb_profile tb_prof; /* token bucket profile */
+};
+
+#ifdef ALTQ3_CLFIER_COMPAT
+/*
+ * common network flow info structure
+ */
+struct flowinfo {
+ u_char fi_len; /* total length */
+ u_char fi_family; /* address family */
+ u_int8_t fi_data[46]; /* actually longer; address family
+ specific flow info. */
+};
+
+/*
+ * flow info structure for internet protocol family.
+ * (currently this is the only protocol family supported)
+ */
+struct flowinfo_in {
+ u_char fi_len; /* sizeof(struct flowinfo_in) */
+ u_char fi_family; /* AF_INET */
+ u_int8_t fi_proto; /* IPPROTO_XXX */
+ u_int8_t fi_tos; /* type-of-service */
+ struct in_addr fi_dst; /* dest address */
+ struct in_addr fi_src; /* src address */
+ u_int16_t fi_dport; /* dest port */
+ u_int16_t fi_sport; /* src port */
+ u_int32_t fi_gpi; /* generalized port id for ipsec */
+ u_int8_t _pad[28]; /* make the size equal to
+ flowinfo_in6 */
+};
+
+#ifdef SIN6_LEN
+struct flowinfo_in6 {
+ u_char fi6_len; /* sizeof(struct flowinfo_in6) */
+ u_char fi6_family; /* AF_INET6 */
+ u_int8_t fi6_proto; /* IPPROTO_XXX */
+ u_int8_t fi6_tclass; /* traffic class */
+ u_int32_t fi6_flowlabel; /* ipv6 flowlabel */
+ u_int16_t fi6_dport; /* dest port */
+ u_int16_t fi6_sport; /* src port */
+ u_int32_t fi6_gpi; /* generalized port id */
+ struct in6_addr fi6_dst; /* dest address */
+ struct in6_addr fi6_src; /* src address */
+};
+#endif /* SIN6_LEN */
+
+/*
+ * flow filters for AF_INET and AF_INET6
+ */
+struct flow_filter {
+ int ff_ruleno;
+ struct flowinfo_in ff_flow;
+ struct {
+ struct in_addr mask_dst;
+ struct in_addr mask_src;
+ u_int8_t mask_tos;
+ u_int8_t _pad[3];
+ } ff_mask;
+ u_int8_t _pad2[24]; /* make the size equal to flow_filter6 */
+};
+
+#ifdef SIN6_LEN
+struct flow_filter6 {
+ int ff_ruleno;
+ struct flowinfo_in6 ff_flow6;
+ struct {
+ struct in6_addr mask6_dst;
+ struct in6_addr mask6_src;
+ u_int8_t mask6_tclass;
+ u_int8_t _pad[3];
+ } ff_mask6;
+};
+#endif /* SIN6_LEN */
+#endif /* ALTQ3_CLFIER_COMPAT */
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * generic packet counter
+ */
+struct pktcntr {
+ u_int64_t packets;
+ u_int64_t bytes;
+};
+
+#define PKTCNTR_ADD(cntr, len) \
+ do { (cntr)->packets++; (cntr)->bytes += len; } while (/*CONSTCOND*/ 0)
+
+#ifdef ALTQ3_COMPAT
+/*
+ * altq related ioctls
+ */
+#define ALTQGTYPE _IOWR('q', 0, struct altqreq) /* get queue type */
+#if 0
+/*
+ * these ioctls are currently discipline-specific but could be shared
+ * in the future.
+ */
+#define ALTQATTACH _IOW('q', 1, struct altqreq) /* attach discipline */
+#define ALTQDETACH _IOW('q', 2, struct altqreq) /* detach discipline */
+#define ALTQENABLE _IOW('q', 3, struct altqreq) /* enable discipline */
+#define ALTQDISABLE _IOW('q', 4, struct altqreq) /* disable discipline*/
+#define ALTQCLEAR _IOW('q', 5, struct altqreq) /* (re)initialize */
+#define ALTQCONFIG _IOWR('q', 6, struct altqreq) /* set config params */
+#define ALTQADDCLASS _IOWR('q', 7, struct altqreq) /* add a class */
+#define ALTQMODCLASS _IOWR('q', 8, struct altqreq) /* modify a class */
+#define ALTQDELCLASS _IOWR('q', 9, struct altqreq) /* delete a class */
+#define ALTQADDFILTER _IOWR('q', 10, struct altqreq) /* add a filter */
+#define ALTQDELFILTER _IOWR('q', 11, struct altqreq) /* delete a filter */
+#define ALTQGETSTATS _IOWR('q', 12, struct altqreq) /* get statistics */
+#define ALTQGETCNTR _IOWR('q', 13, struct altqreq) /* get a pkt counter */
+#endif /* 0 */
+#define ALTQTBRSET _IOW('q', 14, struct tbrreq) /* set tb regulator */
+#define ALTQTBRGET _IOWR('q', 15, struct tbrreq) /* get tb regulator */
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+#include <net/altq/altq_var.h>
+#endif
+
+#endif /* _ALTQ_ALTQ_H_ */
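The pktcntr structure and PKTCNTR_ADD macro above are the accounting primitives shared by the queueing disciplines imported in this update. A minimal standalone sketch of their use follows (an editorial illustration, not part of the commit; the struct and macro are restated so the snippet builds outside the kernel):

#include <stdio.h>
#include <stdint.h>

/* restated from altq.h above */
struct pktcntr {
	uint64_t packets;
	uint64_t bytes;
};
#define PKTCNTR_ADD(cntr, len) \
	do { (cntr)->packets++; (cntr)->bytes += (len); } while (0)

int
main(void)
{
	struct pktcntr cnt = { 0, 0 };
	int len;

	/* account three packets of 64, 128 and 256 bytes */
	for (len = 64; len <= 256; len *= 2)
		PKTCNTR_ADD(&cnt, len);

	/* prints: 3 packets, 448 bytes */
	printf("%ju packets, %ju bytes\n",
	    (uintmax_t)cnt.packets, (uintmax_t)cnt.bytes);
	return (0);
}

The do { ... } while (0) wrapper lets PKTCNTR_ADD behave as a single statement after an unbraced if, which is why the header writes the macro that way.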
diff --git a/freebsd/sys/net/altq/altq_cbq.c b/freebsd/sys/net/altq/altq_cbq.c
new file mode 100644
index 00000000..b8593fd6
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_cbq.c
@@ -0,0 +1,1171 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ *
+ * $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $
+ * $FreeBSD$
+ */
+
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/time.h>
+#ifdef ALTQ3_COMPAT
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#endif
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cbq.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+
+#ifdef ALTQ3_COMPAT
+/*
+ * Local Data structures.
+ */
+static cbq_state_t *cbq_list = NULL;
+#endif
+
+/*
+ * Forward Declarations.
+ */
+static int cbq_class_destroy(cbq_state_t *, struct rm_class *);
+static struct rm_class *clh_to_clp(cbq_state_t *, u_int32_t);
+static int cbq_clear_interface(cbq_state_t *);
+static int cbq_request(struct ifaltq *, int, void *);
+static int cbq_enqueue(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+static struct mbuf *cbq_dequeue(struct ifaltq *, int);
+static void cbqrestart(struct ifaltq *);
+static void get_class_stats(class_stats_t *, struct rm_class *);
+static void cbq_purge(cbq_state_t *);
+#ifdef ALTQ3_COMPAT
+static int cbq_add_class(struct cbq_add_class *);
+static int cbq_delete_class(struct cbq_delete_class *);
+static int cbq_modify_class(struct cbq_modify_class *);
+static int cbq_class_create(cbq_state_t *, struct cbq_add_class *,
+ struct rm_class *, struct rm_class *);
+static int cbq_clear_hierarchy(struct cbq_interface *);
+static int cbq_set_enable(struct cbq_interface *, int);
+static int cbq_ifattach(struct cbq_interface *);
+static int cbq_ifdetach(struct cbq_interface *);
+static int cbq_getstats(struct cbq_getstats *);
+
+static int cbq_add_filter(struct cbq_add_filter *);
+static int cbq_delete_filter(struct cbq_delete_filter *);
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * int
+ * cbq_class_destroy(cbq_state_t *, struct rm_class *) - This
+ * function destroys a given traffic class. Before destroying
+ * the class, all traffic for that class is released.
+ */
+static int
+cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl)
+{
+ int i;
+
+ /* delete the class */
+ rmc_delete_class(&cbqp->ifnp, cl);
+
+ /*
+ * free the class handle
+ */
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == cl)
+ cbqp->cbq_class_tbl[i] = NULL;
+
+ if (cl == cbqp->ifnp.root_)
+ cbqp->ifnp.root_ = NULL;
+ if (cl == cbqp->ifnp.default_)
+ cbqp->ifnp.default_ = NULL;
+#ifdef ALTQ3_COMPAT
+ if (cl == cbqp->ifnp.ctl_)
+ cbqp->ifnp.ctl_ = NULL;
+#endif
+ return (0);
+}
+
+/* convert class handle to class pointer */
+static struct rm_class *
+clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle)
+{
+ int i;
+ struct rm_class *cl;
+
+ if (chandle == 0)
+ return (NULL);
+ /*
+ * first, try optimistically the slot matching the lower bits of
+ * the handle. if it fails, do the linear table search.
+ */
+ i = chandle % CBQ_MAX_CLASSES;
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL &&
+ cl->stats_.handle == chandle)
+ return (cl);
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL &&
+ cl->stats_.handle == chandle)
+ return (cl);
+ return (NULL);
+}
+
+static int
+cbq_clear_interface(cbq_state_t *cbqp)
+{
+ int again, i;
+ struct rm_class *cl;
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ /* free the filters for this interface */
+ acc_discard_filters(&cbqp->cbq_classifier, NULL, 1);
+#endif
+
+ /* clear out the classes now */
+ do {
+ again = 0;
+ for (i = 0; i < CBQ_MAX_CLASSES; i++) {
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL) {
+ if (is_a_parent_class(cl))
+ again++;
+ else {
+ cbq_class_destroy(cbqp, cl);
+ cbqp->cbq_class_tbl[i] = NULL;
+ if (cl == cbqp->ifnp.root_)
+ cbqp->ifnp.root_ = NULL;
+ if (cl == cbqp->ifnp.default_)
+ cbqp->ifnp.default_ = NULL;
+#ifdef ALTQ3_COMPAT
+ if (cl == cbqp->ifnp.ctl_)
+ cbqp->ifnp.ctl_ = NULL;
+#endif
+ }
+ }
+ }
+ } while (again);
+
+ return (0);
+}
+
+static int
+cbq_request(struct ifaltq *ifq, int req, void *arg)
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ cbq_purge(cbqp);
+ break;
+ }
+ return (0);
+}
+
+/* copy the stats info in rm_class to class_stats_t */
+static void
+get_class_stats(class_stats_t *statsp, struct rm_class *cl)
+{
+ statsp->xmit_cnt = cl->stats_.xmit_cnt;
+ statsp->drop_cnt = cl->stats_.drop_cnt;
+ statsp->over = cl->stats_.over;
+ statsp->borrows = cl->stats_.borrows;
+ statsp->overactions = cl->stats_.overactions;
+ statsp->delays = cl->stats_.delays;
+
+ statsp->depth = cl->depth_;
+ statsp->priority = cl->pri_;
+ statsp->maxidle = cl->maxidle_;
+ statsp->minidle = cl->minidle_;
+ statsp->offtime = cl->offtime_;
+ statsp->qmax = qlimit(cl->q_);
+ statsp->ns_per_byte = cl->ns_per_byte_;
+ statsp->wrr_allot = cl->w_allotment_;
+ statsp->qcnt = qlen(cl->q_);
+ statsp->avgidle = cl->avgidle_;
+
+ statsp->qtype = qtype(cl->q_);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ red_getstats(cl->red_, &statsp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ rio_getstats((rio_t *)cl->red_, &statsp->red[0]);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ codel_getstats(cl->codel_, &statsp->codel);
+#endif
+}
+
+int
+cbq_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+ s = splnet();
+ error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc,
+ cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL);
+ splx(s);
+ return (error);
+}
+
+int
+cbq_add_altq(struct pf_altq *a)
+{
+ cbq_state_t *cbqp;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ /* allocate and initialize cbq_state_t */
+ cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cbqp == NULL)
+ return (ENOMEM);
+ CALLOUT_INIT(&cbqp->cbq_callout);
+ cbqp->cbq_qlen = 0;
+ cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */
+
+ /* keep the state in pf_altq */
+ a->altq_disc = cbqp;
+
+ return (0);
+}
+
+int
+cbq_remove_altq(struct pf_altq *a)
+{
+ cbq_state_t *cbqp;
+
+ if ((cbqp = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ cbq_clear_interface(cbqp);
+
+ if (cbqp->ifnp.default_)
+ cbq_class_destroy(cbqp, cbqp->ifnp.default_);
+ if (cbqp->ifnp.root_)
+ cbq_class_destroy(cbqp, cbqp->ifnp.root_);
+
+ /* deallocate cbq_state_t */
+ free(cbqp, M_DEVBUF);
+
+ return (0);
+}
+
+int
+cbq_add_queue(struct pf_altq *a)
+{
+ struct rm_class *borrow, *parent;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ struct cbq_opts *opts;
+ int i;
+
+ if ((cbqp = a->altq_disc) == NULL)
+ return (EINVAL);
+ if (a->qid == 0)
+ return (EINVAL);
+
+ /*
+ * find a free slot in the class table. if the slot matching
+ * the lower bits of qid is free, use this slot. otherwise,
+ * use the first free slot.
+ */
+ i = a->qid % CBQ_MAX_CLASSES;
+ if (cbqp->cbq_class_tbl[i] != NULL) {
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == NULL)
+ break;
+ if (i == CBQ_MAX_CLASSES)
+ return (EINVAL);
+ }
+
+ opts = &a->pq_u.cbq_opts;
+ /* check parameters */
+ if (a->priority >= CBQ_MAXPRI)
+ return (EINVAL);
+
+ /* Get pointers to parent and borrow classes. */
+ parent = clh_to_clp(cbqp, a->parent_qid);
+ if (opts->flags & CBQCLF_BORROW)
+ borrow = parent;
+ else
+ borrow = NULL;
+
+ /*
+	 * A class must borrow from its parent or it cannot
+	 * borrow at all.  Hence, borrow can be NULL.
+ */
+ if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) {
+ printf("cbq_add_queue: no parent class!\n");
+ return (EINVAL);
+ }
+
+ if ((borrow != parent) && (borrow != NULL)) {
+ printf("cbq_add_class: borrow class != parent\n");
+ return (EINVAL);
+ }
+
+ /*
+ * check parameters
+ */
+ switch (opts->flags & CBQCLF_CLASSMASK) {
+ case CBQCLF_ROOTCLASS:
+ if (parent != NULL)
+ return (EINVAL);
+ if (cbqp->ifnp.root_)
+ return (EINVAL);
+ break;
+ case CBQCLF_DEFCLASS:
+ if (cbqp->ifnp.default_)
+ return (EINVAL);
+ break;
+ case 0:
+ if (a->qid == 0)
+ return (EINVAL);
+ break;
+ default:
+		/* more than one class flag bit set */
+ return (EINVAL);
+ }
+
+ /*
+ * create a class. if this is a root class, initialize the
+ * interface.
+ */
+ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
+ rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte,
+ cbqrestart, a->qlimit, RM_MAXQUEUED,
+ opts->maxidle, opts->minidle, opts->offtime,
+ opts->flags);
+ cl = cbqp->ifnp.root_;
+ } else {
+ cl = rmc_newclass(a->priority,
+ &cbqp->ifnp, opts->ns_per_byte,
+ rmc_delay_action, a->qlimit, parent, borrow,
+ opts->maxidle, opts->minidle, opts->offtime,
+ opts->pktsize, opts->flags);
+ }
+ if (cl == NULL)
+ return (ENOMEM);
+
+ /* return handle to user space. */
+ cl->stats_.handle = a->qid;
+ cl->stats_.depth = cl->depth_;
+
+ /* save the allocated class */
+ cbqp->cbq_class_tbl[i] = cl;
+
+ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
+ cbqp->ifnp.default_ = cl;
+
+ return (0);
+}
+
+int
+cbq_remove_queue(struct pf_altq *a)
+{
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+ int i;
+
+ if ((cbqp = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
+ return (EINVAL);
+
+ /* if we are a parent class, then return an error. */
+ if (is_a_parent_class(cl))
+ return (EINVAL);
+
+ /* delete the class */
+ rmc_delete_class(&cbqp->ifnp, cl);
+
+ /*
+ * free the class handle
+ */
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == cl) {
+ cbqp->cbq_class_tbl[i] = NULL;
+ if (cl == cbqp->ifnp.root_)
+ cbqp->ifnp.root_ = NULL;
+ if (cl == cbqp->ifnp.default_)
+ cbqp->ifnp.default_ = NULL;
+ break;
+ }
+
+ return (0);
+}
+
+int
+cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ class_stats_t stats;
+ int error = 0;
+
+ if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * int
+ * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr)
+ * - Queue data packets.
+ *
+ * cbq_enqueue is set to ifp->if_altqenqueue and called by an upper
+ * layer (e.g. ether_output). cbq_enqueue queues the given packet
+ * to the cbq, then invokes the driver's start routine.
+ *
+ * Assumptions: called in splimp
+ * Returns: 0 if the queueing is successful.
+ * ENOBUFS if a packet dropping occurred as a result of
+ * the queueing.
+ */
+
+static int
+cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+ struct rm_class *cl;
+ struct pf_mtag *t;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ if ((t = pf_find_mtag(m)) != NULL)
+ cl = clh_to_clp(cbqp, t->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL) {
+ cl = cbqp->ifnp.default_;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->pktattr_ = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->pktattr_ = NULL;
+ len = m_pktlen(m);
+ if (rmc_queue_packet(cl, m) != 0) {
+ /* drop occurred. some mbuf was freed in rmc_queue_packet. */
+ PKTCNTR_ADD(&cl->stats_.drop_cnt, len);
+ return (ENOBUFS);
+ }
+
+ /* successfully queued. */
+ ++cbqp->cbq_qlen;
+ IFQ_INC_LEN(ifq);
+ return (0);
+}
+
+static struct mbuf *
+cbq_dequeue(struct ifaltq *ifq, int op)
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+ struct mbuf *m;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ m = rmc_dequeue_next(&cbqp->ifnp, op);
+
+ if (m && op == ALTDQ_REMOVE) {
+ --cbqp->cbq_qlen; /* decrement # of packets in cbq */
+ IFQ_DEC_LEN(ifq);
+
+ /* Update the class. */
+ rmc_update_class_util(&cbqp->ifnp);
+ }
+ return (m);
+}
+
+/*
+ * void
+ * cbqrestart(struct ifaltq *) - Restart sending of data.
+ * called from rmc_restart in splimp via timeout after waking up
+ * a suspended class.
+ * Returns: NONE
+ */
+
+static void
+cbqrestart(struct ifaltq *ifq)
+{
+ cbq_state_t *cbqp;
+ struct ifnet *ifp;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (!ALTQ_IS_ENABLED(ifq))
+ /* cbq must have been detached */
+ return;
+
+ if ((cbqp = (cbq_state_t *)ifq->altq_disc) == NULL)
+ /* should not happen */
+ return;
+
+ ifp = ifq->altq_ifp;
+ if (ifp->if_start &&
+ cbqp->cbq_qlen > 0 && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
+ IFQ_UNLOCK(ifq);
+ (*ifp->if_start)(ifp);
+ IFQ_LOCK(ifq);
+ }
+}
+
+static void cbq_purge(cbq_state_t *cbqp)
+{
+ struct rm_class *cl;
+ int i;
+
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL)
+ rmc_dropall(cl);
+ if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_))
+ cbqp->ifnp.ifq_->ifq_len = 0;
+}
+#ifdef ALTQ3_COMPAT
+
+static int
+cbq_add_class(acp)
+ struct cbq_add_class *acp;
+{
+ char *ifacename;
+ struct rm_class *borrow, *parent;
+ cbq_state_t *cbqp;
+
+ ifacename = acp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* check parameters */
+ if (acp->cbq_class.priority >= CBQ_MAXPRI ||
+ acp->cbq_class.maxq > CBQ_MAXQSIZE)
+ return (EINVAL);
+
+ /* Get pointers to parent and borrow classes. */
+ parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle);
+ borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle);
+
+ /*
+	 * A class must borrow from its parent or it cannot
+	 * borrow at all.  Hence, borrow can be NULL.
+ */
+ if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) {
+ printf("cbq_add_class: no parent class!\n");
+ return (EINVAL);
+ }
+
+ if ((borrow != parent) && (borrow != NULL)) {
+ printf("cbq_add_class: borrow class != parent\n");
+ return (EINVAL);
+ }
+
+ return cbq_class_create(cbqp, acp, parent, borrow);
+}
+
+static int
+cbq_delete_class(dcp)
+ struct cbq_delete_class *dcp;
+{
+ char *ifacename;
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+
+ ifacename = dcp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(cbqp, dcp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ /* if we are a parent class, then return an error. */
+ if (is_a_parent_class(cl))
+ return (EINVAL);
+
+ /* if a filter has a reference to this class delete the filter */
+ acc_discard_filters(&cbqp->cbq_classifier, cl, 0);
+
+ return cbq_class_destroy(cbqp, cl);
+}
+
+static int
+cbq_modify_class(acp)
+ struct cbq_modify_class *acp;
+{
+ char *ifacename;
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+
+ ifacename = acp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* Get pointer to this class */
+ if ((cl = clh_to_clp(cbqp, acp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte,
+ acp->cbq_class.maxq, acp->cbq_class.maxidle,
+ acp->cbq_class.minidle, acp->cbq_class.offtime,
+ acp->cbq_class.pktsize) < 0)
+ return (EINVAL);
+ return (0);
+}
+
+/*
+ * static int
+ * cbq_class_create(cbq_state_t *cbqp, struct cbq_add_class *acp,
+ *	struct rm_class *parent, struct rm_class *borrow)
+ *
+ * This function creates a new traffic class in the CBQ class hierarchy
+ * with the given parameters.  The created class is either the root, the
+ * default, or a new dynamic class.  If CBQ is not initialized, the root
+ * class will be created.
+ */
+static int
+cbq_class_create(cbqp, acp, parent, borrow)
+ cbq_state_t *cbqp;
+ struct cbq_add_class *acp;
+ struct rm_class *parent, *borrow;
+{
+ struct rm_class *cl;
+ cbq_class_spec_t *spec = &acp->cbq_class;
+ u_int32_t chandle;
+ int i;
+
+ /*
+ * allocate class handle
+ */
+ for (i = 1; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == NULL)
+ break;
+ if (i == CBQ_MAX_CLASSES)
+ return (EINVAL);
+ chandle = i; /* use the slot number as class handle */
+
+ /*
+ * create a class. if this is a root class, initialize the
+ * interface.
+ */
+ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
+ rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte,
+ cbqrestart, spec->maxq, RM_MAXQUEUED,
+ spec->maxidle, spec->minidle, spec->offtime,
+ spec->flags);
+ cl = cbqp->ifnp.root_;
+ } else {
+ cl = rmc_newclass(spec->priority,
+ &cbqp->ifnp, spec->nano_sec_per_byte,
+ rmc_delay_action, spec->maxq, parent, borrow,
+ spec->maxidle, spec->minidle, spec->offtime,
+ spec->pktsize, spec->flags);
+ }
+ if (cl == NULL)
+ return (ENOMEM);
+
+ /* return handle to user space. */
+ acp->cbq_class_handle = chandle;
+
+ cl->stats_.handle = chandle;
+ cl->stats_.depth = cl->depth_;
+
+ /* save the allocated class */
+ cbqp->cbq_class_tbl[i] = cl;
+
+ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
+ cbqp->ifnp.default_ = cl;
+ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_CTLCLASS)
+ cbqp->ifnp.ctl_ = cl;
+
+ return (0);
+}
+
+static int
+cbq_add_filter(afp)
+ struct cbq_add_filter *afp;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+
+ ifacename = afp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* Get the pointer to class. */
+ if ((cl = clh_to_clp(cbqp, afp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter,
+ cl, &afp->cbq_filter_handle);
+}
+
+static int
+cbq_delete_filter(dfp)
+ struct cbq_delete_filter *dfp;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = dfp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&cbqp->cbq_classifier,
+ dfp->cbq_filter_handle);
+}
+
+/*
+ * cbq_clear_hierarchy deletes all classes and their filters on the
+ * given interface.
+ */
+static int
+cbq_clear_hierarchy(ifacep)
+ struct cbq_interface *ifacep;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ return cbq_clear_interface(cbqp);
+}
+
+/*
+ * static int
+ * cbq_set_enable(struct cbq_interface *ep, int enable) - this function
+ * processes the ioctl request to enable or disable class based
+ * queueing.  It searches the list
+ * of interfaces for the specified interface and then enables CBQ on
+ * that interface.
+ *
+ * Returns: 0, for no error.
+ *	    EBADF, for specified interface not found.
+ */
+
+static int
+cbq_set_enable(ep, enable)
+ struct cbq_interface *ep;
+ int enable;
+{
+ int error = 0;
+ cbq_state_t *cbqp;
+ char *ifacename;
+
+ ifacename = ep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ switch (enable) {
+ case ENABLE:
+ if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL ||
+ cbqp->ifnp.ctl_ == NULL) {
+ if (cbqp->ifnp.root_ == NULL)
+ printf("No Root Class for %s\n", ifacename);
+ if (cbqp->ifnp.default_ == NULL)
+ printf("No Default Class for %s\n", ifacename);
+ if (cbqp->ifnp.ctl_ == NULL)
+ printf("No Control Class for %s\n", ifacename);
+ error = EINVAL;
+ } else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) {
+ cbqp->cbq_qlen = 0;
+ }
+ break;
+
+ case DISABLE:
+ error = altq_disable(cbqp->ifnp.ifq_);
+ break;
+ }
+ return (error);
+}
+
+static int
+cbq_getstats(gsp)
+ struct cbq_getstats *gsp;
+{
+ char *ifacename;
+ int i, n, nclasses;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ class_stats_t stats, *usp;
+ int error = 0;
+
+ ifacename = gsp->iface.cbq_ifacename;
+ nclasses = gsp->nclasses;
+ usp = gsp->stats;
+
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+ if (nclasses <= 0)
+ return (EINVAL);
+
+ for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) {
+ while ((cl = cbqp->cbq_class_tbl[i]) == NULL)
+ if (++i >= CBQ_MAX_CLASSES)
+ goto out;
+
+ get_class_stats(&stats, cl);
+ stats.handle = cl->stats_.handle;
+
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+
+ out:
+ gsp->nclasses = n;
+ return (error);
+}
+
+static int
+cbq_ifattach(ifacep)
+ struct cbq_interface *ifacep;
+{
+ int error = 0;
+ char *ifacename;
+ cbq_state_t *new_cbqp;
+ struct ifnet *ifp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((ifp = ifunit(ifacename)) == NULL)
+ return (ENXIO);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENXIO);
+
+ /* allocate and initialize cbq_state_t */
+ new_cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK);
+ if (new_cbqp == NULL)
+ return (ENOMEM);
+ bzero(new_cbqp, sizeof(cbq_state_t));
+ CALLOUT_INIT(&new_cbqp->cbq_callout);
+
+ new_cbqp->cbq_qlen = 0;
+ new_cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */
+
+ /*
+ * set CBQ to this ifnet structure.
+ */
+ error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp,
+ cbq_enqueue, cbq_dequeue, cbq_request,
+ &new_cbqp->cbq_classifier, acc_classify);
+ if (error) {
+ free(new_cbqp, M_DEVBUF);
+ return (error);
+ }
+
+ /* prepend to the list of cbq_state_t's. */
+ new_cbqp->cbq_next = cbq_list;
+ cbq_list = new_cbqp;
+
+ return (0);
+}
+
+static int
+cbq_ifdetach(ifacep)
+ struct cbq_interface *ifacep;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ (void)cbq_set_enable(ifacep, DISABLE);
+
+ cbq_clear_interface(cbqp);
+
+ /* remove CBQ from the ifnet structure. */
+ (void)altq_detach(cbqp->ifnp.ifq_);
+
+ /* remove from the list of cbq_state_t's. */
+ if (cbq_list == cbqp)
+ cbq_list = cbqp->cbq_next;
+ else {
+ cbq_state_t *cp;
+
+ for (cp = cbq_list; cp != NULL; cp = cp->cbq_next)
+ if (cp->cbq_next == cbqp) {
+ cp->cbq_next = cbqp->cbq_next;
+ break;
+ }
+ ASSERT(cp != NULL);
+ }
+
+ /* deallocate cbq_state_t */
+ free(cbqp, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * cbq device interface
+ */
+
+altqdev_decl(cbq);
+
+int
+cbqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ return (0);
+}
+
+int
+cbqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct ifnet *ifp;
+ struct cbq_interface iface;
+ int err, error = 0;
+
+ while (cbq_list) {
+ ifp = cbq_list->ifnp.ifq_->altq_ifp;
+ sprintf(iface.cbq_ifacename, "%s", ifp->if_xname);
+ err = cbq_ifdetach(&iface);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return (error);
+}
+
+int
+cbqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ int error = 0;
+
+ /* check cmd for superuser only */
+ switch (cmd) {
+ case CBQ_GETSTATS:
+ /* currently only command that an ordinary user can call */
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ error = priv_check(p, PRIV_ALTQ_MANAGE);
+#elif (__FreeBSD_version > 400000)
+ error = suser(p);
+#else
+ error = suser(p->p_ucred, &p->p_acflag);
+#endif
+ if (error)
+ return (error);
+ break;
+ }
+
+ switch (cmd) {
+
+ case CBQ_ENABLE:
+ error = cbq_set_enable((struct cbq_interface *)addr, ENABLE);
+ break;
+
+ case CBQ_DISABLE:
+ error = cbq_set_enable((struct cbq_interface *)addr, DISABLE);
+ break;
+
+ case CBQ_ADD_FILTER:
+ error = cbq_add_filter((struct cbq_add_filter *)addr);
+ break;
+
+ case CBQ_DEL_FILTER:
+ error = cbq_delete_filter((struct cbq_delete_filter *)addr);
+ break;
+
+ case CBQ_ADD_CLASS:
+ error = cbq_add_class((struct cbq_add_class *)addr);
+ break;
+
+ case CBQ_DEL_CLASS:
+ error = cbq_delete_class((struct cbq_delete_class *)addr);
+ break;
+
+ case CBQ_MODIFY_CLASS:
+ error = cbq_modify_class((struct cbq_modify_class *)addr);
+ break;
+
+ case CBQ_CLEAR_HIERARCHY:
+ error = cbq_clear_hierarchy((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_IF_ATTACH:
+ error = cbq_ifattach((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_IF_DETACH:
+ error = cbq_ifdetach((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_GETSTATS:
+ error = cbq_getstats((struct cbq_getstats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+#if 0
+/* for debug */
+static void cbq_class_dump(int);
+
+static void cbq_class_dump(i)
+ int i;
+{
+ struct rm_class *cl;
+ rm_class_stats_t *s;
+ struct _class_queue_ *q;
+
+ if (cbq_list == NULL) {
+ printf("cbq_class_dump: no cbq_state found\n");
+ return;
+ }
+ cl = cbq_list->cbq_class_tbl[i];
+
+ printf("class %d cl=%p\n", i, cl);
+ if (cl != NULL) {
+ s = &cl->stats_;
+ q = cl->q_;
+
+ printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n",
+ cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_);
+ printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n",
+ cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_,
+ cl->maxidle_);
+ printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n",
+ cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_);
+ printf("handle=%d, depth=%d, packets=%d, bytes=%d\n",
+ s->handle, s->depth,
+ (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes);
+ printf("over=%d\n, borrows=%d, drops=%d, overactions=%d, delays=%d\n",
+ s->over, s->borrows, (int)s->drop_cnt.packets,
+ s->overactions, s->delays);
+ printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n",
+ q->tail_, q->head_, q->qlen_, q->qlim_,
+ q->qthresh_, q->qtype_);
+ }
+}
+#endif /* 0 */
+
+#ifdef KLD_MODULE
+
+static struct altqsw cbq_sw =
+ {"cbq", cbqopen, cbqclose, cbqioctl};
+
+ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw);
+MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1);
+MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_CBQ */
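clh_to_clp() above resolves a class handle by first probing the table slot selected by the handle's value modulo CBQ_MAX_CLASSES and only falling back to a linear scan on a miss. The same two-step lookup, extracted into a standalone sketch over a toy table (hypothetical names; an editorial illustration, not part of the commit):

#include <stddef.h>

#define TBL_SIZE 256			/* stands in for CBQ_MAX_CLASSES */

struct entry { unsigned int handle; };

static struct entry *
handle_lookup(struct entry *tbl[TBL_SIZE], unsigned int handle)
{
	struct entry *e;
	int i;

	if (handle == 0)
		return (NULL);
	/* optimistic probe: the slot chosen by the handle's low bits */
	i = handle % TBL_SIZE;
	if ((e = tbl[i]) != NULL && e->handle == handle)
		return (e);
	/* miss: fall back to a linear scan of the whole table */
	for (i = 0; i < TBL_SIZE; i++)
		if ((e = tbl[i]) != NULL && e->handle == handle)
			return (e);
	return (NULL);
}

The fast path wins whenever handles were assigned as slot indices, as cbq_class_create() does; the scan covers pf-assigned qids whose low bits collide with an occupied slot.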
diff --git a/freebsd/sys/net/altq/altq_cbq.h b/freebsd/sys/net/altq/altq_cbq.h
new file mode 100644
index 00000000..51e7cf9a
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_cbq.h
@@ -0,0 +1,225 @@
+/*-
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ *
+ * $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_CBQ_H_
+#define _ALTQ_ALTQ_CBQ_H_
+
+#include <net/altq/altq.h>
+#include <net/altq/altq_rmclass.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NULL_CLASS_HANDLE 0
+
+/* class flags should be same as class flags in rm_class.h */
+#define CBQCLF_RED 0x0001 /* use RED */
+#define CBQCLF_ECN 0x0002 /* use RED/ECN */
+#define CBQCLF_RIO 0x0004 /* use RIO */
+#define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
+#define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define CBQCLF_BORROW 0x0020 /* borrow from parent */
+#define CBQCLF_CODEL 0x0040 /* use CoDel */
+
+/* class flags only for root class */
+#define CBQCLF_WRR 0x0100 /* weighted-round robin */
+#define CBQCLF_EFFICIENT 0x0200 /* work-conserving */
+
+/* class flags for special classes */
+#define CBQCLF_ROOTCLASS 0x1000 /* root class */
+#define CBQCLF_DEFCLASS 0x2000 /* default class */
+#ifdef ALTQ3_COMPAT
+#define CBQCLF_CTLCLASS 0x4000 /* control class */
+#endif
+#define CBQCLF_CLASSMASK 0xf000 /* class mask */
+
+#define CBQ_MAXQSIZE 200
+#define CBQ_MAXPRI RM_MAXPRIO
+
+typedef struct _cbq_class_stats_ {
+ u_int32_t handle;
+ u_int depth;
+
+ struct pktcntr xmit_cnt; /* packets sent in this class */
+ struct pktcntr drop_cnt; /* dropped packets */
+ u_int over; /* # times went over limit */
+ u_int borrows; /* # times tried to borrow */
+ u_int overactions; /* # times invoked overlimit action */
+ u_int delays; /* # times invoked delay actions */
+
+ /* other static class parameters useful for debugging */
+ int priority;
+ int maxidle;
+ int minidle;
+ int offtime;
+ int qmax;
+ int ns_per_byte;
+ int wrr_allot;
+
+ int qcnt; /* # packets in queue */
+ int avgidle;
+
+ /* codel, red and rio related info */
+ int qtype;
+ struct redstats red[3];
+ struct codel_stats codel;
+} class_stats_t;
+
+#ifdef ALTQ3_COMPAT
+/*
+ * Define structures associated with IOCTLS for cbq.
+ */
+
+/*
+ * Define the CBQ interface structure. This must be included in all
+ * IOCTL's such that the CBQ driver may find the appropriate CBQ module
+ * associated with the network interface to be affected.
+ */
+struct cbq_interface {
+ char cbq_ifacename[IFNAMSIZ];
+};
+
+typedef struct cbq_class_spec {
+ u_int priority;
+ u_int nano_sec_per_byte;
+ u_int maxq;
+ u_int maxidle;
+ int minidle;
+ u_int offtime;
+ u_int32_t parent_class_handle;
+ u_int32_t borrow_class_handle;
+
+ u_int pktsize;
+ int flags;
+} cbq_class_spec_t;
+
+struct cbq_add_class {
+ struct cbq_interface cbq_iface;
+
+ cbq_class_spec_t cbq_class;
+ u_int32_t cbq_class_handle;
+};
+
+struct cbq_delete_class {
+ struct cbq_interface cbq_iface;
+ u_int32_t cbq_class_handle;
+};
+
+struct cbq_modify_class {
+ struct cbq_interface cbq_iface;
+
+ cbq_class_spec_t cbq_class;
+ u_int32_t cbq_class_handle;
+};
+
+struct cbq_add_filter {
+ struct cbq_interface cbq_iface;
+ u_int32_t cbq_class_handle;
+ struct flow_filter cbq_filter;
+
+ u_long cbq_filter_handle;
+};
+
+struct cbq_delete_filter {
+ struct cbq_interface cbq_iface;
+ u_long cbq_filter_handle;
+};
+
+/* the number of classes is returned in the nclasses field */
+struct cbq_getstats {
+ struct cbq_interface iface;
+ int nclasses;
+ class_stats_t *stats;
+};
+
+/*
+ * Define IOCTLs for CBQ.
+ */
+#define CBQ_IF_ATTACH _IOW('Q', 1, struct cbq_interface)
+#define CBQ_IF_DETACH _IOW('Q', 2, struct cbq_interface)
+#define CBQ_ENABLE _IOW('Q', 3, struct cbq_interface)
+#define CBQ_DISABLE _IOW('Q', 4, struct cbq_interface)
+#define CBQ_CLEAR_HIERARCHY _IOW('Q', 5, struct cbq_interface)
+#define CBQ_ADD_CLASS _IOWR('Q', 7, struct cbq_add_class)
+#define CBQ_DEL_CLASS _IOW('Q', 8, struct cbq_delete_class)
+#define CBQ_MODIFY_CLASS _IOWR('Q', 9, struct cbq_modify_class)
+#define CBQ_ADD_FILTER _IOWR('Q', 10, struct cbq_add_filter)
+#define CBQ_DEL_FILTER _IOW('Q', 11, struct cbq_delete_filter)
+#define CBQ_GETSTATS _IOWR('Q', 12, struct cbq_getstats)
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+/*
+ * Define macros only good for kernel drivers and modules.
+ */
+#define CBQ_WATCHDOG (hz / 20)
+#define CBQ_TIMEOUT 10
+#define CBQ_LS_TIMEOUT (20 * hz / 1000)
+
+#define CBQ_MAX_CLASSES 256
+
+#ifdef ALTQ3_COMPAT
+#define CBQ_MAX_FILTERS 256
+
+#define DISABLE 0x00
+#define ENABLE 0x01
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * Define State structures.
+ */
+typedef struct cbqstate {
+#ifdef ALTQ3_COMPAT
+ struct cbqstate *cbq_next;
+#endif
+ int cbq_qlen; /* # of packets in cbq */
+ struct rm_class *cbq_class_tbl[CBQ_MAX_CLASSES];
+
+ struct rm_ifdat ifnp;
+ struct callout cbq_callout; /* for timeouts */
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier cbq_classifier;
+#endif
+} cbq_state_t;
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_ALTQ_ALTQ_CBQ_H_ */
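A sketch of how a userland tool might drive the ALTQ3 ioctls declared above, assuming a kernel built with ALTQ3_COMPAT and the compat character device exposed as /dev/altq/cbq (both assumptions; pf-based configuration does not go through this path). Note that CBQ_ENABLE succeeds only after root, default and control classes have been added with CBQ_ADD_CLASS, per cbq_set_enable() in altq_cbq.c:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#include <net/altq/altq_cbq.h>	/* struct cbq_interface, CBQ_IF_ATTACH */

/*
 * Attach CBQ state to an interface and return the open device fd. The
 * caller must keep the fd open: per cbqclose() above, closing the device
 * detaches every attached interface.
 */
int
cbq_attach_iface(const char *ifname)
{
	struct cbq_interface iface;
	int fd;

	if ((fd = open("/dev/altq/cbq", O_RDWR)) < 0)
		return (-1);
	memset(&iface, 0, sizeof(iface));
	strlcpy(iface.cbq_ifacename, ifname, sizeof(iface.cbq_ifacename));
	if (ioctl(fd, CBQ_IF_ATTACH, &iface) < 0) {
		(void)close(fd);
		return (-1);
	}
	return (fd);
}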
diff --git a/freebsd/sys/net/altq/altq_cdnr.c b/freebsd/sys/net/altq/altq_cdnr.c
new file mode 100644
index 00000000..f456ce83
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_cdnr.c
@@ -0,0 +1,1384 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (C) 1999-2002
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $
+ * $FreeBSD$
+ */
+
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <net/altq/if_altq.h>
+#include <net/altq/altq.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+#include <net/altq/altq_cdnr.h>
+
+#ifdef ALTQ3_COMPAT
+/*
+ * diffserv traffic conditioning module
+ */
+
+int altq_cdnr_enabled = 0;
+
+/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
+#ifdef ALTQ_CDNR
+
+/* tcb_list keeps all top-level cdnr's allocated. */
+static LIST_HEAD(, top_cdnr) tcb_list;
+
+static int altq_cdnr_input(struct mbuf *, int);
+static struct top_cdnr *tcb_lookup(char *ifname);
+static struct cdnr_block *cdnr_handle2cb(u_long);
+static u_long cdnr_cb2handle(struct cdnr_block *);
+static void *cdnr_cballoc(struct top_cdnr *, int,
+ struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
+static void cdnr_cbdestroy(void *);
+static int tca_verify_action(struct tc_action *);
+static void tca_import_action(struct tc_action *, struct tc_action *);
+static void tca_invalidate_action(struct tc_action *);
+
+static int generic_element_destroy(struct cdnr_block *);
+static struct top_cdnr *top_create(struct ifaltq *);
+static int top_destroy(struct top_cdnr *);
+static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
+static int element_destroy(struct cdnr_block *);
+static void tb_import_profile(struct tbe *, struct tb_profile *);
+static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
+ struct tc_action *, struct tc_action *);
+static int tbm_destroy(struct tbmeter *);
+static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+static struct trtcm *trtcm_create(struct top_cdnr *,
+ struct tb_profile *, struct tb_profile *,
+ struct tc_action *, struct tc_action *, struct tc_action *,
+ int);
+static int trtcm_destroy(struct trtcm *);
+static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+static struct tswtcm *tswtcm_create(struct top_cdnr *,
+ u_int32_t, u_int32_t, u_int32_t,
+ struct tc_action *, struct tc_action *, struct tc_action *);
+static int tswtcm_destroy(struct tswtcm *);
+static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+
+static int cdnrcmd_if_attach(char *);
+static int cdnrcmd_if_detach(char *);
+static int cdnrcmd_add_element(struct cdnr_add_element *);
+static int cdnrcmd_delete_element(struct cdnr_delete_element *);
+static int cdnrcmd_add_filter(struct cdnr_add_filter *);
+static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
+static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
+static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
+static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
+static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
+static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
+static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
+static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
+static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
+static int cdnrcmd_get_stats(struct cdnr_get_stats *);
+
+altqdev_decl(cdnr);
+
+/*
+ * top level input function called from ip_input.
+ * should be called before converting header fields to host-byte-order.
+ */
+int
+altq_cdnr_input(m, af)
+ struct mbuf *m;
+ int af; /* address family */
+{
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct top_cdnr *top;
+ struct tc_action *tca;
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo pktinfo;
+
+ ifp = m->m_pkthdr.rcvif;
+ if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
+ /* traffic conditioner is not enabled on this interface */
+ return (1);
+
+ top = ifp->if_snd.altq_cdnr;
+
+ ip = mtod(m, struct ip *);
+#ifdef INET6
+ if (af == AF_INET6) {
+ u_int32_t flowlabel;
+
+ flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
+ pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
+ } else
+#endif
+ pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
+ pktinfo.pkt_len = m_pktlen(m);
+
+ tca = NULL;
+
+ cb = acc_classify(&top->tc_classifier, m, af);
+ if (cb != NULL)
+ tca = &cb->cb_action;
+
+ if (tca == NULL)
+ tca = &top->tc_block.cb_action;
+
+ while (1) {
+ PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
+
+ switch (tca->tca_code) {
+ case TCACODE_PASS:
+ return (1);
+ case TCACODE_DROP:
+ m_freem(m);
+ return (0);
+ case TCACODE_RETURN:
+ return (0);
+ case TCACODE_MARK:
+#ifdef INET6
+ if (af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ flowlabel = (tca->tca_dscp << 20) |
+ (flowlabel & ~(DSCP_MASK << 20));
+ ip6->ip6_flow = htonl(flowlabel);
+ } else
+#endif
+ ip->ip_tos = tca->tca_dscp |
+ (ip->ip_tos & DSCP_CUMASK);
+ return (1);
+ case TCACODE_NEXT:
+ cb = tca->tca_next;
+ tca = (*cb->cb_input)(cb, &pktinfo);
+ break;
+ case TCACODE_NONE:
+ default:
+ return (1);
+ }
+ }
+}
+
+static struct top_cdnr *
+tcb_lookup(ifname)
+ char *ifname;
+{
+ struct top_cdnr *top;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(ifname)) != NULL)
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (top->tc_ifq->altq_ifp == ifp)
+ return (top);
+ return (NULL);
+}
+
+static struct cdnr_block *
+cdnr_handle2cb(handle)
+ u_long handle;
+{
+ struct cdnr_block *cb;
+
+ cb = (struct cdnr_block *)handle;
+ if (handle != ALIGN(cb))
+ return (NULL);
+
+ if (cb == NULL || cb->cb_handle != handle)
+ return (NULL);
+ return (cb);
+}
+
+static u_long
+cdnr_cb2handle(cb)
+ struct cdnr_block *cb;
+{
+ return (cb->cb_handle);
+}
+
+static void *
+cdnr_cballoc(top, type, input_func)
+ struct top_cdnr *top;
+ int type;
+ struct tc_action *(*input_func)(struct cdnr_block *,
+ struct cdnr_pktinfo *);
+{
+ struct cdnr_block *cb;
+ int size;
+
+ switch (type) {
+ case TCETYPE_TOP:
+ size = sizeof(struct top_cdnr);
+ break;
+ case TCETYPE_ELEMENT:
+ size = sizeof(struct cdnr_block);
+ break;
+ case TCETYPE_TBMETER:
+ size = sizeof(struct tbmeter);
+ break;
+ case TCETYPE_TRTCM:
+ size = sizeof(struct trtcm);
+ break;
+ case TCETYPE_TSWTCM:
+ size = sizeof(struct tswtcm);
+ break;
+ default:
+ return (NULL);
+ }
+
+ cb = malloc(size, M_DEVBUF, M_WAITOK);
+ if (cb == NULL)
+ return (NULL);
+ bzero(cb, size);
+
+ cb->cb_len = size;
+ cb->cb_type = type;
+ cb->cb_ref = 0;
+ cb->cb_handle = (u_long)cb;
+ if (top == NULL)
+ cb->cb_top = (struct top_cdnr *)cb;
+ else
+ cb->cb_top = top;
+
+ if (input_func != NULL) {
+ /*
+ * if this cdnr has an action function,
+ * make tc_action to call itself.
+ */
+ cb->cb_action.tca_code = TCACODE_NEXT;
+ cb->cb_action.tca_next = cb;
+ cb->cb_input = input_func;
+ } else
+ cb->cb_action.tca_code = TCACODE_NONE;
+
+ /* if this isn't top, register the element to the top level cdnr */
+ if (top != NULL)
+ LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
+
+ return ((void *)cb);
+}
+
+static void
+cdnr_cbdestroy(cblock)
+ void *cblock;
+{
+ struct cdnr_block *cb = cblock;
+
+ /* delete filters belonging to this cdnr */
+ acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
+
+ /* remove from the top level cdnr */
+ if (cb->cb_top != cblock)
+ LIST_REMOVE(cb, cb_next);
+
+ free(cb, M_DEVBUF);
+}
+
+/*
+ * conditioner common destroy routine
+ */
+static int
+generic_element_destroy(cb)
+ struct cdnr_block *cb;
+{
+ int error = 0;
+
+ switch (cb->cb_type) {
+ case TCETYPE_TOP:
+ error = top_destroy((struct top_cdnr *)cb);
+ break;
+ case TCETYPE_ELEMENT:
+ error = element_destroy(cb);
+ break;
+ case TCETYPE_TBMETER:
+ error = tbm_destroy((struct tbmeter *)cb);
+ break;
+ case TCETYPE_TRTCM:
+ error = trtcm_destroy((struct trtcm *)cb);
+ break;
+ case TCETYPE_TSWTCM:
+ error = tswtcm_destroy((struct tswtcm *)cb);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+static int
+tca_verify_action(utca)
+ struct tc_action *utca;
+{
+ switch (utca->tca_code) {
+ case TCACODE_PASS:
+ case TCACODE_DROP:
+ case TCACODE_MARK:
+ /* these are ok */
+ break;
+
+ case TCACODE_HANDLE:
+ /* verify handle value */
+ if (cdnr_handle2cb(utca->tca_handle) == NULL)
+ return (-1);
+ break;
+
+ case TCACODE_NONE:
+ case TCACODE_RETURN:
+ case TCACODE_NEXT:
+ default:
+ /* should not be passed from a user */
+ return (-1);
+ }
+ return (0);
+}
+
+static void
+tca_import_action(ktca, utca)
+ struct tc_action *ktca, *utca;
+{
+ struct cdnr_block *cb;
+
+ *ktca = *utca;
+ if (ktca->tca_code == TCACODE_HANDLE) {
+ cb = cdnr_handle2cb(ktca->tca_handle);
+ if (cb == NULL) {
+ ktca->tca_code = TCACODE_NONE;
+ return;
+ }
+ ktca->tca_code = TCACODE_NEXT;
+ ktca->tca_next = cb;
+ cb->cb_ref++;
+ } else if (ktca->tca_code == TCACODE_MARK) {
+ ktca->tca_dscp &= DSCP_MASK;
+ }
+ return;
+}
+
+static void
+tca_invalidate_action(tca)
+ struct tc_action *tca;
+{
+ struct cdnr_block *cb;
+
+ if (tca->tca_code == TCACODE_NEXT) {
+ cb = tca->tca_next;
+ if (cb == NULL)
+ return;
+ cb->cb_ref--;
+ }
+ tca->tca_code = TCACODE_NONE;
+}
+
+/*
+ * top level traffic conditioner
+ */
+static struct top_cdnr *
+top_create(ifq)
+ struct ifaltq *ifq;
+{
+ struct top_cdnr *top;
+
+ if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
+ return (NULL);
+
+ top->tc_ifq = ifq;
+ /* set default action for the top level conditioner */
+ top->tc_block.cb_action.tca_code = TCACODE_PASS;
+
+ LIST_INSERT_HEAD(&tcb_list, top, tc_next);
+
+ ifq->altq_cdnr = top;
+
+ return (top);
+}
+
+static int
+top_destroy(top)
+ struct top_cdnr *top;
+{
+ struct cdnr_block *cb;
+
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ ALTQ_CLEAR_CNDTNING(top->tc_ifq);
+ top->tc_ifq->altq_cdnr = NULL;
+
+ /*
+ * destroy all the conditioner elements belonging to this interface
+ */
+ while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
+ while (cb != NULL && cb->cb_ref > 0)
+ cb = LIST_NEXT(cb, cb_next);
+ if (cb != NULL)
+ generic_element_destroy(cb);
+ }
+
+ LIST_REMOVE(top, tc_next);
+
+ cdnr_cbdestroy(top);
+
+ /* if there is no active conditioner, remove the input hook */
+ if (altq_input != NULL) {
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ break;
+ if (top == NULL)
+ altq_input = NULL;
+ }
+
+ return (0);
+}
+
+/*
+ * simple tc elements without an input function (e.g., droppers and markers).
+ */
+static struct cdnr_block *
+element_create(top, action)
+ struct top_cdnr *top;
+ struct tc_action *action;
+{
+ struct cdnr_block *cb;
+
+ if (tca_verify_action(action) < 0)
+ return (NULL);
+
+ if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL)
+ return (NULL);
+
+ tca_import_action(&cb->cb_action, action);
+
+ return (cb);
+}
+
+static int
+element_destroy(cb)
+ struct cdnr_block *cb;
+{
+ if (cb->cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&cb->cb_action);
+
+ cdnr_cbdestroy(cb);
+ return (0);
+}
+
+/*
+ * internal representation of token bucket parameters
+ * rate: byte_per_unittime << 32
+ * (((bits_per_sec) / 8) << 32) / machclk_freq
+ * depth: byte << 32
+ *
+ */
+#define TB_SHIFT 32
+#define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT)
+#define TB_UNSCALE(x) ((x) >> TB_SHIFT)
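+
+/*
+ * Worked example (illustrative figures): with machclk_freq of
+ * 1,000,000,000 (a 1 GHz clock) and a profile of rate = 8 Mbps,
+ * depth = 1500 bytes, tb_import_profile() below computes
+ *
+ *	rate  = TB_SCALE(8000000 / 8) / 10^9 = (10^6 << 32) / 10^9
+ *	     ~= 4294967, i.e. 0.001 byte per clock tick in fixed point
+ *	depth = TB_SCALE(1500) = 1500 << 32
+ *	filluptime = depth / rate ~= 1.5 * 10^6 ticks = 1.5 msec
+ *
+ * so an empty bucket refills completely in 1.5 msec at 8 Mbps.
+ */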
+
+static void
+tb_import_profile(tb, profile)
+ struct tbe *tb;
+ struct tb_profile *profile;
+{
+ tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
+ tb->depth = TB_SCALE(profile->depth);
+ if (tb->rate > 0)
+ tb->filluptime = tb->depth / tb->rate;
+ else
+ tb->filluptime = 0xffffffffffffffffLL;
+ tb->token = tb->depth;
+ tb->last = read_machclk();
+}
+
+/*
+ * simple token bucket meter
+ */
+static struct tbmeter *
+tbm_create(top, profile, in_action, out_action)
+ struct top_cdnr *top;
+ struct tb_profile *profile;
+ struct tc_action *in_action, *out_action;
+{
+ struct tbmeter *tbm = NULL;
+
+ if (tca_verify_action(in_action) < 0
+ || tca_verify_action(out_action) < 0)
+ return (NULL);
+
+ if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER,
+ tbm_input)) == NULL)
+ return (NULL);
+
+ tb_import_profile(&tbm->tb, profile);
+
+ tca_import_action(&tbm->in_action, in_action);
+ tca_import_action(&tbm->out_action, out_action);
+
+ return (tbm);
+}
+
+static int
+tbm_destroy(tbm)
+ struct tbmeter *tbm;
+{
+ if (tbm->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tbm->in_action);
+ tca_invalidate_action(&tbm->out_action);
+
+ cdnr_cbdestroy(tbm);
+ return (0);
+}
+
+static struct tc_action *
+tbm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct tbmeter *tbm = (struct tbmeter *)cb;
+ u_int64_t len;
+ u_int64_t interval, now;
+
+ len = TB_SCALE(pktinfo->pkt_len);
+
+ if (tbm->tb.token < len) {
+ now = read_machclk();
+ interval = now - tbm->tb.last;
+ if (interval >= tbm->tb.filluptime)
+ tbm->tb.token = tbm->tb.depth;
+ else {
+ tbm->tb.token += interval * tbm->tb.rate;
+ if (tbm->tb.token > tbm->tb.depth)
+ tbm->tb.token = tbm->tb.depth;
+ }
+ tbm->tb.last = now;
+ }
+
+ if (tbm->tb.token < len) {
+ PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
+ return (&tbm->out_action);
+ }
+
+ tbm->tb.token -= len;
+ PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
+ return (&tbm->in_action);
+}
+
+/*
+ * two rate three color marker
+ * as described in draft-heinanen-diffserv-trtcm-01.txt
+ */
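+/*
+ * In outline (see trtcm_input() below): a packet is marked red when the
+ * peak bucket is out of tokens, yellow when only the committed bucket
+ * runs short, and green otherwise.  In color-aware mode the incoming
+ * color is honored as a floor, so a packet can be demoted but never
+ * promoted.
+ */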
+static struct trtcm *
+trtcm_create(top, cmtd_profile, peak_profile,
+ green_action, yellow_action, red_action, coloraware)
+ struct top_cdnr *top;
+ struct tb_profile *cmtd_profile, *peak_profile;
+ struct tc_action *green_action, *yellow_action, *red_action;
+ int coloraware;
+{
+ struct trtcm *tcm = NULL;
+
+ if (tca_verify_action(green_action) < 0
+ || tca_verify_action(yellow_action) < 0
+ || tca_verify_action(red_action) < 0)
+ return (NULL);
+
+ if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM,
+ trtcm_input)) == NULL)
+ return (NULL);
+
+ tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
+ tb_import_profile(&tcm->peak_tb, peak_profile);
+
+ tca_import_action(&tcm->green_action, green_action);
+ tca_import_action(&tcm->yellow_action, yellow_action);
+ tca_import_action(&tcm->red_action, red_action);
+
+ /* set dscps to use */
+ if (tcm->green_action.tca_code == TCACODE_MARK)
+ tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->green_dscp = DSCP_AF11;
+ if (tcm->yellow_action.tca_code == TCACODE_MARK)
+ tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->yellow_dscp = DSCP_AF12;
+ if (tcm->red_action.tca_code == TCACODE_MARK)
+ tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->red_dscp = DSCP_AF13;
+
+ tcm->coloraware = coloraware;
+
+ return (tcm);
+}
+
+static int
+trtcm_destroy(tcm)
+ struct trtcm *tcm;
+{
+ if (tcm->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tcm->green_action);
+ tca_invalidate_action(&tcm->yellow_action);
+ tca_invalidate_action(&tcm->red_action);
+
+ cdnr_cbdestroy(tcm);
+ return (0);
+}
+
+static struct tc_action *
+trtcm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct trtcm *tcm = (struct trtcm *)cb;
+ u_int64_t len;
+ u_int64_t interval, now;
+ u_int8_t color;
+
+ len = TB_SCALE(pktinfo->pkt_len);
+ if (tcm->coloraware) {
+ color = pktinfo->pkt_dscp;
+ if (color != tcm->yellow_dscp && color != tcm->red_dscp)
+ color = tcm->green_dscp;
+ } else {
+ /* if color-blind, precolor it as green */
+ color = tcm->green_dscp;
+ }
+
+ now = read_machclk();
+ if (tcm->cmtd_tb.token < len) {
+ interval = now - tcm->cmtd_tb.last;
+ if (interval >= tcm->cmtd_tb.filluptime)
+ tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
+ else {
+ tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
+ if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
+ tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
+ }
+ tcm->cmtd_tb.last = now;
+ }
+ if (tcm->peak_tb.token < len) {
+ interval = now - tcm->peak_tb.last;
+ if (interval >= tcm->peak_tb.filluptime)
+ tcm->peak_tb.token = tcm->peak_tb.depth;
+ else {
+ tcm->peak_tb.token += interval * tcm->peak_tb.rate;
+ if (tcm->peak_tb.token > tcm->peak_tb.depth)
+ tcm->peak_tb.token = tcm->peak_tb.depth;
+ }
+ tcm->peak_tb.last = now;
+ }
+
+ if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
+ pktinfo->pkt_dscp = tcm->red_dscp;
+ PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
+ return (&tcm->red_action);
+ }
+
+ if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
+ pktinfo->pkt_dscp = tcm->yellow_dscp;
+ tcm->peak_tb.token -= len;
+ PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
+ return (&tcm->yellow_action);
+ }
+
+ pktinfo->pkt_dscp = tcm->green_dscp;
+ tcm->cmtd_tb.token -= len;
+ tcm->peak_tb.token -= len;
+ PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
+ return (&tcm->green_action);
+}
+
+/*
+ * time sliding window three color marker
+ * as described in draft-fang-diffserv-tc-tswtcm-00.txt
+ */
+static struct tswtcm *
+tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
+ green_action, yellow_action, red_action)
+ struct top_cdnr *top;
+ u_int32_t cmtd_rate, peak_rate, avg_interval;
+ struct tc_action *green_action, *yellow_action, *red_action;
+{
+ struct tswtcm *tsw;
+
+ if (tca_verify_action(green_action) < 0
+ || tca_verify_action(yellow_action) < 0
+ || tca_verify_action(red_action) < 0)
+ return (NULL);
+
+ if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM,
+ tswtcm_input)) == NULL)
+ return (NULL);
+
+ tca_import_action(&tsw->green_action, green_action);
+ tca_import_action(&tsw->yellow_action, yellow_action);
+ tca_import_action(&tsw->red_action, red_action);
+
+ /* set dscps to use */
+ if (tsw->green_action.tca_code == TCACODE_MARK)
+ tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->green_dscp = DSCP_AF11;
+ if (tsw->yellow_action.tca_code == TCACODE_MARK)
+ tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->yellow_dscp = DSCP_AF12;
+ if (tsw->red_action.tca_code == TCACODE_MARK)
+ tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->red_dscp = DSCP_AF13;
+
+ /* convert rates from bits/sec to bytes/sec */
+ tsw->cmtd_rate = cmtd_rate / 8;
+ tsw->peak_rate = peak_rate / 8;
+ tsw->avg_rate = 0;
+
+ /* timewin is converted from msec to machine clock unit */
+ tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
+
+ return (tsw);
+}
+
+static int
+tswtcm_destroy(tsw)
+ struct tswtcm *tsw;
+{
+ if (tsw->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tsw->green_action);
+ tca_invalidate_action(&tsw->yellow_action);
+ tca_invalidate_action(&tsw->red_action);
+
+ cdnr_cbdestroy(tsw);
+ return (0);
+}
+
+static struct tc_action *
+tswtcm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct tswtcm *tsw = (struct tswtcm *)cb;
+ int len;
+ u_int32_t avg_rate;
+ u_int64_t interval, now, tmp;
+
+ /*
+ * rate estimator
+ */
+ len = pktinfo->pkt_len;
+ now = read_machclk();
+
+ interval = now - tsw->t_front;
+ /*
+ * calculate average rate:
+ * avg = (avg * timewin + pkt_len)/(timewin + interval)
+ * pkt_len needs to be multiplied by machclk_freq in order to
+ * get (bytes/sec).
+ * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
+ * less than 32 bits, the following 64-bit operation has enough
+ * precision.
+ */
+ tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
+ + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
+ tsw->avg_rate = avg_rate = (u_int32_t)tmp;
+ tsw->t_front = now;
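+
+	/*
+	 * Worked example (illustrative figures): with a 1 GHz clock and
+	 * avg_interval = 500 msec, timewin is 5*10^8 ticks.  If avg_rate
+	 * was 125000 bytes/sec (1 Mbps) and a 1500-byte packet arrives
+	 * 1 msec (10^6 ticks) after the previous one, then
+	 *
+	 *	tmp = (125000 * 5*10^8 + 1500 * 10^9) / (5*10^8 + 10^6)
+	 *	   ~= 127745 bytes/sec
+	 *
+	 * so the estimate moves only gradually toward the instantaneous
+	 * rate of the new packet.
+	 */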
+
+ /*
+ * marker
+ */
+ if (avg_rate > tsw->cmtd_rate) {
+ u_int32_t randval = arc4random() % avg_rate;
+
+ if (avg_rate > tsw->peak_rate) {
+ if (randval < avg_rate - tsw->peak_rate) {
+ /* mark red */
+ pktinfo->pkt_dscp = tsw->red_dscp;
+ PKTCNTR_ADD(&tsw->red_cnt, len);
+ return (&tsw->red_action);
+ } else if (randval < avg_rate - tsw->cmtd_rate)
+ goto mark_yellow;
+ } else {
+ /* peak_rate >= avg_rate > cmtd_rate */
+ if (randval < avg_rate - tsw->cmtd_rate) {
+ mark_yellow:
+ pktinfo->pkt_dscp = tsw->yellow_dscp;
+ PKTCNTR_ADD(&tsw->yellow_cnt, len);
+ return (&tsw->yellow_action);
+ }
+ }
+ }
+
+ /* mark green */
+ pktinfo->pkt_dscp = tsw->green_dscp;
+ PKTCNTR_ADD(&tsw->green_cnt, len);
+ return (&tsw->green_action);
+}
+
+/*
+ * ioctl requests
+ */
+static int
+cdnrcmd_if_attach(ifname)
+ char *ifname;
+{
+ struct ifnet *ifp;
+ struct top_cdnr *top;
+
+ if ((ifp = ifunit(ifname)) == NULL)
+ return (EBADF);
+
+ if (ifp->if_snd.altq_cdnr != NULL)
+ return (EBUSY);
+
+ if ((top = top_create(&ifp->if_snd)) == NULL)
+ return (ENOMEM);
+ return (0);
+}
+
+static int
+cdnrcmd_if_detach(ifname)
+ char *ifname;
+{
+ struct top_cdnr *top;
+
+ if ((top = tcb_lookup(ifname)) == NULL)
+ return (EBADF);
+
+ return top_destroy(top);
+}
+
+static int
+cdnrcmd_add_element(ap)
+ struct cdnr_add_element *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ cb = element_create(top, &ap->action);
+ if (cb == NULL)
+ return (EINVAL);
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(cb);
+ return (0);
+}
+
+static int
+cdnrcmd_delete_element(ap)
+ struct cdnr_delete_element *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (cb->cb_type != TCETYPE_ELEMENT)
+ return generic_element_destroy(cb);
+
+ return element_destroy(cb);
+}
+
+static int
+cdnrcmd_add_filter(ap)
+ struct cdnr_add_filter *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&top->tc_classifier, &ap->filter,
+ cb, &ap->filter_handle);
+}
+
+static int
+cdnrcmd_delete_filter(ap)
+ struct cdnr_delete_filter *ap;
+{
+ struct top_cdnr *top;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
+}
+
+static int
+cdnrcmd_add_tbm(ap)
+ struct cdnr_add_tbmeter *ap;
+{
+ struct top_cdnr *top;
+ struct tbmeter *tbm;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
+ if (tbm == NULL)
+ return (EINVAL);
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_tbm(ap)
+ struct cdnr_modify_tbmeter *ap;
+{
+ struct tbmeter *tbm;
+
+ if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ tb_import_profile(&tbm->tb, &ap->profile);
+
+ return (0);
+}
+
+static int
+cdnrcmd_tbm_stats(ap)
+ struct cdnr_tbmeter_stats *ap;
+{
+ struct tbmeter *tbm;
+
+ if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ ap->in_cnt = tbm->in_cnt;
+ ap->out_cnt = tbm->out_cnt;
+
+ return (0);
+}
+
+static int
+cdnrcmd_add_trtcm(ap)
+ struct cdnr_add_trtcm *ap;
+{
+ struct top_cdnr *top;
+ struct trtcm *tcm;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
+ &ap->green_action, &ap->yellow_action,
+ &ap->red_action, ap->coloraware);
+ if (tcm == NULL)
+ return (EINVAL);
+
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_trtcm(ap)
+ struct cdnr_modify_trtcm *ap;
+{
+ struct trtcm *tcm;
+
+ if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
+ tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
+
+ return (0);
+}
+
+static int
+cdnrcmd_tcm_stats(ap)
+ struct cdnr_tcm_stats *ap;
+{
+ struct cdnr_block *cb;
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (cb->cb_type == TCETYPE_TRTCM) {
+ struct trtcm *tcm = (struct trtcm *)cb;
+
+ ap->green_cnt = tcm->green_cnt;
+ ap->yellow_cnt = tcm->yellow_cnt;
+ ap->red_cnt = tcm->red_cnt;
+ } else if (cb->cb_type == TCETYPE_TSWTCM) {
+ struct tswtcm *tsw = (struct tswtcm *)cb;
+
+ ap->green_cnt = tsw->green_cnt;
+ ap->yellow_cnt = tsw->yellow_cnt;
+ ap->red_cnt = tsw->red_cnt;
+ } else
+ return (EINVAL);
+
+ return (0);
+}
+
+static int
+cdnrcmd_add_tswtcm(ap)
+ struct cdnr_add_tswtcm *ap;
+{
+ struct top_cdnr *top;
+ struct tswtcm *tsw;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if (ap->cmtd_rate > ap->peak_rate)
+ return (EINVAL);
+
+ tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
+ ap->avg_interval, &ap->green_action,
+ &ap->yellow_action, &ap->red_action);
+ if (tsw == NULL)
+ return (EINVAL);
+
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_tswtcm(ap)
+ struct cdnr_modify_tswtcm *ap;
+{
+ struct tswtcm *tsw;
+
+ if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (ap->cmtd_rate > ap->peak_rate)
+ return (EINVAL);
+
+ /* convert rates from bits/sec to bytes/sec */
+ tsw->cmtd_rate = ap->cmtd_rate / 8;
+ tsw->peak_rate = ap->peak_rate / 8;
+ tsw->avg_rate = 0;
+
+ /* timewin is converted from msec to machine clock unit */
+ tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
+
+ return (0);
+}
+
+static int
+cdnrcmd_get_stats(ap)
+ struct cdnr_get_stats *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+ struct tbmeter *tbm;
+ struct trtcm *tcm;
+ struct tswtcm *tsw;
+ struct tce_stats tce, *usp;
+ int error, n, nskip, nelements;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ /* copy action stats */
+ bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
+
+ /* stats for each element */
+ nelements = ap->nelements;
+ usp = ap->tce_stats;
+ if (nelements <= 0 || usp == NULL)
+ return (0);
+
+ nskip = ap->nskip;
+ n = 0;
+ LIST_FOREACH(cb, &top->tc_elements, cb_next) {
+ if (nskip > 0) {
+ nskip--;
+ continue;
+ }
+
+ bzero(&tce, sizeof(tce));
+ tce.tce_handle = cb->cb_handle;
+ tce.tce_type = cb->cb_type;
+ switch (cb->cb_type) {
+ case TCETYPE_TBMETER:
+ tbm = (struct tbmeter *)cb;
+ tce.tce_cnts[0] = tbm->in_cnt;
+ tce.tce_cnts[1] = tbm->out_cnt;
+ break;
+ case TCETYPE_TRTCM:
+ tcm = (struct trtcm *)cb;
+ tce.tce_cnts[0] = tcm->green_cnt;
+ tce.tce_cnts[1] = tcm->yellow_cnt;
+ tce.tce_cnts[2] = tcm->red_cnt;
+ break;
+ case TCETYPE_TSWTCM:
+ tsw = (struct tswtcm *)cb;
+ tce.tce_cnts[0] = tsw->green_cnt;
+ tce.tce_cnts[1] = tsw->yellow_cnt;
+ tce.tce_cnts[2] = tsw->red_cnt;
+ break;
+ default:
+ continue;
+ }
+
+ if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
+ sizeof(tce))) != 0)
+ return (error);
+
+ if (++n == nelements)
+ break;
+ }
+ ap->nelements = n;
+
+ return (0);
+}
+
+/*
+ * conditioner device interface
+ */
+int
+cdnropen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ if (machclk_freq == 0)
+ init_machclk();
+
+ if (machclk_freq == 0) {
+ printf("cdnr: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+cdnrclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct top_cdnr *top;
+ int err, error = 0;
+
+ while ((top = LIST_FIRST(&tcb_list)) != NULL) {
+ /* destroy all */
+ err = top_destroy(top);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+ altq_input = NULL;
+
+ return (error);
+}
+
+int
+cdnrioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct top_cdnr *top;
+ struct cdnr_interface *ifacep;
+ int s, error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case CDNR_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+ s = splnet();
+ switch (cmd) {
+
+ case CDNR_IF_ATTACH:
+ ifacep = (struct cdnr_interface *)addr;
+ error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
+ break;
+
+ case CDNR_IF_DETACH:
+ ifacep = (struct cdnr_interface *)addr;
+ error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
+ break;
+
+ case CDNR_ENABLE:
+ case CDNR_DISABLE:
+ ifacep = (struct cdnr_interface *)addr;
+ if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+
+ case CDNR_ENABLE:
+ ALTQ_SET_CNDTNING(top->tc_ifq);
+ if (altq_input == NULL)
+ altq_input = altq_cdnr_input;
+ break;
+
+ case CDNR_DISABLE:
+ ALTQ_CLEAR_CNDTNING(top->tc_ifq);
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ break;
+ if (top == NULL)
+ altq_input = NULL;
+ break;
+ }
+ break;
+
+ case CDNR_ADD_ELEM:
+ error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
+ break;
+
+ case CDNR_DEL_ELEM:
+ error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
+ break;
+
+ case CDNR_ADD_TBM:
+ error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
+ break;
+
+ case CDNR_MOD_TBM:
+ error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
+ break;
+
+ case CDNR_TBM_STATS:
+ error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
+ break;
+
+ case CDNR_ADD_TCM:
+ error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
+ break;
+
+ case CDNR_MOD_TCM:
+ error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
+ break;
+
+ case CDNR_TCM_STATS:
+ error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
+ break;
+
+ case CDNR_ADD_FILTER:
+ error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
+ break;
+
+ case CDNR_DEL_FILTER:
+ error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
+ break;
+
+ case CDNR_GETSTATS:
+ error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
+ break;
+
+ case CDNR_ADD_TSW:
+ error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
+ break;
+
+ case CDNR_MOD_TSW:
+ error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ splx(s);
+
+ return error;
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw cdnr_sw =
+ {"cdnr", cdnropen, cdnrclose, cdnrioctl};
+
+ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ3_COMPAT */
+#endif /* ALTQ_CDNR */
diff --git a/freebsd/sys/net/altq/altq_cdnr.h b/freebsd/sys/net/altq/altq_cdnr.h
new file mode 100644
index 00000000..06fa9c98
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_cdnr.h
@@ -0,0 +1,336 @@
+/*-
+ * Copyright (C) 1999-2002
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_cdnr.h,v 1.9 2003/07/10 12:07:48 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_CDNR_H_
+#define _ALTQ_ALTQ_CDNR_H_
+
+#include <net/altq/altq.h>
+
+/*
+ * traffic conditioner element types
+ */
+#define TCETYPE_NONE 0
+#define TCETYPE_TOP 1 /* top level conditioner */
+#define TCETYPE_ELEMENT 2 /* a simple tc element */
+#define TCETYPE_TBMETER 3 /* token bucket meter */
+#define TCETYPE_TRTCM 4 /* (two-rate) three color marker */
+#define	TCETYPE_TSWTCM	5	/* time sliding window 3-color marker */
+
+/*
+ * traffic conditioner action
+ */
+struct cdnr_block;
+
+struct tc_action {
+ int tca_code; /* e.g., TCACODE_PASS */
+ /* tca_code dependent variable */
+ union {
+ u_long un_value; /* template */
+ u_int8_t un_dscp; /* diffserv code point */
+ u_long un_handle; /* tc action handle */
+ struct cdnr_block *un_next; /* next tc element block */
+ } tca_un;
+};
+#define tca_value tca_un.un_value
+#define tca_dscp tca_un.un_dscp
+#define tca_handle tca_un.un_handle
+#define tca_next tca_un.un_next
+
+#define TCACODE_NONE 0 /* action is not set */
+#define TCACODE_PASS 1 /* pass this packet */
+#define TCACODE_DROP 2 /* discard this packet */
+#define TCACODE_RETURN 3 /* do not process this packet */
+#define TCACODE_MARK 4 /* mark dscp */
+#define TCACODE_HANDLE 5 /* take action specified by handle */
+#define TCACODE_NEXT 6 /* take action in the next tc element */
+#define TCACODE_MAX 6
+
+#define CDNR_NULL_HANDLE 0
+
+struct cdnr_interface {
+ char cdnr_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+};
+
+/* simple element operations */
+struct cdnr_add_element {
+ struct cdnr_interface iface;
+ struct tc_action action;
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_delete_element {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+};
+
+/* token-bucket meter operations */
+struct cdnr_add_tbmeter {
+ struct cdnr_interface iface;
+ struct tb_profile profile;
+ struct tc_action in_action;
+ struct tc_action out_action;
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_tbmeter {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct tb_profile profile;
+};
+
+struct cdnr_tbmeter_stats {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct pktcntr in_cnt;
+ struct pktcntr out_cnt;
+};
+
+/* two-rate three-color marker operations */
+struct cdnr_add_trtcm {
+ struct cdnr_interface iface;
+ struct tb_profile cmtd_profile; /* profile for committed tb */
+ struct tb_profile peak_profile; /* profile for peak tb */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+ int coloraware; /* color-aware/color-blind */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_trtcm {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct tb_profile cmtd_profile; /* profile for committed tb */
+ struct tb_profile peak_profile; /* profile for peak tb */
+ int coloraware; /* color-aware/color-blind */
+};
+
+struct cdnr_tcm_stats {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+/* time sliding window three-color marker operations */
+struct cdnr_add_tswtcm {
+ struct cdnr_interface iface;
+ u_int32_t cmtd_rate; /* committed rate (bits/sec) */
+ u_int32_t peak_rate; /* peak rate (bits/sec) */
+ u_int32_t avg_interval; /* averaging interval (msec) */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_tswtcm {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ u_int32_t cmtd_rate; /* committed rate (bits/sec) */
+ u_int32_t peak_rate; /* peak rate (bits/sec) */
+ u_int32_t avg_interval; /* averaging interval (msec) */
+};
+
+struct cdnr_add_filter {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct flow_filter filter;
+#endif
+ u_long filter_handle; /* return value */
+};
+
+struct cdnr_delete_filter {
+ struct cdnr_interface iface;
+ u_long filter_handle;
+};
+
+struct tce_stats {
+ u_long tce_handle; /* tc element handle */
+ int tce_type; /* e.g., TCETYPE_ELEMENT */
+ struct pktcntr tce_cnts[3]; /* tcm returns 3 counters */
+};
+
+struct cdnr_get_stats {
+ struct cdnr_interface iface;
+ struct pktcntr cnts[TCACODE_MAX+1];
+
+ /* element stats */
+ int nskip; /* skip # of elements */
+ int nelements; /* # of element stats (WR) */
+ struct tce_stats *tce_stats; /* pointer to stats array */
+};
+
+#define CDNR_IF_ATTACH _IOW('Q', 1, struct cdnr_interface)
+#define CDNR_IF_DETACH _IOW('Q', 2, struct cdnr_interface)
+#define CDNR_ENABLE _IOW('Q', 3, struct cdnr_interface)
+#define CDNR_DISABLE _IOW('Q', 4, struct cdnr_interface)
+#define CDNR_ADD_FILTER _IOWR('Q', 10, struct cdnr_add_filter)
+#define CDNR_DEL_FILTER _IOW('Q', 11, struct cdnr_delete_filter)
+#define CDNR_GETSTATS _IOWR('Q', 12, struct cdnr_get_stats)
+#define CDNR_ADD_ELEM _IOWR('Q', 30, struct cdnr_add_element)
+#define CDNR_DEL_ELEM _IOW('Q', 31, struct cdnr_delete_element)
+#define CDNR_ADD_TBM _IOWR('Q', 32, struct cdnr_add_tbmeter)
+#define CDNR_MOD_TBM _IOW('Q', 33, struct cdnr_modify_tbmeter)
+#define CDNR_TBM_STATS _IOWR('Q', 34, struct cdnr_tbmeter_stats)
+#define CDNR_ADD_TCM _IOWR('Q', 35, struct cdnr_add_trtcm)
+#define CDNR_MOD_TCM _IOWR('Q', 36, struct cdnr_modify_trtcm)
+#define CDNR_TCM_STATS _IOWR('Q', 37, struct cdnr_tcm_stats)
+#define CDNR_ADD_TSW _IOWR('Q', 38, struct cdnr_add_tswtcm)
+#define CDNR_MOD_TSW _IOWR('Q', 39, struct cdnr_modify_tswtcm)
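+
+/*
+ * Sketch of how a userland management program might drive these ioctls.
+ * The device path is an assumption based on the conventional ALTQ
+ * /dev/altq layout (error handling omitted):
+ *
+ *	struct cdnr_interface iface;
+ *	struct cdnr_add_tbmeter tbm;
+ *	int fd = open("/dev/altq/cdnr", O_RDWR);
+ *
+ *	strlcpy(iface.cdnr_ifname, "fxp0", IFNAMSIZ);
+ *	ioctl(fd, CDNR_IF_ATTACH, &iface);
+ *
+ *	memset(&tbm, 0, sizeof(tbm));
+ *	strlcpy(tbm.iface.cdnr_ifname, "fxp0", IFNAMSIZ);
+ *	tbm.profile.rate = 1000000;	(committed rate, bits/sec)
+ *	tbm.profile.depth = 1500;	(bucket depth, bytes)
+ *	tbm.in_action.tca_code = TCACODE_PASS;
+ *	tbm.out_action.tca_code = TCACODE_DROP;
+ *	ioctl(fd, CDNR_ADD_TBM, &tbm);	(returns handle in tbm.cdnr_handle)
+ *
+ *	ioctl(fd, CDNR_ENABLE, &iface);
+ */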
+
+#ifndef DSCP_EF
+/* diffserv code points */
+#define DSCP_MASK 0xfc
+#define DSCP_CUMASK 0x03
+#define DSCP_EF 0xb8
+#define DSCP_AF11 0x28
+#define DSCP_AF12 0x30
+#define DSCP_AF13 0x38
+#define DSCP_AF21 0x48
+#define DSCP_AF22 0x50
+#define DSCP_AF23 0x58
+#define DSCP_AF31 0x68
+#define DSCP_AF32 0x70
+#define DSCP_AF33 0x78
+#define DSCP_AF41 0x88
+#define DSCP_AF42 0x90
+#define DSCP_AF43 0x98
+#define AF_CLASSMASK 0xe0
+#define AF_DROPPRECMASK 0x18
+#endif
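+
+/*
+ * These constants are the 6-bit DSCPs in their position within the IP
+ * ToS octet, i.e. already shifted left by two: AF11 is codepoint 001010,
+ * which appears here as 0x28.  DSCP_MASK covers the upper six bits and
+ * DSCP_CUMASK the two (historically "currently unused") low-order bits.
+ */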
+
+#ifdef _KERNEL
+
+/*
+ * packet information passed to the input function of tc elements
+ */
+struct cdnr_pktinfo {
+ int pkt_len; /* packet length */
+ u_int8_t pkt_dscp; /* diffserv code point */
+};
+
+/*
+ * traffic conditioner control block common to all types of tc elements
+ */
+struct cdnr_block {
+ LIST_ENTRY(cdnr_block) cb_next;
+ int cb_len; /* size of this tc element */
+ int cb_type; /* cdnr block type */
+ int cb_ref; /* reference count of this element */
+ u_long cb_handle; /* handle of this tc element */
+ struct top_cdnr *cb_top; /* back pointer to top */
+ struct tc_action cb_action; /* top level action for this tcb */
+ struct tc_action *(*cb_input)(struct cdnr_block *,
+ struct cdnr_pktinfo *);
+};
+
+/*
+ * top level traffic conditioner structure for an interface
+ */
+struct top_cdnr {
+ struct cdnr_block tc_block;
+
+ LIST_ENTRY(top_cdnr) tc_next;
+ struct ifaltq *tc_ifq;
+
+ LIST_HEAD(, cdnr_block) tc_elements;
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier tc_classifier;
+#endif
+ struct pktcntr tc_cnts[TCACODE_MAX+1];
+};
+
+/* token bucket element */
+struct tbe {
+ u_int64_t rate;
+ u_int64_t depth;
+
+ u_int64_t token;
+ u_int64_t filluptime;
+ u_int64_t last;
+};
+
+/* token bucket meter structure */
+struct tbmeter {
+ struct cdnr_block cdnrblk; /* conditioner block */
+ struct tbe tb; /* token bucket */
+ struct tc_action in_action; /* actions for IN/OUT */
+ struct tc_action out_action; /* actions for IN/OUT */
+ struct pktcntr in_cnt; /* statistics for IN/OUT */
+ struct pktcntr out_cnt; /* statistics for IN/OUT */
+};
+
+/* two-rate three-color marker structure */
+struct trtcm {
+ struct cdnr_block cdnrblk; /* conditioner block */
+ struct tbe cmtd_tb; /* committed tb profile */
+ struct tbe peak_tb; /* peak tb profile */
+ struct tc_action green_action;
+ struct tc_action yellow_action;
+ struct tc_action red_action;
+ int coloraware;
+ u_int8_t green_dscp;
+ u_int8_t yellow_dscp;
+ u_int8_t red_dscp;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+/* time sliding window three-color marker structure */
+struct tswtcm {
+ struct cdnr_block cdnrblk; /* conditioner block */
+
+ u_int32_t avg_rate; /* average rate (bytes/sec) */
+ u_int64_t t_front; /* timestamp of last update */
+
+ u_int64_t timewin; /* average interval */
+ u_int32_t cmtd_rate; /* committed target rate */
+ u_int32_t peak_rate; /* peak target rate */
+ struct tc_action green_action;
+ struct tc_action yellow_action;
+ struct tc_action red_action;
+ u_int8_t green_dscp;
+ u_int8_t yellow_dscp;
+ u_int8_t red_dscp;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_CDNR_H_ */
diff --git a/freebsd/sys/net/altq/altq_classq.h b/freebsd/sys/net/altq/altq_classq.h
new file mode 100644
index 00000000..dc465a0b
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_classq.h
@@ -0,0 +1,213 @@
+/*-
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $
+ * $FreeBSD$
+ */
+/*
+ * class queue definitions extracted from rm_class.h.
+ */
+#ifndef _ALTQ_ALTQ_CLASSQ_H_
+#define _ALTQ_ALTQ_CLASSQ_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Packet Queue types: RED or DROPHEAD.
+ */
+#define Q_DROPHEAD 0x00
+#define Q_RED 0x01
+#define Q_RIO 0x02
+#define Q_DROPTAIL 0x03
+#define Q_CODEL 0x04
+
+#ifdef _KERNEL
+
+/*
+ * Packet Queue structures and macros to manipulate them.
+ */
+struct _class_queue_ {
+ struct mbuf *tail_; /* Tail of packet queue */
+ int qlen_; /* Queue length (in number of packets) */
+	int	qlim_;		/* Queue limit (in number of packets) */
+	int	qsize_;		/* Queue size (in number of bytes) */
+ int qtype_; /* Queue type */
+};
+
+typedef struct _class_queue_ class_queue_t;
+
+#define qtype(q) (q)->qtype_ /* Get queue type */
+#define qlimit(q) (q)->qlim_ /* Max packets to be queued */
+#define qlen(q) (q)->qlen_ /* Current queue length. */
+#define qsize(q) (q)->qsize_ /* Current queue size. */
+#define qtail(q) (q)->tail_ /* Tail of the queue */
+#define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL)
+
+#define qempty(q) ((q)->qlen_ == 0) /* Is the queue empty?? */
+#define q_is_codel(q) ((q)->qtype_ == Q_CODEL) /* Is the queue a codel queue */
+#define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */
+#define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */
+#define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO)
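+
+/*
+ * The queue is a circular, singly-linked list of mbufs threaded through
+ * m_nextpkt with only a tail pointer kept: tail_->m_nextpkt is the head,
+ * so enqueue at the tail and dequeue at the head are both O(1) without a
+ * separate head pointer.  An empty queue is tail_ == NULL, and a queue
+ * holding a single packet m has m->m_nextpkt pointing back to m itself.
+ */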
+
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
+extern void _addq(class_queue_t *, struct mbuf *);
+extern struct mbuf *_getq(class_queue_t *);
+extern struct mbuf *_getq_tail(class_queue_t *);
+extern struct mbuf *_getq_random(class_queue_t *);
+extern void _removeq(class_queue_t *, struct mbuf *);
+extern void _flushq(class_queue_t *);
+
+#else /* __GNUC__ && !ALTQ_DEBUG */
+/*
+ * inlined versions
+ */
+static __inline void
+_addq(class_queue_t *q, struct mbuf *m)
+{
+ struct mbuf *m0;
+
+ if ((m0 = qtail(q)) != NULL)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ m0 = m;
+ m0->m_nextpkt = m;
+ qtail(q) = m;
+ qlen(q)++;
+ qsize(q) += m_pktlen(m);
+}
+
+static __inline struct mbuf *
+_getq(class_queue_t *q)
+{
+ struct mbuf *m, *m0;
+
+ if ((m = qtail(q)) == NULL)
+ return (NULL);
+ if ((m0 = m->m_nextpkt) != m)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ qtail(q) = NULL;
+ qlen(q)--;
+ qsize(q) -= m_pktlen(m0);
+ m0->m_nextpkt = NULL;
+ return (m0);
+}
+
+/* drop a packet at the tail of the queue */
+static __inline struct mbuf *
+_getq_tail(class_queue_t *q)
+{
+ struct mbuf *m, *m0, *prev;
+
+ if ((m = m0 = qtail(q)) == NULL)
+ return NULL;
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else
+ qtail(q) = prev;
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+/* randomly select a packet in the queue */
+static __inline struct mbuf *
+_getq_random(class_queue_t *q)
+{
+ struct mbuf *m;
+ int i, n;
+
+ if ((m = qtail(q)) == NULL)
+ return NULL;
+ if (m->m_nextpkt == m)
+ qtail(q) = NULL;
+ else {
+ struct mbuf *prev = NULL;
+
+ n = random() % qlen(q) + 1;
+ for (i = 0; i < n; i++) {
+ prev = m;
+ m = m->m_nextpkt;
+ }
+ prev->m_nextpkt = m->m_nextpkt;
+ if (m == qtail(q))
+ qtail(q) = prev;
+ }
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+static __inline void
+_removeq(class_queue_t *q, struct mbuf *m)
+{
+ struct mbuf *m0, *prev;
+
+ m0 = qtail(q);
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else if (qtail(q) == m)
+ qtail(q) = prev;
+ qlen(q)--;
+}
+
+static __inline void
+_flushq(class_queue_t *q)
+{
+ struct mbuf *m;
+
+ while ((m = _getq(q)) != NULL)
+ m_freem(m);
+}
+
+#endif /* __GNUC__ && !ALTQ_DEBUG */
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_CLASSQ_H_ */
diff --git a/freebsd/sys/net/altq/altq_codel.c b/freebsd/sys/net/altq/altq_codel.c
new file mode 100644
index 00000000..438120f5
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_codel.c
@@ -0,0 +1,479 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * CoDel - The Controlled-Delay Active Queue Management algorithm
+ *
+ * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org>
+ * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
+ * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
+ * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
+ * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <net/altq/if_altq.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_codel.h>
+
+static int codel_should_drop(struct codel *, class_queue_t *,
+ struct mbuf *, u_int64_t);
+static void codel_Newton_step(struct codel_vars *);
+static u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t);
+
+#define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0)
+#define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0)
+#define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0)
+#define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0)
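+
+/*
+ * The comparisons are done on signed differences so that they stay
+ * correct even if the timestamps ever wrap.  By analogy with 8-bit
+ * arithmetic: for a = 2 and b = 250, (int8_t)(a - b) = 8 > 0, so a is
+ * still treated as "after" b; the same holds for the 64-bit machine
+ * clock values used here.
+ */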
+
+static int codel_request(struct ifaltq *, int, void *);
+
+static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *codel_dequeue(struct ifaltq *, int);
+
+int
+codel_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+
+ return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc,
+ codel_enqueue, codel_dequeue, codel_request, NULL, NULL));
+}
+
+int
+codel_add_altq(struct pf_altq *a)
+{
+ struct codel_if *cif;
+ struct ifnet *ifp;
+ struct codel_opts *opts;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ opts = &a->pq_u.codel_opts;
+
+ cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cif == NULL)
+ return (ENOMEM);
+ cif->cif_bandwidth = a->ifbandwidth;
+ cif->cif_ifq = &ifp->if_snd;
+
+ cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cif->cl_q == NULL) {
+ free(cif, M_DEVBUF);
+ return (ENOMEM);
+ }
+
+ if (a->qlimit == 0)
+ a->qlimit = 50; /* use default. */
+ qlimit(cif->cl_q) = a->qlimit;
+ qtype(cif->cl_q) = Q_CODEL;
+ qlen(cif->cl_q) = 0;
+ qsize(cif->cl_q) = 0;
+
+ if (opts->target == 0)
+ opts->target = 5;
+ if (opts->interval == 0)
+ opts->interval = 100;
+ cif->codel.params.target = machclk_freq * opts->target / 1000;
+ cif->codel.params.interval = machclk_freq * opts->interval / 1000;
+ cif->codel.params.ecn = opts->ecn;
+ cif->codel.stats.maxpacket = 256;
+
+ cif->cl_stats.qlength = qlen(cif->cl_q);
+ cif->cl_stats.qlimit = qlimit(cif->cl_q);
+
+ /* keep the state in pf_altq */
+ a->altq_disc = cif;
+
+ return (0);
+}
+
+int
+codel_remove_altq(struct pf_altq *a)
+{
+ struct codel_if *cif;
+
+ if ((cif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ if (cif->cl_q)
+ free(cif->cl_q, M_DEVBUF);
+ free(cif, M_DEVBUF);
+
+ return (0);
+}
+
+int
+codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct codel_if *cif;
+ struct codel_ifstats stats;
+ int error = 0;
+
+ if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL)
+ return (EBADF);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ stats = cif->cl_stats;
+ stats.stats = cif->codel.stats;
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+
+ return (0);
+}
+
+static int
+codel_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct codel_if *cif = (struct codel_if *)ifq->altq_disc;
+ struct mbuf *m;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ if (!ALTQ_IS_ENABLED(cif->cif_ifq))
+ break;
+
+ if (qempty(cif->cl_q))
+ break;
+
+ while ((m = _getq(cif->cl_q)) != NULL) {
+ PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
+ m_freem(m);
+ IFQ_DEC_LEN(cif->cif_ifq);
+ }
+ cif->cif_ifq->ifq_len = 0;
+ break;
+ }
+
+ return (0);
+}
+
+static int
+codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+
+	struct codel_if *cif = (struct codel_if *) ifq->altq_disc;
+	int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+		/* count the drop before the mbuf is freed */
+		PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
+		m_freem(m);
+ return (ENOBUFS);
+ }
+
+	len = m_pktlen(m);
+	if (codel_addq(&cif->codel, cif->cl_q, m)) {
+		/* codel_addq() has already freed the mbuf on failure */
+		PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, len);
+		return (ENOBUFS);
+	}
+ IFQ_INC_LEN(ifq);
+
+ return (0);
+}
+
+static struct mbuf *
+codel_dequeue(struct ifaltq *ifq, int op)
+{
+ struct codel_if *cif = (struct codel_if *)ifq->altq_disc;
+ struct mbuf *m;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (IFQ_IS_EMPTY(ifq))
+ return (NULL);
+
+ if (op == ALTDQ_POLL)
+ return (qhead(cif->cl_q));
+
+ m = codel_getq(&cif->codel, cif->cl_q);
+ if (m != NULL) {
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m));
+ return (m);
+ }
+
+ return (NULL);
+}
+
+struct codel *
+codel_alloc(int target, int interval, int ecn)
+{
+ struct codel *c;
+
+ c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (c != NULL) {
+ c->params.target = machclk_freq * target / 1000;
+ c->params.interval = machclk_freq * interval / 1000;
+ c->params.ecn = ecn;
+ c->stats.maxpacket = 256;
+ }
+
+ return (c);
+}
+
+void
+codel_destroy(struct codel *c)
+{
+
+ free(c, M_DEVBUF);
+}
+
+#define MTAG_CODEL 1438031249
+int
+codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m)
+{
+ struct m_tag *mtag;
+ uint64_t *enqueue_time;
+
+ if (qlen(q) < qlimit(q)) {
+ mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
+ if (mtag == NULL)
+ mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ m_freem(m);
+ return (-1);
+ }
+ enqueue_time = (uint64_t *)(mtag + 1);
+ *enqueue_time = read_machclk();
+ m_tag_prepend(m, mtag);
+ _addq(q, m);
+ return (0);
+ }
+ c->drop_overlimit++;
+ m_freem(m);
+
+ return (-1);
+}
+
+static int
+codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m,
+ u_int64_t now)
+{
+ struct m_tag *mtag;
+ uint64_t *enqueue_time;
+
+ if (m == NULL) {
+ c->vars.first_above_time = 0;
+ return (0);
+ }
+
+ mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
+ if (mtag == NULL) {
+ /* Only one warning per second. */
+ if (ppsratecheck(&c->last_log, &c->last_pps, 1))
+			printf("%s: could not find the packet mtag!\n",
+ __func__);
+ c->vars.first_above_time = 0;
+ return (0);
+ }
+ enqueue_time = (uint64_t *)(mtag + 1);
+ c->vars.ldelay = now - *enqueue_time;
+ c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m));
+
+ if (codel_time_before(c->vars.ldelay, c->params.target) ||
+ qsize(q) <= c->stats.maxpacket) {
+ /* went below - stay below for at least interval */
+ c->vars.first_above_time = 0;
+ return (0);
+ }
+ if (c->vars.first_above_time == 0) {
+ /* just went above from below. If we stay above
+ * for at least interval we'll say it's ok to drop
+ */
+ c->vars.first_above_time = now + c->params.interval;
+ return (0);
+ }
+ if (codel_time_after(now, c->vars.first_above_time))
+ return (1);
+
+ return (0);
+}
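+
+/*
+ * Example of the hysteresis above (illustrative figures): with
+ * target = 5 msec and interval = 100 msec, suppose sojourn times first
+ * exceed 5 msec at time t0.  first_above_time is set to t0 + 100 msec
+ * and packets keep passing; only if the delay is still above target at
+ * t0 + 100 msec does codel_should_drop() start returning 1.
+ */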
+
+/*
+ * Run a Newton method step:
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
+ */
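+/*
+ * Convergence example (real-number view of the fixed-point math): for
+ * count = 4 the fixed point is 1/sqrt(4) = 0.5.  Starting from a stale
+ * estimate x = 0.6:
+ *
+ *	x'  = 0.6/2 * (3 - 4 * 0.36)    = 0.468
+ *	x'' = 0.468/2 * (3 - 4 * 0.219) ~= 0.497
+ *
+ * so each step roughly doubles the number of correct digits.
+ */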
+static void
+codel_Newton_step(struct codel_vars *vars)
+{
+ uint32_t invsqrt, invsqrt2;
+ uint64_t val;
+
+/* sizeof_in_bits(rec_inv_sqrt) */
+#define REC_INV_SQRT_BITS (8 * sizeof(u_int16_t))
+/* needed shift to get a Q0.32 number from rec_inv_sqrt */
+#define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS)
+
+ invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
+ invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32;
+ val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2);
+ val >>= 2; /* avoid overflow in following multiply */
+ val = (val * invsqrt) >> (32 - 2 + 1);
+
+ vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
+}
+
+static u_int64_t
+codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt)
+{
+
+ return (t + (u_int32_t)(((u_int64_t)interval *
+ (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32));
+}
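+
+/*
+ * codel_control_law() schedules the next drop at t + interval/sqrt(count).
+ * For example, with interval = 100 msec the second drop comes roughly
+ * 100/sqrt(2) ~= 71 msec after the first and the fifth roughly
+ * 100/sqrt(5) ~= 45 msec after the fourth, so the drop rate ramps up
+ * gently for as long as the queue stays bad.
+ */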
+
+struct mbuf *
+codel_getq(struct codel *c, class_queue_t *q)
+{
+ struct mbuf *m;
+ u_int64_t now;
+ int drop;
+
+ if ((m = _getq(q)) == NULL) {
+ c->vars.dropping = 0;
+ return (m);
+ }
+
+ now = read_machclk();
+ drop = codel_should_drop(c, q, m, now);
+ if (c->vars.dropping) {
+ if (!drop) {
+ /* sojourn time below target - leave dropping state */
+ c->vars.dropping = 0;
+ } else if (codel_time_after_eq(now, c->vars.drop_next)) {
+ /* It's time for the next drop. Drop the current
+ * packet and dequeue the next. The dequeue might
+ * take us out of dropping state.
+ * If not, schedule the next drop.
+ * A large backlog might result in drop rates so high
+ * that the next drop should happen now,
+ * hence the while loop.
+ */
+ while (c->vars.dropping &&
+ codel_time_after_eq(now, c->vars.drop_next)) {
+				c->vars.count++; /* a wraparound here is
+						  * harmless since count is
+						  * no longer a divisor */
+ codel_Newton_step(&c->vars);
+ /* TODO ECN */
+ PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m));
+ m_freem(m);
+ m = _getq(q);
+ if (!codel_should_drop(c, q, m, now))
+ /* leave dropping state */
+ c->vars.dropping = 0;
+ else
+ /* and schedule the next drop */
+ c->vars.drop_next =
+ codel_control_law(c->vars.drop_next,
+ c->params.interval,
+ c->vars.rec_inv_sqrt);
+ }
+ }
+ } else if (drop) {
+ /* TODO ECN */
+ PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m));
+ m_freem(m);
+
+ m = _getq(q);
+ drop = codel_should_drop(c, q, m, now);
+
+ c->vars.dropping = 1;
+ /* if min went above target close to when we last went below it
+ * assume that the drop rate that controlled the queue on the
+ * last cycle is a good starting point to control it now.
+ */
+ if (codel_time_before(now - c->vars.drop_next,
+ 16 * c->params.interval)) {
+ c->vars.count = (c->vars.count - c->vars.lastcount) | 1;
+			/* we don't care if the rec_inv_sqrt approximation
+			 * is not very precise: the next Newton steps will
+			 * correct it quadratically.
+			 */
+ codel_Newton_step(&c->vars);
+ } else {
+ c->vars.count = 1;
+ c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
+ }
+ c->vars.lastcount = c->vars.count;
+ c->vars.drop_next = codel_control_law(now, c->params.interval,
+ c->vars.rec_inv_sqrt);
+ }
+
+ return (m);
+}
+
+void
+codel_getstats(struct codel *c, struct codel_stats *s)
+{
+ *s = c->stats;
+}
+
+#endif /* ALTQ_CODEL */
diff --git a/freebsd/sys/net/altq/altq_codel.h b/freebsd/sys/net/altq/altq_codel.h
new file mode 100644
index 00000000..8d7178b4
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_codel.h
@@ -0,0 +1,129 @@
+/*
+ * CoDel - The Controlled-Delay Active Queue Management algorithm
+ *
+ * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org>
+ * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
+ * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
+ * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
+ * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_CODEL_H_
+#define _ALTQ_ALTQ_CODEL_H_
+
+struct codel_stats {
+ u_int32_t maxpacket;
+ struct pktcntr drop_cnt;
+ u_int marked_packets;
+};
+
+struct codel_ifstats {
+ u_int qlength;
+ u_int qlimit;
+ struct codel_stats stats;
+ struct pktcntr cl_xmitcnt; /* transmitted packet counter */
+ struct pktcntr cl_dropcnt; /* dropped packet counter */
+};
+
+#ifdef _KERNEL
+#include <net/altq/altq_classq.h>
+
+/**
+ * struct codel_params - contains codel parameters
+ * <at> target: target queue size (in time units)
+ * <at> interval: width of moving time window
+ * <at> ecn: is Explicit Congestion Notification enabled
+ */
+struct codel_params {
+ u_int64_t target;
+ u_int64_t interval;
+ int ecn;
+};
+
+/**
+ * struct codel_vars - contains codel variables
+ * <at> count: how many drops we've done since the last time we
+ * entered dropping state
+ * <at> lastcount: count at entry to dropping state
+ * <at> dropping: set to true if in dropping state
+ * <at> rec_inv_sqrt: reciprocal value of sqrt(count) >> 1
+ * <at> first_above_time: when we went (or will go) continuously above
+ * target for interval
+ * <at> drop_next: time to drop next packet, or when we dropped last
+ * <at> ldelay: sojourn time of last dequeued packet
+ */
+struct codel_vars {
+ u_int32_t count;
+ u_int32_t lastcount;
+ int dropping;
+ u_int16_t rec_inv_sqrt;
+ u_int64_t first_above_time;
+ u_int64_t drop_next;
+ u_int64_t ldelay;
+};
+
+struct codel {
+ int last_pps;
+ struct codel_params params;
+ struct codel_vars vars;
+ struct codel_stats stats;
+ struct timeval last_log;
+ u_int32_t drop_overlimit;
+};
+
+/*
+ * codel interface state
+ */
+struct codel_if {
+ struct codel_if *cif_next; /* interface state list */
+ struct ifaltq *cif_ifq; /* backpointer to ifaltq */
+ u_int cif_bandwidth; /* link bandwidth in bps */
+
+ class_queue_t *cl_q; /* class queue structure */
+ struct codel codel;
+
+ /* statistics */
+ struct codel_ifstats cl_stats;
+};
+
+struct codel *codel_alloc(int, int, int);
+void codel_destroy(struct codel *);
+int codel_addq(struct codel *, class_queue_t *, struct mbuf *);
+struct mbuf *codel_getq(struct codel *, class_queue_t *);
+void codel_getstats(struct codel *, struct codel_stats *);
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_CODEL_H_ */
diff --git a/freebsd/sys/net/altq/altq_fairq.c b/freebsd/sys/net/altq/altq_fairq.c
new file mode 100644
index 00000000..efb58d3f
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_fairq.c
@@ -0,0 +1,911 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 2008 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $
+ * $FreeBSD$
+ */
+/*
+ * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
+ * fairq. The fairq algorithm is completely different from priq, of course,
+ * but because I used priq's skeleton I believe I should include priq's
+ * copyright.
+ *
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * FAIRQ - take traffic classified by keep state (hashed into
+ * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract
+ * the first packet from each bucket in a round-robin fashion.
+ *
+ * TODO - better overall qlimit support (right now it is per-bucket).
+ * - NOTE: RED etc. is per bucket, not overall.
+ * - better service curve support.
+ *
+ * EXAMPLE:
+ *
+ * altq on em0 fairq bandwidth 650Kb queue { std, bulk }
+ * queue std priority 3 bandwidth 400Kb \
+ * fairq (buckets 64, default, hogs 1Kb) qlimit 50
+ * queue bulk priority 2 bandwidth 100Kb \
+ * fairq (buckets 64, hogs 1Kb) qlimit 50
+ *
+ * pass out on em0 from any to any keep state queue std
+ * pass out on em0 inet proto tcp ..... port ... keep state queue bulk
+ */
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_fairq.h>
+
+/*
+ * function prototypes
+ */
+static int fairq_clear_interface(struct fairq_if *);
+static int fairq_request(struct ifaltq *, int, void *);
+static void fairq_purge(struct fairq_if *);
+static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int);
+static int fairq_class_destroy(struct fairq_class *);
+static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *fairq_dequeue(struct ifaltq *, int);
+
+static int fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t);
+static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
+static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
+static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
+static void fairq_purgeq(struct fairq_class *);
+
+static void get_class_stats(struct fairq_classstats *, struct fairq_class *);
+static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);
+
+int
+fairq_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+
+ error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc,
+ fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
+
+ return (error);
+}
+
+int
+fairq_add_altq(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ pif = malloc(sizeof(struct fairq_if),
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ pif->pif_bandwidth = a->ifbandwidth;
+ pif->pif_maxpri = -1;
+ pif->pif_ifq = &ifp->if_snd;
+
+ /* keep the state in pf_altq */
+ a->altq_disc = pif;
+
+ return (0);
+}
+
+int
+fairq_remove_altq(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ fairq_clear_interface(pif);
+
+ free(pif, M_DEVBUF);
+ return (0);
+}
+
+int
+fairq_add_queue(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+ struct fairq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ /* check parameters */
+ if (a->priority >= FAIRQ_MAXPRI)
+ return (EINVAL);
+ if (a->qid == 0)
+ return (EINVAL);
+ if (pif->pif_classes[a->priority] != NULL)
+ return (EBUSY);
+ if (clh_to_clp(pif, a->qid) != NULL)
+ return (EBUSY);
+
+ cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
+ &a->pq_u.fairq_opts, a->qid);
+ if (cl == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
+
+int
+fairq_remove_queue(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+ struct fairq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ return (fairq_class_destroy(cl));
+}
+
+int
+fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct fairq_if *pif;
+ struct fairq_class *cl;
+ struct fairq_classstats stats;
+ int error = 0;
+
+ if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+fairq_clear_interface(struct fairq_if *pif)
+{
+ struct fairq_class *cl;
+ int pri;
+
+ /* clear out the classes */
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ if ((cl = pif->pif_classes[pri]) != NULL)
+ fairq_class_destroy(cl);
+ }
+
+ return (0);
+}
+
+static int
+fairq_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ fairq_purge(pif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+fairq_purge(struct fairq_if *pif)
+{
+ struct fairq_class *cl;
+ int pri;
+
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
+ fairq_purgeq(cl);
+ }
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ pif->pif_ifq->ifq_len = 0;
+}
+
+static struct fairq_class *
+fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
+ u_int bandwidth, struct fairq_opts *opts, int qid)
+{
+ struct fairq_class *cl;
+ int flags = opts->flags;
+ u_int nbuckets = opts->nbuckets;
+ int i;
+
+#ifndef ALTQ_RED
+ if (flags & FARF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("fairq_class_create: RED not configured for FAIRQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_CODEL
+ if (flags & FARF_CODEL) {
+#ifdef ALTQ_DEBUG
+ printf("fairq_class_create: CODEL not configured for FAIRQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+ if (nbuckets == 0)
+ nbuckets = 256;
+ if (nbuckets > FAIRQ_MAX_BUCKETS)
+ nbuckets = FAIRQ_MAX_BUCKETS;
+ /* enforce power-of-2 size */
+ while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
+ ++nbuckets;
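+	/*
+	 * Illustrative note: for a power of two n, n ^ (n - 1) sets all
+	 * bits below the top bit, i.e. equals (n << 1) - 1 (8 ^ 7 == 15),
+	 * so the loop above bumps any other value up to the next power of
+	 * two (e.g. 100 -> 128) and cl_nbucket_mask below stays usable as
+	 * a bit mask.
+	 */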
+
+ if ((cl = pif->pif_classes[pri]) != NULL) {
+ /* modify the class instead of creating a new one */
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+ if (cl->cl_head)
+ fairq_purgeq(cl);
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ red_destroy(cl->cl_red);
+#endif
+#ifdef ALTQ_CODEL
+ if (cl->cl_qtype == Q_CODEL)
+ codel_destroy(cl->cl_codel);
+#endif
+ } else {
+ cl = malloc(sizeof(struct fairq_class),
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ cl->cl_nbuckets = nbuckets;
+ cl->cl_nbucket_mask = nbuckets - 1;
+
+ cl->cl_buckets = malloc(
+ sizeof(struct fairq_bucket) * cl->cl_nbuckets,
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ cl->cl_head = NULL;
+ }
+
+ pif->pif_classes[pri] = cl;
+ if (flags & FARF_DEFAULTCLASS)
+ pif->pif_default = cl;
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ cl->cl_qlimit = qlimit;
+ for (i = 0; i < cl->cl_nbuckets; ++i) {
+ qlimit(&cl->cl_buckets[i].queue) = qlimit;
+ }
+ cl->cl_bandwidth = bandwidth / 8;
+ cl->cl_qtype = Q_DROPTAIL;
+ cl->cl_flags = flags & FARF_USERFLAGS;
+ cl->cl_pri = pri;
+ if (pri > pif->pif_maxpri)
+ pif->pif_maxpri = pri;
+ cl->cl_pif = pif;
+ cl->cl_handle = qid;
+ cl->cl_hogs_m1 = opts->hogs_m1 / 8;
+ cl->cl_lssc_m1 = opts->lssc_m1 / 8; /* NOT YET USED */
+
+#ifdef ALTQ_RED
+ if (flags & (FARF_RED|FARF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & FARF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & FARF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (pif->pif_bandwidth < 8)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
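+		/*
+		 * Illustrative: the expression above is the time needed to
+		 * transmit one MTU-sized frame on a 10^9 ticks-per-second
+		 * base: a 1500-byte MTU at 1 Mbps (125000 bytes/sec) gives
+		 * 1500 * 10^9 / 125000 = 12000000 ticks, i.e. 12 ms.
+		 */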
+#ifdef ALTQ_RIO
+ if (flags & FARF_RIO) {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ cl->cl_qtype = Q_RIO;
+ } else
+#endif
+ if (flags & FARF_RED) {
+ cl->cl_red = red_alloc(0, 0,
+ cl->cl_qlimit * 10/100,
+ cl->cl_qlimit * 30/100,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ cl->cl_qtype = Q_RED;
+ }
+ }
+#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (flags & FARF_CODEL) {
+ cl->cl_codel = codel_alloc(5, 100, 0);
+ if (cl->cl_codel != NULL)
+ cl->cl_qtype = Q_CODEL;
+ }
+#endif
+
+ return (cl);
+}
+
+static int
+fairq_class_destroy(struct fairq_class *cl)
+{
+ struct fairq_if *pif;
+ int pri;
+
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+
+ if (cl->cl_head)
+ fairq_purgeq(cl);
+
+ pif = cl->cl_pif;
+ pif->pif_classes[cl->cl_pri] = NULL;
+ if (pif->pif_poll_cache == cl)
+ pif->pif_poll_cache = NULL;
+ if (pif->pif_maxpri == cl->cl_pri) {
+ for (pri = cl->cl_pri; pri >= 0; pri--)
+ if (pif->pif_classes[pri] != NULL) {
+ pif->pif_maxpri = pri;
+ break;
+ }
+ if (pri < 0)
+ pif->pif_maxpri = -1;
+ }
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ red_destroy(cl->cl_red);
+#endif
+#ifdef ALTQ_CODEL
+ if (cl->cl_qtype == Q_CODEL)
+ codel_destroy(cl->cl_codel);
+#endif
+ }
+ free(cl->cl_buckets, M_DEVBUF);
+ free(cl, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * fairq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+ struct fairq_class *cl = NULL; /* Make compiler happy */
+ struct pf_mtag *t;
+ u_int32_t qid_hash = 0;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+
+ if ((t = pf_find_mtag(m)) != NULL) {
+ cl = clh_to_clp(pif, t->qid);
+ qid_hash = t->qid_hash;
+ }
+ if (cl == NULL) {
+ cl = pif->pif_default;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+ cl->cl_flags |= FARF_HAS_PACKETS;
+ cl->cl_pktattr = NULL;
+ len = m_pktlen(m);
+ if (fairq_addq(cl, m, qid_hash) != 0) {
+ /* drop occurred. mbuf was freed in fairq_addq. */
+ PKTCNTR_ADD(&cl->cl_dropcnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+
+ return (0);
+}
+
+/*
+ * fairq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+fairq_dequeue(struct ifaltq *ifq, int op)
+{
+ struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+ struct fairq_class *cl;
+ struct fairq_class *best_cl;
+ struct mbuf *best_m;
+ struct mbuf *m = NULL;
+ uint64_t cur_time = read_machclk();
+ int pri;
+ int hit_limit;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (IFQ_IS_EMPTY(ifq)) {
+ return (NULL);
+ }
+
+ if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
+ best_cl = pif->pif_poll_cache;
+ m = fairq_getq(best_cl, cur_time);
+ pif->pif_poll_cache = NULL;
+ if (m) {
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
+ return (m);
+ }
+ } else {
+ best_cl = NULL;
+ best_m = NULL;
+
+ for (pri = pif->pif_maxpri; pri >= 0; pri--) {
+ if ((cl = pif->pif_classes[pri]) == NULL)
+ continue;
+ if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
+ continue;
+ m = fairq_pollq(cl, cur_time, &hit_limit);
+ if (m == NULL) {
+ cl->cl_flags &= ~FARF_HAS_PACKETS;
+ continue;
+ }
+
+ /*
+ * Only override the best choice if we are under
+ * the BW limit.
+ */
+ if (hit_limit == 0 || best_cl == NULL) {
+ best_cl = cl;
+ best_m = m;
+ }
+
+ /*
+ * Remember the highest priority mbuf in case we
+ * do not find any lower priority mbufs.
+ */
+ if (hit_limit)
+ continue;
+ break;
+ }
+ if (op == ALTDQ_POLL) {
+ pif->pif_poll_cache = best_cl;
+ m = best_m;
+ } else if (best_cl) {
+ m = fairq_getq(best_cl, cur_time);
+ if (m != NULL) {
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
+ }
+ }
+ return (m);
+ }
+ return (NULL);
+}
+
+static int
+fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid)
+{
+ fairq_bucket_t *b;
+ u_int hindex;
+ uint64_t bw;
+
+ /*
+ * If the packet doesn't have any keep state, put it on the end of
+ * our queue. XXX this can result in out-of-order delivery.
+ */
+ if (bucketid == 0) {
+ if (cl->cl_head)
+ b = cl->cl_head->prev;
+ else
+ b = &cl->cl_buckets[0];
+ } else {
+ hindex = bucketid & cl->cl_nbucket_mask;
+ b = &cl->cl_buckets[hindex];
+ }
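+	/*
+	 * Illustrative: cl_nbucket_mask is cl_nbuckets - 1 with
+	 * cl_nbuckets a power of two, so with 64 buckets a state hash
+	 * of 0x12345 selects bucket 0x12345 & 63 == 5.
+	 */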
+
+ /*
+ * Add the bucket to the end of the circular list of active buckets.
+ *
+ * As a special case, we add the bucket to the beginning of the list
+ * instead of the end if it was not previously on the list and if
+ * its traffic is less than the hog level.
+ */
+ if (b->in_use == 0) {
+ b->in_use = 1;
+ if (cl->cl_head == NULL) {
+ cl->cl_head = b;
+ b->next = b;
+ b->prev = b;
+ } else {
+ b->next = cl->cl_head;
+ b->prev = cl->cl_head->prev;
+ b->prev->next = b;
+ b->next->prev = b;
+
+ if (b->bw_delta && cl->cl_hogs_m1) {
+ bw = b->bw_bytes * machclk_freq / b->bw_delta;
+ if (bw < cl->cl_hogs_m1)
+ cl->cl_head = b;
+ }
+ }
+ }
+
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_CODEL
+ if (cl->cl_qtype == Q_CODEL)
+ return codel_addq(cl->cl_codel, &b->queue, m);
+#endif
+ if (qlen(&b->queue) >= qlimit(&b->queue)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & FARF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(&b->queue, m);
+
+ return (0);
+}
+
+static struct mbuf *
+fairq_getq(struct fairq_class *cl, uint64_t cur_time)
+{
+ fairq_bucket_t *b;
+ struct mbuf *m;
+
+ b = fairq_selectq(cl, 0);
+ if (b == NULL)
+ m = NULL;
+#ifdef ALTQ_RIO
+ else if (cl->cl_qtype == Q_RIO)
+ m = rio_getq((rio_t *)cl->cl_red, &b->queue);
+#endif
+#ifdef ALTQ_RED
+ else if (cl->cl_qtype == Q_RED)
+ m = red_getq(cl->cl_red, &b->queue);
+#endif
+#ifdef ALTQ_CODEL
+ else if (cl->cl_qtype == Q_CODEL)
+ m = codel_getq(cl->cl_codel, &b->queue);
+#endif
+ else
+ m = _getq(&b->queue);
+
+ /*
+ * Calculate the BW change
+ */
+ if (m != NULL) {
+ uint64_t delta;
+
+ /*
+ * Per-class bandwidth calculation
+ */
+ delta = (cur_time - cl->cl_last_time);
+ if (delta > machclk_freq * 8)
+ delta = machclk_freq * 8;
+ cl->cl_bw_delta += delta;
+ cl->cl_bw_bytes += m->m_pkthdr.len;
+ cl->cl_last_time = cur_time;
+ cl->cl_bw_delta -= cl->cl_bw_delta >> 3;
+ cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3;
+
+ /*
+ * Per-bucket bandwidth calculation
+ */
+ delta = (cur_time - b->last_time);
+ if (delta > machclk_freq * 8)
+ delta = machclk_freq * 8;
+ b->bw_delta += delta;
+ b->bw_bytes += m->m_pkthdr.len;
+ b->last_time = cur_time;
+ b->bw_delta -= b->bw_delta >> 3;
+ b->bw_bytes -= b->bw_bytes >> 3;
+ }
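+	/*
+	 * Illustrative: the >> 3 terms decay both accumulators by 1/8 on
+	 * every dequeue, making (bw_bytes, bw_delta) an exponentially
+	 * weighted pair; bw_bytes * machclk_freq / bw_delta (see
+	 * fairq_pollq()) then estimates recent throughput in bytes per
+	 * second, e.g. bw_delta == machclk_freq (one second of clocks)
+	 * yields simply bw_bytes.
+	 */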
+	return (m);
+}
+
+/*
+ * Figure out what the next packet would be if there were no limits. If
+ * this class has hit its bandwidth limit, *hit_limit is set to non-zero;
+ * otherwise it is set to 0. NULL is returned if the class has no queued
+ * packets.
+ */
+static struct mbuf *
+fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
+{
+ fairq_bucket_t *b;
+ struct mbuf *m;
+ uint64_t delta;
+ uint64_t bw;
+
+ *hit_limit = 0;
+ b = fairq_selectq(cl, 1);
+ if (b == NULL)
+		return (NULL);
+ m = qhead(&b->queue);
+
+ /*
+ * Did this packet exceed the class bandwidth? Calculate the
+ * bandwidth component of the packet.
+ *
+ * - Calculate bytes per second
+ */
+ delta = cur_time - cl->cl_last_time;
+ if (delta > machclk_freq * 8)
+ delta = machclk_freq * 8;
+ cl->cl_bw_delta += delta;
+ cl->cl_last_time = cur_time;
+ if (cl->cl_bw_delta) {
+ bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;
+
+ if (bw > cl->cl_bandwidth)
+ *hit_limit = 1;
+#ifdef ALTQ_DEBUG
+ printf("BW %6ju relative to %6u %d queue %p\n",
+ (uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b);
+#endif
+ }
+	return (m);
+}
+
+/*
+ * Locate the next queue we want to pull a packet out of. This code
+ * is also responsible for removing empty buckets from the circular list.
+ */
+static fairq_bucket_t *
+fairq_selectq(struct fairq_class *cl, int ispoll)
+{
+ fairq_bucket_t *b;
+ uint64_t bw;
+
+ if (ispoll == 0 && cl->cl_polled) {
+ b = cl->cl_polled;
+ cl->cl_polled = NULL;
+		return (b);
+ }
+
+ while ((b = cl->cl_head) != NULL) {
+ /*
+ * Remove empty queues from consideration
+ */
+ if (qempty(&b->queue)) {
+ b->in_use = 0;
+ cl->cl_head = b->next;
+ if (cl->cl_head == b) {
+ cl->cl_head = NULL;
+ } else {
+ b->next->prev = b->prev;
+ b->prev->next = b->next;
+ }
+ continue;
+ }
+
+ /*
+ * Advance the round-robin. Queues with bandwidths less
+ * than the hog bandwidth are allowed to burst.
+ */
+ if (cl->cl_hogs_m1 == 0) {
+ cl->cl_head = b->next;
+ } else if (b->bw_delta) {
+ bw = b->bw_bytes * machclk_freq / b->bw_delta;
+ if (bw >= cl->cl_hogs_m1) {
+ cl->cl_head = b->next;
+ }
+ /*
+ * XXX TODO -
+ */
+ }
+
+ /*
+ * Return bucket b.
+ */
+ break;
+ }
+ if (ispoll)
+ cl->cl_polled = b;
+	return (b);
+}
+
+static void
+fairq_purgeq(struct fairq_class *cl)
+{
+ fairq_bucket_t *b;
+ struct mbuf *m;
+
+ while ((b = fairq_selectq(cl, 0)) != NULL) {
+ while ((m = _getq(&b->queue)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
+ m_freem(m);
+ }
+ ASSERT(qlen(&b->queue) == 0);
+ }
+}
+
+static void
+get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
+{
+ fairq_bucket_t *b;
+
+ sp->class_handle = cl->cl_handle;
+ sp->qlimit = cl->cl_qlimit;
+ sp->xmit_cnt = cl->cl_xmitcnt;
+ sp->drop_cnt = cl->cl_dropcnt;
+ sp->qtype = cl->cl_qtype;
+ sp->qlength = 0;
+
+ if (cl->cl_head) {
+ b = cl->cl_head;
+ do {
+ sp->qlength += qlen(&b->queue);
+ b = b->next;
+ } while (b != cl->cl_head);
+ }
+
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_CODEL
+ if (cl->cl_qtype == Q_CODEL)
+ codel_getstats(cl->cl_codel, &sp->codel);
+#endif
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct fairq_class *
+clh_to_clp(struct fairq_if *pif, uint32_t chandle)
+{
+ struct fairq_class *cl;
+ int idx;
+
+ if (chandle == 0)
+ return (NULL);
+
+ for (idx = pif->pif_maxpri; idx >= 0; idx--)
+ if ((cl = pif->pif_classes[idx]) != NULL &&
+ cl->cl_handle == chandle)
+ return (cl);
+
+ return (NULL);
+}
+
+#endif /* ALTQ_FAIRQ */
diff --git a/freebsd/sys/net/altq/altq_fairq.h b/freebsd/sys/net/altq/altq_fairq.h
new file mode 100644
index 00000000..1a4b97dd
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_fairq.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2008 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $DragonFly: src/sys/net/altq/altq_fairq.h,v 1.1 2008/04/06 18:58:15 dillon Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_FAIRQ_H_
+#define _ALTQ_ALTQ_FAIRQ_H_
+
+#include <net/altq/altq.h>
+#include <net/altq/altq_classq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+#include <net/altq/altq_rmclass.h>
+
+#define FAIRQ_MAX_BUCKETS 2048 /* maximum number of sorting buckets */
+#define FAIRQ_MAXPRI RM_MAXPRIO
+#define FAIRQ_BITMAP_WIDTH (sizeof(fairq_bitmap_t)*8)
+#define FAIRQ_BITMAP_MASK (FAIRQ_BITMAP_WIDTH - 1)
+
+/* fairq class flags */
+#define FARF_RED 0x0001 /* use RED */
+#define FARF_ECN 0x0002 /* use RED/ECN */
+#define FARF_RIO 0x0004 /* use RIO */
+#define FARF_CODEL 0x0008 /* use CoDel */
+#define FARF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define FARF_DEFAULTCLASS 0x1000 /* default class */
+
+#define FARF_HAS_PACKETS 0x2000 /* might have queued packets */
+
+#define FARF_USERFLAGS (FARF_RED|FARF_ECN|FARF_RIO|FARF_CLEARDSCP| \
+ FARF_DEFAULTCLASS)
+
+/* special class handles */
+#define FAIRQ_NULLCLASS_HANDLE 0
+
+typedef u_int fairq_bitmap_t;
+
+struct fairq_classstats {
+ uint32_t class_handle;
+
+ u_int qlength;
+ u_int qlimit;
+ struct pktcntr xmit_cnt; /* transmitted packet counter */
+ struct pktcntr drop_cnt; /* dropped packet counter */
+
+ /* codel, red and rio related info */
+ int qtype;
+ struct redstats red[3]; /* rio has 3 red stats */
+ struct codel_stats codel;
+};
+
+#ifdef _KERNEL
+
+typedef struct fairq_bucket {
+ struct fairq_bucket *next; /* circular list */
+ struct fairq_bucket *prev; /* circular list */
+ class_queue_t queue; /* the actual queue */
+ uint64_t bw_bytes; /* statistics used to calculate bw */
+ uint64_t bw_delta; /* statistics used to calculate bw */
+ uint64_t last_time;
+ int in_use;
+} fairq_bucket_t;
+
+struct fairq_class {
+ uint32_t cl_handle; /* class handle */
+ u_int cl_nbuckets; /* (power of 2) */
+ u_int cl_nbucket_mask; /* bucket mask */
+ fairq_bucket_t *cl_buckets;
+ fairq_bucket_t *cl_head; /* head of circular bucket list */
+ fairq_bucket_t *cl_polled;
+ union {
+ struct red *cl_red; /* RED state */
+ struct codel *cl_codel; /* CoDel state */
+ } cl_aqm;
+#define cl_red cl_aqm.cl_red
+#define cl_codel cl_aqm.cl_codel
+ u_int cl_hogs_m1;
+ u_int cl_lssc_m1;
+ u_int cl_bandwidth;
+ uint64_t cl_bw_bytes;
+ uint64_t cl_bw_delta;
+ uint64_t cl_last_time;
+ int cl_qtype; /* rollup */
+ int cl_qlimit;
+ int cl_pri; /* priority */
+ int cl_flags; /* class flags */
+ struct fairq_if *cl_pif; /* back pointer to pif */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ /* round robin index */
+
+ /* statistics */
+ struct pktcntr cl_xmitcnt; /* transmitted packet counter */
+ struct pktcntr cl_dropcnt; /* dropped packet counter */
+};
+
+/*
+ * fairq interface state
+ */
+struct fairq_if {
+ struct fairq_if *pif_next; /* interface state list */
+ struct ifaltq *pif_ifq; /* backpointer to ifaltq */
+ u_int pif_bandwidth; /* link bandwidth in bps */
+ int pif_maxpri; /* max priority in use */
+ struct fairq_class *pif_poll_cache;/* cached poll */
+ struct fairq_class *pif_default; /* default class */
+ struct fairq_class *pif_classes[FAIRQ_MAXPRI]; /* classes */
+};
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_FAIRQ_H_ */
diff --git a/freebsd/sys/net/altq/altq_hfsc.c b/freebsd/sys/net/altq/altq_hfsc.c
new file mode 100644
index 00000000..f7a18296
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_hfsc.c
@@ -0,0 +1,2240 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ *
+ * $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $
+ * $FreeBSD$
+ */
+/*
+ * H-FSC is described in Proceedings of SIGCOMM'97,
+ * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
+ * Real-Time and Priority Service"
+ * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
+ *
+ * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
+ * when a class has an upperlimit, the fit-time is computed from the
+ * upperlimit service curve. the link-sharing scheduler does not schedule
+ * a class whose fit-time exceeds the current time.
+ */
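+
+/*
+ * Each service curve is handed in as an (m1, d, m2) triple (see
+ * hfsc_add_queue() below). Under the usual HFSC convention (an
+ * assumption stated here, not a quote of this file) that triple
+ * denotes the two-piece linear function
+ *
+ *	S(t) = m1 * t			for t <= d
+ *	S(t) = m1 * d + m2 * (t - d)	for t >  d
+ *
+ * with the slopes in bits per second and d in time units; sc2isc()
+ * converts it to the internal fixed-point form used by the rtsc_*
+ * routines.
+ */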
+
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/queue.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <sys/sockio.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#endif /* ALTQ3_COMPAT */
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_hfsc.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+
+/*
+ * function prototypes
+ */
+static int hfsc_clear_interface(struct hfsc_if *);
+static int hfsc_request(struct ifaltq *, int, void *);
+static void hfsc_purge(struct hfsc_if *);
+static struct hfsc_class *hfsc_class_create(struct hfsc_if *,
+ struct service_curve *, struct service_curve *, struct service_curve *,
+ struct hfsc_class *, int, int, int);
+static int hfsc_class_destroy(struct hfsc_class *);
+static struct hfsc_class *hfsc_nextclass(struct hfsc_class *);
+static int hfsc_enqueue(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+static struct mbuf *hfsc_dequeue(struct ifaltq *, int);
+
+static int hfsc_addq(struct hfsc_class *, struct mbuf *);
+static struct mbuf *hfsc_getq(struct hfsc_class *);
+static struct mbuf *hfsc_pollq(struct hfsc_class *);
+static void hfsc_purgeq(struct hfsc_class *);
+
+static void update_cfmin(struct hfsc_class *);
+static void set_active(struct hfsc_class *, int);
+static void set_passive(struct hfsc_class *);
+
+static void init_ed(struct hfsc_class *, int);
+static void update_ed(struct hfsc_class *, int);
+static void update_d(struct hfsc_class *, int);
+static void init_vf(struct hfsc_class *, int);
+static void update_vf(struct hfsc_class *, int, u_int64_t);
+static void ellist_insert(struct hfsc_class *);
+static void ellist_remove(struct hfsc_class *);
+static void ellist_update(struct hfsc_class *);
+struct hfsc_class *hfsc_get_mindl(struct hfsc_if *, u_int64_t);
+static void actlist_insert(struct hfsc_class *);
+static void actlist_remove(struct hfsc_class *);
+static void actlist_update(struct hfsc_class *);
+
+static struct hfsc_class *actlist_firstfit(struct hfsc_class *,
+ u_int64_t);
+
+static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t);
+static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t);
+static __inline u_int64_t m2sm(u_int);
+static __inline u_int64_t m2ism(u_int);
+static __inline u_int64_t d2dx(u_int);
+static u_int sm2m(u_int64_t);
+static u_int dx2d(u_int64_t);
+
+static void sc2isc(struct service_curve *, struct internal_sc *);
+static void rtsc_init(struct runtime_sc *, struct internal_sc *,
+ u_int64_t, u_int64_t);
+static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t);
+static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t);
+static void rtsc_min(struct runtime_sc *, struct internal_sc *,
+ u_int64_t, u_int64_t);
+
+static void get_class_stats(struct hfsc_classstats *,
+ struct hfsc_class *);
+static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t);
+
+
+#ifdef ALTQ3_COMPAT
+static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int);
+static int hfsc_detach(struct hfsc_if *);
+static int hfsc_class_modify(struct hfsc_class *, struct service_curve *,
+ struct service_curve *, struct service_curve *);
+
+static int hfsccmd_if_attach(struct hfsc_attach *);
+static int hfsccmd_if_detach(struct hfsc_interface *);
+static int hfsccmd_add_class(struct hfsc_add_class *);
+static int hfsccmd_delete_class(struct hfsc_delete_class *);
+static int hfsccmd_modify_class(struct hfsc_modify_class *);
+static int hfsccmd_add_filter(struct hfsc_add_filter *);
+static int hfsccmd_delete_filter(struct hfsc_delete_filter *);
+static int hfsccmd_class_stats(struct hfsc_class_stats *);
+
+altqdev_decl(hfsc);
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * macros
+ */
+#define is_a_parent_class(cl) ((cl)->cl_children != NULL)
+
+#define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */
+
+#ifdef ALTQ3_COMPAT
+/* hif_list keeps all hfsc_if's allocated. */
+static struct hfsc_if *hif_list = NULL;
+#endif /* ALTQ3_COMPAT */
+
+int
+hfsc_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+ s = splnet();
+ error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc,
+ hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL);
+ splx(s);
+ return (error);
+}
+
+int
+hfsc_add_altq(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (hif == NULL)
+ return (ENOMEM);
+
+ TAILQ_INIT(&hif->hif_eligible);
+ hif->hif_ifq = &ifp->if_snd;
+
+ /* keep the state in pf_altq */
+ a->altq_disc = hif;
+
+ return (0);
+}
+
+int
+hfsc_remove_altq(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+
+ if ((hif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ (void)hfsc_clear_interface(hif);
+ (void)hfsc_class_destroy(hif->hif_rootclass);
+
+ free(hif, M_DEVBUF);
+
+ return (0);
+}
+
+int
+hfsc_add_queue(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl, *parent;
+ struct hfsc_opts *opts;
+ struct service_curve rtsc, lssc, ulsc;
+
+ if ((hif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ opts = &a->pq_u.hfsc_opts;
+
+ if (a->parent_qid == HFSC_NULLCLASS_HANDLE &&
+ hif->hif_rootclass == NULL)
+ parent = NULL;
+ else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL)
+ return (EINVAL);
+
+ if (a->qid == 0)
+ return (EINVAL);
+
+ if (clh_to_clp(hif, a->qid) != NULL)
+ return (EBUSY);
+
+ rtsc.m1 = opts->rtsc_m1;
+ rtsc.d = opts->rtsc_d;
+ rtsc.m2 = opts->rtsc_m2;
+ lssc.m1 = opts->lssc_m1;
+ lssc.d = opts->lssc_d;
+ lssc.m2 = opts->lssc_m2;
+ ulsc.m1 = opts->ulsc_m1;
+ ulsc.d = opts->ulsc_d;
+ ulsc.m2 = opts->ulsc_m2;
+
+ cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
+ parent, a->qlimit, opts->flags, a->qid);
+ if (cl == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
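+
+/*
+ * Example (illustrative pf.conf, mirroring the FAIRQ example in
+ * altq_fairq.c; this is an assumption, so check the option spellings
+ * against pf.conf(5)):
+ *
+ *	altq on em0 hfsc bandwidth 10Mb queue { ssh, bulk }
+ *	queue ssh bandwidth 2Mb hfsc (realtime 1Mb)
+ *	queue bulk bandwidth 8Mb hfsc (default, upperlimit 9Mb)
+ */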
+
+int
+hfsc_remove_queue(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(hif, a->qid)) == NULL)
+ return (EINVAL);
+
+ return (hfsc_class_destroy(cl));
+}
+
+int
+hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct hfsc_classstats stats;
+ int error = 0;
+
+ if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes except the root class.
+ */
+static int
+hfsc_clear_interface(struct hfsc_if *hif)
+{
+ struct hfsc_class *cl;
+
+#ifdef ALTQ3_COMPAT
+ /* free the filters for this interface */
+ acc_discard_filters(&hif->hif_classifier, NULL, 1);
+#endif
+
+ /* clear out the classes */
+ while (hif->hif_rootclass != NULL &&
+ (cl = hif->hif_rootclass->cl_children) != NULL) {
+ /*
+ * remove the first leaf class found in the hierarchy
+ * then start over
+ */
+ for (; cl != NULL; cl = hfsc_nextclass(cl)) {
+ if (!is_a_parent_class(cl)) {
+ (void)hfsc_class_destroy(cl);
+ break;
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+hfsc_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ hfsc_purge(hif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+hfsc_purge(struct hfsc_if *hif)
+{
+ struct hfsc_class *cl;
+
+ for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ hif->hif_ifq->ifq_len = 0;
+}
+
+static struct hfsc_class *
+hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
+ struct service_curve *fsc, struct service_curve *usc,
+ struct hfsc_class *parent, int qlimit, int flags, int qid)
+{
+ struct hfsc_class *cl, *p;
+ int i, s;
+
+ if (hif->hif_classes >= HFSC_MAX_CLASSES)
+ return (NULL);
+
+#ifndef ALTQ_RED
+ if (flags & HFCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("hfsc_class_create: RED not configured for HFSC!\n");
+#endif
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_CODEL
+ if (flags & HFCF_CODEL) {
+#ifdef ALTQ_DEBUG
+ printf("hfsc_class_create: CODEL not configured for HFSC!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cl == NULL)
+ return (NULL);
+
+ cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cl->cl_q == NULL)
+ goto err_ret;
+
+ TAILQ_INIT(&cl->cl_actc);
+
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ qlimit(cl->cl_q) = qlimit;
+ qtype(cl->cl_q) = Q_DROPTAIL;
+ qlen(cl->cl_q) = 0;
+ qsize(cl->cl_q) = 0;
+ cl->cl_flags = flags;
+#ifdef ALTQ_RED
+ if (flags & (HFCF_RED|HFCF_RIO)) {
+ int red_flags, red_pkttime;
+ u_int m2;
+
+ m2 = 0;
+ if (rsc != NULL && rsc->m2 > m2)
+ m2 = rsc->m2;
+ if (fsc != NULL && fsc->m2 > m2)
+ m2 = fsc->m2;
+ if (usc != NULL && usc->m2 > m2)
+ m2 = usc->m2;
+
+ red_flags = 0;
+ if (flags & HFCF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & HFCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (m2 < 8)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (m2 / 8);
+ if (flags & HFCF_RED) {
+ cl->cl_red = red_alloc(0, 0,
+ qlimit(cl->cl_q) * 10/100,
+ qlimit(cl->cl_q) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RED;
+ }
+#ifdef ALTQ_RIO
+ else {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RIO;
+ }
+#endif
+ }
+#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (flags & HFCF_CODEL) {
+ cl->cl_codel = codel_alloc(5, 100, 0);
+ if (cl->cl_codel != NULL)
+ qtype(cl->cl_q) = Q_CODEL;
+ }
+#endif
+
+ if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
+ cl->cl_rsc = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_NOWAIT);
+ if (cl->cl_rsc == NULL)
+ goto err_ret;
+ sc2isc(rsc, cl->cl_rsc);
+ rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
+ rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
+ }
+ if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
+ cl->cl_fsc = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_NOWAIT);
+ if (cl->cl_fsc == NULL)
+ goto err_ret;
+ sc2isc(fsc, cl->cl_fsc);
+ rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
+ }
+ if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
+ cl->cl_usc = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_NOWAIT);
+ if (cl->cl_usc == NULL)
+ goto err_ret;
+ sc2isc(usc, cl->cl_usc);
+ rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
+ }
+
+ cl->cl_id = hif->hif_classid++;
+ cl->cl_handle = qid;
+ cl->cl_hif = hif;
+ cl->cl_parent = parent;
+
+ s = splnet();
+ IFQ_LOCK(hif->hif_ifq);
+ hif->hif_classes++;
+
+ /*
+ * find a free slot in the class table. if the slot matching
+ * the lower bits of qid is free, use this slot. otherwise,
+ * use the first free slot.
+ */
+ i = qid % HFSC_MAX_CLASSES;
+ if (hif->hif_class_tbl[i] == NULL)
+ hif->hif_class_tbl[i] = cl;
+ else {
+ for (i = 0; i < HFSC_MAX_CLASSES; i++)
+ if (hif->hif_class_tbl[i] == NULL) {
+ hif->hif_class_tbl[i] = cl;
+ break;
+ }
+ if (i == HFSC_MAX_CLASSES) {
+ IFQ_UNLOCK(hif->hif_ifq);
+ splx(s);
+ goto err_ret;
+ }
+ }
+
+ if (flags & HFCF_DEFAULTCLASS)
+ hif->hif_defaultclass = cl;
+
+ if (parent == NULL) {
+ /* this is root class */
+ hif->hif_rootclass = cl;
+ } else {
+ /* add this class to the children list of the parent */
+ if ((p = parent->cl_children) == NULL)
+ parent->cl_children = cl;
+ else {
+ while (p->cl_siblings != NULL)
+ p = p->cl_siblings;
+ p->cl_siblings = cl;
+ }
+ }
+ IFQ_UNLOCK(hif->hif_ifq);
+ splx(s);
+
+ return (cl);
+
+ err_ret:
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_destroy(cl->cl_codel);
+#endif
+ }
+ if (cl->cl_fsc != NULL)
+ free(cl->cl_fsc, M_DEVBUF);
+ if (cl->cl_rsc != NULL)
+ free(cl->cl_rsc, M_DEVBUF);
+ if (cl->cl_usc != NULL)
+ free(cl->cl_usc, M_DEVBUF);
+ if (cl->cl_q != NULL)
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+ return (NULL);
+}
+
+static int
+hfsc_class_destroy(struct hfsc_class *cl)
+{
+ int i, s;
+
+ if (cl == NULL)
+ return (0);
+
+ if (is_a_parent_class(cl))
+ return (EBUSY);
+
+ s = splnet();
+ IFQ_LOCK(cl->cl_hif->hif_ifq);
+
+#ifdef ALTQ3_COMPAT
+ /* delete filters referencing to this class */
+ acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0);
+#endif /* ALTQ3_COMPAT */
+
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+
+ if (cl->cl_parent == NULL) {
+ /* this is root class */
+ } else {
+ struct hfsc_class *p = cl->cl_parent->cl_children;
+
+ if (p == cl)
+ cl->cl_parent->cl_children = cl->cl_siblings;
+ else do {
+ if (p->cl_siblings == cl) {
+ p->cl_siblings = cl->cl_siblings;
+ break;
+ }
+ } while ((p = p->cl_siblings) != NULL);
+ ASSERT(p != NULL);
+ }
+
+ for (i = 0; i < HFSC_MAX_CLASSES; i++)
+ if (cl->cl_hif->hif_class_tbl[i] == cl) {
+ cl->cl_hif->hif_class_tbl[i] = NULL;
+ break;
+ }
+
+ cl->cl_hif->hif_classes--;
+ IFQ_UNLOCK(cl->cl_hif->hif_ifq);
+ splx(s);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_destroy(cl->cl_codel);
+#endif
+ }
+
+ IFQ_LOCK(cl->cl_hif->hif_ifq);
+ if (cl == cl->cl_hif->hif_rootclass)
+ cl->cl_hif->hif_rootclass = NULL;
+ if (cl == cl->cl_hif->hif_defaultclass)
+ cl->cl_hif->hif_defaultclass = NULL;
+ IFQ_UNLOCK(cl->cl_hif->hif_ifq);
+
+ if (cl->cl_usc != NULL)
+ free(cl->cl_usc, M_DEVBUF);
+ if (cl->cl_fsc != NULL)
+ free(cl->cl_fsc, M_DEVBUF);
+ if (cl->cl_rsc != NULL)
+ free(cl->cl_rsc, M_DEVBUF);
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * hfsc_nextclass returns the next class in the tree.
+ * usage:
+ * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+ * do_something;
+ */
+static struct hfsc_class *
+hfsc_nextclass(struct hfsc_class *cl)
+{
+ if (cl->cl_children != NULL)
+ cl = cl->cl_children;
+ else if (cl->cl_siblings != NULL)
+ cl = cl->cl_siblings;
+ else {
+ while ((cl = cl->cl_parent) != NULL)
+ if (cl->cl_siblings) {
+ cl = cl->cl_siblings;
+ break;
+ }
+ }
+
+ return (cl);
+}
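+
+/*
+ * Illustrative: this is a preorder walk; a node's children are visited
+ * before its siblings, and the walk climbs back toward the root once a
+ * subtree is exhausted, so the loop shown above visits every class
+ * exactly once starting from the root.
+ */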
+
+/*
+ * hfsc_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+ struct hfsc_class *cl;
+ struct pf_mtag *t;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ if ((t = pf_find_mtag(m)) != NULL)
+ cl = clh_to_clp(hif, t->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL || is_a_parent_class(cl)) {
+ cl = hif->hif_defaultclass;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->cl_pktattr = NULL;
+ len = m_pktlen(m);
+ if (hfsc_addq(cl, m) != 0) {
+ /* drop occurred. mbuf was freed in hfsc_addq. */
+ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+ cl->cl_hif->hif_packets++;
+
+ /* successfully queued. */
+ if (qlen(cl->cl_q) == 1)
+ set_active(cl, m_pktlen(m));
+
+ return (0);
+}
+
+/*
+ * hfsc_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+hfsc_dequeue(struct ifaltq *ifq, int op)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+ struct hfsc_class *cl;
+ struct mbuf *m;
+ int len, next_len;
+ int realtime = 0;
+ u_int64_t cur_time;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (hif->hif_packets == 0)
+ /* no packet in the tree */
+ return (NULL);
+
+ cur_time = read_machclk();
+
+ if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) {
+
+ cl = hif->hif_pollcache;
+ hif->hif_pollcache = NULL;
+ /* check if the class was scheduled by real-time criteria */
+ if (cl->cl_rsc != NULL)
+ realtime = (cl->cl_e <= cur_time);
+ } else {
+ /*
+ * if there are eligible classes, use real-time criteria.
+ * find the class with the minimum deadline among
+ * the eligible classes.
+ */
+ if ((cl = hfsc_get_mindl(hif, cur_time))
+ != NULL) {
+ realtime = 1;
+ } else {
+#ifdef ALTQ_DEBUG
+ int fits = 0;
+#endif
+ /*
+ * use link-sharing criteria
+ * get the class with the minimum vt in the hierarchy
+ */
+ cl = hif->hif_rootclass;
+ while (is_a_parent_class(cl)) {
+
+ cl = actlist_firstfit(cl, cur_time);
+ if (cl == NULL) {
+#ifdef ALTQ_DEBUG
+ if (fits > 0)
+ printf("%d fit but none found\n",fits);
+#endif
+ return (NULL);
+ }
+ /*
+ * update parent's cl_cvtmin.
+ * don't update if the new vt is smaller.
+ */
+ if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
+ cl->cl_parent->cl_cvtmin = cl->cl_vt;
+#ifdef ALTQ_DEBUG
+ fits++;
+#endif
+ }
+ }
+
+ if (op == ALTDQ_POLL) {
+ hif->hif_pollcache = cl;
+ m = hfsc_pollq(cl);
+ return (m);
+ }
+ }
+
+ m = hfsc_getq(cl);
+ if (m == NULL)
+ panic("hfsc_dequeue:");
+ len = m_pktlen(m);
+ cl->cl_hif->hif_packets--;
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len);
+
+ update_vf(cl, len, cur_time);
+ if (realtime)
+ cl->cl_cumul += len;
+
+ if (!qempty(cl->cl_q)) {
+ if (cl->cl_rsc != NULL) {
+ /* update ed */
+ next_len = m_pktlen(qhead(cl->cl_q));
+
+ if (realtime)
+ update_ed(cl, next_len);
+ else
+ update_d(cl, next_len);
+ }
+ } else {
+ /* the class becomes passive */
+ set_passive(cl);
+ }
+
+ return (m);
+}
+
+static int
+hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
+{
+
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_addq((rio_t *)cl->cl_red, cl->cl_q,
+ m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ return codel_addq(cl->cl_codel, cl->cl_q, m);
+#endif
+ if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & HFCF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(cl->cl_q, m);
+
+ return (0);
+}
+
+static struct mbuf *
+hfsc_getq(struct hfsc_class *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_getq(cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ return codel_getq(cl->cl_codel, cl->cl_q);
+#endif
+ return _getq(cl->cl_q);
+}
+
+static struct mbuf *
+hfsc_pollq(struct hfsc_class *cl)
+{
+ return qhead(cl->cl_q);
+}
+
+static void
+hfsc_purgeq(struct hfsc_class *cl)
+{
+ struct mbuf *m;
+
+ if (qempty(cl->cl_q))
+ return;
+
+ while ((m = _getq(cl->cl_q)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m));
+ m_freem(m);
+ cl->cl_hif->hif_packets--;
+ IFQ_DEC_LEN(cl->cl_hif->hif_ifq);
+ }
+ ASSERT(qlen(cl->cl_q) == 0);
+
+ update_vf(cl, 0, 0); /* remove cl from the actlist */
+ set_passive(cl);
+}
+
+static void
+set_active(struct hfsc_class *cl, int len)
+{
+ if (cl->cl_rsc != NULL)
+ init_ed(cl, len);
+ if (cl->cl_fsc != NULL)
+ init_vf(cl, len);
+
+ cl->cl_stats.period++;
+}
+
+static void
+set_passive(struct hfsc_class *cl)
+{
+ if (cl->cl_rsc != NULL)
+ ellist_remove(cl);
+
+ /*
+ * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
+ * needs to be called explicitly to remove a class from actlist
+ */
+}
+
+static void
+init_ed(struct hfsc_class *cl, int next_len)
+{
+ u_int64_t cur_time;
+
+ cur_time = read_machclk();
+
+ /* update the deadline curve */
+ rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);
+
+ /*
+ * update the eligible curve.
+ * for concave, it is equal to the deadline curve.
+ * for convex, it is a linear curve with slope m2.
+ */
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
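+	/*
+	 * Illustrative: sm1 <= sm2 is the convex case. Zeroing (dx, dy)
+	 * collapses the initial m1 segment, leaving the single line of
+	 * slope m2 described above, while the concave case (m1 > m2)
+	 * keeps the copied deadline curve unchanged.
+	 */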
+
+ /* compute e and d */
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ ellist_insert(cl);
+}
+
+static void
+update_ed(struct hfsc_class *cl, int next_len)
+{
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ ellist_update(cl);
+}
+
+static void
+update_d(struct hfsc_class *cl, int next_len)
+{
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+}
+
+static void
+init_vf(struct hfsc_class *cl, int len)
+{
+ struct hfsc_class *max_cl, *p;
+ u_int64_t vt, f, cur_time;
+ int go_active;
+
+ cur_time = 0;
+ go_active = 1;
+ for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
+
+ if (go_active && cl->cl_nactive++ == 0)
+ go_active = 1;
+ else
+ go_active = 0;
+
+ if (go_active) {
+ max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
+ if (max_cl != NULL) {
+ /*
+ * set vt to the average of the min and max
+ * classes. if the parent's period didn't
+ * change, don't decrease vt of the class.
+ */
+ vt = max_cl->cl_vt;
+ if (cl->cl_parent->cl_cvtmin != 0)
+ vt = (cl->cl_parent->cl_cvtmin + vt)/2;
+
+ if (cl->cl_parent->cl_vtperiod !=
+ cl->cl_parentperiod || vt > cl->cl_vt)
+ cl->cl_vt = vt;
+ } else {
+ /*
+ * first child for a new parent backlog period.
+ * add parent's cvtmax to vtoff of children
+ * to make a new vt (vtoff + vt) larger than
+ * the vt in the last period for all children.
+ */
+ vt = cl->cl_parent->cl_cvtmax;
+ for (p = cl->cl_parent->cl_children; p != NULL;
+ p = p->cl_siblings)
+ p->cl_vtoff += vt;
+ cl->cl_vt = 0;
+ cl->cl_parent->cl_cvtmax = 0;
+ cl->cl_parent->cl_cvtmin = 0;
+ }
+ cl->cl_initvt = cl->cl_vt;
+
+ /* update the virtual curve */
+ vt = cl->cl_vt + cl->cl_vtoff;
+ rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total);
+ if (cl->cl_virtual.x == vt) {
+ cl->cl_virtual.x -= cl->cl_vtoff;
+ cl->cl_vtoff = 0;
+ }
+ cl->cl_vtadj = 0;
+
+ cl->cl_vtperiod++; /* increment vt period */
+ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
+ if (cl->cl_parent->cl_nactive == 0)
+ cl->cl_parentperiod++;
+ cl->cl_f = 0;
+
+ actlist_insert(cl);
+
+ if (cl->cl_usc != NULL) {
+ /* class has upper limit curve */
+ if (cur_time == 0)
+ cur_time = read_machclk();
+
+ /* update the ulimit curve */
+ rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
+ cl->cl_total);
+ /* compute myf */
+ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
+ cl->cl_total);
+ cl->cl_myfadj = 0;
+ }
+ }
+
+ if (cl->cl_myf > cl->cl_cfmin)
+ f = cl->cl_myf;
+ else
+ f = cl->cl_cfmin;
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+static void
+update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
+{
+ u_int64_t f, myf_bound, delta;
+ int go_passive;
+
+ go_passive = qempty(cl->cl_q);
+
+ for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+
+ cl->cl_total += len;
+
+ if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
+ continue;
+
+ if (go_passive && --cl->cl_nactive == 0)
+ go_passive = 1;
+ else
+ go_passive = 0;
+
+ if (go_passive) {
+ /* no more active child, going passive */
+
+ /* update cvtmax of the parent class */
+ if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
+ cl->cl_parent->cl_cvtmax = cl->cl_vt;
+
+ /* remove this class from the vt list */
+ actlist_remove(cl);
+
+ update_cfmin(cl->cl_parent);
+
+ continue;
+ }
+
+ /*
+ * update vt and f
+ */
+ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+ - cl->cl_vtoff + cl->cl_vtadj;
+
+ /*
+ * if vt of the class is smaller than cvtmin,
+ * the class was skipped in the past due to non-fit.
+ * if so, we need to adjust vtadj.
+ */
+ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+ cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+ cl->cl_vt = cl->cl_parent->cl_cvtmin;
+ }
+
+ /* update the vt list */
+ actlist_update(cl);
+
+ if (cl->cl_usc != NULL) {
+ cl->cl_myf = cl->cl_myfadj
+ + rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
+
+ /*
+ * if myf lags behind by more than one clock tick
+ * from the current time, adjust myfadj to prevent
+ * a rate-limited class from going greedy.
+ * in a steady state under rate-limiting, myf
+ * fluctuates within one clock tick.
+ */
+ myf_bound = cur_time - machclk_per_tick;
+ if (cl->cl_myf < myf_bound) {
+ delta = cur_time - cl->cl_myf;
+ cl->cl_myfadj += delta;
+ cl->cl_myf += delta;
+ }
+ }
+
+ /* cl_f is max(cl_myf, cl_cfmin) */
+ if (cl->cl_myf > cl->cl_cfmin)
+ f = cl->cl_myf;
+ else
+ f = cl->cl_cfmin;
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+static void
+update_cfmin(struct hfsc_class *cl)
+{
+ struct hfsc_class *p;
+ u_int64_t cfmin;
+
+ if (TAILQ_EMPTY(&cl->cl_actc)) {
+ cl->cl_cfmin = 0;
+ return;
+ }
+ cfmin = HT_INFINITY;
+ TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
+ if (p->cl_f == 0) {
+ cl->cl_cfmin = 0;
+ return;
+ }
+ if (p->cl_f < cfmin)
+ cfmin = p->cl_f;
+ }
+ cl->cl_cfmin = cfmin;
+}
+
+/*
+ * TAILQ-based ellist and actlist implementation
+ * (Ion wanted to make a calendar-queue-based implementation)
+ */
+/*
+ * eligible list holds backlogged classes being sorted by their eligible times.
+ * there is one eligible list per interface.
+ */
+
+static void
+ellist_insert(struct hfsc_class *cl)
+{
+ struct hfsc_if *hif = cl->cl_hif;
+ struct hfsc_class *p;
+
+ /* check the last entry first */
+ if ((p = TAILQ_LAST(&hif->hif_eligible, elighead)) == NULL ||
+ p->cl_e <= cl->cl_e) {
+ TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
+ return;
+ }
+
+ TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
+ if (cl->cl_e < p->cl_e) {
+ TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static void
+ellist_remove(struct hfsc_class *cl)
+{
+ struct hfsc_if *hif = cl->cl_hif;
+
+ TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
+}
+
+static void
+ellist_update(struct hfsc_class *cl)
+{
+ struct hfsc_if *hif = cl->cl_hif;
+ struct hfsc_class *p, *last;
+
+ /*
+ * the eligible time of a class increases monotonically.
+ * if the next entry has a larger eligible time, nothing to do.
+ */
+ p = TAILQ_NEXT(cl, cl_ellist);
+ if (p == NULL || cl->cl_e <= p->cl_e)
+ return;
+
+ /* check the last entry */
+ last = TAILQ_LAST(&hif->hif_eligible, elighead);
+ ASSERT(last != NULL);
+ if (last->cl_e <= cl->cl_e) {
+ TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
+ return;
+ }
+
+ /*
+ * the new position must be between the next entry
+ * and the last entry
+ */
+ while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
+ if (cl->cl_e < p->cl_e) {
+ TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+/* find the class with the minimum deadline among the eligible classes */
+struct hfsc_class *
+hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
+{
+ struct hfsc_class *p, *cl = NULL;
+
+ TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
+ if (p->cl_e > cur_time)
+ break;
+ if (cl == NULL || p->cl_d < cl->cl_d)
+ cl = p;
+ }
+ return (cl);
+}
+
+/*
+ * the active children list holds the backlogged child classes, sorted
+ * by their virtual time.
+ * each intermediate class has one active children list.
+ */
+
+static void
+actlist_insert(struct hfsc_class *cl)
+{
+ struct hfsc_class *p;
+
+ /* check the last entry first */
+ if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead)) == NULL
+ || p->cl_vt <= cl->cl_vt) {
+ TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
+ return;
+ }
+
+ TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
+ if (cl->cl_vt < p->cl_vt) {
+ TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static void
+actlist_remove(struct hfsc_class *cl)
+{
+ TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
+}
+
+static void
+actlist_update(struct hfsc_class *cl)
+{
+ struct hfsc_class *p, *last;
+
+ /*
+ * the virtual time of a class increases monotonically during its
+ * backlogged period.
+ * if the next entry has a larger virtual time, nothing to do.
+ */
+ p = TAILQ_NEXT(cl, cl_actlist);
+ if (p == NULL || cl->cl_vt < p->cl_vt)
+ return;
+
+ /* check the last entry */
+ last = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
+ ASSERT(last != NULL);
+ if (last->cl_vt <= cl->cl_vt) {
+ TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
+ return;
+ }
+
+ /*
+ * the new position must be between the next entry
+ * and the last entry
+ */
+ while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
+ if (cl->cl_vt < p->cl_vt) {
+ TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static struct hfsc_class *
+actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
+{
+ struct hfsc_class *p;
+
+ TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
+ if (p->cl_f <= cur_time)
+ return (p);
+ }
+ return (NULL);
+}
+
+/*
+ * service curve support functions
+ *
+ * external service curve parameters
+ * m: bits/sec
+ * d: msec
+ * internal service curve parameters
+ * sm: (bytes/tsc_interval) << SM_SHIFT
+ * ism: (tsc_count/byte) << ISM_SHIFT
+ * dx: tsc_count
+ *
+ * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
+ * we should be able to handle 100K-1Gbps link speeds with 200MHz-1GHz CPU
+ * clocks. SM_SHIFT and ISM_SHIFT are selected to retain at least 3 effective
+ * decimal digits, as the following table shows.
+ *
+ * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
+ * ----------+-------------------------------------------------------
+ * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6
+ * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6
+ * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6
+ *
+ * nsec/byte 80000 8000 800 80 8
+ * ism(500MHz) 40000 4000 400 40 4
+ * ism(200MHz) 16000 1600 160 16 1.6
+ */
+#define SM_SHIFT 24
+#define ISM_SHIFT 10
+
+#define SM_MASK ((1LL << SM_SHIFT) - 1)
+#define ISM_MASK ((1LL << ISM_SHIFT) - 1)
+
+static __inline u_int64_t
+seg_x2y(u_int64_t x, u_int64_t sm)
+{
+ u_int64_t y;
+
+ /*
+ * compute
+ * y = x * sm >> SM_SHIFT
+ * but divide it for the upper and lower bits to avoid overflow
+ */
+ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
+ return (y);
+}
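+
+/*
+ * overflow note (illustrative figures): with a 1GHz machine clock, x (a
+ * raw clock count) exceeds 2^41 after roughly 37 minutes of uptime, and
+ * sm approaches 2^23 for fast links, so a direct x * sm product would
+ * overflow 64 bits.  splitting x at SM_SHIFT keeps each partial product
+ * below about 2^48.  seg_y2x() below uses the same split for ism.
+ */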
+
+static __inline u_int64_t
+seg_y2x(u_int64_t y, u_int64_t ism)
+{
+ u_int64_t x;
+
+ if (y == 0)
+ x = 0;
+ else if (ism == HT_INFINITY)
+ x = HT_INFINITY;
+ else {
+ x = (y >> ISM_SHIFT) * ism
+ + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
+ }
+ return (x);
+}
+
+static __inline u_int64_t
+m2sm(u_int m)
+{
+ u_int64_t sm;
+
+ sm = ((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq;
+ return (sm);
+}
+
+static __inline u_int64_t
+m2ism(u_int m)
+{
+ u_int64_t ism;
+
+ if (m == 0)
+ ism = HT_INFINITY;
+ else
+ ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m;
+ return (ism);
+}
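+
+/*
+ * worked example (hypothetical values): for m = 100Mbps and
+ * machclk_freq = 1GHz,
+ *	m2sm(100000000)  = (10^8 << 24) / 8 / 10^9  = 209715,
+ * i.e. 0.0125 bytes per tick scaled by 2^24, and
+ *	m2ism(100000000) = (10^9 << 10) * 8 / 10^8  = 81920,
+ * i.e. 80 ticks per byte scaled by 2^10.
+ */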
+
+static __inline u_int64_t
+d2dx(u_int d)
+{
+ u_int64_t dx;
+
+ dx = ((u_int64_t)d * machclk_freq) / 1000;
+ return (dx);
+}
+
+static u_int
+sm2m(u_int64_t sm)
+{
+ u_int64_t m;
+
+ m = (sm * 8 * machclk_freq) >> SM_SHIFT;
+ return ((u_int)m);
+}
+
+static u_int
+dx2d(u_int64_t dx)
+{
+ u_int64_t d;
+
+ d = dx * 1000 / machclk_freq;
+ return ((u_int)d);
+}
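+
+/*
+ * e.g. (hypothetical values) with machclk_freq = 1GHz:
+ *	d2dx(50)       = 50 * 10^9 / 1000 = 5 * 10^7 ticks (50 msec), and
+ *	dx2d(5 * 10^7) = 5 * 10^7 * 1000 / 10^9 = 50 msec,
+ * so the two conversions round-trip whenever d * machclk_freq is a
+ * multiple of 1000.
+ */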
+
+static void
+sc2isc(struct service_curve *sc, struct internal_sc *isc)
+{
+ isc->sm1 = m2sm(sc->m1);
+ isc->ism1 = m2ism(sc->m1);
+ isc->dx = d2dx(sc->d);
+ isc->dy = seg_x2y(isc->dx, isc->sm1);
+ isc->sm2 = m2sm(sc->m2);
+ isc->ism2 = m2ism(sc->m2);
+}
+
+/*
+ * initialize the runtime service curve with the given internal
+ * service curve starting at (x, y).
+ */
+static void
+rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
+ u_int64_t y)
+{
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->sm1 = isc->sm1;
+ rtsc->ism1 = isc->ism1;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ rtsc->sm2 = isc->sm2;
+ rtsc->ism2 = isc->ism2;
+}
+
+/*
+ * calculate the x-projection of the runtime service curve for the
+ * given y-projection value
+ */
+static u_int64_t
+rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y)
+{
+ u_int64_t x;
+
+ if (y < rtsc->y)
+ x = rtsc->x;
+ else if (y <= rtsc->y + rtsc->dy) {
+ /* x belongs to the 1st segment */
+ if (rtsc->dy == 0)
+ x = rtsc->x + rtsc->dx;
+ else
+ x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
+ } else {
+ /* x belongs to the 2nd segment */
+ x = rtsc->x + rtsc->dx
+ + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
+ }
+ return (x);
+}
+
+static u_int64_t
+rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x)
+{
+ u_int64_t y;
+
+ if (x <= rtsc->x)
+ y = rtsc->y;
+ else if (x <= rtsc->x + rtsc->dx)
+ /* y belongs to the 1st segment */
+ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
+ else
+ /* y belongs to the 2nd segment */
+ y = rtsc->y + rtsc->dy
+ + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
+ return (y);
+}
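+
+/*
+ * worked example (hypothetical values, SM_SHIFT/ISM_SHIFT scaling
+ * omitted for readability): a concave curve starting at (x, y) = (0, 0)
+ * with sm1 = 2 bytes/tick, dx = 100 ticks (hence dy = 200 bytes) and
+ * sm2 = 1 byte/tick gives
+ *	rtsc_x2y(rtsc, 150) = 200 + (150 - 100) * 1 = 250, and
+ *	rtsc_y2x(rtsc, 250) = 100 + (250 - 200) / 1 = 150,
+ * i.e. the two projections are inverses on each segment.
+ */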
+
+/*
+ * update the runtime service curve by taking the minimum of the current
+ * runtime service curve and the service curve starting at (x, y).
+ */
+static void
+rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
+ u_int64_t y)
+{
+ u_int64_t y1, y2, dx, dy;
+
+ if (isc->sm1 <= isc->sm2) {
+ /* service curve is convex */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 < y)
+ /* the current rtsc is smaller */
+ return;
+ rtsc->x = x;
+ rtsc->y = y;
+ return;
+ }
+
+ /*
+ * service curve is concave
+ * compute the two y values of the current rtsc
+ * y1: at x
+ * y2: at (x + dx)
+ */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 <= y) {
+ /* rtsc is below isc, no change to rtsc */
+ return;
+ }
+
+ y2 = rtsc_x2y(rtsc, x + isc->dx);
+ if (y2 >= y + isc->dy) {
+ /* rtsc is above isc, replace rtsc by isc */
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ return;
+ }
+
+ /*
+ * the two curves intersect
+ * compute the offsets (dx, dy) using the reverse
+ * function of seg_x2y()
+ * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
+ */
+ dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
+ /*
+ * check if (x, y1) belongs to the 1st segment of rtsc.
+ * if so, add the offset.
+ */
+ if (rtsc->x + rtsc->dx > x)
+ dx += rtsc->x + rtsc->dx - x;
+ dy = seg_x2y(dx, isc->sm1);
+
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = dx;
+ rtsc->dy = dy;
+ return;
+}
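+
+/*
+ * derivation of the intersection offset used above: dx is the point
+ * where the two segments have accumulated equal service, i.e.
+ *	seg_x2y(dx, sm1) = seg_x2y(dx, sm2) + (y1 - y)
+ *	(dx * sm1) >> SM_SHIFT = ((dx * sm2) >> SM_SHIFT) + (y1 - y)
+ * which solves to dx = ((y1 - y) << SM_SHIFT) / (sm1 - sm2).
+ */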
+
+static void
+get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl)
+{
+ sp->class_id = cl->cl_id;
+ sp->class_handle = cl->cl_handle;
+
+ if (cl->cl_rsc != NULL) {
+ sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
+ sp->rsc.d = dx2d(cl->cl_rsc->dx);
+ sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
+ } else {
+ sp->rsc.m1 = 0;
+ sp->rsc.d = 0;
+ sp->rsc.m2 = 0;
+ }
+ if (cl->cl_fsc != NULL) {
+ sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
+ sp->fsc.d = dx2d(cl->cl_fsc->dx);
+ sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
+ } else {
+ sp->fsc.m1 = 0;
+ sp->fsc.d = 0;
+ sp->fsc.m2 = 0;
+ }
+ if (cl->cl_usc != NULL) {
+ sp->usc.m1 = sm2m(cl->cl_usc->sm1);
+ sp->usc.d = dx2d(cl->cl_usc->dx);
+ sp->usc.m2 = sm2m(cl->cl_usc->sm2);
+ } else {
+ sp->usc.m1 = 0;
+ sp->usc.d = 0;
+ sp->usc.m2 = 0;
+ }
+
+ sp->total = cl->cl_total;
+ sp->cumul = cl->cl_cumul;
+
+ sp->d = cl->cl_d;
+ sp->e = cl->cl_e;
+ sp->vt = cl->cl_vt;
+ sp->f = cl->cl_f;
+
+ sp->initvt = cl->cl_initvt;
+ sp->vtperiod = cl->cl_vtperiod;
+ sp->parentperiod = cl->cl_parentperiod;
+ sp->nactive = cl->cl_nactive;
+ sp->vtoff = cl->cl_vtoff;
+ sp->cvtmax = cl->cl_cvtmax;
+ sp->myf = cl->cl_myf;
+ sp->cfmin = cl->cl_cfmin;
+ sp->cvtmin = cl->cl_cvtmin;
+ sp->myfadj = cl->cl_myfadj;
+ sp->vtadj = cl->cl_vtadj;
+
+ sp->cur_time = read_machclk();
+ sp->machclk_freq = machclk_freq;
+
+ sp->qlength = qlen(cl->cl_q);
+ sp->qlimit = qlimit(cl->cl_q);
+ sp->xmit_cnt = cl->cl_stats.xmit_cnt;
+ sp->drop_cnt = cl->cl_stats.drop_cnt;
+ sp->period = cl->cl_stats.period;
+
+ sp->qtype = qtype(cl->cl_q);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_getstats(cl->cl_codel, &sp->codel);
+#endif
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct hfsc_class *
+clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
+{
+ int i;
+ struct hfsc_class *cl;
+
+ if (chandle == 0)
+ return (NULL);
+ /*
+	 * first, optimistically try the slot matching the lower bits of
+	 * the handle. if that fails, fall back to a linear table search.
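+	 * (e.g. with HFSC_MAX_CLASSES = 64, handle 67 probes slot
+	 * 67 % 64 = 3 before scanning all 64 slots.)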
+ */
+ i = chandle % HFSC_MAX_CLASSES;
+ if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
+ return (cl);
+ for (i = 0; i < HFSC_MAX_CLASSES; i++)
+ if ((cl = hif->hif_class_tbl[i]) != NULL &&
+ cl->cl_handle == chandle)
+ return (cl);
+ return (NULL);
+}
+
+#ifdef ALTQ3_COMPAT
+static struct hfsc_if *
+hfsc_attach(ifq, bandwidth)
+ struct ifaltq *ifq;
+ u_int bandwidth;
+{
+ struct hfsc_if *hif;
+
+ hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK);
+ if (hif == NULL)
+ return (NULL);
+ bzero(hif, sizeof(struct hfsc_if));
+
+	TAILQ_INIT(&hif->hif_eligible);
+
+ hif->hif_ifq = ifq;
+
+ /* add this state to the hfsc list */
+ hif->hif_next = hif_list;
+ hif_list = hif;
+
+ return (hif);
+}
+
+static int
+hfsc_detach(hif)
+ struct hfsc_if *hif;
+{
+ (void)hfsc_clear_interface(hif);
+ (void)hfsc_class_destroy(hif->hif_rootclass);
+
+ /* remove this interface from the hif list */
+ if (hif_list == hif)
+ hif_list = hif->hif_next;
+ else {
+ struct hfsc_if *h;
+
+ for (h = hif_list; h != NULL; h = h->hif_next)
+ if (h->hif_next == hif) {
+ h->hif_next = hif->hif_next;
+ break;
+ }
+ ASSERT(h != NULL);
+ }
+
+ free(hif, M_DEVBUF);
+
+ return (0);
+}
+
+static int
+hfsc_class_modify(cl, rsc, fsc, usc)
+ struct hfsc_class *cl;
+ struct service_curve *rsc, *fsc, *usc;
+{
+ struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp;
+ u_int64_t cur_time;
+ int s;
+
+ rsc_tmp = fsc_tmp = usc_tmp = NULL;
+ if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) &&
+ cl->cl_rsc == NULL) {
+ rsc_tmp = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (rsc_tmp == NULL)
+ return (ENOMEM);
+ }
+ if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) &&
+ cl->cl_fsc == NULL) {
+ fsc_tmp = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (fsc_tmp == NULL) {
+			free(rsc_tmp, M_DEVBUF);
+ return (ENOMEM);
+ }
+ }
+ if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) &&
+ cl->cl_usc == NULL) {
+ usc_tmp = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (usc_tmp == NULL) {
+			free(rsc_tmp, M_DEVBUF);
+			free(fsc_tmp, M_DEVBUF);
+ return (ENOMEM);
+ }
+ }
+
+ cur_time = read_machclk();
+ s = splnet();
+ IFQ_LOCK(cl->cl_hif->hif_ifq);
+
+ if (rsc != NULL) {
+ if (rsc->m1 == 0 && rsc->m2 == 0) {
+ if (cl->cl_rsc != NULL) {
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+ free(cl->cl_rsc, M_DEVBUF);
+ cl->cl_rsc = NULL;
+ }
+ } else {
+ if (cl->cl_rsc == NULL)
+ cl->cl_rsc = rsc_tmp;
+ sc2isc(rsc, cl->cl_rsc);
+ rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time,
+ cl->cl_cumul);
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+ }
+ }
+
+ if (fsc != NULL) {
+ if (fsc->m1 == 0 && fsc->m2 == 0) {
+ if (cl->cl_fsc != NULL) {
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+ free(cl->cl_fsc, M_DEVBUF);
+ cl->cl_fsc = NULL;
+ }
+ } else {
+ if (cl->cl_fsc == NULL)
+ cl->cl_fsc = fsc_tmp;
+ sc2isc(fsc, cl->cl_fsc);
+ rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt,
+ cl->cl_total);
+ }
+ }
+
+ if (usc != NULL) {
+ if (usc->m1 == 0 && usc->m2 == 0) {
+ if (cl->cl_usc != NULL) {
+ free(cl->cl_usc, M_DEVBUF);
+ cl->cl_usc = NULL;
+ cl->cl_myf = 0;
+ }
+ } else {
+ if (cl->cl_usc == NULL)
+ cl->cl_usc = usc_tmp;
+ sc2isc(usc, cl->cl_usc);
+ rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time,
+ cl->cl_total);
+ }
+ }
+
+ if (!qempty(cl->cl_q)) {
+ if (cl->cl_rsc != NULL)
+ update_ed(cl, m_pktlen(qhead(cl->cl_q)));
+ if (cl->cl_fsc != NULL)
+ update_vf(cl, 0, cur_time);
+ /* is this enough? */
+ }
+
+ IFQ_UNLOCK(cl->cl_hif->hif_ifq);
+ splx(s);
+
+ return (0);
+}
+
+/*
+ * hfsc device interface
+ */
+int
+hfscopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ if (machclk_freq == 0)
+ init_machclk();
+
+ if (machclk_freq == 0) {
+ printf("hfsc: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+hfscclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct hfsc_if *hif;
+ int err, error = 0;
+
+ while ((hif = hif_list) != NULL) {
+ /* destroy all */
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ altq_disable(hif->hif_ifq);
+
+ err = altq_detach(hif->hif_ifq);
+ if (err == 0)
+ err = hfsc_detach(hif);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+hfscioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct hfsc_if *hif;
+ struct hfsc_interface *ifacep;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case HFSC_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+ return (error);
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case HFSC_IF_ATTACH:
+ error = hfsccmd_if_attach((struct hfsc_attach *)addr);
+ break;
+
+ case HFSC_IF_DETACH:
+ error = hfsccmd_if_detach((struct hfsc_interface *)addr);
+ break;
+
+ case HFSC_ENABLE:
+ case HFSC_DISABLE:
+ case HFSC_CLEAR_HIERARCHY:
+ ifacep = (struct hfsc_interface *)addr;
+ if ((hif = altq_lookup(ifacep->hfsc_ifname,
+ ALTQT_HFSC)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+
+ case HFSC_ENABLE:
+ if (hif->hif_defaultclass == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("hfsc: no default class\n");
+#endif
+ error = EINVAL;
+ break;
+ }
+ error = altq_enable(hif->hif_ifq);
+ break;
+
+ case HFSC_DISABLE:
+ error = altq_disable(hif->hif_ifq);
+ break;
+
+ case HFSC_CLEAR_HIERARCHY:
+ hfsc_clear_interface(hif);
+ break;
+ }
+ break;
+
+ case HFSC_ADD_CLASS:
+ error = hfsccmd_add_class((struct hfsc_add_class *)addr);
+ break;
+
+ case HFSC_DEL_CLASS:
+ error = hfsccmd_delete_class((struct hfsc_delete_class *)addr);
+ break;
+
+ case HFSC_MOD_CLASS:
+ error = hfsccmd_modify_class((struct hfsc_modify_class *)addr);
+ break;
+
+ case HFSC_ADD_FILTER:
+ error = hfsccmd_add_filter((struct hfsc_add_filter *)addr);
+ break;
+
+ case HFSC_DEL_FILTER:
+ error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr);
+ break;
+
+ case HFSC_GETSTATS:
+ error = hfsccmd_class_stats((struct hfsc_class_stats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+hfsccmd_if_attach(ap)
+ struct hfsc_attach *ap;
+{
+ struct hfsc_if *hif;
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(ap->iface.hfsc_ifname)) == NULL)
+ return (ENXIO);
+
+ if ((hif = hfsc_attach(&ifp->if_snd, ap->bandwidth)) == NULL)
+ return (ENOMEM);
+
+ /*
+	 * attach HFSC to this ifnet structure.
+ */
+ if ((error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif,
+ hfsc_enqueue, hfsc_dequeue, hfsc_request,
+ &hif->hif_classifier, acc_classify)) != 0)
+ (void)hfsc_detach(hif);
+
+ return (error);
+}
+
+static int
+hfsccmd_if_detach(ap)
+ struct hfsc_interface *ap;
+{
+ struct hfsc_if *hif;
+ int error;
+
+ if ((hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ altq_disable(hif->hif_ifq);
+
+ if ((error = altq_detach(hif->hif_ifq)))
+ return (error);
+
+ return hfsc_detach(hif);
+}
+
+static int
+hfsccmd_add_class(ap)
+ struct hfsc_add_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl, *parent;
+ int i;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if (ap->parent_handle == HFSC_NULLCLASS_HANDLE &&
+ hif->hif_rootclass == NULL)
+ parent = NULL;
+ else if ((parent = clh_to_clp(hif, ap->parent_handle)) == NULL)
+ return (EINVAL);
+
+ /* assign a class handle (use a free slot number for now) */
+ for (i = 1; i < HFSC_MAX_CLASSES; i++)
+ if (hif->hif_class_tbl[i] == NULL)
+ break;
+ if (i == HFSC_MAX_CLASSES)
+ return (EBUSY);
+
+ if ((cl = hfsc_class_create(hif, &ap->service_curve, NULL, NULL,
+ parent, ap->qlimit, ap->flags, i)) == NULL)
+ return (ENOMEM);
+
+ /* return a class handle to the user */
+ ap->class_handle = i;
+
+ return (0);
+}
+
+static int
+hfsccmd_delete_class(ap)
+ struct hfsc_delete_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return hfsc_class_destroy(cl);
+}
+
+static int
+hfsccmd_modify_class(ap)
+ struct hfsc_modify_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct service_curve *rsc = NULL;
+ struct service_curve *fsc = NULL;
+ struct service_curve *usc = NULL;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ if (ap->sctype & HFSC_REALTIMESC)
+ rsc = &ap->service_curve;
+ if (ap->sctype & HFSC_LINKSHARINGSC)
+ fsc = &ap->service_curve;
+ if (ap->sctype & HFSC_UPPERLIMITSC)
+ usc = &ap->service_curve;
+
+ return hfsc_class_modify(cl, rsc, fsc, usc);
+}
+
+static int
+hfsccmd_add_filter(ap)
+ struct hfsc_add_filter *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ if (is_a_parent_class(cl)) {
+#ifdef ALTQ_DEBUG
+ printf("hfsccmd_add_filter: not a leaf class!\n");
+#endif
+ return (EINVAL);
+ }
+
+ return acc_add_filter(&hif->hif_classifier, &ap->filter,
+ cl, &ap->filter_handle);
+}
+
+static int
+hfsccmd_delete_filter(ap)
+ struct hfsc_delete_filter *ap;
+{
+ struct hfsc_if *hif;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&hif->hif_classifier,
+ ap->filter_handle);
+}
+
+static int
+hfsccmd_class_stats(ap)
+ struct hfsc_class_stats *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct hfsc_classstats stats, *usp;
+ int n, nclasses, error;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ ap->cur_time = read_machclk();
+ ap->machclk_freq = machclk_freq;
+ ap->hif_classes = hif->hif_classes;
+ ap->hif_packets = hif->hif_packets;
+
+ /* skip the first N classes in the tree */
+ nclasses = ap->nskip;
+ for (cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses;
+ cl = hfsc_nextclass(cl), n++)
+ ;
+ if (n != nclasses)
+ return (EINVAL);
+
+ /* then, read the next N classes in the tree */
+ nclasses = ap->nclasses;
+ usp = ap->stats;
+ for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) {
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+
+ ap->nclasses = n;
+
+ return (0);
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw hfsc_sw =
+ {"hfsc", hfscopen, hfscclose, hfscioctl};
+
+ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw);
+MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1);
+MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_HFSC */
diff --git a/freebsd/sys/net/altq/altq_hfsc.h b/freebsd/sys/net/altq/altq_hfsc.h
new file mode 100644
index 00000000..de5e89b8
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_hfsc.h
@@ -0,0 +1,319 @@
+/*-
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ *
+ * $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $
+ * $FreeBSD$
+ */
+#ifndef _ALTQ_ALTQ_HFSC_H_
+#define _ALTQ_ALTQ_HFSC_H_
+
+#include <net/altq/altq.h>
+#include <net/altq/altq_classq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct service_curve {
+ u_int m1; /* slope of the first segment in bits/sec */
+ u_int d; /* the x-projection of the first segment in msec */
+ u_int m2; /* slope of the second segment in bits/sec */
+};
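+
+/*
+ * example (hypothetical values): the two-piece curve
+ *	struct service_curve sc = { .m1 = 2000000, .d = 50, .m2 = 1000000 };
+ * provides 2Mbps for the first 50 msec of a backlogged period and 1Mbps
+ * from then on; m1 > m2 yields a concave (burst-friendly) curve.
+ */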
+
+/* special class handles */
+#define HFSC_NULLCLASS_HANDLE 0
+#define HFSC_MAX_CLASSES 64
+
+/* hfsc class flags */
+#define HFCF_RED 0x0001 /* use RED */
+#define HFCF_ECN 0x0002 /* use RED/ECN */
+#define HFCF_RIO 0x0004 /* use RIO */
+#define HFCF_CODEL 0x0008 /* use CoDel */
+#define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define HFCF_DEFAULTCLASS 0x1000 /* default class */
+
+/* service curve types */
+#define HFSC_REALTIMESC 1
+#define HFSC_LINKSHARINGSC 2
+#define HFSC_UPPERLIMITSC 4
+#define HFSC_DEFAULTSC (HFSC_REALTIMESC|HFSC_LINKSHARINGSC)
+
+struct hfsc_classstats {
+ u_int class_id;
+ u_int32_t class_handle;
+ struct service_curve rsc;
+ struct service_curve fsc;
+ struct service_curve usc; /* upper limit service curve */
+
+ u_int64_t total; /* total work in bytes */
+ u_int64_t cumul; /* cumulative work in bytes
+ done by real-time criteria */
+ u_int64_t d; /* deadline */
+ u_int64_t e; /* eligible time */
+ u_int64_t vt; /* virtual time */
+ u_int64_t f; /* fit time for upper-limit */
+
+ /* info helpful for debugging */
+ u_int64_t initvt; /* init virtual time */
+ u_int64_t vtoff; /* cl_vt_ipoff */
+ u_int64_t cvtmax; /* cl_maxvt */
+ u_int64_t myf; /* cl_myf */
+ u_int64_t cfmin; /* cl_mincf */
+ u_int64_t cvtmin; /* cl_mincvt */
+ u_int64_t myfadj; /* cl_myfadj */
+ u_int64_t vtadj; /* cl_vtadj */
+ u_int64_t cur_time;
+ u_int32_t machclk_freq;
+
+ u_int qlength;
+ u_int qlimit;
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+
+ u_int vtperiod; /* vt period sequence no */
+ u_int parentperiod; /* parent's vt period seqno */
+ int nactive; /* number of active children */
+
+ /* codel, red and rio related info */
+ int qtype;
+ struct redstats red[3];
+ struct codel_stats codel;
+};
+
+#ifdef ALTQ3_COMPAT
+struct hfsc_interface {
+ char hfsc_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+};
+
+struct hfsc_attach {
+ struct hfsc_interface iface;
+ u_int bandwidth; /* link bandwidth in bits/sec */
+};
+
+struct hfsc_add_class {
+ struct hfsc_interface iface;
+ u_int32_t parent_handle;
+ struct service_curve service_curve;
+ int qlimit;
+ int flags;
+
+ u_int32_t class_handle; /* return value */
+};
+
+struct hfsc_delete_class {
+ struct hfsc_interface iface;
+ u_int32_t class_handle;
+};
+
+struct hfsc_modify_class {
+ struct hfsc_interface iface;
+ u_int32_t class_handle;
+ struct service_curve service_curve;
+ int sctype;
+};
+
+struct hfsc_add_filter {
+ struct hfsc_interface iface;
+ u_int32_t class_handle;
+ struct flow_filter filter;
+
+ u_long filter_handle; /* return value */
+};
+
+struct hfsc_delete_filter {
+ struct hfsc_interface iface;
+ u_long filter_handle;
+};
+
+struct hfsc_class_stats {
+ struct hfsc_interface iface;
+ int nskip; /* skip # of classes */
+ int nclasses; /* # of class stats (WR) */
+ u_int64_t cur_time; /* current time */
+ u_int32_t machclk_freq; /* machine clock frequency */
+ u_int hif_classes; /* # of classes in the tree */
+ u_int hif_packets; /* # of packets in the tree */
+ struct hfsc_classstats *stats; /* pointer to stats array */
+};
+
+#define HFSC_IF_ATTACH _IOW('Q', 1, struct hfsc_attach)
+#define HFSC_IF_DETACH _IOW('Q', 2, struct hfsc_interface)
+#define HFSC_ENABLE _IOW('Q', 3, struct hfsc_interface)
+#define HFSC_DISABLE _IOW('Q', 4, struct hfsc_interface)
+#define HFSC_CLEAR_HIERARCHY _IOW('Q', 5, struct hfsc_interface)
+#define HFSC_ADD_CLASS _IOWR('Q', 7, struct hfsc_add_class)
+#define HFSC_DEL_CLASS _IOW('Q', 8, struct hfsc_delete_class)
+#define HFSC_MOD_CLASS _IOW('Q', 9, struct hfsc_modify_class)
+#define HFSC_ADD_FILTER _IOWR('Q', 10, struct hfsc_add_filter)
+#define HFSC_DEL_FILTER _IOW('Q', 11, struct hfsc_delete_filter)
+#define HFSC_GETSTATS _IOWR('Q', 12, struct hfsc_class_stats)
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+/*
+ * kernel internal service curve representation
+ * coordinates are given by 64 bit unsigned integers.
+ * x-axis: unit is clock count. for the intel x86 architecture,
+ * the raw Pentium TSC (Timestamp Counter) value is used.
+ * virtual time is also calculated in this time scale.
+ * y-axis: unit is byte.
+ *
+ * the service curve parameters are converted to the internal
+ * representation.
+ * the slope values are scaled to avoid overflow.
+ * the inverse slope values as well as the y-projection of the 1st
+ * segment are kept in order to avoid 64-bit divide operations
+ * that are expensive on 32-bit architectures.
+ *
+ * note: the 64-bit Intel Pentium TSC takes centuries to wrap around.
+ *	the x-axis doesn't wrap for about 584 years with a 1GHz clock.
+ *	the y-axis doesn't wrap for about 4679 years at 1Gbps bandwidth.
+ */
+
+/* kernel internal representation of a service curve */
+struct internal_sc {
+ u_int64_t sm1; /* scaled slope of the 1st segment */
+ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
+ u_int64_t dx; /* the x-projection of the 1st segment */
+ u_int64_t dy; /* the y-projection of the 1st segment */
+ u_int64_t sm2; /* scaled slope of the 2nd segment */
+ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+/* runtime service curve */
+struct runtime_sc {
+ u_int64_t x; /* current starting position on x-axis */
+	u_int64_t	y;	/* current starting position on y-axis */
+ u_int64_t sm1; /* scaled slope of the 1st segment */
+ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
+ u_int64_t dx; /* the x-projection of the 1st segment */
+ u_int64_t dy; /* the y-projection of the 1st segment */
+ u_int64_t sm2; /* scaled slope of the 2nd segment */
+ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+struct hfsc_class {
+ u_int cl_id; /* class id (just for debug) */
+ u_int32_t cl_handle; /* class handle */
+ struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */
+ int cl_flags; /* misc flags */
+
+ struct hfsc_class *cl_parent; /* parent class */
+ struct hfsc_class *cl_siblings; /* sibling classes */
+ struct hfsc_class *cl_children; /* child classes */
+
+ class_queue_t *cl_q; /* class queue structure */
+ union {
+ struct red *cl_red; /* RED state */
+ struct codel *cl_codel; /* CoDel state */
+ } cl_aqm;
+#define cl_red cl_aqm.cl_red
+#define cl_codel cl_aqm.cl_codel
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ u_int64_t cl_total; /* total work in bytes */
+ u_int64_t cl_cumul; /* cumulative work in bytes
+ done by real-time criteria */
+ u_int64_t cl_d; /* deadline */
+ u_int64_t cl_e; /* eligible time */
+ u_int64_t cl_vt; /* virtual time */
+ u_int64_t cl_f; /* time when this class will fit for
+ link-sharing, max(myf, cfmin) */
+ u_int64_t cl_myf; /* my fit-time (as calculated from this
+ class's own upperlimit curve) */
+ u_int64_t cl_myfadj; /* my fit-time adjustment
+ (to cancel history dependence) */
+ u_int64_t cl_cfmin; /* earliest children's fit-time (used
+ with cl_myf to obtain cl_f) */
+ u_int64_t cl_cvtmin; /* minimal virtual time among the
+ children fit for link-sharing
+ (monotonic within a period) */
+ u_int64_t cl_vtadj; /* intra-period cumulative vt
+ adjustment */
+ u_int64_t cl_vtoff; /* inter-period cumulative vt offset */
+ u_int64_t cl_cvtmax; /* max child's vt in the last period */
+
+ u_int64_t cl_initvt; /* init virtual time (for debugging) */
+
+ struct internal_sc *cl_rsc; /* internal real-time service curve */
+ struct internal_sc *cl_fsc; /* internal fair service curve */
+ struct internal_sc *cl_usc; /* internal upperlimit service curve */
+ struct runtime_sc cl_deadline; /* deadline curve */
+ struct runtime_sc cl_eligible; /* eligible curve */
+ struct runtime_sc cl_virtual; /* virtual curve */
+ struct runtime_sc cl_ulimit; /* upperlimit curve */
+
+ u_int cl_vtperiod; /* vt period sequence no */
+ u_int cl_parentperiod; /* parent's vt period seqno */
+ int cl_nactive; /* number of active children */
+
+ TAILQ_HEAD(acthead, hfsc_class) cl_actc; /* active children list */
+ TAILQ_ENTRY(hfsc_class) cl_actlist; /* active children list entry */
+ TAILQ_ENTRY(hfsc_class) cl_ellist; /* eligible list entry */
+
+ struct {
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+ } cl_stats;
+};
+
+/*
+ * hfsc interface state
+ */
+struct hfsc_if {
+ struct hfsc_if *hif_next; /* interface state list */
+ struct ifaltq *hif_ifq; /* backpointer to ifaltq */
+ struct hfsc_class *hif_rootclass; /* root class */
+ struct hfsc_class *hif_defaultclass; /* default class */
+ struct hfsc_class *hif_class_tbl[HFSC_MAX_CLASSES];
+ struct hfsc_class *hif_pollcache; /* cache for poll operation */
+
+ u_int hif_classes; /* # of classes in the tree */
+ u_int hif_packets; /* # of packets in the tree */
+ u_int hif_classid; /* class id sequence number */
+
+ TAILQ_HEAD(elighead, hfsc_class) hif_eligible; /* eligible list */
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier hif_classifier;
+#endif
+};
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_HFSC_H_ */
diff --git a/freebsd/sys/net/altq/altq_priq.c b/freebsd/sys/net/altq/altq_priq.c
new file mode 100644
index 00000000..d257ae3c
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_priq.c
@@ -0,0 +1,1072 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $
+ * $FreeBSD$
+ */
+/*
+ * priority queue
+ */
+
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+#include <net/altq/altq_priq.h>
+
+/*
+ * function prototypes
+ */
+#ifdef ALTQ3_COMPAT
+static struct priq_if *priq_attach(struct ifaltq *, u_int);
+static int priq_detach(struct priq_if *);
+#endif
+static int priq_clear_interface(struct priq_if *);
+static int priq_request(struct ifaltq *, int, void *);
+static void priq_purge(struct priq_if *);
+static struct priq_class *priq_class_create(struct priq_if *, int, int, int,
+ int);
+static int priq_class_destroy(struct priq_class *);
+static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *priq_dequeue(struct ifaltq *, int);
+
+static int priq_addq(struct priq_class *, struct mbuf *);
+static struct mbuf *priq_getq(struct priq_class *);
+static struct mbuf *priq_pollq(struct priq_class *);
+static void priq_purgeq(struct priq_class *);
+
+#ifdef ALTQ3_COMPAT
+static int priqcmd_if_attach(struct priq_interface *);
+static int priqcmd_if_detach(struct priq_interface *);
+static int priqcmd_add_class(struct priq_add_class *);
+static int priqcmd_delete_class(struct priq_delete_class *);
+static int priqcmd_modify_class(struct priq_modify_class *);
+static int priqcmd_add_filter(struct priq_add_filter *);
+static int priqcmd_delete_filter(struct priq_delete_filter *);
+static int priqcmd_class_stats(struct priq_class_stats *);
+#endif /* ALTQ3_COMPAT */
+
+static void get_class_stats(struct priq_classstats *, struct priq_class *);
+static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t);
+
+#ifdef ALTQ3_COMPAT
+altqdev_decl(priq);
+
+/* pif_list keeps all priq_if's allocated. */
+static struct priq_if *pif_list = NULL;
+#endif /* ALTQ3_COMPAT */
+
+int
+priq_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+ s = splnet();
+ error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc,
+ priq_enqueue, priq_dequeue, priq_request, NULL, NULL);
+ splx(s);
+ return (error);
+}
+
+int
+priq_add_altq(struct pf_altq *a)
+{
+ struct priq_if *pif;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (pif == NULL)
+ return (ENOMEM);
+ pif->pif_bandwidth = a->ifbandwidth;
+ pif->pif_maxpri = -1;
+ pif->pif_ifq = &ifp->if_snd;
+
+ /* keep the state in pf_altq */
+ a->altq_disc = pif;
+
+ return (0);
+}
+
+int
+priq_remove_altq(struct pf_altq *a)
+{
+ struct priq_if *pif;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ (void)priq_clear_interface(pif);
+
+ free(pif, M_DEVBUF);
+ return (0);
+}
+
+int
+priq_add_queue(struct pf_altq *a)
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ /* check parameters */
+ if (a->priority >= PRIQ_MAXPRI)
+ return (EINVAL);
+ if (a->qid == 0)
+ return (EINVAL);
+ if (pif->pif_classes[a->priority] != NULL)
+ return (EBUSY);
+ if (clh_to_clp(pif, a->qid) != NULL)
+ return (EBUSY);
+
+ cl = priq_class_create(pif, a->priority, a->qlimit,
+ a->pq_u.priq_opts.flags, a->qid);
+ if (cl == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
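+
+/*
+ * illustrative call path (a sketch; see pf.conf(5) for the exact
+ * syntax): a pf ALTQ rule along the lines of
+ *	altq on em0 priq bandwidth 10Mb queue { q_hi, q_lo }
+ * leads to one priq_add_queue() call per child queue, with a->priority
+ * and a->qid filled in from the parsed rule.
+ */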
+
+int
+priq_remove_queue(struct pf_altq *a)
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ return (priq_class_destroy(cl));
+}
+
+int
+priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+ struct priq_classstats stats;
+ int error = 0;
+
+ if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+priq_clear_interface(struct priq_if *pif)
+{
+ struct priq_class *cl;
+ int pri;
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ /* free the filters for this interface */
+ acc_discard_filters(&pif->pif_classifier, NULL, 1);
+#endif
+
+ /* clear out the classes */
+ for (pri = 0; pri <= pif->pif_maxpri; pri++)
+ if ((cl = pif->pif_classes[pri]) != NULL)
+ priq_class_destroy(cl);
+
+ return (0);
+}
+
+static int
+priq_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ priq_purge(pif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+priq_purge(struct priq_if *pif)
+{
+ struct priq_class *cl;
+ int pri;
+
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q))
+ priq_purgeq(cl);
+ }
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ pif->pif_ifq->ifq_len = 0;
+}
+
+static struct priq_class *
+priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
+{
+ struct priq_class *cl;
+ int s;
+
+#ifndef ALTQ_RED
+ if (flags & PRCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("priq_class_create: RED not configured for PRIQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_CODEL
+ if (flags & PRCF_CODEL) {
+#ifdef ALTQ_DEBUG
+ printf("priq_class_create: CODEL not configured for PRIQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ if ((cl = pif->pif_classes[pri]) != NULL) {
+ /* modify the class instead of creating a new one */
+ s = splnet();
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+ if (!qempty(cl->cl_q))
+ priq_purgeq(cl);
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+ splx(s);
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_destroy(cl->cl_codel);
+#endif
+ } else {
+ cl = malloc(sizeof(struct priq_class), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (cl == NULL)
+ return (NULL);
+
+ cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (cl->cl_q == NULL)
+ goto err_ret;
+ }
+
+ pif->pif_classes[pri] = cl;
+ if (flags & PRCF_DEFAULTCLASS)
+ pif->pif_default = cl;
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ qlimit(cl->cl_q) = qlimit;
+ qtype(cl->cl_q) = Q_DROPTAIL;
+ qlen(cl->cl_q) = 0;
+ qsize(cl->cl_q) = 0;
+ cl->cl_flags = flags;
+ cl->cl_pri = pri;
+ if (pri > pif->pif_maxpri)
+ pif->pif_maxpri = pri;
+ cl->cl_pif = pif;
+ cl->cl_handle = qid;
+
+#ifdef ALTQ_RED
+ if (flags & (PRCF_RED|PRCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & PRCF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & PRCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (pif->pif_bandwidth < 8)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
+#ifdef ALTQ_RIO
+ if (flags & PRCF_RIO) {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red == NULL)
+ goto err_ret;
+ qtype(cl->cl_q) = Q_RIO;
+ } else
+#endif
+ if (flags & PRCF_RED) {
+ cl->cl_red = red_alloc(0, 0,
+ qlimit(cl->cl_q) * 10/100,
+ qlimit(cl->cl_q) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->cl_red == NULL)
+ goto err_ret;
+ qtype(cl->cl_q) = Q_RED;
+ }
+ }
+#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (flags & PRCF_CODEL) {
+ cl->cl_codel = codel_alloc(5, 100, 0);
+ if (cl->cl_codel != NULL)
+ qtype(cl->cl_q) = Q_CODEL;
+ }
+#endif
+
+ return (cl);
+
+ err_ret:
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_destroy(cl->cl_codel);
+#endif
+ }
+ if (cl->cl_q != NULL)
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+ return (NULL);
+}
+
+static int
+priq_class_destroy(struct priq_class *cl)
+{
+ struct priq_if *pif;
+ int s, pri;
+
+ s = splnet();
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+
+#ifdef ALTQ3_CLFIER_COMPAT
+	/* delete filters referencing this class */
+ acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0);
+#endif
+
+ if (!qempty(cl->cl_q))
+ priq_purgeq(cl);
+
+ pif = cl->cl_pif;
+ pif->pif_classes[cl->cl_pri] = NULL;
+ if (pif->pif_maxpri == cl->cl_pri) {
+ for (pri = cl->cl_pri; pri >= 0; pri--)
+ if (pif->pif_classes[pri] != NULL) {
+ pif->pif_maxpri = pri;
+ break;
+ }
+ if (pri < 0)
+ pif->pif_maxpri = -1;
+ }
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+ splx(s);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_destroy(cl->cl_codel);
+#endif
+ }
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+ return (0);
+}
+
+/*
+ * priq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+ struct priq_class *cl;
+ struct pf_mtag *t;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ if ((t = pf_find_mtag(m)) != NULL)
+ cl = clh_to_clp(pif, t->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL) {
+ cl = pif->pif_default;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->cl_pktattr = NULL;
+ len = m_pktlen(m);
+ if (priq_addq(cl, m) != 0) {
+ /* drop occurred. mbuf was freed in priq_addq. */
+ PKTCNTR_ADD(&cl->cl_dropcnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+
+ /* successfully queued. */
+ return (0);
+}
+
+/*
+ * priq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+priq_dequeue(struct ifaltq *ifq, int op)
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+ struct priq_class *cl;
+ struct mbuf *m;
+ int pri;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (IFQ_IS_EMPTY(ifq))
+ /* no packet in the queue */
+ return (NULL);
+
+ for (pri = pif->pif_maxpri; pri >= 0; pri--) {
+ if ((cl = pif->pif_classes[pri]) != NULL &&
+ !qempty(cl->cl_q)) {
+ if (op == ALTDQ_POLL)
+ return (priq_pollq(cl));
+
+ m = priq_getq(cl);
+ if (m != NULL) {
+ IFQ_DEC_LEN(ifq);
+ if (qempty(cl->cl_q))
+ cl->cl_period++;
+ PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m));
+ }
+ return (m);
+ }
+ }
+ return (NULL);
+}
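+
+/*
+ * usage sketch (illustrative only; drivers reach this through the
+ * IFQ_POLL/IFQ_DEQUEUE macros rather than calling it directly):
+ *
+ *	IFQ_LOCK(ifq);
+ *	m = priq_dequeue(ifq, ALTDQ_POLL);	(peek at the head packet)
+ *	if (m != NULL && can_send(m))		(can_send() is hypothetical)
+ *		m = priq_dequeue(ifq, ALTDQ_REMOVE); (must be the same mbuf)
+ *	IFQ_UNLOCK(ifq);
+ */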
+
+static int
+priq_addq(struct priq_class *cl, struct mbuf *m)
+{
+
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m,
+ cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ return codel_addq(cl->cl_codel, cl->cl_q, m);
+#endif
+ if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & PRCF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(cl->cl_q, m);
+
+ return (0);
+}
+
+static struct mbuf *
+priq_getq(struct priq_class *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_getq(cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ return codel_getq(cl->cl_codel, cl->cl_q);
+#endif
+ return _getq(cl->cl_q);
+}
+
+static struct mbuf *
+priq_pollq(struct priq_class *cl)
+{
+ return qhead(cl->cl_q);
+}
+
+static void
+priq_purgeq(struct priq_class *cl)
+{
+ struct mbuf *m;
+
+ if (qempty(cl->cl_q))
+ return;
+
+ while ((m = _getq(cl->cl_q)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
+ m_freem(m);
+ }
+ ASSERT(qlen(cl->cl_q) == 0);
+}
+
+static void
+get_class_stats(struct priq_classstats *sp, struct priq_class *cl)
+{
+ sp->class_handle = cl->cl_handle;
+ sp->qlength = qlen(cl->cl_q);
+ sp->qlimit = qlimit(cl->cl_q);
+ sp->period = cl->cl_period;
+ sp->xmitcnt = cl->cl_xmitcnt;
+ sp->dropcnt = cl->cl_dropcnt;
+
+ sp->qtype = qtype(cl->cl_q);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_getstats(cl->cl_codel, &sp->codel);
+#endif
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct priq_class *
+clh_to_clp(struct priq_if *pif, u_int32_t chandle)
+{
+ struct priq_class *cl;
+ int idx;
+
+ if (chandle == 0)
+ return (NULL);
+
+ for (idx = pif->pif_maxpri; idx >= 0; idx--)
+ if ((cl = pif->pif_classes[idx]) != NULL &&
+ cl->cl_handle == chandle)
+ return (cl);
+
+ return (NULL);
+}
+
+#ifdef ALTQ3_COMPAT
+
+static struct priq_if *
+priq_attach(ifq, bandwidth)
+ struct ifaltq *ifq;
+ u_int bandwidth;
+{
+ struct priq_if *pif;
+
+ pif = malloc(sizeof(struct priq_if),
+ M_DEVBUF, M_WAITOK);
+ if (pif == NULL)
+ return (NULL);
+ bzero(pif, sizeof(struct priq_if));
+ pif->pif_bandwidth = bandwidth;
+ pif->pif_maxpri = -1;
+ pif->pif_ifq = ifq;
+
+ /* add this state to the priq list */
+ pif->pif_next = pif_list;
+ pif_list = pif;
+
+ return (pif);
+}
+
+static int
+priq_detach(pif)
+ struct priq_if *pif;
+{
+ (void)priq_clear_interface(pif);
+
+ /* remove this interface from the pif list */
+ if (pif_list == pif)
+ pif_list = pif->pif_next;
+ else {
+ struct priq_if *p;
+
+ for (p = pif_list; p != NULL; p = p->pif_next)
+ if (p->pif_next == pif) {
+ p->pif_next = pif->pif_next;
+ break;
+ }
+ ASSERT(p != NULL);
+ }
+
+ free(pif, M_DEVBUF);
+ return (0);
+}
+
+/*
+ * priq device interface
+ */
+int
+priqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+priqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct priq_if *pif;
+ int err, error = 0;
+
+ while ((pif = pif_list) != NULL) {
+ /* destroy all */
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ altq_disable(pif->pif_ifq);
+
+ err = altq_detach(pif->pif_ifq);
+ if (err == 0)
+ err = priq_detach(pif);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+priqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct priq_if *pif;
+ struct priq_interface *ifacep;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case PRIQ_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+ return (error);
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case PRIQ_IF_ATTACH:
+ error = priqcmd_if_attach((struct priq_interface *)addr);
+ break;
+
+ case PRIQ_IF_DETACH:
+ error = priqcmd_if_detach((struct priq_interface *)addr);
+ break;
+
+ case PRIQ_ENABLE:
+ case PRIQ_DISABLE:
+ case PRIQ_CLEAR:
+ ifacep = (struct priq_interface *)addr;
+ if ((pif = altq_lookup(ifacep->ifname,
+ ALTQT_PRIQ)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+ case PRIQ_ENABLE:
+ if (pif->pif_default == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("priq: no default class\n");
+#endif
+ error = EINVAL;
+ break;
+ }
+ error = altq_enable(pif->pif_ifq);
+ break;
+
+ case PRIQ_DISABLE:
+ error = altq_disable(pif->pif_ifq);
+ break;
+
+ case PRIQ_CLEAR:
+ priq_clear_interface(pif);
+ break;
+ }
+ break;
+
+ case PRIQ_ADD_CLASS:
+ error = priqcmd_add_class((struct priq_add_class *)addr);
+ break;
+
+ case PRIQ_DEL_CLASS:
+ error = priqcmd_delete_class((struct priq_delete_class *)addr);
+ break;
+
+ case PRIQ_MOD_CLASS:
+ error = priqcmd_modify_class((struct priq_modify_class *)addr);
+ break;
+
+ case PRIQ_ADD_FILTER:
+ error = priqcmd_add_filter((struct priq_add_filter *)addr);
+ break;
+
+ case PRIQ_DEL_FILTER:
+ error = priqcmd_delete_filter((struct priq_delete_filter *)addr);
+ break;
+
+ case PRIQ_GETSTATS:
+ error = priqcmd_class_stats((struct priq_class_stats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+priqcmd_if_attach(ap)
+ struct priq_interface *ap;
+{
+ struct priq_if *pif;
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(ap->ifname)) == NULL)
+ return (ENXIO);
+
+ if ((pif = priq_attach(&ifp->if_snd, ap->arg)) == NULL)
+ return (ENOMEM);
+
+ /*
+	 * attach PRIQ to this ifnet structure.
+ */
+ if ((error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif,
+ priq_enqueue, priq_dequeue, priq_request,
+ &pif->pif_classifier, acc_classify)) != 0)
+ (void)priq_detach(pif);
+
+ return (error);
+}
+
+static int
+priqcmd_if_detach(ap)
+ struct priq_interface *ap;
+{
+ struct priq_if *pif;
+ int error;
+
+ if ((pif = altq_lookup(ap->ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ altq_disable(pif->pif_ifq);
+
+ if ((error = altq_detach(pif->pif_ifq)))
+ return (error);
+
+ return priq_detach(pif);
+}
+
+static int
+priqcmd_add_class(ap)
+ struct priq_add_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+ int qid;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
+ return (EINVAL);
+ if (pif->pif_classes[ap->pri] != NULL)
+ return (EBUSY);
+
+ qid = ap->pri + 1;
+ if ((cl = priq_class_create(pif, ap->pri,
+ ap->qlimit, ap->flags, qid)) == NULL)
+ return (ENOMEM);
+
+ /* return a class handle to the user */
+ ap->class_handle = cl->cl_handle;
+
+ return (0);
+}
+
+static int
+priqcmd_delete_class(ap)
+ struct priq_delete_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return priq_class_destroy(cl);
+}
+
+static int
+priqcmd_modify_class(ap)
+ struct priq_modify_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ /*
+ * if priority is changed, move the class to the new priority
+ */
+ if (pif->pif_classes[ap->pri] != cl) {
+ if (pif->pif_classes[ap->pri] != NULL)
+ return (EEXIST);
+ pif->pif_classes[cl->cl_pri] = NULL;
+ pif->pif_classes[ap->pri] = cl;
+ cl->cl_pri = ap->pri;
+ }
+
+ /* call priq_class_create to change class parameters */
+ if ((cl = priq_class_create(pif, ap->pri,
+ ap->qlimit, ap->flags, ap->class_handle)) == NULL)
+ return (ENOMEM);
+ return 0;
+}
+
+static int
+priqcmd_add_filter(ap)
+ struct priq_add_filter *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&pif->pif_classifier, &ap->filter,
+ cl, &ap->filter_handle);
+}
+
+static int
+priqcmd_delete_filter(ap)
+ struct priq_delete_filter *ap;
+{
+ struct priq_if *pif;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&pif->pif_classifier,
+ ap->filter_handle);
+}
+
+static int
+priqcmd_class_stats(ap)
+ struct priq_class_stats *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+ struct priq_classstats stats, *usp;
+ int pri, error;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ ap->maxpri = pif->pif_maxpri;
+
+	/* copy out the stats of each class up to maxpri */
+ usp = ap->stats;
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ cl = pif->pif_classes[pri];
+ if (cl != NULL)
+ get_class_stats(&stats, cl);
+ else
+ bzero(&stats, sizeof(stats));
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+ return (0);
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw priq_sw =
+ {"priq", priqopen, priqclose, priqioctl};
+
+ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw);
+MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1);
+MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ3_COMPAT */
+#endif /* ALTQ_PRIQ */
diff --git a/freebsd/sys/net/altq/altq_priq.h b/freebsd/sys/net/altq/altq_priq.h
new file mode 100644
index 00000000..fcbfee98
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_priq.h
@@ -0,0 +1,180 @@
+/*-
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_PRIQ_H_
+#define _ALTQ_ALTQ_PRIQ_H_
+
+#include <net/altq/altq.h>
+#include <net/altq/altq_classq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PRIQ_MAXPRI 16 /* upper limit of the number of priorities */
+
+#ifdef ALTQ3_COMPAT
+struct priq_interface {
+ char ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+ u_long arg; /* request-specific argument */
+};
+
+struct priq_add_class {
+ struct priq_interface iface;
+ int pri; /* priority (0 is the lowest) */
+ int qlimit; /* queue size limit */
+ int flags; /* misc flags (see below) */
+
+ u_int32_t class_handle; /* return value */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* priq class flags */
+#define PRCF_RED 0x0001 /* use RED */
+#define PRCF_ECN 0x0002 /* use RED/ECN */
+#define PRCF_RIO 0x0004 /* use RIO */
+#define PRCF_CODEL 0x0008 /* use CoDel */
+#define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define PRCF_DEFAULTCLASS 0x1000 /* default class */
+
+/* special class handles */
+#define PRIQ_NULLCLASS_HANDLE 0
+
+#ifdef ALTQ3_COMPAT
+struct priq_delete_class {
+ struct priq_interface iface;
+ u_int32_t class_handle;
+};
+
+struct priq_modify_class {
+ struct priq_interface iface;
+ u_int32_t class_handle;
+ int pri;
+ int qlimit;
+ int flags;
+};
+
+struct priq_add_filter {
+ struct priq_interface iface;
+ u_int32_t class_handle;
+ struct flow_filter filter;
+
+ u_long filter_handle; /* return value */
+};
+
+struct priq_delete_filter {
+ struct priq_interface iface;
+ u_long filter_handle;
+};
+#endif /* ALTQ3_COMPAT */
+
+struct priq_classstats {
+ u_int32_t class_handle;
+
+ u_int qlength;
+ u_int qlimit;
+ u_int period;
+ struct pktcntr xmitcnt; /* transmitted packet counter */
+ struct pktcntr dropcnt; /* dropped packet counter */
+
+ /* codel, red and rio related info */
+ int qtype;
+ struct redstats red[3]; /* rio has 3 red stats */
+ struct codel_stats codel;
+};
+
+#ifdef ALTQ3_COMPAT
+struct priq_class_stats {
+ struct priq_interface iface;
+ int maxpri; /* in/out */
+
+ struct priq_classstats *stats; /* pointer to stats array */
+};
+
+#define PRIQ_IF_ATTACH _IOW('Q', 1, struct priq_interface)
+#define PRIQ_IF_DETACH _IOW('Q', 2, struct priq_interface)
+#define PRIQ_ENABLE _IOW('Q', 3, struct priq_interface)
+#define PRIQ_DISABLE _IOW('Q', 4, struct priq_interface)
+#define PRIQ_CLEAR _IOW('Q', 5, struct priq_interface)
+#define PRIQ_ADD_CLASS _IOWR('Q', 7, struct priq_add_class)
+#define PRIQ_DEL_CLASS _IOW('Q', 8, struct priq_delete_class)
+#define PRIQ_MOD_CLASS _IOW('Q', 9, struct priq_modify_class)
+#define PRIQ_ADD_FILTER _IOWR('Q', 10, struct priq_add_filter)
+#define PRIQ_DEL_FILTER _IOW('Q', 11, struct priq_delete_filter)
+#define PRIQ_GETSTATS _IOWR('Q', 12, struct priq_class_stats)
+
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+struct priq_class {
+ u_int32_t cl_handle; /* class handle */
+ class_queue_t *cl_q; /* class queue structure */
+ union {
+ struct red *cl_red; /* RED state */
+ struct codel *cl_codel; /* CoDel state */
+ } cl_aqm;
+#define cl_red cl_aqm.cl_red
+#define cl_codel cl_aqm.cl_codel
+ int cl_pri; /* priority */
+ int cl_flags; /* class flags */
+ struct priq_if *cl_pif; /* back pointer to pif */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ /* statistics */
+ u_int cl_period; /* backlog period */
+ struct pktcntr cl_xmitcnt; /* transmitted packet counter */
+ struct pktcntr cl_dropcnt; /* dropped packet counter */
+};
+
+/*
+ * priq interface state
+ */
+struct priq_if {
+ struct priq_if *pif_next; /* interface state list */
+ struct ifaltq *pif_ifq; /* backpointer to ifaltq */
+ u_int pif_bandwidth; /* link bandwidth in bps */
+ int pif_maxpri; /* max priority in use */
+ struct priq_class *pif_default; /* default class */
+ struct priq_class *pif_classes[PRIQ_MAXPRI]; /* classes */
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier pif_classifier; /* classifier */
+#endif
+};
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_PRIQ_H_ */
diff --git a/freebsd/sys/net/altq/altq_red.c b/freebsd/sys/net/altq/altq_red.c
new file mode 100644
index 00000000..f83b7b50
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_red.c
@@ -0,0 +1,1494 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*-
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $
+ * $FreeBSD$
+ */
+
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <rtems/bsd/sys/errno.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <sys/sockio.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#ifdef ALTQ_FLOWVALVE
+#include <sys/queue.h>
+#include <sys/time.h>
+#endif
+#endif /* ALTQ3_COMPAT */
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_red.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#ifdef ALTQ_FLOWVALVE
+#include <net/altq/altq_flowvalve.h>
+#endif
+#endif
+
+/*
+ * ALTQ/RED (Random Early Detection) implementation using 32-bit
+ * fixed-point calculation.
+ *
+ * written by kjc using the ns code as a reference.
+ * you can learn more about red and ns from Sally's home page at
+ * http://www-nrg.ee.lbl.gov/floyd/
+ *
+ * most of the red parameter values are fixed in this implementation
+ * to prevent fixed-point overflow/underflow.
+ * if you change the parameters, watch out for overflow/underflow!
+ *
+ * the parameters used are the values recommended by Sally.
+ * the corresponding ns config looks like:
+ * q_weight=0.00195
+ * minthresh=5 maxthresh=15 queue-size=60
+ * linterm=30
+ * dropmech=drop-tail
+ * bytes=false (can't be handled by 32-bit fixed-point)
+ * doubleq=false dqthresh=false
+ * wait=true
+ */
+/*
+ * alternative red parameters for a slow link.
+ *
+ * assume the queue length grows from zero to L and then stays at L;
+ * it takes about N packets for q_avg to reach 63% of L.
+ * when q_weight is 0.002, N is about 500 packets.
+ * for a slow link like dial-up, 500 packets take more than 1 minute!
+ * when q_weight is 0.008, N is about 127 packets.
+ * when q_weight is 0.016, N is about 63 packets.
+ * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
+ * are allowed for 0.016.
+ * see Sally's paper for more details.
+ */
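+/*
+ * (illustration of the numbers above) with a step input of L, the
+ * average after N packets is avg = (1 - (1 - q_weight)^N) * L, which
+ * reaches 1 - 1/e (about 63%) of L when N is roughly 1/q_weight:
+ *	q_weight = 1/512  ->  N of about 512 packets
+ *	q_weight = 1/128  ->  N of about 128 packets
+ *	q_weight = 1/64   ->  N of about  64 packets
+ */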
+/* normal red parameters */
+#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
+ /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
+ /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
+ /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define INV_P_MAX 10 /* inverse of max drop probability */
+#define TH_MIN 5 /* min threshold */
+#define TH_MAX 15 /* max threshold */
+
+#define RED_LIMIT 60 /* default max queue length */
+#define RED_STATS /* collect statistics */
+
+/*
+ * our default policy for forced-drop is drop-tail.
+ * (in altq-1.1.2 or earlier, the default was random-drop.
+ * but it makes more sense to punish the cause of the surge.)
+ * to switch to the random-drop policy, define "RED_RANDOM_DROP".
+ */
+
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+/*
+ * flow-valve is an extension to protect red from unresponsive flows
+ * and to promote end-to-end congestion control.
+ * flow-valve observes the average drop rates of the flows that have
+ * experienced packet drops in the recent past.
+ * when the average drop rate exceeds the threshold, the flow is
+ * blocked by the flow-valve. the trapped flow should back off
+ * exponentially to escape from the flow-valve.
+ */
+#ifdef RED_RANDOM_DROP
+#error "random-drop can't be used with flow-valve!"
+#endif
+#endif /* ALTQ_FLOWVALVE */
+
+/* red_list keeps all red_queue_t's allocated. */
+static red_queue_t *red_list = NULL;
+
+#endif /* ALTQ3_COMPAT */
+
+/* default red parameter values */
+static int default_th_min = TH_MIN;
+static int default_th_max = TH_MAX;
+static int default_inv_pmax = INV_P_MAX;
+
+#ifdef ALTQ3_COMPAT
+/* internal function prototypes */
+static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *red_dequeue(struct ifaltq *, int);
+static int red_request(struct ifaltq *, int, void *);
+static void red_purgeq(red_queue_t *);
+static int red_detach(red_queue_t *);
+#ifdef ALTQ_FLOWVALVE
+static __inline struct fve *flowlist_lookup(struct flowvalve *,
+ struct altq_pktattr *, struct timeval *);
+static __inline struct fve *flowlist_reclaim(struct flowvalve *,
+ struct altq_pktattr *);
+static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
+static __inline int fv_p2f(struct flowvalve *, int);
+#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
+static struct flowvalve *fv_alloc(struct red *);
+#endif
+static void fv_destroy(struct flowvalve *);
+static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
+ struct fve **);
+static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
+ struct fve *);
+#endif
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * red support routines
+ */
+red_t *
+red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
+ int pkttime)
+{
+ red_t *rp;
+ int w, i;
+ int npkts_per_sec;
+
+ rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (rp == NULL)
+ return (NULL);
+
+ if (weight == 0)
+ rp->red_weight = W_WEIGHT;
+ else
+ rp->red_weight = weight;
+
+ /* allocate weight table */
+ rp->red_wtab = wtab_alloc(rp->red_weight);
+ if (rp->red_wtab == NULL) {
+ free(rp, M_DEVBUF);
+ return (NULL);
+ }
+
+ rp->red_avg = 0;
+ rp->red_idle = 1;
+
+ if (inv_pmax == 0)
+ rp->red_inv_pmax = default_inv_pmax;
+ else
+ rp->red_inv_pmax = inv_pmax;
+ if (th_min == 0)
+ rp->red_thmin = default_th_min;
+ else
+ rp->red_thmin = th_min;
+ if (th_max == 0)
+ rp->red_thmax = default_th_max;
+ else
+ rp->red_thmax = th_max;
+
+ rp->red_flags = flags;
+
+ if (pkttime == 0)
+ /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
+ rp->red_pkttime = 800;
+ else
+ rp->red_pkttime = pkttime;
+
+ if (weight == 0) {
+ /* when the link is very slow, adjust red parameters */
+ npkts_per_sec = 1000000 / rp->red_pkttime;
+ if (npkts_per_sec < 50) {
+ /* up to about 400Kbps */
+ rp->red_weight = W_WEIGHT_2;
+ } else if (npkts_per_sec < 300) {
+ /* up to about 2.4Mbps */
+ rp->red_weight = W_WEIGHT_1;
+ }
+ }
+
+ /* calculate wshift. weight must be power of 2 */
+ w = rp->red_weight;
+ for (i = 0; w > 1; i++)
+ w = w >> 1;
+ rp->red_wshift = i;
+ w = 1 << rp->red_wshift;
+ if (w != rp->red_weight) {
+ printf("invalid weight value %d for red! use %d\n",
+ rp->red_weight, w);
+ rp->red_weight = w;
+ }
+
+ /*
+ * thmin_s and thmax_s are scaled versions of th_min and th_max
+ * to be compared with avg.
+ */
+ rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
+ rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
+
+ /*
+ * precompute probability denominator
+ * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+ */
+ rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
+ * rp->red_inv_pmax) << FP_SHIFT;
+
+ microtime(&rp->red_last);
+ return (rp);
+}
+
+void
+red_destroy(red_t *rp)
+{
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL)
+ fv_destroy(rp->red_flowvalve);
+#endif
+#endif /* ALTQ3_COMPAT */
+ wtab_destroy(rp->red_wtab);
+ free(rp, M_DEVBUF);
+}
+
+void
+red_getstats(red_t *rp, struct redstats *sp)
+{
+ sp->q_avg = rp->red_avg >> rp->red_wshift;
+ sp->xmit_cnt = rp->red_stats.xmit_cnt;
+ sp->drop_cnt = rp->red_stats.drop_cnt;
+ sp->drop_forced = rp->red_stats.drop_forced;
+ sp->drop_unforced = rp->red_stats.drop_unforced;
+ sp->marked_packets = rp->red_stats.marked_packets;
+}
+
+int
+red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
+ struct altq_pktattr *pktattr)
+{
+ int avg, droptype;
+ int n;
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ struct fve *fve = NULL;
+
+ if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
+ if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
+ m_freem(m);
+ return (-1);
+ }
+#endif
+#endif /* ALTQ3_COMPAT */
+
+ avg = rp->red_avg;
+
+ /*
+ * if we were idle, we pretend that n packets arrived during
+ * the idle period.
+ */
+ if (rp->red_idle) {
+ struct timeval now;
+ int t;
+
+ rp->red_idle = 0;
+ microtime(&now);
+ t = (now.tv_sec - rp->red_last.tv_sec);
+ if (t > 60) {
+ /*
+			 * the queue was idle for more than 1 minute;
+			 * set avg to zero.  this prevents t from overflowing.
+ */
+ avg = 0;
+ } else {
+ t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
+ n = t / rp->red_pkttime - 1;
+
+ /* the following line does (avg = (1 - Wq)^n * avg) */
+ if (n > 0)
+ avg = (avg >> FP_SHIFT) *
+ pow_w(rp->red_wtab, n);
+ }
+ }
+
+ /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
+ avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
+ rp->red_avg = avg; /* save the new value */
+
+ /*
+ * red_count keeps a tally of arriving traffic that has not
+ * been dropped.
+ */
+ rp->red_count++;
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (avg >= rp->red_thmin_s && qlen(q) > 1) {
+ if (avg >= rp->red_thmax_s) {
+ /* avg >= th_max: forced drop */
+ droptype = DTYPE_FORCED;
+ } else if (rp->red_old == 0) {
+ /* first exceeds th_min */
+ rp->red_count = 1;
+ rp->red_old = 1;
+ } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
+ rp->red_probd, rp->red_count)) {
+ /* mark or drop by red */
+ if ((rp->red_flags & REDF_ECN) &&
+ mark_ecn(m, pktattr, rp->red_flags)) {
+ /* successfully marked. do not drop. */
+ rp->red_count = 0;
+#ifdef RED_STATS
+ rp->red_stats.marked_packets++;
+#endif
+ } else {
+ /* unforced drop by red */
+ droptype = DTYPE_EARLY;
+ }
+ }
+ } else {
+ /* avg < th_min */
+ rp->red_old = 0;
+ }
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+#ifdef RED_RANDOM_DROP
+ /* if successful or forced drop, enqueue this packet. */
+ if (droptype != DTYPE_EARLY)
+ _addq(q, m);
+#else
+ /* if successful, enqueue this packet. */
+ if (droptype == DTYPE_NODROP)
+ _addq(q, m);
+#endif
+ if (droptype != DTYPE_NODROP) {
+ if (droptype == DTYPE_EARLY) {
+ /* drop the incoming packet */
+#ifdef RED_STATS
+ rp->red_stats.drop_unforced++;
+#endif
+ } else {
+ /* forced drop, select a victim packet in the queue. */
+#ifdef RED_RANDOM_DROP
+ m = _getq_random(q);
+#endif
+#ifdef RED_STATS
+ rp->red_stats.drop_forced++;
+#endif
+ }
+#ifdef RED_STATS
+ PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
+#endif
+ rp->red_count = 0;
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL)
+ fv_dropbyred(rp->red_flowvalve, pktattr, fve);
+#endif
+#endif /* ALTQ3_COMPAT */
+ m_freem(m);
+ return (-1);
+ }
+ /* successfully queued */
+#ifdef RED_STATS
+ PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
+#endif
+ return (0);
+}
+
+/*
+ * early-drop probability is calculated as follows:
+ * prob = p_max * (avg - th_min) / (th_max - th_min)
+ * prob_a = prob / (2 - count*prob)
+ * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
+ * here prob_a increases as successive undrop count increases.
+ * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
+ * becomes 1 when (count >= (2 / prob))).
+ */
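+/*
+ * (illustration) with the default parameters (th_min=5, th_max=15,
+ * inv_p_max=30), fp_probd corresponds to 2*(15-5)*30 = 600.  when avg
+ * sits midway between the thresholds, fp_len corresponds to 5 packets,
+ * so at count=0 prob_a = 5/600 = 1/120 (half of prob = 1/60), and the
+ * drop becomes certain as count approaches 600/5 = 120.
+ */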
+int
+drop_early(int fp_len, int fp_probd, int count)
+{
+ int d; /* denominator of drop-probability */
+
+ d = fp_probd - count * fp_len;
+ if (d <= 0)
+ /* count exceeds the hard limit: drop or mark */
+ return (1);
+
+ /*
+ * now the range of d is [1..600] in fixed-point. (when
+ * th_max-th_min=10 and p_max=1/30)
+ * drop probability = (avg - TH_MIN) / d
+ */
+
+ if ((arc4random() % d) < fp_len) {
+ /* drop or mark */
+ return (1);
+ }
+ /* no drop/mark */
+ return (0);
+}
+
+/*
+ * try to set the CE bit in the packet.
+ * returns 1 if successfully marked, 0 otherwise.
+ */
+int
+mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
+{
+ struct mbuf *m0;
+ struct pf_mtag *at;
+ void *hdr;
+
+ at = pf_find_mtag(m);
+ if (at != NULL) {
+ hdr = at->hdr;
+#ifdef ALTQ3_COMPAT
+ } else if (pktattr != NULL) {
+ hdr = pktattr->pattr_hdr;
+#endif /* ALTQ3_COMPAT */
+ } else
+ return (0);
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)hdr >= m0->m_data) &&
+ ((caddr_t)hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, tag info is stale */
+ return (0);
+ }
+
+ switch (((struct ip *)hdr)->ip_v) {
+ case IPVERSION:
+ if (flags & REDF_ECN4) {
+ struct ip *ip = hdr;
+ u_int8_t otos;
+ int sum;
+
+ if (ip->ip_v != 4)
+ return (0); /* version mismatch! */
+
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+ return (0); /* not-ECT */
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ return (1); /* already marked */
+
+ /*
+ * ecn-capable but not marked,
+ * mark CE and update checksum
+ */
+ otos = ip->ip_tos;
+ ip->ip_tos |= IPTOS_ECN_CE;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~otos & 0xffff) + ip->ip_tos;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ return (1);
+ }
+ break;
+#ifdef INET6
+ case (IPV6_VERSION >> 4):
+ if (flags & REDF_ECN6) {
+ struct ip6_hdr *ip6 = hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return (0); /* version mismatch! */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_NOTECT << 20))
+ return (0); /* not-ECT */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_CE << 20))
+ return (1); /* already marked */
+ /*
+ * ecn-capable but not marked, mark CE
+ */
+ flowlabel |= (IPTOS_ECN_CE << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ return (1);
+ }
+ break;
+#endif /* INET6 */
+ }
+
+ /* not marked */
+ return (0);
+}
+
+struct mbuf *
+red_getq(rp, q)
+ red_t *rp;
+ class_queue_t *q;
+{
+ struct mbuf *m;
+
+ if ((m = _getq(q)) == NULL) {
+ if (rp->red_idle == 0) {
+ rp->red_idle = 1;
+ microtime(&rp->red_last);
+ }
+ return NULL;
+ }
+
+ rp->red_idle = 0;
+ return (m);
+}
+
+/*
+ * helper routine to calibrate avg during idle.
+ * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
+ * here Wq = 1/weight and the code assumes Wq is close to zero.
+ *
+ * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
+ */
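+/*
+ * (illustration) pow_w() walks the set bits of n, e.g.
+ * pow_w(w, 5) = w_tab[0] * w_tab[2] since 5 = 2^0 + 2^2,
+ * so one fixed-point multiply is needed per set bit of n.
+ */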
+static struct wtab *wtab_list = NULL; /* pointer to wtab list */
+
+struct wtab *
+wtab_alloc(int weight)
+{
+ struct wtab *w;
+ int i;
+
+ for (w = wtab_list; w != NULL; w = w->w_next)
+ if (w->w_weight == weight) {
+ w->w_refcount++;
+ return (w);
+ }
+
+ w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (w == NULL)
+ return (NULL);
+ w->w_weight = weight;
+ w->w_refcount = 1;
+ w->w_next = wtab_list;
+ wtab_list = w;
+
+ /* initialize the weight table */
+ w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
+ for (i = 1; i < 32; i++) {
+ w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
+ if (w->w_tab[i] == 0 && w->w_param_max == 0)
+ w->w_param_max = 1 << i;
+ }
+
+ return (w);
+}
+
+int
+wtab_destroy(struct wtab *w)
+{
+ struct wtab *prev;
+
+ if (--w->w_refcount > 0)
+ return (0);
+
+ if (wtab_list == w)
+ wtab_list = w->w_next;
+ else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
+ if (prev->w_next == w) {
+ prev->w_next = w->w_next;
+ break;
+ }
+
+ free(w, M_DEVBUF);
+ return (0);
+}
+
+int32_t
+pow_w(struct wtab *w, int n)
+{
+ int i, bit;
+ int32_t val;
+
+ if (n >= w->w_param_max)
+ return (0);
+
+ val = 1 << FP_SHIFT;
+ if (n <= 0)
+ return (val);
+
+ bit = 1;
+ i = 0;
+ while (n) {
+ if (n & bit) {
+ val = (val * w->w_tab[i]) >> FP_SHIFT;
+ n &= ~bit;
+ }
+ i++;
+ bit <<= 1;
+ }
+ return (val);
+}
+
+#ifdef ALTQ3_COMPAT
+/*
+ * red device interface
+ */
+altqdev_decl(red);
+
+int
+redopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+redclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ red_queue_t *rqp;
+ int err, error = 0;
+
+ while ((rqp = red_list) != NULL) {
+ /* destroy all */
+ err = red_detach(rqp);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+redioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ red_queue_t *rqp;
+ struct red_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case RED_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+ switch (cmd) {
+
+ case RED_ENABLE:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case RED_DISABLE:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case RED_IF_ATTACH:
+ ifp = ifunit(((struct red_interface *)addr)->red_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize red_queue_t */
+ rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
+ if (rqp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp, sizeof(red_queue_t));
+
+ rqp->rq_q = malloc(sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (rqp->rq_q == NULL) {
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
+ if (rqp->rq_red == NULL) {
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = RED_LIMIT;
+ qtype(rqp->rq_q) = Q_RED;
+
+ /*
+ * set RED to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
+ red_enqueue, red_dequeue, red_request,
+ NULL, NULL);
+ if (error) {
+ red_destroy(rqp->rq_red);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the red list */
+ rqp->rq_next = red_list;
+ red_list = rqp;
+ break;
+
+ case RED_IF_DETACH:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = red_detach(rqp);
+ break;
+
+ case RED_GETSTATS:
+ do {
+ struct red_stats *q_stats;
+ red_t *rp;
+
+ q_stats = (struct red_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.red_ifname,
+ ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ q_stats->q_len = qlen(rqp->rq_q);
+ q_stats->q_limit = qlimit(rqp->rq_q);
+
+ rp = rqp->rq_red;
+ q_stats->q_avg = rp->red_avg >> rp->red_wshift;
+ q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
+ q_stats->drop_cnt = rp->red_stats.drop_cnt;
+ q_stats->drop_forced = rp->red_stats.drop_forced;
+ q_stats->drop_unforced = rp->red_stats.drop_unforced;
+ q_stats->marked_packets = rp->red_stats.marked_packets;
+
+ q_stats->weight = rp->red_weight;
+ q_stats->inv_pmax = rp->red_inv_pmax;
+ q_stats->th_min = rp->red_thmin;
+ q_stats->th_max = rp->red_thmax;
+
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL) {
+ struct flowvalve *fv = rp->red_flowvalve;
+ q_stats->fv_flows = fv->fv_flows;
+ q_stats->fv_pass = fv->fv_stats.pass;
+ q_stats->fv_predrop = fv->fv_stats.predrop;
+ q_stats->fv_alloc = fv->fv_stats.alloc;
+ q_stats->fv_escape = fv->fv_stats.escape;
+ } else {
+#endif /* ALTQ_FLOWVALVE */
+ q_stats->fv_flows = 0;
+ q_stats->fv_pass = 0;
+ q_stats->fv_predrop = 0;
+ q_stats->fv_alloc = 0;
+ q_stats->fv_escape = 0;
+#ifdef ALTQ_FLOWVALVE
+ }
+#endif /* ALTQ_FLOWVALVE */
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RED_CONFIG:
+ do {
+ struct red_conf *fc;
+ red_t *new;
+ int s, limit;
+
+ fc = (struct red_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.red_ifname,
+ ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ new = red_alloc(fc->red_weight,
+ fc->red_inv_pmax,
+ fc->red_thmin,
+ fc->red_thmax,
+ fc->red_flags,
+ fc->red_pkttime);
+ if (new == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+ s = splnet();
+ red_purgeq(rqp);
+ limit = fc->red_limit;
+ if (limit < fc->red_thmax)
+ limit = fc->red_thmax;
+ qlimit(rqp->rq_q) = limit;
+
+ red_destroy(rqp->rq_red);
+ rqp->rq_red = new;
+
+ splx(s);
+
+ /* write back new values */
+ fc->red_limit = limit;
+ fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
+ fc->red_thmin = rqp->rq_red->red_thmin;
+ fc->red_thmax = rqp->rq_red->red_thmax;
+
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RED_SETDEFAULTS:
+ do {
+ struct redparams *rp;
+
+ rp = (struct redparams *)addr;
+
+ default_th_min = rp->th_min;
+ default_th_max = rp->th_max;
+ default_inv_pmax = rp->inv_pmax;
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+red_detach(rqp)
+ red_queue_t *rqp;
+{
+ red_queue_t *tmp;
+ int error = 0;
+
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ altq_disable(rqp->rq_ifq);
+
+ if ((error = altq_detach(rqp->rq_ifq)))
+ return (error);
+
+ if (red_list == rqp)
+ red_list = rqp->rq_next;
+ else {
+ for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
+ if (tmp->rq_next == rqp) {
+ tmp->rq_next = rqp->rq_next;
+ break;
+ }
+ if (tmp == NULL)
+ printf("red_detach: no state found in red_list!\n");
+ }
+
+ red_destroy(rqp->rq_red);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+static int
+red_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
+ return ENOBUFS;
+ ifq->ifq_len++;
+ return 0;
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+
+static struct mbuf *
+red_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+ struct mbuf *m;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (op == ALTDQ_POLL)
+ return qhead(rqp->rq_q);
+
+ /* op == ALTDQ_REMOVE */
+ m = red_getq(rqp->rq_red, rqp->rq_q);
+ if (m != NULL)
+ ifq->ifq_len--;
+ return (m);
+}
+
+static int
+red_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ red_purgeq(rqp);
+ break;
+ }
+ return (0);
+}
+
+static void
+red_purgeq(rqp)
+ red_queue_t *rqp;
+{
+ _flushq(rqp->rq_q);
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ rqp->rq_ifq->ifq_len = 0;
+}
+
+#ifdef ALTQ_FLOWVALVE
+
+#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
+#define FV_PSCALE(x) ((x) << FV_PSHIFT)
+#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
+#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
+#define FV_FSCALE(x) ((x) << FV_FSHIFT)
+#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
+
+#define FV_TIMER (3 * hz) /* timer value for garbage collector */
+#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
+
+#define FV_N 10 /* update fve_f every FV_N packets */
+
+#define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
+#define FV_TTHRESH 3 /* time threshold to delete fve */
+#define FV_ALPHA 5 /* extra packet count */
+
+#define FV_STATS
+
+#if (__FreeBSD_version > 300000)
+#define FV_TIMESTAMP(tp) getmicrotime(tp)
+#else
+#define FV_TIMESTAMP(tp) { (*(tp)) = time; }
+#endif
+
+/*
+ * Brtt table: 127 entry table to convert drop rate (p) to
+ * the corresponding bandwidth fraction (f)
+ * the following equation is implemented to use scaled values,
+ * fve_p and fve_f, in the fixed point format.
+ *
+ * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
+ * f = Brtt(p) / (max_th + alpha)
+ */
+#define BRTT_SIZE 128
+#define BRTT_SHIFT 12
+#define BRTT_MASK 0x0007f000
+#define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
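+/*
+ * note: fve_p is kept in (FV_PSHIFT + FP_SHIFT)-bit fixed-point, so 1.0
+ * corresponds to BRTT_PMAX (1 << 19).  BRTT_MASK selects bits 12-18 of
+ * p and BRTT_SHIFT drops the fraction, giving an index in [0, 127].
+ */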
+
+const int brtt_tab[BRTT_SIZE] = {
+ 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
+ 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
+ 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
+ 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
+ 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
+ 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
+ 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
+ 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
+ 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
+ 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
+ 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
+ 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
+ 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
+ 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
+ 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
+ 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
+};
+
+static __inline struct fve *
+flowlist_lookup(fv, pktattr, now)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct timeval *now;
+{
+ struct fve *fve;
+ int flows;
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ struct timeval tthresh;
+
+ if (pktattr == NULL)
+ return (NULL);
+
+ tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
+ flows = 0;
+ /*
+ * search the flow list
+ */
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ ip = (struct ip *)pktattr->pattr_hdr;
+ TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
+ if (fve->fve_lastdrop.tv_sec == 0)
+ break;
+ if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
+ fve->fve_lastdrop.tv_sec = 0;
+ break;
+ }
+ if (fve->fve_flow.flow_af == AF_INET &&
+ fve->fve_flow.flow_ip.ip_src.s_addr ==
+ ip->ip_src.s_addr &&
+ fve->fve_flow.flow_ip.ip_dst.s_addr ==
+ ip->ip_dst.s_addr)
+ return (fve);
+ flows++;
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
+ if (fve->fve_lastdrop.tv_sec == 0)
+ break;
+ if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
+ fve->fve_lastdrop.tv_sec = 0;
+ break;
+ }
+ if (fve->fve_flow.flow_af == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
+ &ip6->ip6_src) &&
+ IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
+ &ip6->ip6_dst))
+ return (fve);
+ flows++;
+ }
+ break;
+#endif /* INET6 */
+
+ default:
+ /* unknown protocol. no drop. */
+ return (NULL);
+ }
+ fv->fv_flows = flows; /* save the number of active fve's */
+ return (NULL);
+}
+
+static __inline struct fve *
+flowlist_reclaim(fv, pktattr)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+{
+ struct fve *fve;
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+
+ /*
+ * get an entry from the tail of the LRU list.
+ */
+ fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
+
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ ip = (struct ip *)pktattr->pattr_hdr;
+ fve->fve_flow.flow_af = AF_INET;
+ fve->fve_flow.flow_ip.ip_src = ip->ip_src;
+ fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ fve->fve_flow.flow_af = AF_INET6;
+ fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
+ fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
+ break;
+#endif
+ }
+
+ fve->fve_state = Green;
+ fve->fve_p = 0.0;
+ fve->fve_f = 0.0;
+ fve->fve_ifseq = fv->fv_ifseq - 1;
+ fve->fve_count = 0;
+
+ fv->fv_flows++;
+#ifdef FV_STATS
+ fv->fv_stats.alloc++;
+#endif
+ return (fve);
+}
+
+static __inline void
+flowlist_move_to_head(fv, fve)
+ struct flowvalve *fv;
+ struct fve *fve;
+{
+ if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
+ TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
+ TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
+ }
+}
+
+#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
+/*
+ * allocate flowvalve structure
+ */
+static struct flowvalve *
+fv_alloc(rp)
+ struct red *rp;
+{
+ struct flowvalve *fv;
+ struct fve *fve;
+ int i, num;
+
+ num = FV_FLOWLISTSIZE;
+ fv = malloc(sizeof(struct flowvalve),
+ M_DEVBUF, M_WAITOK);
+ if (fv == NULL)
+ return (NULL);
+ bzero(fv, sizeof(struct flowvalve));
+
+ fv->fv_fves = malloc(sizeof(struct fve) * num,
+ M_DEVBUF, M_WAITOK);
+ if (fv->fv_fves == NULL) {
+ free(fv, M_DEVBUF);
+ return (NULL);
+ }
+ bzero(fv->fv_fves, sizeof(struct fve) * num);
+
+ fv->fv_flows = 0;
+ TAILQ_INIT(&fv->fv_flowlist);
+ for (i = 0; i < num; i++) {
+ fve = &fv->fv_fves[i];
+ fve->fve_lastdrop.tv_sec = 0;
+ TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
+ }
+
+ /* initialize drop rate threshold in scaled fixed-point */
+ fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
+
+ /* initialize drop rate to fraction table */
+ fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
+ M_DEVBUF, M_WAITOK);
+ if (fv->fv_p2ftab == NULL) {
+ free(fv->fv_fves, M_DEVBUF);
+ free(fv, M_DEVBUF);
+ return (NULL);
+ }
+ /*
+ * create the p2f table.
+ * (shift is used to keep the precision)
+ */
+ for (i = 1; i < BRTT_SIZE; i++) {
+ int f;
+
+ f = brtt_tab[i] << 8;
+ fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
+ }
+
+ return (fv);
+}
+#endif
+
+static void fv_destroy(fv)
+ struct flowvalve *fv;
+{
+ free(fv->fv_p2ftab, M_DEVBUF);
+ free(fv->fv_fves, M_DEVBUF);
+ free(fv, M_DEVBUF);
+}
+
+static __inline int
+fv_p2f(fv, p)
+ struct flowvalve *fv;
+ int p;
+{
+ int val, f;
+
+ if (p >= BRTT_PMAX)
+ f = fv->fv_p2ftab[BRTT_SIZE-1];
+ else if ((val = (p & BRTT_MASK)))
+ f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
+ else
+ f = fv->fv_p2ftab[1];
+ return (f);
+}
+
+/*
+ * check if an arriving packet should be pre-dropped.
+ * called from red_addq() when a packet arrives.
+ * returns 1 when the packet should be pre-dropped.
+ * should be called in splimp.
+ */
+static int
+fv_checkflow(fv, pktattr, fcache)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct fve **fcache;
+{
+ struct fve *fve;
+ struct timeval now;
+
+ fv->fv_ifseq++;
+ FV_TIMESTAMP(&now);
+
+ if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
+ /* no matching entry in the flowlist */
+ return (0);
+
+ *fcache = fve;
+
+ /* update fraction f for every FV_N packets */
+ if (++fve->fve_count == FV_N) {
+ /*
+ * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
+ */
+ fve->fve_f =
+ (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
+ + fve->fve_f - FV_FUNSCALE(fve->fve_f);
+ fve->fve_ifseq = fv->fv_ifseq;
+ fve->fve_count = 0;
+ }
+
+ /*
+ * overpumping test
+ */
+ if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
+ int fthresh;
+
+ /* calculate a threshold */
+ fthresh = fv_p2f(fv, fve->fve_p);
+ if (fve->fve_f > fthresh)
+ fve->fve_state = Red;
+ }
+
+ if (fve->fve_state == Red) {
+ /*
+ * backoff test
+ */
+ if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
+ /* no drop for at least FV_BACKOFFTHRESH sec */
+ fve->fve_p = 0;
+ fve->fve_state = Green;
+#ifdef FV_STATS
+ fv->fv_stats.escape++;
+#endif
+ } else {
+ /* block this flow */
+ flowlist_move_to_head(fv, fve);
+ fve->fve_lastdrop = now;
+#ifdef FV_STATS
+ fv->fv_stats.predrop++;
+#endif
+ return (1);
+ }
+ }
+
+ /*
+ * p = (1 - Wp) * p
+ */
+ fve->fve_p -= FV_PUNSCALE(fve->fve_p);
+ if (fve->fve_p < 0)
+ fve->fve_p = 0;
+#ifdef FV_STATS
+ fv->fv_stats.pass++;
+#endif
+ return (0);
+}
+
+/*
+ * called from red_addq when a packet is dropped by red.
+ * should be called in splimp.
+ */
+static void fv_dropbyred(fv, pktattr, fcache)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct fve *fcache;
+{
+ struct fve *fve;
+ struct timeval now;
+
+ if (pktattr == NULL)
+ return;
+ FV_TIMESTAMP(&now);
+
+ if (fcache != NULL)
+ /* the fve of this packet is already cached */
+ fve = fcache;
+ else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
+ fve = flowlist_reclaim(fv, pktattr);
+
+ flowlist_move_to_head(fv, fve);
+
+ /*
+ * update p: the following line cancels the update
+ * in fv_checkflow() and calculate
+	 * in fv_checkflow() and calculates
+ */
+ fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
+
+ fve->fve_lastdrop = now;
+}
+
+#endif /* ALTQ_FLOWVALVE */
+
+#ifdef KLD_MODULE
+
+static struct altqsw red_sw =
+ {"red", redopen, redclose, redioctl};
+
+ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
+MODULE_VERSION(altq_red, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_RED */
diff --git a/freebsd/sys/net/altq/altq_red.h b/freebsd/sys/net/altq/altq_red.h
new file mode 100644
index 00000000..8ae8d291
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_red.h
@@ -0,0 +1,199 @@
+/*-
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_red.h,v 1.8 2003/07/10 12:07:49 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_RED_H_
+#define _ALTQ_ALTQ_RED_H_
+
+#include <net/altq/altq_classq.h>
+
+#ifdef ALTQ3_COMPAT
+struct red_interface {
+ char red_ifname[IFNAMSIZ];
+};
+
+struct red_stats {
+ struct red_interface iface;
+ int q_len;
+ int q_avg;
+
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+
+ /* static red parameters */
+ int q_limit;
+ int weight;
+ int inv_pmax;
+ int th_min;
+ int th_max;
+
+ /* flowvalve related stuff */
+ u_int fv_flows;
+ u_int fv_pass;
+ u_int fv_predrop;
+ u_int fv_alloc;
+ u_int fv_escape;
+};
+
+struct red_conf {
+ struct red_interface iface;
+ int red_weight; /* weight for EWMA */
+ int red_inv_pmax; /* inverse of max drop probability */
+ int red_thmin; /* red min threshold */
+ int red_thmax; /* red max threshold */
+ int red_limit; /* max queue length */
+ int red_pkttime; /* average packet time in usec */
+ int red_flags; /* see below */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* red flags */
+#define REDF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define REDF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define REDF_ECN (REDF_ECN4 | REDF_ECN6)
+#define REDF_FLOWVALVE 0x04 /* use flowvalve (aka penalty-box) */
+
+/*
+ * simpler versions of red parameters and statistics used by other
+ * disciplines (e.g., CBQ)
+ */
+struct redparams {
+ int th_min; /* red min threshold */
+ int th_max; /* red max threshold */
+ int inv_pmax; /* inverse of max drop probability */
+};
+
+struct redstats {
+ int q_avg;
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+};
+
+#ifdef ALTQ3_COMPAT
+/*
+ * IOCTLs for RED
+ */
+#define RED_IF_ATTACH _IOW('Q', 1, struct red_interface)
+#define RED_IF_DETACH _IOW('Q', 2, struct red_interface)
+#define RED_ENABLE _IOW('Q', 3, struct red_interface)
+#define RED_DISABLE _IOW('Q', 4, struct red_interface)
+#define RED_CONFIG _IOWR('Q', 6, struct red_conf)
+#define RED_GETSTATS _IOWR('Q', 12, struct red_stats)
+#define RED_SETDEFAULTS _IOW('Q', 30, struct redparams)
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+#ifdef ALTQ3_COMPAT
+struct flowvalve;
+#endif
+
+/* weight table structure for idle time calibration */
+struct wtab {
+ struct wtab *w_next;
+ int w_weight;
+ int w_param_max;
+ int w_refcount;
+ int32_t w_tab[32];
+};
+
+typedef struct red {
+ int red_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int red_flags; /* red flags */
+
+ /* red parameters */
+ int red_weight; /* weight for EWMA */
+ int red_inv_pmax; /* inverse of max drop probability */
+ int red_thmin; /* red min threshold */
+ int red_thmax; /* red max threshold */
+
+ /* variables for internal use */
+ int red_wshift; /* log(red_weight) */
+ int red_thmin_s; /* th_min scaled by avgshift */
+ int red_thmax_s; /* th_max scaled by avgshift */
+ int red_probd; /* drop probability denominator */
+
+ int red_avg; /* queue len avg scaled by avgshift */
+ int red_count; /* packet count since last dropped/
+ marked packet */
+ int red_idle; /* queue was empty */
+ int red_old; /* avg is above th_min */
+ struct wtab *red_wtab; /* weight table */
+ struct timeval red_last; /* time when the queue becomes idle */
+
+#ifdef ALTQ3_COMPAT
+ struct flowvalve *red_flowvalve; /* flowvalve state */
+#endif
+
+ struct {
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+ } red_stats;
+} red_t;
+
+#ifdef ALTQ3_COMPAT
+typedef struct red_queue {
+ struct red_queue *rq_next; /* next red_state in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q;
+
+ red_t *rq_red;
+} red_queue_t;
+#endif /* ALTQ3_COMPAT */
+
+/* red drop types */
+#define DTYPE_NODROP 0 /* no drop */
+#define DTYPE_FORCED 1 /* a "forced" drop */
+#define DTYPE_EARLY 2 /* an "unforced" (early) drop */
+
+extern red_t *red_alloc(int, int, int, int, int, int);
+extern void red_destroy(red_t *);
+extern void red_getstats(red_t *, struct redstats *);
+extern int red_addq(red_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *);
+extern struct mbuf *red_getq(red_t *, class_queue_t *);
+extern int drop_early(int, int, int);
+extern int mark_ecn(struct mbuf *, struct altq_pktattr *, int);
+extern struct wtab *wtab_alloc(int);
+extern int wtab_destroy(struct wtab *);
+extern int32_t pow_w(struct wtab *, int);
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_RED_H_ */
diff --git a/freebsd/sys/net/altq/altq_rio.c b/freebsd/sys/net/altq/altq_rio.c
new file mode 100644
index 00000000..bad0257c
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_rio.c
@@ -0,0 +1,846 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*-
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $
+ * $FreeBSD$
+ */
+
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <rtems/bsd/sys/errno.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <sys/proc.h>
+#include <sys/sockio.h>
+#include <sys/kernel.h>
+#endif
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cdnr.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+
+/*
+ * RIO: RED with IN/OUT bit
+ * described in
+ * "Explicit Allocation of Best Effort Packet Delivery Service"
+ * David D. Clark and Wenjia Fang, MIT Lab for Computer Science
+ * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
+ *
+ * this implementation is extended to support more than 2 drop precedence
+ * values as described in RFC2597 (Assured Forwarding PHB Group).
+ *
+ */
+/*
+ * AF DS (differentiated service) codepoints.
+ * (classes can be mapped to CBQ or H-FSC classes.)
+ *
+ * 0 1 2 3 4 5 6 7
+ * +---+---+---+---+---+---+---+---+
+ * | CLASS |DropPre| 0 | CU |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * class 1: 001
+ * class 2: 010
+ * class 3: 011
+ * class 4: 100
+ *
+ * low drop prec: 01
+ * medium drop prec: 10
+ * high drop prec: 11
+ */
+
+/* normal red parameters */
+#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
+ /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
+ /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
+ /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define INV_P_MAX 10 /* inverse of max drop probability */
+#define TH_MIN 5 /* min threshold */
+#define TH_MAX 15 /* max threshold */
+
+#define RIO_LIMIT 60 /* default max queue length */
+#define RIO_STATS /* collect statistics */
+
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a)->tv_usec - (b)->tv_usec; \
+ if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \
+ if (xxs < 0) { \
+ delta = 60000000; \
+ } else if (xxs > 4) { \
+ if (xxs > 60) \
+ delta = 60000000; \
+ else \
+ delta += xxs * 1000000; \
+ } else while (xxs > 0) { \
+ delta += 1000000; \
+ xxs--; \
+ } \
+ } \
+}
+
+#ifdef ALTQ3_COMPAT
+/* rio_list keeps all rio_queue_t's allocated. */
+static rio_queue_t *rio_list = NULL;
+#endif
+/* default rio parameter values */
+static struct redparams default_rio_params[RIO_NDROPPREC] = {
+ /* th_min, th_max, inv_pmax */
+ { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
+ { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
+ { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */
+};
+
+/* internal function prototypes */
+static int dscp2index(u_int8_t);
+#ifdef ALTQ3_COMPAT
+static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *rio_dequeue(struct ifaltq *, int);
+static int rio_request(struct ifaltq *, int, void *);
+static int rio_detach(rio_queue_t *);
+
+/*
+ * rio device interface
+ */
+altqdev_decl(rio);
+
+#endif /* ALTQ3_COMPAT */
+
+rio_t *
+rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
+{
+ rio_t *rp;
+ int w, i;
+ int npkts_per_sec;
+
+ rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (rp == NULL)
+ return (NULL);
+
+ rp->rio_flags = flags;
+ if (pkttime == 0)
+ /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
+ rp->rio_pkttime = 800;
+ else
+ rp->rio_pkttime = pkttime;
+
+ if (weight != 0)
+ rp->rio_weight = weight;
+ else {
+ /* use default */
+ rp->rio_weight = W_WEIGHT;
+
+ /* when the link is very slow, adjust red parameters */
+ npkts_per_sec = 1000000 / rp->rio_pkttime;
+ if (npkts_per_sec < 50) {
+ /* up to about 400Kbps */
+ rp->rio_weight = W_WEIGHT_2;
+ } else if (npkts_per_sec < 300) {
+ /* up to about 2.4Mbps */
+ rp->rio_weight = W_WEIGHT_1;
+ }
+ }
+
+ /* calculate wshift. weight must be power of 2 */
+ w = rp->rio_weight;
+ for (i = 0; w > 1; i++)
+ w = w >> 1;
+ rp->rio_wshift = i;
+ w = 1 << rp->rio_wshift;
+ if (w != rp->rio_weight) {
+ printf("invalid weight value %d for red! use %d\n",
+ rp->rio_weight, w);
+ rp->rio_weight = w;
+ }
+
+ /* allocate weight table */
+ rp->rio_wtab = wtab_alloc(rp->rio_weight);
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ struct dropprec_state *prec = &rp->rio_precstate[i];
+
+ prec->avg = 0;
+ prec->idle = 1;
+
+ if (params == NULL || params[i].inv_pmax == 0)
+ prec->inv_pmax = default_rio_params[i].inv_pmax;
+ else
+ prec->inv_pmax = params[i].inv_pmax;
+ if (params == NULL || params[i].th_min == 0)
+ prec->th_min = default_rio_params[i].th_min;
+ else
+ prec->th_min = params[i].th_min;
+ if (params == NULL || params[i].th_max == 0)
+ prec->th_max = default_rio_params[i].th_max;
+ else
+ prec->th_max = params[i].th_max;
+
+ /*
+ * th_min_s and th_max_s are scaled versions of th_min
+ * and th_max to be compared with avg.
+ */
+ prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
+ prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
+
+ /*
+ * precompute probability denominator
+ * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+ */
+ prec->probd = (2 * (prec->th_max - prec->th_min)
+ * prec->inv_pmax) << FP_SHIFT;
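+		/*
+		 * e.g., with the high-precedence defaults th_min = 5,
+		 * th_max = 15 and inv_pmax = 10, this precomputes
+		 * probd = (2 * 10 * 10) << 12 = 819200.
+		 */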
+
+ microtime(&prec->last);
+ }
+
+ return (rp);
+}
+
+void
+rio_destroy(rio_t *rp)
+{
+ wtab_destroy(rp->rio_wtab);
+ free(rp, M_DEVBUF);
+}
+
+void
+rio_getstats(rio_t *rp, struct redstats *sp)
+{
+ int i;
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ bcopy(&rp->q_stats[i], sp, sizeof(struct redstats));
+ sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
+ sp++;
+ }
+}
+
+#if (RIO_NDROPPREC == 3)
+/*
+ * internally, a drop precedence value is converted to an index
+ * starting from 0.
+ */
+static int
+dscp2index(u_int8_t dscp)
+{
+ int dpindex = dscp & AF_DROPPRECMASK;
+
+ if (dpindex == 0)
+ return (0);
+ return ((dpindex >> 3) - 1);
+}
+#endif
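+
+/*
+ * e.g., AF13 carries DS field 0x38 (class 001, drop precedence 11):
+ * masking with AF_DROPPRECMASK (0x18) gives 0x18, and (0x18 >> 3) - 1
+ * = 2, the high drop precedence index; non-AF codepoints map to 0.
+ */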
+
+#if 1
+/*
+ * kludge: when a packet is dequeued, we need to know its drop precedence
+ * in order to maintain the queue length of each drop precedence.
+ * use m_pkthdr.rcvif to pass this info.
+ */
+#define RIOM_SET_PRECINDEX(m, idx) \
+ do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0)
+#define RIOM_GET_PRECINDEX(m) \
+ ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
+ (m)->m_pkthdr.rcvif = NULL; idx; })
+#endif
+
+int
+rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
+ struct altq_pktattr *pktattr)
+{
+ int avg, droptype;
+ u_int8_t dsfield, odsfield;
+ int dpindex, i, n, t;
+ struct timeval now;
+ struct dropprec_state *prec;
+
+ dsfield = odsfield = read_dsfield(m, pktattr);
+ dpindex = dscp2index(dsfield);
+
+ /*
+ * update avg of the precedence states whose drop precedence
+ * is larger than or equal to the drop precedence of the packet
+ */
+ now.tv_sec = 0;
+ for (i = dpindex; i < RIO_NDROPPREC; i++) {
+ prec = &rp->rio_precstate[i];
+ avg = prec->avg;
+ if (prec->idle) {
+ prec->idle = 0;
+ if (now.tv_sec == 0)
+ microtime(&now);
+ t = (now.tv_sec - prec->last.tv_sec);
+ if (t > 60)
+ avg = 0;
+ else {
+ t = t * 1000000 +
+ (now.tv_usec - prec->last.tv_usec);
+ n = t / rp->rio_pkttime;
+ /* calculate (avg = (1 - Wq)^n * avg) */
+ if (n > 0)
+ avg = (avg >> FP_SHIFT) *
+ pow_w(rp->rio_wtab, n);
+ }
+ }
+
+ /* run estimator. (avg is scaled by WEIGHT in fixed-point) */
+ avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
+ prec->avg = avg; /* save the new value */
+ /*
+ * count keeps a tally of arriving traffic that has not
+ * been dropped.
+ */
+ prec->count++;
+ }
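+
+	/*
+	 * For the idle-time decay above: at the default pkttime of
+	 * 800 usec, an 80 msec idle gap gives n = 100 missed samples,
+	 * so pow_w() scales avg by (511/512)^100, roughly 0.82, under
+	 * the default weight.
+	 */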
+
+ prec = &rp->rio_precstate[dpindex];
+ avg = prec->avg;
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (avg >= prec->th_min_s && prec->qlen > 1) {
+ if (avg >= prec->th_max_s) {
+ /* avg >= th_max: forced drop */
+ droptype = DTYPE_FORCED;
+ } else if (prec->old == 0) {
+ /* first exceeds th_min */
+ prec->count = 1;
+ prec->old = 1;
+ } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
+ prec->probd, prec->count)) {
+ /* unforced drop by red */
+ droptype = DTYPE_EARLY;
+ }
+ } else {
+ /* avg < th_min */
+ prec->old = 0;
+ }
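+
+	/*
+	 * drop_early() applies the standard RED test: the drop
+	 * probability ramps up linearly as avg climbs from th_min_s
+	 * toward th_max_s, and the running count inflates it so that
+	 * drops are spread over arrivals rather than clustered.
+	 */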
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+ if (droptype != DTYPE_NODROP) {
+ /* always drop incoming packet (as opposed to randomdrop) */
+ for (i = dpindex; i < RIO_NDROPPREC; i++)
+ rp->rio_precstate[i].count = 0;
+#ifdef RIO_STATS
+ if (droptype == DTYPE_EARLY)
+ rp->q_stats[dpindex].drop_unforced++;
+ else
+ rp->q_stats[dpindex].drop_forced++;
+ PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
+#endif
+ m_freem(m);
+ return (-1);
+ }
+
+ for (i = dpindex; i < RIO_NDROPPREC; i++)
+ rp->rio_precstate[i].qlen++;
+
+ /* save drop precedence index in mbuf hdr */
+ RIOM_SET_PRECINDEX(m, dpindex);
+
+ if (rp->rio_flags & RIOF_CLEARDSCP)
+ dsfield &= ~DSCP_MASK;
+
+ if (dsfield != odsfield)
+ write_dsfield(m, pktattr, dsfield);
+
+ _addq(q, m);
+
+#ifdef RIO_STATS
+ PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
+#endif
+ return (0);
+}
+
+struct mbuf *
+rio_getq(rio_t *rp, class_queue_t *q)
+{
+ struct mbuf *m;
+ int dpindex, i;
+
+ if ((m = _getq(q)) == NULL)
+ return NULL;
+
+ dpindex = RIOM_GET_PRECINDEX(m);
+ for (i = dpindex; i < RIO_NDROPPREC; i++) {
+ if (--rp->rio_precstate[i].qlen == 0) {
+ if (rp->rio_precstate[i].idle == 0) {
+ rp->rio_precstate[i].idle = 1;
+ microtime(&rp->rio_precstate[i].last);
+ }
+ }
+ }
+ return (m);
+}
+
+#ifdef ALTQ3_COMPAT
+int
+rioopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+rioclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ rio_queue_t *rqp;
+ int err, error = 0;
+
+ while ((rqp = rio_list) != NULL) {
+ /* destroy all */
+ err = rio_detach(rqp);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+rioioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ rio_queue_t *rqp;
+ struct rio_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case RIO_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+ return (error);
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case RIO_ENABLE:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case RIO_DISABLE:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case RIO_IF_ATTACH:
+ ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize rio_queue_t */
+ rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK);
+ if (rqp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp, sizeof(rio_queue_t));
+
+ rqp->rq_q = malloc(sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (rqp->rq_q == NULL) {
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
+ if (rqp->rq_rio == NULL) {
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = RIO_LIMIT;
+ qtype(rqp->rq_q) = Q_RIO;
+
+ /*
+ * set RIO to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
+ rio_enqueue, rio_dequeue, rio_request,
+ NULL, NULL);
+ if (error) {
+ rio_destroy(rqp->rq_rio);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the rio list */
+ rqp->rq_next = rio_list;
+ rio_list = rqp;
+ break;
+
+ case RIO_IF_DETACH:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = rio_detach(rqp);
+ break;
+
+ case RIO_GETSTATS:
+ do {
+ struct rio_stats *q_stats;
+ rio_t *rp;
+ int i;
+
+ q_stats = (struct rio_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
+ ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ rp = rqp->rq_rio;
+
+ q_stats->q_limit = qlimit(rqp->rq_q);
+ q_stats->weight = rp->rio_weight;
+ q_stats->flags = rp->rio_flags;
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ q_stats->q_len[i] = rp->rio_precstate[i].qlen;
+ bcopy(&rp->q_stats[i], &q_stats->q_stats[i],
+ sizeof(struct redstats));
+ q_stats->q_stats[i].q_avg =
+ rp->rio_precstate[i].avg >> rp->rio_wshift;
+
+ q_stats->q_params[i].inv_pmax
+ = rp->rio_precstate[i].inv_pmax;
+ q_stats->q_params[i].th_min
+ = rp->rio_precstate[i].th_min;
+ q_stats->q_params[i].th_max
+ = rp->rio_precstate[i].th_max;
+ }
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RIO_CONFIG:
+ do {
+ struct rio_conf *fc;
+ rio_t *new;
+ int s, limit, i;
+
+ fc = (struct rio_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.rio_ifname,
+ ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ new = rio_alloc(fc->rio_weight, &fc->q_params[0],
+ fc->rio_flags, fc->rio_pkttime);
+ if (new == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+ s = splnet();
+ _flushq(rqp->rq_q);
+ limit = fc->rio_limit;
+ if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
+ limit = fc->q_params[RIO_NDROPPREC-1].th_max;
+ qlimit(rqp->rq_q) = limit;
+
+ rio_destroy(rqp->rq_rio);
+ rqp->rq_rio = new;
+
+ splx(s);
+
+ /* write back new values */
+ fc->rio_limit = limit;
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ fc->q_params[i].inv_pmax =
+ rqp->rq_rio->rio_precstate[i].inv_pmax;
+ fc->q_params[i].th_min =
+ rqp->rq_rio->rio_precstate[i].th_min;
+ fc->q_params[i].th_max =
+ rqp->rq_rio->rio_precstate[i].th_max;
+ }
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RIO_SETDEFAULTS:
+ do {
+ struct redparams *rp;
+ int i;
+
+ rp = (struct redparams *)addr;
+ for (i = 0; i < RIO_NDROPPREC; i++)
+ default_rio_params[i] = rp[i];
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+static int
+rio_detach(rqp)
+ rio_queue_t *rqp;
+{
+ rio_queue_t *tmp;
+ int error = 0;
+
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ altq_disable(rqp->rq_ifq);
+
+ if ((error = altq_detach(rqp->rq_ifq)))
+ return (error);
+
+ if (rio_list == rqp)
+ rio_list = rqp->rq_next;
+ else {
+ for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
+ if (tmp->rq_next == rqp) {
+ tmp->rq_next = rqp->rq_next;
+ break;
+ }
+ if (tmp == NULL)
+ printf("rio_detach: no state found in rio_list!\n");
+ }
+
+ rio_destroy(rqp->rq_rio);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * rio support routines
+ */
+static int
+rio_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ _flushq(rqp->rq_q);
+ if (ALTQ_IS_ENABLED(ifq))
+ ifq->ifq_len = 0;
+ break;
+ }
+ return (0);
+}
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+static int
+rio_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+ int error = 0;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0)
+ ifq->ifq_len++;
+ else
+ error = ENOBUFS;
+ return error;
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+
+static struct mbuf *
+rio_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+ struct mbuf *m = NULL;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (op == ALTDQ_POLL)
+ return qhead(rqp->rq_q);
+
+ m = rio_getq(rqp->rq_rio, rqp->rq_q);
+ if (m != NULL)
+ ifq->ifq_len--;
+ return m;
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw rio_sw =
+ {"rio", rioopen, rioclose, rioioctl};
+
+ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
+MODULE_VERSION(altq_rio, 1);
+MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_RIO */
diff --git a/freebsd/sys/net/altq/altq_rio.h b/freebsd/sys/net/altq/altq_rio.h
new file mode 100644
index 00000000..ce9dc0e0
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_rio.h
@@ -0,0 +1,145 @@
+/*-
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_rio.h,v 1.9 2003/07/10 12:07:49 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_RIO_H_
+#define _ALTQ_ALTQ_RIO_H_
+
+#include <net/altq/altq_classq.h>
+
+/*
+ * RIO: RED with IN/OUT bit
+ * (extended to support more than 2 drop precedence values)
+ */
+#define RIO_NDROPPREC 3 /* number of drop precedence values */
+
+#ifdef ALTQ3_COMPAT
+struct rio_interface {
+ char rio_ifname[IFNAMSIZ];
+};
+
+struct rio_stats {
+ struct rio_interface iface;
+ int q_len[RIO_NDROPPREC];
+ struct redstats q_stats[RIO_NDROPPREC];
+
+ /* static red parameters */
+ int q_limit;
+ int weight;
+ int flags;
+ struct redparams q_params[RIO_NDROPPREC];
+};
+
+struct rio_conf {
+ struct rio_interface iface;
+ struct redparams q_params[RIO_NDROPPREC];
+ int rio_weight; /* weight for EWMA */
+ int rio_limit; /* max queue length */
+ int rio_pkttime; /* average packet time in usec */
+ int rio_flags; /* see below */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* rio flags */
+#define RIOF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define RIOF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define RIOF_ECN (RIOF_ECN4 | RIOF_ECN6)
+#define RIOF_CLEARDSCP 0x200 /* clear diffserv codepoint */
+
+#ifdef ALTQ3_COMPAT
+/*
+ * IOCTLs for RIO
+ */
+#define RIO_IF_ATTACH _IOW('Q', 1, struct rio_interface)
+#define RIO_IF_DETACH _IOW('Q', 2, struct rio_interface)
+#define RIO_ENABLE _IOW('Q', 3, struct rio_interface)
+#define RIO_DISABLE _IOW('Q', 4, struct rio_interface)
+#define RIO_CONFIG _IOWR('Q', 6, struct rio_conf)
+#define RIO_GETSTATS _IOWR('Q', 12, struct rio_stats)
+#define RIO_SETDEFAULTS _IOW('Q', 30, struct redparams[RIO_NDROPPREC])
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+typedef struct rio {
+ /* per drop precedence structure */
+ struct dropprec_state {
+ /* red parameters */
+ int inv_pmax; /* inverse of max drop probability */
+ int th_min; /* red min threshold */
+ int th_max; /* red max threshold */
+
+ /* variables for internal use */
+ int th_min_s; /* th_min scaled by avgshift */
+ int th_max_s; /* th_max scaled by avgshift */
+ int probd; /* drop probability denominator */
+
+ int qlen; /* queue length */
+ int avg; /* (scaled) queue length average */
+ int count; /* packet count since the last dropped/
+ marked packet */
+ int idle; /* queue was empty */
+ int old; /* avg is above th_min */
+ struct timeval last; /* timestamp when queue becomes idle */
+ } rio_precstate[RIO_NDROPPREC];
+
+ int rio_wshift; /* log(red_weight) */
+ int rio_weight; /* weight for EWMA */
+ struct wtab *rio_wtab; /* weight table */
+
+ int rio_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int rio_flags; /* rio flags */
+
+ u_int8_t rio_codepoint; /* codepoint value to tag packets */
+ u_int8_t rio_codepointmask; /* codepoint mask bits */
+
+ struct redstats q_stats[RIO_NDROPPREC]; /* statistics */
+} rio_t;
+
+#ifdef ALTQ3_COMPAT
+typedef struct rio_queue {
+ struct rio_queue *rq_next; /* next red_state in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q;
+
+ rio_t *rq_rio;
+} rio_queue_t;
+#endif /* ALTQ3_COMPAT */
+
+extern rio_t *rio_alloc(int, struct redparams *, int, int);
+extern void rio_destroy(rio_t *);
+extern void rio_getstats(rio_t *, struct redstats *);
+extern int rio_addq(rio_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *);
+extern struct mbuf *rio_getq(rio_t *, class_queue_t *);
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_RIO_H_ */
diff --git a/freebsd/sys/net/altq/altq_rmclass.c b/freebsd/sys/net/altq/altq_rmclass.c
new file mode 100644
index 00000000..160884e2
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_rmclass.c
@@ -0,0 +1,1841 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * LBL code modified by speer@eng.sun.com, May 1997.
+ * For questions and/or comments, please send mail to cbq@ee.lbl.gov
+ *
+ * @(#)rm_class.c 1.48 97/12/05 SMI
+ * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $
+ * $FreeBSD$
+ */
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/time.h>
+#ifdef ALTQ3_COMPAT
+#include <sys/kernel.h>
+#endif
+
+#include <net/if.h>
+#include <net/if_var.h>
+#ifdef ALTQ3_COMPAT
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#endif
+
+#include <net/altq/if_altq.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_rmclass.h>
+#include <net/altq/altq_rmclass_debug.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+
+/*
+ * Local Macros
+ */
+
+#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }
+
+/*
+ * Local routines.
+ */
+
+static int rmc_satisfied(struct rm_class *, struct timeval *);
+static void rmc_wrr_set_weights(struct rm_ifdat *);
+static void rmc_depth_compute(struct rm_class *);
+static void rmc_depth_recompute(rm_class_t *);
+
+static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
+static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int);
+
+static int _rmc_addq(rm_class_t *, mbuf_t *);
+static void _rmc_dropq(rm_class_t *);
+static mbuf_t *_rmc_getq(rm_class_t *);
+static mbuf_t *_rmc_pollq(rm_class_t *);
+
+static int rmc_under_limit(struct rm_class *, struct timeval *);
+static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
+static void rmc_drop_action(struct rm_class *);
+static void rmc_restart(struct rm_class *);
+static void rmc_root_overlimit(struct rm_class *, struct rm_class *);
+
+#define BORROW_OFFTIME
+/*
+ * BORROW_OFFTIME (experimental):
+ * borrow the offtime of the class borrowing from.
+ * the reason is that when its own offtime is set, the class is unable
+ * to borrow much, especially when cutoff is taking effect.
+ * but when the borrowed class is overloaded (avgidle is close to minidle),
+ * use the borrowing class's offtime to avoid overload.
+ */
+#define ADJUST_CUTOFF
+/*
+ * ADJUST_CUTOFF (experimental):
+ * if no underlimit class is found due to cutoff, increase cutoff and
+ * retry the scheduling loop.
+ * also, don't invoke delay_actions while cutoff is taking effect,
+ * since a sleeping class won't have a chance to be scheduled in the
+ * next loop.
+ *
+ * the heuristics for setting the top-level variable (cutoff_) become:
+ * 1. if a packet arrives for a not-overlimit class, set cutoff
+ * to the depth of the class.
+ * 2. if cutoff is i, and a packet arrives for an overlimit class
+ * with an underlimit ancestor at a lower level than i (say j),
+ * then set cutoff to j.
+ * 3. at scheduling a packet, if there is no underlimit class
+ * due to the current cutoff level, increase cutoff by 1 and
+ * then try to schedule again.
+ */
+
+/*
+ * rm_class_t *
+ * rmc_newclass(...) - Create a new resource management class at priority
+ * 'pri' on the interface given by 'ifd'.
+ *
+ * nsecPerByte is the data rate of the interface in nanoseconds/byte.
+ * E.g., 800 for a 10Mb/s ethernet. If the class gets less
+ * than 100% of the bandwidth, this number should be the
+ * 'effective' rate for the class. Let f be the
+ * bandwidth fraction allocated to this class, and let
+ * nsPerByte be the data rate of the output link in
+ * nanoseconds/byte. Then nsecPerByte is set to
+ * nsPerByte / f. E.g., 1600 (= 800 / .5)
+ * for a class that gets 50% of an ethernet's bandwidth.
+ *
+ * action the routine to call when the class is over limit.
+ *
+ * maxq max allowable queue size for class (in packets).
+ *
+ * parent parent class pointer.
+ *
+ * borrow class to borrow from (should be either 'parent' or null).
+ *
+ * maxidle max value allowed for class 'idle' time estimate (this
+ * parameter determines how large an initial burst of packets
+ *			can be before the overlimit action is invoked).
+ *
+ * offtime how long 'delay' action will delay when class goes over
+ * limit (this parameter determines the steady-state burst
+ * size when a class is running over its limit).
+ *
+ * Maxidle and offtime have to be computed from the following: If the
+ * average packet size is s, the bandwidth fraction allocated to this
+ * class is f, we want to allow b packet bursts, and the gain of the
+ * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
+ *
+ * ptime = s * nsPerByte * (1 - f) / f
+ * maxidle = ptime * (1 - g^b) / g^b
+ * minidle = -ptime * (1 / (f - 1))
+ *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
+ *
+ * Operationally, it's convenient to specify maxidle & offtime in units
+ * independent of the link bandwidth so the maxidle & offtime passed to
+ * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
+ * (The constant factor is a scale factor needed to make the parameters
+ * integers. This scaling also means that the 'unscaled' values of
+ * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
+ * not nanoseconds.) Also note that the 'idle' filter computation keeps
+ * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
+ * maxidle also must be scaled upward by this value. Thus, the passed
+ * values for maxidle and offtime can be computed as follows:
+ *
+ * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
+ * offtime = offtime * 8 / (1000 * nsecPerByte)
+ *
+ * When USE_HRTIME is employed, then maxidle and offtime become:
+ *	maxidle = maxidle * (8.0 / nsecPerByte);
+ * offtime = offtime * (8.0 / nsecPerByte);
+ */
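+
+/*
+ * e.g., a class given the full bandwidth of a 10Mb/s ethernet has
+ * nsecPerByte = 800, so allotment_ becomes
+ * RM_NS_PER_SEC / 800 = 1250000 bytes/sec; a class limited to half
+ * that bandwidth has nsecPerByte = 1600 and half the allotment.
+ */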
+struct rm_class *
+rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
+ void (*action)(rm_class_t *, rm_class_t *), int maxq,
+ struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
+ int minidle, u_int offtime, int pktsize, int flags)
+{
+ struct rm_class *cl;
+ struct rm_class *peer;
+ int s;
+
+ if (pri >= RM_MAXPRIO)
+ return (NULL);
+#ifndef ALTQ_RED
+ if (flags & RMCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: RED not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_RIO
+ if (flags & RMCF_RIO) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: RIO not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_CODEL
+ if (flags & RMCF_CODEL) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: CODEL not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cl == NULL)
+ return (NULL);
+ CALLOUT_INIT(&cl->callout_);
+ cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cl->q_ == NULL) {
+ free(cl, M_DEVBUF);
+ return (NULL);
+ }
+
+ /*
+ * Class initialization.
+ */
+ cl->children_ = NULL;
+ cl->parent_ = parent;
+ cl->borrow_ = borrow;
+ cl->leaf_ = 1;
+ cl->ifdat_ = ifd;
+ cl->pri_ = pri;
+ cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+ cl->depth_ = 0;
+ cl->qthresh_ = 0;
+ cl->ns_per_byte_ = nsecPerByte;
+
+ qlimit(cl->q_) = maxq;
+ qtype(cl->q_) = Q_DROPHEAD;
+ qlen(cl->q_) = 0;
+ cl->flags_ = flags;
+
+#if 1 /* minidle is also scaled in ALTQ */
+ cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
+ if (cl->minidle_ > 0)
+ cl->minidle_ = 0;
+#else
+ cl->minidle_ = minidle;
+#endif
+ cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+ if (cl->maxidle_ == 0)
+ cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+ cl->avgidle_ = cl->maxidle_;
+ cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+ if (cl->offtime_ == 0)
+ cl->offtime_ = 1;
+#else
+ cl->avgidle_ = 0;
+ cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+ cl->overlimit = action;
+
+#ifdef ALTQ_RED
+ if (flags & (RMCF_RED|RMCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & RMCF_ECN)
+ red_flags |= REDF_ECN;
+ if (flags & RMCF_FLOWVALVE)
+ red_flags |= REDF_FLOWVALVE;
+#ifdef ALTQ_RIO
+ if (flags & RMCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ red_pkttime = nsecPerByte * pktsize / 1000;
+
+ if (flags & RMCF_RED) {
+ cl->red_ = red_alloc(0, 0,
+ qlimit(cl->q_) * 10/100,
+ qlimit(cl->q_) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->red_ != NULL)
+ qtype(cl->q_) = Q_RED;
+ }
+#ifdef ALTQ_RIO
+ else {
+ cl->red_ = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->red_ != NULL)
+ qtype(cl->q_) = Q_RIO;
+ }
+#endif
+ }
+#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (flags & RMCF_CODEL) {
+ cl->codel_ = codel_alloc(5, 100, 0);
+ if (cl->codel_ != NULL)
+ qtype(cl->q_) = Q_CODEL;
+ }
+#endif
+
+ /*
+ * put the class into the class tree
+ */
+ s = splnet();
+ IFQ_LOCK(ifd->ifq_);
+ if ((peer = ifd->active_[pri]) != NULL) {
+ /* find the last class at this pri */
+ cl->peer_ = peer;
+ while (peer->peer_ != ifd->active_[pri])
+ peer = peer->peer_;
+ peer->peer_ = cl;
+ } else {
+ ifd->active_[pri] = cl;
+ cl->peer_ = cl;
+ }
+
+ if (cl->parent_) {
+ cl->next_ = parent->children_;
+ parent->children_ = cl;
+ parent->leaf_ = 0;
+ }
+
+ /*
+ * Compute the depth of this class and its ancestors in the class
+ * hierarchy.
+ */
+ rmc_depth_compute(cl);
+
+ /*
+ * If CBQ's WRR is enabled, then initialize the class WRR state.
+ */
+ if (ifd->wrr_) {
+ ifd->num_[pri]++;
+ ifd->alloc_[pri] += cl->allotment_;
+ rmc_wrr_set_weights(ifd);
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+ return (cl);
+}
+
+int
+rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
+ int minidle, u_int offtime, int pktsize)
+{
+ struct rm_ifdat *ifd;
+ u_int old_allotment;
+ int s;
+
+ ifd = cl->ifdat_;
+ old_allotment = cl->allotment_;
+
+ s = splnet();
+ IFQ_LOCK(ifd->ifq_);
+ cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+ cl->qthresh_ = 0;
+ cl->ns_per_byte_ = nsecPerByte;
+
+ qlimit(cl->q_) = maxq;
+
+#if 1 /* minidle is also scaled in ALTQ */
+ cl->minidle_ = (minidle * nsecPerByte) / 8;
+ if (cl->minidle_ > 0)
+ cl->minidle_ = 0;
+#else
+ cl->minidle_ = minidle;
+#endif
+ cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+ if (cl->maxidle_ == 0)
+ cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+ cl->avgidle_ = cl->maxidle_;
+ cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+ if (cl->offtime_ == 0)
+ cl->offtime_ = 1;
+#else
+ cl->avgidle_ = 0;
+ cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+
+ /*
+ * If CBQ's WRR is enabled, then initialize the class WRR state.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
+ rmc_wrr_set_weights(ifd);
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
+ * the appropriate run robin weights for the CBQ weighted round robin
+ * algorithm.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_wrr_set_weights(struct rm_ifdat *ifd)
+{
+ int i;
+ struct rm_class *cl, *clh;
+
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ /*
+ * This is inverted from that of the simulator to
+ * maintain precision.
+ */
+ if (ifd->num_[i] == 0)
+ ifd->M_[i] = 0;
+ else
+ ifd->M_[i] = ifd->alloc_[i] /
+ (ifd->num_[i] * ifd->maxpkt_);
+ /*
+ * Compute the weighted allotment for each class.
+ * This takes the expensive div instruction out
+ * of the main loop for the wrr scheduling path.
+ * These only get recomputed when a class comes or
+ * goes.
+ */
+ if (ifd->active_[i] != NULL) {
+ clh = cl = ifd->active_[i];
+ do {
+ /* safe-guard for slow link or alloc_ == 0 */
+ if (ifd->M_[i] == 0)
+ cl->w_allotment_ = 0;
+ else
+ cl->w_allotment_ = cl->allotment_ /
+ ifd->M_[i];
+ cl = cl->peer_;
+ } while ((cl != NULL) && (cl != clh));
+ }
+ }
+}
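+
+/*
+ * e.g., two classes at one priority with allotments of 1250000 and
+ * 625000 bytes/sec on an interface with maxpkt_ = 1500 give
+ * M_ = 1875000 / (2 * 1500) = 625, hence per-round byte quanta
+ * (w_allotment_) of 2000 and 1000, i.e. service in a 2:1 ratio.
+ */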
+
+int
+rmc_get_weight(struct rm_ifdat *ifd, int pri)
+{
+ if ((pri >= 0) && (pri < RM_MAXPRIO))
+ return (ifd->M_[pri]);
+ else
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_depth_compute(struct rm_class *cl) - This function computes the
+ * appropriate depth of class 'cl' and its ancestors.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_compute(struct rm_class *cl)
+{
+ rm_class_t *t = cl, *p;
+
+ /*
+ * Recompute the depth for the branch of the tree.
+ */
+ while (t != NULL) {
+ p = t->parent_;
+ if (p && (t->depth_ >= p->depth_)) {
+ p->depth_ = t->depth_ + 1;
+ t = p;
+ } else
+ t = NULL;
+ }
+}
+
+/*
+ * static void
+ * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
+ * the depth of the tree after a class has been deleted.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_recompute(rm_class_t *cl)
+{
+#if 1 /* ALTQ */
+ rm_class_t *p, *t;
+
+ p = cl;
+ while (p != NULL) {
+ if ((t = p->children_) == NULL) {
+ p->depth_ = 0;
+ } else {
+ int cdepth = 0;
+
+ while (t != NULL) {
+ if (t->depth_ > cdepth)
+ cdepth = t->depth_;
+ t = t->next_;
+ }
+
+ if (p->depth_ == cdepth + 1)
+ /* no change to this parent */
+ return;
+
+ p->depth_ = cdepth + 1;
+ }
+
+ p = p->parent_;
+ }
+#else
+ rm_class_t *t;
+
+ if (cl->depth_ >= 1) {
+ if (cl->children_ == NULL) {
+ cl->depth_ = 0;
+ } else if ((t = cl->children_) != NULL) {
+ while (t != NULL) {
+ if (t->children_ != NULL)
+ rmc_depth_recompute(t);
+ t = t->next_;
+ }
+ } else
+ rmc_depth_compute(cl);
+ }
+#endif
+}
+
+/*
+ * void
+ * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
+ * function deletes a class from the link-sharing structure and frees
+ * all resources associated with the class.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
+{
+ struct rm_class *p, *head, *previous;
+ int s;
+
+ ASSERT(cl->children_ == NULL);
+
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+
+ s = splnet();
+ IFQ_LOCK(ifd->ifq_);
+ /*
+ * Free packets in the packet queue.
+ * XXX - this may not be a desired behavior. Packets should be
+ * re-queued.
+ */
+ rmc_dropall(cl);
+
+ /*
+	 * If the class has a parent, then remove the class from its
+	 * parent's children chain.
+ */
+ if (cl->parent_ != NULL) {
+ head = cl->parent_->children_;
+ p = previous = head;
+ if (head->next_ == NULL) {
+ ASSERT(head == cl);
+ cl->parent_->children_ = NULL;
+ cl->parent_->leaf_ = 1;
+ } else while (p != NULL) {
+ if (p == cl) {
+ if (cl == head)
+ cl->parent_->children_ = cl->next_;
+ else
+ previous->next_ = cl->next_;
+ cl->next_ = NULL;
+ p = NULL;
+ } else {
+ previous = p;
+ p = p->next_;
+ }
+ }
+ }
+
+ /*
+ * Delete class from class priority peer list.
+ */
+ if ((p = ifd->active_[cl->pri_]) != NULL) {
+ /*
+ * If there is more than one member of this priority
+ * level, then look for class(cl) in the priority level.
+ */
+ if (p != p->peer_) {
+ while (p->peer_ != cl)
+ p = p->peer_;
+ p->peer_ = cl->peer_;
+
+ if (ifd->active_[cl->pri_] == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ } else {
+ ASSERT(p == cl);
+ ifd->active_[cl->pri_] = NULL;
+ }
+ }
+
+ /*
+ * Recompute the WRR weights.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] -= cl->allotment_;
+ ifd->num_[cl->pri_]--;
+ rmc_wrr_set_weights(ifd);
+ }
+
+ /*
+ * Re-compute the depth of the tree.
+ */
+#if 1 /* ALTQ */
+ rmc_depth_recompute(cl->parent_);
+#else
+ rmc_depth_recompute(ifd->root_);
+#endif
+
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+
+ /*
+ * Free the class structure.
+ */
+ if (cl->red_ != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ rio_destroy((rio_t *)cl->red_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ red_destroy(cl->red_);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ codel_destroy(cl->codel_);
+#endif
+ }
+ free(cl->q_, M_DEVBUF);
+ free(cl, M_DEVBUF);
+}
+
+
+/*
+ * void
+ * rmc_init(...) - Initialize the resource management data structures
+ * associated with the output portion of interface 'ifp'. 'ifd' is
+ * where the structures will be built (for backwards compatibility, the
+ * structures aren't kept in the ifnet struct). 'nsecPerByte'
+ * gives the link speed (inverse of bandwidth) in nanoseconds/byte.
+ * 'restart' is the driver-specific routine that the generic 'delay
+ * until under limit' action will call to restart output. `maxq'
+ * is the queue size of the 'link' & 'default' classes. 'maxqueued'
+ * is the maximum number of packets that the resource management
+ * code will allow to be queued 'downstream' (this is typically 1).
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
+ void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
+ int minidle, u_int offtime, int flags)
+{
+ int i, mtu;
+
+ /*
+ * Initialize the CBQ tracing/debug facility.
+ */
+ CBQTRACEINIT();
+
+ bzero((char *)ifd, sizeof (*ifd));
+ mtu = ifq->altq_ifp->if_mtu;
+ ifd->ifq_ = ifq;
+ ifd->restart = restart;
+ ifd->maxqueued_ = maxqueued;
+ ifd->ns_per_byte_ = nsecPerByte;
+ ifd->maxpkt_ = mtu;
+ ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
+ ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
+#if 1
+ ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
+ if (mtu * nsecPerByte > 10 * 1000000)
+ ifd->maxiftime_ /= 4;
+#endif
+
+ reset_cutoff(ifd);
+ CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
+
+ /*
+ * Initialize the CBQ's WRR state.
+ */
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ ifd->alloc_[i] = 0;
+ ifd->M_[i] = 0;
+ ifd->num_[i] = 0;
+ ifd->na_[i] = 0;
+ ifd->active_[i] = NULL;
+ }
+
+ /*
+ * Initialize current packet state.
+ */
+ ifd->qi_ = 0;
+ ifd->qo_ = 0;
+ for (i = 0; i < RM_MAXQUEUED; i++) {
+ ifd->class_[i] = NULL;
+ ifd->curlen_[i] = 0;
+ ifd->borrowed_[i] = NULL;
+ }
+
+ /*
+ * Create the root class of the link-sharing structure.
+ */
+ if ((ifd->root_ = rmc_newclass(0, ifd,
+ nsecPerByte,
+ rmc_root_overlimit, maxq, 0, 0,
+ maxidle, minidle, offtime,
+ 0, 0)) == NULL) {
+ printf("rmc_init: root class not allocated\n");
+		return;
+ }
+ ifd->root_->depth_ = 0;
+}
+
+/*
+ * void
+ * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
+ * mbuf 'm' to queue for resource class 'cl'. This routine is called
+ * by a driver's if_output routine. This routine must be called with
+ * output packet completion interrupts locked out (to avoid racing with
+ * rmc_dequeue_next).
+ *
+ * Returns: 0 on successful queueing
+ * -1 when packet drop occurs
+ */
+int
+rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
+{
+ struct timeval now;
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int cpri = cl->pri_;
+ int is_empty = qempty(cl->q_);
+
+ RM_GETTIME(now);
+ if (ifd->cutoff_ > 0) {
+ if (TV_LT(&cl->undertime_, &now)) {
+ if (ifd->cutoff_ > cl->depth_)
+ ifd->cutoff_ = cl->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
+ }
+#if 1 /* ALTQ */
+ else {
+ /*
+ * the class is overlimit. if the class has
+ * underlimit ancestors, set cutoff to the lowest
+ * depth among them.
+ */
+ struct rm_class *borrow = cl->borrow_;
+
+ while (borrow != NULL &&
+ borrow->depth_ < ifd->cutoff_) {
+ if (TV_LT(&borrow->undertime_, &now)) {
+ ifd->cutoff_ = borrow->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
+ break;
+ }
+ borrow = borrow->borrow_;
+ }
+ }
+#else /* !ALTQ */
+ else if ((ifd->cutoff_ > 1) && cl->borrow_) {
+ if (TV_LT(&cl->borrow_->undertime_, &now)) {
+ ifd->cutoff_ = cl->borrow_->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob',
+ cl->borrow_->depth_);
+ }
+ }
+#endif /* !ALTQ */
+ }
+
+ if (_rmc_addq(cl, m) < 0)
+ /* failed */
+ return (-1);
+
+ if (is_empty) {
+ CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
+ ifd->na_[cpri]++;
+ }
+
+ if (qlen(cl->q_) > qlimit(cl->q_)) {
+ /* note: qlimit can be set to 0 or 1 */
+ rmc_drop_action(cl);
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * void
+ * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
+ * classes to see if they are satisfied.
+ */
+
+static void
+rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
+{
+ int i;
+ rm_class_t *p, *bp;
+
+ for (i = RM_MAXPRIO - 1; i >= 0; i--) {
+ if ((bp = ifd->active_[i]) != NULL) {
+ p = bp;
+ do {
+ if (!rmc_satisfied(p, now)) {
+ ifd->cutoff_ = p->depth_;
+ return;
+ }
+ p = p->peer_;
+ } while (p != bp);
+ }
+ }
+
+ reset_cutoff(ifd);
+}
+
+/*
+ * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
+ */
+
+static int
+rmc_satisfied(struct rm_class *cl, struct timeval *now)
+{
+ rm_class_t *p;
+
+ if (cl == NULL)
+ return (1);
+ if (TV_LT(now, &cl->undertime_))
+ return (1);
+ if (cl->depth_ == 0) {
+ if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
+ return (0);
+ else
+ return (1);
+ }
+ if (cl->children_ != NULL) {
+ p = cl->children_;
+ while (p != NULL) {
+ if (!rmc_satisfied(p, now))
+ return (0);
+ p = p->next_;
+ }
+ }
+
+ return (1);
+}
+
+/*
+ * Return 1 if class 'cl' is under limit or can borrow from a parent,
+ * 0 if overlimit. As a side-effect, this routine will invoke the
+ * class overlimit action if the class is overlimit.
+ */
+
+static int
+rmc_under_limit(struct rm_class *cl, struct timeval *now)
+{
+ rm_class_t *p = cl;
+ rm_class_t *top;
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ifd->borrowed_[ifd->qi_] = NULL;
+ /*
+ * If cl is the root class, then always return that it is
+ * underlimit. Otherwise, check to see if the class is underlimit.
+ */
+ if (cl->parent_ == NULL)
+ return (1);
+
+ if (cl->sleeping_) {
+ if (TV_LT(now, &cl->undertime_))
+ return (0);
+
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+ return (1);
+ }
+
+ top = NULL;
+ while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
+ if (((cl = cl->borrow_) == NULL) ||
+ (cl->depth_ > ifd->cutoff_)) {
+#ifdef ADJUST_CUTOFF
+ if (cl != NULL)
+ /* cutoff is taking effect, just
+ return false without calling
+ the delay action. */
+ return (0);
+#endif
+#ifdef BORROW_OFFTIME
+ /*
+ * check if the class can borrow offtime too.
+ * borrow offtime from the top of the borrow
+ * chain if the top class is not overloaded.
+ */
+ if (cl != NULL) {
+ /* cutoff is taking effect, use this class as top. */
+ top = cl;
+ CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
+ }
+ if (top != NULL && top->avgidle_ == top->minidle_)
+ top = NULL;
+ p->overtime_ = *now;
+ (p->overlimit)(p, top);
+#else
+ p->overtime_ = *now;
+ (p->overlimit)(p, NULL);
+#endif
+ return (0);
+ }
+ top = cl;
+ }
+
+ if (cl != p)
+ ifd->borrowed_[ifd->qi_] = cl;
+ return (1);
+}
+
+/*
+ * _rmc_wrr_dequeue_next() - This is the scheduler for WRR, as opposed
+ * to packet-by-packet round robin.
+ *
+ * The heart of the weighted round-robin scheduler, which decides which
+ * class next gets to send a packet. Highest priority first, then
+ * weighted round-robin within priorities.
+ *
+ * Each able-to-send class gets to send until its byte allocation is
+ * exhausted. Thus, the active pointer is only changed after a class has
+ * exhausted its allocation.
+ *
+ * If the scheduler finds no class that is underlimit or able to borrow,
+ * then the first class found that had a nonzero queue and is allowed to
+ * borrow gets to send.
+ */
+
+static mbuf_t *
+_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
+{
+ struct rm_class *cl = NULL, *first = NULL;
+ u_int deficit;
+ int cpri;
+ mbuf_t *m;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ if (ifd->efficient_) {
+ /* check if this class is overlimit */
+ if (cl->undertime_.tv_sec != 0 &&
+ rmc_under_limit(cl, &now) == 0)
+ first = cl;
+ }
+ ifd->pollcache_ = NULL;
+ goto _wrr_out;
+	} else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ deficit = 0;
+ /*
+ * Loop through twice for a priority level, if some class
+ * was unable to send a packet the first round because
+ * of the weighted round-robin mechanism.
+ * During the second loop at this level, deficit==2.
+ * (This second loop is not needed if for every class,
+		 * "M[cl->pri_]" times "cl->allotment" is greater than
+ * the byte size for the largest packet in the class.)
+ */
+ _wrr_loop:
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
+ cl->bytes_alloc_ += cl->w_allotment_;
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now)) {
+ if (cl->bytes_alloc_ > 0 || deficit > 1)
+ goto _wrr_out;
+
+ /* underlimit but no alloc */
+ deficit = 1;
+#if 1
+ ifd->borrowed_[ifd->qi_] = NULL;
+#endif
+ }
+ else if (first == NULL && cl->borrow_ != NULL)
+ first = cl; /* borrowing candidate */
+ }
+
+ cl->bytes_alloc_ = 0;
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+
+ if (deficit == 1) {
+ /* first loop found an underlimit class with deficit */
+ /* Loop on same priority level, with new deficit. */
+ deficit = 2;
+ goto _wrr_loop;
+ }
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect,
+ * increase cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
+
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+	 * Dequeue the packet and do the bookkeeping...
+ */
+ _wrr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_wrr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ /*
+ * Update class statistics and link data.
+ */
+ if (cl->bytes_alloc_ > 0)
+ cl->bytes_alloc_ -= m_pktlen(m);
+
+ if ((cl->bytes_alloc_ <= 0) || first == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ else
+ ifd->active_[cl->pri_] = cl;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+		/* mode == ALTDQ_POLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * Dequeue & return next packet from the highest priority class that
+ * has a packet to send & has enough allocation to send it. This
+ * routine is called by a driver whenever it needs a new packet to
+ * output.
+ */
+static mbuf_t *
+_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
+{
+ mbuf_t *m;
+ int cpri;
+ struct rm_class *cl, *first = NULL;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ ifd->pollcache_ = NULL;
+ goto _prr_out;
+ } else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now))
+ goto _prr_out;
+ if (first == NULL && cl->borrow_ != NULL)
+ first = cl;
+ }
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect, increase
+ * cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+	 * Dequeue the packet and do the bookkeeping...
+ */
+ _prr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_prr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ ifd->active_[cpri] = cl->peer_;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+ /* mode == ALTDQ_POLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * mbuf_t *
+ * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
+ * is invoked by the packet driver to get the next packet to be
+ * dequeued and output on the link. If WRR is enabled, then the
+ *	WRR dequeue next routine will determine the next packet to be sent.
+ * Otherwise, packet-by-packet round robin is invoked.
+ *
+ * Returns: NULL, if a packet is not available or if all
+ * classes are overlimit.
+ *
+ * Otherwise, Pointer to the next packet.
+ */
+
+mbuf_t *
+rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
+{
+ if (ifd->queued_ >= ifd->maxqueued_)
+ return (NULL);
+ else if (ifd->wrr_)
+ return (_rmc_wrr_dequeue_next(ifd, mode));
+ else
+ return (_rmc_prr_dequeue_next(ifd, mode));
+}
+
+/*
+ * Update the utilization estimate for the packet that just completed.
+ * The packet's class & the parent(s) of that class all get their
+ * estimators updated. This routine is called by the driver's output-
+ * packet-completion interrupt service routine.
+ */
+
+/*
+ * a macro to approximate "divide by 1000" with a multiplier of about
+ * 0.000999; accurate when the value has enough significant bits.
+ * (on pentium, mul takes 9 cycles but div takes 46!)
+ */
+#define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
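+/*
+ * e.g., 1/1024 + 1/65536 + 1/131072 = 0.00099945..., about 0.055%
+ * below a true divide by 1000.
+ */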
+void
+rmc_update_class_util(struct rm_ifdat *ifd)
+{
+ int idle, avgidle, pktlen;
+ int pkt_time, tidle;
+ rm_class_t *cl, *borrowed;
+ rm_class_t *borrows;
+ struct timeval *nowp;
+
+ /*
+ * Get the most recent completed class.
+ */
+ if ((cl = ifd->class_[ifd->qo_]) == NULL)
+ return;
+
+ pktlen = ifd->curlen_[ifd->qo_];
+ borrowed = ifd->borrowed_[ifd->qo_];
+ borrows = borrowed;
+
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+
+ /*
+ * Run estimator on class and its ancestors.
+ */
+ /*
+ * rm_update_class_util is designed to be called when the
+ * transfer is completed from a xmit complete interrupt,
+ * but most drivers don't implement an upcall for that.
+ * so, just use estimated completion time.
+ * as a result, ifd->qi_ and ifd->qo_ are always synced.
+ */
+ nowp = &ifd->now_[ifd->qo_];
+ /* get pkt_time (for link) in usec */
+#if 1 /* use approximation */
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
+#endif
+#if 1 /* ALTQ4PPP */
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ int iftime;
+
+ /*
+ * make sure the estimated completion time does not go
+ * too far. it can happen when the link layer supports
+ * data compression or the interface speed is set to
+ * a much lower value.
+ */
+ TV_DELTA(&ifd->ifnow_, nowp, iftime);
+ if (iftime+pkt_time < ifd->maxiftime_) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
+ }
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#else
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#endif
+
+ while (cl != NULL) {
+ TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
+ if (idle >= 2000000)
+ /*
+ * this class is idle enough, reset avgidle.
+ * (TV_DELTA returns 2000000 us when delta is large.)
+ */
+ cl->avgidle_ = cl->maxidle_;
+
+ /* get pkt_time (for class) in usec */
+#if 1 /* use approximation */
+ pkt_time = pktlen * cl->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = pktlen * cl->ns_per_byte_ / 1000;
+#endif
+ idle -= pkt_time;
+
+ avgidle = cl->avgidle_;
+ avgidle += idle - (avgidle >> RM_FILTER_GAIN);
+ cl->avgidle_ = avgidle;
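+
+		/*
+		 * avgidle_ stores the estimate scaled up by
+		 * 2^RM_FILTER_GAIN, so the update above is the EWMA
+		 *	est' = est + (idle - est) / 2^RM_FILTER_GAIN
+		 * on the unscaled estimate est.
+		 */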
+
+ /* Are we overlimit ? */
+ if (avgidle <= 0) {
+ CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
+#if 1 /* ALTQ */
+ /*
+ * need some lower bound for avgidle, otherwise
+ * a borrowing class gets unbounded penalty.
+ */
+ if (avgidle < cl->minidle_)
+ avgidle = cl->avgidle_ = cl->minidle_;
+#endif
+ /* set next idle to make avgidle 0 */
+ tidle = pkt_time +
+ (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
+ TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
+ ++cl->stats_.over;
+ } else {
+ cl->avgidle_ =
+ (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
+ cl->undertime_.tv_sec = 0;
+ if (cl->sleeping_) {
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ }
+ }
+
+ if (borrows != NULL) {
+ if (borrows != cl)
+ ++cl->stats_.borrows;
+ else
+ borrows = NULL;
+ }
+ cl->last_ = ifd->ifnow_;
+ cl->last_pkttime_ = pkt_time;
+
+#if 1
+ if (cl->parent_ == NULL) {
+ /* take stats of root class */
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+ }
+#endif
+
+ cl = cl->parent_;
+ }
+
+ /*
+ * Check to see if cutoff needs to set to a new level.
+ */
+ cl = ifd->class_[ifd->qo_];
+ if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
+#if 1 /* ALTQ */
+ if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
+ rmc_tl_satisfied(ifd, nowp);
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#else /* !ALTQ */
+ if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
+ reset_cutoff(ifd);
+#ifdef notdef
+ rmc_tl_satisfied(ifd, &now);
+#endif
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#endif /* !ALTQ */
+ }
+
+ /*
+ * Release class slot
+ */
+ ifd->borrowed_[ifd->qo_] = NULL;
+ ifd->class_[ifd->qo_] = NULL;
+ ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
+ ifd->queued_--;
+}
+
+/*
+ * void
+ * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
+ * over-limit action routines. These get invoked by rmc_under_limit()
+ *	if a class with packets to send is over its bandwidth limit & can't
+ * borrow from a parent class.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_drop_action(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ASSERT(qlen(cl->q_) > 0);
+ _rmc_dropq(cl);
+ if (qempty(cl->q_))
+ ifd->na_[cl->pri_]--;
+}
+
+void
+rmc_dropall(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ if (!qempty(cl->q_)) {
+ _flushq(cl->q_);
+
+ ifd->na_[cl->pri_]--;
+ }
+}
+
+#if (__FreeBSD_version > 300000)
+/* hzto() is removed from FreeBSD-3.0 */
+static int hzto(struct timeval *);
+
+static int
+hzto(tv)
+ struct timeval *tv;
+{
+ struct timeval t2;
+
+ getmicrotime(&t2);
+ t2.tv_sec = tv->tv_sec - t2.tv_sec;
+ t2.tv_usec = tv->tv_usec - t2.tv_usec;
+ return (tvtohz(&t2));
+}
+#endif /* __FreeBSD_version > 300000 */
+
+/*
+ * void
+ * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
+ * delay action routine. It is invoked via rmc_under_limit when the
+ *	packet is discovered to be overlimit.
+ *
+ *	If the delay action is the result of a borrow class being overlimit,
+ *	then delay for the offtime of the borrow class that is overlimit.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
+{
+ int delay, t, extradelay;
+
+ cl->stats_.overactions++;
+ TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
+#ifndef BORROW_OFFTIME
+ delay += cl->offtime_;
+#endif
+
+ if (!cl->sleeping_) {
+ CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
+#ifdef BORROW_OFFTIME
+ if (borrow != NULL)
+ extradelay = borrow->offtime_;
+ else
+#endif
+ extradelay = cl->offtime_;
+
+#ifdef ALTQ
+ /*
+ * XXX recalculate suspend time:
+ * current undertime is (tidle + pkt_time) calculated
+ * from the last transmission.
+ * tidle: time required to bring avgidle back to 0
+ * pkt_time: target waiting time for this class
+ * we need to replace pkt_time by offtime
+ */
+ extradelay -= cl->last_pkttime_;
+#endif
+ if (extradelay > 0) {
+ TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
+ delay += extradelay;
+ }
+
+ cl->sleeping_ = 1;
+ cl->stats_.delays++;
+
+ /*
+ * Since packets are phased randomly with respect to the
+ * clock, 1 tick (the next clock tick) can be an arbitrarily
+ * short time so we have to wait for at least two ticks.
+ * NOTE: If there's no other traffic, we need the timer as
+ * a 'backstop' to restart this class.
+ */
+ if (delay > tick * 2) {
+ /* FreeBSD rounds up the tick */
+ t = hzto(&cl->undertime_);
+ } else
+ t = 2;
+ CALLOUT_RESET(&cl->callout_, t,
+ (timeout_t *)rmc_restart, (caddr_t)cl);
+ }
+}
+
+/*
+ * void
+ * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
+ *	called by the system timer code and is responsible for checking if
+ *	the class is still sleeping (it might have been restarted as a side
+ *	effect of the queue scan on a packet arrival) and, if so, restarting
+ *	output for the class.  Inspecting the class state and restarting
+ *	output require locking the class structure.  In general the driver
+ *	is responsible for locking, but this is the only routine that is not
+ *	called directly or indirectly from the interface driver, so it has
+ *	to know about system locking conventions.  Under BSD, locking is
+ *	done by raising the IPL to splimp, so that's what's implemented
+ *	here.  On a different system this would probably need to be changed.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_restart(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int s;
+
+ s = splnet();
+ IFQ_LOCK(ifd->ifq_);
+ if (cl->sleeping_) {
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+
+ if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
+ CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
+ (ifd->restart)(ifd->ifq_);
+ }
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+}
+
+/*
+ * void
+ * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
+ * handling routine for the root class of the link sharing structure.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
+{
+ panic("rmc_root_overlimit");
+}
+
+/*
+ * Packet queue handling routines.  These localize the effects on the
+ * rest of the code of whether the queues are RED/RIO/CoDel queues or
+ * drop-tail queues.
+ */
+
+static int
+_rmc_addq(rm_class_t *cl, mbuf_t *m)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
+#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ return codel_addq(cl->codel_, cl->q_, m);
+#endif
+
+ if (cl->flags_ & RMCF_CLEARDSCP)
+ write_dsfield(m, cl->pktattr_, 0);
+
+ _addq(cl->q_, m);
+ return (0);
+}
+
+/* note: _rmc_dropq is not called for red */
+static void
+_rmc_dropq(rm_class_t *cl)
+{
+ mbuf_t *m;
+
+ if ((m = _getq(cl->q_)) != NULL)
+ m_freem(m);
+}
+
+static mbuf_t *
+_rmc_getq(rm_class_t *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_getq((rio_t *)cl->red_, cl->q_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_getq(cl->red_, cl->q_);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ return codel_getq(cl->codel_, cl->q_);
+#endif
+ return _getq(cl->q_);
+}
+
+static mbuf_t *
+_rmc_pollq(rm_class_t *cl)
+{
+ return qhead(cl->q_);
+}
+
+#ifdef CBQ_TRACE
+
+struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
+struct cbqtrace *cbqtrace_ptr = NULL;
+int cbqtrace_count;
+
+/*
+ * DDB hook to trace cbq events:
+ * the last NCBQTRACE events are held in a circular buffer.
+ * use "call cbqtrace_dump(N)" to display 20 events starting at the Nth.
+ */
+void cbqtrace_dump(int);
+static char *rmc_funcname(void *);
+
+static struct rmc_funcs {
+ void *func;
+ char *name;
+} rmc_funcs[] =
+{
+	{ rmc_init,		"rmc_init" },
+	{ rmc_queue_packet,	"rmc_queue_packet" },
+	{ rmc_under_limit,	"rmc_under_limit" },
+	{ rmc_update_class_util, "rmc_update_class_util" },
+	{ rmc_delay_action,	"rmc_delay_action" },
+	{ rmc_restart,		"rmc_restart" },
+	{ _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next" },
+	{ NULL,			NULL }
+};
+
+static char *rmc_funcname(void *func)
+{
+ struct rmc_funcs *fp;
+
+ for (fp = rmc_funcs; fp->func != NULL; fp++)
+ if (fp->func == func)
+ return (fp->name);
+ return ("unknown");
+}
+
+void cbqtrace_dump(int counter)
+{
+ int i, *p;
+ char *cp;
+
+ counter = counter % NCBQTRACE;
+ p = (int *)&cbqtrace_buffer[counter];
+
+ for (i=0; i<20; i++) {
+ printf("[0x%x] ", *p++);
+ printf("%s: ", rmc_funcname((void *)*p++));
+ cp = (char *)p++;
+ printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
+ printf("%d\n",*p++);
+
+ if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
+ p = (int *)cbqtrace_buffer;
+ }
+}
+#endif /* CBQ_TRACE */
+#endif /* ALTQ_CBQ */
+
+#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \
+ defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL)
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
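+/*
+ * The class_queue_t primitives below keep packets on a circular,
+ * singly-linked list threaded through m_nextpkt: only the tail is
+ * stored (qtail(q)), and the tail's m_nextpkt points back to the head,
+ * so both tail enqueue and head dequeue are O(1).  With packets
+ * m1..mN queued (descriptive sketch only):
+ *
+ *	qtail(q) == mN,  mN->m_nextpkt == m1 (the head),
+ *	mi->m_nextpkt == mi+1 for i < N.
+ */
+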
+void
+_addq(class_queue_t *q, mbuf_t *m)
+{
+ mbuf_t *m0;
+
+ if ((m0 = qtail(q)) != NULL)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ m0 = m;
+ m0->m_nextpkt = m;
+ qtail(q) = m;
+ qlen(q)++;
+}
+
+mbuf_t *
+_getq(class_queue_t *q)
+{
+ mbuf_t *m, *m0;
+
+ if ((m = qtail(q)) == NULL)
+ return (NULL);
+ if ((m0 = m->m_nextpkt) != m)
+ m->m_nextpkt = m0->m_nextpkt;
+ else {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ }
+ qlen(q)--;
+ m0->m_nextpkt = NULL;
+ return (m0);
+}
+
+/* drop a packet at the tail of the queue */
+mbuf_t *
+_getq_tail(class_queue_t *q)
+{
+ mbuf_t *m, *m0, *prev;
+
+ if ((m = m0 = qtail(q)) == NULL)
+ return NULL;
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m) {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ } else
+ qtail(q) = prev;
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+/* randomly select a packet in the queue */
+mbuf_t *
+_getq_random(class_queue_t *q)
+{
+ struct mbuf *m;
+ int i, n;
+
+ if ((m = qtail(q)) == NULL)
+ return NULL;
+ if (m->m_nextpkt == m) {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ } else {
+ struct mbuf *prev = NULL;
+
+ n = arc4random() % qlen(q) + 1;
+ for (i = 0; i < n; i++) {
+ prev = m;
+ m = m->m_nextpkt;
+ }
+ prev->m_nextpkt = m->m_nextpkt;
+ if (m == qtail(q))
+ qtail(q) = prev;
+ }
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+void
+_removeq(class_queue_t *q, mbuf_t *m)
+{
+ mbuf_t *m0, *prev;
+
+ m0 = qtail(q);
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else if (qtail(q) == m)
+ qtail(q) = prev;
+ qlen(q)--;
+}
+
+void
+_flushq(class_queue_t *q)
+{
+ mbuf_t *m;
+
+ while ((m = _getq(q)) != NULL)
+ m_freem(m);
+ ASSERT(qlen(q) == 0);
+}
+
+#endif /* !__GNUC__ || ALTQ_DEBUG */
+#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ || ALTQ_CODEL */
diff --git a/freebsd/sys/net/altq/altq_rmclass.h b/freebsd/sys/net/altq/altq_rmclass.h
new file mode 100644
index 00000000..6130c4ff
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_rmclass.h
@@ -0,0 +1,273 @@
+/*-
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_rmclass.h,v 1.10 2003/08/20 23:30:23 itojun Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_RMCLASS_H_
+#define _ALTQ_ALTQ_RMCLASS_H_
+
+#include <net/altq/altq_classq.h>
+
+/* #pragma ident "@(#)rm_class.h 1.20 97/10/23 SMI" */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RM_MAXPRIO 8 /* Max priority */
+
+#ifdef _KERNEL
+
+typedef struct mbuf mbuf_t;
+typedef struct rm_ifdat rm_ifdat_t;
+typedef struct rm_class rm_class_t;
+
+struct red;
+
+/*
+ * Macros for dealing with time values. We assume all times are
+ * 'timevals'. `microtime' is used to get the best available clock
+ * resolution. If `microtime' *doesn't* return a value that's about
+ * ten times smaller than the average packet time on the fastest
+ * link that will use these routines, a slightly different clock
+ * scheme than this one should be used.
+ * (Bias due to truncation error in this scheme will overestimate utilization
+ * and discriminate against high bandwidth classes. To remove this bias an
+ * integrator needs to be added. The simplest integrator uses a history of
+ * 10 * avg.packet.time / min.tick.time packet completion entries. This is
+ * straightforward to add, but we don't want to pay the extra memory
+ * traffic to maintain it if it's not necessary (occasionally a vendor
+ * accidentally builds a workstation with a decent clock - e.g., Sun & HP).)
+ */
+
+#define RM_GETTIME(now) microtime(&now)
+
+#define TV_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \
+ (((a)->tv_usec < (b)->tv_usec) && ((a)->tv_sec <= (b)->tv_sec)))
+
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a)->tv_usec - (b)->tv_usec; \
+ if ((xxs = (a)->tv_sec - (b)->tv_sec)) { \
+ switch (xxs) { \
+ default: \
+ /* if (xxs < 0) \
+ printf("rm_class: bogus time values\n"); */ \
+ delta = 0; \
+ /* fall through */ \
+ case 2: \
+ delta += 1000000; \
+ /* fall through */ \
+ case 1: \
+ delta += 1000000; \
+ break; \
+ } \
+ } \
+}
+
+#define TV_ADD_DELTA(a, delta, res) { \
+ register int xxus = (a)->tv_usec + (delta); \
+ \
+ (res)->tv_sec = (a)->tv_sec; \
+ while (xxus >= 1000000) { \
+ ++((res)->tv_sec); \
+ xxus -= 1000000; \
+ } \
+ (res)->tv_usec = xxus; \
+}
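+
+/*
+ * Illustrative sketch of how these macros compose (assumed usage, not
+ * part of the build): measure the time since a class last sent, then
+ * project a future time from it.
+ *
+ *	struct timeval now;
+ *	int idle;
+ *
+ *	RM_GETTIME(now);
+ *	TV_DELTA(&now, &cl->last_, idle);		(idle in usec)
+ *	TV_ADD_DELTA(&now, idle, &cl->undertime_);	(now + idle usec)
+ *
+ * Note that TV_DELTA saturates: any gap of two seconds or more yields
+ * 2000000 us, which is why rmc_update_class_util treats an idle time
+ * of 2000000 as "idle long enough to reset avgidle".
+ */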
+
+#define RM_TIMEOUT 2 /* 1 Clock tick. */
+
+#if 1
+#define RM_MAXQUEUED 1 /* this isn't used in ALTQ/CBQ */
+#else
+#define RM_MAXQUEUED 16 /* Max number of packets downstream of CBQ */
+#endif
+#define RM_MAXQUEUE 64 /* Max queue length */
+#define RM_FILTER_GAIN 5 /* log2 of gain, e.g., 5 => 31/32 */
+#define RM_POWER (1 << RM_FILTER_GAIN)
+#define RM_MAXDEPTH 32
+#define RM_NS_PER_SEC (1000000000)
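+
+/*
+ * RM_FILTER_GAIN sets the weight of the exponentially weighted moving
+ * average kept in cl->avgidle_ by rmc_update_class_util:
+ *
+ *	avgidle' = avgidle + idle - (avgidle >> RM_FILTER_GAIN)
+ *	         ~ (31/32) * avgidle + idle	(for a gain of 5)
+ *
+ * Worked step (illustrative): with avgidle = 3200 and idle = -100,
+ * avgidle >> 5 == 100, so avgidle' = 3200 - 100 - 100 = 3000.
+ */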
+
+typedef struct _rm_class_stats_ {
+ u_int handle;
+ u_int depth;
+
+ struct pktcntr xmit_cnt; /* packets sent in this class */
+ struct pktcntr drop_cnt; /* dropped packets */
+ u_int over; /* # times went over limit */
+ u_int borrows; /* # times tried to borrow */
+ u_int overactions; /* # times invoked overlimit action */
+ u_int delays; /* # times invoked delay actions */
+} rm_class_stats_t;
+
+/*
+ * CBQ Class state structure
+ */
+struct rm_class {
+ class_queue_t *q_; /* Queue of packets */
+ rm_ifdat_t *ifdat_;
+ int pri_; /* Class priority. */
+ int depth_; /* Class depth */
+ u_int ns_per_byte_; /* NanoSeconds per byte. */
+ u_int maxrate_; /* Bytes per second for this class. */
+ u_int allotment_; /* Fraction of link bandwidth. */
+ u_int w_allotment_; /* Weighted allotment for WRR */
+ int bytes_alloc_; /* Allocation for round of WRR */
+
+ int avgidle_;
+ int maxidle_;
+ int minidle_;
+ int offtime_;
+ int sleeping_; /* != 0 if delaying */
+ int qthresh_; /* Queue threshold for formal link sharing */
+	int		leaf_;		/* Note whether leaf class or not. */
+
+ rm_class_t *children_; /* Children of this class */
+ rm_class_t *next_; /* Next pointer, used if child */
+
+ rm_class_t *peer_; /* Peer class */
+ rm_class_t *borrow_; /* Borrow class */
+ rm_class_t *parent_; /* Parent class */
+
+ void (*overlimit)(struct rm_class *, struct rm_class *);
+ void (*drop)(struct rm_class *); /* Class drop action. */
+
+ union {
+ struct red *red_; /* RED state pointer */
+ struct codel *codel_; /* codel state pointer */
+ } cl_aqm_;
+#define red_ cl_aqm_.red_
+#define codel_ cl_aqm_.codel_
+ struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */
+ int flags_;
+
+ int last_pkttime_; /* saved pkt_time */
+ struct timeval undertime_; /* time can next send */
+ struct timeval last_; /* time last packet sent */
+ struct timeval overtime_;
+ struct callout callout_; /* for timeout() calls */
+
+ rm_class_stats_t stats_; /* Class Statistics */
+};
+
+/*
+ * CBQ Interface state
+ */
+struct rm_ifdat {
+ int queued_; /* # pkts queued downstream */
+ int efficient_; /* Link Efficiency bit */
+ int wrr_; /* Enable Weighted Round-Robin */
+ u_long ns_per_byte_; /* Link byte speed. */
+ int maxqueued_; /* Max packets to queue */
+ int maxpkt_; /* Max packet size. */
+ int qi_; /* In/out pointers for downstream */
+ int qo_; /* packets */
+
+ /*
+ * Active class state and WRR state.
+ */
+ rm_class_t *active_[RM_MAXPRIO]; /* Active cl's in each pri */
+ int na_[RM_MAXPRIO]; /* # of active cl's in a pri */
+ int num_[RM_MAXPRIO]; /* # of cl's per pri */
+ int alloc_[RM_MAXPRIO]; /* Byte Allocation */
+ u_long M_[RM_MAXPRIO]; /* WRR weights. */
+
+ /*
+ * Network Interface/Solaris Queue state pointer.
+ */
+ struct ifaltq *ifq_;
+ rm_class_t *default_; /* Default Pkt class, BE */
+ rm_class_t *root_; /* Root Link class. */
+ rm_class_t *ctl_; /* Control Traffic class. */
+ void (*restart)(struct ifaltq *); /* Restart routine. */
+
+ /*
+ * Current packet downstream packet state and dynamic state.
+ */
+ rm_class_t *borrowed_[RM_MAXQUEUED]; /* Class borrowed last */
+ rm_class_t *class_[RM_MAXQUEUED]; /* class sending */
+ int curlen_[RM_MAXQUEUED]; /* Current pktlen */
+ struct timeval now_[RM_MAXQUEUED]; /* Current packet time. */
+	int		is_overlimit_[RM_MAXQUEUED]; /* Current packet overlimit flag. */
+
+ int cutoff_; /* Cut-off depth for borrowing */
+
+ struct timeval ifnow_; /* expected xmit completion time */
+#if 1 /* ALTQ4PPP */
+ int maxiftime_; /* max delay inside interface */
+#endif
+ rm_class_t *pollcache_; /* cached rm_class by poll operation */
+};
+
+/* flags for rmc_init and rmc_newclass */
+/* class flags */
+#define RMCF_RED 0x0001
+#define RMCF_ECN 0x0002
+#define RMCF_RIO 0x0004
+#define RMCF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
+#define RMCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define RMCF_CODEL 0x0020
+
+/* flags for rmc_init */
+#define RMCF_WRR 0x0100
+#define RMCF_EFFICIENT 0x0200
+
+#define is_a_parent_class(cl) ((cl)->children_ != NULL)
+
+extern rm_class_t *rmc_newclass(int, struct rm_ifdat *, u_int,
+ void (*)(struct rm_class *, struct rm_class *),
+ int, struct rm_class *, struct rm_class *,
+ u_int, int, u_int, int, int);
+extern void rmc_delete_class(struct rm_ifdat *, struct rm_class *);
+extern int rmc_modclass(struct rm_class *, u_int, int,
+ u_int, int, u_int, int);
+extern void rmc_init(struct ifaltq *, struct rm_ifdat *, u_int,
+ void (*)(struct ifaltq *),
+ int, int, u_int, int, u_int, int);
+extern int rmc_queue_packet(struct rm_class *, mbuf_t *);
+extern mbuf_t *rmc_dequeue_next(struct rm_ifdat *, int);
+extern void rmc_update_class_util(struct rm_ifdat *);
+extern void rmc_delay_action(struct rm_class *, struct rm_class *);
+extern void rmc_dropall(struct rm_class *);
+extern int rmc_get_weight(struct rm_ifdat *, int);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_RMCLASS_H_ */
diff --git a/freebsd/sys/net/altq/altq_rmclass_debug.h b/freebsd/sys/net/altq/altq_rmclass_debug.h
new file mode 100644
index 00000000..7adbaec4
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_rmclass_debug.h
@@ -0,0 +1,113 @@
+/*-
+ * Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ *
+ * $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_RMCLASS_DEBUG_H_
+#define _ALTQ_ALTQ_RMCLASS_DEBUG_H_
+
+/* #pragma ident "@(#)rm_class_debug.h 1.7 98/05/04 SMI" */
+
+/*
+ * Cbq debugging macros
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CBQ_TRACE
+#ifndef NCBQTRACE
+#define NCBQTRACE (16 * 1024)
+#endif
+
+/*
+ * To view the trace output, using adb, type:
+ * adb -k /dev/ksyms /dev/mem <cr>, then type
+ * cbqtrace_count/D to get the count, then type
+ * cbqtrace_buffer,0tcount/Dp4C" "Xn
+ * This will dump the trace buffer from 0 to count.
+ */
+/*
+ * in ALTQ, use "call cbqtrace_dump(N)" from DDB to display 20 events
+ * starting at the Nth event in the circular buffer.
+ */
+
+struct cbqtrace {
+ int count;
+ int function; /* address of function */
+ int trace_action; /* descriptive 4 characters */
+ int object; /* object operated on */
+};
+
+extern struct cbqtrace cbqtrace_buffer[];
+extern struct cbqtrace *cbqtrace_ptr;
+extern int cbqtrace_count;
+
+#define CBQTRACEINIT() { \
+ if (cbqtrace_ptr == NULL) \
+ cbqtrace_ptr = cbqtrace_buffer; \
+ else { \
+ cbqtrace_ptr = cbqtrace_buffer; \
+ bzero((void *)cbqtrace_ptr, sizeof(cbqtrace_buffer)); \
+ cbqtrace_count = 0; \
+ } \
+}
+
+#define LOCK_TRACE() splimp()
+#define UNLOCK_TRACE(x) splx(x)
+
+#define CBQTRACE(func, act, obj) { \
+ int __s = LOCK_TRACE(); \
+ int *_p = &cbqtrace_ptr->count; \
+ *_p++ = ++cbqtrace_count; \
+ *_p++ = (int)(func); \
+ *_p++ = (int)(act); \
+ *_p++ = (int)(obj); \
+ if ((struct cbqtrace *)(void *)_p >= &cbqtrace_buffer[NCBQTRACE])\
+ cbqtrace_ptr = cbqtrace_buffer; \
+ else \
+ cbqtrace_ptr = (struct cbqtrace *)(void *)_p; \
+ UNLOCK_TRACE(__s); \
+ }
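+
+/*
+ * Example (as used in altq_rmclass.c): log a class restart, tagging
+ * the record with the four characters 'trts' and the class handle.
+ * cbqtrace_dump() later resolves the function pointer to a name and
+ * prints the tag back:
+ *
+ *	CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
+ */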
+#else
+
+/* If no tracing, define no-ops */
+#define CBQTRACEINIT()
+#define CBQTRACE(a, b, c)
+
+#endif /* !CBQ_TRACE */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_RMCLASS_DEBUG_H_ */
diff --git a/freebsd/sys/net/altq/altq_subr.c b/freebsd/sys/net/altq/altq_subr.c
new file mode 100644
index 00000000..66ff441d
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_subr.c
@@ -0,0 +1,1978 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
+ * $FreeBSD$
+ */
+
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <net/altq/altq.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+
+/* machine dependent clock related includes */
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
+#include <machine/clock.h>
+#if defined(__amd64__) || defined(__i386__)
+#include <machine/cpufunc.h> /* for pentium tsc */
+#include <machine/specialreg.h> /* for CPUID_TSC */
+#include <machine/md_var.h> /* for cpu_feature */
+#endif /* __amd64 || __i386__ */
+
+/*
+ * internal function prototypes
+ */
+static void tbr_timeout(void *);
+int (*altq_input)(struct mbuf *, int) = NULL;
+static struct mbuf *tbr_dequeue(struct ifaltq *, int);
+static int tbr_timer = 0; /* token bucket regulator timer */
+#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
+static struct callout tbr_callout = CALLOUT_INITIALIZER;
+#else
+static struct callout tbr_callout;
+#endif
+
+#ifdef ALTQ3_CLFIER_COMPAT
+static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
+#ifdef INET6
+static int extract_ports6(struct mbuf *, struct ip6_hdr *,
+ struct flowinfo_in6 *);
+#endif
+static int apply_filter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+static int apply_ppfilter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+#ifdef INET6
+static int apply_filter6(u_int32_t, struct flow_filter6 *,
+ struct flowinfo_in6 *);
+#endif
+static int apply_tosfilter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+static u_long get_filt_handle(struct acc_classifier *, int);
+static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
+static u_int32_t filt2fibmask(struct flow_filter *);
+
+static void ip4f_cache(struct ip *, struct flowinfo_in *);
+static int ip4f_lookup(struct ip *, struct flowinfo_in *);
+static int ip4f_init(void);
+static struct ip4_frag *ip4f_alloc(void);
+static void ip4f_free(struct ip4_frag *);
+#endif /* ALTQ3_CLFIER_COMPAT */
+
+/*
+ * alternate queueing support routines
+ */
+
+/* look up the queue state by the interface name and the queueing type. */
+void *
+altq_lookup(name, type)
+ char *name;
+ int type;
+{
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(name)) != NULL) {
+ /* read if_snd unlocked */
+ if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
+ return (ifp->if_snd.altq_disc);
+ }
+
+ return NULL;
+}
+
+int
+altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
+ struct ifaltq *ifq;
+ int type;
+ void *discipline;
+ int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+ struct mbuf *(*dequeue)(struct ifaltq *, int);
+ int (*request)(struct ifaltq *, int, void *);
+ void *clfier;
+ void *(*classify)(void *, struct mbuf *, int);
+{
+ IFQ_LOCK(ifq);
+ if (!ALTQ_IS_READY(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return ENXIO;
+ }
+
+#ifdef ALTQ3_COMPAT
+ /*
+ * pfaltq can override the existing discipline, but altq3 cannot.
+ * check these if clfier is not NULL (which implies altq3).
+ */
+ if (clfier != NULL) {
+ if (ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return EBUSY;
+ }
+ if (ALTQ_IS_ATTACHED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return EEXIST;
+ }
+ }
+#endif
+ ifq->altq_type = type;
+ ifq->altq_disc = discipline;
+ ifq->altq_enqueue = enqueue;
+ ifq->altq_dequeue = dequeue;
+ ifq->altq_request = request;
+ ifq->altq_clfier = clfier;
+ ifq->altq_classify = classify;
+ ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_KLD
+ altq_module_incref(type);
+#endif
+#endif
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+int
+altq_detach(ifq)
+ struct ifaltq *ifq;
+{
+ IFQ_LOCK(ifq);
+
+ if (!ALTQ_IS_READY(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return ENXIO;
+ }
+ if (ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return EBUSY;
+ }
+ if (!ALTQ_IS_ATTACHED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return (0);
+ }
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_KLD
+ altq_module_declref(ifq->altq_type);
+#endif
+#endif
+
+ ifq->altq_type = ALTQT_NONE;
+ ifq->altq_disc = NULL;
+ ifq->altq_enqueue = NULL;
+ ifq->altq_dequeue = NULL;
+ ifq->altq_request = NULL;
+ ifq->altq_clfier = NULL;
+ ifq->altq_classify = NULL;
+ ifq->altq_flags &= ALTQF_CANTCHANGE;
+
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+int
+altq_enable(ifq)
+ struct ifaltq *ifq;
+{
+ int s;
+
+ IFQ_LOCK(ifq);
+
+ if (!ALTQ_IS_READY(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return ENXIO;
+ }
+ if (ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return 0;
+ }
+
+ s = splnet();
+ IFQ_PURGE_NOLOCK(ifq);
+ ASSERT(ifq->ifq_len == 0);
+ ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */
+ ifq->altq_flags |= ALTQF_ENABLED;
+ if (ifq->altq_clfier != NULL)
+ ifq->altq_flags |= ALTQF_CLASSIFY;
+ splx(s);
+
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+int
+altq_disable(ifq)
+ struct ifaltq *ifq;
+{
+ int s;
+
+ IFQ_LOCK(ifq);
+ if (!ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return 0;
+ }
+
+ s = splnet();
+ IFQ_PURGE_NOLOCK(ifq);
+ ASSERT(ifq->ifq_len == 0);
+ ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
+ splx(s);
+
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+#ifdef ALTQ_DEBUG
+void
+altq_assert(file, line, failedexpr)
+ const char *file, *failedexpr;
+ int line;
+{
+ (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
+ failedexpr, file, line);
+ panic("altq assertion");
+ /* NOTREACHED */
+}
+#endif
+
+/*
+ * internal representation of token bucket parameters
+ * rate: byte_per_unittime << 32
+ * (((bits_per_sec) / 8) << 32) / machclk_freq
+ * depth: byte << 32
+ *
+ */
+#define TBR_SHIFT 32
+#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
+#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
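+
+/*
+ * Worked example (illustrative, assuming machclk_freq = 256000000):
+ * a 10 Mbps profile gives rate = 10000000 bit/s, so
+ *
+ *	tbr_rate = TBR_SCALE(10000000 / 8) / machclk_freq
+ *	         = (1250000 << 32) / 256000000 = 20971520
+ *
+ * scaled bytes of credit per machclk tick.  A 1500-byte packet debits
+ * TBR_SCALE(1500) tokens, which the bucket earns back in about
+ * 1500 / 1250000 s = 1.2 ms worth of ticks.
+ */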
+
+static struct mbuf *
+tbr_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ struct tb_regulator *tbr;
+ struct mbuf *m;
+ int64_t interval;
+ u_int64_t now;
+
+ IFQ_LOCK_ASSERT(ifq);
+ tbr = ifq->altq_tbr;
+ if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
+ /* if this is a remove after poll, bypass tbr check */
+ } else {
+ /* update token only when it is negative */
+ if (tbr->tbr_token <= 0) {
+ now = read_machclk();
+ interval = now - tbr->tbr_last;
+ if (interval >= tbr->tbr_filluptime)
+ tbr->tbr_token = tbr->tbr_depth;
+ else {
+ tbr->tbr_token += interval * tbr->tbr_rate;
+ if (tbr->tbr_token > tbr->tbr_depth)
+ tbr->tbr_token = tbr->tbr_depth;
+ }
+ tbr->tbr_last = now;
+ }
+ /* if token is still negative, don't allow dequeue */
+ if (tbr->tbr_token <= 0)
+ return (NULL);
+ }
+
+ if (ALTQ_IS_ENABLED(ifq))
+ m = (*ifq->altq_dequeue)(ifq, op);
+ else {
+ if (op == ALTDQ_POLL)
+ _IF_POLL(ifq, m);
+ else
+ _IF_DEQUEUE(ifq, m);
+ }
+
+ if (m != NULL && op == ALTDQ_REMOVE)
+ tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
+ tbr->tbr_lastop = op;
+ return (m);
+}
+
+/*
+ * set a token bucket regulator.
+ * if the specified rate is zero, the token bucket regulator is deleted.
+ */
+int
+tbr_set(ifq, profile)
+ struct ifaltq *ifq;
+ struct tb_profile *profile;
+{
+ struct tb_regulator *tbr, *otbr;
+
+ if (tbr_dequeue_ptr == NULL)
+ tbr_dequeue_ptr = tbr_dequeue;
+
+ if (machclk_freq == 0)
+ init_machclk();
+ if (machclk_freq == 0) {
+ printf("tbr_set: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ IFQ_LOCK(ifq);
+ if (profile->rate == 0) {
+ /* delete this tbr */
+ if ((tbr = ifq->altq_tbr) == NULL) {
+ IFQ_UNLOCK(ifq);
+ return (ENOENT);
+ }
+ ifq->altq_tbr = NULL;
+ free(tbr, M_DEVBUF);
+ IFQ_UNLOCK(ifq);
+ return (0);
+ }
+
+ tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (tbr == NULL) {
+ IFQ_UNLOCK(ifq);
+ return (ENOMEM);
+ }
+
+ tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
+ tbr->tbr_depth = TBR_SCALE(profile->depth);
+ if (tbr->tbr_rate > 0)
+ tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
+ else
+ tbr->tbr_filluptime = 0xffffffffffffffffLL;
+ tbr->tbr_token = tbr->tbr_depth;
+ tbr->tbr_last = read_machclk();
+ tbr->tbr_lastop = ALTDQ_REMOVE;
+
+ otbr = ifq->altq_tbr;
+ ifq->altq_tbr = tbr; /* set the new tbr */
+
+ if (otbr != NULL)
+ free(otbr, M_DEVBUF);
+ else {
+ if (tbr_timer == 0) {
+ CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
+ tbr_timer = 1;
+ }
+ }
+ IFQ_UNLOCK(ifq);
+ return (0);
+}
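+
+/*
+ * Hypothetical caller sketch (not part of this file): install a
+ * 10 Mbps regulator with a 12 KB bucket on an interface send queue,
+ * then delete it again by setting the rate to zero.
+ *
+ *	struct tb_profile prof;
+ *
+ *	prof.rate = 10000000;		(bits per second)
+ *	prof.depth = 12 * 1024;		(bytes)
+ *	error = tbr_set(&ifp->if_snd, &prof);
+ *	...
+ *	prof.rate = 0;
+ *	error = tbr_set(&ifp->if_snd, &prof);	(deletes the regulator)
+ */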
+
+/*
+ * tbr_timeout goes through the interface list, and kicks the drivers
+ * if necessary.
+ *
+ * MPSAFE
+ */
+static void
+tbr_timeout(arg)
+ void *arg;
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+ struct ifnet *ifp;
+ int active, s;
+
+ active = 0;
+ s = splnet();
+ IFNET_RLOCK_NOSLEEP();
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
+ ifp = TAILQ_NEXT(ifp, if_list)) {
+ /* read from if_snd unlocked */
+ if (!TBR_IS_ENABLED(&ifp->if_snd))
+ continue;
+ active++;
+ if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
+ ifp->if_start != NULL)
+ (*ifp->if_start)(ifp);
+ }
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+ IFNET_RUNLOCK_NOSLEEP();
+ splx(s);
+ if (active > 0)
+ CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
+ else
+ tbr_timer = 0; /* don't need tbr_timer anymore */
+}
+
+/*
+ * get token bucket regulator profile
+ */
+int
+tbr_get(ifq, profile)
+ struct ifaltq *ifq;
+ struct tb_profile *profile;
+{
+ struct tb_regulator *tbr;
+
+ IFQ_LOCK(ifq);
+ if ((tbr = ifq->altq_tbr) == NULL) {
+ profile->rate = 0;
+ profile->depth = 0;
+ } else {
+ profile->rate =
+ (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
+ profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
+ }
+ IFQ_UNLOCK(ifq);
+ return (0);
+}
+
+/*
+ * attach a discipline to the interface. if one already exists, it is
+ * overridden.
+ * Locking is done in the discipline specific attach functions. Basically
+ * they call back to altq_attach which takes care of the attach and locking.
+ */
+int
+altq_pfattach(struct pf_altq *a)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+ case ALTQT_NONE:
+ break;
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_pfattach(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_pfattach(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_pfattach(a);
+ break;
+#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_pfattach(a);
+ break;
+#endif
+#ifdef ALTQ_CODEL
+ case ALTQT_CODEL:
+ error = codel_pfattach(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * detach a discipline from the interface.
+ * it is possible that the discipline was already overridden by another
+ * discipline.
+ */
+int
+altq_pfdetach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error = 0;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+
+ /* if this discipline is no longer referenced, just return */
+ /* read unlocked from if_snd */
+ if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
+ return (0);
+
+ s = splnet();
+ /* read unlocked from if_snd, _disable and _detach take care */
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ error = altq_disable(&ifp->if_snd);
+ if (error == 0)
+ error = altq_detach(&ifp->if_snd);
+ splx(s);
+
+ return (error);
+}
+
+/*
+ * add a discipline or a queue
+ * Locking is done in the discipline-specific functions with regard to
+ * malloc with M_WAITOK; also, it is not yet clear which lock to use.
+ */
+int
+altq_add(struct pf_altq *a)
+{
+ int error = 0;
+
+ if (a->qname[0] != 0)
+ return (altq_add_queue(a));
+
+ if (machclk_freq == 0)
+ init_machclk();
+ if (machclk_freq == 0)
+ panic("altq_add: no cpu clock");
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_add_altq(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_add_altq(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_add_altq(a);
+ break;
+#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_add_altq(a);
+ break;
+#endif
+#ifdef ALTQ_CODEL
+ case ALTQT_CODEL:
+ error = codel_add_altq(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * remove a discipline or a queue
+ * It is as yet unclear what lock to use to protect this operation;
+ * the discipline-specific functions will determine and grab it.
+ */
+int
+altq_remove(struct pf_altq *a)
+{
+ int error = 0;
+
+ if (a->qname[0] != 0)
+ return (altq_remove_queue(a));
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_remove_altq(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_remove_altq(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_remove_altq(a);
+ break;
+#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_remove_altq(a);
+ break;
+#endif
+#ifdef ALTQ_CODEL
+ case ALTQT_CODEL:
+ error = codel_remove_altq(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * add a queue to the discipline
+ * It is as yet unclear what lock to use to protect this operation;
+ * the discipline-specific functions will determine and grab it.
+ */
+int
+altq_add_queue(struct pf_altq *a)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_add_queue(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_add_queue(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_add_queue(a);
+ break;
+#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_add_queue(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * remove a queue from the discipline
+ * It is as yet unclear what lock to use to protect this operation;
+ * the discipline-specific functions will determine and grab it.
+ */
+int
+altq_remove_queue(struct pf_altq *a)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_remove_queue(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_remove_queue(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_remove_queue(a);
+ break;
+#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_remove_queue(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * get queue statistics
+ * Locking is done in the discipline-specific functions with regard to
+ * copyout operations; also, it is not yet clear which lock to use.
+ */
+int
+altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+#ifdef ALTQ_CODEL
+ case ALTQT_CODEL:
+ error = codel_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * read and write diffserv field in IPv4 or IPv6 header
+ */
+u_int8_t
+read_dsfield(m, pktattr)
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ struct mbuf *m0;
+ u_int8_t ds_field = 0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return ((u_int8_t)0);
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+#ifdef ALTQ_DEBUG
+ printf("read_dsfield: can't locate header!\n");
+#endif
+ return ((u_int8_t)0);
+ }
+
+ if (pktattr->pattr_af == AF_INET) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+
+ if (ip->ip_v != 4)
+ return ((u_int8_t)0); /* version mismatch! */
+ ds_field = ip->ip_tos;
+ }
+#ifdef INET6
+ else if (pktattr->pattr_af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return ((u_int8_t)0); /* version mismatch! */
+ ds_field = (flowlabel >> 20) & 0xff;
+ }
+#endif
+ return (ds_field);
+}
+
+void
+write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
+{
+ struct mbuf *m0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return;
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+#ifdef ALTQ_DEBUG
+ printf("write_dsfield: can't locate header!\n");
+#endif
+ return;
+ }
+
+ if (pktattr->pattr_af == AF_INET) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+ u_int8_t old;
+ int32_t sum;
+
+ if (ip->ip_v != 4)
+ return; /* version mismatch! */
+ old = ip->ip_tos;
+ dsfield |= old & 3; /* leave CU bits */
+ if (old == dsfield)
+ return;
+ ip->ip_tos = dsfield;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
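+		/*
+		 * Here HC is ip_sum, m is the 16-bit header word holding
+		 * the old TOS byte, and m' is the same word with the new
+		 * TOS.  The high byte (version/header length) is unchanged,
+		 * so ~m + m' contributes 0xff00 for it plus ~old + new for
+		 * the low byte -- the 0xff00 + (~old & 0xff) + dsfield
+		 * term below.
+		 */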
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += 0xff00 + (~old & 0xff) + dsfield;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+
+ ip->ip_sum = htons(~sum & 0xffff);
+ }
+#ifdef INET6
+ else if (pktattr->pattr_af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return; /* version mismatch! */
+ flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ }
+#endif
+ return;
+}
+
+
+/*
+ * high resolution clock support taking advantage of a machine dependent
+ * high resolution time counter (e.g., timestamp counter of intel pentium).
+ * we assume
+ * - 64-bit-long monotonically-increasing counter
+ * - frequency range is 100M-4GHz (CPU speed)
+ */
+/* if pcc is not available or disabled, emulate 256MHz using microtime() */
+#define MACHCLK_SHIFT 8
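+/* i.e., machclk_freq = 1000000 << 8 = 256000000 (see init_machclk()) */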
+
+int machclk_usepcc;
+u_int32_t machclk_freq;
+u_int32_t machclk_per_tick;
+
+#if defined(__i386__) && defined(__NetBSD__)
+extern u_int64_t cpu_tsc_freq;
+#endif
+
+#if (__FreeBSD_version >= 700035)
+/* Update TSC freq with the value indicated by the caller. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+ /* If there was an error during the transition, don't do anything. */
+ if (status != 0)
+ return;
+
+#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
+ /* If TSC is P-state invariant, don't do anything. */
+ if (tsc_is_invariant)
+ return;
+#endif
+
+ /* Total setting for this level gives the new frequency in MHz. */
+ init_machclk();
+}
+EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
+ EVENTHANDLER_PRI_LAST);
+#endif /* __FreeBSD_version >= 700035 */
+
+static void
+init_machclk_setup(void)
+{
+#if (__FreeBSD_version >= 600000)
+ callout_init(&tbr_callout, 0);
+#endif
+
+ machclk_usepcc = 1;
+
+#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
+ machclk_usepcc = 0;
+#endif
+#if defined(__FreeBSD__) && defined(SMP)
+ machclk_usepcc = 0;
+#endif
+#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
+ machclk_usepcc = 0;
+#endif
+#if defined(__amd64__) || defined(__i386__)
+ /* check if TSC is available */
+ if ((cpu_feature & CPUID_TSC) == 0 ||
+ atomic_load_acq_64(&tsc_freq) == 0)
+ machclk_usepcc = 0;
+#endif
+}
+
+void
+init_machclk(void)
+{
+ static int called;
+
+ /* Call one-time initialization function. */
+ if (!called) {
+ init_machclk_setup();
+ called = 1;
+ }
+
+ if (machclk_usepcc == 0) {
+ /* emulate 256MHz using microtime() */
+ machclk_freq = 1000000 << MACHCLK_SHIFT;
+ machclk_per_tick = machclk_freq / hz;
+#ifdef ALTQ_DEBUG
+ printf("altq: emulate %uHz cpu clock\n", machclk_freq);
+#endif
+ return;
+ }
+
+ /*
+ * if the clock frequency (of Pentium TSC or Alpha PCC) is
+ * accessible, just use it.
+ */
+#if defined(__amd64__) || defined(__i386__)
+ machclk_freq = atomic_load_acq_64(&tsc_freq);
+#endif
+
+ /*
+ * if we don't know the clock frequency, measure it.
+ */
+ if (machclk_freq == 0) {
+ static int wait;
+ struct timeval tv_start, tv_end;
+ u_int64_t start, end, diff;
+ int timo;
+
+ microtime(&tv_start);
+ start = read_machclk();
+ timo = hz; /* 1 sec */
+ (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
+ microtime(&tv_end);
+ end = read_machclk();
+ diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
+ + tv_end.tv_usec - tv_start.tv_usec;
+ if (diff != 0)
+ machclk_freq = (u_int)((end - start) * 1000000 / diff);
+ }
+
+ machclk_per_tick = machclk_freq / hz;
+
+#ifdef ALTQ_DEBUG
+ printf("altq: CPU clock: %uHz\n", machclk_freq);
+#endif
+}
+
+#if defined(__OpenBSD__) && defined(__i386__)
+static __inline u_int64_t
+rdtsc(void)
+{
+ u_int64_t rv;
+ __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
+ return (rv);
+}
+#endif /* __OpenBSD__ && __i386__ */
+
+u_int64_t
+read_machclk(void)
+{
+ u_int64_t val;
+
+ if (machclk_usepcc) {
+#if defined(__amd64__) || defined(__i386__)
+ val = rdtsc();
+#else
+ panic("read_machclk");
+#endif
+ } else {
+ struct timeval tv, boottime;
+
+ microtime(&tv);
+ getboottime(&boottime);
+ val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
+ + tv.tv_usec) << MACHCLK_SHIFT);
+ }
+ return (val);
+}
+
+#ifdef ALTQ3_CLFIER_COMPAT
+
+#ifndef IPPROTO_ESP
+#define IPPROTO_ESP 50 /* encapsulating security payload */
+#endif
+#ifndef IPPROTO_AH
+#define IPPROTO_AH 51 /* authentication header */
+#endif
+
+/*
+ * extract flow information from a given packet.
+ * filt_bmask shows the flowinfo fields required.
+ * we assume the ip header is in one mbuf, and addresses and ports are
+ * in network byte order.
+ */
+int
+altq_extractflow(m, af, flow, filt_bmask)
+ struct mbuf *m;
+ int af;
+ struct flowinfo *flow;
+ u_int32_t filt_bmask;
+{
+
+ switch (af) {
+ case PF_INET: {
+ struct flowinfo_in *fin;
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+
+ if (ip->ip_v != 4)
+ break;
+
+ fin = (struct flowinfo_in *)flow;
+ fin->fi_len = sizeof(struct flowinfo_in);
+ fin->fi_family = AF_INET;
+
+ fin->fi_proto = ip->ip_p;
+ fin->fi_tos = ip->ip_tos;
+
+ fin->fi_src.s_addr = ip->ip_src.s_addr;
+ fin->fi_dst.s_addr = ip->ip_dst.s_addr;
+
+ if (filt_bmask & FIMB4_PORTS)
+ /* if port info is required, extract port numbers */
+ extract_ports4(m, ip, fin);
+ else {
+ fin->fi_sport = 0;
+ fin->fi_dport = 0;
+ fin->fi_gpi = 0;
+ }
+ return (1);
+ }
+
+#ifdef INET6
+ case PF_INET6: {
+ struct flowinfo_in6 *fin6;
+ struct ip6_hdr *ip6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ /* should we check the ip version? */
+
+ fin6 = (struct flowinfo_in6 *)flow;
+ fin6->fi6_len = sizeof(struct flowinfo_in6);
+ fin6->fi6_family = AF_INET6;
+
+ fin6->fi6_proto = ip6->ip6_nxt;
+ fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+
+ fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
+ fin6->fi6_src = ip6->ip6_src;
+ fin6->fi6_dst = ip6->ip6_dst;
+
+ if ((filt_bmask & FIMB6_PORTS) ||
+ ((filt_bmask & FIMB6_PROTO)
+ && ip6->ip6_nxt > IPPROTO_IPV6))
+ /*
+ * if port info is required, or proto is required
+ * but there are option headers, extract port
+ * and protocol numbers.
+ */
+ extract_ports6(m, ip6, fin6);
+ else {
+ fin6->fi6_sport = 0;
+ fin6->fi6_dport = 0;
+ fin6->fi6_gpi = 0;
+ }
+ return (1);
+ }
+#endif /* INET6 */
+
+ default:
+ break;
+ }
+
+ /* failed */
+ flow->fi_len = sizeof(struct flowinfo);
+ flow->fi_family = AF_UNSPEC;
+ return (0);
+}
+
+/*
+ * helper routine to extract port numbers
+ */
+/* structure for ipsec and ipv6 option header template */
+struct _opt6 {
+ u_int8_t opt6_nxt; /* next header */
+ u_int8_t opt6_hlen; /* header extension length */
+ u_int16_t _pad;
+ u_int32_t ah_spi; /* security parameter index
+ for authentication header */
+};
+
+/*
+ * extract port numbers from a ipv4 packet.
+ */
+static int
+extract_ports4(m, ip, fin)
+ struct mbuf *m;
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct mbuf *m0;
+ u_short ip_off;
+ u_int8_t proto;
+ int off;
+
+ fin->fi_sport = 0;
+ fin->fi_dport = 0;
+ fin->fi_gpi = 0;
+
+ ip_off = ntohs(ip->ip_off);
+ /* if it is a fragment, try cached fragment info */
+ if (ip_off & IP_OFFMASK) {
+ ip4f_lookup(ip, fin);
+ return (1);
+ }
+
+ /* locate the mbuf containing the protocol header */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)ip >= m0->m_data) &&
+ ((caddr_t)ip < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("extract_ports4: can't locate header! ip=%p\n", ip);
+#endif
+ return (0);
+ }
+ off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
+ proto = ip->ip_p;
+
+#ifdef ALTQ_IPSEC
+ again:
+#endif
+ while (off >= m0->m_len) {
+ off -= m0->m_len;
+ m0 = m0->m_next;
+ if (m0 == NULL)
+ return (0); /* bogus ip_hl! */
+ }
+ if (m0->m_len < off + 4)
+ return (0);
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP: {
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
+ fin->fi_sport = udp->uh_sport;
+ fin->fi_dport = udp->uh_dport;
+ fin->fi_proto = proto;
+ }
+ break;
+
+#ifdef ALTQ_IPSEC
+ case IPPROTO_ESP:
+ if (fin->fi_gpi == 0){
+ u_int32_t *gpi;
+
+ gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
+ fin->fi_gpi = *gpi;
+ }
+ fin->fi_proto = proto;
+ break;
+
+ case IPPROTO_AH: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ proto = opt6->opt6_nxt;
+ off += 8 + (opt6->opt6_hlen * 4);
+ if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
+ fin->fi_gpi = opt6->ah_spi;
+ }
+ /* goto the next header */
+ goto again;
+#endif /* ALTQ_IPSEC */
+
+ default:
+ fin->fi_proto = proto;
+ return (0);
+ }
+
+ /* if this is a first fragment, cache it. */
+ if (ip_off & IP_MF)
+ ip4f_cache(ip, fin);
+
+ return (1);
+}
+
+#ifdef INET6
+static int
+extract_ports6(m, ip6, fin6)
+ struct mbuf *m;
+ struct ip6_hdr *ip6;
+ struct flowinfo_in6 *fin6;
+{
+ struct mbuf *m0;
+ int off;
+ u_int8_t proto;
+
+ fin6->fi6_gpi = 0;
+ fin6->fi6_sport = 0;
+ fin6->fi6_dport = 0;
+
+ /* locate the mbuf containing the protocol header */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)ip6 >= m0->m_data) &&
+ ((caddr_t)ip6 < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
+#endif
+ return (0);
+ }
+ off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
+
+ proto = ip6->ip6_nxt;
+ do {
+ while (off >= m0->m_len) {
+ off -= m0->m_len;
+ m0 = m0->m_next;
+ if (m0 == NULL)
+ return (0);
+ }
+ if (m0->m_len < off + 4)
+ return (0);
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP: {
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
+ fin6->fi6_sport = udp->uh_sport;
+ fin6->fi6_dport = udp->uh_dport;
+ fin6->fi6_proto = proto;
+ }
+ return (1);
+
+ case IPPROTO_ESP:
+ if (fin6->fi6_gpi == 0) {
+ u_int32_t *gpi;
+
+ gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
+ fin6->fi6_gpi = *gpi;
+ }
+ fin6->fi6_proto = proto;
+ return (1);
+
+ case IPPROTO_AH: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
+ fin6->fi6_gpi = opt6->ah_spi;
+ proto = opt6->opt6_nxt;
+ off += 8 + (opt6->opt6_hlen * 4);
+ /* goto the next header */
+ break;
+ }
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ proto = opt6->opt6_nxt;
+ off += (opt6->opt6_hlen + 1) * 8;
+ /* goto the next header */
+ break;
+ }
+
+ case IPPROTO_FRAGMENT:
+			/* IPv6 fragments are not supported yet */
+ default:
+ fin6->fi6_proto = proto;
+ return (0);
+ }
+ } while (1);
+ /*NOTREACHED*/
+}
+#endif /* INET6 */
+
+/*
+ * altq common classifier
+ */
+int
+acc_add_filter(classifier, filter, class, phandle)
+ struct acc_classifier *classifier;
+ struct flow_filter *filter;
+ void *class;
+ u_long *phandle;
+{
+ struct acc_filter *afp, *prev, *tmp;
+ int i, s;
+
+#ifdef INET6
+ if (filter->ff_flow.fi_family != AF_INET &&
+ filter->ff_flow.fi_family != AF_INET6)
+ return (EINVAL);
+#else
+ if (filter->ff_flow.fi_family != AF_INET)
+ return (EINVAL);
+#endif
+
+ afp = malloc(sizeof(struct acc_filter),
+ M_DEVBUF, M_WAITOK);
+ if (afp == NULL)
+ return (ENOMEM);
+ bzero(afp, sizeof(struct acc_filter));
+
+ afp->f_filter = *filter;
+ afp->f_class = class;
+
+ i = ACC_WILDCARD_INDEX;
+ if (filter->ff_flow.fi_family == AF_INET) {
+ struct flow_filter *filter4 = &afp->f_filter;
+
+ /*
+ * if address is 0, it's a wildcard. if address mask
+ * isn't set, use full mask.
+ */
+ if (filter4->ff_flow.fi_dst.s_addr == 0)
+ filter4->ff_mask.mask_dst.s_addr = 0;
+ else if (filter4->ff_mask.mask_dst.s_addr == 0)
+ filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
+ if (filter4->ff_flow.fi_src.s_addr == 0)
+ filter4->ff_mask.mask_src.s_addr = 0;
+ else if (filter4->ff_mask.mask_src.s_addr == 0)
+ filter4->ff_mask.mask_src.s_addr = 0xffffffff;
+
+ /* clear extra bits in addresses */
+ filter4->ff_flow.fi_dst.s_addr &=
+ filter4->ff_mask.mask_dst.s_addr;
+ filter4->ff_flow.fi_src.s_addr &=
+ filter4->ff_mask.mask_src.s_addr;
+
+ /*
+ * if dst address is a wildcard, use hash-entry
+ * ACC_WILDCARD_INDEX.
+ */
+ if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
+ i = ACC_WILDCARD_INDEX;
+ else
+ i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
+ }
+#ifdef INET6
+ else if (filter->ff_flow.fi_family == AF_INET6) {
+ struct flow_filter6 *filter6 =
+ (struct flow_filter6 *)&afp->f_filter;
+#ifndef IN6MASK0 /* taken from kame ipv6 */
+#define IN6MASK0 {{{ 0, 0, 0, 0 }}}
+#define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
+ const struct in6_addr in6mask0 = IN6MASK0;
+ const struct in6_addr in6mask128 = IN6MASK128;
+#endif
+
+ if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
+ filter6->ff_mask6.mask6_dst = in6mask0;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
+ filter6->ff_mask6.mask6_dst = in6mask128;
+ if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
+ filter6->ff_mask6.mask6_src = in6mask0;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
+ filter6->ff_mask6.mask6_src = in6mask128;
+
+ /* clear extra bits in addresses */
+ for (i = 0; i < 16; i++)
+ filter6->ff_flow6.fi6_dst.s6_addr[i] &=
+ filter6->ff_mask6.mask6_dst.s6_addr[i];
+ for (i = 0; i < 16; i++)
+ filter6->ff_flow6.fi6_src.s6_addr[i] &=
+ filter6->ff_mask6.mask6_src.s6_addr[i];
+
+ if (filter6->ff_flow6.fi6_flowlabel == 0)
+ i = ACC_WILDCARD_INDEX;
+ else
+ i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
+ }
+#endif /* INET6 */
+
+ afp->f_handle = get_filt_handle(classifier, i);
+
+ /* update filter bitmask */
+ afp->f_fbmask = filt2fibmask(filter);
+ classifier->acc_fbmask |= afp->f_fbmask;
+
+ /*
+ * add this filter to the filter list.
+ * filters are ordered from the highest rule number.
+ */
+ s = splnet();
+ prev = NULL;
+ LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
+ if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
+ prev = tmp;
+ else
+ break;
+ }
+ if (prev == NULL)
+ LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
+ else
+ LIST_INSERT_AFTER(prev, afp, f_chain);
+ splx(s);
+
+ *phandle = afp->f_handle;
+ return (0);
+}
+
+int
+acc_delete_filter(classifier, handle)
+ struct acc_classifier *classifier;
+ u_long handle;
+{
+ struct acc_filter *afp;
+ int s;
+
+ if ((afp = filth_to_filtp(classifier, handle)) == NULL)
+ return (EINVAL);
+
+ s = splnet();
+ LIST_REMOVE(afp, f_chain);
+ splx(s);
+
+ free(afp, M_DEVBUF);
+
+ /* todo: update filt_bmask */
+
+ return (0);
+}
+
+/*
+ * delete filters referencing the specified class.
+ * if the all flag is not 0, delete all the filters.
+ */
+int
+acc_discard_filters(classifier, class, all)
+ struct acc_classifier *classifier;
+ void *class;
+ int all;
+{
+ struct acc_filter *afp;
+ int i, s;
+
+ s = splnet();
+ for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
+ do {
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (all || afp->f_class == class) {
+ LIST_REMOVE(afp, f_chain);
+ free(afp, M_DEVBUF);
+ /* start again from the head */
+ break;
+ }
+ } while (afp != NULL);
+ }
+ splx(s);
+
+ if (all)
+ classifier->acc_fbmask = 0;
+
+ return (0);
+}
+
+void *
+acc_classify(clfier, m, af)
+ void *clfier;
+ struct mbuf *m;
+ int af;
+{
+ struct acc_classifier *classifier;
+ struct flowinfo flow;
+ struct acc_filter *afp;
+ int i;
+
+ classifier = (struct acc_classifier *)clfier;
+ altq_extractflow(m, af, &flow, classifier->acc_fbmask);
+
+ if (flow.fi_family == AF_INET) {
+ struct flowinfo_in *fp = (struct flowinfo_in *)&flow;
+
+ if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
+ /* only tos is used */
+ LIST_FOREACH(afp,
+ &classifier->acc_filters[ACC_WILDCARD_INDEX],
+ f_chain)
+ if (apply_tosfilter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+ } else if ((classifier->acc_fbmask &
+ (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
+ == 0) {
+ /* only proto and ports are used */
+ LIST_FOREACH(afp,
+ &classifier->acc_filters[ACC_WILDCARD_INDEX],
+ f_chain)
+ if (apply_ppfilter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+ } else {
+ /* get the filter hash entry from its dest address */
+ i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
+ do {
+ /*
+ * go through this loop twice. first for dst
+ * hash, second for wildcards.
+ */
+ LIST_FOREACH(afp, &classifier->acc_filters[i],
+ f_chain)
+ if (apply_filter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+
+ /*
+ * check again for filters with a dst addr
+ * wildcard.
+ * (daddr == 0 || dmask != 0xffffffff).
+ */
+ if (i != ACC_WILDCARD_INDEX)
+ i = ACC_WILDCARD_INDEX;
+ else
+ break;
+ } while (1);
+ }
+ }
+#ifdef INET6
+ else if (flow.fi_family == AF_INET6) {
+ struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;
+
+ /* get the filter hash entry from its flow ID */
+ if (fp6->fi6_flowlabel != 0)
+ i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
+ else
+			/* flowlabel can be zero */
+ i = ACC_WILDCARD_INDEX;
+
+ /* go through this loop twice. first for flow hash, second
+ for wildcards. */
+ do {
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (apply_filter6(afp->f_fbmask,
+ (struct flow_filter6 *)&afp->f_filter,
+ fp6))
+ /* filter matched */
+ return (afp->f_class);
+
+ /*
+ * check again for filters with a wildcard.
+ */
+ if (i != ACC_WILDCARD_INDEX)
+ i = ACC_WILDCARD_INDEX;
+ else
+ break;
+ } while (1);
+ }
+#endif /* INET6 */
+
+ /* no filter matched */
+ return (NULL);
+}
+
+static int
+apply_filter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
+ return (0);
+ if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
+ return (0);
+ if ((fbmask & FIMB4_DADDR) &&
+ filt->ff_flow.fi_dst.s_addr !=
+ (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
+ return (0);
+ if ((fbmask & FIMB4_SADDR) &&
+ filt->ff_flow.fi_src.s_addr !=
+ (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
+ return (0);
+ if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
+ return (0);
+ if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
+ (pkt->fi_tos & filt->ff_mask.mask_tos))
+ return (0);
+ if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
+ return (0);
+ /* match */
+ return (1);
+}
+
+/*
+ * filter matching function optimized for a common case that checks
+ * only protocol and port numbers
+ */
+static int
+apply_ppfilter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
+ return (0);
+ if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
+ return (0);
+ if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
+ return (0);
+ /* match */
+ return (1);
+}
+
+/*
+ * filter matching function only for tos field.
+ */
+static int
+apply_tosfilter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
+ (pkt->fi_tos & filt->ff_mask.mask_tos))
+ return (0);
+ /* match */
+ return (1);
+}
+
+#ifdef INET6
+static int
+apply_filter6(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter6 *filt;
+ struct flowinfo_in6 *pkt;
+{
+ int i;
+
+ if (filt->ff_flow6.fi6_family != AF_INET6)
+ return (0);
+ if ((fbmask & FIMB6_FLABEL) &&
+ filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
+ return (0);
+ if ((fbmask & FIMB6_PROTO) &&
+ filt->ff_flow6.fi6_proto != pkt->fi6_proto)
+ return (0);
+ if ((fbmask & FIMB6_SPORT) &&
+ filt->ff_flow6.fi6_sport != pkt->fi6_sport)
+ return (0);
+ if ((fbmask & FIMB6_DPORT) &&
+ filt->ff_flow6.fi6_dport != pkt->fi6_dport)
+ return (0);
+ if (fbmask & FIMB6_SADDR) {
+ for (i = 0; i < 4; i++)
+ if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
+ (pkt->fi6_src.s6_addr32[i] &
+ filt->ff_mask6.mask6_src.s6_addr32[i]))
+ return (0);
+ }
+ if (fbmask & FIMB6_DADDR) {
+ for (i = 0; i < 4; i++)
+ if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
+ (pkt->fi6_dst.s6_addr32[i] &
+ filt->ff_mask6.mask6_dst.s6_addr32[i]))
+ return (0);
+ }
+ if ((fbmask & FIMB6_TCLASS) &&
+ filt->ff_flow6.fi6_tclass !=
+ (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
+ return (0);
+ if ((fbmask & FIMB6_GPI) &&
+ filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
+ return (0);
+ /* match */
+ return (1);
+}
+#endif /* INET6 */
+
+/*
+ * filter handle:
+ * bits 20-28: index into the filter hash table
+ * bits 0-19: unique id in the hash bucket.
+ */
+static u_long
+get_filt_handle(classifier, i)
+ struct acc_classifier *classifier;
+ int i;
+{
+ static u_long handle_number = 1;
+ u_long handle;
+ struct acc_filter *afp;
+
+ while (1) {
+ handle = handle_number++ & 0x000fffff;
+
+ if (LIST_EMPTY(&classifier->acc_filters[i]))
+ break;
+
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if ((afp->f_handle & 0x000fffff) == handle)
+ break;
+ if (afp == NULL)
+ break;
+ /* this handle is already used, try again */
+ }
+
+ return ((i << 20) | handle);
+}
+
+/* convert filter handle to filter pointer */
+static struct acc_filter *
+filth_to_filtp(classifier, handle)
+ struct acc_classifier *classifier;
+ u_long handle;
+{
+ struct acc_filter *afp;
+ int i;
+
+ i = ACC_GET_HINDEX(handle);
+
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (afp->f_handle == handle)
+ return (afp);
+
+ return (NULL);
+}
+
+/* create flowinfo bitmask */
+static u_int32_t
+filt2fibmask(filt)
+ struct flow_filter *filt;
+{
+ u_int32_t mask = 0;
+#ifdef INET6
+ struct flow_filter6 *filt6;
+#endif
+
+ switch (filt->ff_flow.fi_family) {
+ case AF_INET:
+ if (filt->ff_flow.fi_proto != 0)
+ mask |= FIMB4_PROTO;
+ if (filt->ff_flow.fi_tos != 0)
+ mask |= FIMB4_TOS;
+ if (filt->ff_flow.fi_dst.s_addr != 0)
+ mask |= FIMB4_DADDR;
+ if (filt->ff_flow.fi_src.s_addr != 0)
+ mask |= FIMB4_SADDR;
+ if (filt->ff_flow.fi_sport != 0)
+ mask |= FIMB4_SPORT;
+ if (filt->ff_flow.fi_dport != 0)
+ mask |= FIMB4_DPORT;
+ if (filt->ff_flow.fi_gpi != 0)
+ mask |= FIMB4_GPI;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ filt6 = (struct flow_filter6 *)filt;
+
+ if (filt6->ff_flow6.fi6_proto != 0)
+ mask |= FIMB6_PROTO;
+ if (filt6->ff_flow6.fi6_tclass != 0)
+ mask |= FIMB6_TCLASS;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
+ mask |= FIMB6_DADDR;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
+ mask |= FIMB6_SADDR;
+ if (filt6->ff_flow6.fi6_sport != 0)
+ mask |= FIMB6_SPORT;
+ if (filt6->ff_flow6.fi6_dport != 0)
+ mask |= FIMB6_DPORT;
+ if (filt6->ff_flow6.fi6_gpi != 0)
+ mask |= FIMB6_GPI;
+ if (filt6->ff_flow6.fi6_flowlabel != 0)
+ mask |= FIMB6_FLABEL;
+ break;
+#endif /* INET6 */
+ }
+ return (mask);
+}
+
+
+/*
+ * helper functions to handle IPv4 fragments.
+ * currently only in-sequence fragments are handled.
+ * - fragment info is cached in an LRU list.
+ * - when a first fragment is found, cache its flow info.
+ * - when a non-first fragment is found, look up the cache.
+ */
+
+struct ip4_frag {
+ TAILQ_ENTRY(ip4_frag) ip4f_chain;
+ char ip4f_valid;
+ u_short ip4f_id;
+ struct flowinfo_in ip4f_info;
+};
+
+static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
+
+#define IP4F_TABSIZE 16 /* IPv4 fragment cache size */
+
+
+static void
+ip4f_cache(ip, fin)
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct ip4_frag *fp;
+
+ if (TAILQ_EMPTY(&ip4f_list)) {
+ /* first time call, allocate fragment cache entries. */
+ if (ip4f_init() < 0)
+ /* allocation failed! */
+ return;
+ }
+
+ fp = ip4f_alloc();
+ fp->ip4f_id = ip->ip_id;
+ fp->ip4f_info.fi_proto = ip->ip_p;
+ fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
+ fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
+
+ /* save port numbers */
+ fp->ip4f_info.fi_sport = fin->fi_sport;
+ fp->ip4f_info.fi_dport = fin->fi_dport;
+ fp->ip4f_info.fi_gpi = fin->fi_gpi;
+}
+
+static int
+ip4f_lookup(ip, fin)
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct ip4_frag *fp;
+
+ for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
+ fp = TAILQ_NEXT(fp, ip4f_chain))
+ if (ip->ip_id == fp->ip4f_id &&
+ ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
+ ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
+ ip->ip_p == fp->ip4f_info.fi_proto) {
+
+ /* found the matching entry */
+ fin->fi_sport = fp->ip4f_info.fi_sport;
+ fin->fi_dport = fp->ip4f_info.fi_dport;
+ fin->fi_gpi = fp->ip4f_info.fi_gpi;
+
+ if ((ntohs(ip->ip_off) & IP_MF) == 0)
+ /* this is the last fragment,
+ release the entry. */
+ ip4f_free(fp);
+
+ return (1);
+ }
+
+ /* no matching entry found */
+ return (0);
+}
+
+static int
+ip4f_init(void)
+{
+ struct ip4_frag *fp;
+ int i;
+
+ TAILQ_INIT(&ip4f_list);
+ for (i=0; i<IP4F_TABSIZE; i++) {
+ fp = malloc(sizeof(struct ip4_frag),
+ M_DEVBUF, M_NOWAIT);
+ if (fp == NULL) {
+ printf("ip4f_init: can't alloc %dth entry!\n", i);
+ if (i == 0)
+ return (-1);
+ return (0);
+ }
+ fp->ip4f_valid = 0;
+ TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
+ }
+ return (0);
+}
+
+static struct ip4_frag *
+ip4f_alloc(void)
+{
+ struct ip4_frag *fp;
+
+ /* reclaim an entry at the tail, put it at the head */
+ fp = TAILQ_LAST(&ip4f_list, ip4f_list);
+ TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
+ fp->ip4f_valid = 1;
+ TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
+ return (fp);
+}
+
+static void
+ip4f_free(fp)
+ struct ip4_frag *fp;
+{
+ TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
+ fp->ip4f_valid = 0;
+ TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
+}
+
+#endif /* ALTQ3_CLFIER_COMPAT */
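The handle layout documented above get_filt_handle() packs the hash-table
index into bits 20-28 and a per-bucket id into bits 0-19, so that
filth_to_filtp() can jump straight to the right bucket. A minimal
user-space sketch of that packing (the index and id values are made up
for illustration):

	#include <stdio.h>

	#define ACC_GET_HINDEX(handle)	((handle) >> 20) /* as in altq_var.h */

	int
	main(void)
	{
		unsigned long i = 256;		/* e.g. ACC_WILDCARD_INDEX */
		unsigned long id = 0x42;	/* low 20 bits, unique per bucket */
		unsigned long handle = (i << 20) | (id & 0x000fffff);

		/* decoding recovers both halves */
		printf("handle=%#lx index=%lu id=%#lx\n",
		    handle, ACC_GET_HINDEX(handle), handle & 0x000fffff);
		return (0);
	}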
diff --git a/freebsd/sys/net/altq/altq_var.h b/freebsd/sys/net/altq/altq_var.h
new file mode 100644
index 00000000..2ddcb211
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_var.h
@@ -0,0 +1,243 @@
+/*-
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $
+ * $FreeBSD$
+ */
+#ifndef _ALTQ_ALTQ_VAR_H_
+#define _ALTQ_ALTQ_VAR_H_
+
+#ifdef _KERNEL
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#ifdef ALTQ3_CLFIER_COMPAT
+/*
+ * filter structure for altq common classifier
+ */
+struct acc_filter {
+ LIST_ENTRY(acc_filter) f_chain;
+ void *f_class; /* pointer to the class */
+ u_long f_handle; /* filter id */
+ u_int32_t f_fbmask; /* filter bitmask */
+ struct flow_filter f_filter; /* filter value */
+};
+
+/*
+ * XXX ACC_FILTER_TABLESIZE can't be larger than 2048 unless we fix
+ * the handle assignment.
+ */
+#define ACC_FILTER_TABLESIZE (256+1)
+#define ACC_FILTER_MASK (ACC_FILTER_TABLESIZE - 2)
+#define ACC_WILDCARD_INDEX (ACC_FILTER_TABLESIZE - 1)
+#ifdef __GNUC__
+#define ACC_GET_HASH_INDEX(addr) \
+ ({int x = (addr) + ((addr) >> 16); (x + (x >> 8)) & ACC_FILTER_MASK;})
+#else
+#define ACC_GET_HASH_INDEX(addr) \
+ (((addr) + ((addr) >> 8) + ((addr) >> 16) + ((addr) >> 24)) \
+ & ACC_FILTER_MASK)
+#endif
+#define ACC_GET_HINDEX(handle) ((handle) >> 20)
+
+#if (__FreeBSD_version > 500000)
+#define ACC_LOCK_INIT(ac) mtx_init(&(ac)->acc_mtx, "classifier", MTX_DEF)
+#define ACC_LOCK_DESTROY(ac) mtx_destroy(&(ac)->acc_mtx)
+#define ACC_LOCK(ac) mtx_lock(&(ac)->acc_mtx)
+#define ACC_UNLOCK(ac) mtx_unlock(&(ac)->acc_mtx)
+#else
+#define ACC_LOCK_INIT(ac)
+#define ACC_LOCK_DESTROY(ac)
+#define ACC_LOCK(ac)
+#define ACC_UNLOCK(ac)
+#endif
+
+struct acc_classifier {
+ u_int32_t acc_fbmask;
+ LIST_HEAD(filt, acc_filter) acc_filters[ACC_FILTER_TABLESIZE];
+
+#if (__FreeBSD_version > 500000)
+ struct mtx acc_mtx;
+#endif
+};
+
+/*
+ * flowinfo mask bits used by classifier
+ */
+/* for ipv4 */
+#define FIMB4_PROTO 0x0001
+#define FIMB4_TOS 0x0002
+#define FIMB4_DADDR 0x0004
+#define FIMB4_SADDR 0x0008
+#define FIMB4_DPORT 0x0010
+#define FIMB4_SPORT 0x0020
+#define FIMB4_GPI 0x0040
+#define FIMB4_ALL 0x007f
+/* for ipv6 */
+#define FIMB6_PROTO 0x0100
+#define FIMB6_TCLASS 0x0200
+#define FIMB6_DADDR 0x0400
+#define FIMB6_SADDR 0x0800
+#define FIMB6_DPORT 0x1000
+#define FIMB6_SPORT 0x2000
+#define FIMB6_GPI 0x4000
+#define FIMB6_FLABEL 0x8000
+#define FIMB6_ALL 0xff00
+
+#define FIMB_ALL (FIMB4_ALL|FIMB6_ALL)
+
+#define FIMB4_PORTS (FIMB4_DPORT|FIMB4_SPORT|FIMB4_GPI)
+#define FIMB6_PORTS (FIMB6_DPORT|FIMB6_SPORT|FIMB6_GPI)
+#endif /* ALTQ3_CLFIER_COMPAT */
+
+/*
+ * machine dependent clock
+ * a 64bit high resolution time counter.
+ */
+extern int machclk_usepcc;
+extern u_int32_t machclk_freq;
+extern u_int32_t machclk_per_tick;
+extern void init_machclk(void);
+extern u_int64_t read_machclk(void);
+
+/*
+ * debug support
+ */
+#ifdef ALTQ_DEBUG
+#ifdef __STDC__
+#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, #e))
+#else /* PCC */
+#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, "e"))
+#endif
+#else
+#define ASSERT(e) ((void)0)
+#endif
+
+/*
+ * misc stuff for compatibility
+ */
+/* ioctl cmd type */
+typedef u_long ioctlcmd_t;
+
+/*
+ * queue macros:
+ * the interface of TAILQ_LAST macro changed after the introduction
+ * of softupdate. redefine it here to make it work with pre-2.2.7.
+ */
+#undef TAILQ_LAST
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+#ifndef TAILQ_EMPTY
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+#endif
+#ifndef TAILQ_FOREACH
+#define TAILQ_FOREACH(var, head, field) \
+ for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field))
+#endif
+
+/* macro for timeout/untimeout */
+/* use callout */
+#include <sys/callout.h>
+
+#if (__FreeBSD_version > 500000)
+#define CALLOUT_INIT(c) callout_init((c), 0)
+#else
+#define CALLOUT_INIT(c) callout_init((c))
+#endif
+#define CALLOUT_RESET(c,t,f,a) callout_reset((c),(t),(f),(a))
+#define CALLOUT_STOP(c) callout_stop((c))
+#if !defined(CALLOUT_INITIALIZER) && (__FreeBSD_version < 600000)
+#define CALLOUT_INITIALIZER { { { NULL } }, 0, NULL, NULL, 0 }
+#endif
+
+#define m_pktlen(m) ((m)->m_pkthdr.len)
+
+struct ifnet; struct mbuf;
+struct pf_altq;
+#ifdef ALTQ3_CLFIER_COMPAT
+struct flowinfo;
+#endif
+
+void *altq_lookup(char *, int);
+#ifdef ALTQ3_CLFIER_COMPAT
+int altq_extractflow(struct mbuf *, int, struct flowinfo *, u_int32_t);
+int acc_add_filter(struct acc_classifier *, struct flow_filter *,
+ void *, u_long *);
+int acc_delete_filter(struct acc_classifier *, u_long);
+int acc_discard_filters(struct acc_classifier *, void *, int);
+void *acc_classify(void *, struct mbuf *, int);
+#endif
+u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *);
+void write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t);
+void altq_assert(const char *, int, const char *);
+int tbr_set(struct ifaltq *, struct tb_profile *);
+int tbr_get(struct ifaltq *, struct tb_profile *);
+
+int altq_pfattach(struct pf_altq *);
+int altq_pfdetach(struct pf_altq *);
+int altq_add(struct pf_altq *);
+int altq_remove(struct pf_altq *);
+int altq_add_queue(struct pf_altq *);
+int altq_remove_queue(struct pf_altq *);
+int altq_getqstats(struct pf_altq *, void *, int *);
+
+int cbq_pfattach(struct pf_altq *);
+int cbq_add_altq(struct pf_altq *);
+int cbq_remove_altq(struct pf_altq *);
+int cbq_add_queue(struct pf_altq *);
+int cbq_remove_queue(struct pf_altq *);
+int cbq_getqstats(struct pf_altq *, void *, int *);
+
+int codel_pfattach(struct pf_altq *);
+int codel_add_altq(struct pf_altq *);
+int codel_remove_altq(struct pf_altq *);
+int codel_getqstats(struct pf_altq *, void *, int *);
+
+int priq_pfattach(struct pf_altq *);
+int priq_add_altq(struct pf_altq *);
+int priq_remove_altq(struct pf_altq *);
+int priq_add_queue(struct pf_altq *);
+int priq_remove_queue(struct pf_altq *);
+int priq_getqstats(struct pf_altq *, void *, int *);
+
+int hfsc_pfattach(struct pf_altq *);
+int hfsc_add_altq(struct pf_altq *);
+int hfsc_remove_altq(struct pf_altq *);
+int hfsc_add_queue(struct pf_altq *);
+int hfsc_remove_queue(struct pf_altq *);
+int hfsc_getqstats(struct pf_altq *, void *, int *);
+
+int fairq_pfattach(struct pf_altq *);
+int fairq_add_altq(struct pf_altq *);
+int fairq_remove_altq(struct pf_altq *);
+int fairq_add_queue(struct pf_altq *);
+int fairq_remove_queue(struct pf_altq *);
+int fairq_getqstats(struct pf_altq *, void *, int *);
+
+#endif /* _KERNEL */
+#endif /* _ALTQ_ALTQ_VAR_H_ */
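A note on the classifier hash in the header above: the portable
ACC_GET_HASH_INDEX() variant folds all four bytes of an IPv4 address into
the range 0..ACC_FILTER_MASK, leaving bucket ACC_WILDCARD_INDEX free for
filters with a wildcard destination. A standalone sketch (the sample
address is arbitrary):

	#include <stdio.h>
	#include <stdint.h>

	#define ACC_FILTER_TABLESIZE	(256+1)
	#define ACC_FILTER_MASK		(ACC_FILTER_TABLESIZE - 2)	/* 255 */

	/* mirrors the non-__GNUC__ macro from altq_var.h */
	static unsigned int
	acc_hash(uint32_t addr)
	{
		return ((addr + (addr >> 8) + (addr >> 16) + (addr >> 24)) &
		    ACC_FILTER_MASK);
	}

	int
	main(void)
	{
		uint32_t dst = 0xc0a80101;	/* 192.168.1.1, host byte order */

		printf("bucket = %u of 0..%u\n", acc_hash(dst), ACC_FILTER_MASK);
		return (0);
	}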
diff --git a/freebsd/sys/net/altq/if_altq.h b/freebsd/sys/net/altq/if_altq.h
new file mode 100644
index 00000000..c5ad2875
--- /dev/null
+++ b/freebsd/sys/net/altq/if_altq.h
@@ -0,0 +1,182 @@
+/*-
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $
+ * $FreeBSD$
+ */
+#ifndef _ALTQ_IF_ALTQ_H_
+#define _ALTQ_IF_ALTQ_H_
+
+#include <rtems/bsd/sys/lock.h> /* XXX */
+#include <sys/mutex.h> /* XXX */
+#include <sys/event.h> /* XXX */
+
+struct altq_pktattr; struct tb_regulator; struct top_cdnr;
+
+/*
+ * Structure defining a queue for a network interface.
+ */
+struct ifaltq {
+ /* fields compatible with struct ifqueue */
+ struct mbuf *ifq_head;
+ struct mbuf *ifq_tail;
+ int ifq_len;
+ int ifq_maxlen;
+ struct mtx ifq_mtx;
+
+ /* driver owned queue (used for bulk dequeue and prepend) UNLOCKED */
+ struct mbuf *ifq_drv_head;
+ struct mbuf *ifq_drv_tail;
+ int ifq_drv_len;
+ int ifq_drv_maxlen;
+
+ /* alternate queueing related fields */
+ int altq_type; /* discipline type */
+ int altq_flags; /* flags (e.g. ready, in-use) */
+ void *altq_disc; /* for discipline-specific use */
+ struct ifnet *altq_ifp; /* back pointer to interface */
+
+ int (*altq_enqueue)(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+ struct mbuf *(*altq_dequeue)(struct ifaltq *, int);
+ int (*altq_request)(struct ifaltq *, int, void *);
+
+ /* classifier fields */
+ void *altq_clfier; /* classifier-specific use */
+ void *(*altq_classify)(void *, struct mbuf *, int);
+
+ /* token bucket regulator */
+ struct tb_regulator *altq_tbr;
+
+ /* input traffic conditioner (doesn't belong to the output queue...) */
+ struct top_cdnr *altq_cdnr;
+};
+
+
+#ifdef _KERNEL
+
+/*
+ * packet attributes used by queueing disciplines.
+ * pattr_class is a discipline-dependent scheduling class that is
+ * set by a classifier.
+ * pattr_hdr and pattr_af may be used by a discipline to access
+ * the header within a mbuf. (e.g. ECN needs to update the CE bit)
+ * note that pattr_hdr could be stale after m_pullup, though link
+ * layer output routines usually don't use m_pullup. link-level
+ * compression also invalidates these fields. thus, pattr_hdr needs
+ * to be verified when a discipline touches the header.
+ */
+struct altq_pktattr {
+ void *pattr_class; /* sched class set by classifier */
+ int pattr_af; /* address family */
+ caddr_t pattr_hdr; /* saved header position in mbuf */
+};
+
+/*
+ * mbuf tag to carry a queue id (and hints for ECN).
+ */
+struct altq_tag {
+ u_int32_t qid; /* queue id */
+ /* hints for ecn */
+ int af; /* address family */
+ void *hdr; /* saved header position in mbuf */
+};
+
+/*
+ * a token-bucket regulator limits the rate that a network driver can
+ * dequeue packets from the output queue.
+ * modern cards are able to buffer a large number of packets and dequeue
+ * too many packets at a time. this bursty dequeue behavior makes it
+ * impossible for queueing disciplines to schedule packets.
+ * a token-bucket is used to control the burst size in a device
+ * independent manner.
+ */
+struct tb_regulator {
+ int64_t tbr_rate; /* (scaled) token bucket rate */
+ int64_t tbr_depth; /* (scaled) token bucket depth */
+
+ int64_t tbr_token; /* (scaled) current token */
+ int64_t tbr_filluptime; /* (scaled) time to fill up bucket */
+ u_int64_t tbr_last; /* last time token was updated */
+
+ int tbr_lastop; /* last dequeue operation type
+ needed for poll-and-dequeue */
+};
+
+/* if_altqflags */
+#define ALTQF_READY 0x01 /* driver supports alternate queueing */
+#define ALTQF_ENABLED 0x02 /* altq is in use */
+#define ALTQF_CLASSIFY 0x04 /* classify packets */
+#define ALTQF_CNDTNING 0x08 /* altq traffic conditioning is enabled */
+#define ALTQF_DRIVER1 0x40 /* driver specific */
+
+/* if_altqflags set internally only: */
+#define ALTQF_CANTCHANGE (ALTQF_READY)
+
+/* altq_dequeue 2nd arg */
+#define ALTDQ_REMOVE 1 /* dequeue mbuf from the queue */
+#define ALTDQ_POLL 2 /* don't dequeue mbuf from the queue */
+
+/* altq request types (currently only purge is defined) */
+#define ALTRQ_PURGE 1 /* purge all packets */
+
+#define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY)
+#define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED)
+#define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY)
+#define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING)
+
+#define ALTQ_SET_CNDTNING(ifq) ((ifq)->altq_flags |= ALTQF_CNDTNING)
+#define ALTQ_CLEAR_CNDTNING(ifq) ((ifq)->altq_flags &= ~ALTQF_CNDTNING)
+#define ALTQ_IS_ATTACHED(ifq) ((ifq)->altq_disc != NULL)
+
+#define ALTQ_ENQUEUE(ifq, m, pa, err) \
+ (err) = (*(ifq)->altq_enqueue)((ifq),(m),(pa))
+#define ALTQ_DEQUEUE(ifq, m) \
+ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_REMOVE)
+#define ALTQ_POLL(ifq, m) \
+ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_POLL)
+#define ALTQ_PURGE(ifq) \
+ (void)(*(ifq)->altq_request)((ifq), ALTRQ_PURGE, (void *)0)
+#define ALTQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
+#define TBR_IS_ENABLED(ifq) ((ifq)->altq_tbr != NULL)
+
+extern int altq_attach(struct ifaltq *, int, void *,
+ int (*)(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *),
+ struct mbuf *(*)(struct ifaltq *, int),
+ int (*)(struct ifaltq *, int, void *),
+ void *,
+ void *(*)(void *, struct mbuf *, int));
+extern int altq_detach(struct ifaltq *);
+extern int altq_enable(struct ifaltq *);
+extern int altq_disable(struct ifaltq *);
+extern struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int);
+extern int (*altq_input)(struct mbuf *, int);
+#if 0 /* ALTQ3_CLFIER_COMPAT */
+void altq_etherclassify(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+#endif
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_IF_ALTQ_H_ */
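To make the token-bucket comment in if_altq.h concrete: on each dequeue
attempt the regulator replenishes tokens in proportion to the machclk
interval since the last update, caps the bucket at tbr_depth, and refuses
the dequeue while the bucket is empty. A simplified sketch of that
bookkeeping -- the function name and the pkt_cost parameter are
illustrative, not the in-tree tbr_dequeue(), which also handles
poll-and-dequeue and works in scaled token units:

	/* Illustrative replenish-then-charge logic of a token bucket. */
	static int
	tbr_may_dequeue(struct tb_regulator *tbr, int64_t pkt_cost,
	    u_int64_t now)
	{
		int64_t interval = (int64_t)(now - tbr->tbr_last);

		if (interval >= tbr->tbr_filluptime)
			tbr->tbr_token = tbr->tbr_depth;	/* refilled */
		else {
			tbr->tbr_token += interval * tbr->tbr_rate;
			if (tbr->tbr_token > tbr->tbr_depth)
				tbr->tbr_token = tbr->tbr_depth;
		}
		tbr->tbr_last = now;

		if (tbr->tbr_token <= 0)
			return (0);	/* over the rate: hold the packet */
		tbr->tbr_token -= pkt_cost;	/* charge (scaled) pkt size */
		return (1);
	}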
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c
index f74ac9a1..e7822586 100644
--- a/freebsd/sys/net/bpf.c
+++ b/freebsd/sys/net/bpf.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_bpf.h>
#include <rtems/bsd/local/opt_compat.h>
+#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_netgraph.h>
#include <sys/types.h>
@@ -69,8 +70,13 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
#include <net/if.h>
-#define BPF_INTERNAL
+#include <net/if_var.h>
+#include <net/if_dl.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
@@ -78,6 +84,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
+#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -96,6 +103,20 @@ __FBSDID("$FreeBSD$");
MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
+struct bpf_if {
+#define bif_next bif_ext.bif_next
+#define bif_dlist bif_ext.bif_dlist
+ struct bpf_if_ext bif_ext; /* public members */
+ u_int bif_dlt; /* link layer type */
+ u_int bif_hdrlen; /* length of link header */
+ struct ifnet *bif_ifp; /* corresponding interface */
+ struct rwlock bif_lock; /* interface lock */
+ LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
+ int bif_flags; /* Interface flags */
+};
+
+CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
+
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
#define PRINET 26 /* interruptible */
@@ -107,7 +128,7 @@ MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#define BPF_ALIGNMENT32 sizeof(int32_t)
-#define BPF_WORDALIGN32(x) (((x)+(BPF_ALIGNMENT32-1))&~(BPF_ALIGNMENT32-1))
+#define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)
#ifndef BURN_BRIDGES
/*
@@ -148,7 +169,7 @@ struct bpf_dltlist32 {
* structures registered by different layers in the stack (i.e., 802.11
* frames, ethernet frames, etc).
*/
-static LIST_HEAD(, bpf_if) bpf_iflist;
+static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist;
static struct mtx bpf_mtx; /* bpf global lock */
static int bpf_bpfd_cnt;
@@ -157,7 +178,7 @@ static void bpf_detachd(struct bpf_d *);
static void bpf_detachd_locked(struct bpf_d *);
static void bpf_freed(struct bpf_d *);
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
- struct sockaddr *, int *, struct bpf_insn *);
+ struct sockaddr *, int *, struct bpf_d *);
static int bpf_setif(struct bpf_d *, struct ifreq *);
static void bpf_timed_out(void *);
static __inline void
@@ -188,8 +209,8 @@ static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
static VNET_DEFINE(int, bpf_optimize_writers) = 0;
#define V_bpf_optimize_writers VNET(bpf_optimize_writers)
-SYSCTL_VNET_INT(_net_bpf, OID_AUTO, optimize_writers,
- CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0,
+SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(bpf_optimize_writers), 0,
"Do not send packets until BPF program is set");
#ifndef __rtems__
@@ -479,7 +500,7 @@ bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
*/
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
+ struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
{
const struct ieee80211_bpf_params *p;
struct ether_header *eh;
@@ -561,37 +582,20 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
}
len = uio->uio_resid;
-
- if (len - hlen > ifp->if_mtu)
+ if (len < hlen || len - hlen > ifp->if_mtu)
return (EMSGSIZE);
- if ((unsigned)len > MJUM16BYTES)
+ m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
+ if (m == NULL)
return (EIO);
-
- if (len <= MHLEN)
- MGETHDR(m, M_WAIT, MT_DATA);
- else if (len <= MCLBYTES)
- m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
- else
- m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
-#if (MJUMPAGESIZE > MCLBYTES)
- len <= MJUMPAGESIZE ? MJUMPAGESIZE :
-#endif
- (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
m->m_pkthdr.len = m->m_len = len;
- m->m_pkthdr.rcvif = NULL;
*mp = m;
- if (m->m_len < hlen) {
- error = EPERM;
- goto bad;
- }
-
error = uiomove(mtod(m, u_char *), len, uio);
if (error)
goto bad;
- slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
+ slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
if (slen == 0) {
error = EPERM;
goto bad;
@@ -608,6 +612,10 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
else
m->m_flags |= M_MCAST;
}
+ if (d->bd_hdrcmplt == 0) {
+ memcpy(eh->ether_shost, IF_LLADDR(ifp),
+ sizeof(eh->ether_shost));
+ }
break;
}
@@ -632,7 +640,7 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
goto bad;
}
}
- bcopy(m->m_data, sockp->sa_data, hlen);
+ bcopy(mtod(m, const void *), sockp->sa_data, hlen);
}
*hdrlen = hlen;
@@ -656,13 +664,13 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
* Save sysctl value to protect from sysctl change
* between reads
*/
- op_w = V_bpf_optimize_writers;
+ op_w = V_bpf_optimize_writers || d->bd_writer;
if (d->bd_bif != NULL)
bpf_detachd_locked(d);
/*
* Point d at bp, and add d to the interface's list.
- * Since there are many applicaiotns using BPF for
+ * Since there are many applications using BPF for
* sending raw packets only (dhcpd, cdpd are good examples)
* we can delay adding d to the list of active listeners until
* some filter is configured.
@@ -760,7 +768,7 @@ bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
/*
* Add d to the list of active bp filters.
- * Reuqires bpf_attachd() to be called before
+ * Requires bpf_attachd() to be called before.
*/
static void
bpf_upgraded(struct bpf_d *d)
@@ -909,7 +917,7 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
struct bpf_d *d;
#ifndef __rtems__
- int error, size;
+ int error;
d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
error = devfs_set_cdevpriv(d, bpf_dtor);
@@ -932,6 +940,8 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
* particular buffer method.
*/
bpf_buffer_init(d);
+ if ((flags & FREAD) == 0)
+ d->bd_writer = 2;
d->bd_hbuf_in_use = 0;
d->bd_bufmode = BPF_BUFMODE_BUFFER;
d->bd_sig = SIGIO;
@@ -945,10 +955,6 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
- /* Allocate default buffers */
- size = d->bd_bufsize;
- bpf_buffer_ioctl_sblen(d, &size);
-
#ifndef __rtems__
return (0);
#else /* __rtems__ */
@@ -1163,6 +1169,7 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
struct ifnet *ifp;
struct mbuf *m, *mc;
struct sockaddr dst;
+ struct route ro;
int error, hlen;
error = devfs_get_cdevpriv((void **)&d);
@@ -1194,7 +1201,7 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
hlen = 0;
/* XXX: bpf_movein() can sleep */
error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
- &m, &dst, &hlen, d->bd_wfilter);
+ &m, &dst, &hlen, d);
if (error) {
d->bd_wdcount++;
return (error);
@@ -1204,7 +1211,7 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
dst.sa_family = pseudo_AF_HDRCMPLT;
if (d->bd_feedback) {
- mc = m_dup(m, M_DONTWAIT);
+ mc = m_dup(m, M_NOWAIT);
if (mc != NULL)
mc->m_pkthdr.rcvif = ifp;
/* Set M_PROMISC for outgoing packets to be discarded. */
@@ -1226,7 +1233,14 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
BPFD_UNLOCK(d);
#endif
- error = (*ifp->if_output)(ifp, m, &dst, NULL);
+ bzero(&ro, sizeof(ro));
+ if (hlen != 0) {
+ ro.ro_prepend = (u_char *)&dst.sa_data;
+ ro.ro_plen = hlen;
+ ro.ro_flags = RT_HAS_HEADER;
+ }
+
+ error = (*ifp->if_output)(ifp, m, &dst, &ro);
if (error)
d->bd_wdcount++;
@@ -1278,7 +1292,6 @@ reset_d(struct bpf_d *d)
/*
* FIONREAD Check for read packet available.
- * SIOCGIFADDR Get interface address - convenient hook to driver.
* BIOCGBLEN Get buffer len [for read()].
* BIOCSETF Set read filter.
* BIOCSETFNR Set read filter without resetting descriptor.
@@ -1347,7 +1360,7 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
#endif
case BIOCGETIF:
case BIOCGRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCGRTIMEOUT32:
#endif
case BIOCGSTATS:
@@ -1359,7 +1372,7 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
case FIONREAD:
case BIOCLOCK:
case BIOCSRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCSRTIMEOUT32:
#endif
case BIOCIMMEDIATE:
@@ -1415,19 +1428,6 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
break;
}
- case SIOCGIFADDR:
- {
- struct ifnet *ifp;
-
- if (d->bd_bif == NULL)
- error = EINVAL;
- else {
- ifp = d->bd_bif->bif_ifp;
- error = (*ifp->if_ioctl)(ifp, cmd, addr);
- }
- break;
- }
-
/*
* Get buffer len [for read()].
*/
@@ -1564,21 +1564,44 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
* Set interface.
*/
case BIOCSETIF:
- BPF_LOCK();
- error = bpf_setif(d, (struct ifreq *)addr);
- BPF_UNLOCK();
- break;
+ {
+ int alloc_buf, size;
+
+ /*
+ * Behavior here depends on the buffering model. If
+ * we're using kernel memory buffers, then we can
+ * allocate them here. If we're using zero-copy,
+ * then the user process must have registered buffers
+ * by the time we get here.
+ */
+ alloc_buf = 0;
+ BPFD_LOCK(d);
+ if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
+ d->bd_sbuf == NULL)
+ alloc_buf = 1;
+ BPFD_UNLOCK(d);
+ if (alloc_buf) {
+ size = d->bd_bufsize;
+ error = bpf_buffer_ioctl_sblen(d, &size);
+ if (error != 0)
+ break;
+ }
+ BPF_LOCK();
+ error = bpf_setif(d, (struct ifreq *)addr);
+ BPF_UNLOCK();
+ break;
+ }
/*
* Set read timeout.
*/
case BIOCSRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCSRTIMEOUT32:
#endif
{
struct timeval *tv = (struct timeval *)addr;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
struct timeval32 *tv32;
struct timeval tv64;
@@ -1604,12 +1627,12 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
* Get read timeout.
*/
case BIOCGRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCGRTIMEOUT32:
#endif
{
struct timeval *tv;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
struct timeval32 *tv32;
struct timeval tv64;
@@ -1621,7 +1644,7 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
tv->tv_sec = d->bd_rtout / hz;
tv->tv_usec = (d->bd_rtout % hz) * tick;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
if (cmd == BIOCGRTIMEOUT32) {
tv32 = (struct timeval32 *)addr;
tv32->tv_sec = tv->tv_sec;
@@ -2001,17 +2024,15 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
/* Check if interface is not being detached from BPF */
BPFIF_RLOCK(bp);
- if (bp->flags & BPFIF_FLAG_DYING) {
+ if (bp->bif_flags & BPFIF_FLAG_DYING) {
BPFIF_RUNLOCK(bp);
return (ENXIO);
}
BPFIF_RUNLOCK(bp);
/*
- * Behavior here depends on the buffering model. If we're using
- * kernel memory buffers, then we can allocate them here. If we're
- * using zero-copy, then the user process must have registered
- * buffers by the time we get here. If not, return an error.
+ * At this point, we expect the buffer is already allocated. If not,
+ * return an error.
*/
switch (d->bd_bufmode) {
case BPF_BUFMODE_BUFFER:
@@ -2131,10 +2152,10 @@ filt_bpfread(struct knote *kn, long hint)
ready = bpf_ready(d);
if (ready) {
kn->kn_data = d->bd_slen;
- while (d->bd_hbuf_in_use)
- mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
- PRINET, "bd_hbuf", 0);
- if (d->bd_hbuf)
+ /*
+ * Ignore the hold buffer if it is being copied to user space.
+ */
+ if (!d->bd_hbuf_in_use && d->bd_hbuf)
kn->kn_data += d->bd_hlen;
} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
callout_reset(&d->bd_callout, d->bd_rtout,
@@ -2405,12 +2426,19 @@ bpf_hdrlen(struct bpf_d *d)
static void
bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
{
+#ifndef __rtems__
+ struct bintime bt2, boottimebin;
+#else /* __rtems__ */
struct bintime bt2;
+#endif /* __rtems__ */
struct timeval tsm;
struct timespec tsn;
if ((tstype & BPF_T_MONOTONIC) == 0) {
bt2 = *bt;
+#ifndef __rtems__
+ getboottimebin(&boottimebin);
+#endif /* __rtems__ */
bintime_add(&bt2, &boottimebin);
bt = &bt2;
}
@@ -2466,9 +2494,6 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
* spot to do it.
*/
if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
- while (d->bd_hbuf_in_use)
- mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
- PRINET, "bd_hbuf", 0);
d->bd_fbuf = d->bd_hbuf;
d->bd_hbuf = NULL;
d->bd_hlen = 0;
@@ -2511,9 +2536,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
++d->bd_dcount;
return;
}
- while (d->bd_hbuf_in_use)
- mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
- PRINET, "bd_hbuf", 0);
+ KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
ROTATE_BUFFERS(d);
do_wakeup = 1;
curlen = 0;
@@ -2652,10 +2675,36 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
bp->bif_hdrlen = hdrlen;
- if (bootverbose)
+ if (bootverbose && IS_DEFAULT_VNET(curvnet))
if_printf(ifp, "bpf attached\n");
}
+#ifdef VIMAGE
+/*
+ * When moving interfaces between vnet instances we need a way to
+ * query the dlt and hdrlen before detach so we can re-attach the if_bpf
+ * after the vmove. We unfortunately have no device driver infrastructure
+ * to query the interface for these values after creation/attach, thus
+ * add this as a workaround.
+ */
+int
+bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
+{
+
+ if (bp == NULL)
+ return (ENXIO);
+ if (bif_dlt == NULL && bif_hdrlen == NULL)
+ return (0);
+
+ if (bif_dlt != NULL)
+ *bif_dlt = bp->bif_dlt;
+ if (bif_hdrlen != NULL)
+ *bif_hdrlen = bp->bif_hdrlen;
+
+ return (0);
+}
+#endif
+
/*
* Detach bpf from an interface. This involves detaching each descriptor
* associated with the interface. Notify each descriptor as it's detached
@@ -2664,52 +2713,51 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
void
bpfdetach(struct ifnet *ifp)
{
- struct bpf_if *bp;
+ struct bpf_if *bp, *bp_temp;
struct bpf_d *d;
-#ifdef INVARIANTS
int ndetached;
ndetached = 0;
-#endif
BPF_LOCK();
/* Find all bpf_if struct's which reference ifp and detach them. */
- do {
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- if (ifp == bp->bif_ifp)
- break;
- }
- if (bp != NULL)
- LIST_REMOVE(bp, bif_next);
+ LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
+ if (ifp != bp->bif_ifp)
+ continue;
- if (bp != NULL) {
-#ifdef INVARIANTS
- ndetached++;
-#endif
- while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
- }
- /* Free writer-only descriptors */
- while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
- }
+ LIST_REMOVE(bp, bif_next);
+ /* Add to to-be-freed list */
+ LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
- /*
- * Delay freing bp till interface is detached
- * and all routes through this interface are removed.
- * Mark bp as detached to restrict new consumers.
- */
- BPFIF_WLOCK(bp);
- bp->flags |= BPFIF_FLAG_DYING;
- BPFIF_WUNLOCK(bp);
+ ndetached++;
+ /*
+ * Delay freeing bp till interface is detached
+ * and all routes through this interface are removed.
+ * Mark bp as detached to restrict new consumers.
+ */
+ BPFIF_WLOCK(bp);
+ bp->bif_flags |= BPFIF_FLAG_DYING;
+ BPFIF_WUNLOCK(bp);
+
+		CTR4(KTR_NET, "%s: scheduling free for encap %d (%p) for if %p",
+ __func__, bp->bif_dlt, bp, ifp);
+
+ /* Free common descriptors */
+ while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
+ bpf_detachd_locked(d);
+ BPFD_LOCK(d);
+ bpf_wakeup(d);
+ BPFD_UNLOCK(d);
}
- } while (bp != NULL);
+
+ /* Free writer-only descriptors */
+ while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
+ bpf_detachd_locked(d);
+ BPFD_LOCK(d);
+ bpf_wakeup(d);
+ BPFD_UNLOCK(d);
+ }
+ }
BPF_UNLOCK();
#ifdef INVARIANTS
@@ -2721,32 +2769,46 @@ bpfdetach(struct ifnet *ifp)
/*
* Interface departure handler.
* Note departure event does not guarantee interface is going down.
+ * Interface renaming is currently done via a departure/arrival event pair.
+ *
+ * The departure handler is called after all routes pointing to the
+ * given interface have been removed and the interface is in the down
+ * state, restricting any packets from being sent or received. We assume
+ * it is now safe to free data allocated by BPF.
*/
static void
bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
{
- struct bpf_if *bp;
+ struct bpf_if *bp, *bp_temp;
+ int nmatched = 0;
BPF_LOCK();
- if ((bp = ifp->if_bpf) == NULL) {
- BPF_UNLOCK();
- return;
- }
+ /*
+ * Find matching entries in free list.
+ * Nothing should be found if bpfdetach() was not called.
+ */
+ LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
+ if (ifp != bp->bif_ifp)
+ continue;
- /* Check if bpfdetach() was called previously */
- if ((bp->flags & BPFIF_FLAG_DYING) == 0) {
- BPF_UNLOCK();
- return;
- }
+ CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
+ __func__, bp, ifp);
- CTR3(KTR_NET, "%s: freing BPF instance %p for interface %p",
- __func__, bp, ifp);
+ LIST_REMOVE(bp, bif_next);
- ifp->if_bpf = NULL;
+ rw_destroy(&bp->bif_lock);
+ free(bp, M_BPF);
+
+ nmatched++;
+ }
BPF_UNLOCK();
- rw_destroy(&bp->bif_lock);
- free(bp, M_BPF);
+ /*
+ * Note that we cannot zero other pointers to
+	 * custom DLTs possibly used by the given interface.
+ */
+ if (nmatched != 0)
+ ifp->if_bpf = NULL;
}
/*
@@ -2755,26 +2817,44 @@ bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
- int n, error;
struct ifnet *ifp;
struct bpf_if *bp;
+ u_int *lst;
+ int error, n, n1;
BPF_LOCK_ASSERT();
ifp = d->bd_bif->bif_ifp;
+again:
+ n1 = 0;
+ LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ if (bp->bif_ifp == ifp)
+ n1++;
+ }
+ if (bfl->bfl_list == NULL) {
+ bfl->bfl_len = n1;
+ return (0);
+ }
+ if (n1 > bfl->bfl_len)
+ return (ENOMEM);
+ BPF_UNLOCK();
+ lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
n = 0;
- error = 0;
+ BPF_LOCK();
LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp != ifp)
continue;
- if (bfl->bfl_list != NULL) {
- if (n >= bfl->bfl_len)
- return (ENOMEM);
- error = copyout(&bp->bif_dlt,
- bfl->bfl_list + n, sizeof(u_int));
+ if (n >= n1) {
+ free(lst, M_TEMP);
+ goto again;
}
+ lst[n] = bp->bif_dlt;
n++;
}
+ BPF_UNLOCK();
+ error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
+ free(lst, M_TEMP);
+ BPF_LOCK();
bfl->bfl_len = n;
return (error);
}
@@ -2999,6 +3079,7 @@ bpf_drvinit(void *unused)
mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
LIST_INIT(&bpf_iflist);
+ LIST_INIT(&bpf_freelist);
#ifndef __rtems__
dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
@@ -3214,3 +3295,34 @@ bpf_validate(const struct bpf_insn *f, int len)
}
#endif /* !DEV_BPF && !NETGRAPH_BPF */
+
+#ifdef DDB
+static void
+bpf_show_bpf_if(struct bpf_if *bpf_if)
+{
+
+ if (bpf_if == NULL)
+ return;
+ db_printf("%p:\n", bpf_if);
+#define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e);
+ /* bif_ext.bif_next */
+ /* bif_ext.bif_dlist */
+ BPF_DB_PRINTF("%#x", bif_dlt);
+ BPF_DB_PRINTF("%u", bif_hdrlen);
+ BPF_DB_PRINTF("%p", bif_ifp);
+ /* bif_lock */
+ /* bif_wlist */
+ BPF_DB_PRINTF("%#x", bif_flags);
+}
+
+DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
+{
+
+ if (!have_addr) {
+ db_printf("usage: show bpf_if <struct bpf_if *>\n");
+ return;
+ }
+
+ bpf_show_bpf_if((struct bpf_if *)addr);
+}
+#endif
diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h
index bfe8cfe0..f707f436 100644
--- a/freebsd/sys/net/bpf.h
+++ b/freebsd/sys/net/bpf.h
@@ -582,7 +582,7 @@ struct bpf_zbuf_header {
* input packets such as port scans, packets from old lost connections,
* etc. to force the connection to stay up).
*
- * The first byte of the PPP header (0xff03) is modified to accomodate
+ * The first byte of the PPP header (0xff03) is modified to accommodate
* the direction - 0x00 = IN, 0x01 = OUT.
*/
#define DLT_PPP_PPPD 166
@@ -1096,7 +1096,7 @@ struct bpf_zbuf_header {
#define DLT_NETANALYZER_TRANSPARENT 241
/*
- * IP-over-Infiniband, as specified by RFC 4391.
+ * IP-over-InfiniBand, as specified by RFC 4391.
*
* Requested by Petr Sumbera <petr.sumbera@oracle.com>.
*/
@@ -1138,7 +1138,145 @@ struct bpf_zbuf_header {
#define DLT_PFSYNC 246
#endif
-#define DLT_MATCHING_MAX 246 /* highest value in the "matching" range */
+/*
+ * Raw InfiniBand packets, starting with the Local Routing Header.
+ *
+ * Requested by Oren Kladnitsky <orenk@mellanox.com>.
+ */
+#define DLT_INFINIBAND 247
+
+/*
+ * SCTP, with no lower-level protocols (i.e., no IPv4 or IPv6).
+ *
+ * Requested by Michael Tuexen <Michael.Tuexen@lurchi.franken.de>.
+ */
+#define DLT_SCTP 248
+
+/*
+ * USB packets, beginning with a USBPcap header.
+ *
+ * Requested by Tomasz Mon <desowin@gmail.com>
+ */
+#define DLT_USBPCAP 249
+
+/*
+ * Schweitzer Engineering Laboratories "RTAC" product serial-line
+ * packets.
+ *
+ * Requested by Chris Bontje <chris_bontje@selinc.com>.
+ */
+#define DLT_RTAC_SERIAL 250
+
+/*
+ * Bluetooth Low Energy air interface link-layer packets.
+ *
+ * Requested by Mike Kershaw <dragorn@kismetwireless.net>.
+ */
+#define DLT_BLUETOOTH_LE_LL 251
+
+/*
+ * DLT type for upper-protocol layer PDU saves from wireshark.
+ *
+ * the actual contents are determined by two TAGs stored with each
+ * packet:
+ * EXP_PDU_TAG_LINKTYPE the link type (LINKTYPE_ value) of the
+ * original packet.
+ *
+ * EXP_PDU_TAG_PROTO_NAME the name of the wireshark dissector
+ * that can make sense of the data stored.
+ */
+#define DLT_WIRESHARK_UPPER_PDU 252
+
+/*
+ * DLT type for the netlink protocol (nlmon devices).
+ */
+#define DLT_NETLINK 253
+
+/*
+ * Bluetooth Linux Monitor headers for the BlueZ stack.
+ */
+#define DLT_BLUETOOTH_LINUX_MONITOR 254
+
+/*
+ * Bluetooth Basic Rate/Enhanced Data Rate baseband packets, as
+ * captured by Ubertooth.
+ */
+#define DLT_BLUETOOTH_BREDR_BB 255
+
+/*
+ * Bluetooth Low Energy link layer packets, as captured by Ubertooth.
+ */
+#define DLT_BLUETOOTH_LE_LL_WITH_PHDR 256
+
+/*
+ * PROFIBUS data link layer.
+ */
+#define DLT_PROFIBUS_DL 257
+
+/*
+ * Apple's DLT_PKTAP headers.
+ *
+ * Sadly, the folks at Apple either had no clue that the DLT_USERn values
+ * are for internal use within an organization and partners only, and
+ * didn't know that the right way to get a link-layer header type is to
+ * ask tcpdump.org for one, or knew and didn't care, so they just
+ * used DLT_USER2, which causes problems for everything except for
+ * their version of tcpdump.
+ *
+ * So I'll just give them one; hopefully this will show up in a
+ * libpcap release in time for them to get this into 10.10 Big Sur
+ * or whatever Mavericks' successor is called. LINKTYPE_PKTAP
+ * will be 258 *even on OS X*; that is *intentional*, so that
+ * PKTAP files look the same on *all* OSes (different OSes can have
+ * different numerical values for a given DLT_, but *MUST NOT* have
+ * different values for what goes in a file, as files can be moved
+ * between OSes!).
+ *
+ * When capturing, on a system with a Darwin-based OS, on a device
+ * that returns 149 (DLT_USER2 and Apple's DLT_PKTAP) with this
+ * version of libpcap, the DLT_ value for the pcap_t will be DLT_PKTAP,
+ * and that will continue to be DLT_USER2 on Darwin-based OSes. That way,
+ * binary compatibility with Mavericks is preserved for programs using
+ * this version of libpcap. This does mean that if you were using
+ * DLT_USER2 for some capture device on OS X, you can't do so with
+ * this version of libpcap, just as you can't with Apple's libpcap -
+ * on OS X, they define DLT_PKTAP to be DLT_USER2, so programs won't
+ * be able to distinguish between PKTAP and whatever you were using
+ * DLT_USER2 for.
+ *
+ * If the program saves the capture to a file using this version of
+ * libpcap's pcap_dump code, the LINKTYPE_ value in the file will be
+ * LINKTYPE_PKTAP, which will be 258, even on Darwin-based OSes.
+ * That way, the file will *not* be a DLT_USER2 file. That means
+ * that the latest version of tcpdump, when built with this version
+ * of libpcap, and sufficiently recent versions of Wireshark will
+ * be able to read those files and interpret them correctly; however,
+ * Apple's version of tcpdump in OS X 10.9 won't be able to handle
+ * them. (Hopefully, Apple will pick up this version of libpcap,
+ * and the corresponding version of tcpdump, so that tcpdump will
+ * be able to handle the old LINKTYPE_USER2 captures *and* the new
+ * LINKTYPE_PKTAP captures.)
+ */
+#ifdef __APPLE__
+#define DLT_PKTAP DLT_USER2
+#else
+#define DLT_PKTAP 258
+#endif
+
+/*
+ * Ethernet packets preceded by a header giving the last 6 octets
+ * of the preamble specified by 802.3-2012 Clause 65, section
+ * 65.1.3.2 "Transmit".
+ */
+#define DLT_EPON 259
+
+/*
+ * IPMI trace packets, as specified by Table 3-20 "Trace Data Block Format"
+ * in the PICMG HPM.2 specification.
+ */
+#define DLT_IPMI_HPM_2 260
+
+#define DLT_MATCHING_MAX 260 /* highest value in the "matching" range */
/*
* DLT and savefile link type values are split into a class and
@@ -1149,7 +1287,17 @@ struct bpf_zbuf_header {
/*
* The instruction encodings.
+ *
+ * Please inform tcpdump-workers@lists.tcpdump.org if you use any
+ * of the reserved values, so that we can note that they're used
+ * (and perhaps implement it in the reference BPF implementation
+ * and encourage its implementation elsewhere).
*/
+
+/*
+ * The upper 8 bits of the opcode aren't used. BSD/OS used 0x8000.
+ */
+
/* instruction classes */
#define BPF_CLASS(code) ((code) & 0x07)
#define BPF_LD 0x00
@@ -1166,6 +1314,7 @@ struct bpf_zbuf_header {
#define BPF_W 0x00
#define BPF_H 0x08
#define BPF_B 0x10
+/* 0x18 reserved; used by BSD/OS */
#define BPF_MODE(code) ((code) & 0xe0)
#define BPF_IMM 0x00
#define BPF_ABS 0x20
@@ -1173,6 +1322,8 @@ struct bpf_zbuf_header {
#define BPF_MEM 0x60
#define BPF_LEN 0x80
#define BPF_MSH 0xa0
+/* 0xc0 reserved; used by BSD/OS */
+/* 0xe0 reserved; used by BSD/OS */
/* alu/jmp fields */
#define BPF_OP(code) ((code) & 0xf0)
@@ -1185,11 +1336,30 @@ struct bpf_zbuf_header {
#define BPF_LSH 0x60
#define BPF_RSH 0x70
#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+/* 0xb0 reserved */
+/* 0xc0 reserved */
+/* 0xd0 reserved */
+/* 0xe0 reserved */
+/* 0xf0 reserved */
+
#define BPF_JA 0x00
#define BPF_JEQ 0x10
#define BPF_JGT 0x20
#define BPF_JGE 0x30
#define BPF_JSET 0x40
+/* 0x50 reserved; used on BSD/OS */
+/* 0x60 reserved */
+/* 0x70 reserved */
+/* 0x80 reserved */
+/* 0x90 reserved */
+/* 0xa0 reserved */
+/* 0xb0 reserved */
+/* 0xc0 reserved */
+/* 0xd0 reserved */
+/* 0xe0 reserved */
+/* 0xf0 reserved */
#define BPF_SRC(code) ((code) & 0x08)
#define BPF_K 0x00
#define BPF_X 0x08
@@ -1197,11 +1367,43 @@ struct bpf_zbuf_header {
/* ret - BPF_K and BPF_X also apply */
#define BPF_RVAL(code) ((code) & 0x18)
#define BPF_A 0x10
+/* 0x18 reserved */
/* misc */
#define BPF_MISCOP(code) ((code) & 0xf8)
#define BPF_TAX 0x00
+/* 0x08 reserved */
+/* 0x10 reserved */
+/* 0x18 reserved */
+/* #define BPF_COP 0x20 NetBSD "coprocessor" extensions */
+/* 0x28 reserved */
+/* 0x30 reserved */
+/* 0x38 reserved */
+/* #define BPF_COPX 0x40 NetBSD "coprocessor" extensions */
+/* also used on BSD/OS */
+/* 0x48 reserved */
+/* 0x50 reserved */
+/* 0x58 reserved */
+/* 0x60 reserved */
+/* 0x68 reserved */
+/* 0x70 reserved */
+/* 0x78 reserved */
#define BPF_TXA 0x80
+/* 0x88 reserved */
+/* 0x90 reserved */
+/* 0x98 reserved */
+/* 0xa0 reserved */
+/* 0xa8 reserved */
+/* 0xb0 reserved */
+/* 0xb8 reserved */
+/* 0xc0 reserved; used on BSD/OS */
+/* 0xc8 reserved */
+/* 0xd0 reserved */
+/* 0xd8 reserved */
+/* 0xe0 reserved */
+/* 0xe8 reserved */
+/* 0xf0 reserved */
+/* 0xf8 reserved */
/*
* The instruction data structure.
@@ -1237,9 +1439,9 @@ SYSCTL_DECL(_net_bpf);
/*
* Rotate the packet buffers in descriptor d. Move the store buffer into the
- * hold slot, and the free buffer ino the store slot. Zero the length of the
- * new store buffer. Descriptor lock should be held. Hold buffer must
- * not be marked "in use".
+ * hold slot, and the free buffer into the store slot. Zero the length of the
+ * new store buffer. Descriptor lock should be held. One must be careful to
+ * not rotate the buffers twice, i.e. if fbuf != NULL.
*/
#define ROTATE_BUFFERS(d) do { \
(d)->bd_hbuf = (d)->bd_sbuf; \
@@ -1252,21 +1454,14 @@ SYSCTL_DECL(_net_bpf);
/*
* Descriptor associated with each attached hardware interface.
- * FIXME: this structure is exposed to external callers to speed up
- * bpf_peers_present() call. However we cover all fields not needed by
- * this function via BPF_INTERNAL define
+ * Part of this structure is exposed to external callers to speed up
+ * bpf_peers_present() calls.
*/
-struct bpf_if {
+struct bpf_if;
+
+struct bpf_if_ext {
LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */
-#ifdef BPF_INTERNAL
- u_int bif_dlt; /* link layer type */
- u_int bif_hdrlen; /* length of link header */
- struct ifnet *bif_ifp; /* corresponding interface */
- struct rwlock bif_lock; /* interface lock */
- LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
- int flags; /* Interface flags */
-#endif
};
void bpf_bufheld(struct bpf_d *d);
@@ -1277,6 +1472,9 @@ void bpf_mtap2(struct bpf_if *, void *, u_int, struct mbuf *);
void bpfattach(struct ifnet *, u_int, u_int);
void bpfattach2(struct ifnet *, u_int, u_int, struct bpf_if **);
void bpfdetach(struct ifnet *);
+#ifdef VIMAGE
+int bpf_get_bp_params(struct bpf_if *, u_int *, u_int *);
+#endif
void bpfilterattach(int);
u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);
@@ -1284,8 +1482,10 @@ u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);
static __inline int
bpf_peers_present(struct bpf_if *bpf)
{
+ struct bpf_if_ext *ext;
- if (!LIST_EMPTY(&bpf->bif_dlist))
+ ext = (struct bpf_if_ext *)bpf;
+ if (!LIST_EMPTY(&ext->bif_dlist))
return (1);
return (0);
}
@@ -1313,4 +1513,12 @@ bpf_peers_present(struct bpf_if *bpf)
*/
#define BPF_MEMWORDS 16
+#ifdef _SYS_EVENTHANDLER_H_
+/* BPF attach/detach events */
+struct ifnet;
+typedef void (*bpf_track_fn)(void *, struct ifnet *, int /* dlt */,
+ int /* 1 =>'s attach */);
+EVENTHANDLER_DECLARE(bpf_track, bpf_track_fn);
+#endif /* _SYS_EVENTHANDLER_H_ */
+
#endif /* _NET_BPF_H_ */
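The opcode macros above combine into complete BPF programs. A classic
example in the style of the bpf(4) manual page, assuming a DLT_EN10MB
interface: load the Ethernet type field at byte offset 12, accept the
whole packet if it carries IPv4, and drop everything else (BPF_STMT and
BPF_JUMP are the initializer macros defined alongside struct bpf_insn in
this header):

	#include <net/bpf.h>
	#include <net/ethernet.h>

	static struct bpf_insn ip_only[] = {
		/* A <- half-word at offset 12 (Ethernet type) */
		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
		/* if A == ETHERTYPE_IP fall through, else skip 1 insn */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_IP, 0, 1),
		/* accept: snapshot the whole packet */
		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
		/* reject */
		BPF_STMT(BPF_RET+BPF_K, 0),
	};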
diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c
index ec6aed74..d42df1b0 100644
--- a/freebsd/sys/net/bpf_buffer.c
+++ b/freebsd/sys/net/bpf_buffer.c
@@ -81,8 +81,6 @@ __FBSDID("$FreeBSD$");
#include <net/bpf_buffer.h>
#include <net/bpfdesc.h>
-#define PRINET 26 /* interruptible */
-
/*
* Implement historical kernel memory buffering model for BPF: two malloc(9)
* kernel buffers are hung off of the descriptor. The size is fixed prior to
@@ -193,9 +191,6 @@ bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i)
return (EINVAL);
}
- while (d->bd_hbuf_in_use)
- mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
- PRINET, "bd_hbuf", 0);
/* Free old buffers if set */
if (d->bd_fbuf != NULL)
free(d->bd_fbuf, M_BPF);
diff --git a/freebsd/sys/net/bpf_filter.c b/freebsd/sys/net/bpf_filter.c
index a313f4bd..941fa290 100644
--- a/freebsd/sys/net/bpf_filter.c
+++ b/freebsd/sys/net/bpf_filter.c
@@ -41,6 +41,9 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
+#if !defined(_KERNEL)
+#include <strings.h>
+#endif
#if !defined(_KERNEL) || defined(sun)
#include <netinet/in.h>
#endif
@@ -98,7 +101,7 @@ m_xword(struct mbuf *m, bpf_u_int32 k, int *err)
while (k >= len) {
k -= len;
m = m->m_next;
- if (m == 0)
+ if (m == NULL)
goto bad;
len = m->m_len;
}
@@ -108,7 +111,7 @@ m_xword(struct mbuf *m, bpf_u_int32 k, int *err)
return (EXTRACT_LONG(cp));
}
m0 = m->m_next;
- if (m0 == 0 || m0->m_len + len - k < 4)
+ if (m0 == NULL || m0->m_len + len - k < 4)
goto bad;
*err = 0;
np = mtod(m0, u_char *);
@@ -147,7 +150,7 @@ m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err)
while (k >= len) {
k -= len;
m = m->m_next;
- if (m == 0)
+ if (m == NULL)
goto bad;
len = m->m_len;
}
@@ -157,7 +160,7 @@ m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err)
return (EXTRACT_SHORT(cp));
}
m0 = m->m_next;
- if (m0 == 0)
+ if (m0 == NULL)
goto bad;
*err = 0;
return ((cp[0] << 8) | mtod(m0, u_char *)[0]);
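The m_xword() and m_xhalf() fixes above both sit on the same chain walk: skip whole mbufs until offset k falls inside the current one, then cope with a datum that straddles two mbufs. A standalone sketch of just the walk, leaving the straddling case to the caller; m_seek() is a hypothetical name:

static struct mbuf *
m_seek(struct mbuf *m, bpf_u_int32 k, bpf_u_int32 *off)
{
	u_int len;

	for (len = m->m_len; k >= len; len = m->m_len) {
		k -= len;
		m = m->m_next;
		if (m == NULL)
			return (NULL);	/* offset lies beyond the chain */
	}
	*off = k;
	return (m);
}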
diff --git a/freebsd/sys/net/bridgestp.c b/freebsd/sys/net/bridgestp.c
index 167bc59f..5fea7ae7 100644
--- a/freebsd/sys/net/bridgestp.c
+++ b/freebsd/sys/net/bridgestp.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/callout.h>
#include <sys/module.h>
#include <sys/proc.h>
@@ -53,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_llc.h>
@@ -236,7 +238,7 @@ bstp_transmit_tcn(struct bstp_state *bs, struct bstp_port *bp)
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
return;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
@@ -350,7 +352,7 @@ bstp_send_bpdu(struct bstp_state *bs, struct bstp_port *bp,
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
return;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
@@ -789,7 +791,7 @@ bstp_assign_roles(struct bstp_state *bs)
bs->bs_root_htime = bs->bs_bridge_htime;
bs->bs_root_port = NULL;
- /* check if any recieved info supersedes us */
+ /* check if any received info supersedes us */
LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
if (bp->bp_infois != BSTP_INFO_RECEIVED)
continue;
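The MGETHDR(M_DONTWAIT) conversions above are mechanical: FreeBSD retired the macro spelling and the M_DONTWAIT alias in favor of m_gethdr(9) and M_NOWAIT. A sketch of the resulting allocation idiom; bstp_pdu_alloc() is a hypothetical wrapper:

static struct mbuf *
bstp_pdu_alloc(u_int len)
{
	struct mbuf *m;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);	/* may fail under pressure; no sleeping here */
	m->m_pkthdr.len = m->m_len = len;
	return (m);
}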
diff --git a/freebsd/sys/net/ethernet.h b/freebsd/sys/net/ethernet.h
index ae7341ee..bc5fa9cb 100644
--- a/freebsd/sys/net/ethernet.h
+++ b/freebsd/sys/net/ethernet.h
@@ -71,6 +71,28 @@ struct ether_addr {
} __packed;
#define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */
+#define ETHER_IS_BROADCAST(addr) \
+ (((addr)[0] & (addr)[1] & (addr)[2] & \
+ (addr)[3] & (addr)[4] & (addr)[5]) == 0xff)
+
+/*
+ * 802.1q Virtual LAN header.
+ */
+struct ether_vlan_header {
+ uint8_t evl_dhost[ETHER_ADDR_LEN];
+ uint8_t evl_shost[ETHER_ADDR_LEN];
+ uint16_t evl_encap_proto;
+ uint16_t evl_tag;
+ uint16_t evl_proto;
+} __packed;
+
+#define EVL_VLID_MASK 0x0FFF
+#define EVL_PRI_MASK 0xE000
+#define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK)
+#define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7)
+#define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1)
+#define EVL_MAKETAG(vlid, pri, cfi) \
+ ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK))
/*
* NOTE: 0x0000-0x05DC (0..1500) are generally IEEE 802.3 length fields.
@@ -314,6 +336,7 @@ struct ether_addr {
#define ETHERTYPE_SLOW 0x8809 /* 802.3ad link aggregation (LACP) */
#define ETHERTYPE_PPP 0x880B /* PPP (obsolete by PPPoE) */
#define ETHERTYPE_HITACHI 0x8820 /* Hitachi Cable (Optoelectronic Systems Laboratory) */
+#define ETHERTYPE_TEST 0x8822 /* Network Conformance Testing */
#define ETHERTYPE_MPLS 0x8847 /* MPLS Unicast */
#define ETHERTYPE_MPLS_MCAST 0x8848 /* MPLS Multicast */
#define ETHERTYPE_AXIS 0x8856 /* Axis Communications AB proprietary bootstrap/config */
@@ -375,8 +398,8 @@ extern void ether_demux(struct ifnet *, struct mbuf *);
extern void ether_ifattach(struct ifnet *, const u_int8_t *);
extern void ether_ifdetach(struct ifnet *);
extern int ether_ioctl(struct ifnet *, u_long, caddr_t);
-extern int ether_output(struct ifnet *,
- struct mbuf *, struct sockaddr *, struct route *);
+extern int ether_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
extern int ether_output_frame(struct ifnet *, struct mbuf *);
extern char *ether_sprintf(const u_int8_t *);
void ether_vlan_mtap(struct bpf_if *, struct mbuf *,
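A worked example for the 802.1Q tag macros added to this header: a TCI of 0x6123 carries priority 3 in bits 13..15, CFI 0 in bit 12 and VLAN ID 0x123 in bits 0..11, so EVL_PRIOFTAG(0x6123) == 3, EVL_CFIOFTAG(0x6123) == 0, EVL_VLANOFTAG(0x6123) == 0x123, and EVL_MAKETAG(0x123, 3, 0) reassembles 0x6123. A small helper built only from those macros; vlan_retag() is a hypothetical name:

static uint16_t
vlan_retag(uint16_t tag, uint16_t newvid)
{
	/* Preserve the priority and CFI bits, replace only the VLAN ID. */
	return (EVL_MAKETAG(newvid & EVL_VLID_MASK,
	    EVL_PRIOFTAG(tag), EVL_CFIOFTAG(tag)));
}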
diff --git a/freebsd/sys/net/flowtable.h b/freebsd/sys/net/flowtable.h
index d810fa33..5a1d9273 100644
--- a/freebsd/sys/net/flowtable.h
+++ b/freebsd/sys/net/flowtable.h
@@ -1,83 +1,56 @@
-/**************************************************************************
-
-Copyright (c) 2008-2010, BitGravity Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the BitGravity Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-$FreeBSD$
-
-***************************************************************************/
+/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
+ * Copyright (c) 2008-2010, BitGravity Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the BitGravity Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
#ifndef _NET_FLOWTABLE_H_
#define _NET_FLOWTABLE_H_
-#ifdef _KERNEL
-
-#define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */
-#define FL_PCPU (1<<1) /* pcpu cache */
-#define FL_NOAUTO (1<<2) /* don't automatically add flentry on miss */
-#define FL_IPV6 (1<<9)
-
-#define FL_TCP (1<<11)
-#define FL_SCTP (1<<12)
-#define FL_UDP (1<<13)
-#define FL_DEBUG (1<<14)
-#define FL_DEBUG_ALL (1<<15)
-
-struct flowtable;
-struct flentry;
-struct route;
-struct route_in6;
+struct flowtable_stat {
+ uint64_t ft_collisions;
+ uint64_t ft_misses;
+ uint64_t ft_free_checks;
+ uint64_t ft_frees;
+ uint64_t ft_hits;
+ uint64_t ft_lookups;
+ uint64_t ft_fail_lle_invalid;
+ uint64_t ft_inserts;
+};
-VNET_DECLARE(struct flowtable *, ip_ft);
-#define V_ip_ft VNET(ip_ft)
-
-VNET_DECLARE(struct flowtable *, ip6_ft);
-#define V_ip6_ft VNET(ip6_ft)
-
-struct flowtable *flowtable_alloc(char *name, int nentry, int flags);
+#ifdef _KERNEL
/*
- * Given a flow table, look up the L3 and L2 information and
- * return it in the route.
- *
+ * Given a flow table, look up the L3 and L2 information
+ * and return it in the route.
*/
-struct flentry *flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af);
-
-struct flentry *flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
- struct sockaddr_storage *dsa, uint32_t fibnum, int flags);
-
-int kern_flowtable_insert(struct flowtable *ft, struct sockaddr_storage *ssa,
- struct sockaddr_storage *dsa, struct route *ro, uint32_t fibnum, int flags);
-
-void flow_invalidate(struct flentry *fl);
-void flowtable_route_flush(struct flowtable *ft, struct rtentry *rt);
-
-void flow_to_route(struct flentry *fl, struct route *ro);
-
-void flow_to_route_in6(struct flentry *fl, struct route_in6 *ro);
-
+int flowtable_lookup(sa_family_t, struct mbuf *, struct route *);
+void flowtable_route_flush(sa_family_t, struct rtentry *);
#endif /* _KERNEL */
-#endif
+#endif /* !_NET_FLOWTABLE_H_ */
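The rewritten header reduces the flowtable to two kernel entry points keyed by address family plus a statistics record shared with userland. A hedged sketch of how an output path might consult it, assuming the conventional 0-on-success return; ip_output_try_flow() is hypothetical:

static int
ip_output_try_flow(struct mbuf *m, struct route *ro)
{

	if (flowtable_lookup(AF_INET, m, ro) == 0)
		return (0);	/* route filled in from a cached flow */
	return (ENOENT);	/* caller falls back to a full lookup */
}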
diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c
index 5172ad54..619db8af 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.c
+++ b/freebsd/sys/net/ieee8023ad_lacp.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/callout.h>
+#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/malloc.h>
@@ -44,8 +45,10 @@ __FBSDID("$FreeBSD$");
#include <machine/stdarg.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/rwlock.h>
+#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/ethernet.h>
#include <net/if_media.h>
@@ -189,30 +192,37 @@ static const char *lacp_format_portid(const struct lacp_portid *, char *,
static void lacp_dprintf(const struct lacp_port *, const char *, ...)
__attribute__((__format__(__printf__, 2, 3)));
-static int lacp_debug = 0;
-SYSCTL_INT(_net, OID_AUTO, lacp_debug, CTLFLAG_RW | CTLFLAG_TUN,
- &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)");
-TUNABLE_INT("net.lacp_debug", &lacp_debug);
+static VNET_DEFINE(int, lacp_debug);
+#define V_lacp_debug VNET(lacp_debug)
+SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad");
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET,
+ &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)");
-#define LACP_DPRINTF(a) if (lacp_debug > 0) { lacp_dprintf a ; }
-#define LACP_TRACE(a) if (lacp_debug > 1) { lacp_dprintf(a,"%s\n",__func__); }
+static VNET_DEFINE(int, lacp_default_strict_mode) = 1;
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode, CTLFLAG_RWTUN,
+ &VNET_NAME(lacp_default_strict_mode), 0,
+ "LACP strict protocol compliance default");
+
+#define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; }
+#define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); }
+#define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; }
/*
* partner administration variables.
* XXX should be configurable.
*/
-static const struct lacp_peerinfo lacp_partner_admin = {
+static const struct lacp_peerinfo lacp_partner_admin_optimistic = {
.lip_systemid = { .lsi_prio = 0xffff },
.lip_portid = { .lpi_prio = 0xffff },
-#if 1
- /* optimistic */
.lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
-#else
- /* pessimistic */
+};
+
+static const struct lacp_peerinfo lacp_partner_admin_strict = {
+ .lip_systemid = { .lsi_prio = 0xffff },
+ .lip_portid = { .lpi_prio = 0xffff },
.lip_state = 0,
-#endif
};
static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
@@ -298,11 +308,16 @@ lacp_pdu_input(struct lacp_port *lp, struct mbuf *m)
goto bad;
}
- if (lacp_debug > 0) {
+ if (V_lacp_debug > 0) {
lacp_dprintf(lp, "lacpdu receive\n");
lacp_dump_lacpdu(du);
}
+ if ((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_rx_test) {
+ LACP_TPRINTF((lp, "Dropping RX PDU\n"));
+ goto bad;
+ }
+
LACP_LOCK(lsc);
lacp_sm_rx(lp, du);
LACP_UNLOCK(lsc);
@@ -350,7 +365,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp)
LACP_LOCK_ASSERT(lp->lp_lsc);
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
return (ENOMEM);
}
@@ -378,7 +393,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp)
sizeof(du->ldu_collector));
du->ldu_collector.lci_maxdelay = 0;
- if (lacp_debug > 0) {
+ if (V_lacp_debug > 0) {
lacp_dprintf(lp, "lacpdu transmit\n");
lacp_dump_lacpdu(du);
}
@@ -404,7 +419,7 @@ lacp_xmit_marker(struct lacp_port *lp)
LACP_LOCK_ASSERT(lp->lp_lsc);
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
return (ENOMEM);
}
@@ -490,12 +505,14 @@ lacp_tick(void *arg)
if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
continue;
+ CURVNET_SET(lp->lp_ifp->if_vnet);
lacp_run_timers(lp);
lacp_select(lp);
lacp_sm_mux(lp);
lacp_sm_tx(lp);
lacp_sm_ptx_tx_schedule(lp);
+ CURVNET_RESTORE();
}
callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
}
@@ -512,20 +529,17 @@ lacp_port_create(struct lagg_port *lgp)
int error;
boolean_t active = TRUE; /* XXX should be configurable */
- boolean_t fast = FALSE; /* XXX should be configurable */
+ boolean_t fast = FALSE; /* Configurable via ioctl */
- bzero((char *)&sdl, sizeof(sdl));
- sdl.sdl_len = sizeof(sdl);
- sdl.sdl_family = AF_LINK;
- sdl.sdl_index = ifp->if_index;
- sdl.sdl_type = IFT_ETHER;
+ link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER);
sdl.sdl_alen = ETHER_ADDR_LEN;
bcopy(&ethermulticastaddr_slowprotocols,
LLADDR(&sdl), ETHER_ADDR_LEN);
error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
if (error) {
- printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname);
+ printf("%s: ADDMULTI failed on %s\n", __func__,
+ lgp->lp_ifp->if_xname);
return (error);
}
@@ -535,7 +549,7 @@ lacp_port_create(struct lagg_port *lgp)
return (ENOMEM);
LACP_LOCK(lsc);
- lgp->lp_psc = (caddr_t)lp;
+ lgp->lp_psc = lp;
lp->lp_ifp = ifp;
lp->lp_lagg = lgp;
lp->lp_lsc = lsc;
@@ -572,17 +586,18 @@ lacp_port_destroy(struct lagg_port *lgp)
lacp_disable_distributing(lp);
lacp_unselect(lp);
+ LIST_REMOVE(lp, lp_next);
+ LACP_UNLOCK(lsc);
+
/* The address may have already been removed by if_purgemaddrs() */
if (!lgp->lp_detaching)
if_delmulti_ifma(lp->lp_ifma);
- LIST_REMOVE(lp, lp_next);
- LACP_UNLOCK(lsc);
free(lp, M_DEVBUF);
}
void
-lacp_req(struct lagg_softc *sc, caddr_t data)
+lacp_req(struct lagg_softc *sc, void *data)
{
struct lacp_opreq *req = (struct lacp_opreq *)data;
struct lacp_softc *lsc = LACP_SOFTC(sc);
@@ -590,7 +605,7 @@ lacp_req(struct lagg_softc *sc, caddr_t data)
bzero(req, sizeof(struct lacp_opreq));
- /*
+ /*
* If the LACP softc is NULL, return with the opreq structure full of
* zeros. It is normal for the softc to be NULL while the lagg is
* being destroyed.
@@ -621,7 +636,7 @@ lacp_req(struct lagg_softc *sc, caddr_t data)
}
void
-lacp_portreq(struct lagg_port *lgp, caddr_t data)
+lacp_portreq(struct lagg_port *lgp, void *data)
{
struct lacp_opreq *req = (struct lacp_opreq *)data;
struct lacp_port *lp = LACP_PORT(lgp);
@@ -665,6 +680,7 @@ lacp_disable_distributing(struct lacp_port *lp)
{
struct lacp_aggregator *la = lp->lp_aggregator;
struct lacp_softc *lsc = lp->lp_lsc;
+ struct lagg_softc *sc = lsc->lsc_softc;
char buf[LACP_LAGIDSTR_MAX+1];
LACP_LOCK_ASSERT(lsc);
@@ -684,6 +700,7 @@ lacp_disable_distributing(struct lacp_port *lp)
TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
la->la_nports--;
+ sc->sc_active = la->la_nports;
if (lsc->lsc_active_aggregator == la) {
lacp_suppress_distributing(lsc, la);
@@ -700,6 +717,7 @@ lacp_enable_distributing(struct lacp_port *lp)
{
struct lacp_aggregator *la = lp->lp_aggregator;
struct lacp_softc *lsc = lp->lp_lsc;
+ struct lagg_softc *sc = lsc->lsc_softc;
char buf[LACP_LAGIDSTR_MAX+1];
LACP_LOCK_ASSERT(lsc);
@@ -716,6 +734,7 @@ lacp_enable_distributing(struct lacp_port *lp)
KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
la->la_nports++;
+ sc->sc_active = la->la_nports;
lp->lp_state |= LACP_STATE_DISTRIBUTING;
@@ -734,26 +753,26 @@ lacp_transit_expire(void *vp)
LACP_LOCK_ASSERT(lsc);
+ CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet);
LACP_TRACE(NULL);
+ CURVNET_RESTORE();
lsc->lsc_suppress_distributing = FALSE;
}
-int
+void
lacp_attach(struct lagg_softc *sc)
{
struct lacp_softc *lsc;
- lsc = malloc(sizeof(struct lacp_softc),
- M_DEVBUF, M_NOWAIT|M_ZERO);
- if (lsc == NULL)
- return (ENOMEM);
+ lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO);
- sc->sc_psc = (caddr_t)lsc;
+ sc->sc_psc = lsc;
lsc->lsc_softc = sc;
- lsc->lsc_hashkey = arc4random();
+ lsc->lsc_hashkey = m_ether_tcpip_hash_init();
lsc->lsc_active_aggregator = NULL;
+ lsc->lsc_strict_mode = VNET(lacp_default_strict_mode);
LACP_LOCK_INIT(lsc);
TAILQ_INIT(&lsc->lsc_aggregators);
LIST_INIT(&lsc->lsc_ports);
@@ -764,27 +783,23 @@ lacp_attach(struct lagg_softc *sc)
/* if the lagg is already up then do the same */
if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
lacp_init(sc);
-
- return (0);
}
-int
-lacp_detach(struct lagg_softc *sc)
+void
+lacp_detach(void *psc)
{
- struct lacp_softc *lsc = LACP_SOFTC(sc);
+ struct lacp_softc *lsc = (struct lacp_softc *)psc;
KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
("aggregators still active"));
KASSERT(lsc->lsc_active_aggregator == NULL,
("aggregator still attached"));
- sc->sc_psc = NULL;
callout_drain(&lsc->lsc_transit_callout);
callout_drain(&lsc->lsc_callout);
LACP_LOCK_DESTROY(lsc);
free(lsc, M_DEVBUF);
- return (0);
}
void
@@ -827,10 +842,11 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
return (NULL);
}
- if (sc->use_flowid && (m->m_flags & M_FLOWID))
- hash = m->m_pkthdr.flowid;
+ if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+ M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ hash = m->m_pkthdr.flowid >> sc->flowid_shift;
else
- hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey);
+ hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
hash %= pm->pm_count;
lp = pm->pm_map[hash];
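A note on the selection above: with LAGG_OPT_USE_FLOWID set and a hardware hash present (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE), the RSS flowid is reused after discarding flowid_shift low-order bits; otherwise m_ether_tcpip_hash() computes a software hash over the Ethernet/IP/TCP headers with the key seeded in lacp_attach(). Either way the result is reduced modulo pm_count, so e.g. a flowid of 0x2b with flowid_shift 0 on a two-port aggregate selects pm_map[0x2b % 2], i.e. pm_map[1].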
@@ -920,7 +936,6 @@ lacp_aggregator_bandwidth(struct lacp_aggregator *la)
static void
lacp_select_active_aggregator(struct lacp_softc *lsc)
{
- struct lagg_softc *sc = lsc->lsc_softc;
struct lacp_aggregator *la;
struct lacp_aggregator *best_la = NULL;
uint64_t best_speed = 0;
@@ -940,13 +955,13 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
speed, la->la_nports));
- /* This aggregator is chosen if
- * the partner has a better system priority
- * or, the total aggregated speed is higher
- * or, it is already the chosen aggregator
+ /*
+ * This aggregator is chosen if the partner has a better
+ * system priority or, the total aggregated speed is higher
+ * or, it is already the chosen aggregator
*/
if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) <
- LACP_SYS_PRI(best_la->la_partner)) ||
+ LACP_SYS_PRI(best_la->la_partner)) ||
speed > best_speed ||
(speed == best_speed &&
la == lsc->lsc_active_aggregator)) {
@@ -972,7 +987,6 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
if (lsc->lsc_active_aggregator != best_la) {
- sc->sc_ifp->if_baudrate = best_speed;
lsc->lsc_active_aggregator = best_la;
lacp_update_portmap(lsc);
if (best_la) {
@@ -988,15 +1002,18 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
static void
lacp_update_portmap(struct lacp_softc *lsc)
{
+ struct lagg_softc *sc = lsc->lsc_softc;
struct lacp_aggregator *la;
struct lacp_portmap *p;
struct lacp_port *lp;
+ uint64_t speed;
u_int newmap;
int i;
newmap = lsc->lsc_activemap == 0 ? 1 : 0;
p = &lsc->lsc_pmap[newmap];
la = lsc->lsc_active_aggregator;
+ speed = 0;
bzero(p, sizeof(struct lacp_portmap));
if (la != NULL && la->la_nports > 0) {
@@ -1005,7 +1022,9 @@ lacp_update_portmap(struct lacp_softc *lsc)
TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q)
p->pm_map[i++] = lp;
KASSERT(i == p->pm_count, ("Invalid port count"));
+ speed = lacp_aggregator_bandwidth(la);
}
+ sc->sc_ifp->if_baudrate = speed;
/* switch the active portmap over */
atomic_store_rel_int(&lsc->lsc_activemap, newmap);
@@ -1054,12 +1073,16 @@ lacp_compose_key(struct lacp_port *lp)
case IFM_100_T4:
case IFM_100_VG:
case IFM_100_T2:
+ case IFM_100_T:
key = IFM_100_TX;
break;
case IFM_1000_SX:
case IFM_1000_LX:
case IFM_1000_CX:
case IFM_1000_T:
+ case IFM_1000_KX:
+ case IFM_1000_SGMII:
+ case IFM_1000_CX_SGMII:
key = IFM_1000_SX;
break;
case IFM_10G_LR:
@@ -1069,15 +1092,53 @@ lacp_compose_key(struct lacp_port *lp)
case IFM_10G_TWINAX_LONG:
case IFM_10G_LRM:
case IFM_10G_T:
+ case IFM_10G_KX4:
+ case IFM_10G_KR:
+ case IFM_10G_CR1:
+ case IFM_10G_ER:
+ case IFM_10G_SFI:
key = IFM_10G_LR;
break;
+ case IFM_20G_KR2:
+ key = IFM_20G_KR2;
+ break;
+ case IFM_2500_KX:
+ case IFM_2500_T:
+ key = IFM_2500_KX;
+ break;
+ case IFM_5000_T:
+ key = IFM_5000_T;
+ break;
+ case IFM_50G_PCIE:
+ case IFM_50G_CR2:
+ case IFM_50G_KR2:
+ key = IFM_50G_PCIE;
+ break;
+ case IFM_56G_R4:
+ key = IFM_56G_R4;
+ break;
+ case IFM_25G_PCIE:
+ case IFM_25G_CR:
+ case IFM_25G_KR:
+ case IFM_25G_SR:
+ key = IFM_25G_PCIE;
+ break;
case IFM_40G_CR4:
case IFM_40G_SR4:
case IFM_40G_LR4:
+ case IFM_40G_XLPPI:
+ case IFM_40G_KR4:
key = IFM_40G_CR4;
break;
+ case IFM_100G_CR4:
+ case IFM_100G_SR4:
+ case IFM_100G_KR4:
+ case IFM_100G_LR4:
+ key = IFM_100G_CR4;
+ break;
default:
key = subtype;
+ break;
}
/* bit 5..14: (some bits of) if_index of lagg device */
key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5);
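A worked note on the key composition in the two context lines above: 0x7fe0 masks bits 5..14, so the aggregation key packs the collapsed media subtype into bits 0..4 and ten low-order bits of the lagg device's if_index above it; for if_index 3 the contribution is (3 << 5) & 0x7fe0 == 0x0060, keeping keys distinct across lagg devices even when their media types match.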
@@ -1313,6 +1374,8 @@ lacp_unselect(struct lacp_port *lp)
static void
lacp_sm_mux(struct lacp_port *lp)
{
+ struct lagg_port *lgp = lp->lp_lagg;
+ struct lagg_softc *sc = lgp->lp_softc;
enum lacp_mux_state new_state;
boolean_t p_sync =
(lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
@@ -1321,8 +1384,10 @@ lacp_sm_mux(struct lacp_port *lp)
enum lacp_selected selected = lp->lp_selected;
struct lacp_aggregator *la;
- if (lacp_debug > 1)
- lacp_dprintf(lp, "%s: state %d\n", __func__, lp->lp_mux_state);
+ if (V_lacp_debug > 1)
+ lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, "
+ "p_sync= 0x%x, p_collecting= 0x%x\n", __func__,
+ lp->lp_mux_state, selected, p_sync, p_collecting);
re_eval:
la = lp->lp_aggregator;
@@ -1362,6 +1427,8 @@ re_eval:
case LACP_MUX_DISTRIBUTING:
if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
new_state = LACP_MUX_COLLECTING;
+ lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flapping\n");
+ sc->sc_flapping++;
}
break;
default:
@@ -1610,6 +1677,10 @@ lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
sizeof(buf))));
}
+ /* XXX Hack, still need to implement 5.4.9 para 2,3,4 */
+ if (lp->lp_lsc->lsc_strict_mode)
+ lp->lp_partner.lip_state |= LACP_STATE_SYNC;
+
lacp_sm_ptx_update_timeout(lp, oldpstate);
}
@@ -1635,7 +1706,10 @@ lacp_sm_rx_record_default(struct lacp_port *lp)
LACP_TRACE(lp);
oldpstate = lp->lp_partner.lip_state;
- lp->lp_partner = lacp_partner_admin;
+ if (lp->lp_lsc->lsc_strict_mode)
+ lp->lp_partner = lacp_partner_admin_strict;
+ else
+ lp->lp_partner = lacp_partner_admin_optimistic;
lp->lp_state |= LACP_STATE_DEFAULTED;
lacp_sm_ptx_update_timeout(lp, oldpstate);
}
@@ -1670,7 +1744,12 @@ lacp_sm_rx_update_default_selected(struct lacp_port *lp)
LACP_TRACE(lp);
- lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
+ if (lp->lp_lsc->lsc_strict_mode)
+ lacp_sm_rx_update_selected_from_peerinfo(lp,
+ &lacp_partner_admin_strict);
+ else
+ lacp_sm_rx_update_selected_from_peerinfo(lp,
+ &lacp_partner_admin_optimistic);
}
/* transmit machine */
@@ -1678,7 +1757,7 @@ lacp_sm_rx_update_default_selected(struct lacp_port *lp)
static void
lacp_sm_tx(struct lacp_port *lp)
{
- int error;
+ int error = 0;
if (!(lp->lp_state & LACP_STATE_AGGREGATION)
#if 1
@@ -1700,7 +1779,11 @@ lacp_sm_tx(struct lacp_port *lp)
return;
}
- error = lacp_xmit_lacpdu(lp);
+ if (((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_tx_test) == 0) {
+ error = lacp_xmit_lacpdu(lp);
+ } else {
+ LACP_TPRINTF((lp, "Dropping TX PDU\n"));
+ }
if (error == 0) {
lp->lp_flags &= ~LACP_PORT_NTT;
diff --git a/freebsd/sys/net/ieee8023ad_lacp.h b/freebsd/sys/net/ieee8023ad_lacp.h
index 9cebc591..8f0f51a7 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.h
+++ b/freebsd/sys/net/ieee8023ad_lacp.h
@@ -75,6 +75,7 @@
"\007DEFAULTED" \
"\010EXPIRED"
+#ifdef _KERNEL
/*
* IEEE802.3 slow protocols
*
@@ -245,6 +246,12 @@ struct lacp_softc {
struct lacp_portmap lsc_pmap[2];
volatile u_int lsc_activemap;
u_int32_t lsc_hashkey;
+ struct {
+ u_int32_t lsc_rx_test;
+ u_int32_t lsc_tx_test;
+ } lsc_debug;
+ u_int32_t lsc_strict_mode;
+ boolean_t lsc_fast_timeout; /* if set, fast timeout */
};
#define LACP_TYPE_ACTORINFO 1
@@ -277,15 +284,15 @@ struct lacp_softc {
struct mbuf *lacp_input(struct lagg_port *, struct mbuf *);
struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
-int lacp_attach(struct lagg_softc *);
-int lacp_detach(struct lagg_softc *);
+void lacp_attach(struct lagg_softc *);
+void lacp_detach(void *);
void lacp_init(struct lagg_softc *);
void lacp_stop(struct lagg_softc *);
int lacp_port_create(struct lagg_port *);
void lacp_port_destroy(struct lagg_port *);
void lacp_linkstate(struct lagg_port *);
-void lacp_req(struct lagg_softc *, caddr_t);
-void lacp_portreq(struct lagg_port *, caddr_t);
+void lacp_req(struct lagg_softc *, void *);
+void lacp_portreq(struct lagg_port *, void *);
static __inline int
lacp_isactive(struct lagg_port *lgp)
@@ -331,3 +338,4 @@ lacp_isdistributing(struct lagg_port *lgp)
#define LACP_LAGIDSTR_MAX \
(1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1)
#define LACP_STATESTR_MAX (255) /* XXX */
+#endif /* _KERNEL */
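The lsc_debug bitmasks added above pair with the relocated sysctl: ieee8023ad_lacp.c tests net.link.lagg.lacp.debug as a bitmask, 0x01 for LACP_DPRINTF, 0x02 for LACP_TRACE and 0x04 for LACP_TPRINTF. A userland sketch that turns on debug and trace output through sysctl(3); lacp_enable_debug() is a hypothetical helper:

#include <sys/types.h>
#include <sys/sysctl.h>

static int
lacp_enable_debug(void)
{
	int val = 0x01 | 0x02;	/* LACP_DPRINTF | LACP_TRACE */

	/* Write the new value; no old value is read back. */
	return (sysctlbyname("net.link.lagg.lacp.debug", NULL, NULL,
	    &val, sizeof(val)));
}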
diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c
index 2c638a37..8bfa9e21 100644
--- a/freebsd/sys/net/if.c
+++ b/freebsd/sys/net/if.c
@@ -65,12 +65,16 @@
#include <machine/stdarg.h>
#include <vm/uma.h>
+#include <net/bpf.h>
+#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
+#include <net/if_media.h>
+#include <net/if_vlan_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -97,14 +101,9 @@
#include <compat/freebsd32/freebsd32.h>
#endif
-struct ifindex_entry {
- struct ifnet *ife_ifnet;
-};
-
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
-TUNABLE_INT("net.link.ifqmaxlen", &ifqmaxlen);
SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
&ifqmaxlen, 0, "max send queue size");
@@ -115,6 +114,13 @@ SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
&log_link_state_change, 0,
"log interface link state change events");
+/* Log promiscuous mode change events */
+static int log_promisc_mode_change = 1;
+
+SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
+ &log_promisc_mode_change, 1,
+ "log promiscuous mode change events");
+
/* Interface description */
static unsigned int ifdescr_maxlen = 1024;
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
@@ -132,18 +138,22 @@ void (*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void (*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
void (*carp_linkstate_p)(struct ifnet *ifp);
+void (*carp_demote_adj_p)(int, char *);
+int (*carp_master_p)(struct ifaddr *);
#if defined(INET) || defined(INET6)
-struct ifnet *(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
+int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *sa, struct rtentry *rt);
+ const struct sockaddr *sa);
+int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
+int (*carp_attach_p)(struct ifaddr *, int);
+void (*carp_detach_p)(struct ifaddr *);
#endif
#ifdef INET
-int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *, struct in_addr *,
- u_int8_t **);
+int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
-caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
+caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
const struct in6_addr *taddr);
#endif
@@ -158,23 +168,25 @@ static void if_attachdomain(void *);
static void if_attachdomain1(struct ifnet *);
static int ifconf(u_long, caddr_t);
static void if_freemulti(struct ifmultiaddr *);
-static void if_init(void *);
static void if_grow(void);
static void if_input_default(struct ifnet *, struct mbuf *);
+static int if_requestencap_default(struct ifnet *, struct if_encap_req *);
static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
-static int if_rtdel(struct radix_node *, void *);
static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void do_link_state_change(void *, int);
static int if_getgroup(struct ifgroupreq *, struct ifnet *);
static int if_getgroupmembers(struct ifgroupreq *);
static void if_delgroups(struct ifnet *);
-static void if_attach_internal(struct ifnet *, int);
-static void if_detach_internal(struct ifnet *, int);
+static void if_attach_internal(struct ifnet *, int, struct if_clone *);
+static int if_detach_internal(struct ifnet *, int, struct if_clone **);
+#ifdef VIMAGE
+static void if_vmove(struct ifnet *, struct vnet *);
+#endif
#ifdef INET6
/*
@@ -184,6 +196,10 @@ static void if_detach_internal(struct ifnet *, int);
extern void nd6_setmtu(struct ifnet *);
#endif
+/* ipsec helper hooks */
+VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
+VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
+
VNET_DEFINE(int, if_index);
int ifqmaxlen = IFQ_MAXLEN;
VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */
@@ -192,7 +208,7 @@ VNET_DEFINE(struct ifgrouphead, ifg_head);
static VNET_DEFINE(int, if_indexlim) = 8;
/* Table of ifnet by index. */
-VNET_DEFINE(struct ifindex_entry *, ifindex_table);
+VNET_DEFINE(struct ifnet **, ifindex_table);
#define V_if_indexlim VNET(if_indexlim)
#define V_ifindex_table VNET(ifindex_table)
@@ -207,7 +223,9 @@ VNET_DEFINE(struct ifindex_entry *, ifindex_table);
* inversions and deadlocks.
*/
struct rwlock ifnet_rwlock;
+RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
struct sx ifnet_sxlock;
+SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);
/*
* The allocation of network interfaces is a rather non-atomic affair; we
@@ -229,9 +247,9 @@ ifnet_byindex_locked(u_short idx)
if (idx > V_if_index)
return (NULL);
- if (V_ifindex_table[idx].ife_ifnet == IFNET_HOLD)
+ if (V_ifindex_table[idx] == IFNET_HOLD)
return (NULL);
- return (V_ifindex_table[idx].ife_ifnet);
+ return (V_ifindex_table[idx]);
}
struct ifnet *
@@ -265,34 +283,30 @@ ifnet_byindex_ref(u_short idx)
* Allocate an ifindex array entry; return 0 on success or an error on
* failure.
*/
-static int
-ifindex_alloc_locked(u_short *idxp)
+static u_short
+ifindex_alloc(void)
{
u_short idx;
IFNET_WLOCK_ASSERT();
-
retry:
/*
* Try to find an empty slot below V_if_index. If we fail, take the
* next slot.
*/
for (idx = 1; idx <= V_if_index; idx++) {
- if (V_ifindex_table[idx].ife_ifnet == NULL)
+ if (V_ifindex_table[idx] == NULL)
break;
}
/* Catch if_index overflow. */
- if (idx < 1)
- return (ENOSPC);
if (idx >= V_if_indexlim) {
if_grow();
goto retry;
}
if (idx > V_if_index)
V_if_index = idx;
- *idxp = idx;
- return (0);
+ return (idx);
}
static void
@@ -301,9 +315,9 @@ ifindex_free_locked(u_short idx)
IFNET_WLOCK_ASSERT();
- V_ifindex_table[idx].ife_ifnet = NULL;
+ V_ifindex_table[idx] = NULL;
while (V_if_index > 0 &&
- V_ifindex_table[V_if_index].ife_ifnet == NULL)
+ V_ifindex_table[V_if_index] == NULL)
V_if_index--;
}
@@ -322,7 +336,7 @@ ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
IFNET_WLOCK_ASSERT();
- V_ifindex_table[idx].ife_ifnet = ifp;
+ V_ifindex_table[idx] = ifp;
}
static void
@@ -337,11 +351,12 @@ ifnet_setbyindex(u_short idx, struct ifnet *ifp)
struct ifaddr *
ifaddr_byindex(u_short idx)
{
- struct ifaddr *ifa;
+ struct ifnet *ifp;
+ struct ifaddr *ifa = NULL;
IFNET_RLOCK_NOSLEEP();
- ifa = ifnet_byindex_locked(idx)->if_addr;
- if (ifa != NULL)
+ ifp = ifnet_byindex_locked(idx);
+ if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
ifa_ref(ifa);
IFNET_RUNLOCK_NOSLEEP();
return (ifa);
@@ -368,17 +383,6 @@ vnet_if_init(const void *unused __unused)
VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
NULL);
-/* ARGSUSED*/
-static void
-if_init(void *dummy __unused)
-{
-
- IFNET_LOCK_INIT();
- if_clone_init();
-}
-SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL);
-
-
#ifdef VIMAGE
static void
vnet_if_uninit(const void *unused __unused)
@@ -393,6 +397,20 @@ vnet_if_uninit(const void *unused __unused)
}
VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
vnet_if_uninit, NULL);
+
+static void
+vnet_if_return(const void *unused __unused)
+{
+ struct ifnet *ifp, *nifp;
+
+ /* Return all inherited interfaces to their parent vnets. */
+ TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
+ if (ifp->if_home_vnet != ifp->if_vnet)
+ if_vmove(ifp, ifp->if_home_vnet);
+ }
+}
+VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
+ vnet_if_return, NULL);
#endif
static void
@@ -400,7 +418,7 @@ if_grow(void)
{
int oldlim;
u_int n;
- struct ifindex_entry *e;
+ struct ifnet **e;
IFNET_WLOCK_ASSERT();
oldlim = V_if_indexlim;
@@ -433,16 +451,15 @@ if_alloc(u_char type)
ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
IFNET_WLOCK();
- if (ifindex_alloc_locked(&idx) != 0) {
- IFNET_WUNLOCK();
- free(ifp, M_IFNET);
- return (NULL);
- }
+ idx = ifindex_alloc();
ifnet_setbyindex_locked(idx, IFNET_HOLD);
IFNET_WUNLOCK();
ifp->if_index = idx;
ifp->if_type = type;
ifp->if_alloctype = type;
+#ifdef VIMAGE
+ ifp->if_vnet = curvnet;
+#endif
if (if_com_alloc[type] != NULL) {
ifp->if_l2com = if_com_alloc[type](type, ifp);
if (ifp->if_l2com == NULL) {
@@ -457,7 +474,6 @@ if_alloc(u_char type)
ifp->if_afdata_initialized = 0;
IF_AFDATA_LOCK_INIT(ifp);
TAILQ_INIT(&ifp->if_addrhead);
- TAILQ_INIT(&ifp->if_prefixhead);
TAILQ_INIT(&ifp->if_multiaddrs);
TAILQ_INIT(&ifp->if_groups);
#ifdef MAC
@@ -466,6 +482,9 @@ if_alloc(u_char type)
ifq_init(&ifp->if_snd, ifp);
refcount_init(&ifp->if_refcount, 1); /* Index reference. */
+ for (int i = 0; i < IFCOUNTERS; i++)
+ ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
+ ifp->if_get_counter = if_get_counter_default;
ifnet_setbyindex(ifp->if_index, ifp);
return (ifp);
}
@@ -494,23 +513,20 @@ if_free_internal(struct ifnet *ifp)
IF_AFDATA_DESTROY(ifp);
IF_ADDR_LOCK_DESTROY(ifp);
ifq_delete(&ifp->if_snd);
+
+ for (int i = 0; i < IFCOUNTERS; i++)
+ counter_u64_free(ifp->if_counters[i]);
+
free(ifp, M_IFNET);
}
/*
- * This version should only be called by intefaces that switch their type
- * after calling if_alloc(). if_free_type() will go away again now that we
- * have if_alloctype to cache the original allocation type. For now, assert
- * that they match, since we require that in practice.
+ * Deregister an interface and free the associated storage.
*/
void
-if_free_type(struct ifnet *ifp, u_char type)
+if_free(struct ifnet *ifp)
{
- KASSERT(ifp->if_alloctype == type,
- ("if_free_type: type (%d) != alloctype (%d)", type,
- ifp->if_alloctype));
-
ifp->if_flags |= IFF_DYING; /* XXX: Locking */
CURVNET_SET_QUIET(ifp->if_vnet);
@@ -527,18 +543,6 @@ if_free_type(struct ifnet *ifp, u_char type)
}
/*
- * This is the normal version of if_free(), used by device drivers to free a
- * detached network interface. The contents of if_free_type() will move into
- * here when if_free_type() goes away.
- */
-void
-if_free(struct ifnet *ifp)
-{
-
- if_free_type(ifp, ifp->if_alloctype);
-}
-
-/*
* Interfaces to keep an ifnet type-stable despite the possibility of the
* driver calling if_free(). If there are additional references, we defer
* freeing the underlying data structure.
@@ -583,12 +587,21 @@ ifq_delete(struct ifaltq *ifq)
}
/*
- * Perform generic interface initalization tasks and attach the interface
+ * Perform generic interface initialization tasks and attach the interface
* to the list of "active" interfaces. If vmove flag is set on entry
* to if_attach_internal(), perform only a limited subset of initialization
* tasks, given that we are moving from one vnet to another an ifnet which
* has already been fully initialized.
*
+ * Note that if_detach_internal() removes group membership unconditionally
+ * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
+ * Thus, when if_vmove() is applied to a cloned interface, group membership
+ * is lost while a cloned one always joins a group whose name is
+ * ifc->ifc_name. To recover this after if_detach_internal() and
+ * if_attach_internal(), the cloner should be specified to
+ * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal()
+ * attempts to join a group whose name is ifc->ifc_name.
+ *
* XXX:
* - The decision to return void and thus require this function to
* succeed is questionable.
@@ -599,14 +612,14 @@ void
if_attach(struct ifnet *ifp)
{
- if_attach_internal(ifp, 0);
+ if_attach_internal(ifp, 0, NULL);
}
/*
* Compute the least common TSO limit.
*/
void
-if_hw_tsomax_common(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
+if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
{
/*
* 1) If there is no limit currently, take the limit from
@@ -635,7 +648,7 @@ if_hw_tsomax_common(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
* Returns zero if no change. Else non-zero.
*/
int
-if_hw_tsomax_update(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
+if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
{
int retval = 0;
if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
@@ -654,7 +667,7 @@ if_hw_tsomax_update(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
}
static void
-if_attach_internal(struct ifnet *ifp, int vmove)
+if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
{
unsigned socksize, ifasize;
int namelen, masklen;
@@ -673,9 +686,12 @@ if_attach_internal(struct ifnet *ifp, int vmove)
if_addgroup(ifp, IFG_ALL);
+ /* Restore group membership for cloned interfaces. */
+ if (vmove && ifc != NULL)
+ if_clone_addgroup(ifp, ifc);
+
getmicrotime(&ifp->if_lastchange);
- ifp->if_data.ifi_epoch = time_uptime;
- ifp->if_data.ifi_datalen = sizeof(struct if_data);
+ ifp->if_epoch = time_uptime;
KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
(ifp->if_transmit != NULL && ifp->if_qflush != NULL),
@@ -687,6 +703,9 @@ if_attach_internal(struct ifnet *ifp, int vmove)
if (ifp->if_input == NULL)
ifp->if_input = if_input_default;
+ if (ifp->if_requestencap == NULL)
+ ifp->if_requestencap = if_requestencap_default;
+
if (!vmove) {
#ifdef MAC
mac_ifnet_create(ifp);
@@ -706,8 +725,7 @@ if_attach_internal(struct ifnet *ifp, int vmove)
socksize = sizeof(*sdl);
socksize = roundup2(socksize, sizeof(long));
ifasize = sizeof(*ifa) + 2 * socksize;
- ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
- ifa_init(ifa);
+ ifa = ifa_alloc(ifasize, M_WAITOK);
sdl = (struct sockaddr_dl *)(ifa + 1);
sdl->sdl_len = socksize;
sdl->sdl_family = AF_LINK;
@@ -792,12 +810,9 @@ static void
if_attachdomain(void *dummy)
{
struct ifnet *ifp;
- int s;
- s = splnet();
TAILQ_FOREACH(ifp, &V_ifnet, if_link)
if_attachdomain1(ifp);
- splx(s);
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
if_attachdomain, NULL);
@@ -806,23 +821,16 @@ static void
if_attachdomain1(struct ifnet *ifp)
{
struct domain *dp;
- int s;
-
- s = splnet();
/*
* Since dp->dom_ifattach calls malloc() with M_WAITOK, we
* cannot lock ifp->if_afdata initialization, entirely.
*/
- if (IF_AFDATA_TRYLOCK(ifp) == 0) {
- splx(s);
- return;
- }
+ IF_AFDATA_LOCK(ifp);
if (ifp->if_afdata_initialized >= domain_init_status) {
IF_AFDATA_UNLOCK(ifp);
- splx(s);
- printf("if_attachdomain called more than once on %s\n",
- ifp->if_xname);
+ log(LOG_WARNING, "%s called more than once on %s\n",
+ __func__, ifp->if_xname);
return;
}
ifp->if_afdata_initialized = domain_init_status;
@@ -835,8 +843,6 @@ if_attachdomain1(struct ifnet *ifp)
ifp->if_afdata[dp->dom_family] =
(*dp->dom_ifattach)(ifp);
}
-
- splx(s);
}
/*
@@ -847,6 +853,7 @@ if_purgeaddrs(struct ifnet *ifp)
{
struct ifaddr *ifa, *next;
+ /* XXX cannot hold IF_ADDR_WLOCK over called functions. */
TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
if (ifa->ifa_addr->sa_family == AF_LINK)
continue;
@@ -871,7 +878,9 @@ if_purgeaddrs(struct ifnet *ifp)
continue;
}
#endif /* INET6 */
+ IF_ADDR_WLOCK(ifp);
TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
+ IF_ADDR_WUNLOCK(ifp);
ifa_free(ifa);
}
}
@@ -906,20 +915,34 @@ if_detach(struct ifnet *ifp)
{
CURVNET_SET_QUIET(ifp->if_vnet);
- if_detach_internal(ifp, 0);
+ if_detach_internal(ifp, 0, NULL);
CURVNET_RESTORE();
}
-static void
-if_detach_internal(struct ifnet *ifp, int vmove)
+/*
+ * The vmove flag, if set, indicates that we are called from a callpath
+ * that is moving an interface to a different vnet instance.
+ *
+ * The shutdown flag, if set, indicates that we are called in the
+ * process of shutting down a vnet instance. Currently only the
+ * vnet_if_return SYSUNINIT function sets it. Note: we can be called
+ * on a vnet instance shutdown without this flag being set, e.g., when
+ * the cloned interfaces are destroyed as the first thing during teardown.
+ */
+static int
+if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
{
struct ifaddr *ifa;
- struct radix_node_head *rnh;
- int i, j;
+ int i;
struct domain *dp;
struct ifnet *iter;
int found = 0;
+#ifdef VIMAGE
+ int shutdown;
+ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
+ ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
+#endif
IFNET_WLOCK();
TAILQ_FOREACH(iter, &V_ifnet, if_link)
if (iter == ifp) {
@@ -927,28 +950,77 @@ if_detach_internal(struct ifnet *ifp, int vmove)
found = 1;
break;
}
-#ifdef VIMAGE
- if (found)
- curvnet->vnet_ifcnt--;
-#endif
IFNET_WUNLOCK();
if (!found) {
+ /*
+ * While we would want to panic here, we cannot
+ * guarantee that the interface is indeed still on
+ * the list given we don't hold locks all the way.
+ */
+ return (ENOENT);
+#if 0
if (vmove)
panic("%s: ifp=%p not on the ifnet tailq %p",
__func__, ifp, &V_ifnet);
else
return; /* XXX this should panic as well? */
+#endif
}
/*
- * Remove/wait for pending events.
+ * At this point we know the interface still was on the ifnet list
+ * and we removed it so we are in a stable state.
*/
+#ifdef VIMAGE
+ curvnet->vnet_ifcnt--;
+#endif
+
+ /*
+ * In any case (destroy or vmove) detach us from the groups
+ * and remove/wait for pending events on the taskq.
+ * XXX-BZ in theory an interface could still enqueue a taskq change?
+ */
+ if_delgroups(ifp);
+
taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
/*
- * Remove routes and flush queues.
+ * Check if this is a cloned interface or not. This must be done
+ * even when shutting down, as an if_vmove_reclaim() would move
+ * the ifp and if_clone_addgroup() would otherwise read a
+ * corrupted string from a gibberish pointer.
*/
+ if (vmove && ifcp != NULL)
+ *ifcp = if_clone_findifc(ifp);
+
if_down(ifp);
+
+#ifdef VIMAGE
+ /*
+ * On VNET shutdown abort here as the stack teardown will do all
+ * the work top-down for us.
+ */
+ if (shutdown) {
+ /*
+ * In case of a vmove we are done here without error.
+ * If we would signal an error it would lead to the same
+ * abort as if we did not find the ifnet anymore.
+ * if_detach() calls us in void context and does not care
+ * about an early abort notification, so life is splendid :)
+ */
+ goto finish_vnet_shutdown;
+ }
+#endif
+
+ /*
+ * At this point we are not tearing down a VNET and are either
+ * going to destroy or vmove the interface and have to cleanup
+ * accordingly.
+ */
+
+ /*
+ * Remove routes and flush queues.
+ */
#ifdef ALTQ
if (ALTQ_IS_ENABLED(&ifp->if_snd))
altq_disable(&ifp->if_snd);
@@ -973,6 +1045,12 @@ if_detach_internal(struct ifnet *ifp, int vmove)
#endif
if_purgemaddrs(ifp);
+ /* Announce that the interface is gone. */
+ rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
+ EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
+ if (IS_DEFAULT_VNET(curvnet))
+ devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
+
if (!vmove) {
/*
* Prevent further calls into the device driver via ifnet.
@@ -986,37 +1064,21 @@ if_detach_internal(struct ifnet *ifp, int vmove)
ifp->if_addr = NULL;
/* We can now free link ifaddr. */
+ IF_ADDR_WLOCK(ifp);
if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
ifa = TAILQ_FIRST(&ifp->if_addrhead);
TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
+ IF_ADDR_WUNLOCK(ifp);
ifa_free(ifa);
- }
- }
-
- /*
- * Delete all remaining routes using this interface
- * Unfortuneatly the only way to do this is to slog through
- * the entire routing table looking for routes which point
- * to this interface...oh well...
- */
- for (i = 1; i <= AF_MAX; i++) {
- for (j = 0; j < rt_numfibs; j++) {
- rnh = rt_tables_get_rnh(j, i);
- if (rnh == NULL)
- continue;
- RADIX_NODE_HEAD_LOCK(rnh);
- (void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
+ } else
+ IF_ADDR_WUNLOCK(ifp);
}
- /* Announce that the interface is gone. */
- rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
- EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
- if (IS_DEFAULT_VNET(curvnet))
- devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
- if_delgroups(ifp);
+ rt_flushifroutes(ifp);
+#ifdef VIMAGE
+finish_vnet_shutdown:
+#endif
/*
* We cannot hold the lock over dom_ifdetach calls as they might
* sleep, for example trying to drain a callout, thus open up the
@@ -1027,10 +1089,14 @@ if_detach_internal(struct ifnet *ifp, int vmove)
ifp->if_afdata_initialized = 0;
IF_AFDATA_UNLOCK(ifp);
for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
- if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
+ if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
(*dp->dom_ifdetach)(ifp,
ifp->if_afdata[dp->dom_family]);
+ ifp->if_afdata[dp->dom_family] = NULL;
+ }
}
+
+ return (0);
}
#ifdef VIMAGE
@@ -1041,16 +1107,28 @@ if_detach_internal(struct ifnet *ifp, int vmove)
* unused if_index in target vnet and calls if_grow() if necessary,
* and finally find an unused if_xname for the target vnet.
*/
-void
+static void
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{
- u_short idx;
+ struct if_clone *ifc;
+ u_int bif_dlt, bif_hdrlen;
+ int rc;
+
+ /*
+ * if_detach_internal() will call the eventhandler to notify
+ * interface departure. That will detach if_bpf. We need to
+ * save the dlt and hdrlen so we can re-attach it later.
+ */
+ bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
/*
* Detach from current vnet, but preserve LLADDR info, do not
* mark as dead etc. so that the ifnet can be reattached later.
+ * If we cannot find it, we lost the race to someone else.
*/
- if_detach_internal(ifp, 1);
+ rc = if_detach_internal(ifp, 1, &ifc);
+ if (rc != 0)
+ return;
/*
* Unlink the ifnet from ifindex_table[] in current vnet, and shrink
@@ -1076,15 +1154,14 @@ if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
CURVNET_SET_QUIET(new_vnet);
IFNET_WLOCK();
- if (ifindex_alloc_locked(&idx) != 0) {
- IFNET_WUNLOCK();
- panic("if_index overflow");
- }
- ifp->if_index = idx;
+ ifp->if_index = ifindex_alloc();
ifnet_setbyindex_locked(ifp->if_index, ifp);
IFNET_WUNLOCK();
- if_attach_internal(ifp, 1);
+ if_attach_internal(ifp, 1, ifc);
+
+ if (ifp->if_bpf == NULL)
+ bpfattach(ifp, bif_dlt, bif_hdrlen);
CURVNET_RESTORE();
}
@@ -1097,6 +1174,7 @@ if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
{
struct prison *pr;
struct ifnet *difp;
+ int shutdown;
/* Try to find the prison within our visibility. */
sx_slock(&allprison_lock);
@@ -1117,12 +1195,22 @@ if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
/* XXX Lock interfaces to avoid races. */
CURVNET_SET_QUIET(pr->pr_vnet);
difp = ifunit(ifname);
- CURVNET_RESTORE();
if (difp != NULL) {
+ CURVNET_RESTORE();
prison_free(pr);
return (EEXIST);
}
+ /* Make sure the VNET is stable. */
+ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
+ ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
+ if (shutdown) {
+ CURVNET_RESTORE();
+ prison_free(pr);
+ return (EBUSY);
+ }
+ CURVNET_RESTORE();
+
/* Move the interface into the child jail/vnet. */
if_vmove(ifp, pr->pr_vnet);
@@ -1139,6 +1227,7 @@ if_vmove_reclaim(struct thread *td, char *ifname, int jid)
struct prison *pr;
struct vnet *vnet_dst;
struct ifnet *ifp;
+ int shutdown;
/* Try to find the prison within our visibility. */
sx_slock(&allprison_lock);
@@ -1166,6 +1255,15 @@ if_vmove_reclaim(struct thread *td, char *ifname, int jid)
return (EEXIST);
}
+ /* Make sure the VNET is stable. */
+ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
+ ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
+ if (shutdown) {
+ CURVNET_RESTORE();
+ prison_free(pr);
+ return (EBUSY);
+ }
+
/* Get interface back from child jail/vnet. */
if_vmove(ifp, vnet_dst);
CURVNET_RESTORE();
@@ -1187,6 +1285,7 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
struct ifg_list *ifgl;
struct ifg_group *ifg = NULL;
struct ifg_member *ifgm;
+ int new = 0;
if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
groupname[strlen(groupname) - 1] <= '9')
@@ -1227,8 +1326,8 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
ifg->ifg_refcnt = 0;
TAILQ_INIT(&ifg->ifg_members);
- EVENTHANDLER_INVOKE(group_attach_event, ifg);
TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
+ new = 1;
}
ifg->ifg_refcnt++;
@@ -1242,6 +1341,8 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
IFNET_WUNLOCK();
+ if (new)
+ EVENTHANDLER_INVOKE(group_attach_event, ifg);
EVENTHANDLER_INVOKE(group_change_event, groupname);
return (0);
@@ -1280,10 +1381,11 @@ if_delgroup(struct ifnet *ifp, const char *groupname)
if (--ifgl->ifgl_group->ifg_refcnt == 0) {
TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
+ IFNET_WUNLOCK();
EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
free(ifgl->ifgl_group, M_TEMP);
- }
- IFNET_WUNLOCK();
+ } else
+ IFNET_WUNLOCK();
free(ifgl, M_TEMP);
@@ -1324,11 +1426,12 @@ if_delgroups(struct ifnet *ifp)
if (--ifgl->ifgl_group->ifg_refcnt == 0) {
TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
+ IFNET_WUNLOCK();
EVENTHANDLER_INVOKE(group_detach_event,
ifgl->ifgl_group);
free(ifgl->ifgl_group, M_TEMP);
- }
- IFNET_WUNLOCK();
+ } else
+ IFNET_WUNLOCK();
free(ifgl, M_TEMP);
@@ -1434,46 +1537,63 @@ if_getgroupmembers(struct ifgroupreq *data)
}
/*
- * Delete Routes for a Network Interface
- *
- * Called for each routing entry via the rnh->rnh_walktree() call above
- * to delete all route entries referencing a detaching network interface.
- *
- * Arguments:
- * rn pointer to node in the routing table
- * arg argument passed to rnh->rnh_walktree() - detaching interface
- *
- * Returns:
- * 0 successful
- * errno failed - reason indicated
- *
+ * Return counter values from counter(9)s stored in ifnet.
*/
-static int
-if_rtdel(struct radix_node *rn, void *arg)
+uint64_t
+if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
{
- struct rtentry *rt = (struct rtentry *)rn;
- struct ifnet *ifp = arg;
- int err;
- if (rt->rt_ifp == ifp) {
+ KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
- /*
- * Protect (sorta) against walktree recursion problems
- * with cloned routes
- */
- if ((rt->rt_flags & RTF_UP) == 0)
- return (0);
+ return (counter_u64_fetch(ifp->if_counters[cnt]));
+}
- err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
- rt_mask(rt),
- rt->rt_flags|RTF_RNH_LOCKED|RTF_PINNED,
- (struct rtentry **) NULL, rt->rt_fibnum);
- if (err) {
- log(LOG_WARNING, "if_rtdel: error %d\n", err);
- }
- }
+/*
+ * Increase an ifnet counter. Usually used for counters shared
+ * between the stack and a driver, but the function supports them all.
+ */
+void
+if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
+{
- return (0);
+ KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
+
+ counter_u64_add(ifp->if_counters[cnt], inc);
+}
+
+/*
+ * Copy data from ifnet to userland API structure if_data.
+ */
+void
+if_data_copy(struct ifnet *ifp, struct if_data *ifd)
+{
+
+ ifd->ifi_type = ifp->if_type;
+ ifd->ifi_physical = 0;
+ ifd->ifi_addrlen = ifp->if_addrlen;
+ ifd->ifi_hdrlen = ifp->if_hdrlen;
+ ifd->ifi_link_state = ifp->if_link_state;
+ ifd->ifi_vhid = 0;
+ ifd->ifi_datalen = sizeof(struct if_data);
+ ifd->ifi_mtu = ifp->if_mtu;
+ ifd->ifi_metric = ifp->if_metric;
+ ifd->ifi_baudrate = ifp->if_baudrate;
+ ifd->ifi_hwassist = ifp->if_hwassist;
+ ifd->ifi_epoch = ifp->if_epoch;
+ ifd->ifi_lastchange = ifp->if_lastchange;
+
+ ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
+ ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
+ ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
+ ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
+ ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
+ ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
+ ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
+ ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
+ ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
+ ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
+ ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
+ ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
}
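With the per-ifnet counter(9) array and the if_get_counter method introduced above, drivers bump statistics locklessly through if_inc_counter() and if_data_copy() gathers them for userland in one place. A sketch of the driver side; foo_rxeof() is hypothetical, while if_inc_counter() and the IFCOUNTER_* constants are the real API:

static void
foo_rxeof(struct ifnet *ifp, struct mbuf *m)
{

	if (m == NULL) {
		if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
		return;
	}
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	(*ifp->if_input)(ifp, m);
}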
/*
@@ -1497,28 +1617,56 @@ if_addr_runlock(struct ifnet *ifp)
}
void
-if_maddr_rlock(struct ifnet *ifp)
+if_maddr_rlock(if_t ifp)
{
- IF_ADDR_RLOCK(ifp);
+ IF_ADDR_RLOCK((struct ifnet *)ifp);
}
void
-if_maddr_runlock(struct ifnet *ifp)
+if_maddr_runlock(if_t ifp)
{
- IF_ADDR_RUNLOCK(ifp);
+ IF_ADDR_RUNLOCK((struct ifnet *)ifp);
}
/*
- * Reference count functions for ifaddrs.
+ * Initialization, destruction and refcounting functions for ifaddrs.
*/
-void
-ifa_init(struct ifaddr *ifa)
+struct ifaddr *
+ifa_alloc(size_t size, int flags)
{
+ struct ifaddr *ifa;
+
+ KASSERT(size >= sizeof(struct ifaddr),
+ ("%s: invalid size %zu", __func__, size));
+
+ ifa = malloc(size, M_IFADDR, M_ZERO | flags);
+ if (ifa == NULL)
+ return (NULL);
+
+ if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
+ goto fail;
+ if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
+ goto fail;
+ if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
+ goto fail;
+ if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
+ goto fail;
- mtx_init(&ifa->ifa_mtx, "ifaddr", NULL, MTX_DEF);
refcount_init(&ifa->ifa_refcnt, 1);
+
+ return (ifa);
+
+fail:
+ /* free(NULL) is okay */
+ counter_u64_free(ifa->ifa_opackets);
+ counter_u64_free(ifa->ifa_ipackets);
+ counter_u64_free(ifa->ifa_obytes);
+ counter_u64_free(ifa->ifa_ibytes);
+ free(ifa, M_IFADDR);
+
+ return (NULL);
}
void
@@ -1533,62 +1681,61 @@ ifa_free(struct ifaddr *ifa)
{
if (refcount_release(&ifa->ifa_refcnt)) {
- mtx_destroy(&ifa->ifa_mtx);
+ counter_u64_free(ifa->ifa_opackets);
+ counter_u64_free(ifa->ifa_ipackets);
+ counter_u64_free(ifa->ifa_obytes);
+ counter_u64_free(ifa->ifa_ibytes);
free(ifa, M_IFADDR);
}
}
-int
-ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
+static int
+ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
+ struct sockaddr *ia)
{
- int error = 0;
- struct rtentry *rt = NULL;
+ int error;
struct rt_addrinfo info;
- static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+ struct sockaddr_dl null_sdl;
+ struct ifnet *ifp;
+
+ ifp = ifa->ifa_ifp;
bzero(&info, sizeof(info));
- info.rti_ifp = V_loif;
+ if (cmd != RTM_DELETE)
+ info.rti_ifp = V_loif;
info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
info.rti_info[RTAX_DST] = ia;
info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
- error = rtrequest1_fib(RTM_ADD, &info, &rt, ifa->ifa_ifp->if_fib);
-
- if (error == 0 && rt != NULL) {
- RT_LOCK(rt);
- ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
- ifa->ifa_ifp->if_type;
- ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
- ifa->ifa_ifp->if_index;
- RT_REMREF(rt);
- RT_UNLOCK(rt);
- } else if (error != 0)
- log(LOG_INFO, "ifa_add_loopback_route: insertion failed\n");
+ link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);
+
+ error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
+
+ if (error != 0)
+ log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
+ __func__, otype, if_name(ifp), error);
return (error);
}
int
+ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
+{
+
+ return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia));
+}
+
+int
ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
{
- int error = 0;
- struct rt_addrinfo info;
- struct sockaddr_dl null_sdl;
- bzero(&null_sdl, sizeof(null_sdl));
- null_sdl.sdl_len = sizeof(null_sdl);
- null_sdl.sdl_family = AF_LINK;
- null_sdl.sdl_type = ifa->ifa_ifp->if_type;
- null_sdl.sdl_index = ifa->ifa_ifp->if_index;
- bzero(&info, sizeof(info));
- info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
- info.rti_info[RTAX_DST] = ia;
- info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
- error = rtrequest1_fib(RTM_DELETE, &info, NULL, ifa->ifa_ifp->if_fib);
+ return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia));
+}
- if (error != 0)
- log(LOG_INFO, "ifa_del_loopback_route: deletion failed\n");
+int
+ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
+{
- return (error);
+ return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
}
/*
@@ -1597,22 +1744,19 @@ ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
* to perform a different comparison.
*/
-#define sa_equal(a1, a2) \
- (bcmp((a1), (a2), ((a1))->sa_len) == 0)
-
#define sa_dl_equal(a1, a2) \
- ((((struct sockaddr_dl *)(a1))->sdl_len == \
- ((struct sockaddr_dl *)(a2))->sdl_len) && \
- (bcmp(LLADDR((struct sockaddr_dl *)(a1)), \
- LLADDR((struct sockaddr_dl *)(a2)), \
- ((struct sockaddr_dl *)(a1))->sdl_alen) == 0))
+ ((((const struct sockaddr_dl *)(a1))->sdl_len == \
+ ((const struct sockaddr_dl *)(a2))->sdl_len) && \
+ (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \
+ CLLADDR((const struct sockaddr_dl *)(a2)), \
+ ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0))
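
A hedged usage sketch for the const-clean sa_dl_equal() above; lladdrs_match is an illustrative wrapper that guards the AF_LINK assumption the macro itself does not check:

    /* Non-zero when two link-level sockaddrs carry the same hardware
     * address; the lengths must match before the bcmp of the
     * CLLADDR() bytes is meaningful. */
    static int
    lladdrs_match(const struct sockaddr *a, const struct sockaddr *b)
    {

        if (a->sa_family != AF_LINK || b->sa_family != AF_LINK)
            return (0);
        return (sa_dl_equal(a, b));
    }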
/*
* Locate an interface based on a complete address.
*/
/*ARGSUSED*/
static struct ifaddr *
-ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
+ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref)
{
struct ifnet *ifp;
struct ifaddr *ifa;
@@ -1649,14 +1793,14 @@ done:
}
struct ifaddr *
-ifa_ifwithaddr(struct sockaddr *addr)
+ifa_ifwithaddr(const struct sockaddr *addr)
{
return (ifa_ifwithaddr_internal(addr, 1));
}
int
-ifa_ifwithaddr_check(struct sockaddr *addr)
+ifa_ifwithaddr_check(const struct sockaddr *addr)
{
return (ifa_ifwithaddr_internal(addr, 0) != NULL);
@@ -1667,13 +1811,15 @@ ifa_ifwithaddr_check(struct sockaddr *addr)
*/
/* ARGSUSED */
struct ifaddr *
-ifa_ifwithbroadaddr(struct sockaddr *addr)
+ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
{
struct ifnet *ifp;
struct ifaddr *ifa;
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
+ continue;
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family)
@@ -1700,7 +1846,7 @@ done:
*/
/*ARGSUSED*/
struct ifaddr *
-ifa_ifwithdstaddr_fib(struct sockaddr *addr, int fibnum)
+ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
{
struct ifnet *ifp;
struct ifaddr *ifa;
@@ -1730,32 +1876,25 @@ done:
return (ifa);
}
-struct ifaddr *
-ifa_ifwithdstaddr(struct sockaddr *addr)
-{
-
- return (ifa_ifwithdstaddr_fib(addr, RT_ALL_FIBS));
-}
-
/*
* Find an interface on a specific network. If many, choice
* is most specific found.
*/
struct ifaddr *
-ifa_ifwithnet_fib(struct sockaddr *addr, int ignore_ptp, int fibnum)
+ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
{
struct ifnet *ifp;
struct ifaddr *ifa;
struct ifaddr *ifa_maybe = NULL;
u_int af = addr->sa_family;
- char *addr_data = addr->sa_data, *cplim;
+ const char *addr_data = addr->sa_data, *cplim;
/*
* AF_LINK addresses can be looked up directly by their index number,
* so do that if we can.
*/
if (af == AF_LINK) {
- struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
+ const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
return (ifaddr_byindex(sdl->sdl_index));
}
@@ -1772,7 +1911,7 @@ ifa_ifwithnet_fib(struct sockaddr *addr, int ignore_ptp, int fibnum)
continue;
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- char *cp, *cp2, *cp3;
+ const char *cp, *cp2, *cp3;
if (ifa->ifa_addr->sa_family != af)
next: continue;
@@ -1794,19 +1933,6 @@ next: continue;
}
} else {
/*
- * if we have a special address handler,
- * then use it instead of the generic one.
- */
- if (ifa->ifa_claim_addr) {
- if ((*ifa->ifa_claim_addr)(ifa, addr)) {
- ifa_ref(ifa);
- IF_ADDR_RUNLOCK(ifp);
- goto done;
- }
- continue;
- }
-
- /*
* Scan all the bits in the ifa's address.
 * If a bit disagrees with what we are
* looking for, mask it with the netmask
@@ -1826,11 +1952,13 @@ next: continue;
/*
* If the netmask of what we just found
* is more specific than what we had before
- * (if we had one) then remember the new one
- * before continuing to search
- * for an even better one.
+ * (if we had one), or if the virtual status
+ * of new prefix is better than of the old one,
+ * then remember the new one before continuing
+ * to search for an even better one.
*/
if (ifa_maybe == NULL ||
+ ifa_preferred(ifa_maybe, ifa) ||
rn_refines((caddr_t)ifa->ifa_netmask,
(caddr_t)ifa_maybe->ifa_netmask)) {
if (ifa_maybe != NULL)
@@ -1851,22 +1979,15 @@ done:
return (ifa);
}
-struct ifaddr *
-ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp)
-{
-
- return (ifa_ifwithnet_fib(addr, ignore_ptp, RT_ALL_FIBS));
-}
-
/*
* Find an interface address specific to an interface best matching
* a given address.
*/
struct ifaddr *
-ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
+ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
{
struct ifaddr *ifa;
- char *cp, *cp2, *cp3;
+ const char *cp, *cp2, *cp3;
char *cplim;
struct ifaddr *ifa_maybe = NULL;
u_int af = addr->sa_family;
@@ -1909,6 +2030,21 @@ done:
return (ifa);
}
+/*
+ * See whether new ifa is better than current one:
+ * 1) A non-virtual one is preferred over virtual.
+ * 2) A virtual in master state preferred over any other state.
+ *
+ * Used in several address selecting functions.
+ */
+int
+ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
+{
+
+ return (cur->ifa_carp && (!next->ifa_carp ||
+ ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
+}
+
#include <net/if_llatbl.h>
/*
@@ -1923,10 +2059,8 @@ link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
struct sockaddr *dst;
struct ifnet *ifp;
- RT_LOCK_ASSERT(rt);
-
- if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
- ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
+ if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
+ ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
return;
ifa = ifaof_ifpforaddr(dst, ifp);
if (ifa) {
@@ -1938,10 +2072,41 @@ link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
}
}
+struct sockaddr_dl *
+link_alloc_sdl(size_t size, int flags)
+{
+
+ return (malloc(size, M_TEMP, flags));
+}
+
+void
+link_free_sdl(struct sockaddr *sa)
+{
+ free(sa, M_TEMP);
+}
+
+/*
+ * Fills in given sdl with interface basic info.
+ * Returns pointer to filled sdl.
+ */
+struct sockaddr_dl *
+link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
+{
+ struct sockaddr_dl *sdl;
+
+ sdl = (struct sockaddr_dl *)paddr;
+ memset(sdl, 0, sizeof(struct sockaddr_dl));
+ sdl->sdl_len = sizeof(struct sockaddr_dl);
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = iftype;
+
+ return (sdl);
+}
+
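
link_init_sdl() exists so callers can fill caller-provided storage instead of allocating, as ifa_maintain_loopback_route() does above with an on-stack null_sdl. A minimal sketch under the same assumption (set_link_gateway is an illustrative helper):

    static void
    set_link_gateway(struct ifnet *ifp, struct rt_addrinfo *info,
        struct sockaddr_dl *storage)
    {

        /* storage is zeroed, then filled with AF_LINK, the
         * interface index and the given type; no malloc needed. */
        info->rti_info[RTAX_GATEWAY] = (struct sockaddr *)
            link_init_sdl(ifp, (struct sockaddr *)storage, ifp->if_type);
    }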
/*
* Mark an interface down and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
static void
if_unroute(struct ifnet *ifp, int flag, int fam)
@@ -1965,7 +2130,6 @@ if_unroute(struct ifnet *ifp, int flag, int fam)
/*
* Mark an interface up and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
static void
if_route(struct ifnet *ifp, int flag, int fam)
@@ -2026,7 +2190,7 @@ do_link_state_change(void *arg, int pending)
(*vlan_link_state_p)(ifp);
if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
- IFP2AC(ifp)->ac_netgraph != NULL)
+ ifp->if_l2com != NULL)
(*ng_ether_link_state_p)(ifp, link_state);
if (ifp->if_carp)
(*carp_linkstate_p)(ifp);
@@ -2051,7 +2215,6 @@ do_link_state_change(void *arg, int pending)
/*
* Mark an interface down and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
void
if_down(struct ifnet *ifp)
@@ -2063,7 +2226,6 @@ if_down(struct ifnet *ifp)
/*
* Mark an interface up and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
void
if_up(struct ifnet *ifp)
@@ -2088,8 +2250,8 @@ if_qflush(struct ifnet *ifp)
ALTQ_PURGE(ifq);
#endif
n = ifq->ifq_head;
- while ((m = n) != 0) {
- n = m->m_act;
+ while ((m = n) != NULL) {
+ n = m->m_nextpkt;
m_freem(m);
}
ifq->ifq_head = 0;
@@ -2140,7 +2302,6 @@ static int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
struct ifreq *ifr;
- struct ifstat *ifs;
int error = 0;
int new_flags, temp_flags;
size_t namelen, onamelen;
@@ -2182,7 +2343,8 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
break;
case SIOCGIFPHYS:
- ifr->ifr_phys = ifp->if_physical;
+		/* XXXGL: did this ever work? */
+ ifr->ifr_phys = 0;
break;
case SIOCGIFDESCR:
@@ -2262,18 +2424,12 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
*/
new_flags = (ifr->ifr_flags & 0xffff) |
(ifr->ifr_flagshigh << 16);
- if (ifp->if_flags & IFF_SMART) {
- /* Smart drivers twiddle their own routes */
- } else if (ifp->if_flags & IFF_UP &&
+ if (ifp->if_flags & IFF_UP &&
(new_flags & IFF_UP) == 0) {
- int s = splimp();
if_down(ifp);
- splx(s);
} else if (new_flags & IFF_UP &&
(ifp->if_flags & IFF_UP) == 0) {
- int s = splimp();
if_up(ifp);
- splx(s);
}
/* See if permanently promiscuous mode bit is about to flip */
if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
@@ -2281,9 +2437,11 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
ifp->if_flags |= IFF_PROMISC;
else if (ifp->if_pcount == 0)
ifp->if_flags &= ~IFF_PROMISC;
- log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
- ifp->if_xname,
- (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
+ if (log_promisc_mode_change)
+ log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
+ ifp->if_xname,
+ ((new_flags & IFF_PPROMISC) ?
+ "enabled" : "disabled"));
}
ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
(new_flags &~ IFF_CANTCHANGE);
@@ -2321,6 +2479,11 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (error);
if (new_name[0] == '\0')
return (EINVAL);
+ if (new_name[IFNAMSIZ-1] != '\0') {
+ new_name[IFNAMSIZ-1] = '\0';
+ if (strlen(new_name) == IFNAMSIZ-1)
+ return (EINVAL);
+ }
if (ifunit(new_name) != NULL)
return (EEXIST);
@@ -2339,9 +2502,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
log(LOG_INFO, "%s: changing name to '%s'\n",
ifp->if_xname, new_name);
+ IF_ADDR_WLOCK(ifp);
strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
ifa = ifp->if_addr;
- IFA_LOCK(ifa);
sdl = (struct sockaddr_dl *)ifa->ifa_addr;
namelen = strlen(new_name);
onamelen = sdl->sdl_nlen;
@@ -2360,7 +2523,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
bzero(sdl->sdl_data, onamelen);
while (namelen != 0)
sdl->sdl_data[--namelen] = 0xff;
- IFA_UNLOCK(ifa);
+ IF_ADDR_WUNLOCK(ifp);
EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
/* Announce the return of the interface. */
@@ -2420,6 +2583,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
#ifdef INET6
nd6_setmtu(ifp);
#endif
+ rt_updatemtu(ifp);
}
break;
}
@@ -2470,7 +2634,6 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
#ifdef INET6
case SIOCSIFPHYADDR_IN6:
#endif
- case SIOCSLIFPHYADDR:
case SIOCSIFMEDIA:
case SIOCSIFGENERIC:
error = priv_check(td, PRIV_NET_HWIOCTL);
@@ -2484,13 +2647,10 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
break;
case SIOCGIFSTATUS:
- ifs = (struct ifstat *)data;
- ifs->ascii[0] = '\0';
-
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
- case SIOCGLIFPHYADDR:
case SIOCGIFMEDIA:
+ case SIOCGIFXMEDIA:
case SIOCGIFGENERIC:
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
@@ -2503,7 +2663,6 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (error);
error = if_setlladdr(ifp,
ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
- EVENTHANDLER_INVOKE(iflladdr_event, ifp);
break;
case SIOCAIFGROUP:
@@ -2542,6 +2701,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (error);
}
+/* COMPAT_SVR4 */
+#define OSIOCGIFCONF _IOWR('i', 20, struct ifconf)
+
#ifdef COMPAT_FREEBSD32
struct ifconf32 {
int32_t ifc_len;
@@ -2563,11 +2725,25 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
struct ifreq *ifr;
int error;
int oif_flags;
+#ifdef VIMAGE
+ int shutdown;
+#endif
CURVNET_SET(so->so_vnet);
+#ifdef VIMAGE
+ /* Make sure the VNET is stable. */
+ shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET &&
+ so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
+ if (shutdown) {
+ CURVNET_RESTORE();
+ return (EBUSY);
+ }
+#endif
+
switch (cmd) {
case SIOCGIFCONF:
- case OSIOCGIFCONF:
+ case OSIOCGIFCONF: /* COMPAT_SVR4 */
error = ifconf(cmd, data);
CURVNET_RESTORE();
return (error);
@@ -2626,6 +2802,16 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
error = if_getgroupmembers((struct ifgroupreq *)data);
CURVNET_RESTORE();
return (error);
+#if defined(INET) || defined(INET6)
+ case SIOCSVH:
+ case SIOCGVH:
+ if (carp_ioctl_p == NULL)
+ error = EPROTONOSUPPORT;
+ else
+ error = (*carp_ioctl_p)(ifr, cmd, td);
+ CURVNET_RESTORE();
+ return (error);
+#endif
}
ifp = ifunit_ref(ifr->ifr_name);
@@ -2657,79 +2843,17 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
* layer, and do not perform any credentials checks or input
* validation.
*/
-#ifndef COMPAT_43
- error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
- data,
- ifp, td));
+ error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
+ ifp, td));
if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
error = (*ifp->if_ioctl)(ifp, cmd, data);
-#else
- {
- u_long ocmd = cmd;
-
- switch (cmd) {
-
- case SIOCSIFDSTADDR:
- case SIOCSIFADDR:
- case SIOCSIFBRDADDR:
- case SIOCSIFNETMASK:
-#if BYTE_ORDER != BIG_ENDIAN
- if (ifr->ifr_addr.sa_family == 0 &&
- ifr->ifr_addr.sa_len < 16) {
- ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
- ifr->ifr_addr.sa_len = 16;
- }
-#else
- if (ifr->ifr_addr.sa_len == 0)
- ifr->ifr_addr.sa_len = 16;
-#endif
- break;
-
- case OSIOCGIFADDR:
- cmd = SIOCGIFADDR;
- break;
-
- case OSIOCGIFDSTADDR:
- cmd = SIOCGIFDSTADDR;
- break;
-
- case OSIOCGIFBRDADDR:
- cmd = SIOCGIFBRDADDR;
- break;
-
- case OSIOCGIFNETMASK:
- cmd = SIOCGIFNETMASK;
- }
- error = ((*so->so_proto->pr_usrreqs->pru_control)(so,
- cmd,
- data,
- ifp, td));
- if (error == EOPNOTSUPP && ifp != NULL &&
- ifp->if_ioctl != NULL &&
- cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
- cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
- error = (*ifp->if_ioctl)(ifp, cmd, data);
- switch (ocmd) {
-
- case OSIOCGIFADDR:
- case OSIOCGIFDSTADDR:
- case OSIOCGIFBRDADDR:
- case OSIOCGIFNETMASK:
- *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
-
- }
- }
-#endif /* COMPAT_43 */
if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
- if (ifp->if_flags & IFF_UP) {
- int s = splimp();
+ if (ifp->if_flags & IFF_UP)
in6_if_up(ifp);
- splx(s);
- }
#endif
}
if_rele(ifp);
@@ -2825,7 +2949,8 @@ ifpromisc(struct ifnet *ifp, int pswitch)
error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
&ifp->if_pcount, pswitch);
/* If promiscuous mode status has changed, log a message */
- if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
+ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
+ log_promisc_mode_change)
log(LOG_INFO, "%s: promiscuous mode %s\n",
ifp->if_xname,
(ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
@@ -2890,16 +3015,15 @@ again:
if (prison_if(curthread->td_ucred, sa) != 0)
continue;
addrs++;
-#ifdef COMPAT_43
+ /* COMPAT_SVR4 */
if (cmd == OSIOCGIFCONF) {
struct osockaddr *osa =
- (struct osockaddr *)&ifr.ifr_addr;
+ (struct osockaddr *)&ifr.ifr_addr;
ifr.ifr_addr = *sa;
osa->sa_family = sa->sa_family;
sbuf_bcat(sb, &ifr, sizeof(ifr));
max_len += sizeof(ifr);
} else
-#endif
if (sa->sa_len <= sizeof(*sa)) {
ifr.ifr_addr = *sa;
sbuf_bcat(sb, &ifr, sizeof(ifr));
@@ -2955,7 +3079,7 @@ if_allmulti(struct ifnet *ifp, int onswitch)
}
struct ifmultiaddr *
-if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
+if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
{
struct ifmultiaddr *ifma;
@@ -3034,8 +3158,6 @@ if_freemulti(struct ifmultiaddr *ifma)
KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
ifma->ifma_refcount));
- KASSERT(ifma->ifma_protospec == NULL,
- ("if_freemulti: protospec not NULL"));
if (ifma->ifma_lladdr != NULL)
free(ifma->ifma_lladdr, M_IFMADDR);
@@ -3067,6 +3189,7 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
{
struct ifmultiaddr *ifma, *ll_ifma;
struct sockaddr *llsa;
+ struct sockaddr_dl sdl;
int error;
/*
@@ -3086,12 +3209,18 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
/*
* The address isn't already present; resolve the protocol address
* into a link layer address, and then look that up, bump its
- * refcount or allocate an ifma for that also. If 'llsa' was
- * returned, we will need to free it later.
+ * refcount or allocate an ifma for that also.
+	 * Most link layer resolving functions return address data that
+	 * fits inside the default sockaddr_dl structure. However, the
+	 * callback can allocate another sockaddr structure; in that case
+	 * we need to free it later.
*/
llsa = NULL;
ll_ifma = NULL;
if (ifp->if_resolvemulti != NULL) {
+		/* Provide the called function with buffer size information */
+ sdl.sdl_len = sizeof(sdl);
+ llsa = (struct sockaddr *)&sdl;
error = ifp->if_resolvemulti(ifp, &llsa, sa);
if (error)
goto unlock_out;
@@ -3155,14 +3284,14 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
}
- if (llsa != NULL)
- free(llsa, M_IFMADDR);
+ if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
+ link_free_sdl(llsa);
return (0);
free_llsa_out:
- if (llsa != NULL)
- free(llsa, M_IFMADDR);
+ if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
+ link_free_sdl(llsa);
unlock_out:
IF_ADDR_WUNLOCK(ifp);
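
A hedged sketch of the resolver contract described above: if_addmulti() hands the callback a default-sized sockaddr_dl through *llsa; a resolver whose result fits reuses that buffer (as below), while one needing more room may substitute a link_alloc_sdl() allocation, which if_addmulti() then releases with link_free_sdl(). The function name is illustrative; the body mirrors the IPv4 case of an Ethernet-style resolver:

    static int
    example_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
        struct sockaddr *sa)
    {
        struct sockaddr_dl *sdl;
        struct sockaddr_in *sin;
        u_char *e_addr;

        if (sa->sa_family != AF_INET)
            return (EAFNOSUPPORT);
        sin = (struct sockaddr_in *)sa;
        if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
            return (EADDRNOTAVAIL);
        /* Result fits in the caller's default sockaddr_dl: reuse it. */
        sdl = link_init_sdl(ifp, *llsa, ifp->if_type);
        sdl->sdl_alen = ETHER_ADDR_LEN;
        /* Map the IPv4 group address to a multicast MAC address. */
        e_addr = (u_char *)LLADDR(sdl);
        ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
        return (0);
    }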
@@ -3363,8 +3492,10 @@ if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
*
* At this time we only support certain types of interfaces,
* and we don't allow the length of the address to change.
+ *
+ * Set noinline to be DTrace-friendly.
*/
-int
+__noinline int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
struct sockaddr_dl *sdl;
@@ -3422,17 +3553,45 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
ifr.ifr_flagshigh = ifp->if_flags >> 16;
(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
}
-#ifdef INET
- /*
- * Also send gratuitous ARPs to notify other nodes about
- * the address change.
- */
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family == AF_INET)
- arp_ifinit(ifp, ifa);
- }
-#endif
}
+ EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+ return (0);
+}
+
+/*
+ * Compat function for handling basic encapsulation requests.
+ * Stacks that have not been converted (FDDI, IB, ..) support the
+ * traditional output model: ARP (and other similar L2 protocols) is
+ * handled inside the output routine, and arpresolve()/nd6_resolve()
+ * return a MAC address instead of a full prepend.
+ *
+ * This function returns the calculated header (== MAC) for IPv4/IPv6 and
+ * returns EAFNOSUPPORT (which is then handled in ARP code) for other
+ * address families.
+ */
+static int
+if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
+{
+
+ if (req->rtype != IFENCAP_LL)
+ return (EOPNOTSUPP);
+
+ if (req->bufsize < req->lladdr_len)
+ return (ENOMEM);
+
+ switch (req->family) {
+ case AF_INET:
+ case AF_INET6:
+ break;
+ default:
+ return (EAFNOSUPPORT);
+ }
+
+ /* Copy lladdr to storage as is */
+ memmove(req->buf, req->lladdr, req->lladdr_len);
+ req->bufsize = req->lladdr_len;
+ req->lladdr_off = 0;
+
return (0);
}
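
A hedged caller-side sketch matching the default handler above: the request carries the resolved link-layer address and a destination buffer, and the handler reports the size and offset of the prepend data back through bufsize and lladdr_off. The wrapper name is illustrative:

    static int
    request_l2_prepend(struct ifnet *ifp, char *lladdr, int alen,
        u_char *buf, size_t buflen)
    {
        struct if_encap_req ereq;

        bzero(&ereq, sizeof(ereq));
        ereq.buf = buf;
        ereq.bufsize = buflen;
        ereq.rtype = IFENCAP_LL;
        ereq.family = AF_INET;
        ereq.lladdr = lladdr;
        ereq.lladdr_len = alen;

        /* Drivers without their own handler get the default above. */
        return (ifp->if_requestencap(ifp, &ereq));
    }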
@@ -3500,15 +3659,15 @@ if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
IF_LOCK(ifq);
if (_IF_QFULL(ifq)) {
- _IF_DROP(ifq);
IF_UNLOCK(ifq);
+ if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
m_freem(m);
return (0);
}
if (ifp != NULL) {
- ifp->if_obytes += m->m_pkthdr.len + adjust;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
if (m->m_flags & (M_BCAST|M_MCAST))
- ifp->if_omcasts++;
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
}
_IF_ENQUEUE(ifq, m);
@@ -3543,3 +3702,465 @@ if_deregister_com_alloc(u_char type)
if_com_alloc[type] = NULL;
if_com_free[type] = NULL;
}
+
+/* API for driver access to network-stack-owned ifnet. */
+uint64_t
+if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
+{
+ uint64_t oldbrate;
+
+ oldbrate = ifp->if_baudrate;
+ ifp->if_baudrate = baudrate;
+ return (oldbrate);
+}
+
+uint64_t
+if_getbaudrate(if_t ifp)
+{
+
+ return (((struct ifnet *)ifp)->if_baudrate);
+}
+
+int
+if_setcapabilities(if_t ifp, int capabilities)
+{
+ ((struct ifnet *)ifp)->if_capabilities = capabilities;
+ return (0);
+}
+
+int
+if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
+{
+ ((struct ifnet *)ifp)->if_capabilities |= setbit;
+ ((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
+
+ return (0);
+}
+
+int
+if_getcapabilities(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_capabilities;
+}
+
+int
+if_setcapenable(if_t ifp, int capabilities)
+{
+ ((struct ifnet *)ifp)->if_capenable = capabilities;
+ return (0);
+}
+
+int
+if_setcapenablebit(if_t ifp, int setcap, int clearcap)
+{
+	if (setcap)
+		((struct ifnet *)ifp)->if_capenable |= setcap;
+	if (clearcap)
+		((struct ifnet *)ifp)->if_capenable &= ~clearcap;
+
+ return (0);
+}
+
+const char *
+if_getdname(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_dname;
+}
+
+int
+if_togglecapenable(if_t ifp, int togglecap)
+{
+ ((struct ifnet *)ifp)->if_capenable ^= togglecap;
+ return (0);
+}
+
+int
+if_getcapenable(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_capenable;
+}
+
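
These accessors exist so drivers can manipulate an opaque if_t without reaching into struct ifnet directly. A hedged sketch of typical use at attach time; the driver name and the capability set are illustrative:

    static void
    exdrv_attach_if(if_t ifp, void *softc)
    {

        if_setsoftc(ifp, softc);
        if_setmtu(ifp, ETHERMTU);
        if_setflagbits(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST, 0);
        if_setcapabilities(ifp, IFCAP_TXCSUM | IFCAP_RXCSUM);
        /* Enable everything the hardware advertises by default. */
        if_setcapenable(ifp, if_getcapabilities(ifp));
    }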
+/*
+ * This is largely undesirable because it ties ifnet to a device, but does
+ * provide flexibility for an embedded product vendor. Should be used with
+ * the understanding that it violates the interface boundaries, and should be
+ * a last resort only.
+ */
+int
+if_setdev(if_t ifp, void *dev)
+{
+ return (0);
+}
+
+int
+if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
+{
+ ((struct ifnet *)ifp)->if_drv_flags |= set_flags;
+ ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
+
+ return (0);
+}
+
+int
+if_getdrvflags(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_drv_flags;
+}
+
+int
+if_setdrvflags(if_t ifp, int flags)
+{
+ ((struct ifnet *)ifp)->if_drv_flags = flags;
+ return (0);
+}
+
+int
+if_setflags(if_t ifp, int flags)
+{
+ ((struct ifnet *)ifp)->if_flags = flags;
+ return (0);
+}
+
+int
+if_setflagbits(if_t ifp, int set, int clear)
+{
+ ((struct ifnet *)ifp)->if_flags |= set;
+ ((struct ifnet *)ifp)->if_flags &= ~clear;
+
+ return (0);
+}
+
+int
+if_getflags(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_flags;
+}
+
+int
+if_clearhwassist(if_t ifp)
+{
+ ((struct ifnet *)ifp)->if_hwassist = 0;
+ return (0);
+}
+
+int
+if_sethwassistbits(if_t ifp, int toset, int toclear)
+{
+ ((struct ifnet *)ifp)->if_hwassist |= toset;
+ ((struct ifnet *)ifp)->if_hwassist &= ~toclear;
+
+ return (0);
+}
+
+int
+if_sethwassist(if_t ifp, int hwassist_bit)
+{
+ ((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
+ return (0);
+}
+
+int
+if_gethwassist(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_hwassist;
+}
+
+int
+if_setmtu(if_t ifp, int mtu)
+{
+ ((struct ifnet *)ifp)->if_mtu = mtu;
+ return (0);
+}
+
+int
+if_getmtu(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_mtu;
+}
+
+int
+if_getmtu_family(if_t ifp, int family)
+{
+ struct domain *dp;
+
+ for (dp = domains; dp; dp = dp->dom_next) {
+ if (dp->dom_family == family && dp->dom_ifmtu != NULL)
+ return (dp->dom_ifmtu((struct ifnet *)ifp));
+ }
+
+ return (((struct ifnet *)ifp)->if_mtu);
+}
+
+int
+if_setsoftc(if_t ifp, void *softc)
+{
+ ((struct ifnet *)ifp)->if_softc = softc;
+ return (0);
+}
+
+void *
+if_getsoftc(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_softc;
+}
+
+void
+if_setrcvif(struct mbuf *m, if_t ifp)
+{
+ m->m_pkthdr.rcvif = (struct ifnet *)ifp;
+}
+
+void
+if_setvtag(struct mbuf *m, uint16_t tag)
+{
+ m->m_pkthdr.ether_vtag = tag;
+}
+
+uint16_t
+if_getvtag(struct mbuf *m)
+{
+
+ return (m->m_pkthdr.ether_vtag);
+}
+
+int
+if_sendq_empty(if_t ifp)
+{
+ return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
+}
+
+struct ifaddr *
+if_getifaddr(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_addr;
+}
+
+int
+if_getamcount(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_amcount;
+}
+
+int
+if_setsendqready(if_t ifp)
+{
+ IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
+ return (0);
+}
+
+int
+if_setsendqlen(if_t ifp, int tx_desc_count)
+{
+ IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
+ ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
+
+ return (0);
+}
+
+int
+if_vlantrunkinuse(if_t ifp)
+{
+	return ((struct ifnet *)ifp)->if_vlantrunk != NULL ? 1 : 0;
+}
+
+int
+if_input(if_t ifp, struct mbuf* sendmp)
+{
+ (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
+	return (0);
+}
+
+/* XXX */
+#ifndef ETH_ADDR_LEN
+#define ETH_ADDR_LEN 6
+#endif
+
+int
+if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max)
+{
+ struct ifmultiaddr *ifma;
+ uint8_t *lmta = (uint8_t *)mta;
+ int mcnt = 0;
+
+ TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+
+ if (mcnt == max)
+ break;
+
+ bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
+ &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
+ mcnt++;
+ }
+ *cnt = mcnt;
+
+ return (0);
+}
+
+int
+if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max)
+{
+ int error;
+
+ if_maddr_rlock(ifp);
+ error = if_setupmultiaddr(ifp, mta, cnt, max);
+ if_maddr_runlock(ifp);
+ return (error);
+}
+
+int
+if_multiaddr_count(if_t ifp, int max)
+{
+ struct ifmultiaddr *ifma;
+ int count;
+
+ count = 0;
+ if_maddr_rlock(ifp);
+ TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ count++;
+ if (count == max)
+ break;
+ }
+ if_maddr_runlock(ifp);
+ return (count);
+}
+
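
A hedged sketch of the intended consumer of the count/array pair above: a driver programming a bounded hardware multicast filter, falling back to ALLMULTI when the group list may not fit. EXDRV_MAX_MCAST and the function name are illustrative:

    #define EXDRV_MAX_MCAST	16

    static void
    exdrv_set_multi(if_t ifp)
    {
        uint8_t mta[EXDRV_MAX_MCAST * ETH_ADDR_LEN];
        int mcnt;

        /* A full count means the filter could overflow: go allmulti. */
        if (if_multiaddr_count(ifp, EXDRV_MAX_MCAST) == EXDRV_MAX_MCAST) {
            if_setflagbits(ifp, IFF_ALLMULTI, 0);
            return;
        }
        if_multiaddr_array(ifp, mta, &mcnt, EXDRV_MAX_MCAST);
        /* mta now holds mcnt ETH_ADDR_LEN-byte entries for the filter. */
    }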
+int
+if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
+{
+ struct ifmultiaddr *ifma;
+ int cnt = 0;
+
+ if_maddr_rlock(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
+ cnt += filter(arg, ifma, cnt);
+ if_maddr_runlock(ifp);
+ return (cnt);
+}
+
+struct mbuf *
+if_dequeue(if_t ifp)
+{
+ struct mbuf *m;
+ IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
+
+ return (m);
+}
+
+int
+if_sendq_prepend(if_t ifp, struct mbuf *m)
+{
+ IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
+ return (0);
+}
+
+int
+if_setifheaderlen(if_t ifp, int len)
+{
+ ((struct ifnet *)ifp)->if_hdrlen = len;
+ return (0);
+}
+
+caddr_t
+if_getlladdr(if_t ifp)
+{
+ return (IF_LLADDR((struct ifnet *)ifp));
+}
+
+void *
+if_gethandle(u_char type)
+{
+ return (if_alloc(type));
+}
+
+void
+if_bpfmtap(if_t ifh, struct mbuf *m)
+{
+ struct ifnet *ifp = (struct ifnet *)ifh;
+
+ BPF_MTAP(ifp, m);
+}
+
+void
+if_etherbpfmtap(if_t ifh, struct mbuf *m)
+{
+ struct ifnet *ifp = (struct ifnet *)ifh;
+
+ ETHER_BPF_MTAP(ifp, m);
+}
+
+void
+if_vlancap(if_t ifh)
+{
+ struct ifnet *ifp = (struct ifnet *)ifh;
+ VLAN_CAPABILITIES(ifp);
+}
+
+void
+if_setinitfn(if_t ifp, void (*init_fn)(void *))
+{
+ ((struct ifnet *)ifp)->if_init = init_fn;
+}
+
+void
+if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
+{
+ ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
+}
+
+void
+if_setstartfn(if_t ifp, void (*start_fn)(if_t))
+{
+ ((struct ifnet *)ifp)->if_start = (void *)start_fn;
+}
+
+void
+if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
+{
+ ((struct ifnet *)ifp)->if_transmit = start_fn;
+}
+
+void
+if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
+{
+	((struct ifnet *)ifp)->if_qflush = flush_fn;
+}
+
+void
+if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
+{
+
+ ifp->if_get_counter = fn;
+}
+
+/* Revisit these - These are inline functions originally. */
+int
+drbr_inuse_drv(if_t ifh, struct buf_ring *br)
+{
+ return drbr_inuse(ifh, br);
+}
+
+struct mbuf*
+drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
+{
+ return drbr_dequeue(ifh, br);
+}
+
+int
+drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
+{
+ return drbr_needs_enqueue(ifh, br);
+}
+
+int
+drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
+{
+ return drbr_enqueue(ifh, br, m);
+}
diff --git a/freebsd/sys/net/if.h b/freebsd/sys/net/if.h
index e182db54..98ae0a82 100644
--- a/freebsd/sys/net/if.h
+++ b/freebsd/sys/net/if.h
@@ -35,10 +35,6 @@
#include <sys/cdefs.h>
-#ifdef _KERNEL
-#include <sys/queue.h>
-#endif
-
#if __BSD_VISIBLE
/*
* <net/if.h> does not depend on <sys/time.h> on most other systems. This
@@ -49,8 +45,6 @@
#include <sys/time.h>
#include <sys/socket.h>
#endif
-
-struct ifnet;
#endif
/*
@@ -80,32 +74,45 @@ struct if_clonereq {
*/
struct if_data {
/* generic interface information */
- u_char ifi_type; /* ethernet, tokenring, etc */
- u_char ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */
- u_char ifi_addrlen; /* media address length */
- u_char ifi_hdrlen; /* media header length */
- u_char ifi_link_state; /* current link state */
- u_char ifi_spare_char1; /* spare byte */
- u_char ifi_spare_char2; /* spare byte */
- u_char ifi_datalen; /* length of this data struct */
- u_long ifi_mtu; /* maximum transmission unit */
- u_long ifi_metric; /* routing metric (external only) */
- u_long ifi_baudrate; /* linespeed */
+ uint8_t ifi_type; /* ethernet, tokenring, etc */
+ uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */
+ uint8_t ifi_addrlen; /* media address length */
+ uint8_t ifi_hdrlen; /* media header length */
+ uint8_t ifi_link_state; /* current link state */
+ uint8_t ifi_vhid; /* carp vhid */
+ uint16_t ifi_datalen; /* length of this data struct */
+ uint32_t ifi_mtu; /* maximum transmission unit */
+ uint32_t ifi_metric; /* routing metric (external only) */
+ uint64_t ifi_baudrate; /* linespeed */
/* volatile statistics */
- u_long ifi_ipackets; /* packets received on interface */
- u_long ifi_ierrors; /* input errors on interface */
- u_long ifi_opackets; /* packets sent on interface */
- u_long ifi_oerrors; /* output errors on interface */
- u_long ifi_collisions; /* collisions on csma interfaces */
- u_long ifi_ibytes; /* total number of octets received */
- u_long ifi_obytes; /* total number of octets sent */
- u_long ifi_imcasts; /* packets received via multicast */
- u_long ifi_omcasts; /* packets sent via multicast */
- u_long ifi_iqdrops; /* dropped on input, this interface */
- u_long ifi_noproto; /* destined for unsupported protocol */
- u_long ifi_hwassist; /* HW offload capabilities, see IFCAP */
- time_t ifi_epoch; /* uptime at attach or stat reset */
- struct timeval ifi_lastchange; /* time of last administrative change */
+ uint64_t ifi_ipackets; /* packets received on interface */
+ uint64_t ifi_ierrors; /* input errors on interface */
+ uint64_t ifi_opackets; /* packets sent on interface */
+ uint64_t ifi_oerrors; /* output errors on interface */
+ uint64_t ifi_collisions; /* collisions on csma interfaces */
+ uint64_t ifi_ibytes; /* total number of octets received */
+ uint64_t ifi_obytes; /* total number of octets sent */
+ uint64_t ifi_imcasts; /* packets received via multicast */
+ uint64_t ifi_omcasts; /* packets sent via multicast */
+ uint64_t ifi_iqdrops; /* dropped on input */
+ uint64_t ifi_oqdrops; /* dropped on output */
+ uint64_t ifi_noproto; /* destined for unsupported protocol */
+ uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */
+
+ /* Unions are here to make sizes MI. */
+ union { /* uptime at attach or stat reset */
+ time_t tt;
+ uint64_t ph;
+ } __ifi_epoch;
+#define ifi_epoch __ifi_epoch.tt
+ union { /* time of last administrative change */
+ struct timeval tv;
+ struct {
+ uint64_t ph1;
+ uint64_t ph2;
+ } ph;
+ } __ifi_lastchange;
+#define ifi_lastchange __ifi_lastchange.tv
};
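
The unions above pin struct if_data to one layout on every ABI, even where time_t or struct timeval is narrower, so the structure can cross kernel/user and 32/64-bit compat boundaries unchanged. A hedged illustration of the invariant the placeholders enforce (C11 _Static_assert used for exposition):

    /* Each slot occupies its 64-bit placeholder size regardless of
     * the native time_t / struct timeval width. */
    _Static_assert(sizeof(((struct if_data *)0)->__ifi_epoch) ==
        sizeof(uint64_t), "ifi_epoch placeholder must be 64 bits");
    _Static_assert(sizeof(((struct if_data *)0)->__ifi_lastchange) ==
        2 * sizeof(uint64_t), "ifi_lastchange placeholder must be 128 bits");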
/*-
@@ -135,7 +142,7 @@ struct if_data {
#define IFF_DEBUG 0x4 /* (n) turn on debugging */
#define IFF_LOOPBACK 0x8 /* (i) is a loopback net */
#define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */
-#define IFF_SMART 0x20 /* (i) interface manages own routes */
+/* 0x20 was IFF_SMART */
#define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */
#define IFF_NOARP 0x80 /* (n) no address resolution protocol */
#define IFF_PROMISC 0x100 /* (n) receive all packets */
@@ -153,7 +160,6 @@ struct if_data {
#define IFF_STATICARP 0x80000 /* (n) static ARP */
#define IFF_DYING 0x200000 /* (n) interface is winding down */
#define IFF_RENAMING 0x400000 /* (n) interface is being renamed */
-
/*
* Old names for driver flags so that user space tools can continue to use
* the old (portable) names.
@@ -166,7 +172,7 @@ struct if_data {
/* flags set internally only: */
#define IFF_CANTCHANGE \
(IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\
- IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_SMART|IFF_PROMISC|\
+ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\
IFF_DYING|IFF_CANTCONFIG)
/*
@@ -180,7 +186,7 @@ struct if_data {
* Some convenience macros used for setting ifi_baudrate.
* XXX 1000 vs. 1024? --thorpej@netbsd.org
*/
-#define IF_Kbps(x) ((x) * 1000) /* kilobits/sec. */
+#define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */
#define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */
#define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */
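
The uintmax_t cast above fixes a silent overflow: without it the macro multiplies in plain int arithmetic, and int is 32 bits on all supported targets, so anything past roughly 2 Gbps wraps. A hedged illustration:

    static uint64_t
    ten_gig_baudrate(void)
    {

        /* 10 * 1000 * 1000 * 1000 wraps a 32-bit int; the uintmax_t
         * cast makes the whole multiply 64-bit safe. */
        return (IF_Gbps(10));	/* 10000000000 bps */
    }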
@@ -232,6 +238,7 @@ struct if_data {
#define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */
#define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */
#define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */
+#define IFCAP_HWSTATS 0x800000 /* manages counters internally */
#define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
@@ -297,7 +304,7 @@ struct ifa_msghdr {
int ifam_addrs; /* like rtm_addrs */
int ifam_flags; /* value of ifa_flags */
u_short ifam_index; /* index for associated ifp */
- int ifam_metric; /* value of ifa_metric */
+ int ifam_metric; /* value of ifa_ifp->if_metric */
};
/*
@@ -322,7 +329,7 @@ struct ifa_msghdrl {
u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */
u_short ifam_len; /* length of ifa_msghdrl incl. if_data */
u_short ifam_data_off; /* offset of if_data from beginning */
- int ifam_metric; /* value of ifa_metric */
+ int ifam_metric; /* value of ifa_ifp->if_metric */
struct if_data ifam_data;/* statistics and other data about if or
* address */
};
@@ -386,6 +393,7 @@ struct ifreq {
caddr_t ifru_data;
int ifru_cap[2];
u_int ifru_fib;
+ u_char ifru_vlan_pcp;
} ifr_ifru;
#define ifr_addr ifr_ifru.ifru_addr /* address */
#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */
@@ -403,6 +411,7 @@ struct ifreq {
#define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */
#define ifr_index ifr_ifru.ifru_index /* interface index */
#define ifr_fib ifr_ifru.ifru_fib /* interface fib */
+#define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */
};
#define _SIZEOF_ADDR_IFREQ(ifr) \
@@ -415,6 +424,15 @@ struct ifaliasreq {
struct sockaddr ifra_addr;
struct sockaddr ifra_broadaddr;
struct sockaddr ifra_mask;
+ int ifra_vhid;
+};
+
+/* 9.x compat */
+struct oifaliasreq {
+ char ifra_name[IFNAMSIZ];
+ struct sockaddr ifra_addr;
+ struct sockaddr ifra_broadaddr;
+ struct sockaddr ifra_mask;
};
struct ifmediareq {
@@ -495,16 +513,17 @@ struct ifgroupreq {
};
/*
- * Structure for SIOC[AGD]LIFADDR
+ * Structure used to request i2c data
+ * from interface transceivers.
*/
-struct if_laddrreq {
- char iflr_name[IFNAMSIZ];
- u_int flags;
-#define IFLR_PREFIX 0x8000 /* in: prefix given out: kernel fills id */
- u_int prefixlen; /* in/out */
- struct sockaddr_storage addr; /* in/out */
- struct sockaddr_storage dstaddr; /* out */
-};
+struct ifi2creq {
+ uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */
+ uint8_t offset; /* read offset */
+ uint8_t len; /* read length */
+ uint8_t spare0;
+ uint32_t spare1;
+ uint8_t data[8]; /* read buffer */
+};
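
A hedged userland sketch of how the i2c request structure above is used: the SIOCGI2C ioctl carries it through ifr_data. The function name is illustrative, at most sizeof(i2c.data) bytes can be read per request, and error handling is minimal:

    #include <sys/param.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <sys/sockio.h>
    #include <net/if.h>
    #include <stdint.h>
    #include <string.h>

    static int
    read_sfp_id(int sock, const char *ifname, uint8_t *buf, size_t len)
    {
        struct ifreq ifr;
        struct ifi2creq i2c;

        memset(&ifr, 0, sizeof(ifr));
        memset(&i2c, 0, sizeof(i2c));
        i2c.dev_addr = 0xA0;		/* SFP identification page */
        i2c.offset = 0;
        i2c.len = MIN(len, sizeof(i2c.data));
        strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
        ifr.ifr_data = (caddr_t)&i2c;
        if (ioctl(sock, SIOCGI2C, &ifr) == -1)
            return (-1);
        memcpy(buf, i2c.data, i2c.len);
        return (i2c.len);
    }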
#endif /* __BSD_VISIBLE */
@@ -528,10 +547,4 @@ struct if_nameindex *if_nameindex(void);
unsigned int if_nametoindex(const char *);
__END_DECLS
#endif
-
-#ifdef _KERNEL
-/* XXX - this should go away soon. */
-#include <net/if_var.h>
-#endif
-
#endif /* !_NET_IF_H_ */
diff --git a/freebsd/sys/net/if_arc.h b/freebsd/sys/net/if_arc.h
index 88a72403..23139aa6 100644
--- a/freebsd/sys/net/if_arc.h
+++ b/freebsd/sys/net/if_arc.h
@@ -133,7 +133,7 @@ void arc_storelladdr(struct ifnet *, u_int8_t);
int arc_isphds(u_int8_t);
void arc_input(struct ifnet *, struct mbuf *);
int arc_output(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct route *);
+ const struct sockaddr *, struct route *);
int arc_ioctl(struct ifnet *, u_long, caddr_t);
void arc_frag_init(struct ifnet *);
diff --git a/freebsd/sys/net/if_arcsubr.c b/freebsd/sys/net/if_arcsubr.c
index fae432ad..1954e262 100644
--- a/freebsd/sys/net/if_arcsubr.c
+++ b/freebsd/sys/net/if_arcsubr.c
@@ -42,7 +42,6 @@
*/
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -59,6 +58,7 @@
#include <machine/cpu.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/if_dl.h>
@@ -78,11 +78,6 @@
#include <netinet6/nd6.h>
#endif
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#define ARCNET_ALLOW_BROKEN_ARP
static struct mbuf *arc_defrag(struct ifnet *, struct mbuf *);
@@ -94,8 +89,7 @@ u_int8_t arcbroadcastaddr = 0;
#define ARC_LLADDR(ifp) (*(u_int8_t *)IF_LLADDR(ifp))
#define senderr(e) { error = (e); goto bad;}
-#define SIN(s) ((struct sockaddr_in *)s)
-#define SIPX(s) ((struct sockaddr_ipx *)s)
+#define SIN(s) ((const struct sockaddr_in *)(s))
/*
* ARCnet output routine.
@@ -103,7 +97,7 @@ u_int8_t arcbroadcastaddr = 0;
* Assumes that ifp is actually pointer to arccom structure.
*/
int
-arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
struct arc_header *ah;
@@ -112,7 +106,7 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
int loop_copy = 0;
int isphds;
#if defined(INET) || defined(INET6)
- struct llentry *lle;
+ int is_gw = 0;
#endif
if (!((ifp->if_flags & IFF_UP) &&
@@ -120,6 +114,10 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
return(ENETDOWN); /* m, m1 aren't initialized yet */
error = 0;
+#if defined(INET) || defined(INET6)
+ if (ro != NULL)
+ is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
+#endif
switch (dst->sa_family) {
#ifdef INET
@@ -133,8 +131,8 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
else if (ifp->if_flags & IFF_NOARP)
adst = ntohl(SIN(dst)->sin_addr.s_addr) & 0xFF;
else {
- error = arpresolve(ifp, ro ? ro->ro_rt : NULL,
- m, dst, &adst, &lle);
+ error = arpresolve(ifp, is_gw, m, dst, &adst, NULL,
+ NULL);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
}
@@ -172,24 +170,23 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
#endif
#ifdef INET6
case AF_INET6:
- error = nd6_storelladdr(ifp, m, dst, (u_char *)&adst, &lle);
- if (error)
- return (error);
+ if ((m->m_flags & M_MCAST) != 0)
+ adst = arcbroadcastaddr; /* ARCnet broadcast address */
+ else {
+ error = nd6_resolve(ifp, is_gw, m, dst, &adst, NULL,
+ NULL);
+ if (error != 0)
+ return (error == EWOULDBLOCK ? 0 : error);
+ }
atype = ARCTYPE_INET6;
break;
#endif
-#ifdef IPX
- case AF_IPX:
- adst = SIPX(dst)->sipx_addr.x_host.c_host[5];
- atype = ARCTYPE_IPX;
- if (adst == 0xff)
- adst = arcbroadcastaddr;
- break;
-#endif
-
case AF_UNSPEC:
+ {
+ const struct arc_header *ah;
+
loop_copy = -1;
- ah = (struct arc_header *)dst->sa_data;
+ ah = (const struct arc_header *)dst->sa_data;
adst = ah->arc_dhost;
atype = ah->arc_type;
@@ -209,15 +206,15 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
#endif
}
break;
-
+ }
default:
if_printf(ifp, "can't handle af%d\n", dst->sa_family);
senderr(EAFNOSUPPORT);
}
isphds = arc_isphds(atype);
- M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
ah = mtod(m, struct arc_header *);
ah->arc_type = atype;
@@ -268,12 +265,12 @@ arc_frag_next(struct ifnet *ifp)
struct arc_header *ah;
ac = (struct arccom *)ifp->if_l2com;
- if ((m = ac->curr_frag) == 0) {
+ if ((m = ac->curr_frag) == NULL) {
int tfrags;
/* dequeue new packet */
IF_DEQUEUE(&ifp->if_snd, m);
- if (m == 0)
+ if (m == NULL)
return 0;
ah = mtod(m, struct arc_header *);
@@ -281,7 +278,7 @@ arc_frag_next(struct ifnet *ifp)
return m;
++ac->ac_seqid; /* make the seqid unique */
- tfrags = (m->m_pkthdr.len + ARC_MAX_DATA - 1) / ARC_MAX_DATA;
+ tfrags = howmany(m->m_pkthdr.len, ARC_MAX_DATA);
ac->fsflag = 2 * tfrags - 3;
ac->sflag = 0;
ac->rsflag = ac->fsflag;
@@ -296,14 +293,14 @@ arc_frag_next(struct ifnet *ifp)
/* split out next fragment and return it */
if (ac->sflag < ac->fsflag) {
/* we CAN'T have short packets here */
- ac->curr_frag = m_split(m, ARC_MAX_DATA, M_DONTWAIT);
+ ac->curr_frag = m_split(m, ARC_MAX_DATA, M_NOWAIT);
if (ac->curr_frag == 0) {
m_freem(m);
return 0;
}
- M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT);
- if (m == 0) {
+ M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT);
+ if (m == NULL) {
m_freem(ac->curr_frag);
ac->curr_frag = 0;
return 0;
@@ -321,8 +318,8 @@ arc_frag_next(struct ifnet *ifp)
ARC_MAX_FORBID_LEN - ARC_HDRNEWLEN + 2)) {
ac->curr_frag = 0;
- M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_NOWAIT);
+ if (m == NULL)
return 0;
ah = mtod(m, struct arc_header *);
@@ -334,8 +331,8 @@ arc_frag_next(struct ifnet *ifp)
} else {
ac->curr_frag = 0;
- M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT);
+ if (m == NULL)
return 0;
ah = mtod(m, struct arc_header *);
@@ -352,7 +349,7 @@ arc_frag_next(struct ifnet *ifp)
/*
* Defragmenter. Returns mbuf if last packet found, else
- * NULL. frees imcoming mbuf as necessary.
+ * NULL. frees incoming mbuf as necessary.
*/
static __inline struct mbuf *
@@ -371,7 +368,7 @@ arc_defrag(struct ifnet *ifp, struct mbuf *m)
if (m->m_len < ARC_HDRNEWLEN) {
m = m_pullup(m, ARC_HDRNEWLEN);
if (m == NULL) {
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return NULL;
}
}
@@ -391,7 +388,7 @@ arc_defrag(struct ifnet *ifp, struct mbuf *m)
if (m->m_len < ARC_HDRNEWLEN) {
m = m_pullup(m, ARC_HDRNEWLEN);
if (m == NULL) {
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return NULL;
}
}
@@ -544,11 +541,11 @@ arc_input(struct ifnet *ifp, struct mbuf *m)
return;
}
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
if (ah->arc_dhost == arcbroadcastaddr) {
m->m_flags |= M_BCAST|M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
atype = ah->arc_type;
@@ -556,15 +553,11 @@ arc_input(struct ifnet *ifp, struct mbuf *m)
#ifdef INET
case ARCTYPE_IP:
m_adj(m, ARC_HDRNEWLEN);
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
case ARCTYPE_IP_OLD:
m_adj(m, ARC_HDRLEN);
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
@@ -600,12 +593,6 @@ arc_input(struct ifnet *ifp, struct mbuf *m)
isr = NETISR_IPV6;
break;
#endif
-#ifdef IPX
- case ARCTYPE_IPX:
- m_adj(m, ARC_HDRNEWLEN);
- isr = NETISR_IPX;
- break;
-#endif
default:
m_freem(m);
return;
@@ -640,11 +627,7 @@ arc_ifattach(struct ifnet *ifp, u_int8_t lla)
ifp->if_resolvemulti = arc_resolvemulti;
if (ifp->if_baudrate == 0)
ifp->if_baudrate = 2500000;
-#if __FreeBSD_version < 500000
- ifa = ifnet_addrs[ifp->if_index - 1];
-#else
ifa = ifp->if_addr;
-#endif
KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
sdl = (struct sockaddr_dl *)ifa->ifa_addr;
sdl->sdl_type = IFT_ARCNET;
@@ -691,26 +674,6 @@ arc_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
arp_ifinit(ifp, ifa);
break;
#endif
-#ifdef IPX
- /*
- * XXX This code is probably wrong
- */
- case AF_IPX:
- {
- struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr);
-
- if (ipx_nullhost(*ina))
- ina->x_host.c_host[5] = ARC_LLADDR(ifp);
- else
- arc_storelladdr(ifp, ina->x_host.c_host[5]);
-
- /*
- * Set new address
- */
- ifp->if_init(ifp->if_softc);
- break;
- }
-#endif
default:
ifp->if_init(ifp->if_softc);
break;
@@ -781,21 +744,14 @@ arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
sdl = (struct sockaddr_dl *)sa;
if (*LLADDR(sdl) != arcbroadcastaddr)
return EADDRNOTAVAIL;
- *llsa = 0;
+ *llsa = NULL;
return 0;
#ifdef INET
case AF_INET:
sin = (struct sockaddr_in *)sa;
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
return EADDRNOTAVAIL;
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT | M_ZERO);
- if (sdl == NULL)
- return ENOMEM;
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ARCNET;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
sdl->sdl_alen = ARC_ADDR_LEN;
*LLADDR(sdl) = 0;
*llsa = (struct sockaddr *)sdl;
@@ -811,19 +767,12 @@ arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
* (This is used for multicast routers.)
*/
ifp->if_flags |= IFF_ALLMULTI;
- *llsa = 0;
+ *llsa = NULL;
return 0;
}
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return EADDRNOTAVAIL;
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT | M_ZERO);
- if (sdl == NULL)
- return ENOMEM;
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ARCNET;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
sdl->sdl_alen = ARC_ADDR_LEN;
*LLADDR(sdl) = 0;
*llsa = (struct sockaddr *)sdl;
diff --git a/freebsd/sys/net/if_arp.h b/freebsd/sys/net/if_arp.h
index 38c64020..7d141f37 100644
--- a/freebsd/sys/net/if_arp.h
+++ b/freebsd/sys/net/if_arp.h
@@ -97,43 +97,37 @@ struct arpreq {
#define ATF_PUBL 0x08 /* publish entry (respond for other host) */
#define ATF_USETRAILERS 0x10 /* has requested trailers */
-#ifdef _KERNEL
-/*
- * Structure shared between the ethernet driver modules and
- * the address resolution code.
- */
-struct arpcom {
- struct ifnet *ac_ifp; /* network-visible interface */
- void *ac_netgraph; /* ng_ether(4) netgraph node info */
-};
-#define IFP2AC(ifp) ((struct arpcom *)(ifp->if_l2com))
-#define AC2IFP(ac) ((ac)->ac_ifp)
-
-#endif /* _KERNEL */
-
struct arpstat {
/* Normal things that happen: */
- u_long txrequests; /* # of ARP requests sent by this host. */
- u_long txreplies; /* # of ARP replies sent by this host. */
- u_long rxrequests; /* # of ARP requests received by this host. */
- u_long rxreplies; /* # of ARP replies received by this host. */
- u_long received; /* # of ARP packets received by this host. */
+ uint64_t txrequests; /* # of ARP requests sent by this host. */
+ uint64_t txreplies; /* # of ARP replies sent by this host. */
+ uint64_t rxrequests; /* # of ARP requests received by this host. */
+ uint64_t rxreplies; /* # of ARP replies received by this host. */
+ uint64_t received; /* # of ARP packets received by this host. */
- u_long arp_spares[4]; /* For either the upper or lower half. */
+ uint64_t arp_spares[4]; /* For either the upper or lower half. */
/* Abnormal event and error counting: */
- u_long dropped; /* # of packets dropped waiting for a reply. */
- u_long timeouts; /* # of times with entries removed */
+ uint64_t dropped; /* # of packets dropped waiting for a reply. */
+ uint64_t timeouts; /* # of times with entries removed */
/* due to timeout. */
- u_long dupips; /* # of duplicate IPs detected. */
+ uint64_t dupips; /* # of duplicate IPs detected. */
};
+#ifdef _KERNEL
+#include <sys/counter.h>
+#include <net/vnet.h>
+
+VNET_PCPUSTAT_DECLARE(struct arpstat, arpstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define ARPSTAT_ADD(name, val) V_arpstat.name += (val)
-#define ARPSTAT_SUB(name, val) V_arpstat.name -= (val)
+#define ARPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct arpstat, arpstat, name, (val))
+#define ARPSTAT_SUB(name, val) ARPSTAT_ADD(name, -(val))
#define ARPSTAT_INC(name) ARPSTAT_ADD(name, 1)
#define ARPSTAT_DEC(name) ARPSTAT_SUB(name, 1)
+#endif /* _KERNEL */
+
#endif /* !_NET_IF_ARP_H_ */
diff --git a/freebsd/sys/net/if_atm.h b/freebsd/sys/net/if_atm.h
index e8f69da0..a0900eee 100644
--- a/freebsd/sys/net/if_atm.h
+++ b/freebsd/sys/net/if_atm.h
@@ -96,7 +96,7 @@ struct ifatm_mib {
/*
* Traffic parameters for ATM connections. This contains all parameters
- * to accomodate UBR, UBR+MCR, CBR, VBR and ABR connections.
+ * to accommodate UBR, UBR+MCR, CBR, VBR and ABR connections.
*
* Keep in sync with ng_atm.h
*/
@@ -292,7 +292,7 @@ void atm_ifattach(struct ifnet *);
void atm_ifdetach(struct ifnet *);
void atm_input(struct ifnet *, struct atm_pseudohdr *,
struct mbuf *, void *);
-int atm_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+int atm_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
struct atmio_vcctable *atm_getvccs(struct atmio_vcc **, u_int, u_int,
struct mtx *, int);
diff --git a/freebsd/sys/net/if_atmsubr.c b/freebsd/sys/net/if_atmsubr.c
index a4cbeb09..fff233c4 100644
--- a/freebsd/sys/net/if_atmsubr.c
+++ b/freebsd/sys/net/if_atmsubr.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/if_dl.h>
@@ -123,7 +124,7 @@ static MALLOC_DEFINE(M_IFATM, "ifatm", "atm interface internals");
* ro->ro_rt must also be NULL.
*/
int
-atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+atm_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
struct route *ro)
{
u_int16_t etype = 0; /* if using LLC/SNAP */
@@ -131,7 +132,7 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
struct atm_pseudohdr atmdst, *ad;
struct mbuf *m = m0;
struct atmllc *atmllc;
- struct atmllc *llc_hdr = NULL;
+ const struct atmllc *llc_hdr = NULL;
u_int32_t atm_flags;
#ifdef MAC
@@ -175,7 +176,7 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
* (atm pseudo header (4) + LLC/SNAP (8))
*/
bcopy(dst->sa_data, &atmdst, sizeof(atmdst));
- llc_hdr = (struct atmllc *)(dst->sa_data +
+ llc_hdr = (const struct atmllc *)(dst->sa_data +
sizeof(atmdst));
break;
@@ -192,8 +193,8 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
atm_flags = ATM_PH_FLAGS(&atmdst);
if (atm_flags & ATM_PH_LLCSNAP)
sz += 8; /* sizeof snap == 8 */
- M_PREPEND(m, sz, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, sz, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
ad = mtod(m, struct atm_pseudohdr *);
*ad = atmdst;
@@ -253,7 +254,7 @@ atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m,
#ifdef MAC
mac_ifnet_create_mbuf(ifp, m);
#endif
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
if (ng_atm_input_p != NULL) {
(*ng_atm_input_p)(ifp, &m, ah, rxhand);
@@ -296,7 +297,7 @@ atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m,
struct atmllc *alc;
if (m->m_len < sizeof(*alc) &&
- (m = m_pullup(m, sizeof(*alc))) == 0)
+ (m = m_pullup(m, sizeof(*alc))) == NULL)
return; /* failed */
alc = mtod(m, struct atmllc *);
if (bcmp(alc, ATMLLC_HDR, 6)) {
diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
index 65553092..77b376b9 100644
--- a/freebsd/sys/net/if_bridge.c
+++ b/freebsd/sys/net/if_bridge.c
@@ -73,7 +73,7 @@
* - Currently only supports Ethernet-like interfaces (Ethernet,
* 802.11, VLANs on Ethernet, etc.) Figure out a nice way
* to bridge other types of interfaces (FDDI-FDDI, and maybe
- * consider heterogenous bridges).
+ * consider heterogeneous bridges).
*/
#include <sys/cdefs.h>
@@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/protosw.h>
@@ -102,7 +103,6 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
-#include <sys/rwlock.h>
#include <net/bpf.h>
#include <net/if.h>
@@ -113,7 +113,7 @@ __FBSDID("$FreeBSD$");
#include <net/pfil.h>
#include <net/vnet.h>
-#include <netinet/in.h> /* for struct arpcom */
+#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
@@ -127,15 +127,13 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_carp.h>
#endif
#include <machine/in_cksum.h>
-#include <netinet/if_ether.h> /* for struct arpcom */
+#include <netinet/if_ether.h>
#include <net/bridgestp.h>
#include <net/if_bridgevar.h>
#include <net/if_llc.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
-#include <netinet/ip_fw.h>
-#include <netpfil/ipfw/ip_fw_private.h>
/*
* Size of the route hash table. Must be a power of two.
@@ -170,7 +168,8 @@ __FBSDID("$FreeBSD$");
/*
* List of capabilities to possibly mask on the member interface.
*/
-#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM)
+#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\
+ IFCAP_TXCSUM_IPV6)
/*
* List of capabilities to strip
@@ -230,8 +229,9 @@ struct bridge_softc {
u_char sc_defaddr[6]; /* Default MAC address */
};
-static struct mtx bridge_list_mtx;
-eventhandler_tag bridge_detach_cookie = NULL;
+static VNET_DEFINE(struct mtx, bridge_list_mtx);
+#define V_bridge_list_mtx VNET(bridge_list_mtx)
+static eventhandler_tag bridge_detach_cookie;
int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
@@ -248,11 +248,12 @@ static void bridge_ifdetach(void *arg __unused, struct ifnet *);
static void bridge_init(void *);
static void bridge_dummynet(struct mbuf *, struct ifnet *);
static void bridge_stop(struct ifnet *, int);
-static void bridge_start(struct ifnet *);
+static int bridge_transmit(struct ifnet *, struct mbuf *);
+static void bridge_qflush(struct ifnet *);
static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
-static void bridge_enqueue(struct bridge_softc *, struct ifnet *,
+static int bridge_enqueue(struct bridge_softc *, struct ifnet *,
struct mbuf *);
static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
@@ -275,7 +276,7 @@ static void bridge_rtflush(struct bridge_softc *, int);
static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
uint16_t);
-static int bridge_rtable_init(struct bridge_softc *);
+static void bridge_rtable_init(struct bridge_softc *);
static void bridge_rtable_fini(struct bridge_softc *);
static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
@@ -353,43 +354,64 @@ static struct bstp_cb_ops bridge_ops = {
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
-static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
-static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
-static int pfil_member = 1; /* run pfil hooks on the member interface */
-static int pfil_ipfw = 0; /* layer2 filter with ipfw */
-static int pfil_ipfw_arp = 0; /* layer2 filter with ipfw */
-static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for
- locally destined packets */
-static int log_stp = 0; /* log STP state changes */
-static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
-TUNABLE_INT("net.link.bridge.pfil_onlyip", &pfil_onlyip);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
- &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
-TUNABLE_INT("net.link.bridge.ipfw_arp", &pfil_ipfw_arp);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW,
- &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2");
-TUNABLE_INT("net.link.bridge.pfil_bridge", &pfil_bridge);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
- &pfil_bridge, 0, "Packet filter on the bridge interface");
-TUNABLE_INT("net.link.bridge.pfil_member", &pfil_member);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
- &pfil_member, 0, "Packet filter on the member interface");
-TUNABLE_INT("net.link.bridge.pfil_local_phys", &pfil_local_phys);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RW,
- &pfil_local_phys, 0,
+/* only pass IP[46] packets when pfil is enabled */
+static VNET_DEFINE(int, pfil_onlyip) = 1;
+#define V_pfil_onlyip VNET(pfil_onlyip)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0,
+ "Only pass IP packets when pfil is enabled");
+
+/* run pfil hooks on the bridge interface */
+static VNET_DEFINE(int, pfil_bridge) = 1;
+#define V_pfil_bridge VNET(pfil_bridge)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0,
+ "Packet filter on the bridge interface");
+
+/* layer2 filter with ipfw */
+static VNET_DEFINE(int, pfil_ipfw);
+#define V_pfil_ipfw VNET(pfil_ipfw)
+
+/* layer2 ARP filter with ipfw */
+static VNET_DEFINE(int, pfil_ipfw_arp);
+#define V_pfil_ipfw_arp VNET(pfil_ipfw_arp)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0,
+ "Filter ARP packets through IPFW layer2");
+
+/* run pfil hooks on the member interface */
+static VNET_DEFINE(int, pfil_member) = 1;
+#define V_pfil_member VNET(pfil_member)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0,
+ "Packet filter on the member interface");
+
+/* run pfil hooks on the physical interface for locally destined packets */
+static VNET_DEFINE(int, pfil_local_phys);
+#define V_pfil_local_phys VNET(pfil_local_phys)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0,
"Packet filter on the physical interface for locally destined packets");
-TUNABLE_INT("net.link.bridge.log_stp", &log_stp);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
- &log_stp, 0, "Log STP state changes");
-TUNABLE_INT("net.link.bridge.inherit_mac", &bridge_inherit_mac);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RW,
- &bridge_inherit_mac, 0,
+
+/* log STP state changes */
+static VNET_DEFINE(int, log_stp);
+#define V_log_stp VNET(log_stp)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0,
+ "Log STP state changes");
+
+/* share MAC with first bridge member */
+static VNET_DEFINE(int, bridge_inherit_mac);
+#define V_bridge_inherit_mac VNET(bridge_inherit_mac)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0,
"Inherit MAC address from the first bridge member");
static VNET_DEFINE(int, allow_llz_overlap) = 0;
#define V_allow_llz_overlap VNET(allow_llz_overlap)
-SYSCTL_VNET_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW,
- &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope "
+SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap,
+ CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0,
+ "Allow overlap of link-local scope "
"zones of a bridge interface and the member interfaces");
struct bridge_control {
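
The hunk above converts every bridge tunable from a file-scope global with a TUNABLE_INT()/SYSCTL_INT() pair into a per-vnet variable. A minimal sketch of the idiom, using the hypothetical knob name example_knob:

/*
 * Per-vnet sysctl idiom (hypothetical knob "example_knob").
 * VNET_DEFINE() gives every vnet its own instance of the variable,
 * the V_ accessor macro resolves it for the current vnet, and
 * CTLFLAG_VNET makes the sysctl handler do the same.  CTLFLAG_RWTUN
 * also absorbs the old TUNABLE_INT() loader-tunable registration.
 */
static VNET_DEFINE(int, example_knob) = 1;
#define	V_example_knob	VNET(example_knob)
SYSCTL_INT(_net_link_bridge, OID_AUTO, example_knob,
    CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(example_knob), 0,
    "Hypothetical per-vnet tunable");
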
@@ -487,12 +509,43 @@ const struct bridge_control bridge_control_table[] = {
BC_F_COPYIN|BC_F_SUSER },
};
-const int bridge_control_table_size =
- sizeof(bridge_control_table) / sizeof(bridge_control_table[0]);
+const int bridge_control_table_size = nitems(bridge_control_table);
+
+static VNET_DEFINE(LIST_HEAD(, bridge_softc), bridge_list);
+#define V_bridge_list VNET(bridge_list)
+#define BRIDGE_LIST_LOCK_INIT(x) mtx_init(&V_bridge_list_mtx, \
+ "if_bridge list", NULL, MTX_DEF)
+#define BRIDGE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_bridge_list_mtx)
+#define BRIDGE_LIST_LOCK(x) mtx_lock(&V_bridge_list_mtx)
+#define BRIDGE_LIST_UNLOCK(x) mtx_unlock(&V_bridge_list_mtx)
+
+static VNET_DEFINE(struct if_clone *, bridge_cloner);
+#define V_bridge_cloner VNET(bridge_cloner)
-LIST_HEAD(, bridge_softc) bridge_list;
+static const char bridge_name[] = "bridge";
+
+static void
+vnet_bridge_init(const void *unused __unused)
+{
+
+ BRIDGE_LIST_LOCK_INIT();
+ LIST_INIT(&V_bridge_list);
+ V_bridge_cloner = if_clone_simple(bridge_name,
+ bridge_clone_create, bridge_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_bridge_init, NULL);
+
+static void
+vnet_bridge_uninit(const void *unused __unused)
+{
-IFC_SIMPLE_DECLARE(bridge, 0);
+ if_clone_detach(V_bridge_cloner);
+ V_bridge_cloner = NULL;
+ BRIDGE_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
+ vnet_bridge_uninit, NULL);
static int
bridge_modevent(module_t mod, int type, void *data)
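
The module event handler that follows shrinks accordingly: cloner and list setup moved into the vnet_bridge_init()/vnet_bridge_uninit() pair above, which the kernel invokes once per vnet. A sketch of that lifecycle pattern, with hypothetical "example" names (including the clone callbacks):

/*
 * Per-vnet constructor/destructor sketch (hypothetical "example"
 * cloner).  The constructor runs in every vnet as it is created,
 * vnet0 included; the destructor runs as the vnet is torn down, so
 * MOD_LOAD/MOD_UNLOAD no longer have to manage the cloner.
 */
static VNET_DEFINE(struct if_clone *, example_cloner);
#define	V_example_cloner	VNET(example_cloner)

static void
vnet_example_init(const void *unused __unused)
{

	V_example_cloner = if_clone_simple("example", example_clone_create,
	    example_clone_destroy, 0);
}
VNET_SYSINIT(vnet_example_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
    vnet_example_init, NULL);

static void
vnet_example_uninit(const void *unused __unused)
{

	if_clone_detach(V_example_cloner);
	V_example_cloner = NULL;
}
VNET_SYSUNINIT(vnet_example_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
    vnet_example_uninit, NULL);
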
@@ -500,12 +553,9 @@ bridge_modevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
- mtx_init(&bridge_list_mtx, "if_bridge list", NULL, MTX_DEF);
- if_clone_attach(&bridge_cloner);
bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
- LIST_INIT(&bridge_list);
bridge_input_p = bridge_input;
bridge_output_p = bridge_output;
bridge_dn_p = bridge_dummynet;
@@ -517,13 +567,11 @@ bridge_modevent(module_t mod, int type, void *data)
case MOD_UNLOAD:
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
bridge_detach_cookie);
- if_clone_detach(&bridge_cloner);
uma_zdestroy(bridge_rtnode_zone);
bridge_input_p = NULL;
bridge_output_p = NULL;
bridge_dn_p = NULL;
bridge_linkstate_p = NULL;
- mtx_destroy(&bridge_list_mtx);
break;
default:
return (EOPNOTSUPP);
@@ -541,19 +589,19 @@ DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);
/*
- * handler for net.link.bridge.pfil_ipfw
+ * handler for net.link.bridge.ipfw
*/
static int
sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
{
- int enable = pfil_ipfw;
+ int enable = V_pfil_ipfw;
int error;
error = sysctl_handle_int(oidp, &enable, 0, req);
- enable = (enable) ? 1 : 0;
+ enable &= 1;
- if (enable != pfil_ipfw) {
- pfil_ipfw = enable;
+ if (enable != V_pfil_ipfw) {
+ V_pfil_ipfw = enable;
/*
		 * Disable pfil so that ipfw doesn't run twice, if the user
@@ -561,17 +609,19 @@ sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
* pfil_member. Also allow non-ip packets as ipfw can filter by
* layer2 type.
*/
- if (pfil_ipfw) {
- pfil_onlyip = 0;
- pfil_bridge = 0;
- pfil_member = 0;
+ if (V_pfil_ipfw) {
+ V_pfil_onlyip = 0;
+ V_pfil_bridge = 0;
+ V_pfil_member = 0;
}
}
return (error);
}
-SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT|CTLFLAG_RW,
- &pfil_ipfw, 0, &sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW");
+SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(pfil_ipfw), 0, &sysctl_pfil_ipfw, "I",
+ "Layer2 filter with IPFW");
/*
* bridge_clone_create:
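
sysctl_pfil_ipfw() above keeps a dedicated handler because flipping the knob must also clear the pfil knobs. One common shape for such a SYSCTL_PROC handler, sketched with the hypothetical V_example_knob from the earlier sketch:

/*
 * SYSCTL_PROC handler sketch (reuses the hypothetical V_example_knob).
 * Copy the current value, let sysctl_handle_int() apply a write from
 * userland if there is one, then act only on a real change.
 */
static int
sysctl_example(SYSCTL_HANDLER_ARGS)
{
	int error, val = V_example_knob;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val != V_example_knob) {
		V_example_knob = val;
		/* ...propagate side effects of the new value here... */
	}
	return (error);
}
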
@@ -606,15 +656,13 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
LIST_INIT(&sc->sc_spanlist);
ifp->if_softc = sc;
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, bridge_name, unit);
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = bridge_ioctl;
- ifp->if_start = bridge_start;
+ ifp->if_transmit = bridge_transmit;
+ ifp->if_qflush = bridge_qflush;
ifp->if_init = bridge_init;
ifp->if_type = IFT_BRIDGE;
- IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
- ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
- IFQ_SET_READY(&ifp->if_snd);
/*
* Generate an ethernet address with a locally administered address.
@@ -626,7 +674,7 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
*/
fb = 0;
getcredhostid(curthread->td_ucred, &hostid);
- for (retry = 1; retry != 0;) {
+ do {
if (fb || hostid == 0) {
arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
sc->sc_defaddr[0] &= ~1;/* clear multicast bit */
@@ -642,15 +690,17 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
fb = 1;
retry = 0;
- mtx_lock(&bridge_list_mtx);
- LIST_FOREACH(sc2, &bridge_list, sc_list) {
+ BRIDGE_LIST_LOCK();
+ LIST_FOREACH(sc2, &V_bridge_list, sc_list) {
bifp = sc2->sc_ifp;
if (memcmp(sc->sc_defaddr,
- IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0)
+ IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
retry = 1;
+ break;
+ }
}
- mtx_unlock(&bridge_list_mtx);
- }
+ BRIDGE_LIST_UNLOCK();
+ } while (retry == 1);
bstp_attach(&sc->sc_stp, &bridge_ops);
ether_ifattach(ifp, sc->sc_defaddr);
@@ -658,9 +708,9 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ifp->if_baudrate = 0;
ifp->if_type = IFT_BRIDGE;
- mtx_lock(&bridge_list_mtx);
- LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
- mtx_unlock(&bridge_list_mtx);
+ BRIDGE_LIST_LOCK();
+ LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list);
+ BRIDGE_LIST_UNLOCK();
return (0);
}
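
The restructured loop above also shows how the default bridge address is derived; the bit manipulation in isolation:

/*
 * Sketch: a random, locally administered unicast MAC, as generated in
 * the do/while loop above.  Clearing bit 0 of the first octet keeps
 * the address unicast; setting bit 1 marks it locally administered,
 * keeping it out of the vendor-assigned (OUI) space.
 */
uint8_t addr[ETHER_ADDR_LEN];

arc4rand(addr, ETHER_ADDR_LEN, 1);
addr[0] &= ~1;		/* clear multicast bit */
addr[0] |= 2;		/* set the locally-administered bit */
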
@@ -692,13 +742,13 @@ bridge_clone_destroy(struct ifnet *ifp)
callout_drain(&sc->sc_brcallout);
- mtx_lock(&bridge_list_mtx);
+ BRIDGE_LIST_LOCK();
LIST_REMOVE(sc, sc_list);
- mtx_unlock(&bridge_list_mtx);
+ BRIDGE_LIST_UNLOCK();
bstp_detach(&sc->sc_stp);
ether_ifdetach(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
/* Tear down the routing table. */
bridge_rtable_fini(sc);
@@ -818,7 +868,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
BRIDGE_LOCK(sc);
LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) {
- log(LOG_NOTICE, "%s: invalid MTU: %lu(%s)"
+ log(LOG_NOTICE, "%s: invalid MTU: %u(%s)"
" != %d\n", sc->sc_ifp->if_xname,
bif->bif_ifp->if_mtu,
bif->bif_ifp->if_xname, ifr->ifr_mtu);
@@ -960,7 +1010,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
* the mac address of the bridge to the address of the next member, or
* to its default address if no members are left.
*/
- if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
+ if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) {
if (LIST_EMPTY(&sc->sc_iflist)) {
bcopy(sc->sc_defaddr,
IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
@@ -986,9 +1036,12 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
case IFT_ETHER:
case IFT_L2VLAN:
/*
- * Take the interface out of promiscuous mode.
+ * Take the interface out of promiscuous mode, but only
+ * if it was promiscuous in the first place. It might
+ * not be if we're in the bridge_ioctl_add() error path.
*/
- (void) ifpromisc(ifs, 0);
+ if (ifs->if_flags & IFF_PROMISC)
+ (void) ifpromisc(ifs, 0);
break;
case IFT_GIF:
@@ -1108,7 +1161,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
if (LIST_EMPTY(&sc->sc_iflist))
sc->sc_ifp->if_mtu = ifs->if_mtu;
else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
- if_printf(sc->sc_ifp, "invalid MTU: %lu(%s) != %lu\n",
+ if_printf(sc->sc_ifp, "invalid MTU: %u(%s) != %u\n",
ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu);
return (EINVAL);
}
@@ -1126,7 +1179,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
* member and the MAC address of the bridge has not been changed from
* the default randomly generated one.
*/
- if (bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) &&
+ if (V_bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) &&
!memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) {
bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
sc->sc_ifaddr = ifs;
@@ -1156,10 +1209,8 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
break;
}
- if (error) {
+ if (error)
bridge_delete_member(sc, bif, 0);
- free(bif, M_DEVBUF);
- }
return (error);
}
@@ -1751,7 +1802,13 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
if (ifp->if_flags & IFF_RENAMING)
return;
-
+ if (V_bridge_cloner == NULL) {
+ /*
+ * This detach handler can be called after
+ * vnet_bridge_uninit(). Just return in that case.
+ */
+ return;
+ }
/* Check if the interface is a bridge member */
if (sc != NULL) {
BRIDGE_LOCK(sc);
@@ -1765,8 +1822,8 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
}
/* Check if the interface is a span port */
- mtx_lock(&bridge_list_mtx);
- LIST_FOREACH(sc, &bridge_list, sc_list) {
+ BRIDGE_LIST_LOCK();
+ LIST_FOREACH(sc, &V_bridge_list, sc_list) {
BRIDGE_LOCK(sc);
LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
if (ifp == bif->bif_ifp) {
@@ -1776,7 +1833,7 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
BRIDGE_UNLOCK(sc);
}
- mtx_unlock(&bridge_list_mtx);
+ BRIDGE_LIST_UNLOCK();
}
/*
@@ -1832,20 +1889,19 @@ bridge_stop(struct ifnet *ifp, int disable)
* Enqueue a packet on a bridge member interface.
*
*/
-static void
+static int
bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
{
int len, err = 0;
short mflags;
struct mbuf *m0;
- len = m->m_pkthdr.len;
- mflags = m->m_flags;
-
/* We may be sending a fragment so traverse the mbuf */
for (; m; m = m0) {
m0 = m->m_nextpkt;
m->m_nextpkt = NULL;
+ len = m->m_pkthdr.len;
+ mflags = m->m_flags;
/*
* If underlying interface can not do VLAN tag insertion itself
@@ -1857,7 +1913,7 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
if (m == NULL) {
if_printf(dst_ifp,
"unable to prepend VLAN header\n");
- dst_ifp->if_oerrors++;
+ if_inc_counter(dst_ifp, IFCOUNTER_OERRORS, 1);
continue;
}
m->m_flags &= ~M_VLANTAG;
@@ -1865,16 +1921,17 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
if ((err = dst_ifp->if_transmit(dst_ifp, m))) {
m_freem(m0);
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
break;
}
- }
- if (err == 0) {
- sc->sc_ifp->if_opackets++;
- sc->sc_ifp->if_obytes += len;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len);
if (mflags & M_MCAST)
- sc->sc_ifp->if_omcasts++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OMCASTS, 1);
}
+
+ return (err);
}
/*
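
This hunk begins the statistics conversion that recurs through the rest of the file: direct writes to the ifnet counter fields are replaced by the if_inc_counter() accessor, which updates per-CPU counter(9) state. The before/after contrast in one place:

/* Before this update: unsynchronized writes to public ifnet fields. */
ifp->if_opackets++;
ifp->if_obytes += len;

/* After: the accessor, backed by per-CPU counter(9) storage. */
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
if_inc_counter(ifp, IFCOUNTER_OBYTES, len);

Note that bridge_enqueue() also moves the length/flags snapshot and the counter updates inside the fragment loop, so each packet in an m_nextpkt chain is now accounted for individually.
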
@@ -2000,9 +2057,9 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
used = 1;
mc = m;
} else {
- mc = m_copypacket(m, M_DONTWAIT);
+ mc = m_copypacket(m, M_NOWAIT);
if (mc == NULL) {
- sc->sc_ifp->if_oerrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
continue;
}
}
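
The M_DONTWAIT changes here and throughout the rest of the file are a pure rename; a short sketch of the convention:

/*
 * M_DONTWAIT/M_WAIT were legacy mbuf-layer aliases; M_NOWAIT and
 * M_WAITOK are the canonical flags shared with malloc(9).  The
 * semantics are unchanged: M_NOWAIT may still fail and return NULL.
 */
mc = m_copypacket(m, M_NOWAIT);		/* was: M_DONTWAIT */
if (mc == NULL)
	if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
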
@@ -2033,44 +2090,42 @@ sendunicast:
}
/*
- * bridge_start:
+ * bridge_transmit:
*
- * Start output on a bridge.
+ * Do output on a bridge.
*
*/
-static void
-bridge_start(struct ifnet *ifp)
+static int
+bridge_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct bridge_softc *sc;
- struct mbuf *m;
struct ether_header *eh;
struct ifnet *dst_if;
+ int error = 0;
sc = ifp->if_softc;
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- for (;;) {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- if (m == 0)
- break;
- ETHER_BPF_MTAP(ifp, m);
+ ETHER_BPF_MTAP(ifp, m);
- eh = mtod(m, struct ether_header *);
- dst_if = NULL;
+ eh = mtod(m, struct ether_header *);
- BRIDGE_LOCK(sc);
- if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
- dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1);
- }
+ BRIDGE_LOCK(sc);
+ if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) &&
+ (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) {
+ BRIDGE_UNLOCK(sc);
+ error = bridge_enqueue(sc, dst_if, m);
+ } else
+ bridge_broadcast(sc, ifp, m, 0);
- if (dst_if == NULL)
- bridge_broadcast(sc, ifp, m, 0);
- else {
- BRIDGE_UNLOCK(sc);
- bridge_enqueue(sc, dst_if, m);
- }
- }
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ return (error);
+}
+
+/*
+ * The ifp->if_qflush entry point for if_bridge(4) is a no-op.
+ */
+static void
+bridge_qflush(struct ifnet *ifp __unused)
+{
}
/*
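
bridge_transmit() above completes the move off the legacy if_start model: there is no if_snd queue and no IFF_DRV_OACTIVE handshake, and each packet is fully handled before the function returns, which is why the if_qflush method above is an empty stub. A minimal sketch of the pair for a hypothetical queue-less pseudo-interface:

static int
example_transmit(struct ifnet *ifp, struct mbuf *m)
{

	/* Account for and consume the packet; a real driver sends it. */
	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
	m_freem(m);
	return (0);
}

static void
example_qflush(struct ifnet *ifp __unused)
{
	/* Nothing is queued at the ifnet layer; nothing to flush. */
}
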
@@ -2094,8 +2149,8 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
src_if = m->m_pkthdr.rcvif;
ifp = sc->sc_ifp;
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
vlan = VLANTAGOF(m);
if ((sbif->bif_flags & IFBIF_STP) &&
@@ -2147,7 +2202,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
goto drop;
/* ...forward it to all interfaces. */
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
dst_if = NULL;
}
@@ -2255,8 +2310,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
if ((bifp->if_flags & IFF_MONITOR) != 0) {
m->m_pkthdr.rcvif = bifp;
ETHER_BPF_MTAP(bifp, m);
- bifp->if_ipackets++;
- bifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
m_freem(m);
return (NULL);
}
@@ -2291,7 +2346,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
* for bridge processing; return the original packet for
* local processing.
*/
- mc = m_dup(m, M_DONTWAIT);
+ mc = m_dup(m, M_NOWAIT);
if (mc == NULL) {
BRIDGE_UNLOCK(sc);
return (m);
@@ -2308,7 +2363,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
*/
KASSERT(bifp->if_bridge == NULL,
("loop created in bridge_input"));
- mc2 = m_dup(m, M_DONTWAIT);
+ mc2 = m_dup(m, M_NOWAIT);
if (mc2 != NULL) {
/* Keep the layer3 header aligned */
int i = min(mc2->m_pkthdr.len, max_protohdr);
@@ -2357,9 +2412,10 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
) { \
if ((iface)->if_type == IFT_BRIDGE) { \
ETHER_BPF_MTAP(iface, m); \
- iface->if_ipackets++; \
+ if_inc_counter(iface, IFCOUNTER_IPACKETS, 1); \
+ if_inc_counter(iface, IFCOUNTER_IBYTES, m->m_pkthdr.len); \
/* Filter on the physical interface. */ \
- if (pfil_local_phys && \
+ if (V_pfil_local_phys && \
(PFIL_HOOKED(&V_inet_pfil_hook) \
OR_PFIL_HOOKED_INET6)) { \
if (bridge_pfil(&m, NULL, ifp, \
@@ -2485,9 +2541,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
mc = m;
used = 1;
} else {
- mc = m_dup(m, M_DONTWAIT);
+ mc = m_dup(m, M_NOWAIT);
if (mc == NULL) {
- sc->sc_ifp->if_oerrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
continue;
}
}
@@ -2507,7 +2563,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
i = min(mc->m_pkthdr.len, max_protohdr);
mc = m_copyup(mc, i, ETHER_ALIGN);
if (mc == NULL) {
- sc->sc_ifp->if_oerrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
continue;
}
}
@@ -2548,9 +2604,9 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m)
if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
continue;
- mc = m_copypacket(m, M_DONTWAIT);
+ mc = m_copypacket(m, M_NOWAIT);
if (mc == NULL) {
- sc->sc_ifp->if_oerrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
continue;
}
@@ -2793,24 +2849,19 @@ bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
*
* Initialize the route table for this bridge.
*/
-static int
+static void
bridge_rtable_init(struct bridge_softc *sc)
{
int i;
sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
- M_DEVBUF, M_NOWAIT);
- if (sc->sc_rthash == NULL)
- return (ENOMEM);
+ M_DEVBUF, M_WAITOK);
for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
LIST_INIT(&sc->sc_rthash[i]);
sc->sc_rthash_key = arc4random();
-
LIST_INIT(&sc->sc_rtlist);
-
- return (0);
}
/*
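
bridge_rtable_init() can no longer fail: with M_WAITOK the allocation sleeps until memory is available instead of returning NULL, so the ENOMEM path vanishes and the return type becomes void. The contract change, sketched with hypothetical p and size:

/* Old contract: M_NOWAIT may return NULL; the caller handles ENOMEM. */
p = malloc(size, M_DEVBUF, M_NOWAIT);
if (p == NULL)
	return (ENOMEM);

/* New contract: M_WAITOK may sleep but never returns NULL. */
p = malloc(size, M_DEVBUF, M_WAITOK);
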
@@ -3018,9 +3069,11 @@ bridge_state_change(struct ifnet *ifp, int state)
"discarding"
};
- if (log_stp)
+ CURVNET_SET(ifp->if_vnet);
+ if (V_log_stp)
log(LOG_NOTICE, "%s: state changed to %s on %s\n",
sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname);
+ CURVNET_RESTORE();
}
/*
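
bridge_state_change() runs from STP callouts that may hold no vnet context, so it must pin the interface's vnet before reading the per-vnet V_log_stp. The bracketing idiom in isolation:

/*
 * curvnet bracket sketch: code entered outside a vnet context
 * (callouts, event handlers) sets curvnet from the object at hand
 * before dereferencing any V_-prefixed variable, then restores it.
 */
CURVNET_SET(ifp->if_vnet);
if (V_log_stp)
	log(LOG_NOTICE, "STP state changed\n");
CURVNET_RESTORE();
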
@@ -3034,7 +3087,6 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
{
int snap, error, i, hlen;
struct ether_header *eh1, eh2;
- struct ip_fw_args args;
struct ip *ip;
struct llc llc1;
u_int16_t ether_type;
@@ -3047,7 +3099,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__));
#endif
- if (pfil_bridge == 0 && pfil_member == 0 && pfil_ipfw == 0)
+ if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0)
return (0); /* filtering is disabled */
i = min((*mp)->m_pkthdr.len, max_protohdr);
@@ -3089,7 +3141,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
switch (ether_type) {
case ETHERTYPE_ARP:
case ETHERTYPE_REVARP:
- if (pfil_ipfw_arp == 0)
+ if (V_pfil_ipfw_arp == 0)
return (0); /* Automatically pass */
break;
@@ -3104,10 +3156,20 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
* packets, these will not be checked by pfil(9) and
* passed unconditionally so the default is to drop.
*/
- if (pfil_onlyip)
+ if (V_pfil_onlyip)
goto bad;
}
+ /* Run the packet through pfil before stripping link headers */
+ if (PFIL_HOOKED(&V_link_pfil_hook) && V_pfil_ipfw != 0 &&
+ dir == PFIL_OUT && ifp != NULL) {
+
+ error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, NULL);
+
+ if (*mp == NULL || error != 0) /* packet consumed by filter */
+ return (error);
+ }
+
/* Strip off the Ethernet header and keep a copy. */
m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
m_adj(*mp, ETHER_HDR_LEN);
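
This new block replaces the hand-rolled ipfw invocation deleted further down: layer-2 filtering now goes through the generic link-layer pfil(9) head. For reference, a sketch of how a filter attaches to that head under the pfil KPI of this era (the hook body and names are hypothetical):

/*
 * Link-layer pfil(9) consumer sketch, pre-FreeBSD 13 KPI.  A hook
 * signals "packet consumed" by returning nonzero and/or freeing *mp
 * and setting it to NULL, matching the checks in the hunk above.
 */
static int
example_l2_hook(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir,
    struct inpcb *inp)
{

	return (0);	/* pass everything through */
}

/* Registration, e.g. from a module event handler: */
pfil_add_hook(example_l2_hook, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK,
    &V_link_pfil_hook);
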
@@ -3138,63 +3200,6 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
goto bad;
}
- /* XXX this section is also in if_ethersubr.c */
- // XXX PFIL_OUT or DIR_OUT ?
- if (V_ip_fw_chk_ptr && pfil_ipfw != 0 &&
- dir == PFIL_OUT && ifp != NULL) {
- struct m_tag *mtag;
-
- error = -1;
- /* fetch the start point from existing tags, if any */
- mtag = m_tag_locate(*mp, MTAG_IPFW_RULE, 0, NULL);
- if (mtag == NULL) {
- args.rule.slot = 0;
- } else {
- struct ipfw_rule_ref *r;
-
- /* XXX can we free the tag after use ? */
- mtag->m_tag_id = PACKET_TAG_NONE;
- r = (struct ipfw_rule_ref *)(mtag + 1);
- /* packet already partially processed ? */
- if (r->info & IPFW_ONEPASS)
- goto ipfwpass;
- args.rule = *r;
- }
-
- args.m = *mp;
- args.oif = ifp;
- args.next_hop = NULL;
- args.next_hop6 = NULL;
- args.eh = &eh2;
- args.inp = NULL; /* used by ipfw uid/gid/jail rules */
- i = V_ip_fw_chk_ptr(&args);
- *mp = args.m;
-
- if (*mp == NULL)
- return (error);
-
- if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
-
- /* put the Ethernet header back on */
- M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
- if (*mp == NULL)
- return (error);
- bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
-
- /*
- * Pass the pkt to dummynet, which consumes it. The
- * packet will return to us via bridge_dummynet().
- */
- args.oif = ifp;
- ip_dn_io_ptr(mp, DIR_FWD | PROTO_IFB, &args);
- return (error);
- }
-
- if (i != IP_FW_PASS) /* drop */
- goto bad;
- }
-
-ipfwpass:
error = 0;
/*
@@ -3203,36 +3208,27 @@ ipfwpass:
switch (ether_type) {
case ETHERTYPE_IP:
/*
- * before calling the firewall, swap fields the same as
- * IP does. here we assume the header is contiguous
- */
- ip = mtod(*mp, struct ip *);
-
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
-
- /*
* Run pfil on the member interface and the bridge, both can
* be skipped by clearing pfil_member or pfil_bridge.
*
* Keep the order:
* in_if -> bridge_if -> out_if
*/
- if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
+ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL)
error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
dir, NULL);
if (*mp == NULL || error != 0) /* filter may consume */
break;
- if (pfil_member && ifp != NULL)
+ if (V_pfil_member && ifp != NULL)
error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp,
dir, NULL);
if (*mp == NULL || error != 0) /* filter may consume */
break;
- if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
+ if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL)
error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
dir, NULL);
@@ -3240,7 +3236,7 @@ ipfwpass:
break;
/* check if we need to fragment the packet */
- if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
+ if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) {
i = (*mp)->m_pkthdr.len;
if (i > ifp->if_mtu) {
error = bridge_fragment(ifp, *mp, &eh2, snap,
@@ -3249,20 +3245,18 @@ ipfwpass:
}
}
- /* Recalculate the ip checksum and restore byte ordering */
+ /* Recalculate the ip checksum. */
ip = mtod(*mp, struct ip *);
hlen = ip->ip_hl << 2;
if (hlen < sizeof(struct ip))
goto bad;
if (hlen > (*mp)->m_len) {
- if ((*mp = m_pullup(*mp, hlen)) == 0)
+ if ((*mp = m_pullup(*mp, hlen)) == NULL)
goto bad;
ip = mtod(*mp, struct ip *);
if (ip == NULL)
goto bad;
}
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (hlen == sizeof(struct ip))
ip->ip_sum = in_cksum_hdr(ip);
@@ -3272,21 +3266,21 @@ ipfwpass:
break;
#ifdef INET6
case ETHERTYPE_IPV6:
- if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
+ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL)
error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
dir, NULL);
if (*mp == NULL || error != 0) /* filter may consume */
break;
- if (pfil_member && ifp != NULL)
+ if (V_pfil_member && ifp != NULL)
error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp,
dir, NULL);
if (*mp == NULL || error != 0) /* filter may consume */
break;
- if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
+ if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL)
error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
dir, NULL);
break;
@@ -3307,13 +3301,13 @@ ipfwpass:
* Finally, put everything back the way it was and return
*/
if (snap) {
- M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
+ M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT);
if (*mp == NULL)
return (error);
bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
}
- M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT);
if (*mp == NULL)
return (error);
bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
@@ -3375,7 +3369,7 @@ bridge_ip_checkbasic(struct mbuf **mp)
goto bad;
}
if (hlen > m->m_len) {
- if ((m = m_pullup(m, hlen)) == 0) {
+ if ((m = m_pullup(m, hlen)) == NULL) {
KMOD_IPSTAT_INC(ips_badhlen);
goto bad;
}
@@ -3499,8 +3493,8 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
goto out;
ip = mtod(m, struct ip *);
- error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
- CSUM_DELAY_IP);
+ m->m_pkthdr.csum_flags |= CSUM_IP;
+ error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist);
if (error)
goto out;
@@ -3508,7 +3502,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
for (m0 = m; m0; m0 = m0->m_nextpkt) {
if (error == 0) {
if (snap) {
- M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT);
+ M_PREPEND(m0, sizeof(struct llc), M_NOWAIT);
if (m0 == NULL) {
error = ENOBUFS;
continue;
@@ -3516,7 +3510,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
bcopy(llc, mtod(m0, caddr_t),
sizeof(struct llc));
}
- M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(m0, ETHER_HDR_LEN, M_NOWAIT);
if (m0 == NULL) {
error = ENOBUFS;
continue;
diff --git a/freebsd/sys/net/if_clone.c b/freebsd/sys/net/if_clone.c
index 0b752139..61ba9c6c 100644
--- a/freebsd/sys/net/if_clone.c
+++ b/freebsd/sys/net/if_clone.c
@@ -1,6 +1,7 @@
#include <machine/rtems-bsd-kernel-space.h>
/*-
+ * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 1980, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -33,6 +34,7 @@
*/
#include <rtems/bsd/sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/limits.h>
#include <rtems/bsd/sys/lock.h>
@@ -43,29 +45,74 @@
#include <sys/socket.h>
#include <net/if.h>
-#include <net/if_clone.h>
-#if 0
-#include <net/if_dl.h>
-#endif
-#include <net/if_types.h>
#include <net/if_var.h>
+#include <net/if_clone.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/vnet.h>
+/* Current IF_MAXUNIT expands maximum to 5 characters. */
+#define IFCLOSIZ (IFNAMSIZ - 5)
+
+/*
+ * Structure describing a `cloning' interface.
+ *
+ * List of locks
+ * (c) const until freeing
+ * (d) driver specific data, may need external protection.
+ * (e) locked by if_cloners_mtx
+ * (i) locked by ifc_mtx mtx
+ */
+struct if_clone {
+ char ifc_name[IFCLOSIZ]; /* (c) Name of device, e.g. `gif' */
+ struct unrhdr *ifc_unrhdr; /* (c) alloc_unr(9) header */
+ int ifc_maxunit; /* (c) maximum unit number */
+ long ifc_refcnt; /* (i) Reference count. */
+ LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */
+ struct mtx ifc_mtx; /* Mutex to protect members. */
+
+ enum { SIMPLE, ADVANCED } ifc_type; /* (c) */
+
+ /* (c) Driver specific cloning functions. Called with no locks held. */
+ union {
+ struct { /* advanced cloner */
+ ifc_match_t *_ifc_match;
+ ifc_create_t *_ifc_create;
+ ifc_destroy_t *_ifc_destroy;
+ } A;
+ struct { /* simple cloner */
+ ifcs_create_t *_ifcs_create;
+ ifcs_destroy_t *_ifcs_destroy;
+ int _ifcs_minifs; /* minimum ifs */
+
+ } S;
+ } U;
+#define ifc_match U.A._ifc_match
+#define ifc_create U.A._ifc_create
+#define ifc_destroy U.A._ifc_destroy
+#define ifcs_create U.S._ifcs_create
+#define ifcs_destroy U.S._ifcs_destroy
+#define ifcs_minifs U.S._ifcs_minifs
+
+ LIST_ENTRY(if_clone) ifc_list; /* (e) On list of cloners */
+};
+
static void if_clone_free(struct if_clone *ifc);
static int if_clone_createif(struct if_clone *ifc, char *name, size_t len,
caddr_t params);
-static struct mtx if_cloners_mtx;
+static int ifc_simple_match(struct if_clone *, const char *);
+static int ifc_simple_create(struct if_clone *, char *, size_t, caddr_t);
+static int ifc_simple_destroy(struct if_clone *, struct ifnet *);
+
+static struct mtx if_cloners_mtx;
+MTX_SYSINIT(if_cloners_lock, &if_cloners_mtx, "if_cloners lock", MTX_DEF);
static VNET_DEFINE(int, if_cloners_count);
VNET_DEFINE(LIST_HEAD(, if_clone), if_cloners);
#define V_if_cloners_count VNET(if_cloners_count)
#define V_if_cloners VNET(if_cloners)
-#define IF_CLONERS_LOCK_INIT() \
- mtx_init(&if_cloners_mtx, "if_cloners lock", NULL, MTX_DEF)
#define IF_CLONERS_LOCK_ASSERT() mtx_assert(&if_cloners_mtx, MA_OWNED)
#define IF_CLONERS_LOCK() mtx_lock(&if_cloners_mtx)
#define IF_CLONERS_UNLOCK() mtx_unlock(&if_cloners_mtx)
@@ -123,13 +170,6 @@ vnet_if_clone_init(void)
LIST_INIT(&V_if_cloners);
}
-void
-if_clone_init(void)
-{
-
- IF_CLONERS_LOCK_INIT();
-}
-
/*
* Lookup and create a clone network interface.
*/
@@ -140,18 +180,25 @@ if_clone_create(char *name, size_t len, caddr_t params)
/* Try to find an applicable cloner for this request */
IF_CLONERS_LOCK();
- LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
- if (ifc->ifc_match(ifc, name)) {
- break;
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+ if (ifc->ifc_type == SIMPLE) {
+ if (ifc_simple_match(ifc, name))
+ break;
+ } else {
+ if (ifc->ifc_match(ifc, name))
+ break;
}
- }
#ifdef VIMAGE
if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) {
CURVNET_SET_QUIET(vnet0);
- LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
- if (ifc->ifc_match(ifc, name))
- break;
- }
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+ if (ifc->ifc_type == SIMPLE) {
+ if (ifc_simple_match(ifc, name))
+ break;
+ } else {
+ if (ifc->ifc_match(ifc, name))
+ break;
+ }
CURVNET_RESTORE();
}
#endif
@@ -175,7 +222,10 @@ if_clone_createif(struct if_clone *ifc, char *name, size_t len, caddr_t params)
if (ifunit(name) != NULL)
return (EEXIST);
- err = (*ifc->ifc_create)(ifc, name, len, params);
+ if (ifc->ifc_type == SIMPLE)
+ err = ifc_simple_create(ifc, name, len, params);
+ else
+ err = (*ifc->ifc_create)(ifc, name, len, params);
if (!err) {
ifp = ifunit(name);
@@ -216,10 +266,14 @@ if_clone_destroy(const char *name)
#ifdef VIMAGE
if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) {
CURVNET_SET_QUIET(vnet0);
- LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
- if (ifc->ifc_match(ifc, name))
- break;
- }
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+ if (ifc->ifc_type == SIMPLE) {
+ if (ifc_simple_match(ifc, name))
+ break;
+ } else {
+ if (ifc->ifc_match(ifc, name))
+ break;
+ }
CURVNET_RESTORE();
}
#endif
@@ -243,7 +297,7 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp)
int err;
struct ifnet *ifcifp;
- if (ifc->ifc_destroy == NULL)
+ if (ifc->ifc_type == ADVANCED && ifc->ifc_destroy == NULL)
return(EOPNOTSUPP);
/*
@@ -268,7 +322,10 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp)
if_delgroup(ifp, ifc->ifc_name);
- err = (*ifc->ifc_destroy)(ifc, ifp);
+ if (ifc->ifc_type == SIMPLE)
+ err = ifc_simple_destroy(ifc, ifp);
+ else
+ err = (*ifc->ifc_destroy)(ifc, ifp);
if (err != 0) {
if_addgroup(ifp, ifc->ifc_name);
@@ -281,36 +338,97 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp)
return (err);
}
-/*
- * Register a network interface cloner.
- */
-void
-if_clone_attach(struct if_clone *ifc)
+static struct if_clone *
+if_clone_alloc(const char *name, int maxunit)
{
- int len, maxclone;
+ struct if_clone *ifc;
- /*
- * Compute bitmap size and allocate it.
- */
- maxclone = ifc->ifc_maxunit + 1;
- len = maxclone >> 3;
- if ((len << 3) < maxclone)
- len++;
- ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
- ifc->ifc_bmlen = len;
+ KASSERT(name != NULL, ("%s: no name\n", __func__));
+
+ ifc = malloc(sizeof(struct if_clone), M_CLONE, M_WAITOK | M_ZERO);
+ strncpy(ifc->ifc_name, name, IFCLOSIZ-1);
IF_CLONE_LOCK_INIT(ifc);
IF_CLONE_ADDREF(ifc);
+ ifc->ifc_maxunit = maxunit ? maxunit : IF_MAXUNIT;
+ ifc->ifc_unrhdr = new_unrhdr(0, ifc->ifc_maxunit, &ifc->ifc_mtx);
+ LIST_INIT(&ifc->ifc_iflist);
+
+ return (ifc);
+}
+
+static int
+if_clone_attach(struct if_clone *ifc)
+{
+ struct if_clone *ifc1;
IF_CLONERS_LOCK();
+ LIST_FOREACH(ifc1, &V_if_cloners, ifc_list)
+ if (strcmp(ifc->ifc_name, ifc1->ifc_name) == 0) {
+ IF_CLONERS_UNLOCK();
+ IF_CLONE_REMREF(ifc);
+ return (EEXIST);
+ }
LIST_INSERT_HEAD(&V_if_cloners, ifc, ifc_list);
V_if_cloners_count++;
IF_CLONERS_UNLOCK();
- LIST_INIT(&ifc->ifc_iflist);
+ return (0);
+}
+
+struct if_clone *
+if_clone_advanced(const char *name, u_int maxunit, ifc_match_t match,
+ ifc_create_t create, ifc_destroy_t destroy)
+{
+ struct if_clone *ifc;
+
+ ifc = if_clone_alloc(name, maxunit);
+ ifc->ifc_type = ADVANCED;
+ ifc->ifc_match = match;
+ ifc->ifc_create = create;
+ ifc->ifc_destroy = destroy;
+
+ if (if_clone_attach(ifc) != 0) {
+ if_clone_free(ifc);
+ return (NULL);
+ }
- if (ifc->ifc_attach != NULL)
- (*ifc->ifc_attach)(ifc);
EVENTHANDLER_INVOKE(if_clone_event, ifc);
+
+ return (ifc);
+}
+
+struct if_clone *
+if_clone_simple(const char *name, ifcs_create_t create, ifcs_destroy_t destroy,
+ u_int minifs)
+{
+ struct if_clone *ifc;
+ u_int unit;
+
+ ifc = if_clone_alloc(name, 0);
+ ifc->ifc_type = SIMPLE;
+ ifc->ifcs_create = create;
+ ifc->ifcs_destroy = destroy;
+ ifc->ifcs_minifs = minifs;
+
+ if (if_clone_attach(ifc) != 0) {
+ if_clone_free(ifc);
+ return (NULL);
+ }
+
+ for (unit = 0; unit < minifs; unit++) {
+ char name[IFNAMSIZ];
+ int error;
+
+ snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
+ error = if_clone_createif(ifc, name, IFNAMSIZ, NULL);
+ KASSERT(error == 0,
+ ("%s: failed to create required interface %s",
+ __func__, name));
+ }
+
+ EVENTHANDLER_INVOKE(if_clone_event, ifc);
+
+ return (ifc);
}
/*
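
With struct if_clone now private to if_clone.c, drivers no longer embed cloner state via IFC_SIMPLE_DECLARE(); they hold only the opaque pointer returned by the constructors above. Typical consumer code, sketched with a hypothetical "foo" driver (including its clone callbacks):

static struct if_clone *foo_cloner;	/* opaque handle */

static void
foo_load(void)
{

	/* Returns NULL if a cloner named "foo" already exists. */
	foo_cloner = if_clone_simple("foo", foo_clone_create,
	    foo_clone_destroy, 0);
}

static void
foo_unload(void)
{

	/* Destroys all remaining foo interfaces, then the cloner. */
	if_clone_detach(foo_cloner);
}
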
@@ -319,7 +437,6 @@ if_clone_attach(struct if_clone *ifc)
void
if_clone_detach(struct if_clone *ifc)
{
- struct ifc_simple_data *ifcs = ifc->ifc_data;
IF_CLONERS_LOCK();
LIST_REMOVE(ifc, ifc_list);
@@ -327,8 +444,8 @@ if_clone_detach(struct if_clone *ifc)
IF_CLONERS_UNLOCK();
/* Allow all simples to be destroyed */
- if (ifc->ifc_attach == ifc_simple_attach)
- ifcs->ifcs_minifs = 0;
+ if (ifc->ifc_type == SIMPLE)
+ ifc->ifcs_minifs = 0;
/* destroy all interfaces for this cloner */
while (!LIST_EMPTY(&ifc->ifc_iflist))
@@ -340,16 +457,13 @@ if_clone_detach(struct if_clone *ifc)
static void
if_clone_free(struct if_clone *ifc)
{
- for (int bytoff = 0; bytoff < ifc->ifc_bmlen; bytoff++) {
- KASSERT(ifc->ifc_units[bytoff] == 0x00,
- ("ifc_units[%d] is not empty", bytoff));
- }
KASSERT(LIST_EMPTY(&ifc->ifc_iflist),
("%s: ifc_iflist not empty", __func__));
IF_CLONE_LOCK_DESTROY(ifc);
- free(ifc->ifc_units, M_CLONE);
+ delete_unrhdr(ifc->ifc_unrhdr);
+ free(ifc, M_CLONE);
}
/*
@@ -372,7 +486,7 @@ if_clone_list(struct if_clonereq *ifcr)
	 * below, but that's not a major problem. Not capping our
	 * allocation to the number of cloners actually in the system
	 * would let arbitrary users cause us to
- * allocate abritrary amounts of kernel memory.
+ * allocate arbitrary amounts of kernel memory.
*/
buf_count = (V_if_cloners_count < ifcr->ifcr_count) ?
V_if_cloners_count : ifcr->ifcr_count;
@@ -406,6 +520,49 @@ done:
}
/*
+ * if_clone_findifc() looks up ifnet from the current
+ * cloner list, and returns ifc if found. Note that ifc_refcnt
+ * is incremented.
+ */
+struct if_clone *
+if_clone_findifc(struct ifnet *ifp)
+{
+ struct if_clone *ifc, *ifc0;
+ struct ifnet *ifcifp;
+
+ ifc0 = NULL;
+ IF_CLONERS_LOCK();
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
+ IF_CLONE_LOCK(ifc);
+ LIST_FOREACH(ifcifp, &ifc->ifc_iflist, if_clones) {
+ if (ifp == ifcifp) {
+ ifc0 = ifc;
+ IF_CLONE_ADDREF_LOCKED(ifc);
+ break;
+ }
+ }
+ IF_CLONE_UNLOCK(ifc);
+ if (ifc0 != NULL)
+ break;
+ }
+ IF_CLONERS_UNLOCK();
+
+ return (ifc0);
+}
+
+/*
+ * if_clone_addgroup() decrements ifc_refcnt because it is called after
+ * if_clone_findifc().
+ */
+void
+if_clone_addgroup(struct ifnet *ifp, struct if_clone *ifc)
+{
+
+ if_addgroup(ifp, ifc->ifc_name);
+ IF_CLONE_REMREF(ifc);
+}
+
+/*
* A utility function to extract unit numbers from interface names of
* the form name###.
*
@@ -443,98 +600,52 @@ ifc_name2unit(const char *name, int *unit)
int
ifc_alloc_unit(struct if_clone *ifc, int *unit)
{
- int wildcard, bytoff, bitoff;
- int err = 0;
-
- IF_CLONE_LOCK(ifc);
+ char name[IFNAMSIZ];
+ int wildcard;
- bytoff = bitoff = 0;
wildcard = (*unit < 0);
- /*
- * Find a free unit if none was given.
- */
- if (wildcard) {
- while ((bytoff < ifc->ifc_bmlen)
- && (ifc->ifc_units[bytoff] == 0xff))
- bytoff++;
- if (bytoff >= ifc->ifc_bmlen) {
- err = ENOSPC;
- goto done;
+retry:
+ if (*unit > ifc->ifc_maxunit)
+ return (ENOSPC);
+ if (*unit < 0) {
+ *unit = alloc_unr(ifc->ifc_unrhdr);
+ if (*unit == -1)
+ return (ENOSPC);
+ } else {
+ *unit = alloc_unr_specific(ifc->ifc_unrhdr, *unit);
+ if (*unit == -1) {
+ if (wildcard) {
+ (*unit)++;
+ goto retry;
+ } else
+ return (EEXIST);
}
- while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
- bitoff++;
- *unit = (bytoff << 3) + bitoff;
}
- if (*unit > ifc->ifc_maxunit) {
- err = ENOSPC;
- goto done;
+ snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, *unit);
+ if (ifunit(name) != NULL) {
+ free_unr(ifc->ifc_unrhdr, *unit);
+ if (wildcard) {
+ (*unit)++;
+ goto retry;
+ } else
+ return (EEXIST);
}
- if (!wildcard) {
- bytoff = *unit >> 3;
- bitoff = *unit - (bytoff << 3);
- }
-
- if((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0) {
- err = EEXIST;
- goto done;
- }
- /*
- * Allocate the unit in the bitmap.
- */
- KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
- ("%s: bit is already set", __func__));
- ifc->ifc_units[bytoff] |= (1 << bitoff);
- IF_CLONE_ADDREF_LOCKED(ifc);
+ IF_CLONE_ADDREF(ifc);
-done:
- IF_CLONE_UNLOCK(ifc);
- return (err);
+ return (0);
}
void
ifc_free_unit(struct if_clone *ifc, int unit)
{
- int bytoff, bitoff;
-
-
- /*
- * Compute offset in the bitmap and deallocate the unit.
- */
- bytoff = unit >> 3;
- bitoff = unit - (bytoff << 3);
- IF_CLONE_LOCK(ifc);
- KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
- ("%s: bit is already cleared", __func__));
- ifc->ifc_units[bytoff] &= ~(1 << bitoff);
- IF_CLONE_REMREF_LOCKED(ifc); /* releases lock */
-}
-
-void
-ifc_simple_attach(struct if_clone *ifc)
-{
- int err;
- int unit;
- char name[IFNAMSIZ];
- struct ifc_simple_data *ifcs = ifc->ifc_data;
-
- KASSERT(ifcs->ifcs_minifs - 1 <= ifc->ifc_maxunit,
- ("%s: %s requested more units than allowed (%d > %d)",
- __func__, ifc->ifc_name, ifcs->ifcs_minifs,
- ifc->ifc_maxunit + 1));
-
- for (unit = 0; unit < ifcs->ifcs_minifs; unit++) {
- snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
- err = if_clone_createif(ifc, name, IFNAMSIZ, NULL);
- KASSERT(err == 0,
- ("%s: failed to create required interface %s",
- __func__, name));
- }
+ free_unr(ifc->ifc_unrhdr, unit);
+ IF_CLONE_REMREF(ifc);
}
-int
+static int
ifc_simple_match(struct if_clone *ifc, const char *name)
{
const char *cp;
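
The rewritten ifc_alloc_unit()/ifc_free_unit() above delegate unit bookkeeping to the kernel unit-number allocator instead of a hand-maintained bitmap, which is why the bitmap KASSERTs disappear. The unr(9) primitives in play, sketched:

/*
 * unr(9) sketch ("uh" is a hypothetical header).  Passing NULL as the
 * mutex makes the allocator use its internal global lock; if_clone.c
 * passes &ifc->ifc_mtx instead.
 */
struct unrhdr *uh;
int unit;

uh = new_unrhdr(0, IF_MAXUNIT, NULL);
unit = alloc_unr(uh);			/* lowest free number, or -1 */
if (unit != -1)
	free_unr(uh, unit);
unit = alloc_unr_specific(uh, 5);	/* exactly 5, or -1 if taken */
if (unit != -1)
	free_unr(uh, unit);
delete_unrhdr(uh);			/* legal once all units are freed */
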
@@ -555,14 +666,13 @@ ifc_simple_match(struct if_clone *ifc, const char *name)
return (1);
}
-int
+static int
ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
{
char *dp;
int wildcard;
int unit;
int err;
- struct ifc_simple_data *ifcs = ifc->ifc_data;
err = ifc_name2unit(name, &unit);
if (err != 0)
@@ -574,7 +684,7 @@ ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
if (err != 0)
return (err);
- err = ifcs->ifcs_create(ifc, unit, params);
+ err = ifc->ifcs_create(ifc, unit, params);
if (err != 0) {
ifc_free_unit(ifc, unit);
return (err);
@@ -598,18 +708,17 @@ ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
return (0);
}
-int
+static int
ifc_simple_destroy(struct if_clone *ifc, struct ifnet *ifp)
{
int unit;
- struct ifc_simple_data *ifcs = ifc->ifc_data;
unit = ifp->if_dunit;
- if (unit < ifcs->ifcs_minifs)
+ if (unit < ifc->ifcs_minifs)
return (EINVAL);
- ifcs->ifcs_destroy(ifp);
+ ifc->ifcs_destroy(ifp);
ifc_free_unit(ifc, unit);
diff --git a/freebsd/sys/net/if_clone.h b/freebsd/sys/net/if_clone.h
index f125f8b5..3a60b0a1 100644
--- a/freebsd/sys/net/if_clone.h
+++ b/freebsd/sys/net/if_clone.h
@@ -35,82 +35,45 @@
#ifdef _KERNEL
-#define IFC_CLONE_INITIALIZER(name, data, maxunit, \
- attach, match, create, destroy) \
- { { 0 }, name, maxunit, NULL, 0, data, attach, match, create, destroy }
-
-/*
- * Structure describing a `cloning' interface.
- *
- * List of locks
- * (c) const until freeing
- * (d) driver specific data, may need external protection.
- * (e) locked by if_cloners_mtx
- * (i) locked by ifc_mtx mtx
- */
-struct if_clone {
- LIST_ENTRY(if_clone) ifc_list; /* (e) On list of cloners */
- const char *ifc_name; /* (c) Name of device, e.g. `gif' */
- int ifc_maxunit; /* (c) Maximum unit number */
- unsigned char *ifc_units; /* (i) Bitmap to handle units. */
- /* Considered private, access */
- /* via ifc_(alloc|free)_unit(). */
- int ifc_bmlen; /* (c) Bitmap length. */
- void *ifc_data; /* (*) Data for ifc_* functions. */
-
- /* (c) Driver specific cloning functions. Called with no locks held. */
- void (*ifc_attach)(struct if_clone *);
- int (*ifc_match)(struct if_clone *, const char *);
- int (*ifc_create)(struct if_clone *, char *, size_t, caddr_t);
- int (*ifc_destroy)(struct if_clone *, struct ifnet *);
-
- long ifc_refcnt; /* (i) Refrence count. */
- struct mtx ifc_mtx; /* Muted to protect members. */
- LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */
-};
-
-void if_clone_init(void);
-void if_clone_attach(struct if_clone *);
+struct if_clone;
+
+/* Methods. */
+typedef int ifc_match_t(struct if_clone *, const char *);
+typedef int ifc_create_t(struct if_clone *, char *, size_t, caddr_t);
+typedef int ifc_destroy_t(struct if_clone *, struct ifnet *);
+
+typedef int ifcs_create_t(struct if_clone *, int, caddr_t);
+typedef void ifcs_destroy_t(struct ifnet *);
+
+/* Interface cloner (de)allocating functions. */
+struct if_clone *
+ if_clone_advanced(const char *, u_int, ifc_match_t, ifc_create_t,
+ ifc_destroy_t);
+struct if_clone *
+ if_clone_simple(const char *, ifcs_create_t, ifcs_destroy_t, u_int);
void if_clone_detach(struct if_clone *);
-void vnet_if_clone_init(void);
-
-int if_clone_create(char *, size_t, caddr_t);
-int if_clone_destroy(const char *);
-int if_clone_destroyif(struct if_clone *, struct ifnet *);
-int if_clone_list(struct if_clonereq *);
+/* Unit (de)allocating functions. */
int ifc_name2unit(const char *name, int *unit);
int ifc_alloc_unit(struct if_clone *, int *);
void ifc_free_unit(struct if_clone *, int);
-/*
- * The ifc_simple functions, structures, and macros implement basic
- * cloning as in 5.[012].
- */
-
-struct ifc_simple_data {
- int ifcs_minifs; /* minimum number of interfaces */
-
- int (*ifcs_create)(struct if_clone *, int, caddr_t);
- void (*ifcs_destroy)(struct ifnet *);
-};
-
-/* interface clone event */
+#ifdef _SYS_EVENTHANDLER_H_
+/* Interface clone event. */
typedef void (*if_clone_event_handler_t)(void *, struct if_clone *);
EVENTHANDLER_DECLARE(if_clone_event, if_clone_event_handler_t);
+#endif
-#define IFC_SIMPLE_DECLARE(name, minifs) \
-struct ifc_simple_data name##_cloner_data = \
- {minifs, name##_clone_create, name##_clone_destroy}; \
-struct if_clone name##_cloner = \
- IFC_CLONE_INITIALIZER(#name, &name##_cloner_data, IF_MAXUNIT, \
- ifc_simple_attach, ifc_simple_match, ifc_simple_create, ifc_simple_destroy)
+/* The below interfaces are used only by net/if.c. */
+void vnet_if_clone_init(void);
+int if_clone_create(char *, size_t, caddr_t);
+int if_clone_destroy(const char *);
+int if_clone_list(struct if_clonereq *);
+struct if_clone *if_clone_findifc(struct ifnet *);
+void if_clone_addgroup(struct ifnet *, struct if_clone *);
-void ifc_simple_attach(struct if_clone *);
-int ifc_simple_match(struct if_clone *, const char *);
-int ifc_simple_create(struct if_clone *, char *, size_t, caddr_t);
-int ifc_simple_destroy(struct if_clone *, struct ifnet *);
+/* The below interface is used only by epair(4). */
+int if_clone_destroyif(struct if_clone *, struct ifnet *);
#endif /* _KERNEL */
-
#endif /* !_NET_IF_CLONE_H_ */
diff --git a/freebsd/sys/net/if_dead.c b/freebsd/sys/net/if_dead.c
index b85793f8..e290823c 100644
--- a/freebsd/sys/net/if_dead.c
+++ b/freebsd/sys/net/if_dead.c
@@ -44,7 +44,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
static int
-ifdead_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
+ifdead_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
struct route *ro)
{
@@ -95,6 +95,13 @@ ifdead_transmit(struct ifnet *ifp, struct mbuf *m)
return (ENXIO);
}
+static uint64_t
+ifdead_get_counter(struct ifnet *ifp, ift_counter cnt)
+{
+
+ return (0);
+}
+
void
if_dead(struct ifnet *ifp)
{
@@ -106,4 +113,5 @@ if_dead(struct ifnet *ifp)
ifp->if_resolvemulti = ifdead_resolvemulti;
ifp->if_qflush = ifdead_qflush;
ifp->if_transmit = ifdead_transmit;
+ ifp->if_get_counter = ifdead_get_counter;
}
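
The new ifdead_get_counter() stub exists because statistics queries now go through an ifnet method rather than public struct fields, and a dead interface must still answer them (with zeroes). A live driver that keeps no private counters would typically defer to the stack instead; a sketch, assuming the if_get_counter_default() helper from if_var.h:

static uint64_t
example_get_counter(struct ifnet *ifp, ift_counter cnt)
{

	/* Report the counters the stack maintains via if_inc_counter(). */
	return (if_get_counter_default(ifp, cnt));
}
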
diff --git a/freebsd/sys/net/if_disc.c b/freebsd/sys/net/if_disc.c
index 3d4f3159..a2e5a7e8 100644
--- a/freebsd/sys/net/if_disc.c
+++ b/freebsd/sys/net/if_disc.c
@@ -47,10 +47,12 @@
#include <sys/sockio.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
+#include <net/vnet.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
@@ -61,22 +63,21 @@
#define DSMTU 65532
#endif
-#define DISCNAME "disc"
-
struct disc_softc {
struct ifnet *sc_ifp;
};
static int discoutput(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct route *);
-static void discrtrequest(int, struct rtentry *, struct rt_addrinfo *);
+ const struct sockaddr *, struct route *);
static int discioctl(struct ifnet *, u_long, caddr_t);
static int disc_clone_create(struct if_clone *, int, caddr_t);
static void disc_clone_destroy(struct ifnet *);
-static MALLOC_DEFINE(M_DISC, DISCNAME, "Discard interface");
+static const char discname[] = "disc";
+static MALLOC_DEFINE(M_DISC, discname, "Discard interface");
-IFC_SIMPLE_DECLARE(disc, 0);
+static VNET_DEFINE(struct if_clone *, disc_cloner);
+#define V_disc_cloner VNET(disc_cloner)
static int
disc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
@@ -92,7 +93,7 @@ disc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
}
ifp->if_softc = sc;
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, discname, unit);
ifp->if_mtu = DSMTU;
/*
* IFF_LOOPBACK should not be removed from disc's flags because
@@ -131,16 +132,32 @@ disc_clone_destroy(struct ifnet *ifp)
free(sc, M_DISC);
}
+static void
+vnet_disc_init(const void *unused __unused)
+{
+
+ V_disc_cloner = if_clone_simple(discname, disc_clone_create,
+ disc_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_disc_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
+ vnet_disc_init, NULL);
+
+static void
+vnet_disc_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_disc_cloner);
+}
+VNET_SYSUNINIT(vnet_disc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
+ vnet_disc_uninit, NULL);
+
static int
disc_modevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
- if_clone_attach(&disc_cloner);
- break;
case MOD_UNLOAD:
- if_clone_detach(&disc_cloner);
break;
default:
return (EOPNOTSUPP);
@@ -157,7 +174,7 @@ static moduledata_t disc_mod = {
DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
static int
-discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+discoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
u_int32_t af;
@@ -165,62 +182,47 @@ discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
M_ASSERTPKTHDR(m);
/* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
+ else
+ af = dst->sa_family;
- if (bpf_peers_present(ifp->if_bpf)) {
- u_int af = dst->sa_family;
+ if (bpf_peers_present(ifp->if_bpf))
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
- }
+
m->m_pkthdr.rcvif = ifp;
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
m_freem(m);
return (0);
}
-/* ARGSUSED */
-static void
-discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
-{
- RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = DSMTU;
-}
-
/*
* Process an ioctl request.
*/
static int
discioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- struct ifaddr *ifa;
struct ifreq *ifr = (struct ifreq *)data;
int error = 0;
switch (cmd) {
-
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
- ifa = (struct ifaddr *)data;
- if (ifa != 0)
- ifa->ifa_rtrequest = discrtrequest;
+
/*
* Everything else is done at a higher level.
*/
break;
-
case SIOCADDMULTI:
case SIOCDELMULTI:
- if (ifr == 0) {
+ if (ifr == NULL) {
error = EAFNOSUPPORT; /* XXX */
break;
}
switch (ifr->ifr_addr.sa_family) {
-
#ifdef INET
case AF_INET:
break;
@@ -229,17 +231,14 @@ discioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case AF_INET6:
break;
#endif
-
default:
error = EAFNOSUPPORT;
break;
}
break;
-
case SIOCSIFMTU:
ifp->if_mtu = ifr->ifr_mtu;
break;
-
default:
error = EINVAL;
}
diff --git a/freebsd/sys/net/if_dl.h b/freebsd/sys/net/if_dl.h
index 8d88623d..f53bc5e4 100644
--- a/freebsd/sys/net/if_dl.h
+++ b/freebsd/sys/net/if_dl.h
@@ -67,6 +67,14 @@ struct sockaddr_dl {
};
#define LLADDR(s) ((caddr_t)((s)->sdl_data + (s)->sdl_nlen))
+#define CLLADDR(s) ((c_caddr_t)((s)->sdl_data + (s)->sdl_nlen))
+#define LLINDEX(s) ((s)->sdl_index)
+
+
+struct ifnet;
+struct sockaddr_dl *link_alloc_sdl(size_t, int);
+void link_free_sdl(struct sockaddr *sa);
+struct sockaddr_dl *link_init_sdl(struct ifnet *, struct sockaddr *, u_char);
#ifndef _KERNEL
diff --git a/freebsd/sys/net/if_edsc.c b/freebsd/sys/net/if_edsc.c
index 6bb80fdb..d90f072a 100644
--- a/freebsd/sys/net/if_edsc.c
+++ b/freebsd/sys/net/if_edsc.c
@@ -48,10 +48,14 @@
#include <net/bpf.h> /* bpf(9) */
#include <net/ethernet.h> /* Ethernet related constants and types */
-#include <net/if.h> /* basic part of ifnet(9) */
+#include <net/if.h>
+#include <net/if_var.h> /* basic part of ifnet(9) */
#include <net/if_clone.h> /* network interface cloning */
#include <net/if_types.h> /* IFT_ETHER and friends */
#include <net/if_var.h> /* kernel-only part of ifnet(9) */
+#include <net/vnet.h>
+
+static const char edscname[] = "edsc";
/*
* Software configuration of an interface specific to this device type.
@@ -66,9 +70,10 @@ struct edsc_softc {
};
/*
- * Simple cloning methods.
- * IFC_SIMPLE_DECLARE() expects precisely these names.
+ * Attach to the interface cloning framework.
*/
+static VNET_DEFINE(struct if_clone *, edsc_cloner);
+#define V_edsc_cloner VNET(edsc_cloner)
static int edsc_clone_create(struct if_clone *, int, caddr_t);
static void edsc_clone_destroy(struct ifnet *);
@@ -83,15 +88,7 @@ static void edsc_start(struct ifnet *ifp);
/*
* We'll allocate softc instances from this.
*/
-static MALLOC_DEFINE(M_EDSC, "edsc", "Ethernet discard interface");
-
-/*
- * Attach to the interface cloning framework under the name of "edsc".
- * The second argument is the number of units to be created from
- * the outset. It's also the minimum number of units allowed.
- * We don't want any units created as soon as the driver is loaded.
- */
-IFC_SIMPLE_DECLARE(edsc, 0);
+static MALLOC_DEFINE(M_EDSC, edscname, "Ethernet discard interface");
/*
* Create an interface instance.
@@ -118,7 +115,7 @@ edsc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
/*
* Get a name for this particular interface in its ifnet structure.
*/
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, edscname, unit);
/*
* Typical Ethernet interface flags: we can do broadcast and
@@ -298,8 +295,8 @@ edsc_start(struct ifnet *ifp)
/*
* Update the interface counters.
*/
- ifp->if_obytes += m->m_pkthdr.len;
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
/*
* Finally, just drop the packet.
@@ -314,6 +311,36 @@ edsc_start(struct ifnet *ifp)
*/
}
+static void
+vnet_edsc_init(const void *unused __unused)
+{
+
+ /*
+ * Connect to the network interface cloning framework.
+ * The last argument is the number of units to be created
+ * from the outset. It's also the minimum number of units
+ * allowed. We don't want any units created as soon as the
+ * driver is loaded.
+ */
+ V_edsc_cloner = if_clone_simple(edscname, edsc_clone_create,
+ edsc_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_edsc_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_edsc_init, NULL);
+
+static void
+vnet_edsc_uninit(const void *unused __unused)
+{
+
+ /*
+ * Disconnect from the cloning framework.
+ * Existing interfaces will be disposed of properly.
+ */
+ if_clone_detach(V_edsc_cloner);
+}
+VNET_SYSUNINIT(vnet_edsc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
+ vnet_edsc_uninit, NULL);
+
/*
* This function provides handlers for module events, namely load and unload.
*/
@@ -323,20 +350,8 @@ edsc_modevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
- /*
- * Connect to the network interface cloning framework.
- */
- if_clone_attach(&edsc_cloner);
- break;
-
case MOD_UNLOAD:
- /*
- * Disconnect from the cloning framework.
- * Existing interfaces will be disposed of properly.
- */
- if_clone_detach(&edsc_cloner);
break;
-
default:
/*
* There are other event types, but we don't handle them.
diff --git a/freebsd/sys/net/if_ef.c b/freebsd/sys/net/if_ef.c
deleted file mode 100644
index 4aa76712..00000000
--- a/freebsd/sys/net/if_ef.c
+++ /dev/null
@@ -1,610 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * Copyright (c) 1999, 2000 Boris Popov
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_ipx.h>
-#include <rtems/bsd/local/opt_ef.h>
-
-#include <rtems/bsd/sys/param.h>
-#include <sys/systm.h>
-#include <sys/sockio.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/syslog.h>
-#include <sys/kernel.h>
-#include <sys/module.h>
-
-#include <net/ethernet.h>
-#include <net/if_llc.h>
-#include <net/if.h>
-#include <net/if_arp.h>
-#include <net/if_dl.h>
-#include <net/if_types.h>
-#include <net/netisr.h>
-#include <net/bpf.h>
-#include <net/vnet.h>
-
-#ifdef INET
-#include <netinet/in.h>
-#include <netinet/in_var.h>
-#include <netinet/if_ether.h>
-#endif
-
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
-/* If none of the supported layers is enabled explicitly enable them all */
-#if !defined(ETHER_II) && !defined(ETHER_8023) && !defined(ETHER_8022) && \
- !defined(ETHER_SNAP)
-#define ETHER_II 1
-#define ETHER_8023 1
-#define ETHER_8022 1
-#define ETHER_SNAP 1
-#endif
-
-/* internal frame types */
-#define ETHER_FT_EII 0 /* Ethernet_II - default */
-#define ETHER_FT_8023 1 /* 802.3 (Novell) */
-#define ETHER_FT_8022 2 /* 802.2 */
-#define ETHER_FT_SNAP 3 /* SNAP */
-#define EF_NFT 4 /* total number of frame types */
-
-#ifdef EF_DEBUG
-#define EFDEBUG(format, args...) printf("%s: "format, __func__ ,## args)
-#else
-#define EFDEBUG(format, args...)
-#endif
-
-#define EFERROR(format, args...) printf("%s: "format, __func__ ,## args)
-
-struct efnet {
- struct ifnet *ef_ifp;
- struct ifnet *ef_pifp;
- int ef_frametype;
-};
-
-struct ef_link {
- SLIST_ENTRY(ef_link) el_next;
- struct ifnet *el_ifp; /* raw device for this clones */
- struct efnet *el_units[EF_NFT]; /* our clones */
-};
-
-static SLIST_HEAD(ef_link_head, ef_link) efdev = {NULL};
-static int efcount;
-
-extern int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
-extern int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *dst, short *tp, int *hlen);
-
-/*
-static void ef_reset (struct ifnet *);
-*/
-static int ef_attach(struct efnet *sc);
-static int ef_detach(struct efnet *sc);
-static void ef_init(void *);
-static int ef_ioctl(struct ifnet *, u_long, caddr_t);
-static void ef_start(struct ifnet *);
-static int ef_input(struct ifnet*, struct ether_header *, struct mbuf *);
-static int ef_output(struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *dst, short *tp, int *hlen);
-
-static int ef_load(void);
-static int ef_unload(void);
-
-/*
- * Install the interface, most of structure initialization done in ef_clone()
- */
-static int
-ef_attach(struct efnet *sc)
-{
- struct ifnet *ifp = sc->ef_ifp;
-
- ifp->if_start = ef_start;
- ifp->if_init = ef_init;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
- ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
- /*
- * Attach the interface
- */
- ether_ifattach(ifp, IF_LLADDR(sc->ef_pifp));
-
- ifp->if_resolvemulti = 0;
- ifp->if_type = IFT_XETHER;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
-
- EFDEBUG("%s: attached\n", ifp->if_xname);
- return 1;
-}
-
-/*
- * This is for _testing_only_, just removes interface from interfaces list
- */
-static int
-ef_detach(struct efnet *sc)
-{
- struct ifnet *ifp = sc->ef_ifp;
- int s;
-
- s = splimp();
-
- ether_ifdetach(ifp);
- if_free(ifp);
-
- splx(s);
- return 0;
-}
-
-static void
-ef_init(void *foo) {
- return;
-}
-
-static int
-ef_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct efnet *sc = ifp->if_softc;
- struct ifaddr *ifa = (struct ifaddr*)data;
- int s, error;
-
- EFDEBUG("IOCTL %ld for %s\n", cmd, ifp->if_xname);
- error = 0;
- s = splimp();
- switch (cmd) {
- case SIOCSIFFLAGS:
- error = 0;
- break;
- case SIOCSIFADDR:
- if (sc->ef_frametype == ETHER_FT_8023 &&
- ifa->ifa_addr->sa_family != AF_IPX) {
- error = EAFNOSUPPORT;
- break;
- }
- ifp->if_flags |= IFF_UP;
- /* FALL THROUGH */
- default:
- error = ether_ioctl(ifp, cmd, data);
- break;
- }
- splx(s);
- return error;
-}
-
-/*
- * Currently packet prepared in the ether_output(), but this can be a better
- * place.
- */
-static void
-ef_start(struct ifnet *ifp)
-{
- struct efnet *sc = (struct efnet*)ifp->if_softc;
- struct ifnet *p;
- struct mbuf *m;
- int error;
-
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- p = sc->ef_pifp;
-
- EFDEBUG("\n");
- for (;;) {
- IF_DEQUEUE(&ifp->if_snd, m);
- if (m == 0)
- break;
- BPF_MTAP(ifp, m);
- error = p->if_transmit(p, m);
- if (error) {
- ifp->if_oerrors++;
- continue;
- }
- ifp->if_opackets++;
- }
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- return;
-}
-
-/*
- * Inline functions do not put additional overhead to procedure call or
- * parameter passing but simplify the code
- */
-static int __inline
-ef_inputEII(struct mbuf *m, struct ether_header *eh, u_short ether_type)
-{
- int isr;
-
- switch(ether_type) {
-#ifdef IPX
- case ETHERTYPE_IPX:
- isr = NETISR_IPX;
- break;
-#endif
-#ifdef INET
- case ETHERTYPE_IP:
- if ((m = ip_fastforward(m)) == NULL)
- return (0);
- isr = NETISR_IP;
- break;
-
- case ETHERTYPE_ARP:
- isr = NETISR_ARP;
- break;
-#endif
- default:
- return (EPROTONOSUPPORT);
- }
- netisr_dispatch(isr, m);
- return (0);
-}
-
-static int __inline
-ef_inputSNAP(struct mbuf *m, struct ether_header *eh, struct llc* l,
- u_short ether_type)
-{
- int isr;
-
- switch(ether_type) {
-#ifdef IPX
- case ETHERTYPE_IPX:
- m_adj(m, 8);
- isr = NETISR_IPX;
- break;
-#endif
- default:
- return (EPROTONOSUPPORT);
- }
- netisr_dispatch(isr, m);
- return (0);
-}
-
-static int __inline
-ef_input8022(struct mbuf *m, struct ether_header *eh, struct llc* l,
- u_short ether_type)
-{
- int isr;
-
- switch(ether_type) {
-#ifdef IPX
- case 0xe0:
- m_adj(m, 3);
- isr = NETISR_IPX;
- break;
-#endif
- default:
- return (EPROTONOSUPPORT);
- }
- netisr_dispatch(isr, m);
- return (0);
-}
-
-/*
- * Called from ether_input()
- */
-static int
-ef_input(struct ifnet *ifp, struct ether_header *eh, struct mbuf *m)
-{
- u_short ether_type;
- int ft = -1;
- struct efnet *efp;
- struct ifnet *eifp;
- struct llc *l;
- struct ef_link *efl;
- int isr;
-
- ether_type = ntohs(eh->ether_type);
- l = NULL;
- if (ether_type < ETHERMTU) {
- l = mtod(m, struct llc*);
- if (l->llc_dsap == 0xff && l->llc_ssap == 0xff) {
- /*
- * Novell's "802.3" frame
- */
- ft = ETHER_FT_8023;
- } else if (l->llc_dsap == 0xaa && l->llc_ssap == 0xaa) {
- /*
- * 802.2/SNAP
- */
- ft = ETHER_FT_SNAP;
- ether_type = ntohs(l->llc_un.type_snap.ether_type);
- } else if (l->llc_dsap == l->llc_ssap) {
- /*
- * 802.3/802.2
- */
- ft = ETHER_FT_8022;
- ether_type = l->llc_ssap;
- }
- } else
- ft = ETHER_FT_EII;
-
- if (ft == -1) {
- EFDEBUG("Unrecognised ether_type %x\n", ether_type);
- return EPROTONOSUPPORT;
- }
-
- /*
- * Check if interface configured for the given frame
- */
- efp = NULL;
- SLIST_FOREACH(efl, &efdev, el_next) {
- if (efl->el_ifp == ifp) {
- efp = efl->el_units[ft];
- break;
- }
- }
- if (efp == NULL) {
- EFDEBUG("Can't find if for %d\n", ft);
- return EPROTONOSUPPORT;
- }
- eifp = efp->ef_ifp;
- if ((eifp->if_flags & IFF_UP) == 0)
- return EPROTONOSUPPORT;
- eifp->if_ibytes += m->m_pkthdr.len + sizeof (*eh);
- m->m_pkthdr.rcvif = eifp;
-
- BPF_MTAP2(eifp, eh, ETHER_HDR_LEN, m);
- /*
- * Now we ready to adjust mbufs and pass them to protocol intr's
- */
- switch(ft) {
- case ETHER_FT_EII:
- return (ef_inputEII(m, eh, ether_type));
-#ifdef IPX
- case ETHER_FT_8023: /* only IPX can be here */
- isr = NETISR_IPX;
- break;
-#endif
- case ETHER_FT_SNAP:
- return (ef_inputSNAP(m, eh, l, ether_type));
- case ETHER_FT_8022:
- return (ef_input8022(m, eh, l, ether_type));
- default:
- EFDEBUG("No support for frame %d and proto %04x\n",
- ft, ether_type);
- return (EPROTONOSUPPORT);
- }
- netisr_dispatch(isr, m);
- return (0);
-}
-
-static int
-ef_output(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, short *tp,
- int *hlen)
-{
- struct efnet *sc = (struct efnet*)ifp->if_softc;
- struct mbuf *m = *mp;
- u_char *cp;
- short type;
-
- if (ifp->if_type != IFT_XETHER)
- return ENETDOWN;
- switch (sc->ef_frametype) {
- case ETHER_FT_EII:
-#ifdef IPX
- type = htons(ETHERTYPE_IPX);
-#else
- return EPFNOSUPPORT;
-#endif
- break;
- case ETHER_FT_8023:
- type = htons(m->m_pkthdr.len);
- break;
- case ETHER_FT_8022:
- M_PREPEND(m, ETHER_HDR_LEN + 3, M_WAIT);
- /*
- * Ensure that ethernet header and next three bytes
- * will fit into single mbuf
- */
- m = m_pullup(m, ETHER_HDR_LEN + 3);
- if (m == NULL) {
- *mp = NULL;
- return ENOBUFS;
- }
- m_adj(m, ETHER_HDR_LEN);
- type = htons(m->m_pkthdr.len);
- cp = mtod(m, u_char *);
- *cp++ = 0xE0;
- *cp++ = 0xE0;
- *cp++ = 0x03;
- *hlen += 3;
- break;
- case ETHER_FT_SNAP:
- M_PREPEND(m, 8, M_WAIT);
- type = htons(m->m_pkthdr.len);
- cp = mtod(m, u_char *);
- bcopy("\xAA\xAA\x03\x00\x00\x00\x81\x37", cp, 8);
- *hlen += 8;
- break;
- default:
- return EPFNOSUPPORT;
- }
- *mp = m;
- *tp = type;
- return 0;
-}
-
-/*
- * Create clone from the given interface
- */
-static int
-ef_clone(struct ef_link *efl, int ft)
-{
- struct efnet *efp;
- struct ifnet *eifp;
- struct ifnet *ifp = efl->el_ifp;
-
- efp = (struct efnet*)malloc(sizeof(struct efnet), M_IFADDR,
- M_WAITOK | M_ZERO);
- if (efp == NULL)
- return ENOMEM;
- efp->ef_pifp = ifp;
- efp->ef_frametype = ft;
- eifp = efp->ef_ifp = if_alloc(IFT_ETHER);
- if (eifp == NULL) {
- free(efp, M_IFADDR);
- return (ENOSPC);
- }
- snprintf(eifp->if_xname, IFNAMSIZ,
- "%sf%d", ifp->if_xname, efp->ef_frametype);
- eifp->if_dname = "ef";
- eifp->if_dunit = IF_DUNIT_NONE;
- eifp->if_softc = efp;
- if (ifp->if_ioctl)
- eifp->if_ioctl = ef_ioctl;
- efl->el_units[ft] = efp;
- return 0;
-}
-
-static int
-ef_load(void)
-{
- VNET_ITERATOR_DECL(vnet_iter);
- struct ifnet *ifp;
- struct efnet *efp;
- struct ef_link *efl = NULL, *efl_temp;
- int error = 0, d;
-
- VNET_LIST_RLOCK();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter);
-
- /*
- * XXXRW: The following loop walks the ifnet list while
- * modifying it, something not well-supported by ifnet
- * locking. To avoid lock upgrade/recursion issues, manually
- * acquire a write lock of ifnet_sxlock here, rather than a
- * read lock, so that when if_alloc() recurses the lock, we
- * don't panic. This structure, in which if_ef automatically
- * attaches to all ethernet interfaces, should be replaced
- * with a model like that found in if_vlan, in which
- * interfaces are explicitly configured, which would avoid
- * this (and other) problems.
- */
- sx_xlock(&ifnet_sxlock);
- TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
- if (ifp->if_type != IFT_ETHER) continue;
- EFDEBUG("Found interface %s\n", ifp->if_xname);
- efl = (struct ef_link*)malloc(sizeof(struct ef_link),
- M_IFADDR, M_WAITOK | M_ZERO);
- if (efl == NULL) {
- error = ENOMEM;
- break;
- }
-
- efl->el_ifp = ifp;
-#ifdef ETHER_II
- error = ef_clone(efl, ETHER_FT_EII);
- if (error) break;
-#endif
-#ifdef ETHER_8023
- error = ef_clone(efl, ETHER_FT_8023);
- if (error) break;
-#endif
-#ifdef ETHER_8022
- error = ef_clone(efl, ETHER_FT_8022);
- if (error) break;
-#endif
-#ifdef ETHER_SNAP
- error = ef_clone(efl, ETHER_FT_SNAP);
- if (error) break;
-#endif
- efcount++;
- SLIST_INSERT_HEAD(&efdev, efl, el_next);
- }
- sx_xunlock(&ifnet_sxlock);
- CURVNET_RESTORE();
- }
- VNET_LIST_RUNLOCK();
- if (error) {
- if (efl)
- SLIST_INSERT_HEAD(&efdev, efl, el_next);
- SLIST_FOREACH_SAFE(efl, &efdev, el_next, efl_temp) {
- for (d = 0; d < EF_NFT; d++)
- if (efl->el_units[d]) {
- if (efl->el_units[d]->ef_pifp != NULL)
- if_free(efl->el_units[d]->ef_pifp);
- free(efl->el_units[d], M_IFADDR);
- }
- free(efl, M_IFADDR);
- }
- return error;
- }
- SLIST_FOREACH(efl, &efdev, el_next) {
- for (d = 0; d < EF_NFT; d++) {
- efp = efl->el_units[d];
- if (efp)
- ef_attach(efp);
- }
- }
- ef_inputp = ef_input;
- ef_outputp = ef_output;
- EFDEBUG("Loaded\n");
- return 0;
-}
-
-static int
-ef_unload(void)
-{
- struct efnet *efp;
- struct ef_link *efl;
- int d;
-
- ef_inputp = NULL;
- ef_outputp = NULL;
- SLIST_FOREACH(efl, &efdev, el_next) {
- for (d = 0; d < EF_NFT; d++) {
- efp = efl->el_units[d];
- if (efp) {
- ef_detach(efp);
- }
- }
- }
- EFDEBUG("Unloaded\n");
- return 0;
-}
-
-static int
-if_ef_modevent(module_t mod, int type, void *data)
-{
- switch ((modeventtype_t)type) {
- case MOD_LOAD:
- return ef_load();
- case MOD_UNLOAD:
- return ef_unload();
- default:
- return EOPNOTSUPP;
- }
- return 0;
-}
-
-static moduledata_t if_ef_mod = {
- "if_ef", if_ef_modevent, NULL
-};
-
-DECLARE_MODULE(if_ef, if_ef_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
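The deleted if_ef.c provided per-frame-type pseudo interfaces (mainly for IPX); its heart was the classifier in ef_input(), which told Ethernet_II apart from the three LLC encapsulations using the type/length field and the LLC SAP octets. A standalone sketch of that test, extracted for illustration only (ef_frame_type is not a FreeBSD function):

#include <stdint.h>

#define	ETHER_FT_EII	0	/* Ethernet_II */
#define	ETHER_FT_8023	1	/* 802.3 (Novell) */
#define	ETHER_FT_8022	2	/* 802.2 */
#define	ETHER_FT_SNAP	3	/* SNAP */

/* Returns the ETHER_FT_* code, or -1 for an unrecognised frame. */
static int
ef_frame_type(uint16_t type_or_len, uint8_t dsap, uint8_t ssap)
{

	if (type_or_len >= 1500)		/* >= ETHERMTU: real ethertype */
		return (ETHER_FT_EII);
	if (dsap == 0xff && ssap == 0xff)	/* Novell's "802.3" */
		return (ETHER_FT_8023);
	if (dsap == 0xaa && ssap == 0xaa)	/* 802.2/SNAP */
		return (ETHER_FT_SNAP);
	if (dsap == ssap)			/* 802.3/802.2 */
		return (ETHER_FT_8022);
	return (-1);	/* ef_input() answered this with EPROTONOSUPPORT */
}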
diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c
index 91d34722..d0d065b8 100644
--- a/freebsd/sys/net/if_enc.c
+++ b/freebsd/sys/net/if_enc.c
@@ -2,6 +2,7 @@
/*-
* Copyright (c) 2006 The FreeBSD Project.
+ * Copyright (c) 2015 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,10 +32,10 @@
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_enc.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/hhook.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -46,6 +47,8 @@
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_enc.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/pfil.h>
@@ -80,56 +83,66 @@ struct enchdr {
u_int32_t spi;
u_int32_t flags;
};
-
-struct ifnet *encif;
-static struct mtx enc_mtx;
-
struct enc_softc {
struct ifnet *sc_ifp;
};
+static VNET_DEFINE(struct enc_softc *, enc_sc);
+#define V_enc_sc VNET(enc_sc)
+static VNET_DEFINE(struct if_clone *, enc_cloner);
+#define V_enc_cloner VNET(enc_cloner)
static int enc_ioctl(struct ifnet *, u_long, caddr_t);
-static int enc_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro);
+static int enc_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
static int enc_clone_create(struct if_clone *, int, caddr_t);
static void enc_clone_destroy(struct ifnet *);
+static int enc_add_hhooks(struct enc_softc *);
+static void enc_remove_hhooks(struct enc_softc *);
-IFC_SIMPLE_DECLARE(enc, 1);
-
-/*
- * Sysctls.
- */
+static const char encname[] = "enc";
/*
* Before and after are relative to when we are stripping the
* outer IP header.
*/
-static SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl");
+static VNET_DEFINE(int, filter_mask_in) = IPSEC_ENC_BEFORE;
+static VNET_DEFINE(int, bpf_mask_in) = IPSEC_ENC_BEFORE;
+static VNET_DEFINE(int, filter_mask_out) = IPSEC_ENC_BEFORE;
+static VNET_DEFINE(int, bpf_mask_out) = IPSEC_ENC_BEFORE | IPSEC_ENC_AFTER;
+#define V_filter_mask_in VNET(filter_mask_in)
+#define V_bpf_mask_in VNET(bpf_mask_in)
+#define V_filter_mask_out VNET(filter_mask_out)
+#define V_bpf_mask_out VNET(bpf_mask_out)
+static SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl");
static SYSCTL_NODE(_net_enc, OID_AUTO, in, CTLFLAG_RW, 0, "enc input sysctl");
-static int ipsec_filter_mask_in = ENC_BEFORE;
-SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW,
- &ipsec_filter_mask_in, 0, "IPsec input firewall filter mask");
-static int ipsec_bpf_mask_in = ENC_BEFORE;
-SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW,
- &ipsec_bpf_mask_in, 0, "IPsec input bpf mask");
-
static SYSCTL_NODE(_net_enc, OID_AUTO, out, CTLFLAG_RW, 0, "enc output sysctl");
-static int ipsec_filter_mask_out = ENC_BEFORE;
-SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW,
- &ipsec_filter_mask_out, 0, "IPsec output firewall filter mask");
-static int ipsec_bpf_mask_out = ENC_BEFORE|ENC_AFTER;
-SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW,
- &ipsec_bpf_mask_out, 0, "IPsec output bpf mask");
+SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_filter_mask,
+ CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(filter_mask_in), 0,
+ "IPsec input firewall filter mask");
+SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_bpf_mask,
+ CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(bpf_mask_in), 0,
+ "IPsec input bpf mask");
+SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_filter_mask,
+ CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(filter_mask_out), 0,
+ "IPsec output firewall filter mask");
+SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_bpf_mask,
+ CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(bpf_mask_out), 0,
+ "IPsec output bpf mask");
static void
enc_clone_destroy(struct ifnet *ifp)
{
- KASSERT(ifp != encif, ("%s: destroying encif", __func__));
+ struct enc_softc *sc;
+
+ sc = ifp->if_softc;
+ KASSERT(sc == V_enc_sc, ("sc != ifp->if_softc"));
bpfdetach(ifp);
if_detach(ifp);
if_free(ifp);
+ free(sc, M_DEVBUF);
+ V_enc_sc = NULL;
}
static int
@@ -138,244 +151,277 @@ enc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
struct ifnet *ifp;
struct enc_softc *sc;
- sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+ sc = malloc(sizeof(struct enc_softc), M_DEVBUF,
+ M_WAITOK | M_ZERO);
ifp = sc->sc_ifp = if_alloc(IFT_ENC);
if (ifp == NULL) {
free(sc, M_DEVBUF);
return (ENOSPC);
}
-
- if_initname(ifp, ifc->ifc_name, unit);
+ if (V_enc_sc != NULL) {
+ if_free(ifp);
+ free(sc, M_DEVBUF);
+ return (EEXIST);
+ }
+ V_enc_sc = sc;
+ if_initname(ifp, encname, unit);
ifp->if_mtu = ENCMTU;
ifp->if_ioctl = enc_ioctl;
ifp->if_output = enc_output;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
ifp->if_softc = sc;
if_attach(ifp);
bpfattach(ifp, DLT_ENC, sizeof(struct enchdr));
-
- mtx_lock(&enc_mtx);
- /* grab a pointer to enc0, ignore the rest */
- if (encif == NULL)
- encif = ifp;
- mtx_unlock(&enc_mtx);
-
return (0);
}
static int
-enc_modevent(module_t mod, int type, void *data)
+enc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
- switch (type) {
- case MOD_LOAD:
- mtx_init(&enc_mtx, "enc mtx", NULL, MTX_DEF);
- if_clone_attach(&enc_cloner);
- break;
- case MOD_UNLOAD:
- printf("enc module unload - not possible for this module\n");
- return (EINVAL);
- default:
- return (EOPNOTSUPP);
- }
+
+ m_freem(m);
return (0);
}
-static moduledata_t enc_mod = {
- "if_enc",
- enc_modevent,
- 0
-};
-
-DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
-
static int
-enc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct route *ro)
+enc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- m_freem(m);
+
+ if (cmd != SIOCSIFFLAGS)
+ return (EINVAL);
+ if (ifp->if_flags & IFF_UP)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ else
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
return (0);
}
/*
- * Process an ioctl request.
+ * One helper hook function is used by all hook points.

+ * + from hhook_type we can determine the packet direction:
+ * HHOOK_TYPE_IPSEC_IN or HHOOK_TYPE_IPSEC_OUT;
+ * + from hhook_id we can determine address family: AF_INET or AF_INET6;
+ * + udata contains pointer to enc_softc;
+ * + ctx_data contains pointer to struct ipsec_ctx_data.
*/
-/* ARGSUSED */
static int
-enc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+enc_hhook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data,
+ void *hdata, struct osd *hosd)
{
- int error = 0;
-
- mtx_lock(&enc_mtx);
+ struct enchdr hdr;
+ struct ipsec_ctx_data *ctx;
+ struct enc_softc *sc;
+ struct ifnet *ifp, *rcvif;
+ struct pfil_head *ph;
+ int pdir;
- switch (cmd) {
+ sc = (struct enc_softc *)udata;
+ ifp = sc->sc_ifp;
+ if ((ifp->if_flags & IFF_UP) == 0)
+ return (0);
- case SIOCSIFFLAGS:
- if (ifp->if_flags & IFF_UP)
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- else
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ ctx = (struct ipsec_ctx_data *)ctx_data;
+ /* XXX: wrong hook point was used by caller? */
+ if (ctx->af != hhook_id)
+ return (EPFNOSUPPORT);
+
+ if (((hhook_type == HHOOK_TYPE_IPSEC_IN &&
+ (ctx->enc & V_bpf_mask_in) != 0) ||
+ (hhook_type == HHOOK_TYPE_IPSEC_OUT &&
+ (ctx->enc & V_bpf_mask_out) != 0)) &&
+ bpf_peers_present(ifp->if_bpf) != 0) {
+ hdr.af = ctx->af;
+ hdr.spi = ctx->sav->spi;
+ hdr.flags = 0;
+ if (ctx->sav->alg_enc != SADB_EALG_NONE)
+ hdr.flags |= M_CONF;
+ if (ctx->sav->alg_auth != SADB_AALG_NONE)
+ hdr.flags |= M_AUTH;
+ bpf_mtap2(ifp->if_bpf, &hdr, sizeof(hdr), *ctx->mp);
+ }
+ switch (hhook_type) {
+ case HHOOK_TYPE_IPSEC_IN:
+ if (ctx->enc == IPSEC_ENC_BEFORE) {
+ /* Do accounting only once */
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES,
+ (*ctx->mp)->m_pkthdr.len);
+ }
+ if ((ctx->enc & V_filter_mask_in) == 0)
+ return (0); /* skip pfil processing */
+ pdir = PFIL_IN;
+ break;
+ case HHOOK_TYPE_IPSEC_OUT:
+ if (ctx->enc == IPSEC_ENC_BEFORE) {
+ /* Do accounting only once */
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES,
+ (*ctx->mp)->m_pkthdr.len);
+ }
+ if ((ctx->enc & V_filter_mask_out) == 0)
+ return (0); /* skip pfil processing */
+ pdir = PFIL_OUT;
break;
-
default:
- error = EINVAL;
+ return (EINVAL);
}
- mtx_unlock(&enc_mtx);
- return (error);
+ switch (hhook_id) {
+#ifdef INET
+ case AF_INET:
+ ph = &V_inet_pfil_hook;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ph = &V_inet6_pfil_hook;
+ break;
+#endif
+ default:
+ ph = NULL;
+ }
+ if (ph == NULL || !PFIL_HOOKED(ph))
+ return (0);
+ /* Make the packet look like it was received on enc(4) */
+ rcvif = (*ctx->mp)->m_pkthdr.rcvif;
+ (*ctx->mp)->m_pkthdr.rcvif = ifp;
+ if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, NULL) != 0 ||
+ *ctx->mp == NULL) {
+ *ctx->mp = NULL; /* consumed by filter */
+ return (EACCES);
+ }
+ (*ctx->mp)->m_pkthdr.rcvif = rcvif;
+ return (0);
}
-int
-ipsec_filter(struct mbuf **mp, int dir, int flags)
+static int
+enc_add_hhooks(struct enc_softc *sc)
{
- int error, i;
- struct ip *ip;
-
- KASSERT(encif != NULL, ("%s: encif is null", __func__));
- KASSERT(flags & (ENC_IN|ENC_OUT),
- ("%s: invalid flags: %04x", __func__, flags));
-
- if ((encif->if_drv_flags & IFF_DRV_RUNNING) == 0)
- return (0);
+ struct hookinfo hki;
+ int error;
- if (flags & ENC_IN) {
- if ((flags & ipsec_filter_mask_in) == 0)
- return (0);
- } else {
- if ((flags & ipsec_filter_mask_out) == 0)
- return (0);
- }
-
- /* Skip pfil(9) if no filters are loaded */
- if (1
+ error = EPFNOSUPPORT;
+ hki.hook_func = enc_hhook;
+ hki.hook_helper = NULL;
+ hki.hook_udata = sc;
#ifdef INET
- && !PFIL_HOOKED(&V_inet_pfil_hook)
+ hki.hook_id = AF_INET;
+ hki.hook_type = HHOOK_TYPE_IPSEC_IN;
+ error = hhook_add_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET],
+ &hki, HHOOK_WAITOK);
+ if (error != 0)
+ return (error);
+ hki.hook_type = HHOOK_TYPE_IPSEC_OUT;
+ error = hhook_add_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET],
+ &hki, HHOOK_WAITOK);
+ if (error != 0)
+ return (error);
#endif
#ifdef INET6
- && !PFIL_HOOKED(&V_inet6_pfil_hook)
+ hki.hook_id = AF_INET6;
+ hki.hook_type = HHOOK_TYPE_IPSEC_IN;
+ error = hhook_add_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET6],
+ &hki, HHOOK_WAITOK);
+ if (error != 0)
+ return (error);
+ hki.hook_type = HHOOK_TYPE_IPSEC_OUT;
+ error = hhook_add_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET6],
+ &hki, HHOOK_WAITOK);
+ if (error != 0)
+ return (error);
#endif
- ) {
- return (0);
- }
+ return (error);
+}
- i = min((*mp)->m_pkthdr.len, max_protohdr);
- if ((*mp)->m_len < i) {
- *mp = m_pullup(*mp, i);
- if (*mp == NULL) {
- printf("%s: m_pullup failed\n", __func__);
- return (-1);
- }
- }
+static void
+enc_remove_hhooks(struct enc_softc *sc)
+{
+ struct hookinfo hki;
- error = 0;
- ip = mtod(*mp, struct ip *);
- switch (ip->ip_v) {
+ hki.hook_func = enc_hhook;
+ hki.hook_helper = NULL;
+ hki.hook_udata = sc;
#ifdef INET
- case 4:
- /*
- * before calling the firewall, swap fields the same as
- * IP does. here we assume the header is contiguous
- */
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
-
- error = pfil_run_hooks(&V_inet_pfil_hook, mp,
- encif, dir, NULL);
-
- if (*mp == NULL || error != 0)
- break;
-
- /* restore byte ordering */
- ip = mtod(*mp, struct ip *);
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
- break;
+ hki.hook_id = AF_INET;
+ hki.hook_type = HHOOK_TYPE_IPSEC_IN;
+ hhook_remove_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET], &hki);
+ hki.hook_type = HHOOK_TYPE_IPSEC_OUT;
+ hhook_remove_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET], &hki);
#endif
#ifdef INET6
- case 6:
- error = pfil_run_hooks(&V_inet6_pfil_hook, mp,
- encif, dir, NULL);
- break;
+ hki.hook_id = AF_INET6;
+ hki.hook_type = HHOOK_TYPE_IPSEC_IN;
+ hhook_remove_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET6], &hki);
+ hki.hook_type = HHOOK_TYPE_IPSEC_OUT;
+ hhook_remove_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET6], &hki);
#endif
- default:
- printf("%s: unknown IP version\n", __func__);
- }
+}
- /*
- * If the mbuf was consumed by the filter for requeueing (dummynet, etc)
- * then error will be zero but we still want to return an error to our
- * caller so the null mbuf isn't forwarded further.
- */
- if (*mp == NULL && error == 0)
- return (-1); /* Consumed by the filter */
- if (*mp == NULL)
- return (error);
- if (error != 0)
- goto bad;
+static void
+vnet_enc_init(const void *unused __unused)
+{
- return (error);
+ V_enc_sc = NULL;
+ V_enc_cloner = if_clone_simple(encname, enc_clone_create,
+ enc_clone_destroy, 1);
+}
+VNET_SYSINIT(vnet_enc_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
+ vnet_enc_init, NULL);
-bad:
- m_freem(*mp);
- *mp = NULL;
- return (error);
+static void
+vnet_enc_init_proto(void *unused __unused)
+{
+ KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc));
+
+ if (enc_add_hhooks(V_enc_sc) != 0)
+ enc_clone_destroy(V_enc_sc->sc_ifp);
}
+VNET_SYSINIT(vnet_enc_init_proto, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_enc_init_proto, NULL);
-void
-ipsec_bpf(struct mbuf *m, struct secasvar *sav, int af, int flags)
+static void
+vnet_enc_uninit(const void *unused __unused)
{
- int mflags;
- struct enchdr hdr;
+ KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc));
- KASSERT(encif != NULL, ("%s: encif is null", __func__));
- KASSERT(flags & (ENC_IN|ENC_OUT),
- ("%s: invalid flags: %04x", __func__, flags));
+ if_clone_detach(V_enc_cloner);
+}
+VNET_SYSUNINIT(vnet_enc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
+ vnet_enc_uninit, NULL);
- if ((encif->if_drv_flags & IFF_DRV_RUNNING) == 0)
- return;
+/*
+ * The hhook consumer needs to go before ip[6]_destroy are called on
+ * SI_ORDER_THIRD.
+ */
+static void
+vnet_enc_uninit_hhook(const void *unused __unused)
+{
+ KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc));
- if (flags & ENC_IN) {
- if ((flags & ipsec_bpf_mask_in) == 0)
- return;
- } else {
- if ((flags & ipsec_bpf_mask_out) == 0)
- return;
- }
+ enc_remove_hhooks(V_enc_sc);
+}
+VNET_SYSUNINIT(vnet_enc_uninit_hhook, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
+ vnet_enc_uninit_hhook, NULL);
- if (bpf_peers_present(encif->if_bpf)) {
- mflags = 0;
- hdr.spi = 0;
- if (!sav) {
- struct m_tag *mtag;
- mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
- if (mtag != NULL) {
- struct tdb_ident *tdbi;
- tdbi = (struct tdb_ident *) (mtag + 1);
- if (tdbi->alg_enc != SADB_EALG_NONE)
- mflags |= M_CONF;
- if (tdbi->alg_auth != SADB_AALG_NONE)
- mflags |= M_AUTH;
- hdr.spi = tdbi->spi;
- }
- } else {
- if (sav->alg_enc != SADB_EALG_NONE)
- mflags |= M_CONF;
- if (sav->alg_auth != SADB_AALG_NONE)
- mflags |= M_AUTH;
- hdr.spi = sav->spi;
- }
+static int
+enc_modevent(module_t mod, int type, void *data)
+{
- /*
- * We need to prepend the address family as a four byte
- * field. Cons up a dummy header to pacify bpf. This
- * is safe because bpf will only read from the mbuf
- * (i.e., it won't try to free it or keep a pointer a
- * to it).
- */
- hdr.af = af;
- /* hdr.spi already set above */
- hdr.flags = mflags;
-
- bpf_mtap2(encif->if_bpf, &hdr, sizeof(hdr), m);
+ switch (type) {
+ case MOD_LOAD:
+ case MOD_UNLOAD:
+ break;
+ default:
+ return (EOPNOTSUPP);
}
+ return (0);
}
+
+static moduledata_t enc_mod = {
+ "if_enc",
+ enc_modevent,
+ 0
+};
+
+DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
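With the rewrite above, the four filter and bpf masks become per-VNET sysctls under net.enc.in and net.enc.out; the values are bit masks built from IPSEC_ENC_BEFORE (0x01) and IPSEC_ENC_AFTER (0x02), taken relative to stripping the outer IP header. A hedged user-space sketch of setting one of them via sysctl(3):

#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
	/* Tap outbound packets both before and after IPsec processing. */
	int mask = 0x01 | 0x02;	/* IPSEC_ENC_BEFORE | IPSEC_ENC_AFTER */

	if (sysctlbyname("net.enc.out.ipsec_bpf_mask", NULL, NULL,
	    &mask, sizeof(mask)) == -1)
		perror("sysctlbyname");
	return (0);
}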
diff --git a/freebsd/sys/net/if_enc.h b/freebsd/sys/net/if_enc.h
index 59a55fcf..941ed12a 100644
--- a/freebsd/sys/net/if_enc.h
+++ b/freebsd/sys/net/if_enc.h
@@ -30,6 +30,13 @@
#ifndef _NET_IF_ENC_H
#define _NET_IF_ENC_H
-extern struct ifnet *encif;
+struct ipsec_ctx_data {
+ struct mbuf **mp;
+ struct secasvar *sav;
+ uint8_t af;
+#define IPSEC_ENC_BEFORE 0x01
+#define IPSEC_ENC_AFTER 0x02
+ uint8_t enc;
+};
#endif /* _NET_IF_ENC_H */
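The new struct ipsec_ctx_data is the contract between the IPsec helper hook points and consumers such as enc(4): mp points at the packet, sav at the security association, af carries the address family, and enc says whether the hook runs before or after the outer header is handled. A hedged kernel-side sketch of a consumer callback (my_hook and its registration are hypothetical; the field accesses mirror enc_hhook() above):

#include <sys/param.h>
#include <sys/hhook.h>
#include <sys/mbuf.h>

#include <net/if_enc.h>

static int
my_hook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data,
    void *hdata, struct osd *hosd)
{
	struct ipsec_ctx_data *ctx = ctx_data;

	/* The hook id carries the address family: AF_INET or AF_INET6. */
	if (ctx->af != hhook_id)
		return (EPFNOSUPPORT);
	if (ctx->enc & IPSEC_ENC_BEFORE) {
		/* *ctx->mp still carries the outer header on input. */
	}
	return (0);
}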
diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c
index 755e608a..b4f73d68 100644
--- a/freebsd/sys/net/if_epair.c
+++ b/freebsd/sys/net/if_epair.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/refcount.h>
@@ -67,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_media.h>
#include <net/if_var.h>
@@ -74,8 +76,6 @@ __FBSDID("$FreeBSD$");
#include <net/netisr.h>
#include <net/vnet.h>
-#define EPAIRNAME "epair"
-
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
@@ -102,9 +102,11 @@ static int epair_clone_match(struct if_clone *, const char *);
static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int epair_clone_destroy(struct if_clone *, struct ifnet *);
-/* Netisr realted definitions and sysctl. */
+static const char epairname[] = "epair";
+
+/* Netisr related definitions and sysctl. */
static struct netisr_handler epair_nh = {
- .nh_name = EPAIRNAME,
+ .nh_name = epairname,
.nh_proto = NETISR_EPAIR,
.nh_policy = NETISR_POLICY_CPU,
.nh_handler = epair_nh_sintr,
@@ -170,12 +172,11 @@ STAILQ_HEAD(eid_list, epair_ifp_drain);
#define EPAIR_REFCOUNT_ASSERT(a, p)
#endif
-static MALLOC_DEFINE(M_EPAIR, EPAIRNAME,
+static MALLOC_DEFINE(M_EPAIR, epairname,
"Pair of virtual cross-over connected Ethernet-like interfaces");
-static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER(
- EPAIRNAME, NULL, IF_MAXUNIT,
- NULL, epair_clone_match, epair_clone_create, epair_clone_destroy);
+static VNET_DEFINE(struct if_clone *, epair_cloner);
+#define V_epair_cloner VNET(epair_cloner)
/*
* DPCPU area and functions.
@@ -421,7 +422,7 @@ epair_start_locked(struct ifnet *ifp)
*/
if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
(oifp->if_flags & IFF_UP) ==0) {
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
m_freem(m);
continue;
}
@@ -437,15 +438,15 @@ epair_start_locked(struct ifnet *ifp)
error = netisr_queue(NETISR_EPAIR, m);
CURVNET_RESTORE();
if (!error) {
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
/* Someone else received the packet. */
- oifp->if_ipackets++;
+ if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
} else {
/* The packet was freed already. */
epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
(void) epair_add_ifp_for_draining(ifp);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
EPAIR_REFCOUNT_RELEASE(&sc->refcount);
EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
("%s: ifp=%p sc->refcount not >= 1: %d",
@@ -506,7 +507,7 @@ epair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
oifp = sc->oifp;
if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
(oifp->if_flags & IFF_UP) ==0) {
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
m_freem(m);
return (0);
}
@@ -515,17 +516,17 @@ epair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);
#ifdef ALTQ
- /* Support ALTQ via the clasic if_start() path. */
+ /* Support ALTQ via the classic if_start() path. */
IF_LOCK(&ifp->if_snd);
if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
if (error)
- ifp->if_snd.ifq_drops++;
+ if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
IF_UNLOCK(&ifp->if_snd);
if (!error) {
- ifp->if_obytes += len;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
if (mflags & (M_BCAST|M_MCAST))
- ifp->if_omcasts++;
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0)
epair_start_locked(ifp);
@@ -559,22 +560,22 @@ epair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
error = netisr_queue(NETISR_EPAIR, m);
CURVNET_RESTORE();
if (!error) {
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
/*
* IFQ_HANDOFF_ADJ/ip_handoff() update statistics,
* but as we bypass all this we have to duplicate
* the logic another time.
*/
- ifp->if_obytes += len;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
if (mflags & (M_BCAST|M_MCAST))
- ifp->if_omcasts++;
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
/* Someone else received the packet. */
- oifp->if_ipackets++;
+ if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
} else {
/* The packet was freed already. */
epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
EPAIR_REFCOUNT_RELEASE(&sc->refcount);
EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
("%s: ifp=%p sc->refcount not >= 1: %d",
@@ -694,10 +695,10 @@ epair_clone_match(struct if_clone *ifc, const char *name)
* - epair<n>
* but not the epair<n>[ab] versions.
*/
- if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0)
+ if (strncmp(epairname, name, sizeof(epairname)-1) != 0)
return (0);
- for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) {
+ for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) {
if (*cp < '0' || *cp > '9')
return (0);
}
@@ -716,7 +717,7 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
/*
* We are abusing params to create our second interface.
- * Actually we already created it and called if_clone_createif()
+ * Actually we already created it and called if_clone_create()
* for it to do the official insertion procedure the moment we knew
* it cannot fail anymore. So just do attach it here.
*/
@@ -763,10 +764,17 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifc_free_unit(ifc, unit);
return (ENOSPC);
}
- *dp = 'a';
+ *dp = 'b';
/* Must not change dp so we can replace 'a' by 'b' later. */
*(dp+1) = '\0';
+ /* Check if 'a' and 'b' interfaces already exist. */
+ if (ifunit(name) != NULL)
+ return (EEXIST);
+ *dp = 'a';
+ if (ifunit(name) != NULL)
+ return (EEXIST);
+
/* Allocate memory for both [ab] interfaces */
sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
EPAIR_REFCOUNT_INIT(&sca->refcount, 1);
@@ -801,15 +809,23 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
* cache locality but we can at least allow parallelism.
*/
sca->cpuid =
- netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount());
+ netisr_get_cpuid(sca->ifp->if_index);
scb->cpuid =
- netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount());
+ netisr_get_cpuid(scb->ifp->if_index);
+
+ /* Initialise pseudo media types. */
+ ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
+ ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+ ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
+ ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
+ ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+ ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
/* Finish initialization of interface <n>a. */
ifp = sca->ifp;
ifp->if_softc = sca;
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = epairname;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_capabilities = IFCAP_VLAN_MTU;
@@ -827,7 +843,7 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
sca->if_qflush = ifp->if_qflush;
ifp->if_qflush = epair_qflush;
ifp->if_transmit = epair_transmit;
- ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */
+ ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */
/* Swap the name and finish initialization of interface <n>b. */
*dp = 'b';
@@ -835,7 +851,7 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifp = scb->ifp;
ifp->if_softc = scb;
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = epairname;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_capabilities = IFCAP_VLAN_MTU;
@@ -845,15 +861,15 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifp->if_init = epair_init;
ifp->if_snd.ifq_maxlen = ifqmaxlen;
/* We need to play some tricks here for the second interface. */
- strlcpy(name, EPAIRNAME, len);
+ strlcpy(name, epairname, len);
error = if_clone_create(name, len, (caddr_t)scb);
if (error)
- panic("%s: if_clone_createif() for our 2nd iface failed: %d",
+ panic("%s: if_clone_create() for our 2nd iface failed: %d",
__func__, error);
scb->if_qflush = ifp->if_qflush;
ifp->if_qflush = epair_qflush;
ifp->if_transmit = epair_transmit;
- ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */
+ ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */
/*
* Restore name to <n>a as the ifp for this will go into the
@@ -862,14 +878,6 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
strlcpy(name, sca->ifp->if_xname, len);
DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
- /* Initialise pseudo media types. */
- ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
- ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
- ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
- ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
- ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
- ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
-
/* Tell the world, that we are ready to rock. */
sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
@@ -947,6 +955,31 @@ epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
return (0);
}
+static void
+vnet_epair_init(const void *unused __unused)
+{
+
+ V_epair_cloner = if_clone_advanced(epairname, 0,
+ epair_clone_match, epair_clone_create, epair_clone_destroy);
+#ifdef VIMAGE
+ netisr_register_vnet(&epair_nh);
+#endif
+}
+VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
+ vnet_epair_init, NULL);
+
+static void
+vnet_epair_uninit(const void *unused __unused)
+{
+
+#ifdef VIMAGE
+ netisr_unregister_vnet(&epair_nh);
+#endif
+ if_clone_detach(V_epair_cloner);
+}
+VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
+ vnet_epair_uninit, NULL);
+
static int
epair_modevent(module_t mod, int type, void *data)
{
@@ -962,16 +995,14 @@ epair_modevent(module_t mod, int type, void *data)
epair_nh.nh_qlimit = qlimit;
#endif /* __rtems__ */
netisr_register(&epair_nh);
- if_clone_attach(&epair_cloner);
if (bootverbose)
- printf("%s initialized.\n", EPAIRNAME);
+ printf("%s initialized.\n", epairname);
break;
case MOD_UNLOAD:
- if_clone_detach(&epair_cloner);
netisr_unregister(&epair_nh);
epair_dpcpu_detach();
if (bootverbose)
- printf("%s unloaded.\n", EPAIRNAME);
+ printf("%s unloaded.\n", epairname);
break;
default:
return (EOPNOTSUPP);
@@ -985,5 +1016,5 @@ static moduledata_t epair_mod = {
0
};
-DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
MODULE_VERSION(if_epair, 1);
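A recurring change in this file, and across the whole update, is the switch from touching struct ifnet statistics fields directly to the if_inc_counter() KPI, which keeps the counters private to the ifnet layer. A minimal sketch of transmit-side accounting in the new style (tx_account is a hypothetical helper):

#include <sys/param.h>
#include <sys/mbuf.h>

#include <net/if.h>
#include <net/if_var.h>

static void
tx_account(struct ifnet *ifp, struct mbuf *m, int error)
{

	if (error != 0) {
		/* The old style was ifp->if_oerrors++. */
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return;
	}
	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
	if (m->m_flags & (M_BCAST | M_MCAST))
		if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
}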
diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c
index 5ee2606e..1d22c0a6 100644
--- a/freebsd/sys/net/if_ethersubr.c
+++ b/freebsd/sys/net/if_ethersubr.c
@@ -32,12 +32,11 @@
* $FreeBSD$
*/
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/local/opt_netgraph.h>
#include <rtems/bsd/local/opt_mbuf_profiling.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -47,12 +46,13 @@
#include <sys/module.h>
#include <sys/mbuf.h>
#include <sys/random.h>
-#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
+#include <sys/uuid.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/netisr.h>
#include <net/route.h>
@@ -64,43 +64,22 @@
#include <net/if_bridgevar.h>
#include <net/if_vlan_var.h>
#include <net/if_llatbl.h>
-#include <net/pf_mtag.h>
+#include <net/pfil.h>
+#include <net/rss_config.h>
#include <net/vnet.h>
+#include <netpfil/pf/pf_mtag.h>
+
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_fw.h>
-#include <netpfil/ipfw/ip_fw_private.h>
#endif
#ifdef INET6
#include <netinet6/nd6.h>
#endif
-
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
-int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
-int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *dst, short *tp, int *hlen);
-
-#ifdef NETATALK
-#include <netatalk/at.h>
-#include <netatalk/at_var.h>
-#include <netatalk/at_extern.h>
-
-#define llc_snap_org_code llc_un.type_snap.org_code
-#define llc_snap_ether_type llc_un.type_snap.ether_type
-
-extern u_char at_org_code[3];
-extern u_char aarp_org_code[3];
-#endif /* NETATALK */
-
#include <security/mac/mac_framework.h>
#ifdef CTASSERT
@@ -108,6 +87,8 @@ CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
#endif
+VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */
+
/* netgraph node hooks for ng_ether(4) */
void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
@@ -134,22 +115,160 @@ static int ether_resolvemulti(struct ifnet *, struct sockaddr **,
#ifdef VIMAGE
static void ether_reassign(struct ifnet *, struct vnet *, char *);
#endif
+static int ether_requestencap(struct ifnet *, struct if_encap_req *);
-/* XXX: should be in an arp support file, not here */
-static MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals");
-
-#define ETHER_IS_BROADCAST(addr) \
- (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
#define senderr(e) do { error = (e); goto bad;} while (0)
+static void
+update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
+{
+ int csum_flags = 0;
+
+ if (src->m_pkthdr.csum_flags & CSUM_IP)
+ csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
+ if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
+ csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
+ if (src->m_pkthdr.csum_flags & CSUM_SCTP)
+ csum_flags |= CSUM_SCTP_VALID;
+ dst->m_pkthdr.csum_flags |= csum_flags;
+ if (csum_flags & CSUM_DATA_VALID)
+ dst->m_pkthdr.csum_data = 0xffff;
+}
+
+/*
+ * Handle link-layer encapsulation requests.
+ */
+static int
+ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
+{
+ struct ether_header *eh;
+ struct arphdr *ah;
+ uint16_t etype;
+ const u_char *lladdr;
+
+ if (req->rtype != IFENCAP_LL)
+ return (EOPNOTSUPP);
+
+ if (req->bufsize < ETHER_HDR_LEN)
+ return (ENOMEM);
+
+ eh = (struct ether_header *)req->buf;
+ lladdr = req->lladdr;
+ req->lladdr_off = 0;
+
+ switch (req->family) {
+ case AF_INET:
+ etype = htons(ETHERTYPE_IP);
+ break;
+ case AF_INET6:
+ etype = htons(ETHERTYPE_IPV6);
+ break;
+ case AF_ARP:
+ ah = (struct arphdr *)req->hdata;
+ ah->ar_hrd = htons(ARPHRD_ETHER);
+
+ switch(ntohs(ah->ar_op)) {
+ case ARPOP_REVREQUEST:
+ case ARPOP_REVREPLY:
+ etype = htons(ETHERTYPE_REVARP);
+ break;
+ case ARPOP_REQUEST:
+ case ARPOP_REPLY:
+ default:
+ etype = htons(ETHERTYPE_ARP);
+ break;
+ }
+
+ if (req->flags & IFENCAP_FLAG_BROADCAST)
+ lladdr = ifp->if_broadcastaddr;
+ break;
+ default:
+ return (EAFNOSUPPORT);
+ }
+
+ memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
+ memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN);
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ req->bufsize = sizeof(struct ether_header);
+
+ return (0);
+}
+
+
+static int
+ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro, u_char *phdr,
+ uint32_t *pflags, struct llentry **plle)
+{
+ struct ether_header *eh;
+ uint32_t lleflags = 0;
+ int error = 0;
#if defined(INET) || defined(INET6)
-int
-ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared);
-static VNET_DEFINE(int, ether_ipfw);
-#define V_ether_ipfw VNET(ether_ipfw)
+ uint16_t etype;
+#endif
+
+ if (plle)
+ *plle = NULL;
+ eh = (struct ether_header *)phdr;
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
+ error = arpresolve(ifp, 0, m, dst, phdr, &lleflags,
+ plle);
+ else {
+ if (m->m_flags & M_BCAST)
+ memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
+ ETHER_ADDR_LEN);
+ else {
+ const struct in_addr *a;
+ a = &(((const struct sockaddr_in *)dst)->sin_addr);
+ ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost);
+ }
+ etype = htons(ETHERTYPE_IP);
+ memcpy(&eh->ether_type, &etype, sizeof(etype));
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ }
+ break;
#endif
+#ifdef INET6
+ case AF_INET6:
+ if ((m->m_flags & M_MCAST) == 0)
+ error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags,
+ plle);
+ else {
+ const struct in6_addr *a6;
+ a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
+ ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
+ etype = htons(ETHERTYPE_IPV6);
+ memcpy(&eh->ether_type, &etype, sizeof(etype));
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ }
+ break;
+#endif
+ default:
+ if_printf(ifp, "can't handle af%d\n", dst->sa_family);
+ if (m != NULL)
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+
+ if (error == EHOSTDOWN) {
+ if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
+ error = EHOSTUNREACH;
+ }
+ if (error != 0)
+ return (error);
+
+ *pflags = RT_MAY_LOOP;
+ if (lleflags & LLE_IFADDR)
+ *pflags |= RT_L2_ME;
+
+ return (0);
+}
/*
* Ethernet output routine.
@@ -159,23 +278,49 @@ static VNET_DEFINE(int, ether_ipfw);
*/
int
ether_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
+ const struct sockaddr *dst, struct route *ro)
{
- short type;
- int error = 0, hdrcmplt = 0;
- u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
- struct llentry *lle = NULL;
- struct rtentry *rt0 = NULL;
+ int error = 0;
+ char linkhdr[ETHER_HDR_LEN], *phdr;
struct ether_header *eh;
struct pf_mtag *t;
int loop_copy = 1;
int hlen; /* link layer header length */
+ uint32_t pflags;
+ struct llentry *lle = NULL;
+ struct rtentry *rt0 = NULL;
+ int addref = 0;
+ phdr = NULL;
+ pflags = 0;
if (ro != NULL) {
- if (!(m->m_flags & (M_BCAST | M_MCAST)))
- lle = ro->ro_lle;
+ /* XXX BPF uses ro_prepend */
+ if (ro->ro_prepend != NULL) {
+ phdr = ro->ro_prepend;
+ hlen = ro->ro_plen;
+ } else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
+ if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
+ lle = ro->ro_lle;
+ if (lle != NULL &&
+ (lle->la_flags & LLE_VALID) == 0) {
+ LLE_FREE(lle);
+ lle = NULL; /* redundant */
+ ro->ro_lle = NULL;
+ }
+ if (lle == NULL) {
+ /* if we lookup, keep cache */
+ addref = 1;
+ }
+ }
+ if (lle != NULL) {
+ phdr = lle->r_linkdata;
+ hlen = lle->r_hdrlen;
+ pflags = lle->r_flags;
+ }
+ }
rt0 = ro->ro_rt;
}
+
#ifdef MAC
error = mac_ifnet_check_transmit(ifp, m);
if (error)
@@ -189,153 +334,39 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
(ifp->if_drv_flags & IFF_DRV_RUNNING)))
senderr(ENETDOWN);
- hlen = ETHER_HDR_LEN;
- switch (dst->sa_family) {
-#ifdef INET
- case AF_INET:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
- else
- error = arpresolve(ifp, rt0, m, dst, edst, &lle);
- if (error)
+ if (phdr == NULL) {
+ /* No prepend data supplied. Try to calculate ourselves. */
+ phdr = linkhdr;
+ hlen = ETHER_HDR_LEN;
+ error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
+ addref ? &lle : NULL);
+ if (addref && lle != NULL)
+ ro->ro_lle = lle;
+ if (error != 0)
return (error == EWOULDBLOCK ? 0 : error);
- type = htons(ETHERTYPE_IP);
- break;
- case AF_ARP:
- {
- struct arphdr *ah;
- ah = mtod(m, struct arphdr *);
- ah->ar_hrd = htons(ARPHRD_ETHER);
-
- loop_copy = 0; /* if this is for us, don't do it */
-
- switch(ntohs(ah->ar_op)) {
- case ARPOP_REVREQUEST:
- case ARPOP_REVREPLY:
- type = htons(ETHERTYPE_REVARP);
- break;
- case ARPOP_REQUEST:
- case ARPOP_REPLY:
- default:
- type = htons(ETHERTYPE_ARP);
- break;
- }
-
- if (m->m_flags & M_BCAST)
- bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN);
- else
- bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN);
-
- }
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
- else
- error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
- if (error)
- return error;
- type = htons(ETHERTYPE_IPV6);
- break;
-#endif
-#ifdef IPX
- case AF_IPX:
- if (ef_outputp) {
- error = ef_outputp(ifp, &m, dst, &type, &hlen);
- if (error)
- goto bad;
- } else
- type = htons(ETHERTYPE_IPX);
- bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
- (caddr_t)edst, sizeof (edst));
- break;
-#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- {
- struct at_ifaddr *aa;
-
- if ((aa = at_ifawithnet((struct sockaddr_at *)dst)) == NULL)
- senderr(EHOSTUNREACH); /* XXX */
- if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst)) {
- ifa_free(&aa->aa_ifa);
- return (0);
- }
- /*
- * In the phase 2 case, need to prepend an mbuf for the llc header.
- */
- if ( aa->aa_flags & AFA_PHASE2 ) {
- struct llc llc;
-
- ifa_free(&aa->aa_ifa);
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
- if (m == NULL)
- senderr(ENOBUFS);
- llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
- llc.llc_control = LLC_UI;
- bcopy(at_org_code, llc.llc_snap_org_code, sizeof(at_org_code));
- llc.llc_snap_ether_type = htons( ETHERTYPE_AT );
- bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN);
- type = htons(m->m_pkthdr.len);
- hlen = LLC_SNAPFRAMELEN + ETHER_HDR_LEN;
- } else {
- ifa_free(&aa->aa_ifa);
- type = htons(ETHERTYPE_AT);
- }
- break;
- }
-#endif /* NETATALK */
-
- case pseudo_AF_HDRCMPLT:
- hdrcmplt = 1;
- eh = (struct ether_header *)dst->sa_data;
- (void)memcpy(esrc, eh->ether_shost, sizeof (esrc));
- /* FALLTHROUGH */
-
- case AF_UNSPEC:
- loop_copy = 0; /* if this is for us, don't do it */
- eh = (struct ether_header *)dst->sa_data;
- (void)memcpy(edst, eh->ether_dhost, sizeof (edst));
- type = eh->ether_type;
- break;
-
- default:
- if_printf(ifp, "can't handle af%d\n", dst->sa_family);
- senderr(EAFNOSUPPORT);
}
- if (lle != NULL && (lle->la_flags & LLE_IFADDR)) {
- int csum_flags = 0;
- if (m->m_pkthdr.csum_flags & CSUM_IP)
- csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
- csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
- if (m->m_pkthdr.csum_flags & CSUM_SCTP)
- csum_flags |= CSUM_SCTP_VALID;
- m->m_pkthdr.csum_flags |= csum_flags;
- m->m_pkthdr.csum_data = 0xffff;
+ if ((pflags & RT_L2_ME) != 0) {
+ update_mbuf_csumflags(m, m);
return (if_simloop(ifp, m, dst->sa_family, 0));
}
+ loop_copy = pflags & RT_MAY_LOOP;
/*
* Add local net header. If no space in first mbuf,
* allocate another.
+ *
+ * Note that we prepend regardless of the RT_HAS_HEADER flag.
+ * This is done because BPF code shifts m_data pointer
+ * to the end of ethernet header prior to calling if_output().
*/
- M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL)
senderr(ENOBUFS);
- eh = mtod(m, struct ether_header *);
- (void)memcpy(&eh->ether_type, &type,
- sizeof(eh->ether_type));
- (void)memcpy(eh->ether_dhost, edst, sizeof (edst));
- if (hdrcmplt)
- (void)memcpy(eh->ether_shost, esrc,
- sizeof(eh->ether_shost));
- else
- (void)memcpy(eh->ether_shost, IF_LLADDR(ifp),
- sizeof(eh->ether_shost));
+ if ((pflags & RT_HAS_HEADER) == 0) {
+ eh = mtod(m, struct ether_header *);
+ memcpy(eh, phdr, hlen);
+ }
/*
* If a simplex interface, and the packet is being sent to our
@@ -346,47 +377,27 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
* on the wire). However, we don't do that here for security
* reasons and compatibility with the original behavior.
*/
- if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
+ if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
((t = pf_find_mtag(m)) == NULL || !t->routed)) {
- int csum_flags = 0;
+ struct mbuf *n;
- if (m->m_pkthdr.csum_flags & CSUM_IP)
- csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
- csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
- if (m->m_pkthdr.csum_flags & CSUM_SCTP)
- csum_flags |= CSUM_SCTP_VALID;
-
- if (m->m_flags & M_BCAST) {
- struct mbuf *n;
-
- /*
- * Because if_simloop() modifies the packet, we need a
- * writable copy through m_dup() instead of a readonly
- * one as m_copy[m] would give us. The alternative would
- * be to modify if_simloop() to handle the readonly mbuf,
- * but performancewise it is mostly equivalent (trading
- * extra data copying vs. extra locking).
- *
- * XXX This is a local workaround. A number of less
- * often used kernel parts suffer from the same bug.
- * See PR kern/105943 for a proposed general solution.
- */
- if ((n = m_dup(m, M_DONTWAIT)) != NULL) {
- n->m_pkthdr.csum_flags |= csum_flags;
- if (csum_flags & CSUM_DATA_VALID)
- n->m_pkthdr.csum_data = 0xffff;
- (void)if_simloop(ifp, n, dst->sa_family, hlen);
- } else
- ifp->if_iqdrops++;
- } else if (bcmp(eh->ether_dhost, eh->ether_shost,
- ETHER_ADDR_LEN) == 0) {
- m->m_pkthdr.csum_flags |= csum_flags;
- if (csum_flags & CSUM_DATA_VALID)
- m->m_pkthdr.csum_data = 0xffff;
- (void) if_simloop(ifp, m, dst->sa_family, hlen);
- return (0); /* XXX */
- }
+ /*
+ * Because if_simloop() modifies the packet, we need a
+ * writable copy through m_dup() instead of a readonly
+ * one as m_copy[m] would give us. The alternative would
+ * be to modify if_simloop() to handle the readonly mbuf,
+ * but performancewise it is mostly equivalent (trading
+ * extra data copying vs. extra locking).
+ *
+ * XXX This is a local workaround. A number of less
+ * often used kernel parts suffer from the same bug.
+ * See PR kern/105943 for a proposed general solution.
+ */
+ if ((n = m_dup(m, M_NOWAIT)) != NULL) {
+ update_mbuf_csumflags(m, n);
+ (void)if_simloop(ifp, n, dst->sa_family, hlen);
+ } else
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
}
/*
@@ -399,12 +410,12 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
#if defined(INET) || defined(INET6)
if (ifp->if_carp &&
- (error = (*carp_output_p)(ifp, m, dst, NULL)))
+ (error = (*carp_output_p)(ifp, m, dst)))
goto bad;
#endif
/* Handle ng_ether(4) processing, if any */
- if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ if (ifp->if_l2com != NULL) {
KASSERT(ng_ether_output_p != NULL,
("ng_ether_output_p is NULL"));
if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
@@ -429,18 +440,17 @@ bad: if (m != NULL)
int
ether_output_frame(struct ifnet *ifp, struct mbuf *m)
{
-#if defined(INET) || defined(INET6)
+ int i;
- if (V_ip_fw_chk_ptr && V_ether_ipfw != 0) {
- if (ether_ipfw_chk(&m, ifp, 0) == 0) {
- if (m) {
- m_freem(m);
- return EACCES; /* pkt dropped */
- } else
- return 0; /* consumed e.g. in a pipe */
- }
+ if (PFIL_HOOKED(&V_link_pfil_hook)) {
+ i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_OUT, NULL);
+
+ if (i != 0)
+ return (EACCES);
+
+ if (m == NULL)
+ return (0);
}
-#endif
/*
* Queue message on interface, update output statistics if
@@ -449,116 +459,6 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m)
return ((ifp->if_transmit)(ifp, m));
}
-#if defined(INET) || defined(INET6)
-/*
- * ipfw processing for ethernet packets (in and out).
- * The second parameter is NULL from ether_demux, and ifp from
- * ether_output_frame.
- */
-int
-ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared)
-{
- struct ether_header *eh;
- struct ether_header save_eh;
- struct mbuf *m;
- int i;
- struct ip_fw_args args;
- struct m_tag *mtag;
-
- /* fetch start point from rule, if any */
- mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
- if (mtag == NULL) {
- args.rule.slot = 0;
- } else {
- /* dummynet packet, already partially processed */
- struct ipfw_rule_ref *r;
-
- /* XXX can we free it after use ? */
- mtag->m_tag_id = PACKET_TAG_NONE;
- r = (struct ipfw_rule_ref *)(mtag + 1);
- if (r->info & IPFW_ONEPASS)
- return (1);
- args.rule = *r;
- }
-
- /*
- * I need some amt of data to be contiguous, and in case others need
- * the packet (shared==1) also better be in the first mbuf.
- */
- m = *m0;
- i = min( m->m_pkthdr.len, max_protohdr);
- if ( shared || m->m_len < i) {
- m = m_pullup(m, i);
- if (m == NULL) {
- *m0 = m;
- return 0;
- }
- }
- eh = mtod(m, struct ether_header *);
- save_eh = *eh; /* save copy for restore below */
- m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */
-
- args.m = m; /* the packet we are looking at */
- args.oif = dst; /* destination, if any */
- args.next_hop = NULL; /* we do not support forward yet */
- args.next_hop6 = NULL; /* we do not support forward yet */
- args.eh = &save_eh; /* MAC header for bridged/MAC packets */
- args.inp = NULL; /* used by ipfw uid/gid/jail rules */
- i = V_ip_fw_chk_ptr(&args);
- m = args.m;
- if (m != NULL) {
- /*
- * Restore Ethernet header, as needed, in case the
- * mbuf chain was replaced by ipfw.
- */
- M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
- if (m == NULL) {
- *m0 = m;
- return 0;
- }
- if (eh != mtod(m, struct ether_header *))
- bcopy(&save_eh, mtod(m, struct ether_header *),
- ETHER_HDR_LEN);
- }
- *m0 = m;
-
- if (i == IP_FW_DENY) /* drop */
- return 0;
-
- KASSERT(m != NULL, ("ether_ipfw_chk: m is NULL"));
-
- if (i == IP_FW_PASS) /* a PASS rule. */
- return 1;
-
- if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
- int dir;
- /*
- * Pass the pkt to dummynet, which consumes it.
- * If shared, make a copy and keep the original.
- */
- if (shared) {
- m = m_copypacket(m, M_DONTWAIT);
- if (m == NULL)
- return 0;
- } else {
- /*
- * Pass the original to dummynet and
- * nothing back to the caller
- */
- *m0 = NULL ;
- }
- dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
- ip_dn_io_ptr(&m, dir, &args);
- return 0;
- }
- /*
- * XXX at some point add support for divert/forward actions.
- * If none of the above matches, we have to drop the pkt.
- */
- return 0;
-}
-#endif
-
/*
* Process a received Ethernet packet; the packet is in the
* mbuf chain m with the ethernet header at the front.
@@ -580,39 +480,18 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
return;
}
#endif
- /*
- * Do consistency checks to verify assumptions
- * made by code past this point.
- */
- if ((m->m_flags & M_PKTHDR) == 0) {
- if_printf(ifp, "discard frame w/o packet header\n");
- ifp->if_ierrors++;
- m_freem(m);
- return;
- }
if (m->m_len < ETHER_HDR_LEN) {
/* XXX maybe should pullup? */
if_printf(ifp, "discard frame w/o leading ethernet "
"header (len %u pkt len %u)\n",
m->m_len, m->m_pkthdr.len);
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
eh = mtod(m, struct ether_header *);
etype = ntohs(eh->ether_type);
- if (m->m_pkthdr.rcvif == NULL) {
- if_printf(ifp, "discard frame w/o interface pointer\n");
- ifp->if_ierrors++;
- m_freem(m);
- return;
- }
-#ifdef DIAGNOSTIC
- if (m->m_pkthdr.rcvif != ifp) {
- if_printf(ifp, "Warning, frame marked as received on %s\n",
- m->m_pkthdr.rcvif->if_xname);
- }
-#endif
+ random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_ETHER);
CURVNET_SET_QUIET(ifp->if_vnet);
@@ -621,7 +500,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
m->m_flags |= M_BCAST;
else
m->m_flags |= M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
#ifdef MAC
@@ -647,7 +526,8 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
m->m_flags &= ~M_HASFCS;
}
- ifp->if_ibytes += m->m_pkthdr.len;
+ if (!(ifp->if_capenable & IFCAP_HWSTATS))
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
/* Allow monitor mode to claim this frame, after stats are updated. */
if (ifp->if_flags & IFF_MONITOR) {
@@ -683,8 +563,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
#ifdef DIAGNOSTIC
if_printf(ifp, "cannot pullup VLAN header\n");
#endif
- ifp->if_ierrors++;
- m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
CURVNET_RESTORE();
return;
}
@@ -702,7 +581,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
M_SETFIB(m, ifp->if_fib);
/* Allow ng_ether(4) to claim this frame. */
- if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ if (ifp->if_l2com != NULL) {
KASSERT(ng_ether_input_p != NULL,
("%s: ng_ether_input_p is NULL", __func__));
m->m_flags &= ~M_PROMISC;
@@ -757,22 +636,36 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
m->m_flags |= M_PROMISC;
}
- /* First chunk of an mbuf contains good entropy */
- if (harvest.ethernet)
- random_harvest(m, 16, 3, 0, RANDOM_NET);
-
ether_demux(ifp, m);
CURVNET_RESTORE();
}
/*
* Ethernet input dispatch; by default, direct dispatch here regardless of
- * global configuration.
+ * global configuration. However, if RSS is enabled, hook up RSS affinity
+ * so that when deferred or hybrid dispatch is enabled, we can redistribute
+ * load based on RSS.
+ *
+ * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
+ * not it had already done work distribution via multi-queue. Then we could
+ * direct dispatch in the event load balancing was already complete and
+ * handle the case of interfaces with different capabilities better.
+ *
+ * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
+ * at multiple layers?
+ *
+ * XXXRW: For now, enable all this only if RSS is compiled in, although it
+ * works fine without RSS. Need to characterise the performance overhead
+ * of the detour through the netisr code in the event the result is always
+ * direct dispatch.
*/
static void
ether_nh_input(struct mbuf *m)
{
+ M_ASSERTPKTHDR(m);
+ KASSERT(m->m_pkthdr.rcvif != NULL,
+ ("%s: NULL interface pointer", __func__));
ether_input_internal(m->m_pkthdr.rcvif, m);
}
@@ -780,8 +673,14 @@ static struct netisr_handler ether_nh = {
.nh_name = "ether",
.nh_handler = ether_nh_input,
.nh_proto = NETISR_ETHER,
+#ifdef RSS
+ .nh_policy = NETISR_POLICY_CPU,
+ .nh_dispatch = NETISR_DISPATCH_DIRECT,
+ .nh_m2cpuid = rss_m2cpuid,
+#else
.nh_policy = NETISR_POLICY_SOURCE,
.nh_dispatch = NETISR_DISPATCH_DIRECT,
+#endif
};
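
For illustration, an nh_m2cpuid callback has the shape sketched below. This
stand-in is not the real rss_m2cpuid; it simply maps a frame's RSS flowid
onto the netisr CPU set and falls back to the current CPU when the driver
supplied no hash (assumes <sys/mbuf.h>, <sys/pcpu.h> and <net/netisr.h>):

/* Simplified sketch of an nh_m2cpuid callback; illustrative only. */
static struct mbuf *
example_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
{

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		*cpuid = netisr_get_cpuid(m->m_pkthdr.flowid %
		    netisr_get_cpucount());
	else
		*cpuid = curcpu;
	return (m);
}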
static void
@@ -793,16 +692,74 @@ ether_init(__unused void *arg)
SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
static void
+vnet_ether_init(__unused void *arg)
+{
+ int i;
+
+ /* Initialize packet filter hooks. */
+ V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
+ V_link_pfil_hook.ph_af = AF_LINK;
+ if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to register pfil link hook, "
+ "error %d\n", __func__, i);
+#ifdef VIMAGE
+ netisr_register_vnet(&ether_nh);
+#endif
+}
+VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_ether_init, NULL);
+
+#ifdef VIMAGE
+static void
+vnet_ether_pfil_destroy(__unused void *arg)
+{
+ int i;
+
+ if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to unregister pfil link hook, "
+ "error %d\n", __func__, i);
+}
+VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
+ vnet_ether_pfil_destroy, NULL);
+
+static void
+vnet_ether_destroy(__unused void *arg)
+{
+
+ netisr_unregister_vnet(&ether_nh);
+}
+VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_ether_destroy, NULL);
+#endif
+
+static void
ether_input(struct ifnet *ifp, struct mbuf *m)
{
+ struct mbuf *mn;
+
/*
- * We will rely on rcvif being set properly in the deferred context,
- * so assert it is correct here.
+ * The drivers are allowed to pass in a chain of packets linked with
+ * m_nextpkt. We split them up into separate packets here and pass
+ * them up. This allows the drivers to amortize the receive lock.
*/
- KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
+ while (m) {
+ mn = m->m_nextpkt;
+ m->m_nextpkt = NULL;
- netisr_dispatch(NETISR_ETHER, m);
+ /*
+ * We will rely on rcvif being set properly in the deferred context,
+ * so assert it is correct here.
+ */
+ KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
+ "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
+ CURVNET_SET_QUIET(ifp->if_vnet);
+ netisr_dispatch(NETISR_ETHER, m);
+ CURVNET_RESTORE();
+ m = mn;
+ }
}
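
The rewritten ether_input() above accepts m_nextpkt chains; the driver side
of that contract might look like the following sketch, where every example_*
name is made up:

/*
 * Hypothetical driver RX path: collect completed frames into an
 * m_nextpkt chain and enter the stack once per batch, so per-call
 * overhead (and any receive lock) is amortized across the chain.
 */
static void
example_rxeof(struct example_softc *sc)
{
	struct mbuf *m, *mhead, **mtail;

	mhead = NULL;
	mtail = &mhead;
	while ((m = example_next_frame(sc)) != NULL) {	/* assumed helper */
		m->m_pkthdr.rcvif = sc->sc_ifp;
		m->m_nextpkt = NULL;
		*mtail = m;
		mtail = &m->m_nextpkt;
	}
	if (mhead != NULL)
		(*sc->sc_ifp->if_input)(sc->sc_ifp, mhead);
}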
/*
@@ -812,27 +769,19 @@ void
ether_demux(struct ifnet *ifp, struct mbuf *m)
{
struct ether_header *eh;
- int isr;
+ int i, isr;
u_short ether_type;
-#if defined(NETATALK)
- struct llc *l;
-#endif
KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
-#if defined(INET) || defined(INET6)
- /*
- * Allow dummynet and/or ipfw to claim the frame.
- * Do not do this for PROMISC frames in case we are re-entered.
- */
- if (V_ip_fw_chk_ptr && V_ether_ipfw != 0 && !(m->m_flags & M_PROMISC)) {
- if (ether_ipfw_chk(&m, NULL, 0) == 0) {
- if (m)
- m_freem(m); /* dropped; free mbuf chain */
- return; /* consumed */
- }
+ /* Do not grab PROMISC frames in case we are re-entered. */
+ if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
+ i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, NULL);
+
+ if (i != 0 || m == NULL)
+ return;
}
-#endif
+
eh = mtod(m, struct ether_header *);
ether_type = ntohs(eh->ether_type);
@@ -843,7 +792,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
if ((m->m_flags & M_VLANTAG) &&
EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
if (ifp->if_vlantrunk == NULL) {
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
m_freem(m);
return;
}
@@ -869,7 +818,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
* Strip off Ethernet header.
*/
m->m_flags &= ~M_VLANTAG;
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
m_adj(m, ETHER_HDR_LEN);
/*
@@ -878,8 +827,6 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
switch (ether_type) {
#ifdef INET
case ETHERTYPE_IP:
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
@@ -892,54 +839,12 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
isr = NETISR_ARP;
break;
#endif
-#ifdef IPX
- case ETHERTYPE_IPX:
- if (ef_inputp && ef_inputp(ifp, eh, m) == 0)
- return;
- isr = NETISR_IPX;
- break;
-#endif
#ifdef INET6
case ETHERTYPE_IPV6:
isr = NETISR_IPV6;
break;
#endif
-#ifdef NETATALK
- case ETHERTYPE_AT:
- isr = NETISR_ATALK1;
- break;
- case ETHERTYPE_AARP:
- isr = NETISR_AARP;
- break;
-#endif /* NETATALK */
default:
-#ifdef IPX
- if (ef_inputp && ef_inputp(ifp, eh, m) == 0)
- return;
-#endif /* IPX */
-#if defined(NETATALK)
- if (ether_type > ETHERMTU)
- goto discard;
- l = mtod(m, struct llc *);
- if (l->llc_dsap == LLC_SNAP_LSAP &&
- l->llc_ssap == LLC_SNAP_LSAP &&
- l->llc_control == LLC_UI) {
- if (bcmp(&(l->llc_snap_org_code)[0], at_org_code,
- sizeof(at_org_code)) == 0 &&
- ntohs(l->llc_snap_ether_type) == ETHERTYPE_AT) {
- m_adj(m, LLC_SNAPFRAMELEN);
- isr = NETISR_ATALK2;
- break;
- }
- if (bcmp(&(l->llc_snap_org_code)[0], aarp_org_code,
- sizeof(aarp_org_code)) == 0 &&
- ntohs(l->llc_snap_ether_type) == ETHERTYPE_AARP) {
- m_adj(m, LLC_SNAPFRAMELEN);
- isr = NETISR_AARP;
- break;
- }
- }
-#endif /* NETATALK */
goto discard;
}
netisr_dispatch(isr, m);
@@ -951,14 +856,14 @@ discard:
* hand the packet to it for last chance processing;
* otherwise dispose of it.
*/
- if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ if (ifp->if_l2com != NULL) {
KASSERT(ng_ether_input_orphan_p != NULL,
("ng_ether_input_orphan_p is NULL"));
/*
* Put back the ethernet header so netgraph has a
* consistent view of inbound packets.
*/
- M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
(*ng_ether_input_orphan_p)(ifp, m);
return;
}
@@ -998,6 +903,7 @@ ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
ifp->if_output = ether_output;
ifp->if_input = ether_input;
ifp->if_resolvemulti = ether_resolvemulti;
+ ifp->if_requestencap = ether_requestencap;
#ifdef VIMAGE
ifp->if_reassign = ether_reassign;
#endif
@@ -1022,6 +928,8 @@ ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
break;
if (i != ifp->if_addrlen)
if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
+
+ uuid_ether_add(LLADDR(sdl));
}
/*
@@ -1030,7 +938,12 @@ ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
void
ether_ifdetach(struct ifnet *ifp)
{
- if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ struct sockaddr_dl *sdl;
+
+ sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);
+ uuid_ether_del(LLADDR(sdl));
+
+ if (ifp->if_l2com != NULL) {
KASSERT(ng_ether_detach_p != NULL,
("ng_ether_detach_p is NULL"));
(*ng_ether_detach_p)(ifp);
@@ -1045,7 +958,7 @@ void
ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
{
- if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ if (ifp->if_l2com != NULL) {
KASSERT(ng_ether_detach_p != NULL,
("ng_ether_detach_p is NULL"));
(*ng_ether_detach_p)(ifp);
@@ -1061,10 +974,6 @@ ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
-#if defined(INET) || defined(INET6)
-SYSCTL_VNET_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
- &VNET_NAME(ether_ipfw), 0, "Pass ether pkts through firewall");
-#endif
#if 0
/*
@@ -1158,31 +1067,6 @@ ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
arp_ifinit(ifp, ifa);
break;
#endif
-#ifdef IPX
- /*
- * XXX - This code is probably wrong
- */
- case AF_IPX:
- {
- struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr);
-
- if (ipx_nullhost(*ina))
- ina->x_host =
- *(union ipx_host *)
- IF_LLADDR(ifp);
- else {
- bcopy((caddr_t) ina->x_host.c_host,
- (caddr_t) IF_LLADDR(ifp),
- ETHER_ADDR_LEN);
- }
-
- /*
- * Set new address
- */
- ifp->if_init(ifp->if_softc);
- break;
- }
-#endif
default:
ifp->if_init(ifp->if_softc);
break;
@@ -1238,7 +1122,7 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
e_addr = LLADDR(sdl);
if (!ETHER_IS_MULTICAST(e_addr))
return EADDRNOTAVAIL;
- *llsa = 0;
+ *llsa = NULL;
return 0;
#ifdef INET
@@ -1246,14 +1130,7 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
sin = (struct sockaddr_in *)sa;
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
return EADDRNOTAVAIL;
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT|M_ZERO);
- if (sdl == NULL)
- return ENOMEM;
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ETHER;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
sdl->sdl_alen = ETHER_ADDR_LEN;
e_addr = LLADDR(sdl);
ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
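
The ETHER_MAP_IP_MULTICAST step deserves a worked example: per RFC 1112 the
low 23 bits of the group address are copied into the fixed 01:00:5e prefix,
so the discarded high bits make distinct groups alias to one MAC address:

/*
 *   224.1.2.3   -> 01:00:5e:01:02:03
 *   239.129.2.3 -> 01:00:5e:01:02:03	(32 IPv4 groups share each
 *					 link-layer multicast address)
 */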
@@ -1270,19 +1147,12 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
* (This is used for multicast routers.)
*/
ifp->if_flags |= IFF_ALLMULTI;
- *llsa = 0;
+ *llsa = NULL;
return 0;
}
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return EADDRNOTAVAIL;
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT|M_ZERO);
- if (sdl == NULL)
- return (ENOMEM);
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ETHER;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
sdl->sdl_alen = ETHER_ADDR_LEN;
e_addr = LLADDR(sdl);
ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
@@ -1299,46 +1169,8 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
}
}
-static void*
-ether_alloc(u_char type, struct ifnet *ifp)
-{
- struct arpcom *ac;
-
- ac = malloc(sizeof(struct arpcom), M_ARPCOM, M_WAITOK | M_ZERO);
- ac->ac_ifp = ifp;
-
- return (ac);
-}
-
-static void
-ether_free(void *com, u_char type)
-{
-
- free(com, M_ARPCOM);
-}
-
-static int
-ether_modevent(module_t mod, int type, void *data)
-{
-
- switch (type) {
- case MOD_LOAD:
- if_register_com_alloc(IFT_ETHER, ether_alloc, ether_free);
- break;
- case MOD_UNLOAD:
- if_deregister_com_alloc(IFT_ETHER);
- break;
- default:
- return EOPNOTSUPP;
- }
-
- return (0);
-}
-
static moduledata_t ether_mod = {
- "ether",
- ether_modevent,
- 0
+ .name = "ether",
};
void
@@ -1386,7 +1218,7 @@ ether_vlanencap(struct mbuf *m, uint16_t tag)
{
struct ether_vlan_header *evl;
- M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
+ M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
if (m == NULL)
return (NULL);
/* M_PREPEND takes care of m_len, m_pkthdr.len for us */
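
A general note, since this update converts every M_DONTWAIT to its modern
spelling M_NOWAIT: M_PREPEND() may substitute a new head mbuf, and on
allocation failure it frees the whole chain and sets the variable to NULL,
which is why each call site immediately re-tests m. The canonical pattern,
as used in ether_vlanencap() here:

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
	if (m == NULL)
		return (NULL);	/* the macro already freed the chain */
	evl = mtod(m, struct ether_vlan_header *);	/* head may have moved */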
diff --git a/freebsd/sys/net/if_faith.c b/freebsd/sys/net/if_faith.c
deleted file mode 100644
index cf4a7fba..00000000
--- a/freebsd/sys/net/if_faith.c
+++ /dev/null
@@ -1,353 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/* $KAME: if_faith.c,v 1.23 2001/12/17 13:55:29 sumikawa Exp $ */
-
-/*-
- * Copyright (c) 1982, 1986, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-/*
- * derived from
- * @(#)if_loop.c 8.1 (Berkeley) 6/10/93
- * Id: if_loop.c,v 1.22 1996/06/19 16:24:10 wollman Exp
- */
-
-/*
- * Loopback interface driver for protocol testing and timing.
- */
-#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_inet6.h>
-
-#include <rtems/bsd/sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/socket.h>
-#include <rtems/bsd/sys/errno.h>
-#include <sys/sockio.h>
-#include <sys/time.h>
-#include <sys/queue.h>
-#include <sys/types.h>
-#include <sys/malloc.h>
-
-#include <net/if.h>
-#include <net/if_clone.h>
-#include <net/if_types.h>
-#include <net/netisr.h>
-#include <net/route.h>
-#include <net/bpf.h>
-#include <net/vnet.h>
-
-#ifdef INET
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-#include <netinet/ip.h>
-#endif
-
-#ifdef INET6
-#ifndef INET
-#include <netinet/in.h>
-#endif
-#include <netinet6/in6_var.h>
-#include <netinet/ip6.h>
-#include <netinet6/ip6_var.h>
-#endif
-
-#define FAITHNAME "faith"
-
-struct faith_softc {
- struct ifnet *sc_ifp;
-};
-
-static int faithioctl(struct ifnet *, u_long, caddr_t);
-int faithoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
-static void faithrtrequest(int, struct rtentry *, struct rt_addrinfo *);
-#ifdef INET6
-static int faithprefix(struct in6_addr *);
-#endif
-
-static int faithmodevent(module_t, int, void *);
-
-static MALLOC_DEFINE(M_FAITH, FAITHNAME, "Firewall Assisted Tunnel Interface");
-
-static int faith_clone_create(struct if_clone *, int, caddr_t);
-static void faith_clone_destroy(struct ifnet *);
-
-IFC_SIMPLE_DECLARE(faith, 0);
-
-#define FAITHMTU 1500
-
-static int
-faithmodevent(mod, type, data)
- module_t mod;
- int type;
- void *data;
-{
-
- switch (type) {
- case MOD_LOAD:
- if_clone_attach(&faith_cloner);
-
-#ifdef INET6
- faithprefix_p = faithprefix;
-#endif
-
- break;
- case MOD_UNLOAD:
-#ifdef INET6
- faithprefix_p = NULL;
-#endif
-
- if_clone_detach(&faith_cloner);
- break;
- default:
- return EOPNOTSUPP;
- }
- return 0;
-}
-
-static moduledata_t faith_mod = {
- "if_faith",
- faithmodevent,
- 0
-};
-
-DECLARE_MODULE(if_faith, faith_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
-MODULE_VERSION(if_faith, 1);
-
-static int
-faith_clone_create(ifc, unit, params)
- struct if_clone *ifc;
- int unit;
- caddr_t params;
-{
- struct ifnet *ifp;
- struct faith_softc *sc;
-
- sc = malloc(sizeof(struct faith_softc), M_FAITH, M_WAITOK | M_ZERO);
- ifp = sc->sc_ifp = if_alloc(IFT_FAITH);
- if (ifp == NULL) {
- free(sc, M_FAITH);
- return (ENOSPC);
- }
-
- ifp->if_softc = sc;
- if_initname(sc->sc_ifp, ifc->ifc_name, unit);
-
- ifp->if_mtu = FAITHMTU;
- /* Change to BROADCAST experimentally to announce its prefix. */
- ifp->if_flags = /* IFF_LOOPBACK */ IFF_BROADCAST | IFF_MULTICAST;
- ifp->if_ioctl = faithioctl;
- ifp->if_output = faithoutput;
- ifp->if_hdrlen = 0;
- ifp->if_addrlen = 0;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
- if_attach(ifp);
- bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
- return (0);
-}
-
-static void
-faith_clone_destroy(ifp)
- struct ifnet *ifp;
-{
- struct faith_softc *sc = ifp->if_softc;
-
- bpfdetach(ifp);
- if_detach(ifp);
- if_free(ifp);
- free(sc, M_FAITH);
-}
-
-int
-faithoutput(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
-{
- int isr;
- u_int32_t af;
- struct rtentry *rt = NULL;
-
- M_ASSERTPKTHDR(m);
-
- if (ro != NULL)
- rt = ro->ro_rt;
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
- if (bpf_peers_present(ifp->if_bpf)) {
- af = dst->sa_family;
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
- }
-
- if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
- m_freem(m);
- return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
- rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
- }
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
- switch (dst->sa_family) {
-#ifdef INET
- case AF_INET:
- isr = NETISR_IP;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- isr = NETISR_IPV6;
- break;
-#endif
- default:
- m_freem(m);
- return EAFNOSUPPORT;
- }
-
- /* XXX do we need more sanity checks? */
-
- m->m_pkthdr.rcvif = ifp;
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
- netisr_dispatch(isr, m);
- return (0);
-}
-
-/* ARGSUSED */
-static void
-faithrtrequest(cmd, rt, info)
- int cmd;
- struct rtentry *rt;
- struct rt_addrinfo *info;
-{
- RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
-}
-
-/*
- * Process an ioctl request.
- */
-/* ARGSUSED */
-static int
-faithioctl(ifp, cmd, data)
- struct ifnet *ifp;
- u_long cmd;
- caddr_t data;
-{
- struct ifaddr *ifa;
- struct ifreq *ifr = (struct ifreq *)data;
- int error = 0;
-
- switch (cmd) {
-
- case SIOCSIFADDR:
- ifp->if_flags |= IFF_UP;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifa = (struct ifaddr *)data;
- ifa->ifa_rtrequest = faithrtrequest;
- /*
- * Everything else is done at a higher level.
- */
- break;
-
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- if (ifr == 0) {
- error = EAFNOSUPPORT; /* XXX */
- break;
- }
- switch (ifr->ifr_addr.sa_family) {
-#ifdef INET
- case AF_INET:
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- break;
-#endif
-
- default:
- error = EAFNOSUPPORT;
- break;
- }
- break;
-
-#ifdef SIOCSIFMTU
- case SIOCSIFMTU:
- ifp->if_mtu = ifr->ifr_mtu;
- break;
-#endif
-
- case SIOCSIFFLAGS:
- break;
-
- default:
- error = EINVAL;
- }
- return (error);
-}
-
-#ifdef INET6
-/*
- * XXX could be slow
- * XXX could be layer violation to call sys/net from sys/netinet6
- */
-static int
-faithprefix(in6)
- struct in6_addr *in6;
-{
- struct rtentry *rt;
- struct sockaddr_in6 sin6;
- int ret;
-
- if (V_ip6_keepfaith == 0)
- return 0;
-
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_addr = *in6;
- rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB);
- if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH &&
- (rt->rt_ifp->if_flags & IFF_UP) != 0)
- ret = 1;
- else
- ret = 0;
- if (rt)
- RTFREE_LOCKED(rt);
- return ret;
-}
-#endif
diff --git a/freebsd/sys/net/if_fddisubr.c b/freebsd/sys/net/if_fddisubr.c
index 7a7fb471..9df882ec 100644
--- a/freebsd/sys/net/if_fddisubr.c
+++ b/freebsd/sys/net/if_fddisubr.c
@@ -38,10 +38,8 @@
* $FreeBSD$
*/
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -53,6 +51,7 @@
#include <sys/sockio.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llc.h>
#include <net/if_types.h>
@@ -73,24 +72,10 @@
#include <netinet6/nd6.h>
#endif
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#ifdef DECNET
#include <netdnet/dn.h>
#endif
-#ifdef NETATALK
-#include <netatalk/at.h>
-#include <netatalk/at_var.h>
-#include <netatalk/at_extern.h>
-
-extern u_char at_org_code[ 3 ];
-extern u_char aarp_org_code[ 3 ];
-#endif /* NETATALK */
-
#include <security/mac/mac_framework.h>
static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] =
@@ -98,7 +83,7 @@ static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] =
static int fddi_resolvemulti(struct ifnet *, struct sockaddr **,
struct sockaddr *);
-static int fddi_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int fddi_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
static void fddi_input(struct ifnet *ifp, struct mbuf *m);
@@ -109,21 +94,17 @@ static void fddi_input(struct ifnet *ifp, struct mbuf *m);
* Encapsulate a packet of type family for the local net.
* Use trailer local net encapsulation if enough data in first
* packet leaves a multiple of 512 bytes of data in remainder.
- * Assumes that ifp is actually pointer to arpcom structure.
*/
static int
-fddi_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
u_int16_t type;
int loop_copy = 0, error = 0, hdrcmplt = 0;
u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN];
struct fddi_header *fh;
#if defined(INET) || defined(INET6)
- struct llentry *lle;
+ int is_gw = 0;
#endif
#ifdef MAC
@@ -139,14 +120,15 @@ fddi_output(ifp, m, dst, ro)
senderr(ENETDOWN);
getmicrotime(&ifp->if_lastchange);
+#if defined(INET) || defined(INET6)
+ if (ro != NULL)
+ is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
+#endif
+
switch (dst->sa_family) {
#ifdef INET
case AF_INET: {
- struct rtentry *rt0 = NULL;
-
- if (ro != NULL)
- rt0 = ro->ro_rt;
- error = arpresolve(ifp, rt0, m, dst, edst, &lle);
+ error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
type = htons(ETHERTYPE_IP);
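
The arpresolve()/nd6_resolve() calls rewritten throughout this diff share a
subtle convention: EWOULDBLOCK means the resolver took ownership of the mbuf
and queued it until the link-layer address is known, so the caller must
report success without freeing m. Hence the recurring idiom:

	error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
	if (error)
		return (error == EWOULDBLOCK ? 0 : error);	/* m queued, not lost */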
@@ -182,68 +164,29 @@ fddi_output(ifp, m, dst, ro)
#endif /* INET */
#ifdef INET6
case AF_INET6:
- error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
+ error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
if (error)
- return (error); /* Something bad happened */
+ return (error == EWOULDBLOCK ? 0 : error);
type = htons(ETHERTYPE_IPV6);
break;
#endif /* INET6 */
-#ifdef IPX
- case AF_IPX:
- type = htons(ETHERTYPE_IPX);
- bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
- (caddr_t)edst, FDDI_ADDR_LEN);
- break;
-#endif /* IPX */
-#ifdef NETATALK
- case AF_APPLETALK: {
- struct at_ifaddr *aa;
- if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst))
- return (0);
- /*
- * ifaddr is the first thing in at_ifaddr
- */
- if ((aa = at_ifawithnet( (struct sockaddr_at *)dst)) == 0)
- goto bad;
-
- /*
- * In the phase 2 case, we need to prepend an mbuf for the llc header.
- * Since we must preserve the value of m, which is passed to us by
- * value, we m_copy() the first mbuf, and use it for our llc header.
- */
- if (aa->aa_flags & AFA_PHASE2) {
- struct llc llc;
-
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_WAIT);
- llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
- llc.llc_control = LLC_UI;
- bcopy(at_org_code, llc.llc_snap.org_code, sizeof(at_org_code));
- llc.llc_snap.ether_type = htons(ETHERTYPE_AT);
- bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN);
- type = 0;
- } else {
- type = htons(ETHERTYPE_AT);
- }
- ifa_free(&aa->aa_ifa);
- break;
- }
-#endif /* NETATALK */
-
case pseudo_AF_HDRCMPLT:
{
- struct ether_header *eh;
+ const struct ether_header *eh;
+
hdrcmplt = 1;
- eh = (struct ether_header *)dst->sa_data;
- bcopy((caddr_t)eh->ether_shost, (caddr_t)esrc, FDDI_ADDR_LEN);
+ eh = (const struct ether_header *)dst->sa_data;
+ bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN);
/* FALLTHROUGH */
}
case AF_UNSPEC:
{
- struct ether_header *eh;
+ const struct ether_header *eh;
+
loop_copy = -1;
- eh = (struct ether_header *)dst->sa_data;
- bcopy((caddr_t)eh->ether_dhost, (caddr_t)edst, FDDI_ADDR_LEN);
+ eh = (const struct ether_header *)dst->sa_data;
+ bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN);
if (*edst & 1)
m->m_flags |= (M_BCAST|M_MCAST);
type = eh->ether_type;
@@ -293,8 +236,8 @@ fddi_output(ifp, m, dst, ro)
*/
if (type != 0) {
struct llc *l;
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
l = mtod(m, struct llc *);
l->llc_control = LLC_UI;
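
For reference, the 802.2 LLC/SNAP header assembled in this hunk is eight
bytes, after which the payload follows the FDDI MAC header:

/*
 *   DSAP  SSAP  ctrl  OUI       ethertype
 *   0xaa  0xaa  0x03  00:00:00  e.g. 0x0800 (IPv4)
 *
 * wire layout: FDDI header | LLC/SNAP | payload
 */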
@@ -309,8 +252,8 @@ fddi_output(ifp, m, dst, ro)
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, FDDI_HDR_LEN, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
fh = mtod(m, struct fddi_header *);
fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4;
@@ -347,12 +290,12 @@ fddi_output(ifp, m, dst, ro)
error = (ifp->if_transmit)(ifp, m);
if (error)
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (error);
bad:
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
if (m)
m_freem(m);
return (error);
@@ -376,24 +319,23 @@ fddi_input(ifp, m)
*/
if ((m->m_flags & M_PKTHDR) == 0) {
if_printf(ifp, "discard frame w/o packet header\n");
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
if (m->m_pkthdr.rcvif == NULL) {
if_printf(ifp, "discard frame w/o interface pointer\n");
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
m = m_pullup(m, FDDI_HDR_LEN);
if (m == NULL) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
goto dropanyway;
}
fh = mtod(m, struct fddi_header *);
- m->m_pkthdr.header = (void *)fh;
/*
* Discard packet if interface is not up.
@@ -422,7 +364,7 @@ fddi_input(ifp, m)
/*
* Update interface statistics.
*/
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
getmicrotime(&ifp->if_lastchange);
/*
@@ -443,7 +385,7 @@ fddi_input(ifp, m)
m->m_flags |= M_BCAST;
else
m->m_flags |= M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
#ifdef M_LINK0
@@ -461,7 +403,7 @@ fddi_input(ifp, m)
m = m_pullup(m, LLC_SNAPFRAMELEN);
if (m == 0) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
goto dropanyway;
}
l = mtod(m, struct llc *);
@@ -472,30 +414,13 @@ fddi_input(ifp, m)
u_int16_t type;
if ((l->llc_control != LLC_UI) ||
(l->llc_ssap != LLC_SNAP_LSAP)) {
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
-#ifdef NETATALK
- if (bcmp(&(l->llc_snap.org_code)[0], at_org_code,
- sizeof(at_org_code)) == 0 &&
- ntohs(l->llc_snap.ether_type) == ETHERTYPE_AT) {
- isr = NETISR_ATALK2;
- m_adj(m, LLC_SNAPFRAMELEN);
- break;
- }
-
- if (bcmp(&(l->llc_snap.org_code)[0], aarp_org_code,
- sizeof(aarp_org_code)) == 0 &&
- ntohs(l->llc_snap.ether_type) == ETHERTYPE_AARP) {
- m_adj(m, LLC_SNAPFRAMELEN);
- isr = NETISR_AARP;
- break;
- }
-#endif /* NETATALK */
if (l->llc_snap.org_code[0] != 0 ||
l->llc_snap.org_code[1] != 0 ||
l->llc_snap.org_code[2] != 0) {
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
@@ -505,8 +430,6 @@ fddi_input(ifp, m)
switch (type) {
#ifdef INET
case ETHERTYPE_IP:
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
@@ -521,27 +444,14 @@ fddi_input(ifp, m)
isr = NETISR_IPV6;
break;
#endif
-#ifdef IPX
- case ETHERTYPE_IPX:
- isr = NETISR_IPX;
- break;
-#endif
#ifdef DECNET
case ETHERTYPE_DECNET:
isr = NETISR_DECNET;
break;
#endif
-#ifdef NETATALK
- case ETHERTYPE_AT:
- isr = NETISR_ATALK1;
- break;
- case ETHERTYPE_AARP:
- isr = NETISR_AARP;
- break;
-#endif /* NETATALK */
default:
/* printf("fddi_input: unknown protocol 0x%x\n", type); */
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
break;
@@ -549,7 +459,7 @@ fddi_input(ifp, m)
default:
/* printf("fddi_input: unknown dsap 0x%x\n", l->llc_dsap); */
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
M_SETFIB(m, ifp->if_fib);
@@ -557,7 +467,7 @@ fddi_input(ifp, m)
return;
dropanyway:
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
if (m)
m_freem(m);
return;
@@ -643,31 +553,6 @@ fddi_ioctl (ifp, command, data)
arp_ifinit(ifp, ifa);
break;
#endif
-#ifdef IPX
- /*
- * XXX - This code is probably wrong
- */
- case AF_IPX: {
- struct ipx_addr *ina;
-
- ina = &(IA_SIPX(ifa)->sipx_addr);
-
- if (ipx_nullhost(*ina)) {
- ina->x_host = *(union ipx_host *)
- IF_LLADDR(ifp);
- } else {
- bcopy((caddr_t) ina->x_host.c_host,
- (caddr_t) IF_LLADDR(ifp),
- ETHER_ADDR_LEN);
- }
-
- /*
- * Set new address
- */
- ifp->if_init(ifp->if_softc);
- }
- break;
-#endif
default:
ifp->if_init(ifp->if_softc);
break;
@@ -724,7 +609,7 @@ fddi_resolvemulti(ifp, llsa, sa)
e_addr = LLADDR(sdl);
if ((e_addr[0] & 1) != 1)
return (EADDRNOTAVAIL);
- *llsa = 0;
+ *llsa = NULL;
return (0);
#ifdef INET
@@ -732,14 +617,7 @@ fddi_resolvemulti(ifp, llsa, sa)
sin = (struct sockaddr_in *)sa;
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
return (EADDRNOTAVAIL);
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT | M_ZERO);
- if (sdl == NULL)
- return (ENOMEM);
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_FDDI;
+ sdl = link_init_sdl(ifp, *llsa, IFT_FDDI);
sdl->sdl_nlen = 0;
sdl->sdl_alen = FDDI_ADDR_LEN;
sdl->sdl_slen = 0;
@@ -758,19 +636,12 @@ fddi_resolvemulti(ifp, llsa, sa)
* (This is used for multicast routers.)
*/
ifp->if_flags |= IFF_ALLMULTI;
- *llsa = 0;
+ *llsa = NULL;
return (0);
}
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return (EADDRNOTAVAIL);
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT | M_ZERO);
- if (sdl == NULL)
- return (ENOMEM);
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_FDDI;
+ sdl = link_init_sdl(ifp, *llsa, IFT_FDDI);
sdl->sdl_nlen = 0;
sdl->sdl_alen = FDDI_ADDR_LEN;
sdl->sdl_slen = 0;
diff --git a/freebsd/sys/net/if_fwsubr.c b/freebsd/sys/net/if_fwsubr.c
index b022ecae..df4c38cf 100644
--- a/freebsd/sys/net/if_fwsubr.c
+++ b/freebsd/sys/net/if_fwsubr.c
@@ -45,6 +45,7 @@
#include <sys/sockio.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/if_llc.h>
@@ -77,7 +78,7 @@ struct fw_hwaddr firewire_broadcastaddr = {
};
static int
-firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
struct fw_com *fc = IFP2FWC(ifp);
@@ -91,7 +92,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
int unicast, dgl, foff;
static int next_dgl;
#if defined(INET) || defined(INET6)
- struct llentry *lle;
+ int is_gw = 0;
#endif
#ifdef MAC
@@ -106,6 +107,10 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
goto bad;
}
+#if defined(INET) || defined(INET6)
+ if (ro != NULL)
+ is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
+#endif
/*
* For unicast, we make a tag to store the lladdr of the
* destination. This might not be the first time we have seen
@@ -129,7 +134,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
}
destfw = (struct fw_hwaddr *)(mtag + 1);
} else {
- destfw = 0;
+ destfw = NULL;
}
switch (dst->sa_family) {
@@ -141,7 +146,8 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
* doesn't fit into the arp model.
*/
if (unicast) {
- error = arpresolve(ifp, ro ? ro->ro_rt : NULL, m, dst, (u_char *) destfw, &lle);
+ error = arpresolve(ifp, is_gw, m, dst,
+ (u_char *) destfw, NULL, NULL);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
}
@@ -170,10 +176,10 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
#ifdef INET6
case AF_INET6:
if (unicast) {
- error = nd6_storelladdr(fc->fc_ifp, m, dst,
- (u_char *) destfw, &lle);
+ error = nd6_resolve(fc->fc_ifp, is_gw, m, dst,
+ (u_char *) destfw, NULL, NULL);
if (error)
- return (error);
+ return (error == EWOULDBLOCK ? 0 : error);
}
type = ETHERTYPE_IPV6;
break;
@@ -231,7 +237,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
/*
* No fragmentation is necessary.
*/
- M_PREPEND(m, sizeof(uint32_t), M_DONTWAIT);
+ M_PREPEND(m, sizeof(uint32_t), M_NOWAIT);
if (!m) {
error = ENOBUFS;
goto bad;
@@ -263,17 +269,17 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
* Split off the tail segment from the
* datagram, copying our tags over.
*/
- mtail = m_split(m, fsize, M_DONTWAIT);
+ mtail = m_split(m, fsize, M_NOWAIT);
m_tag_copy_chain(mtail, m, M_NOWAIT);
} else {
- mtail = 0;
+ mtail = NULL;
}
/*
* Add our encapsulation header to this
* fragment and hand it off to the link.
*/
- M_PREPEND(m, 2*sizeof(uint32_t), M_DONTWAIT);
+ M_PREPEND(m, 2*sizeof(uint32_t), M_NOWAIT);
if (!m) {
error = ENOBUFS;
goto bad;
@@ -538,7 +544,7 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
if (m->m_pkthdr.rcvif == NULL) {
if_printf(ifp, "discard frame w/o interface pointer\n");
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
@@ -583,7 +589,7 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
return;
}
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
/* Discard packet if interface is not up */
if ((ifp->if_flags & IFF_UP) == 0) {
@@ -592,13 +598,11 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
}
if (m->m_flags & (M_BCAST|M_MCAST))
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
switch (type) {
#ifdef INET
case ETHERTYPE_IP:
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
@@ -700,7 +704,7 @@ firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
/*
* No mapping needed.
*/
- *llsa = 0;
+ *llsa = NULL;
return 0;
#ifdef INET
@@ -708,7 +712,7 @@ firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
sin = (struct sockaddr_in *)sa;
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
return EADDRNOTAVAIL;
- *llsa = 0;
+ *llsa = NULL;
return 0;
#endif
#ifdef INET6
@@ -721,12 +725,12 @@ firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
* (This is used for multicast routers.)
*/
ifp->if_flags |= IFF_ALLMULTI;
- *llsa = 0;
+ *llsa = NULL;
return 0;
}
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return EADDRNOTAVAIL;
- *llsa = 0;
+ *llsa = NULL;
return 0;
#endif
diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c
index 27cbbdda..e07a2da0 100644
--- a/freebsd/sys/net/if_gif.c
+++ b/freebsd/sys/net/if_gif.c
@@ -1,8 +1,5 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $FreeBSD$ */
-/* $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */
-
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
@@ -30,8 +27,13 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
@@ -39,11 +41,14 @@
#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
+#include <sys/sx.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/time.h>
#include <sys/sysctl.h>
@@ -55,6 +60,7 @@
#include <machine/cpu.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/netisr.h>
@@ -65,9 +71,9 @@
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
+#include <netinet/ip_ecn.h>
#ifdef INET
#include <netinet/in_var.h>
-#include <netinet/in_gif.h>
#include <netinet/ip_var.h>
#endif /* INET */
@@ -77,9 +83,9 @@
#endif
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
+#include <netinet6/ip6_ecn.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
-#include <netinet6/in6_gif.h>
#include <netinet6/ip6protosw.h>
#endif /* INET6 */
@@ -90,26 +96,41 @@
#include <security/mac/mac_framework.h>
-#define GIFNAME "gif"
+static const char gifname[] = "gif";
/*
- * gif_mtx protects the global gif_softc_list.
+ * gif_mtx protects a per-vnet gif_softc_list.
*/
-static struct mtx gif_mtx;
+static VNET_DEFINE(struct mtx, gif_mtx);
+#define V_gif_mtx VNET(gif_mtx)
static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
#define V_gif_softc_list VNET(gif_softc_list)
+static struct sx gif_ioctl_sx;
+SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
+
+#define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \
+ NULL, MTX_DEF)
+#define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx)
+#define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx)
+#define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx)
void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
void (*ng_gif_attach_p)(struct ifnet *ifp);
void (*ng_gif_detach_p)(struct ifnet *ifp);
-static void gif_start(struct ifnet *);
+static int gif_check_nesting(struct ifnet *, struct mbuf *);
+static int gif_set_tunnel(struct ifnet *, struct sockaddr *,
+ struct sockaddr *);
+static void gif_delete_tunnel(struct ifnet *);
+static int gif_ioctl(struct ifnet *, u_long, caddr_t);
+static int gif_transmit(struct ifnet *, struct mbuf *);
+static void gif_qflush(struct ifnet *);
static int gif_clone_create(struct if_clone *, int, caddr_t);
static void gif_clone_destroy(struct ifnet *);
-
-IFC_SIMPLE_DECLARE(gif, 0);
+static VNET_DEFINE(struct if_clone *, gif_cloner);
+#define V_gif_cloner VNET(gif_cloner)
static int gifmodevent(module_t, int, void *);
@@ -129,7 +150,7 @@ static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
#endif
static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
#define V_max_gif_nesting VNET(max_gif_nesting)
-SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW,
+SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
/*
@@ -143,22 +164,12 @@ static VNET_DEFINE(int, parallel_tunnels) = 1;
static VNET_DEFINE(int, parallel_tunnels) = 0;
#endif
#define V_parallel_tunnels VNET(parallel_tunnels)
-SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW,
- &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?");
-
-/* copy from src/sys/net/if_ethersubr.c */
-static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
- { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-#ifndef ETHER_IS_BROADCAST
-#define ETHER_IS_BROADCAST(addr) \
- (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
-#endif
+SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
+ "Allow parallel tunnels?");
static int
-gif_clone_create(ifc, unit, params)
- struct if_clone *ifc;
- int unit;
- caddr_t params;
+gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct gif_softc *sc;
@@ -169,18 +180,9 @@ gif_clone_create(ifc, unit, params)
sc->gif_fibnum = BSD_DEFAULT_FIB;
#endif /* __rtems__ */
GIF2IFP(sc) = if_alloc(IFT_GIF);
- if (GIF2IFP(sc) == NULL) {
- free(sc, M_GIF);
- return (ENOSPC);
- }
-
GIF_LOCK_INIT(sc);
-
GIF2IFP(sc)->if_softc = sc;
- if_initname(GIF2IFP(sc), ifc->ifc_name, unit);
-
- sc->encap_cookie4 = sc->encap_cookie6 = NULL;
- sc->gif_options = GIF_ACCEPT_REVETHIP;
+ if_initname(GIF2IFP(sc), gifname, unit);
GIF2IFP(sc)->if_addrlen = 0;
GIF2IFP(sc)->if_mtu = GIF_MTU;
@@ -190,56 +192,42 @@ gif_clone_create(ifc, unit, params)
GIF2IFP(sc)->if_flags |= IFF_LINK2;
#endif
GIF2IFP(sc)->if_ioctl = gif_ioctl;
- GIF2IFP(sc)->if_start = gif_start;
+ GIF2IFP(sc)->if_transmit = gif_transmit;
+ GIF2IFP(sc)->if_qflush = gif_qflush;
GIF2IFP(sc)->if_output = gif_output;
- GIF2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
+ GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
+ GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
if_attach(GIF2IFP(sc));
bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
if (ng_gif_attach_p != NULL)
(*ng_gif_attach_p)(GIF2IFP(sc));
- mtx_lock(&gif_mtx);
+ GIF_LIST_LOCK();
LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
- mtx_unlock(&gif_mtx);
-
+ GIF_LIST_UNLOCK();
return (0);
}
static void
-gif_clone_destroy(ifp)
- struct ifnet *ifp;
+gif_clone_destroy(struct ifnet *ifp)
{
-#if defined(INET) || defined(INET6)
- int err;
-#endif
- struct gif_softc *sc = ifp->if_softc;
-
- mtx_lock(&gif_mtx);
- LIST_REMOVE(sc, gif_list);
- mtx_unlock(&gif_mtx);
+ struct gif_softc *sc;
+ sx_xlock(&gif_ioctl_sx);
+ sc = ifp->if_softc;
gif_delete_tunnel(ifp);
-#ifdef INET6
- if (sc->encap_cookie6 != NULL) {
- err = encap_detach(sc->encap_cookie6);
- KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
- }
-#endif
-#ifdef INET
- if (sc->encap_cookie4 != NULL) {
- err = encap_detach(sc->encap_cookie4);
- KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
- }
-#endif
-
+ GIF_LIST_LOCK();
+ LIST_REMOVE(sc, gif_list);
+ GIF_LIST_UNLOCK();
if (ng_gif_detach_p != NULL)
(*ng_gif_detach_p)(ifp);
bpfdetach(ifp);
if_detach(ifp);
- if_free(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&gif_ioctl_sx);
+ if_free(ifp);
GIF_LOCK_DESTROY(sc);
-
free(sc, M_GIF);
}
@@ -248,31 +236,35 @@ vnet_gif_init(const void *unused __unused)
{
LIST_INIT(&V_gif_softc_list);
+ GIF_LIST_LOCK_INIT();
+ V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
+ gif_clone_destroy, 0);
}
-VNET_SYSINIT(vnet_gif_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_gif_init,
- NULL);
+VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gif_init, NULL);
+
+static void
+vnet_gif_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_gif_cloner);
+ GIF_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gif_uninit, NULL);
static int
-gifmodevent(mod, type, data)
- module_t mod;
- int type;
- void *data;
+gifmodevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
- mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
- if_clone_attach(&gif_cloner);
- break;
-
case MOD_UNLOAD:
- if_clone_detach(&gif_cloner);
- mtx_destroy(&gif_mtx);
break;
default:
- return EOPNOTSUPP;
+ return (EOPNOTSUPP);
}
- return 0;
+ return (0);
}
static moduledata_t gif_mod = {
@@ -285,219 +277,257 @@ DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_gif, 1);
int
-gif_encapcheck(m, off, proto, arg)
- const struct mbuf *m;
- int off;
- int proto;
- void *arg;
+gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
- struct ip ip;
+ GIF_RLOCK_TRACKER;
+ const struct ip *ip;
struct gif_softc *sc;
+ int ret;
sc = (struct gif_softc *)arg;
- if (sc == NULL)
- return 0;
+ if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0);
- if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
- return 0;
+ ret = 0;
+ GIF_RLOCK(sc);
/* no physical address */
- if (!sc->gif_psrc || !sc->gif_pdst)
- return 0;
+ if (sc->gif_family == 0)
+ goto done;
switch (proto) {
#ifdef INET
case IPPROTO_IPV4:
- break;
#endif
#ifdef INET6
case IPPROTO_IPV6:
- break;
#endif
case IPPROTO_ETHERIP:
break;
-
default:
- return 0;
+ goto done;
}
/* Bail on short packets */
- if (m->m_pkthdr.len < sizeof(ip))
- return 0;
+ M_ASSERTPKTHDR(m);
+ if (m->m_pkthdr.len < sizeof(struct ip))
+ goto done;
- m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
-
- switch (ip.ip_v) {
+ ip = mtod(m, const struct ip *);
+ switch (ip->ip_v) {
#ifdef INET
case 4:
- if (sc->gif_psrc->sa_family != AF_INET ||
- sc->gif_pdst->sa_family != AF_INET)
- return 0;
- return gif_encapcheck4(m, off, proto, arg);
+ if (sc->gif_family != AF_INET)
+ goto done;
+ ret = in_gif_encapcheck(m, off, proto, arg);
+ break;
#endif
#ifdef INET6
case 6:
if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
- return 0;
- if (sc->gif_psrc->sa_family != AF_INET6 ||
- sc->gif_pdst->sa_family != AF_INET6)
- return 0;
- return gif_encapcheck6(m, off, proto, arg);
+ goto done;
+ if (sc->gif_family != AF_INET6)
+ goto done;
+ ret = in6_gif_encapcheck(m, off, proto, arg);
+ break;
#endif
- default:
- return 0;
}
+done:
+ GIF_RUNLOCK(sc);
+ return (ret);
}
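
gif_encapcheck() is the match function handed to the ip_encap framework
(netinet/ip_encap.c); registration happens on the in_gif.c/in6_gif.c side of
this update and looks roughly like the sketch below, where the softc cookie
field name is an assumption:

	/* Sketch: attach the check function for IPv4 outer headers. */
	sc->gif_ecookie = encap_attach_func(AF_INET, -1, gif_encapcheck,
	    &in_gif_protosw, sc);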
-static void
-gif_start(struct ifnet *ifp)
+static int
+gif_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct gif_softc *sc;
- struct mbuf *m;
-
- sc = ifp->if_softc;
-
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- for (;;) {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- if (m == 0)
- break;
-
- gif_output(ifp, m, sc->gif_pdst, NULL);
-
- }
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-
- return;
-}
-
-int
-gif_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
-{
- struct gif_softc *sc = ifp->if_softc;
- struct m_tag *mtag;
- int error = 0;
- int gif_called;
- u_int32_t af;
+ struct etherip_header *eth;
+#ifdef INET
+ struct ip *ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6;
+ uint32_t t;
+#endif
+ uint32_t af;
+ uint8_t proto, ecn;
+ int error;
#ifdef MAC
error = mac_ifnet_check_transmit(ifp, m);
if (error) {
m_freem(m);
- goto end;
+ goto err;
}
#endif
-
- /*
- * gif may cause infinite recursion calls when misconfigured.
- * We'll prevent this by detecting loops.
- *
- * High nesting level may cause stack exhaustion.
- * We'll prevent this by introducing upper limit.
- */
- gif_called = 1;
- mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
- while (mtag != NULL) {
- if (*(struct ifnet **)(mtag + 1) == ifp) {
- log(LOG_NOTICE,
- "gif_output: loop detected on %s\n",
- (*(struct ifnet **)(mtag + 1))->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
- mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
- gif_called++;
- }
- if (gif_called > V_max_gif_nesting) {
- log(LOG_NOTICE,
- "gif_output: recursively called too many times(%d)\n",
- gif_called);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
- mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
- M_NOWAIT);
- if (mtag == NULL) {
- m_freem(m);
- error = ENOMEM;
- goto end;
- }
- *(struct ifnet **)(mtag + 1) = ifp;
- m_tag_prepend(m, mtag);
-
- m->m_flags &= ~(M_BCAST|M_MCAST);
-
- GIF_LOCK(sc);
-
- if (!(ifp->if_flags & IFF_UP) ||
- sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
- GIF_UNLOCK(sc);
+ error = ENETDOWN;
+ sc = ifp->if_softc;
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0 ||
+ sc->gif_family == 0 ||
+ (error = gif_check_nesting(ifp, m)) != 0) {
m_freem(m);
- error = ENETDOWN;
- goto end;
- }
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
+ goto err;
}
-
- af = dst->sa_family;
- BPF_MTAP2(ifp, &af, sizeof(af), m);
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
-
- /* override to IPPROTO_ETHERIP for bridged traffic */
+ /* Now pull back the af that we stashed in the csum_data. */
if (ifp->if_bridge)
af = AF_LINK;
-
+ else
+ af = m->m_pkthdr.csum_data;
+ m->m_flags &= ~(M_BCAST|M_MCAST);
M_SETFIB(m, sc->gif_fibnum);
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
/* inner AF-specific encapsulation */
-
+ ecn = 0;
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ proto = IPPROTO_IPV4;
+ if (m->m_len < sizeof(struct ip))
+ m = m_pullup(m, sizeof(struct ip));
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto err;
+ }
+ ip = mtod(m, struct ip *);
+ ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &ecn, &ip->ip_tos);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ proto = IPPROTO_IPV6;
+ if (m->m_len < sizeof(struct ip6_hdr))
+ m = m_pullup(m, sizeof(struct ip6_hdr));
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto err;
+ }
+ t = 0;
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &t, &ip6->ip6_flow);
+ ecn = (ntohl(t) >> 20) & 0xff;
+ break;
+#endif
+ case AF_LINK:
+ proto = IPPROTO_ETHERIP;
+ M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto err;
+ }
+ eth = mtod(m, struct etherip_header *);
+ eth->eip_resvh = 0;
+ eth->eip_ver = ETHERIP_VERSION;
+ eth->eip_resvl = 0;
+ break;
+ default:
+ error = EAFNOSUPPORT;
+ m_freem(m);
+ goto err;
+ }
/* XXX should we check if our outer source is legal? */
-
/* dispatch to output logic based on outer AF */
- switch (sc->gif_psrc->sa_family) {
+ switch (sc->gif_family) {
#ifdef INET
case AF_INET:
- error = in_gif_output(ifp, af, m);
+ error = in_gif_output(ifp, m, proto, ecn);
break;
#endif
#ifdef INET6
case AF_INET6:
- error = in6_gif_output(ifp, af, m);
+ error = in6_gif_output(ifp, m, proto, ecn);
break;
#endif
default:
- m_freem(m);
- error = ENETDOWN;
+ m_freem(m);
}
-
- GIF_UNLOCK(sc);
- end:
+err:
if (error)
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (error);
}
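
One detail in the IPv6 branch of gif_transmit() is worth unpacking: the
first 32 bits of an IPv6 header are version (4 bits), traffic class (8 bits,
which carries ECN), then the 20-bit flow label. After ip6_ecn_ingress()
rewrites the word t, the traffic class is recovered with:

	/* |ver(4)|traffic class(8)|flow label(20)|  -- host byte order */
	ecn = (ntohl(t) >> 20) & 0xff;	/* drop the flow label, keep 8 bits */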
+static void
+gif_qflush(struct ifnet *ifp __unused)
+{
+
+}
+
+#define MTAG_GIF 1080679712
+static int
+gif_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+ struct m_tag *mtag;
+ int count;
+
+ /*
+ * A misconfigured gif can recurse into itself indefinitely;
+ * we prevent this by detecting loops with mbuf tags.
+ *
+ * Deep nesting may also exhaust the kernel stack, so we
+ * enforce an upper limit on the nesting level.
+ */
+ count = 1;
+ mtag = NULL;
+ while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) {
+ if (*(struct ifnet **)(mtag + 1) == ifp) {
+ log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
+ return (EIO);
+ }
+ count++;
+ }
+ if (count > V_max_gif_nesting) {
+ log(LOG_NOTICE,
+ "%s: if_output recursively called too many times(%d)\n",
+ if_name(ifp), count);
+ return (EIO);
+ }
+ mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL)
+ return (ENOMEM);
+ *(struct ifnet **)(mtag + 1) = ifp;
+ m_tag_prepend(m, mtag);
+ return (0);
+}
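
gif_check_nesting() is an instance of a general mbuf-tag visitation pattern;
a minimal standalone sketch with an arbitrary cookie value and hypothetical
names (m_tag_locate/m_tag_alloc/m_tag_prepend are the real mbuf(9) API):

#define MTAG_EXAMPLE	0x45584d50	/* arbitrary cookie ("EXMP") */

/* Tag a packet with our identity, failing if we already tagged it. */
static int
example_mark_once(struct mbuf *m, void *self)
{
	struct m_tag *mtag;

	mtag = NULL;
	while ((mtag = m_tag_locate(m, MTAG_EXAMPLE, 0, mtag)) != NULL)
		if (*(void **)(mtag + 1) == self)
			return (EIO);	/* loop: already visited here */
	mtag = m_tag_alloc(MTAG_EXAMPLE, 0, sizeof(void *), M_NOWAIT);
	if (mtag == NULL)
		return (ENOMEM);
	*(void **)(mtag + 1) = self;
	m_tag_prepend(m, mtag);
	return (0);
}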
+
+int
+gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint32_t af;
+
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+ /*
+ * Save the af in the inbound packet's csum_data field. This is a
+ * cheat: the field is reused to carry the af over to gif_transmit(),
+ * which avoids allocating yet another mtag.
+ */
+ m->m_pkthdr.csum_data = af;
+ return (ifp->if_transmit(ifp, m));
+}
+
void
-gif_input(m, af, ifp)
- struct mbuf *m;
- int af;
- struct ifnet *ifp;
+gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
{
- int isr, n;
- struct gif_softc *sc;
struct etherip_header *eip;
+#ifdef INET
+ struct ip *ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6;
+ uint32_t t;
+#endif
+ struct gif_softc *sc;
struct ether_header *eh;
struct ifnet *oldifp;
+ int isr, n, af;
if (ifp == NULL) {
/* just in case */
@@ -506,20 +536,67 @@ gif_input(m, af, ifp)
}
sc = ifp->if_softc;
m->m_pkthdr.rcvif = ifp;
+ m_clrprotoflags(m);
+ switch (proto) {
+#ifdef INET
+ case IPPROTO_IPV4:
+ af = AF_INET;
+ if (m->m_len < sizeof(struct ip))
+ m = m_pullup(m, sizeof(struct ip));
+ if (m == NULL)
+ goto drop;
+ ip = mtod(m, struct ip *);
+ if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
+ m_freem(m);
+ goto drop;
+ }
+ break;
+#endif
+#ifdef INET6
+ case IPPROTO_IPV6:
+ af = AF_INET6;
+ if (m->m_len < sizeof(struct ip6_hdr))
+ m = m_pullup(m, sizeof(struct ip6_hdr));
+ if (m == NULL)
+ goto drop;
+ t = htonl((uint32_t)ecn << 20);
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
+ m_freem(m);
+ goto drop;
+ }
+ break;
+#endif
+ case IPPROTO_ETHERIP:
+ af = AF_LINK;
+ break;
+ default:
+ m_freem(m);
+ goto drop;
+ }
#ifdef MAC
mac_ifnet_create_mbuf(ifp, m);
#endif
if (bpf_peers_present(ifp->if_bpf)) {
- u_int32_t af1 = af;
+ uint32_t af1 = af;
bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
}
+ if ((ifp->if_flags & IFF_MONITOR) != 0) {
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ m_freem(m);
+ return;
+ }
+
if (ng_gif_input_p != NULL) {
(*ng_gif_input_p)(ifp, &m, af);
if (m == NULL)
- return;
+ goto drop;
}
/*
@@ -546,34 +623,15 @@ gif_input(m, af, ifp)
#endif
case AF_LINK:
n = sizeof(struct etherip_header) + sizeof(struct ether_header);
- if (n > m->m_len) {
+ if (n > m->m_len)
m = m_pullup(m, n);
- if (m == NULL) {
- ifp->if_ierrors++;
- return;
- }
- }
-
+ if (m == NULL)
+ goto drop;
eip = mtod(m, struct etherip_header *);
- /*
- * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
- * accepts an EtherIP packet with reversed version field in
- * the header. This is a knob for backward compatibility
- * with FreeBSD 7.2R or prior.
- */
- if (sc->gif_options & GIF_ACCEPT_REVETHIP) {
- if (eip->eip_resvl != ETHERIP_VERSION
- && eip->eip_ver != ETHERIP_VERSION) {
- /* discard unknown versions */
- m_freem(m);
- return;
- }
- } else {
- if (eip->eip_ver != ETHERIP_VERSION) {
- /* discard unknown versions */
- m_freem(m);
- return;
- }
+ if (eip->eip_ver != ETHERIP_VERSION) {
+ /* discard unknown versions */
+ m_freem(m);
+ goto drop;
}
m_adj(m, sizeof(struct etherip_header));
@@ -588,7 +646,7 @@ gif_input(m, af, ifp)
m->m_flags |= M_BCAST;
else
m->m_flags |= M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
BRIDGE_INPUT(ifp, m);
@@ -613,59 +671,61 @@ gif_input(m, af, ifp)
return;
}
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
+ return;
+drop:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
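
[Editor's note] The IPPROTO_IPV4/IPPROTO_IPV6 cases in gif_input() above run the decapsulated packet through ip_ecn_egress()/ip6_ecn_egress(). A standalone sketch of the RFC 3168 decapsulation rule that the ECN_ALLOWED mode (selected via IFF_LINK1) enforces; the helper name and constants are local to this sketch:

#include <stdint.h>
#include <stdio.h>

#define ECN_MASK	0x03	/* low two TOS bits carry ECN */
#define ECN_NOTECT	0x00	/* not ECN-capable */
#define ECN_CE		0x03	/* congestion experienced */

/* Return 1 to accept (updating the inner TOS in place), 0 to drop. */
static int
ecn_egress(uint8_t outer_tos, uint8_t *inner_tos)
{
	if ((outer_tos & ECN_MASK) == ECN_CE) {
		/* Outer CE: drop unless the inner packet is ECN-capable. */
		if ((*inner_tos & ECN_MASK) == ECN_NOTECT)
			return (0);
		*inner_tos |= ECN_CE;	/* propagate the congestion mark */
	}
	return (1);
}

int
main(void)
{
	uint8_t tos = 0x01;	/* inner ECT(1) */

	printf("accept=%d tos=0x%02x\n", ecn_egress(0x03, &tos), tos);
	tos = 0x00;		/* inner Not-ECT */
	printf("accept=%d tos=0x%02x\n", ecn_egress(0x03, &tos), tos);
	return (0);
}
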
/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
int
-gif_ioctl(ifp, cmd, data)
- struct ifnet *ifp;
- u_long cmd;
- caddr_t data;
+gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- struct gif_softc *sc = ifp->if_softc;
- struct ifreq *ifr = (struct ifreq*)data;
- int error = 0, size;
- u_int options;
+ GIF_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq*)data;
struct sockaddr *dst, *src;
-#ifdef SIOCSIFMTU /* xxx */
- u_long mtu;
+ struct gif_softc *sc;
+#ifdef INET
+ struct sockaddr_in *sin = NULL;
#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6 = NULL;
+#endif
+ u_int options;
+ int error;
switch (cmd) {
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
- break;
-
- case SIOCSIFDSTADDR:
- break;
-
case SIOCADDMULTI:
case SIOCDELMULTI:
- break;
-
-#ifdef SIOCSIFMTU /* xxx */
case SIOCGIFMTU:
- break;
-
+ case SIOCSIFFLAGS:
+ return (0);
case SIOCSIFMTU:
- mtu = ifr->ifr_mtu;
- if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
+ if (ifr->ifr_mtu < GIF_MTU_MIN ||
+ ifr->ifr_mtu > GIF_MTU_MAX)
return (EINVAL);
- ifp->if_mtu = mtu;
- break;
-#endif /* SIOCSIFMTU */
-
-#ifdef INET
+ else
+ ifp->if_mtu = ifr->ifr_mtu;
+ return (0);
+ }
+ sx_xlock(&gif_ioctl_sx);
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENXIO;
+ goto bad;
+ }
+ error = 0;
+ switch (cmd) {
case SIOCSIFPHYADDR:
-#endif
#ifdef INET6
case SIOCSIFPHYADDR_IN6:
-#endif /* INET6 */
- case SIOCSLIFPHYADDR:
+#endif
+ error = EINVAL;
switch (cmd) {
#ifdef INET
case SIOCSIFPHYADDR:
@@ -683,199 +743,169 @@ gif_ioctl(ifp, cmd, data)
&(((struct in6_aliasreq *)data)->ifra_dstaddr);
break;
#endif
- case SIOCSLIFPHYADDR:
- src = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->addr);
- dst = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->dstaddr);
- break;
default:
- return EINVAL;
+ goto bad;
}
-
/* sa_family must be equal */
- if (src->sa_family != dst->sa_family)
- return EINVAL;
+ if (src->sa_family != dst->sa_family ||
+ src->sa_len != dst->sa_len)
+ goto bad;
/* validate sa_len */
+ /* check sa_family looks sane for the cmd */
switch (src->sa_family) {
#ifdef INET
case AF_INET:
if (src->sa_len != sizeof(struct sockaddr_in))
- return EINVAL;
+ goto bad;
+ if (cmd != SIOCSIFPHYADDR) {
+ error = EAFNOSUPPORT;
+ goto bad;
+ }
+ if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
+ satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
break;
#endif
#ifdef INET6
case AF_INET6:
if (src->sa_len != sizeof(struct sockaddr_in6))
- return EINVAL;
- break;
-#endif
- default:
- return EAFNOSUPPORT;
- }
- switch (dst->sa_family) {
-#ifdef INET
- case AF_INET:
- if (dst->sa_len != sizeof(struct sockaddr_in))
- return EINVAL;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (dst->sa_len != sizeof(struct sockaddr_in6))
- return EINVAL;
+ goto bad;
+ if (cmd != SIOCSIFPHYADDR_IN6) {
+ error = EAFNOSUPPORT;
+ goto bad;
+ }
+ error = EADDRNOTAVAIL;
+			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) ||
+			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
+ goto bad;
+ /*
+ * Check validity of the scope zone ID of the
+ * addresses, and convert it into the kernel
+ * internal form if necessary.
+ */
+ error = sa6_embedscope(satosin6(src), 0);
+ if (error != 0)
+ goto bad;
+ error = sa6_embedscope(satosin6(dst), 0);
+ if (error != 0)
+ goto bad;
break;
#endif
default:
- return EAFNOSUPPORT;
- }
-
- /* check sa_family looks sane for the cmd */
- switch (cmd) {
- case SIOCSIFPHYADDR:
- if (src->sa_family == AF_INET)
- break;
- return EAFNOSUPPORT;
-#ifdef INET6
- case SIOCSIFPHYADDR_IN6:
- if (src->sa_family == AF_INET6)
- break;
- return EAFNOSUPPORT;
-#endif /* INET6 */
- case SIOCSLIFPHYADDR:
- /* checks done in the above */
- break;
+ error = EAFNOSUPPORT;
+ goto bad;
}
-
- error = gif_set_tunnel(GIF2IFP(sc), src, dst);
+ error = gif_set_tunnel(ifp, src, dst);
break;
-
-#ifdef SIOCDIFPHYADDR
case SIOCDIFPHYADDR:
- gif_delete_tunnel(GIF2IFP(sc));
+ gif_delete_tunnel(ifp);
break;
-#endif
-
case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
-#endif /* INET6 */
- if (sc->gif_psrc == NULL) {
+ case SIOCGIFPDSTADDR_IN6:
+#endif
+ if (sc->gif_family == 0) {
error = EADDRNOTAVAIL;
- goto bad;
+ break;
}
- src = sc->gif_psrc;
+ GIF_RLOCK(sc);
switch (cmd) {
#ifdef INET
case SIOCGIFPSRCADDR:
- dst = &ifr->ifr_addr;
- size = sizeof(ifr->ifr_addr);
+ case SIOCGIFPDSTADDR:
+ if (sc->gif_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
break;
-#endif /* INET */
+#endif
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
- dst = (struct sockaddr *)
+ case SIOCGIFPDSTADDR_IN6:
+ if (sc->gif_family != AF_INET6) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin6 = (struct sockaddr_in6 *)
&(((struct in6_ifreq *)data)->ifr_addr);
- size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
break;
-#endif /* INET6 */
+#endif
default:
- error = EADDRNOTAVAIL;
- goto bad;
- }
- if (src->sa_len > size)
- return EINVAL;
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
-#ifdef INET6
- if (dst->sa_family == AF_INET6) {
- error = sa6_recoverscope((struct sockaddr_in6 *)dst);
- if (error != 0)
- return (error);
+ error = EAFNOSUPPORT;
}
+ if (error == 0) {
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ sin->sin_addr = sc->gif_iphdr->ip_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ sin->sin_addr = sc->gif_iphdr->ip_dst;
+ break;
#endif
- break;
-
- case SIOCGIFPDSTADDR:
#ifdef INET6
- case SIOCGIFPDSTADDR_IN6:
-#endif /* INET6 */
- if (sc->gif_pdst == NULL) {
- error = EADDRNOTAVAIL;
- goto bad;
+ case SIOCGIFPSRCADDR_IN6:
+ sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
+ break;
+ case SIOCGIFPDSTADDR_IN6:
+ sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
+ break;
+#endif
+ }
}
- src = sc->gif_pdst;
+ GIF_RUNLOCK(sc);
+ if (error != 0)
+ break;
switch (cmd) {
#ifdef INET
+ case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
- dst = &ifr->ifr_addr;
- size = sizeof(ifr->ifr_addr);
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin);
+ if (error != 0)
+ memset(sin, 0, sizeof(*sin));
break;
-#endif /* INET */
+#endif
#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
case SIOCGIFPDSTADDR_IN6:
- dst = (struct sockaddr *)
- &(((struct in6_ifreq *)data)->ifr_addr);
- size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
- break;
-#endif /* INET6 */
- default:
- error = EADDRNOTAVAIL;
- goto bad;
- }
- if (src->sa_len > size)
- return EINVAL;
- error = prison_if(curthread->td_ucred, src);
- if (error != 0)
- return (error);
- error = prison_if(curthread->td_ucred, dst);
- if (error != 0)
- return (error);
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
-#ifdef INET6
- if (dst->sa_family == AF_INET6) {
- error = sa6_recoverscope((struct sockaddr_in6 *)dst);
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin6);
+ if (error == 0)
+ error = sa6_recoverscope(sin6);
if (error != 0)
- return (error);
- }
+ memset(sin6, 0, sizeof(*sin6));
#endif
- break;
-
- case SIOCGLIFPHYADDR:
- if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
- error = EADDRNOTAVAIL;
- goto bad;
}
-
- /* copy src */
- src = sc->gif_psrc;
- dst = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->addr);
- size = sizeof(((struct if_laddrreq *)data)->addr);
- if (src->sa_len > size)
- return EINVAL;
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
-
- /* copy dst */
- src = sc->gif_pdst;
- dst = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->dstaddr);
- size = sizeof(((struct if_laddrreq *)data)->dstaddr);
- if (src->sa_len > size)
- return EINVAL;
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
break;
-
- case SIOCSIFFLAGS:
- /* if_ioctl() takes care of it */
+ case SIOCGTUNFIB:
+ ifr->ifr_fib = sc->gif_fibnum;
+ break;
+ case SIOCSTUNFIB:
+ if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
+ break;
+ if (ifr->ifr_fib >= rt_numfibs)
+ error = EINVAL;
+ else
+ sc->gif_fibnum = ifr->ifr_fib;
break;
-
case GIFGOPTS:
options = sc->gif_options;
- error = copyout(&options, ifr->ifr_data,
- sizeof(options));
+ error = copyout(&options, ifr->ifr_data, sizeof(options));
break;
-
case GIFSOPTS:
if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
break;
@@ -887,151 +917,154 @@ gif_ioctl(ifp, cmd, data)
else
sc->gif_options = options;
break;
-
default:
error = EINVAL;
break;
}
- bad:
- return error;
+bad:
+ sx_xunlock(&gif_ioctl_sx);
+ return (error);
}
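
[Editor's note] The SIOCSIFPHYADDR branch of gif_ioctl() above reduces to a fixed validation order for the src/dst sockaddr pair: equal family, equal length, per-family length, no wildcard endpoint. A standalone sketch of the same checks for the INET case (validate_tunnel_pair() is an invented helper; sa_len assumes a BSD userland):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

static int
validate_tunnel_pair(const struct sockaddr *src, const struct sockaddr *dst)
{
	const struct sockaddr_in *s = (const struct sockaddr_in *)src;
	const struct sockaddr_in *d = (const struct sockaddr_in *)dst;

	if (src->sa_family != dst->sa_family ||
	    src->sa_len != dst->sa_len)
		return (EINVAL);		/* families/lengths must match */
	if (src->sa_family != AF_INET ||
	    src->sa_len != sizeof(struct sockaddr_in))
		return (EAFNOSUPPORT);		/* sketch covers INET only */
	if (s->sin_addr.s_addr == INADDR_ANY ||
	    d->sin_addr.s_addr == INADDR_ANY)
		return (EADDRNOTAVAIL);		/* no wildcard endpoints */
	return (0);
}

int
main(void)
{
	struct sockaddr_in src, dst;

	memset(&src, 0, sizeof(src));
	memset(&dst, 0, sizeof(dst));
	src.sin_family = dst.sin_family = AF_INET;
	src.sin_len = dst.sin_len = sizeof(struct sockaddr_in);
	src.sin_addr.s_addr = htonl(0x0a000001);	/* 10.0.0.1 */
	dst.sin_addr.s_addr = INADDR_ANY;		/* invalid: wildcard */
	printf("error = %d\n", validate_tunnel_pair(
	    (struct sockaddr *)&src, (struct sockaddr *)&dst));
	return (0);
}
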
-/*
- * XXXRW: There's a general event-ordering issue here: the code to check
- * if a given tunnel is already present happens before we perform a
- * potentially blocking setup of the tunnel. This code needs to be
- * re-ordered so that the check and replacement can be atomic using
- * a mutex.
- */
-int
-gif_set_tunnel(ifp, src, dst)
- struct ifnet *ifp;
- struct sockaddr *src;
- struct sockaddr *dst;
+static void
+gif_detach(struct gif_softc *sc)
{
- struct gif_softc *sc = ifp->if_softc;
- struct gif_softc *sc2;
- struct sockaddr *osrc, *odst, *sa;
- int error = 0;
-
- mtx_lock(&gif_mtx);
- LIST_FOREACH(sc2, &V_gif_softc_list, gif_list) {
- if (sc2 == sc)
- continue;
- if (!sc2->gif_pdst || !sc2->gif_psrc)
- continue;
- if (sc2->gif_pdst->sa_family != dst->sa_family ||
- sc2->gif_pdst->sa_len != dst->sa_len ||
- sc2->gif_psrc->sa_family != src->sa_family ||
- sc2->gif_psrc->sa_len != src->sa_len)
- continue;
-
- /*
- * Disallow parallel tunnels unless instructed
- * otherwise.
- */
- if (!V_parallel_tunnels &&
- bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
- bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
- error = EADDRNOTAVAIL;
- mtx_unlock(&gif_mtx);
- goto bad;
- }
- /* XXX both end must be valid? (I mean, not 0.0.0.0) */
- }
- mtx_unlock(&gif_mtx);
+ sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+ if (sc->gif_ecookie != NULL)
+ encap_detach(sc->gif_ecookie);
+ sc->gif_ecookie = NULL;
+}
+
+static int
+gif_attach(struct gif_softc *sc, int af)
+{
- /* XXX we can detach from both, but be polite just in case */
- if (sc->gif_psrc)
- switch (sc->gif_psrc->sa_family) {
+ sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+ switch (af) {
#ifdef INET
- case AF_INET:
- (void)in_gif_detach(sc);
- break;
+ case AF_INET:
+ return (in_gif_attach(sc));
#endif
#ifdef INET6
- case AF_INET6:
- (void)in6_gif_detach(sc);
- break;
+ case AF_INET6:
+ return (in6_gif_attach(sc));
#endif
- }
-
- osrc = sc->gif_psrc;
- sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
- bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
- sc->gif_psrc = sa;
+ }
+ return (EAFNOSUPPORT);
+}
- odst = sc->gif_pdst;
- sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
- bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
- sc->gif_pdst = sa;
+static int
+gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
+{
+ struct gif_softc *sc = ifp->if_softc;
+ struct gif_softc *tsc;
+#ifdef INET
+ struct ip *ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ void *hdr;
+ int error = 0;
- switch (sc->gif_psrc->sa_family) {
+ if (sc == NULL)
+ return (ENXIO);
+ /* Disallow parallel tunnels unless instructed otherwise. */
+ if (V_parallel_tunnels == 0) {
+ GIF_LIST_LOCK();
+ LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
+ if (tsc == sc || tsc->gif_family != src->sa_family)
+ continue;
+#ifdef INET
+ if (tsc->gif_family == AF_INET &&
+ tsc->gif_iphdr->ip_src.s_addr ==
+ satosin(src)->sin_addr.s_addr &&
+ tsc->gif_iphdr->ip_dst.s_addr ==
+ satosin(dst)->sin_addr.s_addr) {
+ error = EADDRNOTAVAIL;
+ GIF_LIST_UNLOCK();
+ goto bad;
+ }
+#endif
+#ifdef INET6
+ if (tsc->gif_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
+ &satosin6(src)->sin6_addr) &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
+ &satosin6(dst)->sin6_addr)) {
+ error = EADDRNOTAVAIL;
+ GIF_LIST_UNLOCK();
+ goto bad;
+ }
+#endif
+ }
+ GIF_LIST_UNLOCK();
+ }
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
- error = in_gif_attach(sc);
+ hdr = ip = malloc(sizeof(struct ip), M_GIF,
+ M_WAITOK | M_ZERO);
+ ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
+ ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
break;
#endif
#ifdef INET6
case AF_INET6:
- /*
- * Check validity of the scope zone ID of the addresses, and
- * convert it into the kernel internal form if necessary.
- */
- error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
- if (error != 0)
- break;
- error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
- if (error != 0)
- break;
- error = in6_gif_attach(sc);
+ hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
+ M_WAITOK | M_ZERO);
+ ip6->ip6_src = satosin6(src)->sin6_addr;
+ ip6->ip6_dst = satosin6(dst)->sin6_addr;
+ ip6->ip6_vfc = IPV6_VERSION;
break;
#endif
+ default:
+ return (EAFNOSUPPORT);
}
- if (error) {
- /* rollback */
- free((caddr_t)sc->gif_psrc, M_IFADDR);
- free((caddr_t)sc->gif_pdst, M_IFADDR);
- sc->gif_psrc = osrc;
- sc->gif_pdst = odst;
- goto bad;
- }
-
- if (osrc)
- free((caddr_t)osrc, M_IFADDR);
- if (odst)
- free((caddr_t)odst, M_IFADDR);
- bad:
- if (sc->gif_psrc && sc->gif_pdst)
+ if (sc->gif_family != src->sa_family)
+ gif_detach(sc);
+ if (sc->gif_family == 0 ||
+ sc->gif_family != src->sa_family)
+ error = gif_attach(sc, src->sa_family);
+
+ GIF_WLOCK(sc);
+ if (sc->gif_family != 0)
+ free(sc->gif_hdr, M_GIF);
+ sc->gif_family = src->sa_family;
+ sc->gif_hdr = hdr;
+ GIF_WUNLOCK(sc);
+#if defined(INET) || defined(INET6)
+bad:
+#endif
+ if (error == 0 && sc->gif_family != 0) {
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- else
+ if_link_state_change(ifp, LINK_STATE_UP);
+ } else {
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-
- return error;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ }
+ return (error);
}
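
[Editor's note] gif_set_tunnel() above precomputes an outer header template once, so the transmit path can prepend it with a single bcopy() instead of rebuilding it per packet. A userspace sketch of building the IPv4 template (build_outer_ip() is invented; the remaining ip fields are assumed to be completed by the output path, as in the kernel code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/ip.h>		/* struct ip (BSD layout) */

static struct ip *
build_outer_ip(in_addr_t src, in_addr_t dst)
{
	struct ip *ip;

	ip = calloc(1, sizeof(*ip));	/* zeroed, like M_ZERO */
	if (ip == NULL)
		return (NULL);
	ip->ip_src.s_addr = src;	/* local tunnel endpoint */
	ip->ip_dst.s_addr = dst;	/* remote tunnel endpoint */
	/* ip_v/ip_hl/ip_ttl/ip_p etc. are filled in by the output path. */
	return (ip);
}

int
main(void)
{
	struct ip *hdr = build_outer_ip(htonl(0x0a000001), htonl(0x0a000002));

	if (hdr != NULL)
		printf("template of %zu bytes ready\n", sizeof(*hdr));
	free(hdr);
	return (0);
}
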
-void
-gif_delete_tunnel(ifp)
- struct ifnet *ifp;
+static void
+gif_delete_tunnel(struct ifnet *ifp)
{
struct gif_softc *sc = ifp->if_softc;
+ int family;
- if (sc->gif_psrc) {
- free((caddr_t)sc->gif_psrc, M_IFADDR);
- sc->gif_psrc = NULL;
- }
- if (sc->gif_pdst) {
- free((caddr_t)sc->gif_pdst, M_IFADDR);
- sc->gif_pdst = NULL;
+ if (sc == NULL)
+ return;
+
+ GIF_WLOCK(sc);
+ family = sc->gif_family;
+ sc->gif_family = 0;
+ GIF_WUNLOCK(sc);
+ if (family != 0) {
+ gif_detach(sc);
+ free(sc->gif_hdr, M_GIF);
}
- /* it is safe to detach from both */
-#ifdef INET
- (void)in_gif_detach(sc);
-#endif
-#ifdef INET6
- (void)in6_gif_detach(sc);
-#endif
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
}
diff --git a/freebsd/sys/net/if_gif.h b/freebsd/sys/net/if_gif.h
index a2f214c5..28da85bd 100644
--- a/freebsd/sys/net/if_gif.h
+++ b/freebsd/sys/net/if_gif.h
@@ -30,21 +30,17 @@
* SUCH DAMAGE.
*/
-/*
- * if_gif.h
- */
-
#ifndef _NET_IF_GIF_H_
#define _NET_IF_GIF_H_
-
#ifdef _KERNEL
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <netinet/in.h>
-/* xxx sigh, why route have struct route instead of pointer? */
+struct ip;
+struct ip6_hdr;
struct encaptab;
extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp,
@@ -56,44 +52,44 @@ extern void (*ng_gif_attach_p)(struct ifnet *ifp);
extern void (*ng_gif_detach_p)(struct ifnet *ifp);
struct gif_softc {
- struct ifnet *gif_ifp;
- struct mtx gif_mtx;
- struct sockaddr *gif_psrc; /* Physical src addr */
- struct sockaddr *gif_pdst; /* Physical dst addr */
+ struct ifnet *gif_ifp;
+ struct rmlock gif_lock;
+ const struct encaptab *gif_ecookie;
+ int gif_family;
+ int gif_flags;
+ u_int gif_fibnum;
+ u_int gif_options;
+ void *gif_netgraph; /* netgraph node info */
union {
- struct route gifscr_ro; /* xxx */
+ void *hdr;
+ struct ip *iphdr;
#ifdef INET6
- struct route_in6 gifscr_ro6; /* xxx */
+ struct ip6_hdr *ip6hdr;
#endif
- } gifsc_gifscr;
- int gif_flags;
- u_int gif_fibnum;
- const struct encaptab *encap_cookie4;
- const struct encaptab *encap_cookie6;
- void *gif_netgraph; /* ng_gif(4) netgraph node info */
- u_int gif_options;
- LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */
+ } gif_uhdr;
+ LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */
};
#define GIF2IFP(sc) ((sc)->gif_ifp)
-#define GIF_LOCK_INIT(sc) mtx_init(&(sc)->gif_mtx, "gif softc", \
- NULL, MTX_DEF)
-#define GIF_LOCK_DESTROY(sc) mtx_destroy(&(sc)->gif_mtx)
-#define GIF_LOCK(sc) mtx_lock(&(sc)->gif_mtx)
-#define GIF_UNLOCK(sc) mtx_unlock(&(sc)->gif_mtx)
-#define GIF_LOCK_ASSERT(sc) mtx_assert(&(sc)->gif_mtx, MA_OWNED)
-
-#define gif_ro gifsc_gifscr.gifscr_ro
+#define GIF_LOCK_INIT(sc) rm_init(&(sc)->gif_lock, "gif softc")
+#define GIF_LOCK_DESTROY(sc) rm_destroy(&(sc)->gif_lock)
+#define GIF_RLOCK_TRACKER struct rm_priotracker gif_tracker
+#define GIF_RLOCK(sc) rm_rlock(&(sc)->gif_lock, &gif_tracker)
+#define GIF_RUNLOCK(sc) rm_runlock(&(sc)->gif_lock, &gif_tracker)
+#define GIF_RLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_RLOCKED)
+#define GIF_WLOCK(sc) rm_wlock(&(sc)->gif_lock)
+#define GIF_WUNLOCK(sc) rm_wunlock(&(sc)->gif_lock)
+#define GIF_WLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_WLOCKED)
+
+#define gif_iphdr gif_uhdr.iphdr
+#define gif_hdr gif_uhdr.hdr
#ifdef INET6
-#define gif_ro6 gifsc_gifscr.gifscr_ro6
+#define gif_ip6hdr gif_uhdr.ip6hdr
#endif
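
[Editor's note] The macros above replace the old per-softc mutex with a read-mostly lock: cheap shared locking on the packet path, exclusive locking only when the tunnel configuration changes. A pthread_rwlock sketch of the same discipline (an analogue only; rm_rlock() also uses a caller-supplied rm_priotracker, which has no userspace equivalent here):

#include <stdio.h>
#include <pthread.h>

static pthread_rwlock_t cfg_lock = PTHREAD_RWLOCK_INITIALIZER;
static int tunnel_family;		/* stands in for sc->gif_family */

static int
transmit_path(void)			/* cf. GIF_RLOCK / GIF_RUNLOCK */
{
	int fam;

	pthread_rwlock_rdlock(&cfg_lock);
	fam = tunnel_family;		/* snapshot config under rlock */
	pthread_rwlock_unlock(&cfg_lock);
	return (fam);
}

static void
set_tunnel(int fam)			/* cf. GIF_WLOCK / GIF_WUNLOCK */
{
	pthread_rwlock_wrlock(&cfg_lock);
	tunnel_family = fam;
	pthread_rwlock_unlock(&cfg_lock);
}

int
main(void)
{
	set_tunnel(2);			/* e.g. AF_INET */
	printf("family = %d\n", transmit_path());
	return (0);
}
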
#define GIF_MTU (1280) /* Default MTU */
#define GIF_MTU_MIN (1280) /* Minimum MTU */
#define GIF_MTU_MAX (8192) /* Maximum MTU */
-#define MTAG_GIF 1080679712
-#define MTAG_GIF_CALLED 0
-
struct etherip_header {
#if BYTE_ORDER == LITTLE_ENDIAN
u_int eip_resvl:4, /* reserved */
@@ -111,20 +107,26 @@ struct etherip_header {
#define ETHERIP_ALIGN 2
/* Prototypes */
-void gif_input(struct mbuf *, int, struct ifnet *);
-int gif_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+void gif_input(struct mbuf *, struct ifnet *, int, uint8_t);
+int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
-int gif_ioctl(struct ifnet *, u_long, caddr_t);
-int gif_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *);
-void gif_delete_tunnel(struct ifnet *);
int gif_encapcheck(const struct mbuf *, int, int, void *);
+#ifdef INET
+int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
+int in_gif_encapcheck(const struct mbuf *, int, int, void *);
+int in_gif_attach(struct gif_softc *);
+#endif
+#ifdef INET6
+int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
+int in6_gif_encapcheck(const struct mbuf *, int, int, void *);
+int in6_gif_attach(struct gif_softc *);
+#endif
#endif /* _KERNEL */
#define GIFGOPTS _IOWR('i', 150, struct ifreq)
#define GIFSOPTS _IOW('i', 151, struct ifreq)
-#define GIF_ACCEPT_REVETHIP 0x0001
-#define GIF_SEND_REVETHIP 0x0010
-#define GIF_OPTMASK (GIF_ACCEPT_REVETHIP|GIF_SEND_REVETHIP)
+#define GIF_IGNORE_SOURCE 0x0002
+#define GIF_OPTMASK (GIF_IGNORE_SOURCE)
#endif /* _NET_IF_GIF_H_ */
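
[Editor's note] The GIFSOPTS handler earlier in this patch accepts an option word only if all of its bits fall inside GIF_OPTMASK. The same reject-unknown-bits idiom as a standalone sketch (set_options() is an invented helper):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define GIF_IGNORE_SOURCE	0x0002
#define GIF_OPTMASK		(GIF_IGNORE_SOURCE)

static int
set_options(uint32_t *cur, uint32_t opt)
{
	if (opt & ~GIF_OPTMASK)
		return (EINVAL);	/* unknown option bit set */
	*cur = opt;
	return (0);
}

int
main(void)
{
	uint32_t options = 0;

	printf("valid:   %d\n", set_options(&options, GIF_IGNORE_SOURCE));
	printf("invalid: %d\n", set_options(&options, 0x0100));
	return (0);
}
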
diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c
index b7e0bd15..68b515ea 100644
--- a/freebsd/sys/net/if_gre.c
+++ b/freebsd/sys/net/if_gre.c
@@ -1,10 +1,8 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
-/* $FreeBSD$ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -32,24 +30,20 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
*/
-/*
- * Encapsulate L3 protocols into IP
- * See RFC 2784 (successor of RFC 1701 and 1702) for more details.
- * If_gre is compatible with Cisco GRE tunnels, so you can
- * have a NetBSD box as the other end of a tunnel interface of a Cisco
- * router. See gre(4) for more details.
- * Also supported: IP in IP encaps (proto 55) as of RFC 2004
- */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/module.h>
@@ -57,97 +51,76 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
+#include <sys/sx.h>
#include <sys/sysctl.h>
+#include <sys/syslog.h>
#include <sys/systm.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
-#include <net/route.h>
+#include <net/netisr.h>
#include <net/vnet.h>
+#include <net/route.h>
-#ifdef INET
#include <netinet/in.h>
+#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
-#include <netinet/ip_gre.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_encap.h>
-#else
-#error "Huh? if_gre without inet?"
#endif
-#include <net/bpf.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
+#endif
+#include <netinet/ip_encap.h>
+#include <net/bpf.h>
#include <net/if_gre.h>
-/*
- * It is not easy to calculate the right value for a GRE MTU.
- * We leave this task to the admin and use the same default that
- * other vendors use.
- */
-#define GREMTU 1476
-
-#define GRENAME "gre"
-
-#define MTAG_COOKIE_GRE 1307983903
-#define MTAG_GRE_NESTING 1
-struct mtag_gre_nesting {
- uint16_t count;
- uint16_t max;
- struct ifnet *ifp[];
-};
-
-/*
- * gre_mtx protects all global variables in if_gre.c.
- * XXX: gre_softc data not protected yet.
- */
-struct mtx gre_mtx;
-static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation");
-
-struct gre_softc_head gre_softc_list;
+#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
+#define GREMTU 1500
+static const char grename[] = "gre";
+static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
+static VNET_DEFINE(struct mtx, gre_mtx);
+#define V_gre_mtx VNET(gre_mtx)
+#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
+ MTX_DEF)
+#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx)
+#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx)
+#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx)
+
+static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list);
+#define V_gre_softc_list VNET(gre_softc_list)
+static struct sx gre_ioctl_sx;
+SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
static int gre_clone_create(struct if_clone *, int, caddr_t);
static void gre_clone_destroy(struct ifnet *);
-static int gre_ioctl(struct ifnet *, u_long, caddr_t);
-static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *ro);
-
-IFC_SIMPLE_DECLARE(gre, 0);
+static VNET_DEFINE(struct if_clone *, gre_cloner);
+#define V_gre_cloner VNET(gre_cloner)
-static int gre_compute_route(struct gre_softc *sc);
-
-static void greattach(void);
+static void gre_qflush(struct ifnet *);
+static int gre_transmit(struct ifnet *, struct mbuf *);
+static int gre_ioctl(struct ifnet *, u_long, caddr_t);
+static int gre_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
-#ifdef INET
-extern struct domain inetdomain;
-static const struct protosw in_gre_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_GRE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_input,
- .pr_output = (pr_output_t *)rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-static const struct protosw in_mobile_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_MOBILE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_mobile_input,
- .pr_output = (pr_output_t *)rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-#endif
+static void gre_updatehdr(struct gre_softc *);
+static int gre_set_tunnel(struct ifnet *, struct sockaddr *,
+ struct sockaddr *);
+static void gre_delete_tunnel(struct ifnet *);
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
@@ -163,805 +136,851 @@ static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
*/
#define MAX_GRE_NEST 1
#endif
-static int max_gre_nesting = MAX_GRE_NEST;
-SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW,
- &max_gre_nesting, 0, "Max nested tunnels");
-/* ARGSUSED */
+static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST;
+#define V_max_gre_nesting VNET(max_gre_nesting)
+SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
+
+static void
+vnet_gre_init(const void *unused __unused)
+{
+ LIST_INIT(&V_gre_softc_list);
+ GRE_LIST_LOCK_INIT();
+ V_gre_cloner = if_clone_simple(grename, gre_clone_create,
+ gre_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gre_init, NULL);
+
static void
-greattach(void)
+vnet_gre_uninit(const void *unused __unused)
{
- mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF);
- LIST_INIT(&gre_softc_list);
- if_clone_attach(&gre_cloner);
+ if_clone_detach(V_gre_cloner);
+ GRE_LIST_LOCK_DESTROY();
}
+VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gre_uninit, NULL);
static int
-gre_clone_create(ifc, unit, params)
- struct if_clone *ifc;
- int unit;
- caddr_t params;
+gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct gre_softc *sc;
sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
-
+#ifndef __rtems__
+ sc->gre_fibnum = curthread->td_proc->p_fibnum;
+#else /* __rtems__ */
+ sc->gre_fibnum = BSD_DEFAULT_FIB;
+#endif /* __rtems__ */
GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
- if (GRE2IFP(sc) == NULL) {
- free(sc, M_GRE);
- return (ENOSPC);
- }
-
+ GRE_LOCK_INIT(sc);
GRE2IFP(sc)->if_softc = sc;
- if_initname(GRE2IFP(sc), ifc->ifc_name, unit);
+ if_initname(GRE2IFP(sc), grename, unit);
- GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
- GRE2IFP(sc)->if_addrlen = 0;
- GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */
- GRE2IFP(sc)->if_mtu = GREMTU;
+ GRE2IFP(sc)->if_mtu = sc->gre_mtu = GREMTU;
GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
GRE2IFP(sc)->if_output = gre_output;
GRE2IFP(sc)->if_ioctl = gre_ioctl;
- sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
- sc->g_proto = IPPROTO_GRE;
- GRE2IFP(sc)->if_flags |= IFF_LINK0;
- sc->encap = NULL;
-#ifndef __rtems__
- sc->gre_fibnum = curthread->td_proc->p_fibnum;
-#else /* __rtems__ */
- sc->gre_fibnum = BSD_DEFAULT_FIB;
-#endif /* __rtems__ */
- sc->wccp_ver = WCCP_V1;
- sc->key = 0;
+ GRE2IFP(sc)->if_transmit = gre_transmit;
+ GRE2IFP(sc)->if_qflush = gre_qflush;
+ GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
+ GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
if_attach(GRE2IFP(sc));
bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
- mtx_lock(&gre_mtx);
- LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
- mtx_unlock(&gre_mtx);
+ GRE_LIST_LOCK();
+ LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list);
+ GRE_LIST_UNLOCK();
return (0);
}
static void
-gre_clone_destroy(ifp)
- struct ifnet *ifp;
+gre_clone_destroy(struct ifnet *ifp)
{
- struct gre_softc *sc = ifp->if_softc;
-
- mtx_lock(&gre_mtx);
- LIST_REMOVE(sc, sc_list);
- mtx_unlock(&gre_mtx);
+ struct gre_softc *sc;
-#ifdef INET
- if (sc->encap != NULL)
- encap_detach(sc->encap);
-#endif
+ sx_xlock(&gre_ioctl_sx);
+ sc = ifp->if_softc;
+ gre_delete_tunnel(ifp);
+ GRE_LIST_LOCK();
+ LIST_REMOVE(sc, gre_list);
+ GRE_LIST_UNLOCK();
bpfdetach(ifp);
if_detach(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&gre_ioctl_sx);
+
if_free(ifp);
+ GRE_LOCK_DESTROY(sc);
free(sc, M_GRE);
}
-/*
- * The output routine. Takes a packet and encapsulates it in the protocol
- * given by sc->g_proto. See also RFC 1701 and RFC 2004
- */
static int
-gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct route *ro)
+gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- int error = 0;
- struct gre_softc *sc = ifp->if_softc;
- struct greip *gh;
- struct ip *ip;
- struct m_tag *mtag;
- struct mtag_gre_nesting *gt;
- size_t len;
- u_short gre_ip_id = 0;
- uint8_t gre_ip_tos = 0;
- u_int16_t etype = 0;
- struct mobile_h mob_h;
- u_int32_t af;
- int extra = 0, max;
-
- /*
- * gre may cause infinite recursion calls when misconfigured. High
- * nesting level may cause stack exhaustion. We'll prevent this by
- * detecting loops and by introducing upper limit.
- */
- mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL);
- if (mtag != NULL) {
- struct ifnet **ifp2;
-
- gt = (struct mtag_gre_nesting *)(mtag + 1);
- gt->count++;
- if (gt->count > min(gt->max,max_gre_nesting)) {
- printf("%s: hit maximum recursion limit %u on %s\n",
- __func__, gt->count - 1, ifp->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
-
- ifp2 = gt->ifp;
- for (max = gt->count - 1; max > 0; max--) {
- if (*ifp2 == ifp)
- break;
- ifp2++;
- }
- if (*ifp2 == ifp) {
- printf("%s: detected loop with nexting %u on %s\n",
- __func__, gt->count-1, ifp->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
- *ifp2 = ifp;
+ GRE_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr *src, *dst;
+ struct gre_softc *sc;
+#ifdef INET
+ struct sockaddr_in *sin = NULL;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6 = NULL;
+#endif
+ uint32_t opt;
+ int error;
- } else {
- /*
- * Given that people should NOT increase max_gre_nesting beyond
- * their real needs, we allocate once per packet rather than
- * allocating an mtag once per passing through gre.
- *
- * Note: the sysctl does not actually check for saneness, so we
- * limit the maximum numbers of possible recursions here.
- */
- max = imin(max_gre_nesting, 256);
- /* If someone sets the sysctl <= 0, we want at least 1. */
- max = imax(max, 1);
- len = sizeof(struct mtag_gre_nesting) +
- max * sizeof(struct ifnet *);
- mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len,
- M_NOWAIT);
- if (mtag == NULL) {
- m_freem(m);
- error = ENOMEM;
- goto end;
- }
- gt = (struct mtag_gre_nesting *)(mtag + 1);
- bzero(gt, len);
- gt->count = 1;
- gt->max = max;
- *gt->ifp = ifp;
- m_tag_prepend(m, mtag);
+ switch (cmd) {
+ case SIOCSIFMTU:
+		/* XXX: 576 is the historic IPv4 minimum MTU (RFC 791). */
+ if (ifr->ifr_mtu < 576)
+ return (EINVAL);
+ break;
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ return (0);
+ case GRESADDRS:
+ case GRESADDRD:
+ case GREGADDRS:
+ case GREGADDRD:
+ case GRESPROTO:
+ case GREGPROTO:
+ return (EOPNOTSUPP);
}
-
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
- sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
- m_freem(m);
- error = ENETDOWN;
+ src = dst = NULL;
+ sx_xlock(&gre_ioctl_sx);
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENXIO;
goto end;
}
-
- gh = NULL;
- ip = NULL;
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
- if (bpf_peers_present(ifp->if_bpf)) {
- af = dst->sa_family;
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
- }
-
- if ((ifp->if_flags & IFF_MONITOR) != 0) {
- m_freem(m);
- error = ENETDOWN;
+ error = 0;
+ switch (cmd) {
+ case SIOCSIFMTU:
+ GRE_WLOCK(sc);
+ sc->gre_mtu = ifr->ifr_mtu;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
goto end;
- }
-
- m->m_flags &= ~(M_BCAST|M_MCAST);
-
- if (sc->g_proto == IPPROTO_MOBILE) {
- if (dst->sa_family == AF_INET) {
- struct mbuf *m0;
- int msiz;
-
- ip = mtod(m, struct ip *);
-
- /*
- * RFC2004 specifies that fragmented diagrams shouldn't
- * be encapsulated.
- */
- if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = EINVAL; /* is there better errno? */
- goto end;
- }
- memset(&mob_h, 0, MOB_H_SIZ_L);
- mob_h.proto = (ip->ip_p) << 8;
- mob_h.odst = ip->ip_dst.s_addr;
- ip->ip_dst.s_addr = sc->g_dst.s_addr;
-
- /*
- * If the packet comes from our host, we only change
- * the destination address in the IP header.
- * Else we also need to save and change the source
- */
- if (in_hosteq(ip->ip_src, sc->g_src)) {
- msiz = MOB_H_SIZ_S;
- } else {
- mob_h.proto |= MOB_H_SBIT;
- mob_h.osrc = ip->ip_src.s_addr;
- ip->ip_src.s_addr = sc->g_src.s_addr;
- msiz = MOB_H_SIZ_L;
- }
- mob_h.proto = htons(mob_h.proto);
- mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
-
- if ((m->m_data - msiz) < m->m_pktdat) {
- /* need new mbuf */
- MGETHDR(m0, M_DONTWAIT, MT_DATA);
- if (m0 == NULL) {
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = ENOBUFS;
- goto end;
- }
- m0->m_next = m;
- m->m_data += sizeof(struct ip);
- m->m_len -= sizeof(struct ip);
- m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
- m0->m_len = msiz + sizeof(struct ip);
- m0->m_data += max_linkhdr;
- memcpy(mtod(m0, caddr_t), (caddr_t)ip,
- sizeof(struct ip));
- m = m0;
- } else { /* we have some space left in the old one */
- m->m_data -= msiz;
- m->m_len += msiz;
- m->m_pkthdr.len += msiz;
- bcopy(ip, mtod(m, caddr_t),
- sizeof(struct ip));
- }
- ip = mtod(m, struct ip *);
- memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
- ip->ip_len = ntohs(ip->ip_len) + msiz;
- } else { /* AF_INET */
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = EINVAL;
- goto end;
- }
- } else if (sc->g_proto == IPPROTO_GRE) {
- switch (dst->sa_family) {
- case AF_INET:
- ip = mtod(m, struct ip *);
- gre_ip_tos = ip->ip_tos;
- gre_ip_id = ip->ip_id;
- if (sc->wccp_ver == WCCP_V2) {
- extra = sizeof(uint32_t);
- etype = WCCP_PROTOCOL_TYPE;
- } else {
- etype = ETHERTYPE_IP;
- }
- break;
+ case SIOCSIFPHYADDR:
#ifdef INET6
- case AF_INET6:
- gre_ip_id = ip_newid();
- etype = ETHERTYPE_IPV6;
+ case SIOCSIFPHYADDR_IN6:
+#endif
+ error = EINVAL;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ src = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_dstaddr);
break;
#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- etype = ETHERTYPE_ATALK;
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+ src = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_dstaddr);
break;
#endif
default:
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
error = EAFNOSUPPORT;
goto end;
}
-
- /* Reserve space for GRE header + optional GRE key */
- int hdrlen = sizeof(struct greip) + extra;
- if (sc->key)
- hdrlen += sizeof(uint32_t);
- M_PREPEND(m, hdrlen, M_DONTWAIT);
- } else {
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = EINVAL;
- goto end;
- }
-
- if (m == NULL) { /* mbuf allocation failed */
- _IF_DROP(&ifp->if_snd);
- error = ENOBUFS;
- goto end;
- }
-
- M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
-
- gh = mtod(m, struct greip *);
- if (sc->g_proto == IPPROTO_GRE) {
- uint32_t *options = gh->gi_options;
-
- memset((void *)gh, 0, sizeof(struct greip) + extra);
- gh->gi_ptype = htons(etype);
- gh->gi_flags = 0;
-
- /* Add key option */
- if (sc->key)
- {
- gh->gi_flags |= htons(GRE_KP);
- *(options++) = htonl(sc->key);
- }
- }
-
- gh->gi_pr = sc->g_proto;
- if (sc->g_proto != IPPROTO_MOBILE) {
- gh->gi_src = sc->g_src;
- gh->gi_dst = sc->g_dst;
- ((struct ip*)gh)->ip_v = IPPROTO_IPV4;
- ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
- ((struct ip*)gh)->ip_ttl = GRE_TTL;
- ((struct ip*)gh)->ip_tos = gre_ip_tos;
- ((struct ip*)gh)->ip_id = gre_ip_id;
- gh->gi_len = m->m_pkthdr.len;
- }
-
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
- /*
- * Send it off and with IP_FORWARD flag to prevent it from
- * overwriting the ip_id again. ip_id is already set to the
- * ip_id of the encapsulated packet.
- */
- error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
- (struct ip_moptions *)NULL, (struct inpcb *)NULL);
- end:
- if (error)
- ifp->if_oerrors++;
- return (error);
-}
-
-static int
-gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct ifreq *ifr = (struct ifreq *)data;
- struct if_laddrreq *lifr = (struct if_laddrreq *)data;
- struct in_aliasreq *aifr = (struct in_aliasreq *)data;
- struct gre_softc *sc = ifp->if_softc;
- int s;
- struct sockaddr_in si;
- struct sockaddr *sa = NULL;
- int error, adj;
- struct sockaddr_in sp, sm, dp, dm;
- uint32_t key;
-
- error = 0;
- adj = 0;
+ /* sa_family must be equal */
+ if (src->sa_family != dst->sa_family ||
+ src->sa_len != dst->sa_len)
+ goto end;
- s = splnet();
- switch (cmd) {
- case SIOCSIFADDR:
- ifp->if_flags |= IFF_UP;
- break;
- case SIOCSIFDSTADDR:
- break;
- case SIOCSIFFLAGS:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
- break;
- if ((ifr->ifr_flags & IFF_LINK0) != 0)
- sc->g_proto = IPPROTO_GRE;
- else
- sc->g_proto = IPPROTO_MOBILE;
- if ((ifr->ifr_flags & IFF_LINK2) != 0)
- sc->wccp_ver = WCCP_V2;
- else
- sc->wccp_ver = WCCP_V1;
- goto recompute;
- case SIOCSIFMTU:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
- break;
- if (ifr->ifr_mtu < 576) {
- error = EINVAL;
- break;
- }
- ifp->if_mtu = ifr->ifr_mtu;
- break;
- case SIOCGIFMTU:
- ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
- break;
- case SIOCADDMULTI:
- /*
- * XXXRW: Isn't this priv_checkr() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
- break;
- if (ifr == 0) {
- error = EAFNOSUPPORT;
- break;
- }
- switch (ifr->ifr_addr.sa_family) {
+ /* validate sa_len */
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
+ if (src->sa_len != sizeof(struct sockaddr_in))
+ goto end;
break;
#endif
#ifdef INET6
case AF_INET6:
+ if (src->sa_len != sizeof(struct sockaddr_in6))
+ goto end;
break;
#endif
default:
error = EAFNOSUPPORT;
- break;
+ goto end;
}
- break;
- case SIOCDELMULTI:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
- break;
- if (ifr == 0) {
- error = EAFNOSUPPORT;
- break;
+ /* check sa_family looks sane for the cmd */
+ error = EAFNOSUPPORT;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ if (src->sa_family == AF_INET)
+ break;
+ goto end;
+#endif
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+ if (src->sa_family == AF_INET6)
+ break;
+ goto end;
+#endif
}
- switch (ifr->ifr_addr.sa_family) {
+ error = EADDRNOTAVAIL;
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
+ if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
+ satosin(dst)->sin_addr.s_addr == INADDR_ANY)
+ goto end;
break;
#endif
#ifdef INET6
case AF_INET6:
- break;
+		if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) ||
+		    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
+ goto end;
+ /*
+ * Check validity of the scope zone ID of the
+ * addresses, and convert it into the kernel
+ * internal form if necessary.
+ */
+ error = sa6_embedscope(satosin6(src), 0);
+ if (error != 0)
+ goto end;
+ error = sa6_embedscope(satosin6(dst), 0);
+ if (error != 0)
+ goto end;
#endif
- default:
- error = EAFNOSUPPORT;
- break;
}
+ error = gre_set_tunnel(ifp, src, dst);
break;
- case GRESPROTO:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
- break;
- sc->g_proto = ifr->ifr_flags;
- switch (sc->g_proto) {
- case IPPROTO_GRE:
- ifp->if_flags |= IFF_LINK0;
- break;
- case IPPROTO_MOBILE:
- ifp->if_flags &= ~IFF_LINK0;
- break;
- default:
- error = EPROTONOSUPPORT;
- break;
- }
- goto recompute;
- case GREGPROTO:
- ifr->ifr_flags = sc->g_proto;
+ case SIOCDIFPHYADDR:
+ gre_delete_tunnel(ifp);
break;
- case GRESADDRS:
- case GRESADDRD:
- error = priv_check(curthread, PRIV_NET_GRE);
- if (error)
- return (error);
- /*
- * set tunnel endpoints, compute a less specific route
- * to the remote end and mark if as up
- */
- sa = &ifr->ifr_addr;
- if (cmd == GRESADDRS)
- sc->g_src = (satosin(sa))->sin_addr;
- if (cmd == GRESADDRD)
- sc->g_dst = (satosin(sa))->sin_addr;
- recompute:
-#ifdef INET
- if (sc->encap != NULL) {
- encap_detach(sc->encap);
- sc->encap = NULL;
- }
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
#endif
- if ((sc->g_src.s_addr != INADDR_ANY) &&
- (sc->g_dst.s_addr != INADDR_ANY)) {
- bzero(&sp, sizeof(sp));
- bzero(&sm, sizeof(sm));
- bzero(&dp, sizeof(dp));
- bzero(&dm, sizeof(dm));
- sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len =
- sizeof(struct sockaddr_in);
- sp.sin_family = sm.sin_family = dp.sin_family =
- dm.sin_family = AF_INET;
- sp.sin_addr = sc->g_src;
- dp.sin_addr = sc->g_dst;
- sm.sin_addr.s_addr = dm.sin_addr.s_addr =
- INADDR_BROADCAST;
-#ifdef INET
- sc->encap = encap_attach(AF_INET, sc->g_proto,
- sintosa(&sp), sintosa(&sm), sintosa(&dp),
- sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ?
- &in_gre_protosw : &in_mobile_protosw, sc);
- if (sc->encap == NULL)
- printf("%s: unable to attach encap\n",
- if_name(GRE2IFP(sc)));
-#endif
- if (sc->route.ro_rt != 0) /* free old route */
- RTFREE(sc->route.ro_rt);
- if (gre_compute_route(sc) == 0)
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- else
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- }
- break;
- case GREGADDRS:
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- sa = sintosa(&si);
- error = prison_if(curthread->td_ucred, sa);
- if (error != 0)
- break;
- ifr->ifr_addr = *sa;
- break;
- case GREGADDRD:
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- sa = sintosa(&si);
- error = prison_if(curthread->td_ucred, sa);
- if (error != 0)
- break;
- ifr->ifr_addr = *sa;
- break;
- case SIOCSIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- if (aifr->ifra_addr.sin_family != AF_INET ||
- aifr->ifra_dstaddr.sin_family != AF_INET) {
- error = EAFNOSUPPORT;
- break;
- }
- if (aifr->ifra_addr.sin_len != sizeof(si) ||
- aifr->ifra_dstaddr.sin_len != sizeof(si)) {
- error = EINVAL;
- break;
- }
- sc->g_src = aifr->ifra_addr.sin_addr;
- sc->g_dst = aifr->ifra_dstaddr.sin_addr;
- goto recompute;
- case SIOCSLIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- if (lifr->addr.ss_family != AF_INET ||
- lifr->dstaddr.ss_family != AF_INET) {
- error = EAFNOSUPPORT;
- break;
- }
- if (lifr->addr.ss_len != sizeof(si) ||
- lifr->dstaddr.ss_len != sizeof(si)) {
- error = EINVAL;
- break;
- }
- sc->g_src = (satosin(&lifr->addr))->sin_addr;
- sc->g_dst =
- (satosin(&lifr->dstaddr))->sin_addr;
- goto recompute;
- case SIOCDIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- sc->g_src.s_addr = INADDR_ANY;
- sc->g_dst.s_addr = INADDR_ANY;
- goto recompute;
- case SIOCGLIFPHYADDR:
- if (sc->g_src.s_addr == INADDR_ANY ||
- sc->g_dst.s_addr == INADDR_ANY) {
+ if (sc->gre_family == 0) {
error = EADDRNOTAVAIL;
break;
}
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
+ GRE_RLOCK(sc);
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ if (sc->gre_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
break;
- memcpy(&lifr->addr, &si, sizeof(si));
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
+#endif
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ if (sc->gre_family != AF_INET6) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin6 = (struct sockaddr_in6 *)
+ &(((struct in6_ifreq *)data)->ifr_addr);
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
break;
- memcpy(&lifr->dstaddr, &si, sizeof(si));
- break;
- case SIOCGIFPSRCADDR:
+#endif
+ }
+ if (error == 0) {
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ sin->sin_addr = sc->gre_oip.ip_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ sin->sin_addr = sc->gre_oip.ip_dst;
+ break;
+#endif
#ifdef INET6
- case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPSRCADDR_IN6:
+ sin6->sin6_addr = sc->gre_oip6.ip6_src;
+ break;
+ case SIOCGIFPDSTADDR_IN6:
+ sin6->sin6_addr = sc->gre_oip6.ip6_dst;
+ break;
#endif
- if (sc->g_src.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
- break;
+ }
}
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+ GRE_RUNLOCK(sc);
if (error != 0)
break;
- bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
- break;
- case SIOCGIFPDSTADDR:
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin);
+ if (error != 0)
+ memset(sin, 0, sizeof(*sin));
+ break;
+#endif
#ifdef INET6
- case SIOCGIFPDSTADDR_IN6:
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin6);
+ if (error == 0)
+ error = sa6_recoverscope(sin6);
+ if (error != 0)
+ memset(sin6, 0, sizeof(*sin6));
#endif
- if (sc->g_dst.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
- break;
}
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
+ break;
+ case SIOCGTUNFIB:
+ ifr->ifr_fib = sc->gre_fibnum;
+ break;
+ case SIOCSTUNFIB:
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
- bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
+ if (ifr->ifr_fib >= rt_numfibs)
+ error = EINVAL;
+ else
+ sc->gre_fibnum = ifr->ifr_fib;
break;
case GRESKEY:
- error = priv_check(curthread, PRIV_NET_GRE);
- if (error)
- break;
- error = copyin(ifr->ifr_data, &key, sizeof(key));
- if (error)
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
- /* adjust MTU for option header */
- if (key == 0 && sc->key != 0) /* clear */
- adj += sizeof(key);
- else if (key != 0 && sc->key == 0) /* set */
- adj -= sizeof(key);
-
- if (ifp->if_mtu + adj < 576) {
- error = EINVAL;
+ if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
break;
+ if (sc->gre_key != opt) {
+ GRE_WLOCK(sc);
+ sc->gre_key = opt;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
}
- ifp->if_mtu += adj;
- sc->key = key;
break;
case GREGKEY:
- error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key));
+ error = copyout(&sc->gre_key, ifr->ifr_data,
+ sizeof(sc->gre_key));
+ break;
+ case GRESOPTS:
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
+ break;
+ if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
+ break;
+ if (opt & ~GRE_OPTMASK)
+ error = EINVAL;
+ else {
+ if (sc->gre_options != opt) {
+ GRE_WLOCK(sc);
+ sc->gre_options = opt;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
+ }
+ }
break;
+ case GREGOPTS:
+ error = copyout(&sc->gre_options, ifr->ifr_data,
+ sizeof(sc->gre_options));
+ break;
default:
error = EINVAL;
break;
}
-
- splx(s);
+end:
+ sx_xunlock(&gre_ioctl_sx);
return (error);
}
-/*
- * computes a route to our destination that is not the one
- * which would be taken by ip_output(), as this one will loop back to
- * us. If the interface is p2p as a--->b, then a routing entry exists
- * If we now send a packet to b (e.g. ping b), this will come down here
- * gets src=a, dst=b tacked on and would from ip_output() sent back to
- * if_gre.
- * Goal here is to compute a route to b that is less specific than
- * a-->b. We know that this one exists as in normal operation we have
- * at least a default route which matches.
- */
+static void
+gre_updatehdr(struct gre_softc *sc)
+{
+ struct grehdr *gh = NULL;
+ uint32_t *opts;
+ uint16_t flags;
+
+ GRE_WLOCK_ASSERT(sc);
+ switch (sc->gre_family) {
+#ifdef INET
+ case AF_INET:
+ sc->gre_hlen = sizeof(struct greip);
+ sc->gre_oip.ip_v = IPPROTO_IPV4;
+ sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
+ sc->gre_oip.ip_p = IPPROTO_GRE;
+ gh = &sc->gre_gihdr->gi_gre;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sc->gre_hlen = sizeof(struct greip6);
+ sc->gre_oip6.ip6_vfc = IPV6_VERSION;
+ sc->gre_oip6.ip6_nxt = IPPROTO_GRE;
+ gh = &sc->gre_gi6hdr->gi6_gre;
+ break;
+#endif
+ default:
+ return;
+ }
+ flags = 0;
+ opts = gh->gre_opts;
+ if (sc->gre_options & GRE_ENABLE_CSUM) {
+ flags |= GRE_FLAGS_CP;
+ sc->gre_hlen += 2 * sizeof(uint16_t);
+ *opts++ = 0;
+ }
+ if (sc->gre_key != 0) {
+ flags |= GRE_FLAGS_KP;
+ sc->gre_hlen += sizeof(uint32_t);
+ *opts++ = htonl(sc->gre_key);
+ }
+ if (sc->gre_options & GRE_ENABLE_SEQ) {
+ flags |= GRE_FLAGS_SP;
+ sc->gre_hlen += sizeof(uint32_t);
+ *opts++ = 0;
+ } else
+ sc->gre_oseq = 0;
+ gh->gre_flags = htons(flags);
+ GRE2IFP(sc)->if_mtu = sc->gre_mtu - sc->gre_hlen;
+}
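
[Editor's note] gre_updatehdr() above lays out the optional 32-bit GRE words in their RFC 2890 order: checksum+reserved1 (CP), key (KP), sequence (SP), growing gre_hlen as it goes. A standalone sketch of the same flag and length bookkeeping (the grehdr layout here mirrors, but is not, the kernel structure):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

#define GRE_FLAGS_CP	0x8000		/* checksum present */
#define GRE_FLAGS_KP	0x2000		/* key present */
#define GRE_FLAGS_SP	0x1000		/* sequence present */

struct grehdr {
	uint16_t gre_flags;
	uint16_t gre_proto;
	uint32_t gre_opts[3];	/* checksum+res1, key, seq (as enabled) */
};

static size_t
build_gre(struct grehdr *gh, int want_csum, uint32_t key, int want_seq)
{
	uint32_t *opts = gh->gre_opts;
	uint16_t flags = 0;
	size_t hlen = 2 * sizeof(uint16_t);	/* flags + proto */

	if (want_csum) {
		flags |= GRE_FLAGS_CP;
		hlen += sizeof(uint32_t);
		*opts++ = 0;		/* checksum filled on transmit */
	}
	if (key != 0) {
		flags |= GRE_FLAGS_KP;
		hlen += sizeof(uint32_t);
		*opts++ = htonl(key);
	}
	if (want_seq) {
		flags |= GRE_FLAGS_SP;
		hlen += sizeof(uint32_t);
		*opts++ = 0;		/* per-packet, cf. gre_setseqn() */
	}
	gh->gre_flags = htons(flags);
	return (hlen);
}

int
main(void)
{
	struct grehdr gh = { 0 };

	printf("hlen = %zu\n", build_gre(&gh, 1, 0xdeadbeef, 1));  /* 16 */
	return (0);
}
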
+
+static void
+gre_detach(struct gre_softc *sc)
+{
+
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ if (sc->gre_ecookie != NULL)
+ encap_detach(sc->gre_ecookie);
+ sc->gre_ecookie = NULL;
+}
+
static int
-gre_compute_route(struct gre_softc *sc)
+gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src,
+ struct sockaddr *dst)
{
- struct route *ro;
-
- ro = &sc->route;
-
- memset(ro, 0, sizeof(struct route));
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
- ro->ro_dst.sa_family = AF_INET;
- ro->ro_dst.sa_len = sizeof(ro->ro_dst);
-
- /*
- * toggle last bit, so our interface is not found, but a less
- * specific route. I'd rather like to specify a shorter mask,
- * but this is not possible. Should work though. XXX
- * XXX MRT Use a different FIB for the tunnel to solve this problem.
- */
- if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
- htonl(0x01);
+ struct gre_softc *sc, *tsc;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+#ifdef INET
+ struct ip *ip;
+#endif
+ void *hdr;
+ int error;
+
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ GRE_LIST_LOCK();
+ sc = ifp->if_softc;
+ LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) {
+ if (tsc == sc || tsc->gre_family != src->sa_family)
+ continue;
+#ifdef INET
+ if (tsc->gre_family == AF_INET &&
+ tsc->gre_oip.ip_src.s_addr ==
+ satosin(src)->sin_addr.s_addr &&
+ tsc->gre_oip.ip_dst.s_addr ==
+ satosin(dst)->sin_addr.s_addr) {
+ GRE_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+#endif
+#ifdef INET6
+ if (tsc->gre_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src,
+ &satosin6(src)->sin6_addr) &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst,
+ &satosin6(dst)->sin6_addr)) {
+ GRE_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+#endif
}
+ GRE_LIST_UNLOCK();
-#ifdef DIAGNOSTIC
- printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)),
- inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ hdr = ip = malloc(sizeof(struct greip) +
+ 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+ ip->ip_src = satosin(src)->sin_addr;
+ ip->ip_dst = satosin(dst)->sin_addr;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ hdr = ip6 = malloc(sizeof(struct greip6) +
+ 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+ ip6->ip6_src = satosin6(src)->sin6_addr;
+ ip6->ip6_dst = satosin6(dst)->sin6_addr;
+ break;
+#endif
+ default:
+ return (EAFNOSUPPORT);
+ }
+ if (sc->gre_family != 0)
+ gre_detach(sc);
+ GRE_WLOCK(sc);
+ if (sc->gre_family != 0)
+ free(sc->gre_hdr, M_GRE);
+ sc->gre_family = src->sa_family;
+ sc->gre_hdr = hdr;
+ sc->gre_oseq = 0;
+ sc->gre_iseq = UINT32_MAX;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
+
+ error = 0;
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = in_gre_attach(sc);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gre_attach(sc);
+ break;
#endif
+ }
+ if (error == 0) {
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_UP);
+ }
+ return (error);
+}
- rtalloc_fib(ro, sc->gre_fibnum);
+static void
+gre_delete_tunnel(struct ifnet *ifp)
+{
+ struct gre_softc *sc = ifp->if_softc;
+ int family;
+
+ GRE_WLOCK(sc);
+ family = sc->gre_family;
+ sc->gre_family = 0;
+ GRE_WUNLOCK(sc);
+ if (family != 0) {
+ gre_detach(sc);
+ free(sc->gre_hdr, M_GRE);
+ }
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+}
- /*
- * check if this returned a route at all and this route is no
- * recursion to ourself
- */
- if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
-#ifdef DIAGNOSTIC
- if (ro->ro_rt == NULL)
- printf(" - no route found!\n");
- else
- printf(" - route loops back to ourself!\n");
+int
+gre_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct gre_softc *sc;
+ struct grehdr *gh;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ uint32_t *opts;
+#ifdef notyet
+ uint32_t key;
+#endif
+ uint16_t flags;
+ int hlen, isr, af;
+
+ m = *mp;
+ sc = encap_getarg(m);
+ KASSERT(sc != NULL, ("encap_getarg returned NULL"));
+
+ ifp = GRE2IFP(sc);
+ hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
+ if (m->m_pkthdr.len < hlen)
+ goto drop;
+ if (m->m_len < hlen) {
+ m = m_pullup(m, hlen);
+ if (m == NULL)
+ goto drop;
+ }
+ gh = (struct grehdr *)mtodo(m, *offp);
+ flags = ntohs(gh->gre_flags);
+ if (flags & ~GRE_FLAGS_MASK)
+ goto drop;
+ opts = gh->gre_opts;
+ hlen = 2 * sizeof(uint16_t);
+ if (flags & GRE_FLAGS_CP) {
+ /* reserved1 field must be zero */
+ if (((uint16_t *)opts)[1] != 0)
+ goto drop;
+ if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0)
+ goto drop;
+ hlen += 2 * sizeof(uint16_t);
+ opts++;
+ }
+ if (flags & GRE_FLAGS_KP) {
+#ifdef notyet
+ /*
+ * XXX: The current implementation uses the key only for outgoing
+ * packets. But we can check the key value here, or even in the
+ * encapcheck function.
+ */
+ key = ntohl(*opts);
+#endif
+ hlen += sizeof(uint32_t);
+ opts++;
+ }
+#ifdef notyet
+ } else
+ key = 0;
+
+ if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
+ goto drop;
+#endif
+ if (flags & GRE_FLAGS_SP) {
+#ifdef notyet
+ seq = ntohl(*opts);
+#endif
+ hlen += sizeof(uint32_t);
+ }
+ switch (ntohs(gh->gre_proto)) {
+ case ETHERTYPE_WCCP:
+ /*
+	 * For WCCP, skip an additional 4 bytes if the GRE header is not
+	 * immediately followed by an IP header.
+ */
+ if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
+ hlen += sizeof(uint32_t);
+ /* FALLTHROUGH */
+ case ETHERTYPE_IP:
+ isr = NETISR_IP;
+ af = AF_INET;
+ break;
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ af = AF_INET6;
+ break;
+ default:
+ goto drop;
+ }
+ m_adj(m, *offp + hlen);
+ m_clrprotoflags(m);
+ m->m_pkthdr.rcvif = ifp;
+ M_SETFIB(m, ifp->if_fib);
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
#endif
- return EADDRNOTAVAIL;
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ if ((ifp->if_flags & IFF_MONITOR) != 0)
+ m_freem(m);
+ else
+ netisr_dispatch(isr, m);
+ return (IPPROTO_DONE);
+drop:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ m_freem(m);
+ return (IPPROTO_DONE);
+}
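
[Editor's note] The ETHERTYPE_WCCP case in gre_input() above peeks at what should be the inner IP version nibble to decide whether a 4-byte WCCPv2 redirect header must also be skipped. The heuristic in isolation (wccp_extra_skip() is an invented name):

#include <stdint.h>
#include <stdio.h>

static int
wccp_extra_skip(uint16_t flags, uint8_t first_payload_byte)
{
	/* IPv4 starts with version nibble 4 (0x45 is the common case). */
	if (flags == 0 && (first_payload_byte & 0xF0) != 0x40)
		return (4);	/* WCCPv2 redirect header present */
	return (0);
}

int
main(void)
{
	printf("plain IPv4: skip %d\n", wccp_extra_skip(0, 0x45));
	printf("wccp hdr:   skip %d\n", wccp_extra_skip(0, 0x00));
	return (0);
}
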
+
+#define MTAG_GRE 1307983903
+static int
+gre_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+ struct m_tag *mtag;
+ int count;
+
+ count = 1;
+ mtag = NULL;
+ while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) {
+ if (*(struct ifnet **)(mtag + 1) == ifp) {
+ log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+ return (EIO);
+ }
+ count++;
+ }
+ if (count > V_max_gre_nesting) {
+ log(LOG_NOTICE,
+ "%s: if_output recursively called too many times(%d)\n",
+ ifp->if_xname, count);
+ return (EIO);
}
+ mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL)
+ return (ENOMEM);
+ *(struct ifnet **)(mtag + 1) = ifp;
+ m_tag_prepend(m, mtag);
+ return (0);
+}
- /*
- * now change it back - else ip_output will just drop
- * the route and search one to this interface ...
- */
- if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0)
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
+static int
+gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint32_t af;
+ int error;
-#ifdef DIAGNOSTIC
- printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp),
- inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
- printf("\n");
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error != 0)
+ goto drop;
#endif
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0) {
+ error = ENETDOWN;
+ goto drop;
+ }
+
+ error = gre_check_nesting(ifp, m);
+ if (error != 0)
+ goto drop;
- return 0;
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ m->m_pkthdr.csum_data = af; /* save af for if_transmit */
+ return (ifp->if_transmit(ifp, m));
+drop:
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (error);
}
-/*
- * do a checksum of a buffer - much like in_cksum, which operates on
- * mbufs.
- */
-u_int16_t
-gre_in_cksum(u_int16_t *p, u_int len)
+static void
+gre_setseqn(struct grehdr *gh, uint32_t seq)
{
- u_int32_t sum = 0;
- int nwords = len >> 1;
-
- while (nwords-- != 0)
- sum += *p++;
-
- if (len & 1) {
- union {
- u_short w;
- u_char c[2];
- } u;
- u.c[0] = *(u_char *)p;
- u.c[1] = 0;
- sum += u.w;
+ uint32_t *opts;
+ uint16_t flags;
+
+ opts = gh->gre_opts;
+ flags = ntohs(gh->gre_flags);
+ KASSERT((flags & GRE_FLAGS_SP) != 0,
+ ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
+ if (flags & GRE_FLAGS_CP)
+ opts++;
+ if (flags & GRE_FLAGS_KP)
+ opts++;
+ *opts = htonl(seq);
+}
+
+static int
+gre_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ GRE_RLOCK_TRACKER;
+ struct gre_softc *sc;
+ struct grehdr *gh;
+ uint32_t iaf, oaf, oseq;
+ int error, hlen, olen, plen;
+ int want_seq, want_csum;
+
+ plen = 0;
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ GRE_RLOCK(sc);
+ if (sc->gre_family == 0) {
+ GRE_RUNLOCK(sc);
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ iaf = m->m_pkthdr.csum_data;
+ oaf = sc->gre_family;
+ hlen = sc->gre_hlen;
+ want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0;
+ if (want_seq)
+ oseq = sc->gre_oseq++; /* XXX */
+ else
+ oseq = 0; /* Make compiler happy. */
+ want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0;
+ M_SETFIB(m, sc->gre_fibnum);
+ M_PREPEND(m, hlen, M_NOWAIT);
+ if (m == NULL) {
+ GRE_RUNLOCK(sc);
+ error = ENOBUFS;
+ goto drop;
+ }
+ bcopy(sc->gre_hdr, mtod(m, void *), hlen);
+ GRE_RUNLOCK(sc);
+ switch (oaf) {
+#ifdef INET
+ case AF_INET:
+ olen = sizeof(struct ip);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ olen = sizeof(struct ip6_hdr);
+ break;
+#endif
+ default:
+ error = ENETDOWN;
+ goto drop;
}
+ gh = (struct grehdr *)mtodo(m, olen);
+ switch (iaf) {
+#ifdef INET
+ case AF_INET:
+ gh->gre_proto = htons(ETHERTYPE_IP);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ gh->gre_proto = htons(ETHERTYPE_IPV6);
+ break;
+#endif
+ default:
+ error = ENETDOWN;
+ goto drop;
+ }
+ if (want_seq)
+ gre_setseqn(gh, oseq);
+ if (want_csum) {
+ *(uint16_t *)gh->gre_opts = in_cksum_skip(m,
+ m->m_pkthdr.len, olen);
+ }
+ plen = m->m_pkthdr.len - hlen;
+ switch (oaf) {
+#ifdef INET
+ case AF_INET:
+ error = in_gre_output(m, iaf, hlen);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gre_output(m, iaf, hlen);
+ break;
+#endif
+ default:
+ m_freem(m);
+ error = ENETDOWN;
+ }
+drop:
+ if (error)
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ else {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+ }
+ return (error);
+}
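For context, the checksum stored above by gre_transmit() is the standard 16-bit one's-complement sum over the GRE header and payload (RFC 2784), which in_cksum_skip() computes while skipping the outer delivery header. A freestanding sketch of the same fold, echoing the gre_in_cksum() helper this patch removes (illustrative only; the odd-byte handling ignores host byte order):

#include <stddef.h>
#include <stdint.h>

/* One's-complement checksum over a flat buffer. */
static uint16_t
ones_cksum(const uint16_t *p, size_t nbytes)
{
	uint32_t sum = 0;

	while (nbytes > 1) {
		sum += *p++;
		nbytes -= 2;
	}
	if (nbytes == 1)		/* trailing odd byte, zero padded */
		sum += *(const uint8_t *)p;
	while (sum >> 16)		/* fold carries into the low word */
		sum = (sum >> 16) + (sum & 0xffff);
	return (~sum);
}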
+
+static void
+gre_qflush(struct ifnet *ifp __unused)
+{
- /* end-around-carry */
- sum = (sum >> 16) + (sum & 0xffff);
- sum += (sum >> 16);
- return (~sum);
}
static int
@@ -970,16 +989,12 @@ gremodevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
- greattach();
- break;
case MOD_UNLOAD:
- if_clone_detach(&gre_cloner);
- mtx_destroy(&gre_mtx);
break;
default:
- return EOPNOTSUPP;
+ return (EOPNOTSUPP);
}
- return 0;
+ return (0);
}
static moduledata_t gre_mod = {
diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h
index 74d16b1c..806b0cb8 100644
--- a/freebsd/sys/net/if_gre.h
+++ b/freebsd/sys/net/if_gre.h
@@ -1,8 +1,6 @@
-/* $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */
-/* $FreeBSD$ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -28,158 +26,111 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $
+ * $FreeBSD$
*/
-#ifndef _NET_IF_GRE_H
-#define _NET_IF_GRE_H
+#ifndef _NET_IF_GRE_H_
+#define _NET_IF_GRE_H_
-#include <sys/ioccom.h>
#ifdef _KERNEL
-#include <sys/queue.h>
-
-/*
- * Version of the WCCP, need to be configured manually since
- * header for version 2 is the same but IP payload is prepended
- * with additional 4-bytes field.
- */
-typedef enum {
- WCCP_V1 = 0,
- WCCP_V2
-} wccp_ver_t;
-
-struct gre_softc {
- struct ifnet *sc_ifp;
- LIST_ENTRY(gre_softc) sc_list;
- int gre_unit;
- int gre_flags;
- u_int gre_fibnum; /* use this fib for envelopes */
- struct in_addr g_src; /* source address of gre packets */
- struct in_addr g_dst; /* destination address of gre packets */
- struct route route; /* routing entry that determines, where a
- encapsulated packet should go */
- u_char g_proto; /* protocol of encapsulator */
-
- const struct encaptab *encap; /* encapsulation cookie */
-
- uint32_t key; /* key included in outgoing GRE packets */
- /* zero means none */
-
- wccp_ver_t wccp_ver; /* version of the WCCP */
-};
-#define GRE2IFP(sc) ((sc)->sc_ifp)
-
-
-struct gre_h {
- u_int16_t flags; /* GRE flags */
- u_int16_t ptype; /* protocol type of payload typically
- Ether protocol type*/
- uint32_t options[0]; /* optional options */
-/*
- * from here on: fields are optional, presence indicated by flags
- *
- u_int_16 checksum checksum (one-complements of GRE header
- and payload
- Present if (ck_pres | rt_pres == 1).
- Valid if (ck_pres == 1).
- u_int_16 offset offset from start of routing filed to
- first octet of active SRE (see below).
- Present if (ck_pres | rt_pres == 1).
- Valid if (rt_pres == 1).
- u_int_32 key inserted by encapsulator e.g. for
- authentication
- Present if (key_pres ==1 ).
- u_int_32 seq_num Sequence number to allow for packet order
- Present if (seq_pres ==1 ).
- struct gre_sre[] routing Routing fileds (see below)
- Present if (rt_pres == 1)
- */
+/* GRE header according to RFC 2784 and RFC 2890 */
+struct grehdr {
+ uint16_t gre_flags; /* GRE flags */
+#define GRE_FLAGS_CP 0x8000 /* checksum present */
+#define GRE_FLAGS_KP 0x2000 /* key present */
+#define GRE_FLAGS_SP 0x1000 /* sequence present */
+#define GRE_FLAGS_MASK (GRE_FLAGS_CP|GRE_FLAGS_KP|GRE_FLAGS_SP)
+ uint16_t gre_proto; /* protocol type */
+ uint32_t gre_opts[0]; /* optional fields */
} __packed;
+#ifdef INET
struct greip {
- struct ip gi_i;
- struct gre_h gi_g;
+ struct ip gi_ip;
+ struct grehdr gi_gre;
} __packed;
+#endif
-#define gi_pr gi_i.ip_p
-#define gi_len gi_i.ip_len
-#define gi_src gi_i.ip_src
-#define gi_dst gi_i.ip_dst
-#define gi_ptype gi_g.ptype
-#define gi_flags gi_g.flags
-#define gi_options gi_g.options
-
-#define GRE_CP 0x8000 /* Checksum Present */
-#define GRE_RP 0x4000 /* Routing Present */
-#define GRE_KP 0x2000 /* Key Present */
-#define GRE_SP 0x1000 /* Sequence Present */
-#define GRE_SS 0x0800 /* Strict Source Route */
+#ifdef INET6
+struct greip6 {
+ struct ip6_hdr gi6_ip6;
+ struct grehdr gi6_gre;
+} __packed;
+#endif
+struct gre_softc {
+ struct ifnet *gre_ifp;
+ LIST_ENTRY(gre_softc) gre_list;
+ struct rmlock gre_lock;
+ int gre_family; /* AF of delivery header */
+ uint32_t gre_iseq;
+ uint32_t gre_oseq;
+ uint32_t gre_key;
+ uint32_t gre_options;
+ uint32_t gre_mtu;
+ u_int gre_fibnum;
+ u_int gre_hlen; /* header size */
+ union {
+ void *hdr;
+#ifdef INET
+ struct greip *gihdr;
+#endif
+#ifdef INET6
+ struct greip6 *gi6hdr;
+#endif
+ } gre_uhdr;
+ const struct encaptab *gre_ecookie;
+};
+#define GRE2IFP(sc) ((sc)->gre_ifp)
+#define GRE_LOCK_INIT(sc) rm_init(&(sc)->gre_lock, "gre softc")
+#define GRE_LOCK_DESTROY(sc) rm_destroy(&(sc)->gre_lock)
+#define GRE_RLOCK_TRACKER struct rm_priotracker gre_tracker
+#define GRE_RLOCK(sc) rm_rlock(&(sc)->gre_lock, &gre_tracker)
+#define GRE_RUNLOCK(sc) rm_runlock(&(sc)->gre_lock, &gre_tracker)
+#define GRE_RLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_RLOCKED)
+#define GRE_WLOCK(sc) rm_wlock(&(sc)->gre_lock)
+#define GRE_WUNLOCK(sc) rm_wunlock(&(sc)->gre_lock)
+#define GRE_WLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_WLOCKED)
+
+#define gre_hdr gre_uhdr.hdr
+#define gre_gihdr gre_uhdr.gihdr
+#define gre_gi6hdr gre_uhdr.gi6hdr
+#define gre_oip gre_gihdr->gi_ip
+#define gre_oip6 gre_gi6hdr->gi6_ip6
+
+int gre_input(struct mbuf **, int *, int);
+#ifdef INET
+int in_gre_attach(struct gre_softc *);
+int in_gre_output(struct mbuf *, int, int);
+#endif
+#ifdef INET6
+int in6_gre_attach(struct gre_softc *);
+int in6_gre_output(struct mbuf *, int, int);
+#endif
/*
* CISCO uses special type for GRE tunnel created as part of WCCP
* connection, while in fact those packets are just IPv4 encapsulated
* into GRE.
*/
-#define WCCP_PROTOCOL_TYPE 0x883E
-
-/*
- * gre_sre defines a Source route Entry. These are needed if packets
- * should be routed over more than one tunnel hop by hop
- */
-struct gre_sre {
- u_int16_t sre_family; /* address family */
- u_char sre_offset; /* offset to first octet of active entry */
- u_char sre_length; /* number of octets in the SRE.
- sre_lengthl==0 -> last entry. */
- u_char *sre_rtinfo; /* the routing information */
-};
-
-struct greioctl {
- int unit;
- struct in_addr addr;
-};
-
-/* for mobile encaps */
-
-struct mobile_h {
- u_int16_t proto; /* protocol and S-bit */
- u_int16_t hcrc; /* header checksum */
- u_int32_t odst; /* original destination address */
- u_int32_t osrc; /* original source addr, if S-bit set */
-} __packed;
-
-struct mobip_h {
- struct ip mi;
- struct mobile_h mh;
-} __packed;
-
-
-#define MOB_H_SIZ_S (sizeof(struct mobile_h) - sizeof(u_int32_t))
-#define MOB_H_SIZ_L (sizeof(struct mobile_h))
-#define MOB_H_SBIT 0x0080
-
-#define GRE_TTL 30
-
+#define ETHERTYPE_WCCP 0x883E
#endif /* _KERNEL */
-/*
- * ioctls needed to manipulate the interface
- */
-
#define GRESADDRS _IOW('i', 101, struct ifreq)
#define GRESADDRD _IOW('i', 102, struct ifreq)
#define GREGADDRS _IOWR('i', 103, struct ifreq)
#define GREGADDRD _IOWR('i', 104, struct ifreq)
#define GRESPROTO _IOW('i' , 105, struct ifreq)
#define GREGPROTO _IOWR('i', 106, struct ifreq)
-#define GREGKEY _IOWR('i', 107, struct ifreq)
-#define GRESKEY _IOW('i', 108, struct ifreq)
-#ifdef _KERNEL
-LIST_HEAD(gre_softc_head, gre_softc);
-extern struct mtx gre_mtx;
-extern struct gre_softc_head gre_softc_list;
+#define GREGKEY _IOWR('i', 107, struct ifreq)
+#define GRESKEY _IOW('i', 108, struct ifreq)
+#define GREGOPTS _IOWR('i', 109, struct ifreq)
+#define GRESOPTS _IOW('i', 110, struct ifreq)
-u_int16_t gre_in_cksum(u_int16_t *, u_int);
-#endif /* _KERNEL */
+#define GRE_ENABLE_CSUM 0x0001
+#define GRE_ENABLE_SEQ 0x0002
+#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
-#endif
+#endif /* _NET_IF_GRE_H_ */
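Given the new GREGOPTS/GRESOPTS ioctls and GRE_ENABLE_* flags above, a user process can toggle checksums and sequence numbers per interface. A hedged user-space sketch follows; it assumes the kernel copyin()s a uint32_t through ifr_data, as the ifreq-based definition suggests (verify against gre_ioctl()), and the interface name "gre0" is an assumed unit:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_gre.h>

#include <err.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct ifreq ifr;
	uint32_t opts = GRE_ENABLE_CSUM | GRE_ENABLE_SEQ;
	int s;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
		err(1, "socket");
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "gre0", sizeof(ifr.ifr_name));
	ifr.ifr_data = (caddr_t)&opts;	/* assumption: kernel reads a uint32_t */
	if (ioctl(s, GRESOPTS, &ifr) == -1)
		err(1, "GRESOPTS");
	close(s);
	return (0);
}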
diff --git a/freebsd/sys/net/if_iso88025subr.c b/freebsd/sys/net/if_iso88025subr.c
index 660dc7dd..d26d0ebd 100644
--- a/freebsd/sys/net/if_iso88025subr.c
+++ b/freebsd/sys/net/if_iso88025subr.c
@@ -44,7 +44,6 @@
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -56,6 +55,7 @@
#include <sys/sockio.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_llc.h>
@@ -77,11 +77,6 @@
#include <netinet6/nd6.h>
#endif
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#include <security/mac/mac_framework.h>
static const u_char iso88025_broadcastaddr[ISO88025_ADDR_LEN] =
@@ -172,30 +167,6 @@ iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
arp_ifinit(ifp, ifa);
break;
#endif /* INET */
-#ifdef IPX
- /*
- * XXX - This code is probably wrong
- */
- case AF_IPX: {
- struct ipx_addr *ina;
-
- ina = &(IA_SIPX(ifa)->sipx_addr);
-
- if (ipx_nullhost(*ina))
- ina->x_host = *(union ipx_host *)
- IF_LLADDR(ifp);
- else
- bcopy((caddr_t) ina->x_host.c_host,
- (caddr_t) IF_LLADDR(ifp),
- ISO88025_ADDR_LEN);
-
- /*
- * Set new address
- */
- ifp->if_init(ifp->if_softc);
- }
- break;
-#endif /* IPX */
default:
ifp->if_init(ifp->if_softc);
break;
@@ -233,11 +204,8 @@ iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
* ISO88025 encapsulation
*/
int
-iso88025_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
u_int16_t snap_type = 0;
int loop_copy = 0, error = 0, rif_len = 0;
@@ -246,13 +214,10 @@ iso88025_output(ifp, m, dst, ro)
struct iso88025_header gen_th;
struct sockaddr_dl *sdl = NULL;
struct rtentry *rt0 = NULL;
-#if defined(INET) || defined(INET6)
- struct llentry *lle;
-#endif
+ int is_gw = 0;
if (ro != NULL)
- rt0 = ro->ro_rt;
-
+ is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
#ifdef MAC
error = mac_ifnet_check_transmit(ifp, m);
if (error)
@@ -291,7 +256,7 @@ iso88025_output(ifp, m, dst, ro)
switch (dst->sa_family) {
#ifdef INET
case AF_INET:
- error = arpresolve(ifp, rt0, m, dst, edst, &lle);
+ error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
snap_type = ETHERTYPE_IP;
@@ -326,34 +291,15 @@ iso88025_output(ifp, m, dst, ro)
#endif /* INET */
#ifdef INET6
case AF_INET6:
- error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
+ error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
if (error)
- return (error);
+ return (error == EWOULDBLOCK ? 0 : error);
snap_type = ETHERTYPE_IPV6;
break;
#endif /* INET6 */
-#ifdef IPX
- case AF_IPX:
- {
- u_int8_t *cp;
-
- bcopy((caddr_t)&(satoipx_addr(dst).x_host), (caddr_t)edst,
- ISO88025_ADDR_LEN);
-
- M_PREPEND(m, 3, M_WAIT);
- m = m_pullup(m, 3);
- if (m == 0)
- senderr(ENOBUFS);
- cp = mtod(m, u_int8_t *);
- *cp++ = ETHERTYPE_IPX_8022;
- *cp++ = ETHERTYPE_IPX_8022;
- *cp++ = LLC_UI;
- }
- break;
-#endif /* IPX */
case AF_UNSPEC:
{
- struct iso88025_sockaddr_data *sd;
+ const struct iso88025_sockaddr_data *sd;
/*
* For AF_UNSPEC sockaddr.sa_data must contain all of the
* mac information needed to send the packet. This allows
@@ -363,13 +309,12 @@ iso88025_output(ifp, m, dst, ro)
* should be an iso88025_sockaddr_data structure see iso88025.h
*/
loop_copy = -1;
- sd = (struct iso88025_sockaddr_data *)dst->sa_data;
+ sd = (const struct iso88025_sockaddr_data *)dst->sa_data;
gen_th.ac = sd->ac;
gen_th.fc = sd->fc;
- (void)memcpy((caddr_t)edst, (caddr_t)sd->ether_dhost,
- ISO88025_ADDR_LEN);
- (void)memcpy((caddr_t)gen_th.iso88025_shost,
- (caddr_t)sd->ether_shost, ISO88025_ADDR_LEN);
+ (void)memcpy(edst, sd->ether_dhost, ISO88025_ADDR_LEN);
+ (void)memcpy(gen_th.iso88025_shost, sd->ether_shost,
+ ISO88025_ADDR_LEN);
rif_len = 0;
break;
}
@@ -384,8 +329,8 @@ iso88025_output(ifp, m, dst, ro)
*/
if (snap_type != 0) {
struct llc *l;
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
l = mtod(m, struct llc *);
l->llc_control = LLC_UI;
@@ -400,8 +345,8 @@ iso88025_output(ifp, m, dst, ro)
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
th = mtod(m, struct iso88025_header *);
bcopy((caddr_t)edst, (caddr_t)&gen_th.iso88025_dhost, ISO88025_ADDR_LEN);
@@ -435,12 +380,12 @@ iso88025_output(ifp, m, dst, ro)
IFQ_HANDOFF_ADJ(ifp, m, ISO88025_HDR_LEN + LLC_SNAPFRAMELEN, error);
if (error) {
printf("iso88025_output: packet dropped QFULL.\n");
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
return (error);
bad:
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
if (m)
m_freem(m);
return (error);
@@ -465,24 +410,23 @@ iso88025_input(ifp, m)
*/
if ((m->m_flags & M_PKTHDR) == 0) {
if_printf(ifp, "discard frame w/o packet header\n");
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
if (m->m_pkthdr.rcvif == NULL) {
if_printf(ifp, "discard frame w/o interface pointer\n");
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
m = m_pullup(m, ISO88025_HDR_LEN);
if (m == NULL) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
goto dropanyway;
}
th = mtod(m, struct iso88025_header *);
- m->m_pkthdr.header = (void *)th;
/*
* Discard packet if interface is not up.
@@ -511,7 +455,7 @@ iso88025_input(ifp, m)
/*
* Update interface statistics.
*/
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
getmicrotime(&ifp->if_lastchange);
/*
@@ -533,7 +477,7 @@ iso88025_input(ifp, m)
m->m_flags |= M_BCAST;
else
m->m_flags |= M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
mac_hdr_len = ISO88025_HDR_LEN;
@@ -546,37 +490,24 @@ iso88025_input(ifp, m)
m = m_pullup(m, LLC_SNAPFRAMELEN);
if (m == 0) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
goto dropanyway;
}
l = mtod(m, struct llc *);
switch (l->llc_dsap) {
-#ifdef IPX
- case ETHERTYPE_IPX_8022: /* Thanks a bunch Novell */
- if ((l->llc_control != LLC_UI) ||
- (l->llc_ssap != ETHERTYPE_IPX_8022)) {
- ifp->if_noproto++;
- goto dropanyway;
- }
-
- th->iso88025_shost[0] &= ~(TR_RII);
- m_adj(m, 3);
- isr = NETISR_IPX;
- break;
-#endif /* IPX */
case LLC_SNAP_LSAP: {
u_int16_t type;
if ((l->llc_control != LLC_UI) ||
(l->llc_ssap != LLC_SNAP_LSAP)) {
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
if (l->llc_snap.org_code[0] != 0 ||
l->llc_snap.org_code[1] != 0 ||
l->llc_snap.org_code[2] != 0) {
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
@@ -586,8 +517,6 @@ iso88025_input(ifp, m)
#ifdef INET
case ETHERTYPE_IP:
th->iso88025_shost[0] &= ~(TR_RII);
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
@@ -597,12 +526,6 @@ iso88025_input(ifp, m)
isr = NETISR_ARP;
break;
#endif /* INET */
-#ifdef IPX_SNAP /* XXX: Not supported! */
- case ETHERTYPE_IPX:
- th->iso88025_shost[0] &= ~(TR_RII);
- isr = NETISR_IPX;
- break;
-#endif /* IPX_SNAP */
#ifdef INET6
case ETHERTYPE_IPV6:
th->iso88025_shost[0] &= ~(TR_RII);
@@ -611,7 +534,7 @@ iso88025_input(ifp, m)
#endif /* INET6 */
default:
printf("iso88025_input: unexpected llc_snap ether_type 0x%02x\n", type);
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
break;
@@ -620,7 +543,7 @@ iso88025_input(ifp, m)
case LLC_ISO_LSAP:
switch (l->llc_control) {
case LLC_UI:
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
break;
case LLC_XID:
@@ -636,7 +559,6 @@ iso88025_input(ifp, m)
case LLC_TEST_P:
{
struct sockaddr sa;
- struct arpcom *ac;
struct iso88025_sockaddr_data *th2;
int i;
u_char c;
@@ -669,7 +591,7 @@ iso88025_input(ifp, m)
}
default:
printf("iso88025_input: unexpected llc control 0x%02x\n", l->llc_control);
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
break;
}
@@ -677,7 +599,7 @@ iso88025_input(ifp, m)
#endif /* ISO */
default:
printf("iso88025_input: unknown dsap 0x%x\n", l->llc_dsap);
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
break;
}
@@ -687,7 +609,7 @@ iso88025_input(ifp, m)
return;
dropanyway:
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
if (m)
m_freem(m);
return;
@@ -718,7 +640,7 @@ iso88025_resolvemulti (ifp, llsa, sa)
if ((e_addr[0] & 1) != 1) {
return (EADDRNOTAVAIL);
}
- *llsa = 0;
+ *llsa = NULL;
return (0);
#ifdef INET
@@ -727,14 +649,7 @@ iso88025_resolvemulti (ifp, llsa, sa)
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
return (EADDRNOTAVAIL);
}
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT|M_ZERO);
- if (sdl == NULL)
- return (ENOMEM);
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ISO88025;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025);
sdl->sdl_alen = ISO88025_ADDR_LEN;
e_addr = LLADDR(sdl);
ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
@@ -751,20 +666,13 @@ iso88025_resolvemulti (ifp, llsa, sa)
* (This is used for multicast routers.)
*/
ifp->if_flags |= IFF_ALLMULTI;
- *llsa = 0;
+ *llsa = NULL;
return (0);
}
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
return (EADDRNOTAVAIL);
}
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT|M_ZERO);
- if (sdl == NULL)
- return (ENOMEM);
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ISO88025;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025);
sdl->sdl_alen = ISO88025_ADDR_LEN;
e_addr = LLADDR(sdl);
ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
@@ -783,49 +691,8 @@ iso88025_resolvemulti (ifp, llsa, sa)
return (0);
}
-static MALLOC_DEFINE(M_ISO88025, "arpcom", "802.5 interface internals");
-
-static void*
-iso88025_alloc(u_char type, struct ifnet *ifp)
-{
- struct arpcom *ac;
-
- ac = malloc(sizeof(struct arpcom), M_ISO88025, M_WAITOK | M_ZERO);
- ac->ac_ifp = ifp;
-
- return (ac);
-}
-
-static void
-iso88025_free(void *com, u_char type)
-{
-
- free(com, M_ISO88025);
-}
-
-static int
-iso88025_modevent(module_t mod, int type, void *data)
-{
-
- switch (type) {
- case MOD_LOAD:
- if_register_com_alloc(IFT_ISO88025, iso88025_alloc,
- iso88025_free);
- break;
- case MOD_UNLOAD:
- if_deregister_com_alloc(IFT_ISO88025);
- break;
- default:
- return EOPNOTSUPP;
- }
-
- return (0);
-}
-
static moduledata_t iso88025_mod = {
- "iso88025",
- iso88025_modevent,
- 0
+ .name = "iso88025",
};
DECLARE_MODULE(iso88025, iso88025_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
index 46f3f46c..9cfb7b8b 100644
--- a/freebsd/sys/net/if_lagg.c
+++ b/freebsd/sys/net/if_lagg.c
@@ -5,6 +5,7 @@
/*
* Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
* Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
+ * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -37,9 +38,8 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/proc.h>
-#include <sys/hash.h>
#include <rtems/bsd/sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
@@ -48,11 +48,11 @@ __FBSDID("$FreeBSD$");
#include <net/if_clone.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
-#include <net/if_llc.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>
+#include <net/vnet.h>
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
@@ -83,15 +83,26 @@ static struct {
{0, NULL}
};
-SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */
-static struct mtx lagg_list_mtx;
+VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
+#define V_lagg_list VNET(lagg_list)
+static VNET_DEFINE(struct mtx, lagg_list_mtx);
+#define V_lagg_list_mtx VNET(lagg_list_mtx)
+#define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \
+ "if_lagg list", NULL, MTX_DEF)
+#define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx)
+#define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx)
+#define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx)
eventhandler_tag lagg_detach_cookie = NULL;
static int lagg_clone_create(struct if_clone *, int, caddr_t);
static void lagg_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, lagg_cloner);
+#define V_lagg_cloner VNET(lagg_cloner)
+static const char laggname[] = "lagg";
+
static void lagg_lladdr(struct lagg_softc *, uint8_t *);
static void lagg_capabilities(struct lagg_softc *);
-static void lagg_port_lladdr(struct lagg_port *, uint8_t *);
+static void lagg_port_lladdr(struct lagg_port *, uint8_t *, lagg_llqtype);
static void lagg_port_setlladdr(void *, int);
static int lagg_port_create(struct lagg_softc *, struct ifnet *);
static int lagg_port_destroy(struct lagg_port *, int);
@@ -100,7 +111,7 @@ static void lagg_linkstate(struct lagg_softc *);
static void lagg_port_state(struct ifnet *, int);
static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
static int lagg_port_output(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct route *);
+ const struct sockaddr *, struct route *);
static void lagg_port_ifdetach(void *arg __unused, struct ifnet *);
#ifdef LAGG_PORT_STACKING
static int lagg_port_checkstacking(struct lagg_softc *);
@@ -114,33 +125,28 @@ static int lagg_ether_cmdmulti(struct lagg_port *, int);
static int lagg_setflag(struct lagg_port *, int, int,
int (*func)(struct ifnet *, int));
static int lagg_setflags(struct lagg_port *, int status);
+static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
static int lagg_transmit(struct ifnet *, struct mbuf *);
static void lagg_qflush(struct ifnet *);
static int lagg_media_change(struct ifnet *);
static void lagg_media_status(struct ifnet *, struct ifmediareq *);
static struct lagg_port *lagg_link_active(struct lagg_softc *,
struct lagg_port *);
-static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
-
-IFC_SIMPLE_DECLARE(lagg, 0);
/* Simple round robin */
-static int lagg_rr_attach(struct lagg_softc *);
-static int lagg_rr_detach(struct lagg_softc *);
+static void lagg_rr_attach(struct lagg_softc *);
static int lagg_rr_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
struct mbuf *);
/* Active failover */
-static int lagg_fail_attach(struct lagg_softc *);
-static int lagg_fail_detach(struct lagg_softc *);
static int lagg_fail_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
struct mbuf *);
/* Loadbalancing */
-static int lagg_lb_attach(struct lagg_softc *);
-static int lagg_lb_detach(struct lagg_softc *);
+static void lagg_lb_attach(struct lagg_softc *);
+static void lagg_lb_detach(struct lagg_softc *);
static int lagg_lb_port_create(struct lagg_port *);
static void lagg_lb_port_destroy(struct lagg_port *);
static int lagg_lb_start(struct lagg_softc *, struct mbuf *);
@@ -148,50 +154,134 @@ static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
struct mbuf *);
static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
+/* Broadcast */
+static int lagg_bcast_start(struct lagg_softc *, struct mbuf *);
+static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *,
+ struct mbuf *);
+
/* 802.3ad LACP */
-static int lagg_lacp_attach(struct lagg_softc *);
-static int lagg_lacp_detach(struct lagg_softc *);
+static void lagg_lacp_attach(struct lagg_softc *);
+static void lagg_lacp_detach(struct lagg_softc *);
static int lagg_lacp_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
struct mbuf *);
static void lagg_lacp_lladdr(struct lagg_softc *);
/* lagg protocol table */
-static const struct {
- int ti_proto;
- int (*ti_attach)(struct lagg_softc *);
+static const struct lagg_proto {
+ lagg_proto pr_num;
+ void (*pr_attach)(struct lagg_softc *);
+ void (*pr_detach)(struct lagg_softc *);
+ int (*pr_start)(struct lagg_softc *, struct mbuf *);
+ struct mbuf * (*pr_input)(struct lagg_softc *, struct lagg_port *,
+ struct mbuf *);
+ int (*pr_addport)(struct lagg_port *);
+ void (*pr_delport)(struct lagg_port *);
+ void (*pr_linkstate)(struct lagg_port *);
+ void (*pr_init)(struct lagg_softc *);
+ void (*pr_stop)(struct lagg_softc *);
+ void (*pr_lladdr)(struct lagg_softc *);
+ void (*pr_request)(struct lagg_softc *, void *);
+ void (*pr_portreq)(struct lagg_port *, void *);
} lagg_protos[] = {
- { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach },
- { LAGG_PROTO_FAILOVER, lagg_fail_attach },
- { LAGG_PROTO_LOADBALANCE, lagg_lb_attach },
- { LAGG_PROTO_ETHERCHANNEL, lagg_lb_attach },
- { LAGG_PROTO_LACP, lagg_lacp_attach },
- { LAGG_PROTO_NONE, NULL }
+ {
+ .pr_num = LAGG_PROTO_NONE
+ },
+ {
+ .pr_num = LAGG_PROTO_ROUNDROBIN,
+ .pr_attach = lagg_rr_attach,
+ .pr_start = lagg_rr_start,
+ .pr_input = lagg_rr_input,
+ },
+ {
+ .pr_num = LAGG_PROTO_FAILOVER,
+ .pr_start = lagg_fail_start,
+ .pr_input = lagg_fail_input,
+ },
+ {
+ .pr_num = LAGG_PROTO_LOADBALANCE,
+ .pr_attach = lagg_lb_attach,
+ .pr_detach = lagg_lb_detach,
+ .pr_start = lagg_lb_start,
+ .pr_input = lagg_lb_input,
+ .pr_addport = lagg_lb_port_create,
+ .pr_delport = lagg_lb_port_destroy,
+ },
+ {
+ .pr_num = LAGG_PROTO_LACP,
+ .pr_attach = lagg_lacp_attach,
+ .pr_detach = lagg_lacp_detach,
+ .pr_start = lagg_lacp_start,
+ .pr_input = lagg_lacp_input,
+ .pr_addport = lacp_port_create,
+ .pr_delport = lacp_port_destroy,
+ .pr_linkstate = lacp_linkstate,
+ .pr_init = lacp_init,
+ .pr_stop = lacp_stop,
+ .pr_lladdr = lagg_lacp_lladdr,
+ .pr_request = lacp_req,
+ .pr_portreq = lacp_portreq,
+ },
+ {
+ .pr_num = LAGG_PROTO_BROADCAST,
+ .pr_start = lagg_bcast_start,
+ .pr_input = lagg_bcast_input,
+ },
};
SYSCTL_DECL(_net_link);
-static SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
+SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
"Link Aggregation");
-static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
-SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
- &lagg_failover_rx_all, 0,
+/* Allow input on any failover links */
+static VNET_DEFINE(int, lagg_failover_rx_all);
+#define V_lagg_failover_rx_all VNET(lagg_failover_rx_all)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(lagg_failover_rx_all), 0,
"Accept input from any interface in a failover lagg");
-static int def_use_flowid = 1; /* Default value for using M_FLOWID */
-TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
-SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
- &def_use_flowid, 0,
+
+/* Default value for using flowid */
+static VNET_DEFINE(int, def_use_flowid) = 1;
+#define V_def_use_flowid VNET(def_use_flowid)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
+ &VNET_NAME(def_use_flowid), 0,
"Default setting for using flow id for load sharing");
+/* Default value for flowid shift */
+static VNET_DEFINE(int, def_flowid_shift) = 16;
+#define V_def_flowid_shift VNET(def_flowid_shift)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
+ &VNET_NAME(def_flowid_shift), 0,
+ "Default setting for flowid shift for load sharing");
+
+static void
+vnet_lagg_init(const void *unused __unused)
+{
+
+ LAGG_LIST_LOCK_INIT();
+ SLIST_INIT(&V_lagg_list);
+ V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
+ lagg_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_lagg_init, NULL);
+
+static void
+vnet_lagg_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_lagg_cloner);
+ LAGG_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
+ vnet_lagg_uninit, NULL);
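The V_lagg_* accessors above follow the standard VNET idiom: VNET_DEFINE() reserves one instance of the variable per virtual network stack, and VNET() resolves the copy belonging to the current vnet. A minimal sketch of the pattern, with an assumed variable name:

#include <net/vnet.h>

static VNET_DEFINE(int, example_count);		/* one copy per vnet */
#define	V_example_count	VNET(example_count)

static void
example_bump(void)
{
	/* Expands to the curvnet-local instance of example_count. */
	V_example_count++;
}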
+
static int
lagg_modevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
- mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
- SLIST_INIT(&lagg_list);
- if_clone_attach(&lagg_cloner);
lagg_input_p = lagg_input;
lagg_linkstate_p = lagg_port_state;
lagg_detach_cookie = EVENTHANDLER_REGISTER(
@@ -201,10 +291,8 @@ lagg_modevent(module_t mod, int type, void *data)
case MOD_UNLOAD:
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
lagg_detach_cookie);
- if_clone_detach(&lagg_cloner);
lagg_input_p = NULL;
lagg_linkstate_p = NULL;
- mtx_destroy(&lagg_list_mtx);
break;
default:
return (EOPNOTSUPP);
@@ -221,7 +309,117 @@ static moduledata_t lagg_mod = {
DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_lagg, 1);
-#if __FreeBSD_version >= 800000
+static void
+lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
+{
+
+ KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto",
+ __func__, sc));
+
+ if (sc->sc_ifflags & IFF_DEBUG)
+ if_printf(sc->sc_ifp, "using proto %u\n", pr);
+
+ if (lagg_protos[pr].pr_attach != NULL)
+ lagg_protos[pr].pr_attach(sc);
+ sc->sc_proto = pr;
+}
+
+static void
+lagg_proto_detach(struct lagg_softc *sc)
+{
+ lagg_proto pr;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ pr = sc->sc_proto;
+ sc->sc_proto = LAGG_PROTO_NONE;
+
+ if (lagg_protos[pr].pr_detach != NULL)
+ lagg_protos[pr].pr_detach(sc);
+ else
+ LAGG_WUNLOCK(sc);
+}
+
+static int
+lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
+{
+
+ return (lagg_protos[sc->sc_proto].pr_start(sc, m));
+}
+
+static struct mbuf *
+lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
+{
+
+ return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
+}
+
+static int
+lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_addport == NULL)
+ return (0);
+ else
+ return (lagg_protos[sc->sc_proto].pr_addport(lp));
+}
+
+static void
+lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_delport != NULL)
+ lagg_protos[sc->sc_proto].pr_delport(lp);
+}
+
+static void
+lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
+ lagg_protos[sc->sc_proto].pr_linkstate(lp);
+}
+
+static void
+lagg_proto_init(struct lagg_softc *sc)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_init != NULL)
+ lagg_protos[sc->sc_proto].pr_init(sc);
+}
+
+static void
+lagg_proto_stop(struct lagg_softc *sc)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_stop != NULL)
+ lagg_protos[sc->sc_proto].pr_stop(sc);
+}
+
+static void
+lagg_proto_lladdr(struct lagg_softc *sc)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
+ lagg_protos[sc->sc_proto].pr_lladdr(sc);
+}
+
+static void
+lagg_proto_request(struct lagg_softc *sc, void *v)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_request != NULL)
+ lagg_protos[sc->sc_proto].pr_request(sc, v);
+}
+
+static void
+lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
+ lagg_protos[sc->sc_proto].pr_portreq(lp, v);
+}
+
/*
 * This routine is run via a vlan
* config EVENT
@@ -229,18 +427,19 @@ MODULE_VERSION(if_lagg, 1);
static void
lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
- struct lagg_softc *sc = ifp->if_softc;
- struct lagg_port *lp;
+ struct lagg_softc *sc = ifp->if_softc;
+ struct lagg_port *lp;
+ struct rm_priotracker tracker;
- if (ifp->if_softc != arg) /* Not our event */
- return;
+ if (ifp->if_softc != arg) /* Not our event */
+ return;
- LAGG_RLOCK(sc);
- if (!SLIST_EMPTY(&sc->sc_ports)) {
- SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
- EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
- }
- LAGG_RUNLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
+ if (!SLIST_EMPTY(&sc->sc_ports)) {
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
+ }
+ LAGG_RUNLOCK(sc, &tracker);
}
/*
@@ -250,30 +449,27 @@ lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
static void
lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
- struct lagg_softc *sc = ifp->if_softc;
- struct lagg_port *lp;
+ struct lagg_softc *sc = ifp->if_softc;
+ struct lagg_port *lp;
+ struct rm_priotracker tracker;
- if (ifp->if_softc != arg) /* Not our event */
- return;
+ if (ifp->if_softc != arg) /* Not our event */
+ return;
- LAGG_RLOCK(sc);
- if (!SLIST_EMPTY(&sc->sc_ports)) {
- SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
- EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
- }
- LAGG_RUNLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
+ if (!SLIST_EMPTY(&sc->sc_ports)) {
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
+ }
+ LAGG_RUNLOCK(sc, &tracker);
}
-#endif
static int
lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct lagg_softc *sc;
struct ifnet *ifp;
- int i, error = 0;
static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
- struct sysctl_oid *oid;
- char num[14]; /* sufficient for 32 bits */
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -282,32 +478,15 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
return (ENOSPC);
}
- sysctl_ctx_init(&sc->ctx);
- snprintf(num, sizeof(num), "%u", unit);
- sc->use_flowid = def_use_flowid;
- oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg),
- OID_AUTO, num, CTLFLAG_RD, NULL, "");
- SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "use_flowid", CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
- "Use flow id for load sharing");
- SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "count", CTLFLAG_RD, &sc->sc_count, sc->sc_count,
- "Total number of ports");
+ if (V_def_use_flowid)
+ sc->sc_opts |= LAGG_OPT_USE_FLOWID;
+ sc->flowid_shift = V_def_flowid_shift;
+
/* Hash all layers by default */
- sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
+ sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4;
+
+ lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
- sc->sc_proto = LAGG_PROTO_NONE;
- for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
- if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
- sc->sc_proto = lagg_protos[i].ti_proto;
- if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
- if_free_type(ifp, IFT_ETHER);
- free(sc, M_DEVBUF);
- return (error);
- }
- break;
- }
- }
LAGG_LOCK_INIT(sc);
SLIST_INIT(&sc->sc_ports);
TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
@@ -318,32 +497,31 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
- if_initname(ifp, ifc->ifc_name, unit);
- ifp->if_type = IFT_ETHER;
+ if_initname(ifp, laggname, unit);
ifp->if_softc = sc;
ifp->if_transmit = lagg_transmit;
ifp->if_qflush = lagg_qflush;
ifp->if_init = lagg_init;
ifp->if_ioctl = lagg_ioctl;
+ ifp->if_get_counter = lagg_get_counter;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
+ ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
/*
- * Attach as an ordinary ethernet device, childs will be attached
+ * Attach as an ordinary ethernet device, children will be attached
* as special device IFT_IEEE8023ADLAG.
*/
ether_ifattach(ifp, eaddr);
-#if __FreeBSD_version >= 800000
sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
-#endif
/* Insert into the global list of laggs */
- mtx_lock(&lagg_list_mtx);
- SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_LOCK();
+ SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
+ LAGG_LIST_UNLOCK();
return (0);
}
@@ -359,47 +537,64 @@ lagg_clone_destroy(struct ifnet *ifp)
lagg_stop(sc);
ifp->if_flags &= ~IFF_UP;
-#if __FreeBSD_version >= 800000
EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
-#endif
/* Shutdown and remove lagg ports */
while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
lagg_port_destroy(lp, 1);
/* Unhook the aggregation protocol */
- if (sc->sc_detach != NULL)
- (*sc->sc_detach)(sc);
+ lagg_proto_detach(sc);
+ LAGG_UNLOCK_ASSERT(sc);
- LAGG_WUNLOCK(sc);
-
- sysctl_ctx_free(&sc->ctx);
ifmedia_removeall(&sc->sc_media);
ether_ifdetach(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
- mtx_lock(&lagg_list_mtx);
- SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_LOCK();
+ SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
+ LAGG_LIST_UNLOCK();
taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
LAGG_LOCK_DESTROY(sc);
free(sc, M_DEVBUF);
}
-static void
+/*
+ * Set link-layer address on the lagg interface itself.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+static __noinline void
lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
{
struct ifnet *ifp = sc->sc_ifp;
+ struct lagg_port lp;
if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
return;
+ LAGG_WLOCK_ASSERT(sc);
+ /*
+ * Set the link layer address on the lagg interface.
+ * lagg_proto_lladdr() notifies the aggregation protocol
+ * of the MAC change. The iflladdr_event handler, which
+ * may trigger gratuitous ARPs for INET, will be invoked
+ * from a taskqueue.
+ */
bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
- /* Let the protocol know the MAC has changed */
- if (sc->sc_lladdr != NULL)
- (*sc->sc_lladdr)(sc);
- EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+ lagg_proto_lladdr(sc);
+
+ /*
+ * Send a notification request for the lagg interface
+ * itself. Note that the new lladdr is already set.
+ */
+ bzero(&lp, sizeof(lp));
+ lp.lp_ifp = sc->sc_ifp;
+ lp.lp_softc = sc;
+
+ /* Do not request lladdr change */
+ lagg_port_lladdr(&lp, lladdr, LAGG_LLQTYPE_VIRT);
}
static void
@@ -440,54 +635,63 @@ lagg_capabilities(struct lagg_softc *sc)
}
}
-static void
-lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
+/*
+ * Enqueue an interface lladdr notification.
+ * If a request is already queued, it is updated.
+ * If setting the lladdr is also desired, @llq_type has to be
+ * LAGG_LLQTYPE_PHYS.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+static __noinline void
+lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr, lagg_llqtype llq_type)
{
struct lagg_softc *sc = lp->lp_softc;
struct ifnet *ifp = lp->lp_ifp;
struct lagg_llq *llq;
- int pending = 0;
LAGG_WLOCK_ASSERT(sc);
- if (lp->lp_detaching ||
- memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
+ /*
+ * Do not enqueue a request for a "physical" interface
+ * (e.g. a port in a lagg) whose lladdr is unchanged.
+ */
+ if (llq_type == LAGG_LLQTYPE_PHYS &&
+ memcmp(IF_LLADDR(ifp), lladdr, ETHER_ADDR_LEN) == 0)
return;
/* Check to make sure its not already queued to be changed */
SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
if (llq->llq_ifp == ifp) {
- pending = 1;
- break;
+ /* Update lladdr, it may have changed */
+ bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
+ return;
}
}
- if (!pending) {
- llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
- if (llq == NULL) /* XXX what to do */
- return;
- }
+ llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (llq == NULL) /* XXX what to do */
+ return;
- /* Update the lladdr even if pending, it may have changed */
llq->llq_ifp = ifp;
+ llq->llq_type = llq_type;
bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
-
- if (!pending)
- SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
+ /* XXX: We should insert at the tail */
+ SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
}
/*
* Set the interface MAC address from a taskqueue to avoid a LOR.
+ *
+ * Set noinline to be dtrace-friendly
*/
-static void
+static __noinline void
lagg_port_setlladdr(void *arg, int pending)
{
struct lagg_softc *sc = (struct lagg_softc *)arg;
struct lagg_llq *llq, *head;
struct ifnet *ifp;
- int error;
/* Grab a local reference of the queue and remove it from the softc */
LAGG_WLOCK(sc);
@@ -502,14 +706,19 @@ lagg_port_setlladdr(void *arg, int pending)
for (llq = head; llq != NULL; llq = head) {
ifp = llq->llq_ifp;
- /* Set the link layer address */
CURVNET_SET(ifp->if_vnet);
- error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
- CURVNET_RESTORE();
- if (error)
- printf("%s: setlladdr failed on %s\n", __func__,
- ifp->if_xname);
+ /*
+ * Set the link layer address on the lagg port interface.
+ * Note that if_setlladdr() or the iflladdr_event handler
+ * may result in ARP transmissions or lltable updates.
+ */
+ if (llq->llq_type == LAGG_LLQTYPE_PHYS)
+ if_setlladdr(ifp, llq->llq_lladdr,
+ ETHER_ADDR_LEN);
+ else
+ EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+ CURVNET_RESTORE();
head = SLIST_NEXT(llq, llq_entries);
free(llq, M_DEVBUF);
}
@@ -520,7 +729,8 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
{
struct lagg_softc *sc_ptr;
struct lagg_port *lp, *tlp;
- int error = 0;
+ int error, i;
+ uint64_t *pval;
LAGG_WLOCK_ASSERT(sc);
@@ -538,37 +748,9 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
}
/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
- if (ifp->if_type != IFT_ETHER)
+ if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
return (EPROTONOSUPPORT);
-#ifdef INET6
- /*
- * The member interface should not have inet6 address because
- * two interfaces with a valid link-local scope zone must not be
- * merged in any form. This restriction is needed to
- * prevent violation of link-local scope zone. Attempts to
- * add a member interface which has inet6 addresses triggers
- * removal of all inet6 addresses on the member interface.
- */
- SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
- if (in6ifa_llaonifp(lp->lp_ifp)) {
- in6_ifdetach(lp->lp_ifp);
- if_printf(sc->sc_ifp,
- "IPv6 addresses on %s have been removed "
- "before adding it as a member to prevent "
- "IPv6 address scope violation.\n",
- lp->lp_ifp->if_xname);
- }
- }
- if (in6ifa_llaonifp(ifp)) {
- in6_ifdetach(ifp);
- if_printf(sc->sc_ifp,
- "IPv6 addresses on %s have been removed "
- "before adding it as a member to prevent "
- "IPv6 address scope violation.\n",
- ifp->if_xname);
- }
-#endif
/* Allow the first Ethernet member to define the MTU */
if (SLIST_EMPTY(&sc->sc_ports))
sc->sc_ifp->if_mtu = ifp->if_mtu;
@@ -583,10 +765,10 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
return (ENOMEM);
/* Check if port is a stacked lagg */
- mtx_lock(&lagg_list_mtx);
- SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
+ LAGG_LIST_LOCK();
+ SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
if (ifp == sc_ptr->sc_ifp) {
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_UNLOCK();
free(lp, M_DEVBUF);
return (EINVAL);
/* XXX disable stacking for the moment, its untested */
@@ -594,14 +776,14 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
lp->lp_flags |= LAGG_PORT_STACK;
if (lagg_port_checkstacking(sc_ptr) >=
LAGG_MAX_STACKING) {
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_UNLOCK();
free(lp, M_DEVBUF);
return (E2BIG);
}
#endif
}
}
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_UNLOCK();
/* Change the interface type */
lp->lp_iftype = ifp->if_type;
@@ -620,10 +802,15 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
if (SLIST_EMPTY(&sc->sc_ports)) {
sc->sc_primary = lp;
+ /* First port in lagg. Update/notify the lagg lladdr. */
lagg_lladdr(sc, IF_LLADDR(ifp));
} else {
- /* Update link layer address for this port */
- lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
+
+ /*
+ * Update link layer address for this port and
+ * send notifications to other subsystems.
+ */
+ lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp), LAGG_LLQTYPE_PHYS);
}
/*
@@ -649,19 +836,21 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
lagg_capabilities(sc);
lagg_linkstate(sc);
+ /* Read port counters */
+ pval = lp->port_counters.val;
+ for (i = 0; i < IFCOUNTERS; i++, pval++)
+ *pval = ifp->if_get_counter(ifp, i);
/* Add multicast addresses and interface flags to this port */
lagg_ether_cmdmulti(lp, 1);
lagg_setflags(lp, 1);
- if (sc->sc_port_create != NULL)
- error = (*sc->sc_port_create)(lp);
- if (error) {
- /* remove the port again, without calling sc_port_destroy */
+ if ((error = lagg_proto_addport(sc, lp)) != 0) {
+ /* Remove the port, without calling pr_delport. */
lagg_port_destroy(lp, 0);
return (error);
}
- return (error);
+ return (0);
}
#ifdef LAGG_PORT_STACKING
@@ -686,17 +875,19 @@ lagg_port_checkstacking(struct lagg_softc *sc)
#endif
static int
-lagg_port_destroy(struct lagg_port *lp, int runpd)
+lagg_port_destroy(struct lagg_port *lp, int rundelport)
{
struct lagg_softc *sc = lp->lp_softc;
- struct lagg_port *lp_ptr;
+ struct lagg_port *lp_ptr, *lp0;
struct lagg_llq *llq;
struct ifnet *ifp = lp->lp_ifp;
+ uint64_t *pval, vdiff;
+ int i;
LAGG_WLOCK_ASSERT(sc);
- if (runpd && sc->sc_port_destroy != NULL)
- (*sc->sc_port_destroy)(lp);
+ if (rundelport)
+ lagg_proto_delport(sc, lp);
/*
* Remove multicast addresses and interface flags from this port and
@@ -705,7 +896,7 @@ lagg_port_destroy(struct lagg_port *lp, int runpd)
if (!lp->lp_detaching) {
lagg_ether_cmdmulti(lp, 0);
lagg_setflags(lp, 0);
- lagg_port_lladdr(lp, lp->lp_lladdr);
+ lagg_port_lladdr(lp, lp->lp_lladdr, LAGG_LLQTYPE_PHYS);
}
/* Restore interface */
@@ -714,6 +905,13 @@ lagg_port_destroy(struct lagg_port *lp, int runpd)
ifp->if_output = lp->lp_output;
ifp->if_lagg = NULL;
+ /* Update detached port counters */
+ pval = lp->port_counters.val;
+ for (i = 0; i < IFCOUNTERS; i++, pval++) {
+ vdiff = ifp->if_get_counter(ifp, i) - *pval;
+ sc->detached_counters.val[i] += vdiff;
+ }
+
/* Finally, remove the port from the lagg */
SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
sc->sc_count--;
@@ -722,18 +920,24 @@ lagg_port_destroy(struct lagg_port *lp, int runpd)
if (lp == sc->sc_primary) {
uint8_t lladdr[ETHER_ADDR_LEN];
- if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
+ if ((lp0 = SLIST_FIRST(&sc->sc_ports)) == NULL) {
bzero(&lladdr, ETHER_ADDR_LEN);
} else {
- bcopy(lp_ptr->lp_lladdr,
+ bcopy(lp0->lp_lladdr,
lladdr, ETHER_ADDR_LEN);
}
lagg_lladdr(sc, lladdr);
- sc->sc_primary = lp_ptr;
- /* Update link layer address for each port */
+ /* Mark lp0 as new primary */
+ sc->sc_primary = lp0;
+
+ /*
+ * Enqueue lladdr update/notification for each port
+ * (new primary needs update as well, to switch from
+ * old lladdr to its 'real' one).
+ */
SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
- lagg_port_lladdr(lp_ptr, lladdr);
+ lagg_port_lladdr(lp_ptr, lladdr, LAGG_LLQTYPE_PHYS);
}
/* Remove any pending lladdr changes from the queue */
@@ -767,6 +971,7 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct lagg_softc *sc;
struct lagg_port *lp = NULL;
int error = 0;
+ struct rm_priotracker tracker;
/* Should be checked by the caller */
if (ifp->if_type != IFT_IEEE8023ADLAG ||
@@ -781,15 +986,15 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
error = ENOENT;
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
}
lagg_port2req(lp, rp);
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
case SIOCSIFCAP:
@@ -826,11 +1031,66 @@ fallback:
}
/*
+ * Returns the data for counter @cnt.
+ *
+ * The counter value is calculated in the following way:
+ * 1) for each port, sum the difference between the current and "initial"
+ *    measurements.
+ * 2) add the lagg logical interface counters.
+ * 3) add the data from the detached_counters array.
+ *
+ * We also do the following on port attach/detach:
+ * 1) On port attach we store all of its counters in the port_counters array.
+ * 2) On port detach we add the difference between the "initial" and
+ *    current counter values to the detached_counters array.
+ */
+static uint64_t
+lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
+{
+ struct lagg_softc *sc;
+ struct lagg_port *lp;
+ struct ifnet *lpifp;
+ struct rm_priotracker tracker;
+ uint64_t newval, oldval, vsum;
+
+ /* Revise this when we've got non-generic counters. */
+ KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
+
+ sc = (struct lagg_softc *)ifp->if_softc;
+ LAGG_RLOCK(sc, &tracker);
+
+ vsum = 0;
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ /* Value snapshotted when the port was attached */
+ oldval = lp->port_counters.val[cnt];
+ /* Current value */
+ lpifp = lp->lp_ifp;
+ newval = lpifp->if_get_counter(lpifp, cnt);
+ /* Accumulate the difference */
+ vsum += newval - oldval;
+ }
+
+ /*
+ * Add counter data that upper layer protocols operating
+ * on the logical interface might have added.
+ */
+ vsum += if_get_counter_default(ifp, cnt);
+
+ /*
+ * Add counter data accumulated from detached ports.
+ */
+ vsum += sc->detached_counters.val[cnt];
+
+ LAGG_RUNLOCK(sc, &tracker);
+
+ return (vsum);
+}
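A worked example of the accounting above, with assumed numbers:

/* Illustrative arithmetic only; all values are assumed, not measured. */
uint64_t attach_snapshot = 1000;	/* port_counters.val[cnt] at attach */
uint64_t port_now = 1500;		/* lp_ifp->if_get_counter() now */
uint64_t logical = 10;			/* if_get_counter_default(ifp, cnt) */
uint64_t detached = 200;		/* detached_counters.val[cnt] */

uint64_t total = (port_now - attach_snapshot) + logical + detached;
/* total == 710: per-port delta + lagg-level traffic + departed ports */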
+
+/*
* For direct output to child ports.
*/
static int
lagg_port_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
+ const struct sockaddr *dst, struct route *ro)
{
struct lagg_port *lp = ifp->if_lagg;
@@ -874,8 +1134,7 @@ lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
rp->rp_prio = lp->lp_prio;
rp->rp_flags = lp->lp_flags;
- if (sc->sc_portreq != NULL)
- (*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);
+ lagg_proto_portreq(sc, lp, &rp->rp_psc);
/* Add protocol specific flags */
switch (sc->sc_proto) {
@@ -888,7 +1147,7 @@ lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
case LAGG_PROTO_ROUNDROBIN:
case LAGG_PROTO_LOADBALANCE:
- case LAGG_PROTO_ETHERCHANNEL:
+ case LAGG_PROTO_BROADCAST:
if (LAGG_PORTACTIVE(lp))
rp->rp_flags |= LAGG_PORT_ACTIVE;
break;
@@ -910,8 +1169,8 @@ static void
lagg_init(void *xsc)
{
struct lagg_softc *sc = (struct lagg_softc *)xsc;
- struct lagg_port *lp;
struct ifnet *ifp = sc->sc_ifp;
+ struct lagg_port *lp;
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
return;
@@ -919,12 +1178,16 @@ lagg_init(void *xsc)
LAGG_WLOCK(sc);
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- /* Update the port lladdrs */
+
+ /*
+ * Update the port lladdrs if needed.
+ * This may be a notification from if_setlladdr()
+ * that the lladdr has changed.
+ */
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
- lagg_port_lladdr(lp, IF_LLADDR(ifp));
+ lagg_port_lladdr(lp, IF_LLADDR(ifp), LAGG_LLQTYPE_PHYS);
- if (sc->sc_init != NULL)
- (*sc->sc_init)(sc);
+ lagg_proto_init(sc);
LAGG_WUNLOCK(sc);
}
@@ -941,8 +1204,7 @@ lagg_stop(struct lagg_softc *sc)
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- if (sc->sc_stop != NULL)
- (*sc->sc_stop)(sc);
+ lagg_proto_stop(sc);
}
static int
@@ -950,6 +1212,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
struct lagg_reqall *ra = (struct lagg_reqall *)data;
+ struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
struct ifreq *ifr = (struct ifreq *)data;
@@ -958,25 +1221,24 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct thread *td = curthread;
char *buf, *outbuf;
int count, buflen, len, error = 0;
+ struct rm_priotracker tracker;
bzero(&rpbuf, sizeof(rpbuf));
switch (cmd) {
case SIOCGLAGG:
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
count = 0;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
count++;
buflen = count * sizeof(struct lagg_reqport);
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
ra->ra_proto = sc->sc_proto;
- if (sc->sc_req != NULL)
- (*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
-
+ lagg_proto_request(sc, &ra->ra_psc);
count = 0;
buf = outbuf;
len = min(ra->ra_size, buflen);
@@ -990,7 +1252,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
buf += sizeof(rpbuf);
len -= sizeof(rpbuf);
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
ra->ra_ports = count;
ra->ra_size = count * sizeof(rpbuf);
error = copyout(outbuf, ra->ra_port, ra->ra_size);
@@ -1004,49 +1266,150 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = EPROTONOSUPPORT;
break;
}
+
LAGG_WLOCK(sc);
- if (sc->sc_proto != LAGG_PROTO_NONE) {
- /* Reset protocol first in case detach unlocks */
- sc->sc_proto = LAGG_PROTO_NONE;
- error = sc->sc_detach(sc);
- sc->sc_detach = NULL;
- sc->sc_start = NULL;
- sc->sc_input = NULL;
- sc->sc_port_create = NULL;
- sc->sc_port_destroy = NULL;
- sc->sc_linkstate = NULL;
- sc->sc_init = NULL;
- sc->sc_stop = NULL;
- sc->sc_lladdr = NULL;
- sc->sc_req = NULL;
- sc->sc_portreq = NULL;
- } else if (sc->sc_input != NULL) {
- /* Still detaching */
- error = EBUSY;
+ lagg_proto_detach(sc);
+ LAGG_UNLOCK_ASSERT(sc);
+ lagg_proto_attach(sc, ra->ra_proto);
+ break;
+ case SIOCGLAGGOPTS:
+ ro->ro_opts = sc->sc_opts;
+ if (sc->sc_proto == LAGG_PROTO_LACP) {
+ struct lacp_softc *lsc;
+
+ lsc = (struct lacp_softc *)sc->sc_psc;
+ if (lsc->lsc_debug.lsc_tx_test != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
+ if (lsc->lsc_debug.lsc_rx_test != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
+ if (lsc->lsc_strict_mode != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_STRICT;
+ if (lsc->lsc_fast_timeout != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_TIMEOUT;
+
+ ro->ro_active = sc->sc_active;
+ } else {
+ ro->ro_active = 0;
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ ro->ro_active += LAGG_PORTACTIVE(lp);
}
- if (error != 0) {
- LAGG_WUNLOCK(sc);
+ ro->ro_bkt = sc->sc_bkt;
+ ro->ro_flapping = sc->sc_flapping;
+ ro->ro_flowid_shift = sc->flowid_shift;
+ break;
+ case SIOCSLAGGOPTS:
+ if (sc->sc_proto == LAGG_PROTO_ROUNDROBIN) {
+ if (ro->ro_bkt == 0)
+			sc->sc_bkt = 1;	/* Minimum 1 packet per iface. */
+ else
+ sc->sc_bkt = ro->ro_bkt;
+ }
+ error = priv_check(td, PRIV_NET_LAGG);
+ if (error)
+ break;
+ if (ro->ro_opts == 0)
+ break;
+ /*
+ * Set options. LACP options are stored in sc->sc_psc,
+ * not in sc_opts.
+ */
+ int valid, lacp;
+
+ switch (ro->ro_opts) {
+ case LAGG_OPT_USE_FLOWID:
+ case -LAGG_OPT_USE_FLOWID:
+ case LAGG_OPT_FLOWIDSHIFT:
+ valid = 1;
+ lacp = 0;
+ break;
+ case LAGG_OPT_LACP_TXTEST:
+ case -LAGG_OPT_LACP_TXTEST:
+ case LAGG_OPT_LACP_RXTEST:
+ case -LAGG_OPT_LACP_RXTEST:
+ case LAGG_OPT_LACP_STRICT:
+ case -LAGG_OPT_LACP_STRICT:
+ case LAGG_OPT_LACP_TIMEOUT:
+ case -LAGG_OPT_LACP_TIMEOUT:
+ valid = lacp = 1;
+ break;
+ default:
+ valid = lacp = 0;
break;
}
- for (int i = 0; i < (sizeof(lagg_protos) /
- sizeof(lagg_protos[0])); i++) {
- if (lagg_protos[i].ti_proto == ra->ra_proto) {
- if (sc->sc_ifflags & IFF_DEBUG)
- printf("%s: using proto %u\n",
- sc->sc_ifname,
- lagg_protos[i].ti_proto);
- sc->sc_proto = lagg_protos[i].ti_proto;
- if (sc->sc_proto != LAGG_PROTO_NONE)
- error = lagg_protos[i].ti_attach(sc);
- LAGG_WUNLOCK(sc);
- return (error);
+
+ LAGG_WLOCK(sc);
+
+ if (valid == 0 ||
+ (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
+ /* Invalid combination of options specified. */
+ error = EINVAL;
+ LAGG_WUNLOCK(sc);
+ break; /* Return from SIOCSLAGGOPTS. */
+ }
+ /*
+ * Store new options into sc->sc_opts except for
+ * FLOWIDSHIFT and LACP options.
+ */
+ if (lacp == 0) {
+ if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
+ sc->flowid_shift = ro->ro_flowid_shift;
+ else if (ro->ro_opts > 0)
+ sc->sc_opts |= ro->ro_opts;
+ else
+ sc->sc_opts &= ~ro->ro_opts;
+ } else {
+ struct lacp_softc *lsc;
+ struct lacp_port *lp;
+
+ lsc = (struct lacp_softc *)sc->sc_psc;
+
+ switch (ro->ro_opts) {
+ case LAGG_OPT_LACP_TXTEST:
+ lsc->lsc_debug.lsc_tx_test = 1;
+ break;
+ case -LAGG_OPT_LACP_TXTEST:
+ lsc->lsc_debug.lsc_tx_test = 0;
+ break;
+ case LAGG_OPT_LACP_RXTEST:
+ lsc->lsc_debug.lsc_rx_test = 1;
+ break;
+ case -LAGG_OPT_LACP_RXTEST:
+ lsc->lsc_debug.lsc_rx_test = 0;
+ break;
+ case LAGG_OPT_LACP_STRICT:
+ lsc->lsc_strict_mode = 1;
+ break;
+ case -LAGG_OPT_LACP_STRICT:
+ lsc->lsc_strict_mode = 0;
+ break;
+ case LAGG_OPT_LACP_TIMEOUT:
+ LACP_LOCK(lsc);
+ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
+ lp->lp_state |= LACP_STATE_TIMEOUT;
+ LACP_UNLOCK(lsc);
+ lsc->lsc_fast_timeout = 1;
+ break;
+ case -LAGG_OPT_LACP_TIMEOUT:
+ LACP_LOCK(lsc);
+ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
+ lp->lp_state &= ~LACP_STATE_TIMEOUT;
+ LACP_UNLOCK(lsc);
+ lsc->lsc_fast_timeout = 0;
+ break;
}
}
LAGG_WUNLOCK(sc);
- error = EPROTONOSUPPORT;
break;
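
Note how an option is set by passing its flag value in ro_opts and cleared by passing the arithmetic negation, which is what the paired case labels above match on. A hedged userland sketch driving SIOCSLAGGOPTS this way (socket setup, interface name, and error handling are illustrative):

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <net/if_lagg.h>
	#include <string.h>

	/* Toggle flowid-based Tx port selection on lagg0 (sketch). */
	static int
	lagg_set_use_flowid(int s, int enable)
	{
		struct lagg_reqopts ro;

		memset(&ro, 0, sizeof(ro));
		strlcpy(ro.ro_ifname, "lagg0", sizeof(ro.ro_ifname));
		ro.ro_opts = enable ? LAGG_OPT_USE_FLOWID : -LAGG_OPT_USE_FLOWID;
		return (ioctl(s, SIOCSLAGGOPTS, &ro));
	}
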
case SIOCGLAGGFLAGS:
- rf->rf_flags = sc->sc_flags;
+ rf->rf_flags = 0;
+ LAGG_RLOCK(sc, &tracker);
+ if (sc->sc_flags & MBUF_HASHFLAG_L2)
+ rf->rf_flags |= LAGG_F_HASHL2;
+ if (sc->sc_flags & MBUF_HASHFLAG_L3)
+ rf->rf_flags |= LAGG_F_HASHL3;
+ if (sc->sc_flags & MBUF_HASHFLAG_L4)
+ rf->rf_flags |= LAGG_F_HASHL4;
+ LAGG_RUNLOCK(sc, &tracker);
break;
case SIOCSLAGGHASH:
error = priv_check(td, PRIV_NET_LAGG);
@@ -1057,8 +1420,13 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
LAGG_WLOCK(sc);
- sc->sc_flags &= ~LAGG_F_HASHMASK;
- sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
+ sc->sc_flags = 0;
+ if (rf->rf_flags & LAGG_F_HASHL2)
+ sc->sc_flags |= MBUF_HASHFLAG_L2;
+ if (rf->rf_flags & LAGG_F_HASHL3)
+ sc->sc_flags |= MBUF_HASHFLAG_L3;
+ if (rf->rf_flags & LAGG_F_HASHL4)
+ sc->sc_flags |= MBUF_HASHFLAG_L4;
LAGG_WUNLOCK(sc);
break;
case SIOCGLAGGPORT:
@@ -1068,16 +1436,16 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
lp->lp_softc != sc) {
error = ENOENT;
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
}
lagg_port2req(lp, rp);
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
case SIOCSLAGGPORT:
error = priv_check(td, PRIV_NET_LAGG);
@@ -1088,6 +1456,26 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = EINVAL;
break;
}
+#ifdef INET6
+ /*
+	 * A laggport interface should not have an inet6 address
+	 * because two interfaces with a valid link-local
+	 * scope zone must not be merged in any form.  This
+	 * restriction is needed to prevent a violation of
+	 * the link-local scope zone.  Attempts to add a
+	 * laggport interface that has inet6 addresses trigger
+	 * removal of all inet6 addresses on the member
+	 * interface.
+ */
+ if (in6ifa_llaonifp(tpif)) {
+ in6_ifdetach(tpif);
+ if_printf(sc->sc_ifp,
+ "IPv6 addresses on %s have been removed "
+ "before adding it as a member to prevent "
+ "IPv6 address scope violation.\n",
+ tpif->if_xname);
+ }
+#endif
LAGG_WLOCK(sc);
error = lagg_port_create(sc, tpif);
LAGG_WUNLOCK(sc);
@@ -1186,39 +1574,39 @@ lagg_ether_cmdmulti(struct lagg_port *lp, int set)
struct ifnet *ifp = lp->lp_ifp;
struct ifnet *scifp = sc->sc_ifp;
struct lagg_mc *mc;
- struct ifmultiaddr *ifma, *rifma = NULL;
- struct sockaddr_dl sdl;
+ struct ifmultiaddr *ifma;
int error;
LAGG_WLOCK_ASSERT(sc);
- bzero((char *)&sdl, sizeof(sdl));
- sdl.sdl_len = sizeof(sdl);
- sdl.sdl_family = AF_LINK;
- sdl.sdl_type = IFT_ETHER;
- sdl.sdl_alen = ETHER_ADDR_LEN;
- sdl.sdl_index = ifp->if_index;
-
if (set) {
+ IF_ADDR_WLOCK(scifp);
TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
- bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
- LLADDR(&sdl), ETHER_ADDR_LEN);
-
- error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
- if (error)
- return (error);
mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
- if (mc == NULL)
+ if (mc == NULL) {
+ IF_ADDR_WUNLOCK(scifp);
return (ENOMEM);
- mc->mc_ifma = rifma;
+ }
+ bcopy(ifma->ifma_addr, &mc->mc_addr,
+ ifma->ifma_addr->sa_len);
+ mc->mc_addr.sdl_index = ifp->if_index;
+ mc->mc_ifma = NULL;
SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
}
+ IF_ADDR_WUNLOCK(scifp);
+ SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
+ error = if_addmulti(ifp,
+ (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
+ if (error)
+ return (error);
+ }
} else {
while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
- if_delmulti_ifma(mc->mc_ifma);
+ if (mc->mc_ifma && !lp->lp_detaching)
+ if_delmulti_ifma(mc->mc_ifma);
free(mc, M_DEVBUF);
}
}
@@ -1228,7 +1616,7 @@ lagg_ether_cmdmulti(struct lagg_port *lp, int set)
/* Handle a ref counted flag that should be set on the lagg port as well */
static int
lagg_setflag(struct lagg_port *lp, int flag, int status,
- int (*func)(struct ifnet *, int))
+ int (*func)(struct ifnet *, int))
{
struct lagg_softc *sc = lp->lp_softc;
struct ifnet *scifp = sc->sc_ifp;
@@ -1283,30 +1671,27 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error, len, mcast;
+ struct rm_priotracker tracker;
len = m->m_pkthdr.len;
mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
/* We need a Tx algorithm and at least one port */
if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (ENXIO);
}
ETHER_BPF_MTAP(ifp, m);
- error = (*sc->sc_start)(sc, m);
- LAGG_RUNLOCK(sc);
+ error = lagg_proto_start(sc, m);
+ LAGG_RUNLOCK(sc, &tracker);
- if (error == 0) {
- ifp->if_opackets++;
- ifp->if_omcasts += mcast;
- ifp->if_obytes += len;
- } else
- ifp->if_oerrors++;
+ if (error != 0)
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (error);
}
@@ -1325,31 +1710,33 @@ lagg_input(struct ifnet *ifp, struct mbuf *m)
struct lagg_port *lp = ifp->if_lagg;
struct lagg_softc *sc = lp->lp_softc;
struct ifnet *scifp = sc->sc_ifp;
+ struct rm_priotracker tracker;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
(lp->lp_flags & LAGG_PORT_DISABLED) ||
sc->sc_proto == LAGG_PROTO_NONE) {
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
m_freem(m);
return (NULL);
}
ETHER_BPF_MTAP(scifp, m);
- m = (*sc->sc_input)(sc, lp, m);
+ if (lp->lp_detaching != 0) {
+ m_freem(m);
+ m = NULL;
+ } else
+ m = lagg_proto_input(sc, lp, m);
if (m != NULL) {
- scifp->if_ipackets++;
- scifp->if_ibytes += m->m_pkthdr.len;
-
if (scifp->if_flags & IFF_MONITOR) {
m_freem(m);
m = NULL;
}
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
return (m);
}
@@ -1370,16 +1757,17 @@ lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
struct lagg_port *lp;
+ struct rm_priotracker tracker;
imr->ifm_status = IFM_AVALID;
imr->ifm_active = IFM_ETHER | IFM_AUTO;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
if (LAGG_PORTACTIVE(lp))
imr->ifm_status |= IFM_ACTIVE;
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
}
static void
@@ -1391,7 +1779,7 @@ lagg_linkstate(struct lagg_softc *sc)
/* Our link is considered up if at least one of our ports is active */
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
- if (lp->lp_link_state == LINK_STATE_UP) {
+ if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
new_link = LINK_STATE_UP;
break;
}
@@ -1406,7 +1794,7 @@ lagg_linkstate(struct lagg_softc *sc)
break;
case LAGG_PROTO_ROUNDROBIN:
case LAGG_PROTO_LOADBALANCE:
- case LAGG_PROTO_ETHERCHANNEL:
+ case LAGG_PROTO_BROADCAST:
speed = 0;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
speed += lp->lp_ifp->if_baudrate;
@@ -1431,8 +1819,7 @@ lagg_port_state(struct ifnet *ifp, int state)
LAGG_WLOCK(sc);
lagg_linkstate(sc);
- if (sc->sc_linkstate != NULL)
- (*sc->sc_linkstate)(lp);
+ lagg_proto_linkstate(sc, lp);
LAGG_WUNLOCK(sc);
}
@@ -1487,120 +1874,6 @@ found:
return (rval);
}
-static const void *
-lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
-{
- if (m->m_pkthdr.len < (off + len)) {
- return (NULL);
- } else if (m->m_len < (off + len)) {
- m_copydata(m, off, len, buf);
- return (buf);
- }
- return (mtod(m, char *) + off);
-}
-
-uint32_t
-lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
-{
- uint16_t etype;
- uint32_t p = key;
- int off;
- struct ether_header *eh;
- const struct ether_vlan_header *vlan;
-#ifdef INET
- const struct ip *ip;
- const uint32_t *ports;
- int iphlen;
-#endif
-#ifdef INET6
- const struct ip6_hdr *ip6;
- uint32_t flow;
-#endif
- union {
-#ifdef INET
- struct ip ip;
-#endif
-#ifdef INET6
- struct ip6_hdr ip6;
-#endif
- struct ether_vlan_header vlan;
- uint32_t port;
- } buf;
-
-
- off = sizeof(*eh);
- if (m->m_len < off)
- goto out;
- eh = mtod(m, struct ether_header *);
- etype = ntohs(eh->ether_type);
- if (sc->sc_flags & LAGG_F_HASHL2) {
- p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
- p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
- }
-
- /* Special handling for encapsulating VLAN frames */
- if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
- p = hash32_buf(&m->m_pkthdr.ether_vtag,
- sizeof(m->m_pkthdr.ether_vtag), p);
- } else if (etype == ETHERTYPE_VLAN) {
- vlan = lagg_gethdr(m, off, sizeof(*vlan), &buf);
- if (vlan == NULL)
- goto out;
-
- if (sc->sc_flags & LAGG_F_HASHL2)
- p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
- etype = ntohs(vlan->evl_proto);
- off += sizeof(*vlan) - sizeof(*eh);
- }
-
- switch (etype) {
-#ifdef INET
- case ETHERTYPE_IP:
- ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
- if (ip == NULL)
- goto out;
-
- if (sc->sc_flags & LAGG_F_HASHL3) {
- p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
- p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
- }
- if (!(sc->sc_flags & LAGG_F_HASHL4))
- break;
- switch (ip->ip_p) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_SCTP:
- iphlen = ip->ip_hl << 2;
- if (iphlen < sizeof(*ip))
- break;
- off += iphlen;
- ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
- if (ports == NULL)
- break;
- p = hash32_buf(ports, sizeof(*ports), p);
- break;
- }
- break;
-#endif
-#ifdef INET6
- case ETHERTYPE_IPV6:
- if (!(sc->sc_flags & LAGG_F_HASHL3))
- break;
- ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
- if (ip6 == NULL)
- goto out;
-
- p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
- p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
- flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
- p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
- break;
-#endif
- }
-out:
- return (p);
-}
-
int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
@@ -1611,24 +1884,12 @@ lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
/*
* Simple round robin aggregation
*/
-
-static int
+static void
lagg_rr_attach(struct lagg_softc *sc)
{
- sc->sc_detach = lagg_rr_detach;
- sc->sc_start = lagg_rr_start;
- sc->sc_input = lagg_rr_input;
- sc->sc_port_create = NULL;
sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
sc->sc_seq = 0;
-
- return (0);
-}
-
-static int
-lagg_rr_detach(struct lagg_softc *sc)
-{
- return (0);
+ sc->sc_bkt_count = sc->sc_bkt;
}
static int
@@ -1637,9 +1898,21 @@ lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
struct lagg_port *lp;
uint32_t p;
- p = atomic_fetchadd_32(&sc->sc_seq, 1);
+ if (sc->sc_bkt_count == 0 && sc->sc_bkt > 0)
+ sc->sc_bkt_count = sc->sc_bkt;
+
+ if (sc->sc_bkt > 0) {
+ atomic_subtract_int(&sc->sc_bkt_count, 1);
+ if (atomic_cmpset_int(&sc->sc_bkt_count, 0, sc->sc_bkt))
+ p = atomic_fetchadd_32(&sc->sc_seq, 1);
+ else
+ p = sc->sc_seq;
+ } else
+ p = atomic_fetchadd_32(&sc->sc_seq, 1);
+
p %= sc->sc_count;
lp = SLIST_FIRST(&sc->sc_ports);
+
while (p--)
lp = SLIST_NEXT(lp, lp_entries);
@@ -1668,27 +1941,69 @@ lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
}
/*
- * Active failover
+ * Broadcast mode
*/
-
static int
-lagg_fail_attach(struct lagg_softc *sc)
+lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
{
- sc->sc_detach = lagg_fail_detach;
- sc->sc_start = lagg_fail_start;
- sc->sc_input = lagg_fail_input;
- sc->sc_port_create = NULL;
- sc->sc_port_destroy = NULL;
+ int active_ports = 0;
+ int errors = 0;
+ int ret;
+ struct lagg_port *lp, *last = NULL;
+ struct mbuf *m0;
+
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ if (!LAGG_PORTACTIVE(lp))
+ continue;
+
+ active_ports++;
+
+ if (last != NULL) {
+ m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
+ if (m0 == NULL) {
+ ret = ENOBUFS;
+ errors++;
+ break;
+ }
+
+ ret = lagg_enqueue(last->lp_ifp, m0);
+ if (ret != 0)
+ errors++;
+ }
+ last = lp;
+ }
+ if (last == NULL) {
+ m_freem(m);
+ return (ENOENT);
+ }
+ if ((last = lagg_link_active(sc, last)) == NULL) {
+ m_freem(m);
+ return (ENETDOWN);
+ }
+
+ ret = lagg_enqueue(last->lp_ifp, m);
+ if (ret != 0)
+ errors++;
+
+ if (errors == 0)
+ return (ret);
return (0);
}
-static int
-lagg_fail_detach(struct lagg_softc *sc)
+static struct mbuf*
+lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
- return (0);
+ struct ifnet *ifp = sc->sc_ifp;
+
+ /* Just pass in the packet to our lagg device */
+ m->m_pkthdr.rcvif = ifp;
+ return (m);
}
+/*
+ * Active failover
+ */
static int
lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
{
@@ -1710,7 +2025,7 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
struct ifnet *ifp = sc->sc_ifp;
struct lagg_port *tmp_tp;
- if (lp == sc->sc_primary || lagg_failover_rx_all) {
+ if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
m->m_pkthdr.rcvif = ifp;
return (m);
}
@@ -1718,7 +2033,7 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
if (!LAGG_PORTACTIVE(sc->sc_primary)) {
tmp_tp = lagg_link_active(sc, sc->sc_primary);
/*
- * If tmp_tp is null, we've recieved a packet when all
+ * If tmp_tp is null, we've received a packet when all
* our links are down. Weird, but process it anyways.
*/
if ((tmp_tp == NULL || tmp_tp == lp)) {
@@ -1734,40 +2049,32 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
/*
* Loadbalancing
*/
-
-static int
+static void
lagg_lb_attach(struct lagg_softc *sc)
{
struct lagg_port *lp;
struct lagg_lb *lb;
- if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
- M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
- return (ENOMEM);
+ lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
- sc->sc_detach = lagg_lb_detach;
- sc->sc_start = lagg_lb_start;
- sc->sc_input = lagg_lb_input;
- sc->sc_port_create = lagg_lb_port_create;
- sc->sc_port_destroy = lagg_lb_port_destroy;
sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
- lb->lb_key = arc4random();
- sc->sc_psc = (caddr_t)lb;
+ lb->lb_key = m_ether_tcpip_hash_init();
+ sc->sc_psc = lb;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
lagg_lb_port_create(lp);
-
- return (0);
}
-static int
+static void
lagg_lb_detach(struct lagg_softc *sc)
{
- struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
+ struct lagg_lb *lb;
+
+ lb = (struct lagg_lb *)sc->sc_psc;
+ LAGG_WUNLOCK(sc);
if (lb != NULL)
free(lb, M_DEVBUF);
- return (0);
}
static int
@@ -1785,7 +2092,7 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
return (EINVAL);
if (sc->sc_ifflags & IFF_DEBUG)
printf("%s: port %s at index %d\n",
- sc->sc_ifname, lp_next->lp_ifname, i);
+ sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
lb->lb_ports[i++] = lp_next;
}
@@ -1813,10 +2120,11 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
struct lagg_port *lp = NULL;
uint32_t p = 0;
- if (sc->use_flowid && (m->m_flags & M_FLOWID))
- p = m->m_pkthdr.flowid;
+ if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+ M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ p = m->m_pkthdr.flowid >> sc->flowid_shift;
else
- p = lagg_hashmbuf(sc, m, lb->lb_key);
+ p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
p %= sc->sc_count;
lp = lb->lb_ports[p];
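
When LAGG_OPT_USE_FLOWID is set and the mbuf carries a hash, the Tx port index is the flowid shifted right by flowid_shift (discarding low-order bits that may have poor entropy) modulo the port count; otherwise m_ether_tcpip_hash() supplies the hash. A compact model of that choice (illustrative; fallback_hash stands in for m_ether_tcpip_hash()):

	#include <stdint.h>

	static uint32_t
	lb_pick_port(int have_flowid, uint32_t flowid, int shift,
	    uint32_t fallback_hash, uint32_t nports)
	{
		uint32_t p;

		p = have_flowid ? (flowid >> shift) : fallback_hash;
		return (p % nports);
	}
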
@@ -1847,50 +2155,30 @@ lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
/*
* 802.3ad LACP
*/
-
-static int
+static void
lagg_lacp_attach(struct lagg_softc *sc)
{
struct lagg_port *lp;
- int error;
-
- sc->sc_detach = lagg_lacp_detach;
- sc->sc_port_create = lacp_port_create;
- sc->sc_port_destroy = lacp_port_destroy;
- sc->sc_linkstate = lacp_linkstate;
- sc->sc_start = lagg_lacp_start;
- sc->sc_input = lagg_lacp_input;
- sc->sc_init = lacp_init;
- sc->sc_stop = lacp_stop;
- sc->sc_lladdr = lagg_lacp_lladdr;
- sc->sc_req = lacp_req;
- sc->sc_portreq = lacp_portreq;
-
- error = lacp_attach(sc);
- if (error)
- return (error);
+ lacp_attach(sc);
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
lacp_port_create(lp);
-
- return (error);
}
-static int
+static void
lagg_lacp_detach(struct lagg_softc *sc)
{
struct lagg_port *lp;
- int error;
+ void *psc;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
lacp_port_destroy(lp);
- /* unlocking is safe here */
+ psc = sc->sc_psc;
+ sc->sc_psc = NULL;
LAGG_WUNLOCK(sc);
- error = lacp_detach(sc);
- LAGG_WLOCK(sc);
- return (error);
+ lacp_detach(psc);
}
static void
@@ -1951,3 +2239,4 @@ lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
m->m_pkthdr.rcvif = ifp;
return (m);
}
+
diff --git a/freebsd/sys/net/if_lagg.h b/freebsd/sys/net/if_lagg.h
index 27ab46f2..334995e5 100644
--- a/freebsd/sys/net/if_lagg.h
+++ b/freebsd/sys/net/if_lagg.h
@@ -21,8 +21,6 @@
#ifndef _NET_LAGG_H
#define _NET_LAGG_H
-#include <sys/sysctl.h>
-
/*
* Global definitions
*/
@@ -49,26 +47,28 @@
"\05DISTRIBUTING\06DISABLED"
/* Supported lagg PROTOs */
-#define LAGG_PROTO_NONE 0 /* no lagg protocol defined */
-#define LAGG_PROTO_ROUNDROBIN 1 /* simple round robin */
-#define LAGG_PROTO_FAILOVER 2 /* active failover */
-#define LAGG_PROTO_LOADBALANCE 3 /* loadbalance */
-#define LAGG_PROTO_LACP 4 /* 802.3ad lacp */
-#define LAGG_PROTO_ETHERCHANNEL 5 /* Cisco FEC */
-#define LAGG_PROTO_MAX 6
+typedef enum {
+ LAGG_PROTO_NONE = 0, /* no lagg protocol defined */
+ LAGG_PROTO_ROUNDROBIN, /* simple round robin */
+ LAGG_PROTO_FAILOVER, /* active failover */
+ LAGG_PROTO_LOADBALANCE, /* loadbalance */
+ LAGG_PROTO_LACP, /* 802.3ad lacp */
+ LAGG_PROTO_BROADCAST, /* broadcast */
+ LAGG_PROTO_MAX,
+} lagg_proto;
struct lagg_protos {
const char *lpr_name;
- int lpr_proto;
+ lagg_proto lpr_proto;
};
#define LAGG_PROTO_DEFAULT LAGG_PROTO_FAILOVER
#define LAGG_PROTOS { \
- { "failover", LAGG_PROTO_FAILOVER }, \
- { "fec", LAGG_PROTO_ETHERCHANNEL }, \
+ { "failover", LAGG_PROTO_FAILOVER }, \
{ "lacp", LAGG_PROTO_LACP }, \
{ "loadbalance", LAGG_PROTO_LOADBALANCE }, \
- { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \
+ { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \
+ { "broadcast", LAGG_PROTO_BROADCAST }, \
{ "none", LAGG_PROTO_NONE }, \
{ "default", LAGG_PROTO_DEFAULT } \
}
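
Consumers such as ifconfig(8) can instantiate this table to map a protocol name onto its lagg_proto value. A hedged sketch (the helper is illustrative, not an existing API):

	#include <string.h>
	#include <net/if.h>
	#include <net/if_lagg.h>

	static const struct lagg_protos lpr[] = LAGG_PROTOS;

	static lagg_proto
	lagg_proto_by_name(const char *name)
	{
		for (size_t i = 0; i < sizeof(lpr) / sizeof(lpr[0]); i++)
			if (strcmp(lpr[i].lpr_name, name) == 0)
				return (lpr[i].lpr_proto);
		return (LAGG_PROTO_NONE);
	}
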
@@ -136,16 +136,40 @@ struct lagg_reqflags {
#define SIOCGLAGGFLAGS _IOWR('i', 145, struct lagg_reqflags)
#define SIOCSLAGGHASH _IOW('i', 146, struct lagg_reqflags)
+struct lagg_reqopts {
+ char ro_ifname[IFNAMSIZ]; /* name of the lagg */
+
+ int ro_opts; /* Option bitmap */
+#define LAGG_OPT_NONE 0x00
+#define LAGG_OPT_USE_FLOWID 0x01 /* enable use of flowid */
+/* Pseudo flags which are used in ro_opts but not stored into sc_opts. */
+#define LAGG_OPT_FLOWIDSHIFT 0x02 /* set flowid shift */
+#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */
+#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */
+#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */
+#define LAGG_OPT_LACP_RXTEST 0x40 /* LACP debug: rxtest */
+#define LAGG_OPT_LACP_TIMEOUT 0x80 /* LACP timeout */
+ u_int ro_count; /* number of ports */
+ u_int ro_active; /* active port count */
+	u_int			ro_flapping;	/* number of flapping events */
+ int ro_flowid_shift; /* shift the flowid */
+ uint32_t ro_bkt; /* packet bucket for roundrobin */
+};
+
+#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts)
+#define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts)
+
+#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \
+ "\006LACP_TXTEST\007LACP_RXTEST"
+
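
LAGG_OPT_BITS is a kernel printf(9) %b format string: the leading \020 selects hexadecimal output for the raw value, and each following octal byte names a bit position (\001 is bit 1, i.e. 0x01; \005 is bit 5, i.e. 0x10). For example, with both USE_FLOWID (0x01) and LACP_STRICT (0x10) set (a sketch, kernel context assumed):

	printf("%b\n", 0x11, LAGG_OPT_BITS);
	/* prints: 11<USE_FLOWID,LACP_STRICT> */
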
#ifdef _KERNEL
+
/*
* Internal kernel part
*/
-#define lp_ifname lp_ifp->if_xname /* interface name */
-#define lp_link_state lp_ifp->if_link_state /* link state */
-
#define LAGG_PORTACTIVE(_tp) ( \
- ((_tp)->lp_link_state == LINK_STATE_UP) && \
+ ((_tp)->lp_ifp->if_link_state == LINK_STATE_UP) && \
((_tp)->lp_ifp->if_flags & IFF_UP) \
)
@@ -173,25 +197,39 @@ struct lagg_lb {
};
struct lagg_mc {
+ struct sockaddr_dl mc_addr;
struct ifmultiaddr *mc_ifma;
SLIST_ENTRY(lagg_mc) mc_entries;
};
+typedef enum {
+ LAGG_LLQTYPE_PHYS = 0, /* Task related to physical (underlying) port */
+ LAGG_LLQTYPE_VIRT, /* Task related to lagg interface itself */
+} lagg_llqtype;
+
/* List of interfaces to have the MAC address modified */
struct lagg_llq {
struct ifnet *llq_ifp;
uint8_t llq_lladdr[ETHER_ADDR_LEN];
+ lagg_llqtype llq_type;
SLIST_ENTRY(lagg_llq) llq_entries;
};
+struct lagg_counters {
+ uint64_t val[IFCOUNTERS];
+};
+
struct lagg_softc {
struct ifnet *sc_ifp; /* virtual interface */
- struct rwlock sc_mtx;
+ struct rmlock sc_mtx;
int sc_proto; /* lagg protocol */
u_int sc_count; /* number of ports */
+ u_int sc_active; /* active port count */
+ u_int sc_flapping; /* number of flapping
+ * events */
struct lagg_port *sc_primary; /* primary port */
struct ifmedia sc_media; /* media config */
- caddr_t sc_psc; /* protocol data */
+ void *sc_psc; /* protocol data */
uint32_t sc_seq; /* sequence counter */
uint32_t sc_flags;
@@ -201,26 +239,14 @@ struct lagg_softc {
struct task sc_lladdr_task;
SLIST_HEAD(__llqhd, lagg_llq) sc_llq_head; /* interfaces to program
the lladdr on */
-
- /* lagg protocol callbacks */
- int (*sc_detach)(struct lagg_softc *);
- int (*sc_start)(struct lagg_softc *, struct mbuf *);
- struct mbuf *(*sc_input)(struct lagg_softc *, struct lagg_port *,
- struct mbuf *);
- int (*sc_port_create)(struct lagg_port *);
- void (*sc_port_destroy)(struct lagg_port *);
- void (*sc_linkstate)(struct lagg_port *);
- void (*sc_init)(struct lagg_softc *);
- void (*sc_stop)(struct lagg_softc *);
- void (*sc_lladdr)(struct lagg_softc *);
- void (*sc_req)(struct lagg_softc *, caddr_t);
- void (*sc_portreq)(struct lagg_port *, caddr_t);
-#if __FreeBSD_version >= 800000
eventhandler_tag vlan_attach;
eventhandler_tag vlan_detach;
-#endif
- struct sysctl_ctx_list ctx; /* sysctl variables */
- int use_flowid; /* use M_FLOWID */
+ struct callout sc_callout;
+ u_int sc_opts;
+ int flowid_shift; /* shift the flowid */
+	uint32_t			sc_bkt;		/* packets bucket for roundrobin */
+	uint32_t			sc_bkt_count;	/* packets bucket count for roundrobin */
+ struct lagg_counters detached_counters; /* detached ports sum */
};
struct lagg_port {
@@ -233,33 +259,36 @@ struct lagg_port {
uint32_t lp_flags; /* port flags */
int lp_ifflags; /* saved ifp flags */
void *lh_cookie; /* if state hook */
- caddr_t lp_psc; /* protocol data */
+ void *lp_psc; /* protocol data */
int lp_detaching; /* ifnet is detaching */
SLIST_HEAD(__mclhd, lagg_mc) lp_mc_head; /* multicast addresses */
/* Redirected callbacks */
int (*lp_ioctl)(struct ifnet *, u_long, caddr_t);
- int (*lp_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
+ int (*lp_output)(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
+ struct lagg_counters port_counters; /* ifp counters copy */
SLIST_ENTRY(lagg_port) lp_entries;
};
-#define LAGG_LOCK_INIT(_sc) rw_init(&(_sc)->sc_mtx, "if_lagg rwlock")
-#define LAGG_LOCK_DESTROY(_sc) rw_destroy(&(_sc)->sc_mtx)
-#define LAGG_RLOCK(_sc) rw_rlock(&(_sc)->sc_mtx)
-#define LAGG_WLOCK(_sc) rw_wlock(&(_sc)->sc_mtx)
-#define LAGG_RUNLOCK(_sc) rw_runlock(&(_sc)->sc_mtx)
-#define LAGG_WUNLOCK(_sc) rw_wunlock(&(_sc)->sc_mtx)
-#define LAGG_RLOCK_ASSERT(_sc) rw_assert(&(_sc)->sc_mtx, RA_RLOCKED)
-#define LAGG_WLOCK_ASSERT(_sc) rw_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+#define LAGG_LOCK_INIT(_sc) rm_init(&(_sc)->sc_mtx, "if_lagg rmlock")
+#define LAGG_LOCK_DESTROY(_sc) rm_destroy(&(_sc)->sc_mtx)
+#define LAGG_RLOCK(_sc, _p) rm_rlock(&(_sc)->sc_mtx, (_p))
+#define LAGG_WLOCK(_sc) rm_wlock(&(_sc)->sc_mtx)
+#define LAGG_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->sc_mtx, (_p))
+#define LAGG_WUNLOCK(_sc) rm_wunlock(&(_sc)->sc_mtx)
+#define LAGG_RLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_RLOCKED)
+#define LAGG_WLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+#define LAGG_UNLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_UNLOCKED)
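
The rwlock-to-rmlock conversion is the reason every LAGG_RLOCK/LAGG_RUNLOCK call site in the diff gained a second argument: an rmlock reader registers a per-acquisition rm_priotracker, normally kept on the stack. The reader pattern, as a sketch:

	static void
	lagg_example_reader(struct lagg_softc *sc)
	{
		struct rm_priotracker tracker;	/* one per read acquisition */

		LAGG_RLOCK(sc, &tracker);
		/* ... read-mostly access to sc ... */
		LAGG_RUNLOCK(sc, &tracker);
	}

Read-mostly locks make the read path nearly free at the cost of more expensive write acquisitions, a good fit here since the lagg data path takes the lock read-only on every packet.
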
extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
extern void (*lagg_linkstate_p)(struct ifnet *, int );
int lagg_enqueue(struct ifnet *, struct mbuf *);
-uint32_t lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t);
+
+SYSCTL_DECL(_net_link_lagg);
#endif /* _KERNEL */
diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c
index 55b816a7..20c0b9d2 100644
--- a/freebsd/sys/net/if_llatbl.c
+++ b/freebsd/sys/net/if_llatbl.c
@@ -64,17 +64,43 @@ __FBSDID("$FreeBSD$");
MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
-static VNET_DEFINE(SLIST_HEAD(, lltable), lltables);
+static VNET_DEFINE(SLIST_HEAD(, lltable), lltables) =
+ SLIST_HEAD_INITIALIZER(lltables);
#define V_lltables VNET(lltables)
-extern void arprequest(struct ifnet *, struct in_addr *, struct in_addr *,
- u_char *);
-
-static void vnet_lltable_init(void);
-
struct rwlock lltable_rwlock;
RW_SYSINIT(lltable_rwlock, &lltable_rwlock, "lltable_rwlock");
+static void lltable_unlink(struct lltable *llt);
+static void llentries_unlink(struct lltable *llt, struct llentries *head);
+
+static void htable_unlink_entry(struct llentry *lle);
+static void htable_link_entry(struct lltable *llt, struct llentry *lle);
+static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f,
+ void *farg);
+
+/*
+ * Dump lle state for a specific address family.
+ */
+static int
+lltable_dump_af(struct lltable *llt, struct sysctl_req *wr)
+{
+ int error;
+
+ LLTABLE_LOCK_ASSERT();
+
+ if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
+ return (0);
+ error = 0;
+
+ IF_AFDATA_RLOCK(llt->llt_ifp);
+ error = lltable_foreach_lle(llt,
+ (llt_foreach_cb_t *)llt->llt_dump_entry, wr);
+ IF_AFDATA_RUNLOCK(llt->llt_ifp);
+
+ return (error);
+}
+
/*
* Dump arp state for a specific address family.
*/
@@ -87,7 +113,7 @@ lltable_sysctl_dumparp(int af, struct sysctl_req *wr)
LLTABLE_RLOCK();
SLIST_FOREACH(llt, &V_lltables, llt_link) {
if (llt->llt_af == af) {
- error = llt->llt_dump(llt, wr);
+ error = lltable_dump_af(llt, wr);
if (error != 0)
goto done;
}
@@ -98,25 +124,144 @@ done:
}
/*
- * Deletes an address from the address table.
- * This function is called by the timer functions
- * such as arptimer() and nd6_llinfo_timer(), and
- * the caller does the locking.
+ * Common function helpers for chained hash table.
+ */
+
+/*
+ * Runs specified callback for each entry in @llt.
+ * Caller does the locking.
+ */
+static int
+htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg)
+{
+ struct llentry *lle, *next;
+ int i, error;
+
+ error = 0;
+
+ for (i = 0; i < llt->llt_hsize; i++) {
+ LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
+ error = f(llt, lle, farg);
+ if (error != 0)
+ break;
+ }
+ }
+
+ return (error);
+}
+
+static void
+htable_link_entry(struct lltable *llt, struct llentry *lle)
+{
+ struct llentries *lleh;
+ uint32_t hashidx;
+
+ if ((lle->la_flags & LLE_LINKED) != 0)
+ return;
+
+ IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp);
+
+ hashidx = llt->llt_hash(lle, llt->llt_hsize);
+ lleh = &llt->lle_head[hashidx];
+
+ lle->lle_tbl = llt;
+ lle->lle_head = lleh;
+ lle->la_flags |= LLE_LINKED;
+ LIST_INSERT_HEAD(lleh, lle, lle_next);
+}
+
+static void
+htable_unlink_entry(struct llentry *lle)
+{
+
+ if ((lle->la_flags & LLE_LINKED) != 0) {
+ IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
+ LIST_REMOVE(lle, lle_next);
+ lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
+#if 0
+ lle->lle_tbl = NULL;
+ lle->lle_head = NULL;
+#endif
+ }
+}
+
+struct prefix_match_data {
+ const struct sockaddr *addr;
+ const struct sockaddr *mask;
+ struct llentries dchain;
+ u_int flags;
+};
+
+static int
+htable_prefix_free_cb(struct lltable *llt, struct llentry *lle, void *farg)
+{
+ struct prefix_match_data *pmd;
+
+ pmd = (struct prefix_match_data *)farg;
+
+ if (llt->llt_match_prefix(pmd->addr, pmd->mask, pmd->flags, lle)) {
+ LLE_WLOCK(lle);
+ LIST_INSERT_HEAD(&pmd->dchain, lle, lle_chain);
+ }
+
+ return (0);
+}
+
+static void
+htable_prefix_free(struct lltable *llt, const struct sockaddr *addr,
+ const struct sockaddr *mask, u_int flags)
+{
+ struct llentry *lle, *next;
+ struct prefix_match_data pmd;
+
+ bzero(&pmd, sizeof(pmd));
+ pmd.addr = addr;
+ pmd.mask = mask;
+ pmd.flags = flags;
+ LIST_INIT(&pmd.dchain);
+
+ IF_AFDATA_WLOCK(llt->llt_ifp);
+ /* Push matching lles to chain */
+ lltable_foreach_lle(llt, htable_prefix_free_cb, &pmd);
+
+ llentries_unlink(llt, &pmd.dchain);
+ IF_AFDATA_WUNLOCK(llt->llt_ifp);
+
+ LIST_FOREACH_SAFE(lle, &pmd.dchain, lle_chain, next)
+ lltable_free_entry(llt, lle);
+}
+
+static void
+htable_free_tbl(struct lltable *llt)
+{
+
+ free(llt->lle_head, M_LLTABLE);
+ free(llt, M_LLTABLE);
+}
+
+static void
+llentries_unlink(struct lltable *llt, struct llentries *head)
+{
+ struct llentry *lle, *next;
+
+ LIST_FOREACH_SAFE(lle, head, lle_chain, next)
+ llt->llt_unlink_entry(lle);
+}
+
+/*
+ * Helper function used to drop all mbufs in hold queue.
*
* Returns the number of held packets, if any, that were dropped.
*/
size_t
-llentry_free(struct llentry *lle)
+lltable_drop_entry_queue(struct llentry *lle)
{
size_t pkts_dropped;
struct mbuf *next;
- IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
LLE_WLOCK_ASSERT(lle);
- LIST_REMOVE(lle, lle_next);
- lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
-
pkts_dropped = 0;
while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
next = lle->la_hold->m_nextpkt;
@@ -130,6 +275,162 @@ llentry_free(struct llentry *lle)
("%s: la_numheld %d > 0, pkts_droped %zd", __func__,
lle->la_numheld, pkts_dropped));
+ return (pkts_dropped);
+}
+
+void
+lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off)
+{
+
+ memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
+ lle->r_hdrlen = linkhdrsize;
+ lle->ll_addr = &lle->r_linkdata[lladdr_off];
+ lle->la_flags |= LLE_VALID;
+ lle->r_flags |= RLLE_VALID;
+}
+
+/*
+ * Tries to update @lle link-level address.
+ * Since update requires AFDATA WLOCK, function
+ * drops @lle lock, acquires AFDATA lock and then acquires
+ * @lle lock to maintain lock order.
+ *
+ * Returns 1 on success.
+ */
+int
+lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off)
+{
+
+ /* Perform real LLE update */
+ /* use afdata WLOCK to update fields */
+ LLE_WLOCK_ASSERT(lle);
+ LLE_ADDREF(lle);
+ LLE_WUNLOCK(lle);
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(lle);
+
+ /*
+	 * Since we dropped the LLE lock, another thread might have
+	 * deleted this lle.  Check and return.
+ */
+ if ((lle->la_flags & LLE_DELETED) != 0) {
+ IF_AFDATA_WUNLOCK(ifp);
+ LLE_FREE_LOCKED(lle);
+ return (0);
+ }
+
+ /* Update data */
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off);
+
+ IF_AFDATA_WUNLOCK(ifp);
+
+ LLE_REMREF(lle);
+
+ return (1);
+}
+
+ /*
+ * Helper function used to pre-compute full/partial link-layer
+ * header data suitable for feeding into if_output().
+ */
+int
+lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
+ char *buf, size_t *bufsize, int *lladdr_off)
+{
+ struct if_encap_req ereq;
+ int error;
+
+ bzero(buf, *bufsize);
+ bzero(&ereq, sizeof(ereq));
+ ereq.buf = buf;
+ ereq.bufsize = *bufsize;
+ ereq.rtype = IFENCAP_LL;
+ ereq.family = family;
+ ereq.lladdr = lladdr;
+ ereq.lladdr_len = ifp->if_addrlen;
+ error = ifp->if_requestencap(ifp, &ereq);
+ if (error == 0) {
+ *bufsize = ereq.bufsize;
+ *lladdr_off = ereq.lladdr_off;
+ }
+
+ return (error);
+}
+
+/*
+ * Update link-layer header for given @lle after
+ * interface lladdr was changed.
+ */
+static int
+llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg)
+{
+ struct ifnet *ifp;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ u_char *lladdr;
+ int lladdr_off;
+
+ ifp = (struct ifnet *)farg;
+
+ lladdr = lle->ll_addr;
+
+ LLE_WLOCK(lle);
+ if ((lle->la_flags & LLE_VALID) == 0) {
+ LLE_WUNLOCK(lle);
+ return (0);
+ }
+
+ if ((lle->la_flags & LLE_IFADDR) != 0)
+ lladdr = IF_LLADDR(ifp);
+
+ linkhdrsize = sizeof(linkhdr);
+ lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr, &linkhdrsize,
+ &lladdr_off);
+ memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
+ LLE_WUNLOCK(lle);
+
+ return (0);
+}
+
+/*
+ * Update all calculated headers for given @llt
+ */
+void
+lltable_update_ifaddr(struct lltable *llt)
+{
+
+ if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
+ return;
+
+ IF_AFDATA_WLOCK(llt->llt_ifp);
+ lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp);
+ IF_AFDATA_WUNLOCK(llt->llt_ifp);
+}
+
+/*
+ * Performs generic cleanup routines and frees lle.
+ *
+ * Called for non-linked entries, with callouts and
+ * other AF-specific cleanups performed.
+ *
+ * @lle must be passed WLOCK'ed
+ *
+ * Returns the number of held packets, if any, that were dropped.
+ */
+size_t
+llentry_free(struct llentry *lle)
+{
+ size_t pkts_dropped;
+
+ LLE_WLOCK_ASSERT(lle);
+
+ KASSERT((lle->la_flags & LLE_LINKED) == 0, ("freeing linked lle"));
+
+ pkts_dropped = lltable_drop_entry_queue(lle);
+
LLE_FREE_LOCKED(lle);
return (pkts_dropped);
@@ -144,22 +445,35 @@ struct llentry *
llentry_alloc(struct ifnet *ifp, struct lltable *lt,
struct sockaddr_storage *dst)
{
- struct llentry *la;
+ struct llentry *la, *la_tmp;
IF_AFDATA_RLOCK(ifp);
la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
IF_AFDATA_RUNLOCK(ifp);
- if ((la == NULL) &&
- (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
- IF_AFDATA_WLOCK(ifp);
- la = lla_lookup(lt, (LLE_CREATE | LLE_EXCLUSIVE),
- (struct sockaddr *)dst);
- IF_AFDATA_WUNLOCK(ifp);
- }
if (la != NULL) {
LLE_ADDREF(la);
LLE_WUNLOCK(la);
+ return (la);
+ }
+
+ if ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
+ la = lltable_alloc_entry(lt, 0, (struct sockaddr *)dst);
+ if (la == NULL)
+ return (NULL);
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(la);
+ /* Prefer any existing LLE over newly-created one */
+ la_tmp = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
+ if (la_tmp == NULL)
+ lltable_link_entry(lt, la);
+ IF_AFDATA_WUNLOCK(ifp);
+ if (la_tmp != NULL) {
+ lltable_free_entry(lt, la);
+ la = la_tmp;
+ }
+ LLE_ADDREF(la);
+ LLE_WUNLOCK(la);
}
return (la);
@@ -168,30 +482,47 @@ llentry_alloc(struct ifnet *ifp, struct lltable *lt,
/*
* Free all entries from given table and free itself.
*/
+
+static int
+lltable_free_cb(struct lltable *llt, struct llentry *lle, void *farg)
+{
+ struct llentries *dchain;
+
+ dchain = (struct llentries *)farg;
+
+ LLE_WLOCK(lle);
+ LIST_INSERT_HEAD(dchain, lle, lle_chain);
+
+ return (0);
+}
+
+/*
+ * Free all entries from given table and free itself.
+ */
void
lltable_free(struct lltable *llt)
{
struct llentry *lle, *next;
- int i;
+ struct llentries dchain;
KASSERT(llt != NULL, ("%s: llt is NULL", __func__));
- LLTABLE_WLOCK();
- SLIST_REMOVE(&V_lltables, llt, lltable, llt_link);
- LLTABLE_WUNLOCK();
+ lltable_unlink(llt);
+ LIST_INIT(&dchain);
IF_AFDATA_WLOCK(llt->llt_ifp);
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
- LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
- LLE_WLOCK(lle);
- if (callout_stop(&lle->la_timer))
- LLE_REMREF(lle);
- llentry_free(lle);
- }
- }
+ /* Push all lles to @dchain */
+ lltable_foreach_lle(llt, lltable_free_cb, &dchain);
+ llentries_unlink(llt, &dchain);
IF_AFDATA_WUNLOCK(llt->llt_ifp);
- free(llt, M_LLTABLE);
+ LIST_FOREACH_SAFE(lle, &dchain, lle_chain, next) {
+ if (callout_stop(&lle->lle_timer) > 0)
+ LLE_REMREF(lle);
+ llentry_free(lle);
+ }
+
+ llt->llt_free_tbl(llt);
}
#if 0
@@ -207,7 +538,7 @@ lltable_drain(int af)
if (llt->llt_af != af)
continue;
- for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
+ for (i=0; i < llt->llt_hsize; i++) {
LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
LLE_WLOCK(lle);
if (lle->la_hold) {
@@ -222,8 +553,42 @@ lltable_drain(int af)
}
#endif
+/*
+ * Deletes an address from given lltable.
+ * Used for userland interaction to remove
+ * individual entries. Skips entries added by OS.
+ */
+int
+lltable_delete_addr(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr)
+{
+ struct llentry *lle;
+ struct ifnet *ifp;
+
+ ifp = llt->llt_ifp;
+ IF_AFDATA_WLOCK(ifp);
+ lle = lla_lookup(llt, LLE_EXCLUSIVE, l3addr);
+
+ if (lle == NULL) {
+ IF_AFDATA_WUNLOCK(ifp);
+ return (ENOENT);
+ }
+ if ((lle->la_flags & LLE_IFADDR) != 0 && (flags & LLE_IFADDR) == 0) {
+ IF_AFDATA_WUNLOCK(ifp);
+ LLE_WUNLOCK(lle);
+ return (EPERM);
+ }
+
+ lltable_unlink_entry(llt, lle);
+ IF_AFDATA_WUNLOCK(ifp);
+
+ llt->llt_delete_entry(llt, lle);
+
+ return (0);
+}
+
void
-lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask,
+lltable_prefix_free(int af, struct sockaddr *addr, struct sockaddr *mask,
u_int flags)
{
struct lltable *llt;
@@ -233,38 +598,122 @@ lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask,
if (llt->llt_af != af)
continue;
- llt->llt_prefix_free(llt, prefix, mask, flags);
+ llt->llt_prefix_free(llt, addr, mask, flags);
}
LLTABLE_RUNLOCK();
}
-
-
-/*
- * Create a new lltable.
- */
struct lltable *
-lltable_init(struct ifnet *ifp, int af)
+lltable_allocate_htbl(uint32_t hsize)
{
struct lltable *llt;
- register int i;
+ int i;
- llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK);
+ llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK | M_ZERO);
+ llt->llt_hsize = hsize;
+ llt->lle_head = malloc(sizeof(struct llentries) * hsize,
+ M_LLTABLE, M_WAITOK | M_ZERO);
- llt->llt_af = af;
- llt->llt_ifp = ifp;
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++)
+ for (i = 0; i < llt->llt_hsize; i++)
LIST_INIT(&llt->lle_head[i]);
+ /* Set some default callbacks */
+ llt->llt_link_entry = htable_link_entry;
+ llt->llt_unlink_entry = htable_unlink_entry;
+ llt->llt_prefix_free = htable_prefix_free;
+ llt->llt_foreach_entry = htable_foreach_lle;
+ llt->llt_free_tbl = htable_free_tbl;
+
+ return (llt);
+}
+
+/*
+ * Links lltable to global llt list.
+ */
+void
+lltable_link(struct lltable *llt)
+{
+
LLTABLE_WLOCK();
SLIST_INSERT_HEAD(&V_lltables, llt, llt_link);
LLTABLE_WUNLOCK();
+}
- return (llt);
+static void
+lltable_unlink(struct lltable *llt)
+{
+
+ LLTABLE_WLOCK();
+ SLIST_REMOVE(&V_lltables, llt, lltable, llt_link);
+ LLTABLE_WUNLOCK();
+}
+
+/*
+ * External methods used by lltable consumers
+ */
+
+int
+lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg)
+{
+
+ return (llt->llt_foreach_entry(llt, f, farg));
+}
+
+struct llentry *
+lltable_alloc_entry(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr)
+{
+
+ return (llt->llt_alloc_entry(llt, flags, l3addr));
+}
+
+void
+lltable_free_entry(struct lltable *llt, struct llentry *lle)
+{
+
+ llt->llt_free_entry(llt, lle);
+}
+
+void
+lltable_link_entry(struct lltable *llt, struct llentry *lle)
+{
+
+ llt->llt_link_entry(llt, lle);
+}
+
+void
+lltable_unlink_entry(struct lltable *llt, struct llentry *lle)
+{
+
+ llt->llt_unlink_entry(lle);
+}
+
+void
+lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
+{
+ struct lltable *llt;
+
+ llt = lle->lle_tbl;
+ llt->llt_fill_sa_entry(lle, sa);
+}
+
+struct ifnet *
+lltable_get_ifp(const struct lltable *llt)
+{
+
+ return (llt->llt_ifp);
+}
+
+int
+lltable_get_af(const struct lltable *llt)
+{
+
+ return (llt->llt_af);
}
/*
- * Called in route_output when adding/deleting a route to an interface.
+ * Called in route_output when rtm_flags contains RTF_LLDATA.
*/
int
lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
@@ -274,14 +723,16 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
struct sockaddr *dst = (struct sockaddr *)info->rti_info[RTAX_DST];
struct ifnet *ifp;
struct lltable *llt;
- struct llentry *lle;
- u_int laflags = 0, flags = 0;
- int error = 0;
+ struct llentry *lle, *lle_tmp;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
+ u_int laflags = 0;
+ int error;
+
+ KASSERT(dl != NULL && dl->sdl_family == AF_LINK,
+ ("%s: invalid dl\n", __func__));
- if (dl == NULL || dl->sdl_family != AF_LINK) {
- log(LOG_INFO, "%s: invalid dl\n", __func__);
- return EINVAL;
- }
ifp = ifnet_byindex(dl->sdl_index);
if (ifp == NULL) {
log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n",
@@ -289,44 +740,6 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
return EINVAL;
}
- switch (rtm->rtm_type) {
- case RTM_ADD:
- if (rtm->rtm_flags & RTF_ANNOUNCE) {
- flags |= LLE_PUB;
-#ifdef INET
- if (dst->sa_family == AF_INET &&
- ((struct sockaddr_inarp *)dst)->sin_other != 0) {
- struct rtentry *rt;
- ((struct sockaddr_inarp *)dst)->sin_other = 0;
- rt = rtalloc1(dst, 0, 0);
- if (rt == NULL || !(rt->rt_flags & RTF_HOST)) {
- log(LOG_INFO, "%s: RTM_ADD publish "
- "(proxy only) is invalid\n",
- __func__);
- if (rt)
- RTFREE_LOCKED(rt);
- return EINVAL;
- }
- RTFREE_LOCKED(rt);
-
- flags |= LLE_PROXY;
- }
-#endif
- }
- flags |= LLE_CREATE;
- break;
-
- case RTM_DELETE:
- flags |= LLE_DELETE;
- break;
-
- case RTM_CHANGE:
- break;
-
- default:
- return EINVAL; /* XXX not implemented yet */
- }
-
/* XXX linked list may be too expensive */
LLTABLE_RLOCK();
SLIST_FOREACH(llt, &V_lltables, llt_link) {
@@ -337,73 +750,82 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
LLTABLE_RUNLOCK();
KASSERT(llt != NULL, ("Yep, ugly hacks are bad\n"));
- if (flags & LLE_CREATE)
- flags |= LLE_EXCLUSIVE;
-
- IF_AFDATA_LOCK(ifp);
- lle = lla_lookup(llt, flags, dst);
- IF_AFDATA_UNLOCK(ifp);
- if (LLE_IS_VALID(lle)) {
- if (flags & LLE_CREATE) {
- /*
- * If we delay the delete, then a subsequent
- * "arp add" should look up this entry, reset the
- * LLE_DELETED flag, and reset the expiration timer
- */
- bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen);
- lle->la_flags |= (flags & (LLE_PUB | LLE_PROXY));
- lle->la_flags |= LLE_VALID;
- lle->la_flags &= ~LLE_DELETED;
-#ifdef INET6
- /*
- * ND6
- */
- if (dst->sa_family == AF_INET6)
- lle->ln_state = ND6_LLINFO_REACHABLE;
-#endif
- /*
- * NB: arp and ndp always set (RTF_STATIC | RTF_HOST)
- */
-
- if (rtm->rtm_rmx.rmx_expire == 0) {
- lle->la_flags |= LLE_STATIC;
- lle->la_expire = 0;
- } else
- lle->la_expire = rtm->rtm_rmx.rmx_expire;
- laflags = lle->la_flags;
- LLE_WUNLOCK(lle);
-#ifdef INET
- /* gratuitous ARP */
- if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) {
- arprequest(ifp,
- &((struct sockaddr_in *)dst)->sin_addr,
- &((struct sockaddr_in *)dst)->sin_addr,
- ((laflags & LLE_PROXY) ?
- (u_char *)IF_LLADDR(ifp) :
- (u_char *)LLADDR(dl)));
+ error = 0;
+
+ switch (rtm->rtm_type) {
+ case RTM_ADD:
+ /* Add static LLE */
+ laflags = 0;
+ if (rtm->rtm_rmx.rmx_expire == 0)
+ laflags = LLE_STATIC;
+ lle = lltable_alloc_entry(llt, laflags, dst);
+ if (lle == NULL)
+ return (ENOMEM);
+
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return (EINVAL);
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
+ if ((rtm->rtm_flags & RTF_ANNOUNCE))
+ lle->la_flags |= LLE_PUB;
+ lle->la_expire = rtm->rtm_rmx.rmx_expire;
+
+ laflags = lle->la_flags;
+
+ /* Try to link new entry */
+ lle_tmp = NULL;
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(lle);
+ lle_tmp = lla_lookup(llt, LLE_EXCLUSIVE, dst);
+ if (lle_tmp != NULL) {
+ /* Check if we are trying to replace immutable entry */
+ if ((lle_tmp->la_flags & LLE_IFADDR) != 0) {
+ IF_AFDATA_WUNLOCK(ifp);
+ LLE_WUNLOCK(lle_tmp);
+ lltable_free_entry(llt, lle);
+ return (EPERM);
}
-#endif
- } else {
- if (flags & LLE_EXCLUSIVE)
- LLE_WUNLOCK(lle);
- else
- LLE_RUNLOCK(lle);
+ /* Unlink existing entry from table */
+ lltable_unlink_entry(llt, lle_tmp);
}
- } else if ((lle == NULL) && (flags & LLE_DELETE))
- error = EINVAL;
+ lltable_link_entry(llt, lle);
+ IF_AFDATA_WUNLOCK(ifp);
+ if (lle_tmp != NULL) {
+			EVENTHANDLER_INVOKE(lle_event, lle_tmp, LLENTRY_EXPIRED);
+ lltable_free_entry(llt, lle_tmp);
+ }
- return (error);
-}
+ /*
+	 * By invoking the LLE handler here we might get
+	 * two events for a static LLE entry insertion
+	 * on the routing socket.  However, since we might
+	 * have other subscribers we need to generate this
+	 * event anyway.
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED);
+ LLE_WUNLOCK(lle);
+#ifdef INET
+ /* gratuitous ARP */
+ if ((laflags & LLE_PUB) && dst->sa_family == AF_INET)
+ arprequest(ifp,
+ &((struct sockaddr_in *)dst)->sin_addr,
+ &((struct sockaddr_in *)dst)->sin_addr,
+ (u_char *)LLADDR(dl));
+#endif
-static void
-vnet_lltable_init()
-{
+ break;
- SLIST_INIT(&V_lltables);
+ case RTM_DELETE:
+ return (lltable_delete_addr(llt, 0, dst));
+
+ default:
+ error = EINVAL;
+ }
+
+ return (error);
}
-VNET_SYSINIT(vnet_lltable_init, SI_SUB_PSEUDO, SI_ORDER_FIRST,
- vnet_lltable_init, NULL);
#ifdef DDB
struct llentry_sa {
@@ -429,15 +851,14 @@ llatbl_lle_show(struct llentry_sa *la)
db_printf(" la_flags=0x%04x\n", lle->la_flags);
db_printf(" la_asked=%u\n", lle->la_asked);
db_printf(" la_preempt=%u\n", lle->la_preempt);
- db_printf(" ln_byhint=%u\n", lle->ln_byhint);
db_printf(" ln_state=%d\n", lle->ln_state);
db_printf(" ln_router=%u\n", lle->ln_router);
db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick);
db_printf(" lle_refcnt=%d\n", lle->lle_refcnt);
- bcopy(&lle->ll_addr.mac16, octet, sizeof(octet));
+ bcopy(lle->ll_addr, octet, sizeof(octet));
db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n",
octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
- db_printf(" la_timer=%p\n", &lle->la_timer);
+ db_printf(" lle_timer=%p\n", &lle->lle_timer);
switch (la->l3_addr.sa_family) {
#ifdef INET
@@ -490,7 +911,7 @@ llatbl_llt_show(struct lltable *llt)
db_printf("llt=%p llt_af=%d llt_ifp=%p\n",
llt, llt->llt_af, llt->llt_ifp);
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
+ for (i = 0; i < llt->llt_hsize; i++) {
LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
llatbl_lle_show((struct llentry_sa *)lle);
diff --git a/freebsd/sys/net/if_llatbl.h b/freebsd/sys/net/if_llatbl.h
index 8ac72c4f..51de726a 100644
--- a/freebsd/sys/net/if_llatbl.h
+++ b/freebsd/sys/net/if_llatbl.h
@@ -30,8 +30,6 @@ __FBSDID("$FreeBSD$");
#ifndef _NET_IF_LLATBL_H_
#define _NET_IF_LLATBL_H_
-#include <rtems/bsd/local/opt_ofed.h>
-
#include <sys/_rwlock.h>
#include <netinet/in.h>
@@ -50,42 +48,44 @@ extern struct rwlock lltable_rwlock;
#define LLTABLE_WUNLOCK() rw_wunlock(&lltable_rwlock)
#define LLTABLE_LOCK_ASSERT() rw_assert(&lltable_rwlock, RA_LOCKED)
+#define LLE_MAX_LINKHDR 24 /* Full IB header */
/*
* Code referencing llentry must at least hold
* a shared lock
*/
struct llentry {
LIST_ENTRY(llentry) lle_next;
- struct rwlock lle_lock;
+ union {
+ struct in_addr addr4;
+ struct in6_addr addr6;
+ } r_l3addr;
+ char r_linkdata[LLE_MAX_LINKHDR]; /* L2 data */
+ uint8_t r_hdrlen; /* length for LL header */
+ uint8_t spare0[3];
+ uint16_t r_flags; /* LLE runtime flags */
+ uint16_t r_skip_req; /* feedback from fast path */
+
struct lltable *lle_tbl;
struct llentries *lle_head;
- void (*lle_free)(struct lltable *, struct llentry *);
+ void (*lle_free)(struct llentry *);
struct mbuf *la_hold;
int la_numheld; /* # of packets currently held */
time_t la_expire;
uint16_t la_flags;
uint16_t la_asked;
uint16_t la_preempt;
- uint16_t ln_byhint;
int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */
uint16_t ln_router;
time_t ln_ntick;
+ time_t lle_remtime; /* Real time remaining */
+ time_t lle_hittime; /* Time when r_skip_req was unset */
int lle_refcnt;
+ char *ll_addr; /* link-layer address */
- union {
- uint64_t mac_aligned;
- uint16_t mac16[3];
-#ifdef OFED
- uint8_t mac8[20]; /* IB needs 20 bytes. */
-#endif
- } ll_addr;
-
- /* XXX af-private? */
- union {
- struct callout ln_timer_ch;
- struct callout la_timer;
- } lle_timer;
- /* NB: struct sockaddr must immediately follow */
+ LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */
+ struct callout lle_timer;
+ struct rwlock lle_lock;
+ struct mtx req_mtx;
};
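
The reworked llentry caches a fully formed link-layer header in r_linkdata (r_hdrlen bytes), so the output path can prepend it with a single copy instead of rebuilding the header for every packet. A sketch of how a consumer might use it (illustrative, not the committed fast path):

	/* Prepend the cached L2 header to an outbound mbuf (sketch). */
	static struct mbuf *
	prepend_llheader(struct mbuf *m, const struct llentry *lle)
	{

		M_PREPEND(m, lle->r_hdrlen, M_NOWAIT);
		if (m != NULL)
			memcpy(mtod(m, char *), lle->r_linkdata, lle->r_hdrlen);
		return (m);
	}
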
#define LLE_WLOCK(lle) rw_wlock(&(lle)->lle_lock)
@@ -98,6 +98,12 @@ struct llentry {
#define LLE_LOCK_DESTROY(lle) rw_destroy(&(lle)->lle_lock)
#define LLE_WLOCK_ASSERT(lle) rw_assert(&(lle)->lle_lock, RA_WLOCKED)
+#define LLE_REQ_INIT(lle) mtx_init(&(lle)->req_mtx, "lle req", \
+ NULL, MTX_DEF)
+#define LLE_REQ_DESTROY(lle) mtx_destroy(&(lle)->req_mtx)
+#define LLE_REQ_LOCK(lle) mtx_lock(&(lle)->req_mtx)
+#define LLE_REQ_UNLOCK(lle) mtx_unlock(&(lle)->req_mtx)
+
#define LLE_IS_VALID(lle) (((lle) != NULL) && ((lle) != (void *)-1))
#define LLE_ADDREF(lle) do { \
@@ -118,7 +124,7 @@ struct llentry {
#define LLE_FREE_LOCKED(lle) do { \
if ((lle)->lle_refcnt == 1) \
- (lle)->lle_free((lle)->lle_tbl, (lle)); \
+ (lle)->lle_free(lle); \
else { \
LLE_REMREF(lle); \
LLE_WUNLOCK(lle); \
@@ -132,58 +138,77 @@ struct llentry {
LLE_FREE_LOCKED(lle); \
} while (0)
+typedef struct llentry *(llt_lookup_t)(struct lltable *, u_int flags,
+ const struct sockaddr *l3addr);
+typedef struct llentry *(llt_alloc_t)(struct lltable *, u_int flags,
+ const struct sockaddr *l3addr);
+typedef void (llt_delete_t)(struct lltable *, struct llentry *);
+typedef void (llt_prefix_free_t)(struct lltable *,
+ const struct sockaddr *addr, const struct sockaddr *mask, u_int flags);
+typedef int (llt_dump_entry_t)(struct lltable *, struct llentry *,
+ struct sysctl_req *);
+typedef uint32_t (llt_hash_t)(const struct llentry *, uint32_t);
+typedef int (llt_match_prefix_t)(const struct sockaddr *,
+ const struct sockaddr *, u_int, struct llentry *);
+typedef void (llt_free_entry_t)(struct lltable *, struct llentry *);
+typedef void (llt_fill_sa_entry_t)(const struct llentry *, struct sockaddr *);
+typedef void (llt_free_tbl_t)(struct lltable *);
+typedef void (llt_link_entry_t)(struct lltable *, struct llentry *);
+typedef void (llt_unlink_entry_t)(struct llentry *);
-#define ln_timer_ch lle_timer.ln_timer_ch
-#define la_timer lle_timer.la_timer
-
-/* XXX bad name */
-#define L3_ADDR(lle) ((struct sockaddr *)(&lle[1]))
-#define L3_ADDR_LEN(lle) (((struct sockaddr *)(&lle[1]))->sa_len)
-
-#ifndef LLTBL_HASHTBL_SIZE
-#define LLTBL_HASHTBL_SIZE 32 /* default 32 ? */
-#endif
-
-#ifndef LLTBL_HASHMASK
-#define LLTBL_HASHMASK (LLTBL_HASHTBL_SIZE - 1)
-#endif
+typedef int (llt_foreach_cb_t)(struct lltable *, struct llentry *, void *);
+typedef int (llt_foreach_entry_t)(struct lltable *, llt_foreach_cb_t *, void *);
struct lltable {
SLIST_ENTRY(lltable) llt_link;
- struct llentries lle_head[LLTBL_HASHTBL_SIZE];
int llt_af;
+ int llt_hsize;
+ struct llentries *lle_head;
struct ifnet *llt_ifp;
- void (*llt_prefix_free)(struct lltable *,
- const struct sockaddr *prefix,
- const struct sockaddr *mask,
- u_int flags);
- struct llentry * (*llt_lookup)(struct lltable *, u_int flags,
- const struct sockaddr *l3addr);
- int (*llt_dump)(struct lltable *,
- struct sysctl_req *);
+ llt_lookup_t *llt_lookup;
+ llt_alloc_t *llt_alloc_entry;
+ llt_delete_t *llt_delete_entry;
+ llt_prefix_free_t *llt_prefix_free;
+ llt_dump_entry_t *llt_dump_entry;
+ llt_hash_t *llt_hash;
+ llt_match_prefix_t *llt_match_prefix;
+ llt_free_entry_t *llt_free_entry;
+ llt_foreach_entry_t *llt_foreach_entry;
+ llt_link_entry_t *llt_link_entry;
+ llt_unlink_entry_t *llt_unlink_entry;
+ llt_fill_sa_entry_t *llt_fill_sa_entry;
+ llt_free_tbl_t *llt_free_tbl;
};
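
An address family now builds its table by taking the chained-hash defaults from lltable_allocate_htbl() and overriding only the AF-specific slots before publishing the table with lltable_link(). A hedged sketch (the callback names are hypothetical):

	static struct lltable *
	myaf_lltattach(struct ifnet *ifp)
	{
		struct lltable *llt;

		llt = lltable_allocate_htbl(32);	/* default htable methods */
		llt->llt_af = AF_INET;
		llt->llt_ifp = ifp;
		llt->llt_lookup = myaf_lookup;		/* hypothetical callbacks */
		llt->llt_hash = myaf_hash;
		llt->llt_dump_entry = myaf_dump_entry;
		lltable_link(llt);
		return (llt);
	}
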
+
MALLOC_DECLARE(M_LLTABLE);
/*
- * flags to be passed to arplookup.
+ * LLentry flags
*/
#define LLE_DELETED 0x0001 /* entry must be deleted */
#define LLE_STATIC 0x0002 /* entry is static */
#define LLE_IFADDR 0x0004 /* entry is interface addr */
#define LLE_VALID 0x0008 /* ll_addr is valid */
-#define LLE_PROXY 0x0010 /* proxy entry ??? */
+#define LLE_REDIRECT 0x0010 /* installed by redirect; has host rtentry */
#define LLE_PUB 0x0020 /* publish entry ??? */
#define LLE_LINKED 0x0040 /* linked to lookup structure */
+/* LLE request flags */
#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */
-#define LLE_DELETE 0x4000 /* delete on a lookup - match LLE_IFADDR */
-#define LLE_CREATE 0x8000 /* create on a lookup miss */
+#define LLE_UNLOCKED 0x4000 /* return lle unlocked */
+#define LLE_ADDRONLY 0x4000 /* return lladdr instead of full header */
+#define LLE_CREATE 0x8000 /* hint to avoid lle lookup */
+
+/* LLE flags used by fastpath code */
+#define RLLE_VALID 0x0001 /* entry is valid */
+#define RLLE_IFADDR LLE_IFADDR /* entry is ifaddr */
#define LLATBL_HASH(key, mask) \
(((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask)
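
LLATBL_HASH folds all four bytes of the key into the low bits by repeated shift-and-xor before masking down to the bucket index. A worked example in userspace:

	#include <stdio.h>
	#include <stdint.h>

	#define LLATBL_HASH(key, mask) \
		(((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask)

	int
	main(void)
	{
		uint32_t key = 0x0a000001;	/* 10.0.0.1 in host order */

		/* Folds to 0x0a0a0a0b; & 31 gives bucket 11. */
		printf("bucket = %u\n", (unsigned int)LLATBL_HASH(key, 31));
		return (0);
	}
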
-struct lltable *lltable_init(struct ifnet *, int);
+struct lltable *lltable_allocate_htbl(uint32_t hsize);
void lltable_free(struct lltable *);
+void lltable_link(struct lltable *llt);
void lltable_prefix_free(int, struct sockaddr *,
struct sockaddr *, u_int);
#if 0
@@ -195,13 +220,37 @@ size_t llentry_free(struct llentry *);
struct llentry *llentry_alloc(struct ifnet *, struct lltable *,
struct sockaddr_storage *);
+/* helper functions */
+size_t lltable_drop_entry_queue(struct llentry *);
+void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off);
+int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off);
+
+int lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
+ char *buf, size_t *bufsize, int *lladdr_off);
+void lltable_update_ifaddr(struct lltable *llt);
+struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags,
+ const struct sockaddr *l4addr);
+void lltable_free_entry(struct lltable *llt, struct llentry *lle);
+int lltable_delete_addr(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr);
+void lltable_link_entry(struct lltable *llt, struct llentry *lle);
+void lltable_unlink_entry(struct lltable *llt, struct llentry *lle);
+void lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa);
+struct ifnet *lltable_get_ifp(const struct lltable *llt);
+int lltable_get_af(const struct lltable *llt);
+
+int lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f,
+ void *farg);
/*
* Generic link layer address lookup function.
*/
static __inline struct llentry *
lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
{
- return llt->llt_lookup(llt, flags, l3addr);
+
+ return (llt->llt_lookup(llt, flags, l3addr));
}
int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
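The if_llatbl.h hunk above turns struct lltable from a fixed open hash (LLTBL_HASHTBL_SIZE buckets embedded in the struct, folded by the byte-XOR LLATBL_HASH() macro) into a dynamically sized table driven by per-table method pointers, and walking a table now goes through lltable_foreach_lle() with an llt_foreach_cb_t callback. A minimal sketch of the callback pattern, using only the declarations above; count_static_cb() and its wrapper are illustrative, and the la_flags field name is assumed from the surrounding header:

static int
count_static_cb(struct lltable *llt, struct llentry *lle, void *farg)
{
	unsigned int *count = farg;

	/* A nonzero return value conventionally aborts the walk. */
	if (lle->la_flags & LLE_STATIC)
		(*count)++;
	return (0);
}

static unsigned int
count_static_entries(struct lltable *llt)
{
	unsigned int count = 0;

	lltable_foreach_lle(llt, count_static_cb, &count);
	return (count);
}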
diff --git a/freebsd/sys/net/if_loop.c b/freebsd/sys/net/if_loop.c
index b40dec8e..aa5109eb 100644
--- a/freebsd/sys/net/if_loop.c
+++ b/freebsd/sys/net/if_loop.c
@@ -36,10 +36,8 @@
* Loopback interface driver for protocol testing and timing.
*/
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -53,6 +51,7 @@
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/netisr.h>
@@ -65,11 +64,6 @@
#include <netinet/in_var.h>
#endif
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#ifdef INET6
#ifndef INET
#include <netinet/in.h>
@@ -78,11 +72,6 @@
#include <netinet/ip6.h>
#endif
-#ifdef NETATALK
-#include <netatalk/at.h>
-#include <netatalk/at_var.h>
-#endif
-
#include <security/mac/mac_framework.h>
#ifdef TINY_LOMTU
@@ -101,22 +90,20 @@
CSUM_SCTP_VALID)
int loioctl(struct ifnet *, u_long, caddr_t);
-static void lortrequest(int, struct rtentry *, struct rt_addrinfo *);
int looutput(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro);
+ const struct sockaddr *dst, struct route *ro);
static int lo_clone_create(struct if_clone *, int, caddr_t);
static void lo_clone_destroy(struct ifnet *);
VNET_DEFINE(struct ifnet *, loif); /* Used externally */
#ifdef VIMAGE
-static VNET_DEFINE(struct ifc_simple_data, lo_cloner_data);
-static VNET_DEFINE(struct if_clone, lo_cloner);
-#define V_lo_cloner_data VNET(lo_cloner_data)
+static VNET_DEFINE(struct if_clone *, lo_cloner);
#define V_lo_cloner VNET(lo_cloner)
#endif
-IFC_SIMPLE_DECLARE(lo, 1);
+static struct if_clone *lo_cloner;
+static const char loname[] = "lo";
static void
lo_clone_destroy(struct ifnet *ifp)
@@ -141,7 +128,7 @@ lo_clone_create(struct if_clone *ifc, int unit, caddr_t params)
if (ifp == NULL)
return (ENOSPC);
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, loname, unit);
ifp->if_mtu = LOMTU;
ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
ifp->if_ioctl = loioctl;
@@ -163,15 +150,15 @@ vnet_loif_init(const void *unused __unused)
{
#ifdef VIMAGE
+ lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
+ 1);
V_lo_cloner = lo_cloner;
- V_lo_cloner_data = lo_cloner_data;
- V_lo_cloner.ifc_data = &V_lo_cloner_data;
- if_clone_attach(&V_lo_cloner);
#else
- if_clone_attach(&lo_cloner);
+ lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
+ 1);
#endif
}
-VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+VNET_SYSINIT(vnet_loif_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
vnet_loif_init, NULL);
#ifdef VIMAGE
@@ -179,10 +166,10 @@ static void
vnet_loif_uninit(const void *unused __unused)
{
- if_clone_detach(&V_lo_cloner);
+ if_clone_detach(V_lo_cloner);
V_loif = NULL;
}
-VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND,
vnet_loif_uninit, NULL);
#endif
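The two hunks above replace the static IFC_SIMPLE_DECLARE()/if_clone_attach() registration with if_clone_simple(), which allocates the cloner and returns the handle that if_clone_detach() later needs. A hedged sketch of the pattern for a made-up "foo" driver; only if_clone_simple() and the callback signatures come from the patch:

static int	foo_clone_create(struct if_clone *, int, caddr_t);
static void	foo_clone_destroy(struct ifnet *);
static struct if_clone *foo_cloner;

static void
foo_init(void)
{
	/* "foo" is the name prefix; the final 1 pre-creates foo0. */
	foo_cloner = if_clone_simple("foo", foo_clone_create,
	    foo_clone_destroy, 1);
}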
@@ -213,19 +200,16 @@ static moduledata_t loop_mod = {
DECLARE_MODULE(if_lo, loop_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
int
-looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
u_int32_t af;
- struct rtentry *rt = NULL;
#ifdef MAC
int error;
#endif
M_ASSERTPKTHDR(m); /* check if we have the packet header */
- if (ro != NULL)
- rt = ro->ro_rt;
#ifdef MAC
error = mac_ifnet_check_transmit(ifp, m);
if (error) {
@@ -234,23 +218,22 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
}
#endif
- if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+ if (ro != NULL && ro->ro_flags & (RT_REJECT|RT_BLACKHOLE)) {
m_freem(m);
- return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
- rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+ return (ro->ro_flags & RT_BLACKHOLE ? 0 : EHOSTUNREACH);
}
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
/* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
+ else
+ af = dst->sa_family;
#if 1 /* XXX */
- switch (dst->sa_family) {
+ switch (af) {
case AF_INET:
if (ifp->if_capenable & IFCAP_RXCSUM) {
m->m_pkthdr.csum_data = 0xffff;
@@ -275,16 +258,13 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
#endif
m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6;
break;
- case AF_IPX:
- case AF_APPLETALK:
- break;
default:
- printf("looutput: af=%d unexpected\n", dst->sa_family);
+ printf("looutput: af=%d unexpected\n", af);
m_freem(m);
return (EAFNOSUPPORT);
}
#endif
- return (if_simloop(ifp, m, dst->sa_family, 0));
+ return (if_simloop(ifp, m, af, 0));
}
/*
@@ -370,36 +350,17 @@ if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen)
isr = NETISR_IPV6;
break;
#endif
-#ifdef IPX
- case AF_IPX:
- isr = NETISR_IPX;
- break;
-#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- isr = NETISR_ATALK2;
- break;
-#endif
default:
printf("if_simloop: can't handle af=%d\n", af);
m_freem(m);
return (EAFNOSUPPORT);
}
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
netisr_queue(isr, m); /* mbuf is free'd on failure. */
return (0);
}
-/* ARGSUSED */
-static void
-lortrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
-{
-
- RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
-}
-
/*
* Process an ioctl request.
*/
@@ -407,7 +368,6 @@ lortrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
int
loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- struct ifaddr *ifa;
struct ifreq *ifr = (struct ifreq *)data;
int error = 0, mask;
@@ -415,8 +375,6 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifa = (struct ifaddr *)data;
- ifa->ifa_rtrequest = lortrequest;
/*
* Everything else is done at a higher level.
*/
@@ -424,7 +382,7 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCADDMULTI:
case SIOCDELMULTI:
- if (ifr == 0) {
+ if (ifr == NULL) {
error = EAFNOSUPPORT; /* XXX */
break;
}
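Throughout this update, writes to the old inline ifnet statistics (ifp->if_opackets++ and friends) become if_inc_counter() calls, and reads go through the ifp->if_get_counter method, as in the looutput() hunks above. A sketch of the write side with a made-up driver completion routine; only the counter API itself is from the patch:

static void
foo_xmit_done(struct ifnet *ifp, struct mbuf *m, int error)
{
	if (error != 0) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return;
	}
	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
}

Reads use the method pointer instead of a struct field, e.g. ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS) in the if_mib.c hunk further down.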
diff --git a/freebsd/sys/net/if_media.c b/freebsd/sys/net/if_media.c
index 264d3535..66b13568 100644
--- a/freebsd/sys/net/if_media.c
+++ b/freebsd/sys/net/if_media.c
@@ -48,6 +48,8 @@
* to implement this interface.
*/
+#include <rtems/bsd/local/opt_ifmedia.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
@@ -70,6 +72,7 @@ static struct ifmedia_entry *ifmedia_match(struct ifmedia *ifm,
int flags, int mask);
#ifdef IFMEDIA_DEBUG
+#include <net/if_var.h>
int ifmedia_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, ifmedia, CTLFLAG_RW, &ifmedia_debug,
0, "if_media debugging msgs");
@@ -195,6 +198,21 @@ ifmedia_set(ifm, target)
}
/*
+ * Given a media word, return one suitable for an application
+ * using the original encoding.
+ */
+static int
+compat_media(int media)
+{
+
+ if (IFM_TYPE(media) == IFM_ETHER && IFM_SUBTYPE(media) > IFM_OTHER) {
+ media &= ~(IFM_ETH_XTYPE|IFM_TMASK);
+ media |= IFM_OTHER;
+ }
+ return (media);
+}
+
+/*
* Device-independent media ioctl support function.
*/
int
@@ -206,7 +224,7 @@ ifmedia_ioctl(ifp, ifr, ifm, cmd)
{
struct ifmedia_entry *match;
struct ifmediareq *ifmr = (struct ifmediareq *) ifr;
- int error = 0, sticky;
+ int error = 0;
if (ifp == NULL || ifr == NULL || ifm == NULL)
return(EINVAL);
@@ -273,80 +291,42 @@ ifmedia_ioctl(ifp, ifr, ifm, cmd)
* Get list of available media and current media on interface.
*/
case SIOCGIFMEDIA:
+ case SIOCGIFXMEDIA:
{
struct ifmedia_entry *ep;
- int *kptr, count;
- int usermax; /* user requested max */
+ int i;
- kptr = NULL; /* XXX gcc */
+ if (ifmr->ifm_count < 0)
+ return (EINVAL);
- ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
- ifm->ifm_cur->ifm_media : IFM_NONE;
+ if (cmd == SIOCGIFMEDIA) {
+ ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
+ compat_media(ifm->ifm_cur->ifm_media) : IFM_NONE;
+ } else {
+ ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
+ ifm->ifm_cur->ifm_media : IFM_NONE;
+ }
ifmr->ifm_mask = ifm->ifm_mask;
ifmr->ifm_status = 0;
(*ifm->ifm_status)(ifp, ifmr);
- count = 0;
- usermax = 0;
-
/*
	 * If there are more media entries on the list, count
* them. This allows the caller to set ifmr->ifm_count
* to 0 on the first call to know how much space to
* allocate.
*/
+ i = 0;
LIST_FOREACH(ep, &ifm->ifm_list, ifm_list)
- usermax++;
-
- /*
- * Don't allow the user to ask for too many
- * or a negative number.
- */
- if (ifmr->ifm_count > usermax)
- ifmr->ifm_count = usermax;
- else if (ifmr->ifm_count < 0)
- return (EINVAL);
-
- if (ifmr->ifm_count != 0) {
- kptr = (int *)malloc(ifmr->ifm_count * sizeof(int),
- M_TEMP, M_NOWAIT);
-
- if (kptr == NULL)
- return (ENOMEM);
- /*
- * Get the media words from the interface's list.
- */
- ep = LIST_FIRST(&ifm->ifm_list);
- for (; ep != NULL && count < ifmr->ifm_count;
- ep = LIST_NEXT(ep, ifm_list), count++)
- kptr[count] = ep->ifm_media;
-
- if (ep != NULL)
- error = E2BIG; /* oops! */
- } else {
- count = usermax;
- }
-
- /*
- * We do the copyout on E2BIG, because that's
- * just our way of telling userland that there
- * are more. This is the behavior I've observed
- * under BSD/OS 3.0
- */
- sticky = error;
- if ((error == 0 || error == E2BIG) && ifmr->ifm_count != 0) {
- error = copyout((caddr_t)kptr,
- (caddr_t)ifmr->ifm_ulist,
- ifmr->ifm_count * sizeof(int));
- }
-
- if (error == 0)
- error = sticky;
-
- if (ifmr->ifm_count != 0)
- free(kptr, M_TEMP);
-
- ifmr->ifm_count = count;
+ if (i++ < ifmr->ifm_count) {
+ error = copyout(&ep->ifm_media,
+ ifmr->ifm_ulist + i - 1, sizeof(int));
+ if (error)
+ break;
+ }
+ if (error == 0 && i > ifmr->ifm_count)
+ error = ifmr->ifm_count ? E2BIG : 0;
+ ifmr->ifm_count = i;
break;
}
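The rewritten SIOCGIFMEDIA/SIOCGIFXMEDIA handler copies each media word out individually and reports E2BIG only when a non-zero ifm_count was too small, preserving the two-call protocol described in the comment above. A hedged userland sketch of that protocol (get_media_list() is illustrative; the ioctl and the struct ifmediareq fields are standard):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <net/if_media.h>
#include <stdlib.h>
#include <string.h>

static int
get_media_list(int s, const char *ifname, int **listp, int *countp)
{
	struct ifmediareq ifmr;
	int *mwords;

	memset(&ifmr, 0, sizeof(ifmr));
	strlcpy(ifmr.ifm_name, ifname, sizeof(ifmr.ifm_name));
	/* First call, ifm_count == 0: kernel only reports the size. */
	if (ioctl(s, SIOCGIFMEDIA, &ifmr) == -1)
		return (-1);
	if ((mwords = calloc(ifmr.ifm_count, sizeof(int))) == NULL)
		return (-1);
	ifmr.ifm_ulist = mwords;
	/* Second call: kernel copies out up to ifm_count media words. */
	if (ioctl(s, SIOCGIFMEDIA, &ifmr) == -1) {
		free(mwords);
		return (-1);
	}
	*listp = mwords;
	*countp = ifmr.ifm_count;
	return (0);
}

SIOCGIFXMEDIA works the same way but returns the raw words instead of running them through compat_media().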
@@ -400,8 +380,7 @@ ifmedia_baudrate(int mword)
int i;
for (i = 0; ifmedia_baudrate_descriptions[i].ifmb_word != 0; i++) {
- if ((mword & (IFM_NMASK|IFM_TMASK)) ==
- ifmedia_baudrate_descriptions[i].ifmb_word)
+ if (IFM_TYPE_MATCH(mword, ifmedia_baudrate_descriptions[i].ifmb_word))
return (ifmedia_baudrate_descriptions[i].ifmb_baudrate);
}
@@ -507,7 +486,7 @@ ifmedia_printword(ifmw)
printf("<unknown type>\n");
return;
}
- printf(desc->ifmt_string);
+ printf("%s", desc->ifmt_string);
/* Any mode. */
for (desc = ttos->modes; desc && desc->ifmt_string != NULL; desc++)
diff --git a/freebsd/sys/net/if_media.h b/freebsd/sys/net/if_media.h
index 12585095..86439950 100644
--- a/freebsd/sys/net/if_media.h
+++ b/freebsd/sys/net/if_media.h
@@ -54,11 +54,13 @@
#include <sys/queue.h>
+struct ifnet;
+
/*
* Driver callbacks for media status and change requests.
*/
-typedef int (*ifm_change_cb_t)(struct ifnet *ifp);
-typedef void (*ifm_stat_cb_t)(struct ifnet *ifp, struct ifmediareq *req);
+typedef int (*ifm_change_cb_t)(struct ifnet *);
+typedef void (*ifm_stat_cb_t)(struct ifnet *, struct ifmediareq *req);
/*
* In-kernel representation of a single supported media type.
@@ -104,6 +106,7 @@ void ifmedia_set(struct ifmedia *ifm, int mword);
int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr,
struct ifmedia *ifm, u_long cmd);
+
/* Compute baudrate for a given media. */
uint64_t ifmedia_baudrate(int);
@@ -115,7 +118,7 @@ uint64_t ifmedia_baudrate(int);
* ---- -------
* 0-4 Media variant
* 5-7 Media type
- * 8-15 Type specific options
+ * 8-15 Type specific options (includes added variant bits on Ethernet)
* 16-18 Mode (for multi-mode devices)
* 19 RFU
* 20-27 Shared (global) options
@@ -124,8 +127,18 @@ uint64_t ifmedia_baudrate(int);
/*
* Ethernet
+ * In order to use more than 31 subtypes, Ethernet uses some of the option
+ * bits as part of the subtype field. See the options section below for
+ * relevant definitions
*/
#define IFM_ETHER 0x00000020
+#define IFM_ETHER_SUBTYPE(x) (((x) & IFM_TMASK) | \
+ (((x) & (IFM_ETH_XTYPE >> IFM_ETH_XSHIFT)) << IFM_ETH_XSHIFT))
+#define IFM_X(x) IFM_ETHER_SUBTYPE(x) /* internal shorthand */
+#define IFM_ETHER_SUBTYPE_SET(x) (IFM_ETHER_SUBTYPE(x) | IFM_ETHER)
+#define IFM_ETHER_SUBTYPE_GET(x) ((x) & (IFM_TMASK|IFM_ETH_XTYPE))
+#define IFM_ETHER_IS_EXTENDED(x) ((x) & IFM_ETH_XTYPE)
+
#define IFM_10_T 3 /* 10BaseT - RJ45 */
#define IFM_10_2 4 /* 10Base2 - Thinnet */
#define IFM_10_5 5 /* 10Base5 - AUI */
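The comment above explains that Ethernet now borrows option bits for subtypes past 31; concretely, IFM_ETHER_SUBTYPE() keeps the low five bits in IFM_TMASK and shifts the rest left by IFM_ETH_XSHIFT into IFM_ETH_XTYPE. A worked example, assuming IFM_TMASK is 0x0000001f and IFM_NMASK is 0x000000e0 as in the unmodified parts of this header (_Static_assert stands in here for the kernel's CTASSERT):

/*
 * Subtype 33 is 0x21: the low five bits (0x01) stay in IFM_TMASK and
 * bit 5 (0x20) shifts left by 6 into IFM_ETH_XTYPE, giving 0x801.
 */
_Static_assert(IFM_ETHER_SUBTYPE(33) == 0x00000801, "split encoding");
/* The updated IFM_SUBTYPE() reassembles both parts for Ethernet... */
_Static_assert(IFM_SUBTYPE(IFM_ETHER | 0x801) == 0x801, "round trip");
/* ...while an old binary masking with IFM_TMASK alone sees only 0x01. */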
@@ -153,15 +166,49 @@ uint64_t ifmedia_baudrate(int);
#define IFM_40G_CR4 27 /* 40GBase-CR4 */
#define IFM_40G_SR4 28 /* 40GBase-SR4 */
#define IFM_40G_LR4 29 /* 40GBase-LR4 */
+#define IFM_1000_KX 30 /* 1000Base-KX backplane */
+#define IFM_OTHER 31 /* Other: one of the following */
+
+/* following types are not visible to old binaries using only IFM_TMASK */
+#define IFM_10G_KX4 IFM_X(32) /* 10GBase-KX4 backplane */
+#define IFM_10G_KR IFM_X(33) /* 10GBase-KR backplane */
+#define IFM_10G_CR1 IFM_X(34) /* 10GBase-CR1 Twinax splitter */
+#define IFM_20G_KR2 IFM_X(35) /* 20GBase-KR2 backplane */
+#define IFM_2500_KX IFM_X(36) /* 2500Base-KX backplane */
+#define IFM_2500_T IFM_X(37) /* 2500Base-T - RJ45 (NBaseT) */
+#define IFM_5000_T IFM_X(38) /* 5000Base-T - RJ45 (NBaseT) */
+#define IFM_50G_PCIE IFM_X(39) /* 50G Ethernet over PCIE */
+#define IFM_25G_PCIE IFM_X(40) /* 25G Ethernet over PCIE */
+#define IFM_1000_SGMII IFM_X(41) /* 1G media interface */
+#define IFM_10G_SFI IFM_X(42) /* 10G media interface */
+#define IFM_40G_XLPPI IFM_X(43) /* 40G media interface */
+#define IFM_1000_CX_SGMII IFM_X(44) /* 1000Base-CX-SGMII */
+#define IFM_40G_KR4 IFM_X(45) /* 40GBase-KR4 */
+#define IFM_10G_ER IFM_X(46) /* 10GBase-ER */
+#define IFM_100G_CR4 IFM_X(47) /* 100GBase-CR4 */
+#define IFM_100G_SR4 IFM_X(48) /* 100GBase-SR4 */
+#define IFM_100G_KR4 IFM_X(49) /* 100GBase-KR4 */
+#define IFM_100G_LR4 IFM_X(50) /* 100GBase-LR4 */
+#define IFM_56G_R4 IFM_X(51) /* 56GBase-R4 */
+#define IFM_100_T IFM_X(52) /* 100BaseT - RJ45 */
+#define IFM_25G_CR IFM_X(53) /* 25GBase-CR */
+#define IFM_25G_KR IFM_X(54) /* 25GBase-KR */
+#define IFM_25G_SR IFM_X(55) /* 25GBase-SR */
+#define IFM_50G_CR2 IFM_X(56) /* 50GBase-CR2 */
+#define IFM_50G_KR2 IFM_X(57) /* 50GBase-KR2 */
+
/*
* Please update ieee8023ad_lacp.c:lacp_compose_key()
* after adding new Ethernet media types.
*/
-/* note 31 is the max! */
+/* Note IFM_X(511) is the max! */
+/* Ethernet option values; includes bits used for extended variant field */
#define IFM_ETH_MASTER 0x00000100 /* master mode (1000baseT) */
#define IFM_ETH_RXPAUSE 0x00000200 /* receive PAUSE frames */
#define IFM_ETH_TXPAUSE 0x00000400 /* transmit PAUSE frames */
+#define IFM_ETH_XTYPE 0x00007800 /* extended media variants */
+#define IFM_ETH_XSHIFT 6 /* shift XTYPE next to TMASK */
/*
* Token ring
@@ -253,11 +300,6 @@ uint64_t ifmedia_baudrate(int);
#define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */
/*
- * CARP Common Address Redundancy Protocol
- */
-#define IFM_CARP 0x000000c0
-
-/*
* Shared media sub-types
*/
#define IFM_AUTO 0 /* Autoselect best media */
@@ -309,7 +351,10 @@ uint64_t ifmedia_baudrate(int);
* Macros to extract various bits of information from the media word.
*/
#define IFM_TYPE(x) ((x) & IFM_NMASK)
-#define IFM_SUBTYPE(x) ((x) & IFM_TMASK)
+#define IFM_SUBTYPE(x) \
+ (IFM_TYPE(x) == IFM_ETHER ? IFM_ETHER_SUBTYPE_GET(x) : ((x) & IFM_TMASK))
+#define IFM_TYPE_MATCH(x,y) \
+ (IFM_TYPE(x) == IFM_TYPE(y) && IFM_SUBTYPE(x) == IFM_SUBTYPE(y))
#define IFM_TYPE_OPTIONS(x) ((x) & IFM_OMASK)
#define IFM_INST(x) (((x) & IFM_IMASK) >> IFM_ISHIFT)
#define IFM_OPTIONS(x) ((x) & (IFM_OMASK | IFM_GMASK))
@@ -343,7 +388,6 @@ struct ifmedia_description {
{ IFM_FDDI, "FDDI" }, \
{ IFM_IEEE80211, "IEEE 802.11 Wireless Ethernet" }, \
{ IFM_ATM, "ATM" }, \
- { IFM_CARP, "Common Address Redundancy Protocol" }, \
{ 0, NULL }, \
}
@@ -375,6 +419,34 @@ struct ifmedia_description {
{ IFM_40G_CR4, "40Gbase-CR4" }, \
{ IFM_40G_SR4, "40Gbase-SR4" }, \
{ IFM_40G_LR4, "40Gbase-LR4" }, \
+ { IFM_1000_KX, "1000Base-KX" }, \
+ { IFM_OTHER, "Other" }, \
+ { IFM_10G_KX4, "10GBase-KX4" }, \
+ { IFM_10G_KR, "10GBase-KR" }, \
+ { IFM_10G_CR1, "10GBase-CR1" }, \
+ { IFM_20G_KR2, "20GBase-KR2" }, \
+ { IFM_2500_KX, "2500Base-KX" }, \
+ { IFM_2500_T, "2500Base-T" }, \
+ { IFM_5000_T, "5000Base-T" }, \
+ { IFM_50G_PCIE, "PCIExpress-50G" }, \
+ { IFM_25G_PCIE, "PCIExpress-25G" }, \
+ { IFM_1000_SGMII, "1000Base-SGMII" }, \
+ { IFM_10G_SFI, "10GBase-SFI" }, \
+ { IFM_40G_XLPPI, "40GBase-XLPPI" }, \
+ { IFM_1000_CX_SGMII, "1000Base-CX-SGMII" }, \
+ { IFM_40G_KR4, "40GBase-KR4" }, \
+ { IFM_10G_ER, "10GBase-ER" }, \
+ { IFM_100G_CR4, "100GBase-CR4" }, \
+ { IFM_100G_SR4, "100GBase-SR4" }, \
+ { IFM_100G_KR4, "100GBase-KR4" }, \
+ { IFM_100G_LR4, "100GBase-LR4" }, \
+ { IFM_56G_R4, "56GBase-R4" }, \
+ { IFM_100_T, "100BaseT" }, \
+ { IFM_25G_CR, "25GBase-CR" }, \
+ { IFM_25G_KR, "25GBase-KR" }, \
+ { IFM_25G_SR, "25GBase-SR" }, \
+ { IFM_50G_CR2, "50GBase-CR2" }, \
+ { IFM_50G_KR2, "50GBase-KR2" }, \
{ 0, NULL }, \
}
@@ -676,6 +748,33 @@ struct ifmedia_baudrate {
{ IFM_ETHER | IFM_40G_CR4, IF_Gbps(40ULL) }, \
{ IFM_ETHER | IFM_40G_SR4, IF_Gbps(40ULL) }, \
{ IFM_ETHER | IFM_40G_LR4, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_1000_KX, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_10G_KX4, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_KR, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_CR1, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_20G_KR2, IF_Gbps(20ULL) }, \
+ { IFM_ETHER | IFM_2500_KX, IF_Mbps(2500) }, \
+ { IFM_ETHER | IFM_2500_T, IF_Mbps(2500) }, \
+ { IFM_ETHER | IFM_5000_T, IF_Mbps(5000) }, \
+ { IFM_ETHER | IFM_50G_PCIE, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_25G_PCIE, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_1000_SGMII, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_10G_SFI, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_40G_XLPPI, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_1000_CX_SGMII, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_40G_KR4, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_10G_ER, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_100G_CR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_SR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_KR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_LR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_56G_R4, IF_Gbps(56ULL) }, \
+ { IFM_ETHER | IFM_100_T, IF_Mbps(100ULL) }, \
+ { IFM_ETHER | IFM_25G_CR, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_25G_KR, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_25G_SR, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_50G_CR2, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_KR2, IF_Gbps(50ULL) }, \
\
{ IFM_TOKEN | IFM_TOK_STP4, IF_Mbps(4) }, \
{ IFM_TOKEN | IFM_TOK_STP16, IF_Mbps(16) }, \
@@ -730,8 +829,6 @@ struct ifmedia_status_description {
{ "no network", "active" } }, \
{ IFM_ATM, IFM_AVALID, IFM_ACTIVE, \
{ "no network", "active" } }, \
- { IFM_CARP, IFM_AVALID, IFM_ACTIVE, \
- { "backup", "master" } }, \
{ 0, 0, 0, \
{ NULL, NULL } } \
}
diff --git a/freebsd/sys/net/if_mib.c b/freebsd/sys/net/if_mib.c
index ec7a6984..d91c94ab 100644
--- a/freebsd/sys/net/if_mib.c
+++ b/freebsd/sys/net/if_mib.c
@@ -34,10 +34,12 @@
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_mib.h>
#include <net/vnet.h>
@@ -68,9 +70,9 @@ SYSCTL_DECL(_net_link_generic);
static SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0,
"Variables global to all interfaces");
-SYSCTL_VNET_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD,
- &VNET_NAME(if_index), 0,
- "Number of configured interfaces");
+SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount,
+ CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(if_index), 0,
+ "Number of configured interfaces");
static int
sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
@@ -100,37 +102,18 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
bzero(&ifmd, sizeof(ifmd));
strlcpy(ifmd.ifmd_name, ifp->if_xname, sizeof(ifmd.ifmd_name));
-#define COPY(fld) ifmd.ifmd_##fld = ifp->if_##fld
- COPY(pcount);
- COPY(data);
-#undef COPY
+ ifmd.ifmd_pcount = ifp->if_pcount;
+ if_data_copy(ifp, &ifmd.ifmd_data);
+
ifmd.ifmd_flags = ifp->if_flags | ifp->if_drv_flags;
ifmd.ifmd_snd_len = ifp->if_snd.ifq_len;
ifmd.ifmd_snd_maxlen = ifp->if_snd.ifq_maxlen;
- ifmd.ifmd_snd_drops = ifp->if_snd.ifq_drops;
+ ifmd.ifmd_snd_drops =
+ ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
error = SYSCTL_OUT(req, &ifmd, sizeof ifmd);
- if (error || !req->newptr)
- goto out;
-
- error = SYSCTL_IN(req, &ifmd, sizeof ifmd);
if (error)
goto out;
-
-#define DONTCOPY(fld) ifmd.ifmd_data.ifi_##fld = ifp->if_data.ifi_##fld
- DONTCOPY(type);
- DONTCOPY(physical);
- DONTCOPY(addrlen);
- DONTCOPY(hdrlen);
- DONTCOPY(mtu);
- DONTCOPY(metric);
- DONTCOPY(baudrate);
-#undef DONTCOPY
-#define COPY(fld) ifp->if_##fld = ifmd.ifmd_##fld
- COPY(data);
- ifp->if_snd.ifq_maxlen = ifmd.ifmd_snd_maxlen;
- ifp->if_snd.ifq_drops = ifmd.ifmd_snd_drops;
-#undef COPY
break;
case IFDATA_LINKSPECIFIC:
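With the write-back path removed, sysctl_ifdata() becomes effectively a read-only exporter, and ifmd_snd_drops is now derived from IFCOUNTER_OQDROPS. A userland sketch of fetching one row via the MIB names documented in ifmib(4); fetch_ifmib() itself is illustrative:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_mib.h>

static int
fetch_ifmib(int row, struct ifmibdata *ifmd)
{
	int name[6] = { CTL_NET, PF_LINK, NETLINK_GENERIC,
	    IFMIB_IFDATA, row, IFDATA_GENERAL };
	size_t len = sizeof(*ifmd);

	/* row is the 1-based interface index, as for if_indextoname(). */
	return (sysctl(name, 6, ifmd, &len, NULL, 0));
}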
diff --git a/freebsd/sys/net/if_pflog.h b/freebsd/sys/net/if_pflog.h
new file mode 100644
index 00000000..0faeb7d4
--- /dev/null
+++ b/freebsd/sys/net/if_pflog.h
@@ -0,0 +1,66 @@
+/* $OpenBSD: if_pflog.h,v 1.13 2006/10/23 12:46:09 henning Exp $ */
+/*
+ * Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_PFLOG_H_
+#define _NET_IF_PFLOG_H_
+
+#define PFLOGIFS_MAX 16
+
+#define PFLOG_RULESET_NAME_SIZE 16
+
+struct pfloghdr {
+ u_int8_t length;
+ sa_family_t af;
+ u_int8_t action;
+ u_int8_t reason;
+ char ifname[IFNAMSIZ];
+ char ruleset[PFLOG_RULESET_NAME_SIZE];
+ u_int32_t rulenr;
+ u_int32_t subrulenr;
+ uid_t uid;
+ pid_t pid;
+ uid_t rule_uid;
+ pid_t rule_pid;
+ u_int8_t dir;
+ u_int8_t pad[3];
+};
+
+#define PFLOG_HDRLEN sizeof(struct pfloghdr)
+/* minus pad, also used as a signature */
+#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad)
+
+#ifdef _KERNEL
+struct pf_rule;
+struct pf_ruleset;
+struct pfi_kif;
+struct pf_pdesc;
+
+#define PFLOG_PACKET(i,a,b,c,d,e,f,g,h,di) do { \
+ if (pflog_packet_ptr != NULL) \
+ pflog_packet_ptr(i,a,b,c,d,e,f,g,h,di); \
+} while (0)
+#endif /* _KERNEL */
+#endif /* _NET_IF_PFLOG_H_ */
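Per the comment above, the header's length field doubles as a signature: it is written as PFLOG_REAL_HDRLEN (the header minus its pad bytes), which a consumer can check before trusting the rest of a record. A hedged sketch of that check for a captured record; pflog_header_ok() is illustrative and the capture plumbing is not part of this header:

static int
pflog_header_ok(const u_char *pkt, size_t caplen)
{
	const struct pfloghdr *hdr;

	if (caplen < PFLOG_HDRLEN)
		return (0);
	hdr = (const struct pfloghdr *)pkt;
	/* length is stored as PFLOG_REAL_HDRLEN, serving as a magic. */
	return (hdr->length == PFLOG_REAL_HDRLEN);
}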
diff --git a/freebsd/sys/net/if_pfsync.h b/freebsd/sys/net/if_pfsync.h
new file mode 100644
index 00000000..5c4ba631
--- /dev/null
+++ b/freebsd/sys/net/if_pfsync.h
@@ -0,0 +1,265 @@
+/*-
+ * Copyright (c) 2001 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 2008 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $OpenBSD: if_pfsync.h,v 1.35 2008/06/29 08:42:15 mcbride Exp $
+ * $FreeBSD$
+ */
+
+
+#ifndef _NET_IF_PFSYNC_H_
+#define _NET_IF_PFSYNC_H_
+
+#define PFSYNC_VERSION 5
+#define PFSYNC_DFLTTL 255
+
+#define PFSYNC_ACT_CLR 0 /* clear all states */
+#define PFSYNC_ACT_INS 1 /* insert state */
+#define PFSYNC_ACT_INS_ACK 2 /* ack of inserted state */
+#define PFSYNC_ACT_UPD 3 /* update state */
+#define PFSYNC_ACT_UPD_C 4 /* "compressed" update state */
+#define PFSYNC_ACT_UPD_REQ 5 /* request "uncompressed" state */
+#define PFSYNC_ACT_DEL 6 /* delete state */
+#define PFSYNC_ACT_DEL_C 7 /* "compressed" delete state */
+#define PFSYNC_ACT_INS_F 8 /* insert fragment */
+#define PFSYNC_ACT_DEL_F 9 /* delete fragments */
+#define PFSYNC_ACT_BUS 10 /* bulk update status */
+#define PFSYNC_ACT_TDB 11 /* TDB replay counter update */
+#define PFSYNC_ACT_EOF 12 /* end of frame */
+#define PFSYNC_ACT_MAX 13
+
+/*
+ * A pfsync frame is built from a header followed by several sections which
+ * are all prefixed with their own subheaders. Frames must be terminated with
+ * an EOF subheader.
+ *
+ * | ... |
+ * | IP header |
+ * +============================+
+ * | pfsync_header |
+ * +----------------------------+
+ * | pfsync_subheader |
+ * +----------------------------+
+ * | first action fields |
+ * | ... |
+ * +----------------------------+
+ * | pfsync_subheader |
+ * +----------------------------+
+ * | second action fields |
+ * | ... |
+ * +----------------------------+
+ * | EOF pfsync_subheader |
+ * +----------------------------+
+ * | HMAC |
+ * +============================+
+ */
+
+/*
+ * Frame header
+ */
+
+struct pfsync_header {
+ u_int8_t version;
+ u_int8_t _pad;
+ u_int16_t len;
+ u_int8_t pfcksum[PF_MD5_DIGEST_LENGTH];
+} __packed;
+
+/*
+ * Frame region subheader
+ */
+
+struct pfsync_subheader {
+ u_int8_t action;
+ u_int8_t _pad;
+ u_int16_t count;
+} __packed;
+
+/*
+ * CLR
+ */
+
+struct pfsync_clr {
+ char ifname[IFNAMSIZ];
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * INS, UPD, DEL
+ */
+
+/* these use struct pfsync_state in pfvar.h */
+
+/*
+ * INS_ACK
+ */
+
+struct pfsync_ins_ack {
+ u_int64_t id;
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * UPD_C
+ */
+
+struct pfsync_upd_c {
+ u_int64_t id;
+ struct pfsync_state_peer src;
+ struct pfsync_state_peer dst;
+ u_int32_t creatorid;
+ u_int32_t expire;
+ u_int8_t timeout;
+ u_int8_t _pad[3];
+} __packed;
+
+/*
+ * UPD_REQ
+ */
+
+struct pfsync_upd_req {
+ u_int64_t id;
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * DEL_C
+ */
+
+struct pfsync_del_c {
+ u_int64_t id;
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * INS_F, DEL_F
+ */
+
+/* not implemented (yet) */
+
+/*
+ * BUS
+ */
+
+struct pfsync_bus {
+ u_int32_t creatorid;
+ u_int32_t endtime;
+ u_int8_t status;
+#define PFSYNC_BUS_START 1
+#define PFSYNC_BUS_END 2
+ u_int8_t _pad[3];
+} __packed;
+
+/*
+ * TDB
+ */
+
+struct pfsync_tdb {
+ u_int32_t spi;
+ union sockaddr_union dst;
+ u_int32_t rpl;
+ u_int64_t cur_bytes;
+ u_int8_t sproto;
+ u_int8_t updates;
+ u_int8_t _pad[2];
+} __packed;
+
+#define PFSYNC_HDRLEN sizeof(struct pfsync_header)
+
+struct pfsyncstats {
+ u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */
+ u_int64_t pfsyncs_ipackets6; /* total input packets, IPv6 */
+ u_int64_t pfsyncs_badif; /* not the right interface */
+ u_int64_t pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */
+ u_int64_t pfsyncs_hdrops; /* packets shorter than hdr */
+ u_int64_t pfsyncs_badver; /* bad (incl unsupp) version */
+ u_int64_t pfsyncs_badact; /* bad action */
+ u_int64_t pfsyncs_badlen; /* data length does not match */
+ u_int64_t pfsyncs_badauth; /* bad authentication */
+ u_int64_t pfsyncs_stale; /* stale state */
+ u_int64_t pfsyncs_badval; /* bad values */
+ u_int64_t pfsyncs_badstate; /* insert/lookup failed */
+
+ u_int64_t pfsyncs_opackets; /* total output packets, IPv4 */
+ u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */
+ u_int64_t pfsyncs_onomem; /* no memory for an mbuf */
+ u_int64_t pfsyncs_oerrors; /* ip output error */
+
+ u_int64_t pfsyncs_iacts[PFSYNC_ACT_MAX];
+ u_int64_t pfsyncs_oacts[PFSYNC_ACT_MAX];
+};
+
+/*
+ * Configuration structure for SIOCSETPFSYNC / SIOCGETPFSYNC
+ */
+struct pfsyncreq {
+ char pfsyncr_syncdev[IFNAMSIZ];
+ struct in_addr pfsyncr_syncpeer;
+ int pfsyncr_maxupdates;
+ int pfsyncr_defer;
+};
+
+#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq)
+#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq)
+
+#ifdef _KERNEL
+
+/*
+ * this shows where a pf state is with respect to the syncing.
+ */
+#define PFSYNC_S_INS 0x00
+#define PFSYNC_S_IACK 0x01
+#define PFSYNC_S_UPD 0x02
+#define PFSYNC_S_UPD_C 0x03
+#define PFSYNC_S_DEL 0x04
+#define PFSYNC_S_COUNT 0x05
+
+#define PFSYNC_S_DEFER 0xfe
+#define PFSYNC_S_NONE 0xff
+
+#define PFSYNC_SI_IOCTL 0x01
+#define PFSYNC_SI_CKSUM 0x02
+#define PFSYNC_SI_ACK 0x04
+
+#endif /* _KERNEL */
+
+#endif /* _NET_IF_PFSYNC_H_ */
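Per the frame diagram above, every pfsync frame starts with a pfsync_header, carries subheader-prefixed sections, and must end with an EOF subheader. A minimal validation sketch under those rules; pfsync_frame_ok() is illustrative, and real input handling in if_pfsync.c also walks the per-action sections:

static int
pfsync_frame_ok(const u_int8_t *buf, size_t len)
{
	const struct pfsync_header *ph;
	const struct pfsync_subheader *subh;

	if (len < PFSYNC_HDRLEN + sizeof(struct pfsync_subheader))
		return (0);
	ph = (const struct pfsync_header *)buf;
	if (ph->version != PFSYNC_VERSION || ntohs(ph->len) > len)
		return (0);
	/* The first subheader follows the frame header directly. */
	subh = (const struct pfsync_subheader *)(buf + PFSYNC_HDRLEN);
	return (subh->action < PFSYNC_ACT_MAX);
}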
diff --git a/freebsd/sys/net/if_sppp.h b/freebsd/sys/net/if_sppp.h
index 97f94b39..23a08e77 100644
--- a/freebsd/sys/net/if_sppp.h
+++ b/freebsd/sys/net/if_sppp.h
@@ -78,7 +78,7 @@ struct sauth {
/*
* Don't change the order of this. Ordering the phases this way allows
- * for a comparision of ``pp_phase >= PHASE_AUTHENTICATE'' in order to
+ * for a comparison of ``pp_phase >= PHASE_AUTHENTICATE'' in order to
* know whether LCP is up.
*/
enum ppp_phase {
diff --git a/freebsd/sys/net/if_spppfr.c b/freebsd/sys/net/if_spppfr.c
index 93bbaeba..d30509d5 100644
--- a/freebsd/sys/net/if_spppfr.c
+++ b/freebsd/sys/net/if_spppfr.c
@@ -27,10 +27,9 @@
#include <rtems/bsd/sys/param.h>
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3
+#if defined(__FreeBSD__)
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#endif
#ifdef NetBSD1_3
@@ -47,7 +46,7 @@
#include <sys/sockio.h>
#include <sys/socket.h>
#include <sys/syslog.h>
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3
+#if defined(__FreeBSD__)
#include <sys/random.h>
#endif
#include <sys/malloc.h>
@@ -60,6 +59,7 @@
#endif
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -86,11 +86,6 @@
# include <net/ethertypes.h>
#endif
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#include <net/if_sppp.h>
/*
@@ -151,7 +146,7 @@ struct arp_req {
unsigned short ptarget2;
} __packed;
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD_version < 501113
+#if defined(__FreeBSD__) && __FreeBSD_version < 501113
#define SPP_FMT "%s%d: "
#define SPP_ARGS(ifp) (ifp)->if_name, (ifp)->if_unit
#else
@@ -257,9 +252,9 @@ bad: m_freem (m);
switch (proto) {
default:
- ++ifp->if_noproto;
-drop: ++ifp->if_ierrors;
- ++ifp->if_iqdrops;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
+drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
m_freem (m);
return;
#ifdef INET
@@ -267,16 +262,6 @@ drop: ++ifp->if_ierrors;
isr = NETISR_IP;
break;
#endif
-#ifdef IPX
- case ETHERTYPE_IPX:
- isr = NETISR_IPX;
- break;
-#endif
-#ifdef NETATALK
- case ETHERTYPE_AT:
- isr = NETISR_ATALK;
- break;
-#endif
}
if (! (ifp->if_flags & IFF_UP))
@@ -306,7 +291,7 @@ struct mbuf *sppp_fr_header (struct sppp *sp, struct mbuf *m,
/* Prepend the space for Frame Relay header. */
hlen = (family == AF_INET) ? 4 : 10;
- M_PREPEND (m, hlen, M_DONTWAIT);
+ M_PREPEND (m, hlen, M_NOWAIT);
if (! m)
return 0;
h = mtod (m, u_char*);
@@ -346,21 +331,11 @@ struct mbuf *sppp_fr_header (struct sppp *sp, struct mbuf *m,
h[3] = FR_IP;
return m;
#endif
-#ifdef IPX
- case AF_IPX:
- type = ETHERTYPE_IPX;
- break;
-#endif
#ifdef NS
case AF_NS:
type = 0x8137;
break;
#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- type = ETHERTYPE_AT;
- break;
-#endif
}
h[3] = FR_PADDING;
h[4] = FR_SNAP;
@@ -383,7 +358,7 @@ void sppp_fr_keepalive (struct sppp *sp)
unsigned char *h, *p;
struct mbuf *m;
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.rcvif = 0;
@@ -421,7 +396,7 @@ void sppp_fr_keepalive (struct sppp *sp)
(u_char) sp->pp_rseq[IDX_LCP]);
if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
/*
@@ -503,7 +478,7 @@ static void sppp_fr_arp (struct sppp *sp, struct arp_req *req,
(unsigned char) his_ip_address);
/* Send the Inverse ARP reply. */
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.len = m->m_len = 10 + sizeof (*reply);
@@ -535,7 +510,7 @@ static void sppp_fr_arp (struct sppp *sp, struct arp_req *req,
reply->ptarget2 = htonl (his_ip_address) >> 16;
if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
/*
diff --git a/freebsd/sys/net/if_spppsubr.c b/freebsd/sys/net/if_spppsubr.c
index fa6a7c1b..e7a62277 100644
--- a/freebsd/sys/net/if_spppsubr.c
+++ b/freebsd/sys/net/if_spppsubr.c
@@ -27,11 +27,12 @@
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/module.h>
+#include <sys/rmlock.h>
#include <sys/sockio.h>
#include <sys/socket.h>
#include <sys/syslog.h>
@@ -42,6 +43,7 @@
#include <sys/md5.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -66,11 +68,6 @@
#include <netinet/if_ether.h>
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#include <net/if_sppp.h>
#define IOCTL_CMD_T u_long
@@ -264,7 +261,7 @@ static const u_short interactive_ports[8] = {
int debug = ifp->if_flags & IFF_DEBUG
static int sppp_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro);
+ const struct sockaddr *dst, struct route *ro);
static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2);
static void sppp_cisco_input(struct sppp *sp, struct mbuf *m);
@@ -525,7 +522,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
if (ifp->if_flags & IFF_UP)
/* Count received bytes, add FCS and one flag */
- ifp->if_ibytes += m->m_pkthdr.len + 3;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len + 3);
if (m->m_pkthdr.len <= PPP_HEADER_LEN) {
/* Too small packet, drop it. */
@@ -537,8 +534,8 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
m_freem (m);
SPPP_UNLOCK(sp);
drop2:
- ++ifp->if_ierrors;
- ++ifp->if_iqdrops;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
return;
}
@@ -577,7 +574,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
sppp_cp_send (sp, PPP_LCP, PROTO_REJ,
++sp->pp_seq[IDX_LCP], m->m_pkthdr.len + 2,
&h->protocol);
- ++ifp->if_noproto;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto drop;
case PPP_LCP:
sppp_cp_input(&lcp, sp, m);
@@ -631,7 +628,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
* enough leading space in the existing mbuf).
*/
m_adj(m, vjlen);
- M_PREPEND(m, hlen, M_DONTWAIT);
+ M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL) {
SPPP_UNLOCK(sp);
goto drop2;
@@ -673,14 +670,6 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
do_account++;
break;
#endif
-#ifdef IPX
- case PPP_IPX:
- /* IPX IPXCP not implemented yet */
- if (sp->pp_phase == PHASE_NETWORK)
- isr = NETISR_IPX;
- do_account++;
- break;
-#endif
}
break;
case CISCO_MULTICAST:
@@ -697,7 +686,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
}
switch (ntohs (h->protocol)) {
default:
- ++ifp->if_noproto;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto invalid;
case CISCO_KEEPALIVE:
sppp_cisco_input (sp, m);
@@ -716,12 +705,6 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
do_account++;
break;
#endif
-#ifdef IPX
- case ETHERTYPE_IPX:
- isr = NETISR_IPX;
- do_account++;
- break;
-#endif
}
break;
default: /* Invalid PPP packet. */
@@ -787,19 +770,18 @@ sppp_ifstart(struct ifnet *ifp)
* Enqueue transmit packet.
*/
static int
-sppp_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
+sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
struct sppp *sp = IFP2SP(ifp);
struct ppp_header *h;
struct ifqueue *ifq = NULL;
- int s, error, rv = 0;
+ int error, rv = 0;
#ifdef INET
int ipproto = PPP_IP;
#endif
int debug = ifp->if_flags & IFF_DEBUG;
- s = splimp();
SPPP_LOCK(sp);
if (!(ifp->if_flags & IFF_UP) ||
@@ -810,7 +792,6 @@ sppp_output(struct ifnet *ifp, struct mbuf *m,
#endif
m_freem (m);
SPPP_UNLOCK(sp);
- splx (s);
return (ENETDOWN);
}
@@ -834,9 +815,7 @@ sppp_output(struct ifnet *ifp, struct mbuf *m,
* to start LCP for it.
*/
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- splx(s);
lcp.Open(sp);
- s = splimp();
}
#ifdef INET
@@ -860,7 +839,6 @@ sppp_output(struct ifnet *ifp, struct mbuf *m,
{
m_freem(m);
SPPP_UNLOCK(sp);
- splx(s);
if(ip->ip_p == IPPROTO_TCP)
return(EADDRNOTAVAIL);
else
@@ -905,7 +883,6 @@ sppp_output(struct ifnet *ifp, struct mbuf *m,
default:
m_freem(m);
SPPP_UNLOCK(sp);
- splx(s);
return (EINVAL);
}
}
@@ -928,14 +905,13 @@ sppp_output(struct ifnet *ifp, struct mbuf *m,
/*
* Prepend general data packet PPP header. For now, IP only.
*/
- M_PREPEND (m, PPP_HEADER_LEN, M_DONTWAIT);
+ M_PREPEND (m, PPP_HEADER_LEN, M_NOWAIT);
if (! m) {
nobufs: if (debug)
log(LOG_DEBUG, SPP_FMT "no memory for transmit header\n",
SPP_ARGS(ifp));
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
SPPP_UNLOCK(sp);
- splx (s);
return (ENOBUFS);
}
/*
@@ -992,17 +968,10 @@ nobufs: if (debug)
}
break;
#endif
-#ifdef IPX
- case AF_IPX: /* Novell IPX Protocol */
- h->protocol = htons (sp->pp_mode == IFF_CISCO ?
- ETHERTYPE_IPX : PPP_IPX);
- break;
-#endif
default:
m_freem (m);
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
SPPP_UNLOCK(sp);
- splx (s);
return (EAFNOSUPPORT);
}
@@ -1016,13 +985,11 @@ out:
else
IFQ_HANDOFF_ADJ(ifp, m, 3, error);
if (error) {
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
SPPP_UNLOCK(sp);
- splx (s);
return (rv? rv: ENOBUFS);
}
SPPP_UNLOCK(sp);
- splx (s);
/*
* Unlike in sppp_input(), we can always bump the timestamp
* here since sppp_output() is only called on behalf of
@@ -1042,7 +1009,7 @@ sppp_attach(struct ifnet *ifp)
mtx_init(&sp->mtx, "sppp", MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE);
/* Initialize keepalive handler. */
- callout_init(&sp->keepalive_callout, CALLOUT_MPSAFE);
+ callout_init(&sp->keepalive_callout, 1);
callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
(void *)sp);
@@ -1074,7 +1041,7 @@ sppp_attach(struct ifnet *ifp)
#ifdef INET6
sp->confflags |= CONF_ENABLE_IPV6;
#endif
- callout_init(&sp->ifstart_callout, CALLOUT_MPSAFE);
+ callout_init(&sp->ifstart_callout, 1);
sp->if_start = ifp->if_start;
ifp->if_start = sppp_ifstart;
sp->pp_comp = malloc(sizeof(struct slcompress), M_TEMP, M_WAITOK);
@@ -1139,14 +1106,12 @@ int
sppp_isempty(struct ifnet *ifp)
{
struct sppp *sp = IFP2SP(ifp);
- int empty, s;
+ int empty;
- s = splimp();
SPPP_LOCK(sp);
empty = !sp->pp_fastq.ifq_head && !sp->pp_cpq.ifq_head &&
!SP2IFP(sp)->if_snd.ifq_head;
SPPP_UNLOCK(sp);
- splx(s);
return (empty);
}
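The s = splimp()/splx(s) pairs deleted here and throughout the file are a pre-SMPng relic: the spl calls have long been no-ops, so the existing SPPP_LOCK() mutex is the only synchronization that matters. The recurring pattern being removed is:

/*
 *	s = splimp();		-- mask network interrupts (pre-SMPng)
 *	SPPP_LOCK(sp);
 *	... critical section ...
 *	SPPP_UNLOCK(sp);
 *	splx(s);
 *
 * which reduces to SPPP_LOCK(sp)/SPPP_UNLOCK(sp) around the same code.
 */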
@@ -1158,9 +1123,7 @@ sppp_dequeue(struct ifnet *ifp)
{
struct sppp *sp = IFP2SP(ifp);
struct mbuf *m;
- int s;
- s = splimp();
SPPP_LOCK(sp);
/*
* Process only the control protocol queue until we have at
@@ -1177,7 +1140,6 @@ sppp_dequeue(struct ifnet *ifp)
IF_DEQUEUE (&SP2IFP(sp)->if_snd, m);
}
SPPP_UNLOCK(sp);
- splx(s);
return m;
}
@@ -1189,9 +1151,7 @@ sppp_pick(struct ifnet *ifp)
{
struct sppp *sp = IFP2SP(ifp);
struct mbuf *m;
- int s;
- s = splimp ();
SPPP_LOCK(sp);
m = sp->pp_cpq.ifq_head;
@@ -1202,7 +1162,6 @@ sppp_pick(struct ifnet *ifp)
if ((m = sp->pp_fastq.ifq_head) == NULL)
m = SP2IFP(sp)->if_snd.ifq_head;
SPPP_UNLOCK(sp);
- splx (s);
return (m);
}
@@ -1214,14 +1173,12 @@ sppp_ioctl(struct ifnet *ifp, IOCTL_CMD_T cmd, void *data)
{
struct ifreq *ifr = (struct ifreq*) data;
struct sppp *sp = IFP2SP(ifp);
- int s, rv, going_up, going_down, newmode;
+ int rv, going_up, going_down, newmode;
- s = splimp();
SPPP_LOCK(sp);
rv = 0;
switch (cmd) {
case SIOCAIFADDR:
- case SIOCSIFDSTADDR:
break;
case SIOCSIFADDR:
@@ -1322,7 +1279,6 @@ sppp_ioctl(struct ifnet *ifp, IOCTL_CMD_T cmd, void *data)
rv = ENOTTY;
}
SPPP_UNLOCK(sp);
- splx(s);
return rv;
}
@@ -1414,7 +1370,7 @@ sppp_cisco_send(struct sppp *sp, int type, long par1, long par2)
getmicrouptime(&tv);
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + CISCO_PACKET_LEN;
@@ -1441,7 +1397,7 @@ sppp_cisco_send(struct sppp *sp, int type, long par1, long par2)
(u_long)ch->par2, (u_int)ch->rel, (u_int)ch->time0, (u_int)ch->time1);
if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
/*
@@ -1462,7 +1418,7 @@ sppp_cp_send(struct sppp *sp, u_short proto, u_char type,
if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN)
len = MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN;
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len;
@@ -1490,7 +1446,7 @@ sppp_cp_send(struct sppp *sp, u_short proto, u_char type,
log(-1, ">\n");
}
if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
/*
@@ -1532,7 +1488,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
log(-1, SPP_FMT "%s invalid conf-req length %d\n",
SPP_ARGS(ifp), cp->name,
len);
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
break;
}
/* handle states where RCR doesn't get a SCA/SCN */
@@ -1588,7 +1544,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
case CONF_ACK:
@@ -1597,7 +1553,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n",
SPP_ARGS(ifp), cp->name,
h->ident, sp->confid[cp->protoidx]);
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
break;
}
switch (sp->state[cp->protoidx]) {
@@ -1632,7 +1588,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
case CONF_NAK:
@@ -1642,7 +1598,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n",
SPP_ARGS(ifp), cp->name,
h->ident, sp->confid[cp->protoidx]);
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
break;
}
if (h->type == CONF_NAK)
@@ -1682,7 +1638,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
@@ -1715,7 +1671,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
case TERM_ACK:
@@ -1746,7 +1702,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
case CODE_REJ:
@@ -1773,7 +1729,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
case PROTO_REJ:
@@ -1832,7 +1788,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
}
@@ -1848,7 +1804,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
if (debug)
log(-1, SPP_FMT "lcp echo req but lcp closed\n",
SPP_ARGS(ifp));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
break;
}
if (len < 8) {
@@ -1882,7 +1838,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
if (cp->proto != PPP_LCP)
goto illegal;
if (h->ident != sp->lcp.echoid) {
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
break;
}
if (len < 8) {
@@ -1907,7 +1863,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name, h->type);
sppp_cp_send(sp, cp->proto, CODE_REJ,
++sp->pp_seq[cp->protoidx], m->m_pkthdr.len, h);
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
}
@@ -2072,9 +2028,7 @@ static void
sppp_to_event(const struct cp *cp, struct sppp *sp)
{
STDDCL;
- int s;
- s = splimp();
SPPP_LOCK(sp);
if (debug)
log(LOG_DEBUG, SPP_FMT "%s TO(%s) rst_counter = %d\n",
@@ -2124,7 +2078,6 @@ sppp_to_event(const struct cp *cp, struct sppp *sp)
}
SPPP_UNLOCK(sp);
- splx(s);
}
/*
@@ -2196,7 +2149,7 @@ sppp_lcp_init(struct sppp *sp)
sp->lcp.max_terminate = 2;
sp->lcp.max_configure = 10;
sp->lcp.max_failure = 10;
- callout_init(&sp->ch[IDX_LCP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_LCP], 1);
}
static void
@@ -2887,7 +2840,7 @@ sppp_ipcp_init(struct sppp *sp)
sp->fail_counter[IDX_IPCP] = 0;
sp->pp_seq[IDX_IPCP] = 0;
sp->pp_rseq[IDX_IPCP] = 0;
- callout_init(&sp->ch[IDX_IPCP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_IPCP], 1);
}
static void
@@ -3011,7 +2964,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len)
* since our algorithm always uses the
* original option to NAK it with new values,
* things would become more complicated. In
- * pratice, the only commonly implemented IP
+ * practice, the only commonly implemented IP
* compression option is VJ anyway, so the
* difference is negligible.
*/
@@ -3446,7 +3399,7 @@ sppp_ipv6cp_init(struct sppp *sp)
sp->fail_counter[IDX_IPV6CP] = 0;
sp->pp_seq[IDX_IPV6CP] = 0;
sp->pp_rseq[IDX_IPV6CP] = 0;
- callout_init(&sp->ch[IDX_IPV6CP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_IPV6CP], 1);
}
static void
@@ -4027,7 +3980,7 @@ sppp_chap_input(struct sppp *sp, struct mbuf *m)
{
STDDCL;
struct lcp_header *h;
- int len, x;
+ int len;
u_char *value, *name, digest[AUTHKEYLEN], dsize;
int value_len, name_len;
MD5_CTX ctx;
@@ -4104,7 +4057,6 @@ sppp_chap_input(struct sppp *sp, struct mbuf *m)
}
log(-1, "\n");
}
- x = splimp();
SPPP_LOCK(sp);
sp->pp_flags &= ~PP_NEEDAUTH;
if (sp->myauth.proto == PPP_CHAP &&
@@ -4116,11 +4068,9 @@ sppp_chap_input(struct sppp *sp, struct mbuf *m)
* to network phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
break;
}
SPPP_UNLOCK(sp);
- splx(x);
sppp_phase_network(sp);
break;
@@ -4254,7 +4204,7 @@ sppp_chap_init(struct sppp *sp)
sp->fail_counter[IDX_CHAP] = 0;
sp->pp_seq[IDX_CHAP] = 0;
sp->pp_rseq[IDX_CHAP] = 0;
- callout_init(&sp->ch[IDX_CHAP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_CHAP], 1);
}
static void
@@ -4282,9 +4232,7 @@ sppp_chap_TO(void *cookie)
{
struct sppp *sp = (struct sppp *)cookie;
STDDCL;
- int s;
- s = splimp();
SPPP_LOCK(sp);
if (debug)
log(LOG_DEBUG, SPP_FMT "chap TO(%s) rst_counter = %d\n",
@@ -4315,14 +4263,13 @@ sppp_chap_TO(void *cookie)
}
SPPP_UNLOCK(sp);
- splx(s);
}
static void
sppp_chap_tlu(struct sppp *sp)
{
STDDCL;
- int i, x;
+ int i;
i = 0;
sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure;
@@ -4350,10 +4297,9 @@ sppp_chap_tlu(struct sppp *sp)
if ((sp->hisauth.flags & AUTHFLAG_NORECHALLENGE) == 0)
log(-1, "next re-challenge in %d seconds\n", i);
else
- log(-1, "re-challenging supressed\n");
+ log(-1, "re-challenging suppressed\n");
}
- x = splimp();
SPPP_LOCK(sp);
/* indicate to LCP that we need to be closed down */
sp->lcp.protos |= (1 << IDX_CHAP);
@@ -4365,11 +4311,9 @@ sppp_chap_tlu(struct sppp *sp)
* phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
return;
}
SPPP_UNLOCK(sp);
- splx(x);
/*
* If we are already in phase network, we are done here. This
@@ -4438,7 +4382,7 @@ sppp_pap_input(struct sppp *sp, struct mbuf *m)
{
STDDCL;
struct lcp_header *h;
- int len, x;
+ int len;
u_char *name, *passwd, mlen;
int name_len, passwd_len;
@@ -4525,7 +4469,6 @@ sppp_pap_input(struct sppp *sp, struct mbuf *m)
}
log(-1, "\n");
}
- x = splimp();
SPPP_LOCK(sp);
sp->pp_flags &= ~PP_NEEDAUTH;
if (sp->myauth.proto == PPP_PAP &&
@@ -4537,11 +4480,9 @@ sppp_pap_input(struct sppp *sp, struct mbuf *m)
* to network phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
break;
}
SPPP_UNLOCK(sp);
- splx(x);
sppp_phase_network(sp);
break;
@@ -4585,8 +4526,8 @@ sppp_pap_init(struct sppp *sp)
sp->fail_counter[IDX_PAP] = 0;
sp->pp_seq[IDX_PAP] = 0;
sp->pp_rseq[IDX_PAP] = 0;
- callout_init(&sp->ch[IDX_PAP], CALLOUT_MPSAFE);
- callout_init(&sp->pap_my_to_ch, CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_PAP], 1);
+ callout_init(&sp->pap_my_to_ch, 1);
}
static void
@@ -4622,9 +4563,7 @@ sppp_pap_TO(void *cookie)
{
struct sppp *sp = (struct sppp *)cookie;
STDDCL;
- int s;
- s = splimp();
SPPP_LOCK(sp);
if (debug)
log(LOG_DEBUG, SPP_FMT "pap TO(%s) rst_counter = %d\n",
@@ -4650,7 +4589,6 @@ sppp_pap_TO(void *cookie)
}
SPPP_UNLOCK(sp);
- splx(s);
}
/*
@@ -4677,7 +4615,6 @@ static void
sppp_pap_tlu(struct sppp *sp)
{
STDDCL;
- int x;
sp->rst_counter[IDX_PAP] = sp->lcp.max_configure;
@@ -4685,7 +4622,6 @@ sppp_pap_tlu(struct sppp *sp)
log(LOG_DEBUG, SPP_FMT "%s tlu\n",
SPP_ARGS(ifp), pap.name);
- x = splimp();
SPPP_LOCK(sp);
/* indicate to LCP that we need to be closed down */
sp->lcp.protos |= (1 << IDX_PAP);
@@ -4697,11 +4633,9 @@ sppp_pap_tlu(struct sppp *sp)
* phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
return;
}
SPPP_UNLOCK(sp);
- splx(x);
sppp_phase_network(sp);
}
@@ -4766,7 +4700,7 @@ sppp_auth_send(const struct cp *cp, struct sppp *sp,
const char *msg;
va_list ap;
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.rcvif = 0;
@@ -4810,7 +4744,7 @@ sppp_auth_send(const struct cp *cp, struct sppp *sp,
log(-1, ">\n");
}
if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
/*
@@ -4823,7 +4757,7 @@ sppp_qflush(struct ifqueue *ifq)
n = ifq->ifq_head;
while ((m = n)) {
- n = m->m_act;
+ n = m->m_nextpkt;
m_freem (m);
}
ifq->ifq_head = 0;
@@ -4839,9 +4773,7 @@ sppp_keepalive(void *dummy)
{
struct sppp *sp = (struct sppp*)dummy;
struct ifnet *ifp = SP2IFP(sp);
- int s;
- s = splimp();
SPPP_LOCK(sp);
/* Keepalive mode disabled or channel down? */
if (! (sp->pp_flags & PP_KEEPALIVE) ||
@@ -4884,7 +4816,6 @@ sppp_keepalive(void *dummy)
}
out:
SPPP_UNLOCK(sp);
- splx(s);
callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
(void *)sp);
}
@@ -4906,7 +4837,7 @@ sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst, u_long *srcmask)
* Pick the first AF_INET address from the list,
* aliases don't make any sense on a p2p link anyway.
*/
- si = 0;
+ si = NULL;
if_addr_rlock(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (ifa->ifa_addr->sa_family == AF_INET) {
@@ -4934,7 +4865,7 @@ sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst, u_long *srcmask)
#ifdef INET
/*
- * Set my IP address. Must be called at splimp.
+ * Set my IP address.
*/
static void
sppp_set_ip_addr(struct sppp *sp, u_long src)
@@ -4948,7 +4879,7 @@ sppp_set_ip_addr(struct sppp *sp, u_long src)
* Pick the first AF_INET address from the list,
* aliases don't make any sense on a p2p link anyway.
*/
- si = 0;
+ si = NULL;
if_addr_rlock(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family == AF_INET) {
@@ -5051,7 +4982,7 @@ sppp_gen_ip6_addr(struct sppp *sp, struct in6_addr *addr)
}
/*
- * Set my IPv6 address. Must be called at splimp.
+ * Set my IPv6 address.
*/
static void
sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src)
@@ -5126,14 +5057,15 @@ sppp_params(struct sppp *sp, u_long cmd, void *data)
struct spppreq *spr;
int rv = 0;
- if ((spr = malloc(sizeof(struct spppreq), M_TEMP, M_NOWAIT)) == 0)
+ if ((spr = malloc(sizeof(struct spppreq), M_TEMP, M_NOWAIT)) == NULL)
return (EAGAIN);
/*
* ifr->ifr_data is supposed to point to a struct spppreq.
* Check the cmd word first before attempting to fetch all the
* data.
*/
- if ((subcmd = fuword(ifr->ifr_data)) == -1) {
+ rv = fueword(ifr->ifr_data, &subcmd);
+ if (rv == -1) {
rv = EFAULT;
goto quit;
}
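The hunk above swaps fuword() for fueword(): fuword() returns the fetched word directly, so -1 is ambiguous between a fault and a legitimately stored value, while fueword() returns the word through a pointer and reserves its return value for the fault status. A minimal kernel-side sketch of the distinction (fetch_user_long() and uaddr are hypothetical):

static int
fetch_user_long(const void *uaddr, long *out)
{
	/* fueword() returns -1 only on fault, never as a fetched value. */
	if (fueword(uaddr, out) == -1)
		return (EFAULT);
	/* *out may now legitimately be -1. */
	return (0);
}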
diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c
index e88fd34d..7c1b7075 100644
--- a/freebsd/sys/net/if_stf.c
+++ b/freebsd/sys/net/if_stf.c
@@ -76,9 +76,6 @@
* Note that there is no way to be 100% secure.
*/
-#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_inet6.h>
-
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
@@ -86,24 +83,27 @@
#include <sys/mbuf.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/rmlock.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#include <sys/malloc.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/route.h>
#include <net/netisr.h>
#include <net/if_types.h>
-#include <net/if_stf.h>
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_fib.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
@@ -125,16 +125,10 @@
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface");
-static int stf_route_cache = 1;
-SYSCTL_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW,
- &stf_route_cache, 0, "Caching of IPv4 routes for 6to4 Output");
-
static int stf_permit_rfc1918 = 0;
-TUNABLE_INT("net.link.stf.permit_rfc1918", &stf_permit_rfc1918);
-SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN,
+SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RWTUN,
&stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses");
-#define STFNAME "stf"
#define STFUNIT 0
#define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002)
@@ -143,36 +137,34 @@ SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN,
* XXX: Return a pointer with 16-bit aligned. Don't cast it to
* struct in_addr *; use bcopy() instead.
*/
-#define GET_V4(x) ((caddr_t)(&(x)->s6_addr16[1]))
+#define GET_V4(x) (&(x)->s6_addr16[1])
struct stf_softc {
struct ifnet *sc_ifp;
- union {
- struct route __sc_ro4;
- struct route_in6 __sc_ro6; /* just for safety */
- } __sc_ro46;
-#define sc_ro __sc_ro46.__sc_ro4
struct mtx sc_ro_mtx;
u_int sc_fibnum;
const struct encaptab *encap_cookie;
};
#define STF2IFP(sc) ((sc)->sc_ifp)
+static const char stfname[] = "stf";
+
/*
* Note that mutable fields in the softc are not currently locked.
* We do lock sc_ro in stf_output though.
*/
-static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface");
+static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface");
static const int ip_stf_ttl = 40;
extern struct domain inetdomain;
-struct protosw in_stf_protosw = {
+static int in_stf_input(struct mbuf **, int *, int);
+static struct protosw in_stf_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_IPV6,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = in_stf_input,
- .pr_output = (pr_output_t *)rip_output,
+ .pr_output = rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
};
@@ -181,22 +173,20 @@ static char *stfnames[] = {"stf0", "stf", "6to4", NULL};
static int stfmodevent(module_t, int, void *);
static int stf_encapcheck(const struct mbuf *, int, int, void *);
-static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *);
-static int stf_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int stf_getsrcifa6(struct ifnet *, struct in6_addr *, struct in6_addr *);
+static int stf_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
static int isrfc1918addr(struct in_addr *);
static int stf_checkaddr4(struct stf_softc *, struct in_addr *,
struct ifnet *);
static int stf_checkaddr6(struct stf_softc *, struct in6_addr *,
struct ifnet *);
-static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int stf_ioctl(struct ifnet *, u_long, caddr_t);
static int stf_clone_match(struct if_clone *, const char *);
static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int stf_clone_destroy(struct if_clone *, struct ifnet *);
-struct if_clone stf_cloner = IFC_CLONE_INITIALIZER(STFNAME, NULL, 0,
- NULL, stf_clone_match, stf_clone_create, stf_clone_destroy);
+static struct if_clone *stf_cloner;
static int
stf_clone_match(struct if_clone *ifc, const char *name)
@@ -247,7 +237,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
* we don't conform to the default naming convention for interfaces.
*/
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = stfname;
ifp->if_dunit = IF_DUNIT_NONE;
mtx_init(&(sc)->sc_ro_mtx, "stf ro", NULL, MTX_DEF);
@@ -289,18 +279,16 @@ stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
}
static int
-stfmodevent(mod, type, data)
- module_t mod;
- int type;
- void *data;
+stfmodevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
- if_clone_attach(&stf_cloner);
+ stf_cloner = if_clone_advanced(stfname, 0, stf_clone_match,
+ stf_clone_create, stf_clone_destroy);
break;
case MOD_UNLOAD:
- if_clone_detach(&stf_cloner);
+ if_clone_detach(stf_cloner);
break;
default:
return (EOPNOTSUPP);
@@ -318,16 +306,12 @@ static moduledata_t stf_mod = {
DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
static int
-stf_encapcheck(m, off, proto, arg)
- const struct mbuf *m;
- int off;
- int proto;
- void *arg;
+stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
struct ip ip;
- struct in6_ifaddr *ia6;
struct stf_softc *sc;
struct in_addr a, b, mask;
+ struct in6_addr addr6, mask6;
sc = (struct stf_softc *)arg;
if (sc == NULL)
@@ -349,20 +333,16 @@ stf_encapcheck(m, off, proto, arg)
if (ip.ip_v != 4)
return 0;
- ia6 = stf_getsrcifa6(STF2IFP(sc));
- if (ia6 == NULL)
- return 0;
+ if (stf_getsrcifa6(STF2IFP(sc), &addr6, &mask6) != 0)
+ return (0);
/*
* check if IPv4 dst matches the IPv4 address derived from the
* local 6to4 address.
* success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:...
*/
- if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst,
- sizeof(ip.ip_dst)) != 0) {
- ifa_free(&ia6->ia_ifa);
+ if (bcmp(GET_V4(&addr6), &ip.ip_dst, sizeof(ip.ip_dst)) != 0)
return 0;
- }
/*
* check if IPv4 src matches the IPv4 address derived from the
@@ -371,9 +351,8 @@ stf_encapcheck(m, off, proto, arg)
* fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24
*/
bzero(&a, sizeof(a));
- bcopy(GET_V4(&ia6->ia_addr.sin6_addr), &a, sizeof(a));
- bcopy(GET_V4(&ia6->ia_prefixmask.sin6_addr), &mask, sizeof(mask));
- ifa_free(&ia6->ia_ifa);
+ bcopy(GET_V4(&addr6), &a, sizeof(a));
+ bcopy(GET_V4(&mask6), &mask, sizeof(mask));
a.s_addr &= mask.s_addr;
b = ip.ip_src;
b.s_addr &= mask.s_addr;
@@ -384,12 +363,12 @@ stf_encapcheck(m, off, proto, arg)
return 32;
}
-static struct in6_ifaddr *
-stf_getsrcifa6(ifp)
- struct ifnet *ifp;
+static int
+stf_getsrcifa6(struct ifnet *ifp, struct in6_addr *addr, struct in6_addr *mask)
{
struct ifaddr *ia;
struct in_ifaddr *ia4;
+ struct in6_ifaddr *ia6;
struct sockaddr_in6 *sin6;
struct in_addr in;
@@ -408,33 +387,30 @@ stf_getsrcifa6(ifp)
if (ia4 == NULL)
continue;
- ifa_ref(ia);
+ ia6 = (struct in6_ifaddr *)ia;
+
+ *addr = sin6->sin6_addr;
+ *mask = ia6->ia_prefixmask.sin6_addr;
if_addr_runlock(ifp);
- return (struct in6_ifaddr *)ia;
+ return (0);
}
if_addr_runlock(ifp);
- return NULL;
+ return (ENOENT);
}
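Returning the address and prefix mask through out-parameters removes the ifa_ref()/ifa_free() lifetime management the old interface imposed on every caller: the copies are taken while if_addr_rlock() is held and stay valid after the ifaddr itself goes away. A sketch of the new calling pattern:

    struct in6_addr addr6, mask6;

    /* Copies are made under the address-list lock; nothing to release. */
    if (stf_getsrcifa6(ifp, &addr6, &mask6) != 0)
            return (ENETDOWN);      /* no usable 6to4 source address */
    /* addr6/mask6 may be used freely from here on. */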
static int
-stf_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
struct stf_softc *sc;
- struct sockaddr_in6 *dst6;
- struct route *cached_route;
+ const struct sockaddr_in6 *dst6;
struct in_addr in4;
- caddr_t ptr;
- struct sockaddr_in *dst4;
+ const void *ptr;
u_int8_t tos;
struct ip *ip;
struct ip6_hdr *ip6;
- struct in6_ifaddr *ia6;
- u_int32_t af;
+ struct in6_addr addr6, mask6;
int error;
#ifdef MAC
@@ -446,12 +422,12 @@ stf_output(ifp, m, dst, ro)
#endif
sc = ifp->if_softc;
- dst6 = (struct sockaddr_in6 *)dst;
+ dst6 = (const struct sockaddr_in6 *)dst;
/* just in case */
if ((ifp->if_flags & IFF_UP) == 0) {
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return ENETDOWN;
}
@@ -460,18 +436,16 @@ stf_output(ifp, m, dst, ro)
* we shouldn't generate output. Without this check, we'll end up
* using wrong IPv4 source.
*/
- ia6 = stf_getsrcifa6(ifp);
- if (ia6 == NULL) {
+ if (stf_getsrcifa6(ifp, &addr6, &mask6) != 0) {
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return ENETDOWN;
}
if (m->m_len < sizeof(*ip6)) {
m = m_pullup(m, sizeof(*ip6));
if (!m) {
- ifa_free(&ia6->ia_ifa);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return ENOBUFS;
}
}
@@ -479,15 +453,6 @@ stf_output(ifp, m, dst, ro)
tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
/*
- * BPF writes need to be handled specially.
- * This is a null operation, nothing here checks dst->sa_family.
- */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
- /*
* Pickup the right outer dst addr from the list of candidates.
* ip6_dst has priority as it may be able to give us shorter IPv4 hops.
*/
@@ -497,9 +462,8 @@ stf_output(ifp, m, dst, ro)
else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr))
ptr = GET_V4(&dst6->sin6_addr);
else {
- ifa_free(&ia6->ia_ifa);
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return ENETUNREACH;
}
bcopy(ptr, &in4, sizeof(in4));
@@ -512,78 +476,38 @@ stf_output(ifp, m, dst, ro)
* will only read from the mbuf (i.e., it won't
 * try to free it or keep a pointer to it).
*/
- af = AF_INET6;
+ u_int af = AF_INET6;
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
}
- M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+ M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
if (m == NULL) {
- ifa_free(&ia6->ia_ifa);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return ENOBUFS;
}
ip = mtod(m, struct ip *);
bzero(ip, sizeof(*ip));
- bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr),
- &ip->ip_src, sizeof(ip->ip_src));
- ifa_free(&ia6->ia_ifa);
+ bcopy(GET_V4(&addr6), &ip->ip_src, sizeof(ip->ip_src));
bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst));
ip->ip_p = IPPROTO_IPV6;
ip->ip_ttl = ip_stf_ttl;
- ip->ip_len = m->m_pkthdr.len; /*host order*/
+ ip->ip_len = htons(m->m_pkthdr.len);
if (ifp->if_flags & IFF_LINK1)
ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
else
ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
- if (!stf_route_cache) {
- cached_route = NULL;
- goto sendit;
- }
-
- /*
- * Do we have a cached route?
- */
- mtx_lock(&(sc)->sc_ro_mtx);
- dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst;
- if (dst4->sin_family != AF_INET ||
- bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) {
- /* cache route doesn't match */
- dst4->sin_family = AF_INET;
- dst4->sin_len = sizeof(struct sockaddr_in);
- bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr));
- if (sc->sc_ro.ro_rt) {
- RTFREE(sc->sc_ro.ro_rt);
- sc->sc_ro.ro_rt = NULL;
- }
- }
-
- if (sc->sc_ro.ro_rt == NULL) {
- rtalloc_fib(&sc->sc_ro, sc->sc_fibnum);
- if (sc->sc_ro.ro_rt == NULL) {
- m_freem(m);
- mtx_unlock(&(sc)->sc_ro_mtx);
- ifp->if_oerrors++;
- return ENETUNREACH;
- }
- }
- cached_route = &sc->sc_ro;
-
-sendit:
M_SETFIB(m, sc->sc_fibnum);
- ifp->if_opackets++;
- error = ip_output(m, NULL, cached_route, 0, NULL, NULL);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ error = ip_output(m, NULL, NULL, 0, NULL, NULL);
- if (cached_route != NULL)
- mtx_unlock(&(sc)->sc_ro_mtx);
return error;
}
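Throughout this update, direct increments of ifnet statistics fields are replaced by if_inc_counter(), which updates the per-CPU counter(9) backend and avoids racy read-modify-write on SMP. The pattern, in brief:

    /* Old: plain field increments, lossy under concurrent updates. */
    ifp->if_opackets++;
    ifp->if_oerrors++;

    /* New: counters are named by the ift_counter enum. */
    if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
    if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);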
static int
-isrfc1918addr(in)
- struct in_addr *in;
+isrfc1918addr(struct in_addr *in)
{
/*
* returns 1 if private address range:
@@ -599,11 +523,9 @@ isrfc1918addr(in)
}
static int
-stf_checkaddr4(sc, in, inifp)
- struct stf_softc *sc;
- struct in_addr *in;
- struct ifnet *inifp; /* incoming interface */
+stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp)
{
+ struct rm_priotracker in_ifa_tracker;
struct in_ifaddr *ia4;
/*
@@ -627,54 +549,35 @@ stf_checkaddr4(sc, in, inifp)
/*
* reject packets with broadcast
*/
- IN_IFADDR_RLOCK();
- for (ia4 = TAILQ_FIRST(&V_in_ifaddrhead);
- ia4;
- ia4 = TAILQ_NEXT(ia4, ia_link))
- {
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
+ TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) {
if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
continue;
if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return -1;
}
}
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
/*
* perform ingress filter
*/
if (sc && (STF2IFP(sc)->if_flags & IFF_LINK2) == 0 && inifp) {
- struct sockaddr_in sin;
- struct rtentry *rt;
-
- bzero(&sin, sizeof(sin));
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof(struct sockaddr_in);
- sin.sin_addr = *in;
- rt = rtalloc1_fib((struct sockaddr *)&sin, 0,
- 0UL, sc->sc_fibnum);
- if (!rt || rt->rt_ifp != inifp) {
-#if 0
- log(LOG_WARNING, "%s: packet from 0x%x dropped "
- "due to ingress filter\n", if_name(STF2IFP(sc)),
- (u_int32_t)ntohl(sin.sin_addr.s_addr));
-#endif
- if (rt)
- RTFREE_LOCKED(rt);
- return -1;
- }
- RTFREE_LOCKED(rt);
+ struct nhop4_basic nh4;
+
+ if (fib4_lookup_nh_basic(sc->sc_fibnum, *in, 0, 0, &nh4) != 0)
+ return (-1);
+
+ if (nh4.nh_ifp != inifp)
+ return (-1);
}
return 0;
}
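The ingress filter no longer allocates a locked rtentry through rtalloc1_fib(); fib4_lookup_nh_basic() fills a caller-provided struct nhop4_basic on the stack, so there is no route lock or reference to drop on any exit path. A sketch of the lookup as used above ('src' is an assumed struct in_addr):

    struct nhop4_basic nh4;

    /* Resolve the nexthop for 'src' in the softc's FIB. */
    if (fib4_lookup_nh_basic(sc->sc_fibnum, src, 0, 0, &nh4) != 0)
            return (-1);            /* no route: drop the packet */
    if (nh4.nh_ifp != inifp)
            return (-1);            /* route exits elsewhere: spoofed source */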
static int
-stf_checkaddr6(sc, in6, inifp)
- struct stf_softc *sc;
- struct in6_addr *in6;
- struct ifnet *inifp; /* incoming interface */
+stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp)
{
/*
* check 6to4 addresses
@@ -697,23 +600,23 @@ stf_checkaddr6(sc, in6, inifp)
return 0;
}
-void
-in_stf_input(m, off)
- struct mbuf *m;
- int off;
+static int
+in_stf_input(struct mbuf **mp, int *offp, int proto)
{
- int proto;
struct stf_softc *sc;
struct ip *ip;
struct ip6_hdr *ip6;
+ struct mbuf *m;
u_int8_t otos, itos;
struct ifnet *ifp;
+ int off;
- proto = mtod(m, struct ip *)->ip_p;
+ m = *mp;
+ off = *offp;
if (proto != IPPROTO_IPV6) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
@@ -722,7 +625,7 @@ in_stf_input(m, off)
if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
ifp = STF2IFP(sc);
@@ -738,7 +641,7 @@ in_stf_input(m, off)
if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 ||
stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
otos = ip->ip_tos;
@@ -747,7 +650,7 @@ in_stf_input(m, off)
if (m->m_len < sizeof(*ip6)) {
m = m_pullup(m, sizeof(*ip6));
if (!m)
- return;
+ return (IPPROTO_DONE);
}
ip6 = mtod(m, struct ip6_hdr *);
@@ -758,7 +661,7 @@ in_stf_input(m, off)
if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 ||
stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
@@ -789,28 +692,15 @@ in_stf_input(m, off)
* See net/if_gif.c for possible issues with packet processing
* reorder due to extra queueing.
*/
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
M_SETFIB(m, ifp->if_fib);
netisr_dispatch(NETISR_IPV6, m);
-}
-
-/* ARGSUSED */
-static void
-stf_rtrequest(cmd, rt, info)
- int cmd;
- struct rtentry *rt;
- struct rt_addrinfo *info;
-{
- RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+ return (IPPROTO_DONE);
}
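in_stf_input() is converted to the pr_input calling convention of the modern protosw: the handler receives a pointer to the mbuf pointer (so it may substitute the chain, e.g. after m_pullup()) and returns the next protocol to run, or IPPROTO_DONE when the packet has been consumed. A skeletal handler under that contract ('example_input' is illustrative, not part of the patch):

    static int
    example_input(struct mbuf **mp, int *offp, int proto)
    {
            struct mbuf *m = *mp;
            int off = *offp;        /* bytes of outer header to strip */

            if (proto != IPPROTO_IPV6) {
                    m_freem(m);     /* not ours; consume and stop */
                    return (IPPROTO_DONE);
            }
            /* ... m_adj(m, off), validate, netisr_dispatch() ... */
            return (IPPROTO_DONE);
    }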
static int
-stf_ioctl(ifp, cmd, data)
- struct ifnet *ifp;
- u_long cmd;
- caddr_t data;
+stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct ifaddr *ifa;
struct ifreq *ifr;
@@ -837,7 +727,6 @@ stf_ioctl(ifp, cmd, data)
break;
}
- ifa->ifa_rtrequest = stf_rtrequest;
ifp->if_flags |= IFF_UP;
break;
diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c
index 599905e8..24ae0092 100644
--- a/freebsd/sys/net/if_tap.c
+++ b/freebsd/sys/net/if_tap.c
@@ -65,6 +65,7 @@
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_media.h>
@@ -81,8 +82,8 @@
#define CDEV_NAME "tap"
#define TAPDEBUG if (tapdebug) printf
-#define TAP "tap"
-#define VMNET "vmnet"
+static const char tapname[] = "tap";
+static const char vmnetname[] = "vmnet";
#define TAPMAXUNIT 0x7fff
#define VMNET_DEV_MASK CLONE_FLAG0
@@ -101,11 +102,10 @@ static void tapifinit(void *);
static int tap_clone_create(struct if_clone *, int, caddr_t);
static void tap_clone_destroy(struct ifnet *);
+static struct if_clone *tap_cloner;
static int vmnet_clone_create(struct if_clone *, int, caddr_t);
static void vmnet_clone_destroy(struct ifnet *);
-
-IFC_SIMPLE_DECLARE(tap, 0);
-IFC_SIMPLE_DECLARE(vmnet, 0);
+static struct if_clone *vmnet_cloner;
/* character device */
static d_open_t tapopen;
@@ -137,7 +137,7 @@ static struct filterops tap_write_filterops = {
static struct cdevsw tap_cdevsw = {
.d_version = D_VERSION,
- .d_flags = D_PSEUDO | D_NEEDMINOR,
+ .d_flags = D_NEEDMINOR,
.d_open = tapopen,
.d_close = tapclose,
.d_read = tapread,
@@ -172,12 +172,10 @@ SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
"Allow user to open /dev/tap (based on node permissions)");
SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
"Bring interface up when /dev/tap is opened");
-SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tapdclone, 0,
- "Enably legacy devfs interface creation");
+SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
+ "Enable legacy devfs interface creation");
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");
-TUNABLE_INT("net.link.tap.devfs_cloning", &tapdclone);
-
DEV_MODULE(if_tap, tapmodevent, NULL);
static int
@@ -185,18 +183,12 @@ tap_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct cdev *dev;
int i;
- int extra;
- if (strcmp(ifc->ifc_name, VMNET) == 0)
- extra = VMNET_DEV_MASK;
- else
- extra = 0;
-
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, extra);
+ /* Find any existing device, or allocate new unit number. */
+ i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0);
if (i) {
- dev = make_dev(&tap_cdevsw, unit | extra,
- UID_ROOT, GID_WHEEL, 0600, "%s%d", ifc->ifc_name, unit);
+ dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600,
+ "%s%d", tapname, unit);
}
tapcreate(dev);
@@ -207,7 +199,18 @@ tap_clone_create(struct if_clone *ifc, int unit, caddr_t params)
static int
vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
- return tap_clone_create(ifc, unit, params);
+ struct cdev *dev;
+ int i;
+
+ /* Find any existing device, or allocate new unit number. */
+ i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK);
+ if (i) {
+ dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT,
+ GID_WHEEL, 0600, "%s%d", vmnetname, unit);
+ }
+
+ tapcreate(dev);
+ return (0);
}
static void
@@ -218,9 +221,10 @@ tap_destroy(struct tap_softc *tp)
CURVNET_SET(ifp->if_vnet);
destroy_dev(tp->tap_dev);
seldrain(&tp->tap_rsel);
+ knlist_clear(&tp->tap_rsel.si_note, 0);
knlist_destroy(&tp->tap_rsel.si_note);
ether_ifdetach(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
mtx_destroy(&tp->tap_mtx);
free(tp, M_TAP);
@@ -272,8 +276,10 @@ tapmodevent(module_t mod, int type, void *data)
mtx_destroy(&tapmtx);
return (ENOMEM);
}
- if_clone_attach(&tap_cloner);
- if_clone_attach(&vmnet_cloner);
+ tap_cloner = if_clone_simple(tapname, tap_clone_create,
+ tap_clone_destroy, 0);
+ vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create,
+ vmnet_clone_destroy, 0);
return (0);
case MOD_UNLOAD:
@@ -295,8 +301,8 @@ tapmodevent(module_t mod, int type, void *data)
mtx_unlock(&tapmtx);
EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
- if_clone_detach(&tap_cloner);
- if_clone_detach(&vmnet_cloner);
+ if_clone_detach(tap_cloner);
+ if_clone_detach(vmnet_cloner);
drain_dev_clone_events();
mtx_lock(&tapmtx);
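The IFC_SIMPLE_DECLARE()/if_clone_attach() machinery is gone: a cloner is now allocated at MOD_LOAD with if_clone_simple() (or if_clone_advanced() where custom name matching is needed, as in if_stf.c above), and the returned handle is what if_clone_detach() later takes. A minimal module-event sketch for a hypothetical 'foo' driver, assuming foo_clone_create()/foo_clone_destroy() exist:

    static struct if_clone *foo_cloner;

    static int
    foo_modevent(module_t mod, int type, void *data)
    {
            switch (type) {
            case MOD_LOAD:
                    foo_cloner = if_clone_simple("foo",
                        foo_clone_create, foo_clone_destroy, 0);
                    break;
            case MOD_UNLOAD:
                    if_clone_detach(foo_cloner);
                    break;
            default:
                    return (EOPNOTSUPP);
            }
            return (0);
    }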
@@ -350,13 +356,13 @@ tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **d
extra = 0;
/* We're interested in only tap/vmnet devices. */
- if (strcmp(name, TAP) == 0) {
+ if (strcmp(name, tapname) == 0) {
unit = -1;
- } else if (strcmp(name, VMNET) == 0) {
+ } else if (strcmp(name, vmnetname) == 0) {
unit = -1;
extra = VMNET_DEV_MASK;
- } else if (dev_stdclone(name, NULL, TAP, &unit) != 1) {
- if (dev_stdclone(name, NULL, VMNET, &unit) != 1) {
+ } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) {
+ if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) {
return;
} else {
extra = VMNET_DEV_MASK;
@@ -402,11 +408,9 @@ tapcreate(struct cdev *dev)
unsigned short macaddr_hi;
uint32_t macaddr_mid;
int unit;
- char *name = NULL;
+ const char *name = NULL;
u_char eaddr[6];
- dev->si_flags &= ~SI_CHEAPCLONE;
-
/* allocate driver storage and create device */
tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
@@ -418,10 +422,10 @@ tapcreate(struct cdev *dev)
/* select device: tap or vmnet */
if (unit & VMNET_DEV_MASK) {
- name = VMNET;
+ name = vmnetname;
tp->tap_flags |= TAP_VMNET;
} else
- name = TAP;
+ name = tapname;
unit &= TAPMAXUNIT;
@@ -534,11 +538,11 @@ tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
IF_DRAIN(&ifp->if_snd);
/*
- * do not bring the interface down, and do not anything with
- * interface, if we are in VMnet mode. just close the device.
+ * Do not bring the interface down, and do nothing else with the
+ * interface, if we are in VMnet mode. Just close the device.
*/
-
- if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
+ if (((tp->tap_flags & TAP_VMNET) == 0) &&
+ (ifp->if_flags & (IFF_UP | IFF_LINK0)) == IFF_UP) {
mtx_unlock(&tp->tap_mtx);
if_down(ifp);
mtx_lock(&tp->tap_mtx);
@@ -636,12 +640,12 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCGIFSTATUS:
ifs = (struct ifstat *)data;
- dummy = strlen(ifs->ascii);
mtx_lock(&tp->tap_mtx);
- if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
- snprintf(ifs->ascii + dummy,
- sizeof(ifs->ascii) - dummy,
+ if (tp->tap_pid != 0)
+ snprintf(ifs->ascii, sizeof(ifs->ascii),
"\tOpened by PID %d\n", tp->tap_pid);
+ else
+ ifs->ascii[0] = '\0';
mtx_unlock(&tp->tap_mtx);
break;
@@ -684,7 +688,7 @@ tapifstart(struct ifnet *ifp)
IF_DEQUEUE(&ifp->if_snd, m);
if (m != NULL) {
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
} else
break;
}
@@ -709,7 +713,7 @@ tapifstart(struct ifnet *ifp)
selwakeuppri(&tp->tap_rsel, PZERO+1);
KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
- ifp->if_opackets ++; /* obytes are counted in ether_output */
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
}
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
@@ -829,8 +833,7 @@ tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td
mtx_unlock(&tp->tap_mtx);
break;
- case OSIOCGIFADDR: /* get MAC address of the remote side */
- case SIOCGIFADDR:
+ case SIOCGIFADDR: /* get MAC address of the remote side */
mtx_lock(&tp->tap_mtx);
bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
mtx_unlock(&tp->tap_mtx);
@@ -948,9 +951,9 @@ tapwrite(struct cdev *dev, struct uio *uio, int flag)
return (EIO);
}
- if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN,
+ if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN,
M_PKTHDR)) == NULL) {
- ifp->if_ierrors ++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return (ENOBUFS);
}
@@ -977,7 +980,7 @@ tapwrite(struct cdev *dev, struct uio *uio, int flag)
CURVNET_SET(ifp->if_vnet);
(*ifp->if_input)(ifp, m);
CURVNET_RESTORE();
- ifp->if_ipackets ++; /* ibytes are counted in parent */
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */
return (0);
} /* tapwrite */
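The M_DONTWAIT/M_WAIT mbuf flags seen on the left-hand side of these hunks were retired in favor of the malloc(9)-style M_NOWAIT/M_WAITOK names, a mechanical rename this update applies throughout. Usage is otherwise unchanged:

    /* M_NOWAIT may still fail under memory pressure; check for NULL. */
    m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN, M_PKTHDR);
    if (m == NULL)
            return (ENOBUFS);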
diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c
index 556a4860..edb30d04 100644
--- a/freebsd/sys/net/if_tun.c
+++ b/freebsd/sys/net/if_tun.c
@@ -18,10 +18,8 @@
* $FreeBSD$
*/
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/sys/param.h>
#include <sys/priv.h>
@@ -47,6 +45,7 @@
#include <sys/random.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/netisr.h>
@@ -101,7 +100,6 @@ struct tun_softc {
#define TUN2IFP(sc) ((sc)->tun_ifp)
#define TUNDEBUG if (tundebug) if_printf
-#define TUNNAME "tun"
/*
* All mutable global variables in if_tun are locked using tunmtx, with
@@ -109,7 +107,8 @@ struct tun_softc {
* which is static after setup.
*/
static struct mtx tunmtx;
-static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
+static const char tunname[] = "tun";
+static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
static int tundebug = 0;
static int tundclone = 1;
static struct clonedevs *tunclones;
@@ -119,25 +118,22 @@ SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
"IP tunnel software network interface.");
-SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0,
+SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
"Enable legacy devfs interface creation.");
-TUNABLE_INT("net.link.tun.devfs_cloning", &tundclone);
-
static void tunclone(void *arg, struct ucred *cred, char *name,
int namelen, struct cdev **dev);
static void tuncreate(const char *name, struct cdev *dev);
static int tunifioctl(struct ifnet *, u_long, caddr_t);
static void tuninit(struct ifnet *);
static int tunmodevent(module_t, int, void *);
-static int tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *ro);
+static int tunoutput(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *ro);
static void tunstart(struct ifnet *);
static int tun_clone_create(struct if_clone *, int, caddr_t);
static void tun_clone_destroy(struct ifnet *);
-
-IFC_SIMPLE_DECLARE(tun, 0);
+static struct if_clone *tun_cloner;
static d_open_t tunopen;
static d_close_t tunclose;
@@ -167,7 +163,7 @@ static struct filterops tun_write_filterops = {
static struct cdevsw tun_cdevsw = {
.d_version = D_VERSION,
- .d_flags = D_PSEUDO | D_NEEDMINOR,
+ .d_flags = D_NEEDMINOR,
.d_open = tunopen,
.d_close = tunclose,
.d_read = tunread,
@@ -175,7 +171,7 @@ static struct cdevsw tun_cdevsw = {
.d_ioctl = tunioctl,
.d_poll = tunpoll,
.d_kqfilter = tunkqfilter,
- .d_name = TUNNAME,
+ .d_name = tunname,
};
static int
@@ -189,9 +185,9 @@ tun_clone_create(struct if_clone *ifc, int unit, caddr_t params)
if (i) {
/* No preexisting struct cdev *, create one */
dev = make_dev(&tun_cdevsw, unit,
- UID_UUCP, GID_DIALER, 0600, "%s%d", ifc->ifc_name, unit);
+ UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit);
}
- tuncreate(ifc->ifc_name, dev);
+ tuncreate(tunname, dev);
return (0);
}
@@ -213,9 +209,9 @@ tunclone(void *arg, struct ucred *cred, char *name, int namelen,
if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
return;
- if (strcmp(name, TUNNAME) == 0) {
+ if (strcmp(name, tunname) == 0) {
u = -1;
- } else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
+ } else if (dev_stdclone(name, NULL, tunname, &u) != 1)
return; /* Don't recognise the name */
if (u != -1 && u > IF_MAXUNIT)
return; /* Unit number too high */
@@ -248,7 +244,6 @@ tun_destroy(struct tun_softc *tp)
{
struct cdev *dev;
- /* Unlocked read. */
mtx_lock(&tp->tun_mtx);
if ((tp->tun_flags & TUN_OPEN) != 0)
cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
@@ -262,6 +257,7 @@ tun_destroy(struct tun_softc *tp)
if_free(TUN2IFP(tp));
destroy_dev(dev);
seldrain(&tp->tun_rsel);
+ knlist_clear(&tp->tun_rsel.si_note, 0);
knlist_destroy(&tp->tun_rsel.si_note);
mtx_destroy(&tp->tun_mtx);
cv_destroy(&tp->tun_cv);
@@ -293,10 +289,11 @@ tunmodevent(module_t mod, int type, void *data)
tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
if (tag == NULL)
return (ENOMEM);
- if_clone_attach(&tun_cloner);
+ tun_cloner = if_clone_simple(tunname, tun_clone_create,
+ tun_clone_destroy, 0);
break;
case MOD_UNLOAD:
- if_clone_detach(&tun_cloner);
+ if_clone_detach(tun_cloner);
EVENTHANDLER_DEREGISTER(dev_clone, tag);
drain_dev_clone_events();
@@ -364,8 +361,6 @@ tuncreate(const char *name, struct cdev *dev)
struct tun_softc *sc;
struct ifnet *ifp;
- dev->si_flags &= ~SI_CHEAPCLONE;
-
sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
cv_init(&sc->tun_cv, "tun_condvar");
@@ -412,7 +407,7 @@ tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
*/
tp = dev->si_drv1;
if (!tp) {
- tuncreate(TUNNAME, dev);
+ tuncreate(tunname, dev);
tp = dev->si_drv1;
}
@@ -557,18 +552,16 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
ifs = (struct ifstat *)data;
mtx_lock(&tp->tun_mtx);
if (tp->tun_pid)
- sprintf(ifs->ascii + strlen(ifs->ascii),
+ snprintf(ifs->ascii, sizeof(ifs->ascii),
"\tOpened by PID %d\n", tp->tun_pid);
+ else
+ ifs->ascii[0] = '\0';
mtx_unlock(&tp->tun_mtx);
break;
case SIOCSIFADDR:
tuninit(ifp);
TUNDEBUG(ifp, "address set\n");
break;
- case SIOCSIFDSTADDR:
- tuninit(ifp);
- TUNDEBUG(ifp, "destination address set\n");
- break;
case SIOCSIFMTU:
ifp->if_mtu = ifr->ifr_mtu;
TUNDEBUG(ifp, "mtu set\n");
@@ -587,7 +580,7 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
* tunoutput - queue packets from higher level ready to put out.
*/
static int
-tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
struct route *ro)
{
struct tun_softc *tp = ifp->if_softc;
@@ -621,25 +614,23 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
}
/* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
- if (bpf_peers_present(ifp->if_bpf)) {
+ else
af = dst->sa_family;
+
+ if (bpf_peers_present(ifp->if_bpf))
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
- }
/* prepend sockaddr? this may abort if the mbuf allocation fails */
if (cached_tun_flags & TUN_LMODE) {
/* allocate space for sockaddr */
- M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
+ M_PREPEND(m0, dst->sa_len, M_NOWAIT);
/* if allocation failed drop packet */
if (m0 == NULL) {
- ifp->if_iqdrops++;
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (ENOBUFS);
} else {
bcopy(dst, m0->m_data, dst->sa_len);
@@ -648,18 +639,18 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
if (cached_tun_flags & TUN_IFHEAD) {
/* Prepend the address family */
- M_PREPEND(m0, 4, M_DONTWAIT);
+ M_PREPEND(m0, 4, M_NOWAIT);
/* if allocation failed drop packet */
if (m0 == NULL) {
- ifp->if_iqdrops++;
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (ENOBUFS);
} else
- *(u_int32_t *)m0->m_data = htonl(dst->sa_family);
+ *(u_int32_t *)m0->m_data = htonl(af);
} else {
#ifdef INET
- if (dst->sa_family != AF_INET)
+ if (af != AF_INET)
#endif
{
m_freem(m0);
@@ -670,7 +661,7 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
error = (ifp->if_transmit)(ifp, m0);
if (error)
return (ENOBUFS);
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
return (0);
}
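Because if_output now takes a const struct sockaddr *, tunoutput() may no longer patch dst->sa_family in place for BPF writes (AF_UNSPEC packets carry the real family in sa_data); the family is copied into a local instead and 'dst' stays untouched. The resulting idiom:

    uint32_t af;

    if (dst->sa_family == AF_UNSPEC)        /* BPF write */
            bcopy(dst->sa_data, &af, sizeof(af));
    else
            af = dst->sa_family;
    /* use 'af' from here on; the const 'dst' is never written */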
@@ -871,7 +862,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
struct tun_softc *tp = dev->si_drv1;
struct ifnet *ifp = TUN2IFP(tp);
struct mbuf *m;
- uint32_t family;
+ uint32_t family, mru;
int isr;
TUNDEBUG(ifp, "tunwrite\n");
@@ -883,13 +874,16 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
if (uio->uio_resid == 0)
return (0);
- if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
+ mru = TUNMRU;
+ if (tp->tun_flags & TUN_IFHEAD)
+ mru += sizeof(family);
+ if (uio->uio_resid < 0 || uio->uio_resid > mru) {
TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
return (EIO);
}
- if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) {
- ifp->if_ierrors++;
+ if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return (ENOBUFS);
}
@@ -925,25 +919,13 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
isr = NETISR_IPV6;
break;
#endif
-#ifdef IPX
- case AF_IPX:
- isr = NETISR_IPX;
- break;
-#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- isr = NETISR_ATALK2;
- break;
-#endif
default:
m_freem(m);
return (EAFNOSUPPORT);
}
- /* First chunk of an mbuf contains good junk */
- if (harvest.point_to_point)
- random_harvest(m, 16, 3, 0, RANDOM_NET);
- ifp->if_ibytes += m->m_pkthdr.len;
- ifp->if_ipackets++;
+ random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_TUN);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
CURVNET_SET(ifp->if_vnet);
M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
diff --git a/freebsd/sys/net/if_tun.h b/freebsd/sys/net/if_tun.h
index 382881cb..1ea375f7 100644
--- a/freebsd/sys/net/if_tun.h
+++ b/freebsd/sys/net/if_tun.h
@@ -25,11 +25,11 @@
#define TUNMTU 1500
/* Maximum receive packet size (hard limit) */
-#define TUNMRU 16384
+#define TUNMRU 65535
struct tuninfo {
int baudrate; /* linespeed */
- short mtu; /* maximum transmission unit */
+ unsigned short mtu; /* maximum transmission unit */
u_char type; /* ethernet, tokenring, etc. */
u_char dummy; /* place holder */
};
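The MRU ceiling rises from 16384 to 65535, so tuninfo.mtu must become unsigned: a signed short tops out at 32767 and cannot carry the new maximum. A trivial standalone check:

    #include <stdio.h>

    int
    main(void)
    {
            short old_mtu = (short)65535;       /* overflows; typically -1 */
            unsigned short new_mtu = 65535;     /* fits exactly */

            printf("%d %u\n", old_mtu, new_mtu);
            return (0);
    }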
diff --git a/freebsd/sys/net/if_types.h b/freebsd/sys/net/if_types.h
index c2effacd..92e101ac 100644
--- a/freebsd/sys/net/if_types.h
+++ b/freebsd/sys/net/if_types.h
@@ -42,214 +42,232 @@
* http://www.iana.org/assignments/smi-numbers
*/
-#define IFT_OTHER 0x1 /* none of the following */
-#define IFT_1822 0x2 /* old-style arpanet imp */
-#define IFT_HDH1822 0x3 /* HDH arpanet imp */
-#define IFT_X25DDN 0x4 /* x25 to imp */
-#define IFT_X25 0x5 /* PDN X25 interface (RFC877) */
-#define IFT_ETHER 0x6 /* Ethernet CSMA/CD */
-#define IFT_ISO88023 0x7 /* CMSA/CD */
-#define IFT_ISO88024 0x8 /* Token Bus */
-#define IFT_ISO88025 0x9 /* Token Ring */
-#define IFT_ISO88026 0xa /* MAN */
-#define IFT_STARLAN 0xb
-#define IFT_P10 0xc /* Proteon 10MBit ring */
-#define IFT_P80 0xd /* Proteon 80MBit ring */
-#define IFT_HY 0xe /* Hyperchannel */
-#define IFT_FDDI 0xf
-#define IFT_LAPB 0x10
-#define IFT_SDLC 0x11
-#define IFT_T1 0x12
-#define IFT_CEPT 0x13 /* E1 - european T1 */
-#define IFT_ISDNBASIC 0x14
-#define IFT_ISDNPRIMARY 0x15
-#define IFT_PTPSERIAL 0x16 /* Proprietary PTP serial */
-#define IFT_PPP 0x17 /* RFC 1331 */
-#define IFT_LOOP 0x18 /* loopback */
-#define IFT_EON 0x19 /* ISO over IP */
-#define IFT_XETHER 0x1a /* obsolete 3MB experimental ethernet */
-#define IFT_NSIP 0x1b /* XNS over IP */
-#define IFT_SLIP 0x1c /* IP over generic TTY */
-#define IFT_ULTRA 0x1d /* Ultra Technologies */
-#define IFT_DS3 0x1e /* Generic T3 */
-#define IFT_SIP 0x1f /* SMDS */
-#define IFT_FRELAY 0x20 /* Frame Relay DTE only */
-#define IFT_RS232 0x21
-#define IFT_PARA 0x22 /* parallel-port */
-#define IFT_ARCNET 0x23
-#define IFT_ARCNETPLUS 0x24
-#define IFT_ATM 0x25 /* ATM cells */
-#define IFT_MIOX25 0x26
-#define IFT_SONET 0x27 /* SONET or SDH */
-#define IFT_X25PLE 0x28
-#define IFT_ISO88022LLC 0x29
-#define IFT_LOCALTALK 0x2a
-#define IFT_SMDSDXI 0x2b
-#define IFT_FRELAYDCE 0x2c /* Frame Relay DCE */
-#define IFT_V35 0x2d
-#define IFT_HSSI 0x2e
-#define IFT_HIPPI 0x2f
-#define IFT_MODEM 0x30 /* Generic Modem */
-#define IFT_AAL5 0x31 /* AAL5 over ATM */
-#define IFT_SONETPATH 0x32
-#define IFT_SONETVT 0x33
-#define IFT_SMDSICIP 0x34 /* SMDS InterCarrier Interface */
-#define IFT_PROPVIRTUAL 0x35 /* Proprietary Virtual/internal */
-#define IFT_PROPMUX 0x36 /* Proprietary Multiplexing */
-#define IFT_IEEE80212 0x37 /* 100BaseVG */
-#define IFT_FIBRECHANNEL 0x38 /* Fibre Channel */
-#define IFT_HIPPIINTERFACE 0x39 /* HIPPI interfaces */
-#define IFT_FRAMERELAYINTERCONNECT 0x3a /* Obsolete, use either 0x20 or 0x2c */
-#define IFT_AFLANE8023 0x3b /* ATM Emulated LAN for 802.3 */
-#define IFT_AFLANE8025 0x3c /* ATM Emulated LAN for 802.5 */
-#define IFT_CCTEMUL 0x3d /* ATM Emulated circuit */
-#define IFT_FASTETHER 0x3e /* Fast Ethernet (100BaseT) */
-#define IFT_ISDN 0x3f /* ISDN and X.25 */
-#define IFT_V11 0x40 /* CCITT V.11/X.21 */
-#define IFT_V36 0x41 /* CCITT V.36 */
-#define IFT_G703AT64K 0x42 /* CCITT G703 at 64Kbps */
-#define IFT_G703AT2MB 0x43 /* Obsolete see DS1-MIB */
-#define IFT_QLLC 0x44 /* SNA QLLC */
-#define IFT_FASTETHERFX 0x45 /* Fast Ethernet (100BaseFX) */
-#define IFT_CHANNEL 0x46 /* channel */
-#define IFT_IEEE80211 0x47 /* radio spread spectrum */
-#define IFT_IBM370PARCHAN 0x48 /* IBM System 360/370 OEMI Channel */
-#define IFT_ESCON 0x49 /* IBM Enterprise Systems Connection */
-#define IFT_DLSW 0x4a /* Data Link Switching */
-#define IFT_ISDNS 0x4b /* ISDN S/T interface */
-#define IFT_ISDNU 0x4c /* ISDN U interface */
-#define IFT_LAPD 0x4d /* Link Access Protocol D */
-#define IFT_IPSWITCH 0x4e /* IP Switching Objects */
-#define IFT_RSRB 0x4f /* Remote Source Route Bridging */
-#define IFT_ATMLOGICAL 0x50 /* ATM Logical Port */
-#define IFT_DS0 0x51 /* Digital Signal Level 0 */
-#define IFT_DS0BUNDLE 0x52 /* group of ds0s on the same ds1 */
-#define IFT_BSC 0x53 /* Bisynchronous Protocol */
-#define IFT_ASYNC 0x54 /* Asynchronous Protocol */
-#define IFT_CNR 0x55 /* Combat Net Radio */
-#define IFT_ISO88025DTR 0x56 /* ISO 802.5r DTR */
-#define IFT_EPLRS 0x57 /* Ext Pos Loc Report Sys */
-#define IFT_ARAP 0x58 /* Appletalk Remote Access Protocol */
-#define IFT_PROPCNLS 0x59 /* Proprietary Connectionless Protocol*/
-#define IFT_HOSTPAD 0x5a /* CCITT-ITU X.29 PAD Protocol */
-#define IFT_TERMPAD 0x5b /* CCITT-ITU X.3 PAD Facility */
-#define IFT_FRAMERELAYMPI 0x5c /* Multiproto Interconnect over FR */
-#define IFT_X213 0x5d /* CCITT-ITU X213 */
-#define IFT_ADSL 0x5e /* Asymmetric Digital Subscriber Loop */
-#define IFT_RADSL 0x5f /* Rate-Adapt. Digital Subscriber Loop*/
-#define IFT_SDSL 0x60 /* Symmetric Digital Subscriber Loop */
-#define IFT_VDSL 0x61 /* Very H-Speed Digital Subscrib. Loop*/
-#define IFT_ISO88025CRFPINT 0x62 /* ISO 802.5 CRFP */
-#define IFT_MYRINET 0x63 /* Myricom Myrinet */
-#define IFT_VOICEEM 0x64 /* voice recEive and transMit */
-#define IFT_VOICEFXO 0x65 /* voice Foreign Exchange Office */
-#define IFT_VOICEFXS 0x66 /* voice Foreign Exchange Station */
-#define IFT_VOICEENCAP 0x67 /* voice encapsulation */
-#define IFT_VOICEOVERIP 0x68 /* voice over IP encapsulation */
-#define IFT_ATMDXI 0x69 /* ATM DXI */
-#define IFT_ATMFUNI 0x6a /* ATM FUNI */
-#define IFT_ATMIMA 0x6b /* ATM IMA */
-#define IFT_PPPMULTILINKBUNDLE 0x6c /* PPP Multilink Bundle */
-#define IFT_IPOVERCDLC 0x6d /* IBM ipOverCdlc */
-#define IFT_IPOVERCLAW 0x6e /* IBM Common Link Access to Workstn */
-#define IFT_STACKTOSTACK 0x6f /* IBM stackToStack */
-#define IFT_VIRTUALIPADDRESS 0x70 /* IBM VIPA */
-#define IFT_MPC 0x71 /* IBM multi-protocol channel support */
-#define IFT_IPOVERATM 0x72 /* IBM ipOverAtm */
-#define IFT_ISO88025FIBER 0x73 /* ISO 802.5j Fiber Token Ring */
-#define IFT_TDLC 0x74 /* IBM twinaxial data link control */
-#define IFT_GIGABITETHERNET 0x75 /* Gigabit Ethernet */
-#define IFT_HDLC 0x76 /* HDLC */
-#define IFT_LAPF 0x77 /* LAP F */
-#define IFT_V37 0x78 /* V.37 */
-#define IFT_X25MLP 0x79 /* Multi-Link Protocol */
-#define IFT_X25HUNTGROUP 0x7a /* X25 Hunt Group */
-#define IFT_TRANSPHDLC 0x7b /* Transp HDLC */
-#define IFT_INTERLEAVE 0x7c /* Interleave channel */
-#define IFT_FAST 0x7d /* Fast channel */
-#define IFT_IP 0x7e /* IP (for APPN HPR in IP networks) */
-#define IFT_DOCSCABLEMACLAYER 0x7f /* CATV Mac Layer */
-#define IFT_DOCSCABLEDOWNSTREAM 0x80 /* CATV Downstream interface */
-#define IFT_DOCSCABLEUPSTREAM 0x81 /* CATV Upstream interface */
-#define IFT_A12MPPSWITCH 0x82 /* Avalon Parallel Processor */
-#define IFT_TUNNEL 0x83 /* Encapsulation interface */
-#define IFT_COFFEE 0x84 /* coffee pot */
-#define IFT_CES 0x85 /* Circiut Emulation Service */
-#define IFT_ATMSUBINTERFACE 0x86 /* (x) ATM Sub Interface */
-#define IFT_L2VLAN 0x87 /* Layer 2 Virtual LAN using 802.1Q */
-#define IFT_L3IPVLAN 0x88 /* Layer 3 Virtual LAN - IP Protocol */
-#define IFT_L3IPXVLAN 0x89 /* Layer 3 Virtual LAN - IPX Prot. */
-#define IFT_DIGITALPOWERLINE 0x8a /* IP over Power Lines */
-#define IFT_MEDIAMAILOVERIP 0x8b /* (xxx) Multimedia Mail over IP */
-#define IFT_DTM 0x8c /* Dynamic synchronous Transfer Mode */
-#define IFT_DCN 0x8d /* Data Communications Network */
-#define IFT_IPFORWARD 0x8e /* IP Forwarding Interface */
-#define IFT_MSDSL 0x8f /* Multi-rate Symmetric DSL */
-#define IFT_IEEE1394 0x90 /* IEEE1394 High Performance SerialBus*/
-#define IFT_IFGSN 0x91 /* HIPPI-6400 */
-#define IFT_DVBRCCMACLAYER 0x92 /* DVB-RCC MAC Layer */
-#define IFT_DVBRCCDOWNSTREAM 0x93 /* DVB-RCC Downstream Channel */
-#define IFT_DVBRCCUPSTREAM 0x94 /* DVB-RCC Upstream Channel */
-#define IFT_ATMVIRTUAL 0x95 /* ATM Virtual Interface */
-#define IFT_MPLSTUNNEL 0x96 /* MPLS Tunnel Virtual Interface */
-#define IFT_SRP 0x97 /* Spatial Reuse Protocol */
-#define IFT_VOICEOVERATM 0x98 /* Voice over ATM */
-#define IFT_VOICEOVERFRAMERELAY 0x99 /* Voice Over Frame Relay */
-#define IFT_IDSL 0x9a /* Digital Subscriber Loop over ISDN */
-#define IFT_COMPOSITELINK 0x9b /* Avici Composite Link Interface */
-#define IFT_SS7SIGLINK 0x9c /* SS7 Signaling Link */
-#define IFT_PROPWIRELESSP2P 0x9d /* Prop. P2P wireless interface */
-#define IFT_FRFORWARD 0x9e /* Frame forward Interface */
-#define IFT_RFC1483 0x9f /* Multiprotocol over ATM AAL5 */
-#define IFT_USB 0xa0 /* USB Interface */
-#define IFT_IEEE8023ADLAG 0xa1 /* IEEE 802.3ad Link Aggregate*/
-#define IFT_BGPPOLICYACCOUNTING 0xa2 /* BGP Policy Accounting */
-#define IFT_FRF16MFRBUNDLE 0xa3 /* FRF.16 Multilik Frame Relay*/
-#define IFT_H323GATEKEEPER 0xa4 /* H323 Gatekeeper */
-#define IFT_H323PROXY 0xa5 /* H323 Voice and Video Proxy */
-#define IFT_MPLS 0xa6 /* MPLS */
-#define IFT_MFSIGLINK 0xa7 /* Multi-frequency signaling link */
-#define IFT_HDSL2 0xa8 /* High Bit-Rate DSL, 2nd gen. */
-#define IFT_SHDSL 0xa9 /* Multirate HDSL2 */
-#define IFT_DS1FDL 0xaa /* Facility Data Link (4Kbps) on a DS1*/
-#define IFT_POS 0xab /* Packet over SONET/SDH Interface */
-#define IFT_DVBASILN 0xac /* DVB-ASI Input */
-#define IFT_DVBASIOUT 0xad /* DVB-ASI Output */
-#define IFT_PLC 0xae /* Power Line Communications */
-#define IFT_NFAS 0xaf /* Non-Facility Associated Signaling */
-#define IFT_TR008 0xb0 /* TROO8 */
-#define IFT_GR303RDT 0xb1 /* Remote Digital Terminal */
-#define IFT_GR303IDT 0xb2 /* Integrated Digital Terminal */
-#define IFT_ISUP 0xb3 /* ISUP */
-#define IFT_PROPDOCSWIRELESSMACLAYER 0xb4 /* prop/Wireless MAC Layer */
-#define IFT_PROPDOCSWIRELESSDOWNSTREAM 0xb5 /* prop/Wireless Downstream */
-#define IFT_PROPDOCSWIRELESSUPSTREAM 0xb6 /* prop/Wireless Upstream */
-#define IFT_HIPERLAN2 0xb7 /* HIPERLAN Type 2 Radio Interface */
-#define IFT_PROPBWAP2MP 0xb8 /* PropBroadbandWirelessAccess P2MP*/
-#define IFT_SONETOVERHEADCHANNEL 0xb9 /* SONET Overhead Channel */
-#define IFT_DIGITALWRAPPEROVERHEADCHANNEL 0xba /* Digital Wrapper Overhead */
-#define IFT_AAL2 0xbb /* ATM adaptation layer 2 */
-#define IFT_RADIOMAC 0xbc /* MAC layer over radio links */
-#define IFT_ATMRADIO 0xbd /* ATM over radio links */
-#define IFT_IMT 0xbe /* Inter-Machine Trunks */
-#define IFT_MVL 0xbf /* Multiple Virtual Lines DSL */
-#define IFT_REACHDSL 0xc0 /* Long Reach DSL */
-#define IFT_FRDLCIENDPT 0xc1 /* Frame Relay DLCI End Point */
-#define IFT_ATMVCIENDPT 0xc2 /* ATM VCI End Point */
-#define IFT_OPTICALCHANNEL 0xc3 /* Optical Channel */
-#define IFT_OPTICALTRANSPORT 0xc4 /* Optical Transport */
-#define IFT_INFINIBAND 0xc7 /* Infiniband */
-#define IFT_BRIDGE 0xd1 /* Transparent bridge interface */
+typedef enum {
+ IFT_OTHER = 0x1, /* none of the following */
+ IFT_1822 = 0x2, /* old-style arpanet imp */
+ IFT_HDH1822 = 0x3, /* HDH arpanet imp */
+ IFT_X25DDN = 0x4, /* x25 to imp */
+ IFT_X25 = 0x5, /* PDN X25 interface (RFC877) */
+ IFT_ETHER = 0x6, /* Ethernet CSMA/CD */
+ IFT_ISO88023 = 0x7, /* CMSA/CD */
+ IFT_ISO88024 = 0x8, /* Token Bus */
+ IFT_ISO88025 = 0x9, /* Token Ring */
+ IFT_ISO88026 = 0xa, /* MAN */
+ IFT_STARLAN = 0xb,
+ IFT_P10 = 0xc, /* Proteon 10MBit ring */
+ IFT_P80 = 0xd, /* Proteon 80MBit ring */
+ IFT_HY = 0xe, /* Hyperchannel */
+ IFT_FDDI = 0xf,
+ IFT_LAPB = 0x10,
+ IFT_SDLC = 0x11,
+ IFT_T1 = 0x12,
+ IFT_CEPT = 0x13, /* E1 - european T1 */
+ IFT_ISDNBASIC = 0x14,
+ IFT_ISDNPRIMARY = 0x15,
+ IFT_PTPSERIAL = 0x16, /* Proprietary PTP serial */
+ IFT_PPP = 0x17, /* RFC 1331 */
+ IFT_LOOP = 0x18, /* loopback */
+ IFT_EON = 0x19, /* ISO over IP */
+ IFT_XETHER = 0x1a, /* obsolete 3MB experimental ethernet */
+ IFT_NSIP = 0x1b, /* XNS over IP */
+ IFT_SLIP = 0x1c, /* IP over generic TTY */
+ IFT_ULTRA = 0x1d, /* Ultra Technologies */
+ IFT_DS3 = 0x1e, /* Generic T3 */
+ IFT_SIP = 0x1f, /* SMDS */
+ IFT_FRELAY = 0x20, /* Frame Relay DTE only */
+ IFT_RS232 = 0x21,
+ IFT_PARA = 0x22, /* parallel-port */
+ IFT_ARCNET = 0x23,
+ IFT_ARCNETPLUS = 0x24,
+ IFT_ATM = 0x25, /* ATM cells */
+ IFT_MIOX25 = 0x26,
+ IFT_SONET = 0x27, /* SONET or SDH */
+ IFT_X25PLE = 0x28,
+ IFT_ISO88022LLC = 0x29,
+ IFT_LOCALTALK = 0x2a,
+ IFT_SMDSDXI = 0x2b,
+ IFT_FRELAYDCE = 0x2c, /* Frame Relay DCE */
+ IFT_V35 = 0x2d,
+ IFT_HSSI = 0x2e,
+ IFT_HIPPI = 0x2f,
+ IFT_MODEM = 0x30, /* Generic Modem */
+ IFT_AAL5 = 0x31, /* AAL5 over ATM */
+ IFT_SONETPATH = 0x32,
+ IFT_SONETVT = 0x33,
+ IFT_SMDSICIP = 0x34, /* SMDS InterCarrier Interface */
+ IFT_PROPVIRTUAL = 0x35, /* Proprietary Virtual/internal */
+ IFT_PROPMUX = 0x36, /* Proprietary Multiplexing */
+ IFT_IEEE80212 = 0x37, /* 100BaseVG */
+ IFT_FIBRECHANNEL = 0x38, /* Fibre Channel */
+ IFT_HIPPIINTERFACE = 0x39, /* HIPPI interfaces */
+ IFT_FRAMERELAYINTERCONNECT = 0x3a, /* Obsolete, use either 0x20 or 0x2c */
+ IFT_AFLANE8023 = 0x3b, /* ATM Emulated LAN for 802.3 */
+ IFT_AFLANE8025 = 0x3c, /* ATM Emulated LAN for 802.5 */
+ IFT_CCTEMUL = 0x3d, /* ATM Emulated circuit */
+ IFT_FASTETHER = 0x3e, /* Fast Ethernet (100BaseT) */
+ IFT_ISDN = 0x3f, /* ISDN and X.25 */
+ IFT_V11 = 0x40, /* CCITT V.11/X.21 */
+ IFT_V36 = 0x41, /* CCITT V.36 */
+ IFT_G703AT64K = 0x42, /* CCITT G703 at 64Kbps */
+ IFT_G703AT2MB = 0x43, /* Obsolete see DS1-MIB */
+ IFT_QLLC = 0x44, /* SNA QLLC */
+ IFT_FASTETHERFX = 0x45, /* Fast Ethernet (100BaseFX) */
+ IFT_CHANNEL = 0x46, /* channel */
+ IFT_IEEE80211 = 0x47, /* radio spread spectrum */
+ IFT_IBM370PARCHAN = 0x48, /* IBM System 360/370 OEMI Channel */
+ IFT_ESCON = 0x49, /* IBM Enterprise Systems Connection */
+ IFT_DLSW = 0x4a, /* Data Link Switching */
+ IFT_ISDNS = 0x4b, /* ISDN S/T interface */
+ IFT_ISDNU = 0x4c, /* ISDN U interface */
+ IFT_LAPD = 0x4d, /* Link Access Protocol D */
+ IFT_IPSWITCH = 0x4e, /* IP Switching Objects */
+ IFT_RSRB = 0x4f, /* Remote Source Route Bridging */
+ IFT_ATMLOGICAL = 0x50, /* ATM Logical Port */
+ IFT_DS0 = 0x51, /* Digital Signal Level 0 */
+ IFT_DS0BUNDLE = 0x52, /* group of ds0s on the same ds1 */
+ IFT_BSC = 0x53, /* Bisynchronous Protocol */
+ IFT_ASYNC = 0x54, /* Asynchronous Protocol */
+ IFT_CNR = 0x55, /* Combat Net Radio */
+ IFT_ISO88025DTR = 0x56, /* ISO 802.5r DTR */
+ IFT_EPLRS = 0x57, /* Ext Pos Loc Report Sys */
+ IFT_ARAP = 0x58, /* Appletalk Remote Access Protocol */
+ IFT_PROPCNLS = 0x59, /* Proprietary Connectionless Protocol*/
+ IFT_HOSTPAD = 0x5a, /* CCITT-ITU X.29 PAD Protocol */
+ IFT_TERMPAD = 0x5b, /* CCITT-ITU X.3 PAD Facility */
+ IFT_FRAMERELAYMPI = 0x5c, /* Multiproto Interconnect over FR */
+ IFT_X213 = 0x5d, /* CCITT-ITU X213 */
+ IFT_ADSL = 0x5e, /* Asymmetric Digital Subscriber Loop */
+ IFT_RADSL = 0x5f, /* Rate-Adapt. Digital Subscriber Loop*/
+ IFT_SDSL = 0x60, /* Symmetric Digital Subscriber Loop */
+ IFT_VDSL = 0x61, /* Very H-Speed Digital Subscrib. Loop*/
+ IFT_ISO88025CRFPINT = 0x62, /* ISO 802.5 CRFP */
+ IFT_MYRINET = 0x63, /* Myricom Myrinet */
+ IFT_VOICEEM = 0x64, /* voice recEive and transMit */
+ IFT_VOICEFXO = 0x65, /* voice Foreign Exchange Office */
+ IFT_VOICEFXS = 0x66, /* voice Foreign Exchange Station */
+ IFT_VOICEENCAP = 0x67, /* voice encapsulation */
+ IFT_VOICEOVERIP = 0x68, /* voice over IP encapsulation */
+ IFT_ATMDXI = 0x69, /* ATM DXI */
+ IFT_ATMFUNI = 0x6a, /* ATM FUNI */
+ IFT_ATMIMA = 0x6b, /* ATM IMA */
+ IFT_PPPMULTILINKBUNDLE = 0x6c, /* PPP Multilink Bundle */
+ IFT_IPOVERCDLC = 0x6d, /* IBM ipOverCdlc */
+ IFT_IPOVERCLAW = 0x6e, /* IBM Common Link Access to Workstn */
+ IFT_STACKTOSTACK = 0x6f, /* IBM stackToStack */
+ IFT_VIRTUALIPADDRESS = 0x70, /* IBM VIPA */
+ IFT_MPC = 0x71, /* IBM multi-protocol channel support */
+ IFT_IPOVERATM = 0x72, /* IBM ipOverAtm */
+ IFT_ISO88025FIBER = 0x73, /* ISO 802.5j Fiber Token Ring */
+ IFT_TDLC = 0x74, /* IBM twinaxial data link control */
+ IFT_GIGABITETHERNET = 0x75, /* Gigabit Ethernet */
+ IFT_HDLC = 0x76, /* HDLC */
+ IFT_LAPF = 0x77, /* LAP F */
+ IFT_V37 = 0x78, /* V.37 */
+ IFT_X25MLP = 0x79, /* Multi-Link Protocol */
+ IFT_X25HUNTGROUP = 0x7a, /* X25 Hunt Group */
+ IFT_TRANSPHDLC = 0x7b, /* Transp HDLC */
+ IFT_INTERLEAVE = 0x7c, /* Interleave channel */
+ IFT_FAST = 0x7d, /* Fast channel */
+ IFT_IP = 0x7e, /* IP (for APPN HPR in IP networks) */
+ IFT_DOCSCABLEMACLAYER = 0x7f, /* CATV Mac Layer */
+ IFT_DOCSCABLEDOWNSTREAM = 0x80, /* CATV Downstream interface */
+ IFT_DOCSCABLEUPSTREAM = 0x81, /* CATV Upstream interface */
+ IFT_A12MPPSWITCH = 0x82, /* Avalon Parallel Processor */
+ IFT_TUNNEL = 0x83, /* Encapsulation interface */
+ IFT_COFFEE = 0x84, /* coffee pot */
+ IFT_CES = 0x85, /* Circuit Emulation Service */
+ IFT_ATMSUBINTERFACE = 0x86, /* (x) ATM Sub Interface */
+ IFT_L2VLAN = 0x87, /* Layer 2 Virtual LAN using 802.1Q */
+ IFT_L3IPVLAN = 0x88, /* Layer 3 Virtual LAN - IP Protocol */
+ IFT_L3IPXVLAN = 0x89, /* Layer 3 Virtual LAN - IPX Prot. */
+ IFT_DIGITALPOWERLINE = 0x8a, /* IP over Power Lines */
+ IFT_MEDIAMAILOVERIP = 0x8b, /* (xxx) Multimedia Mail over IP */
+ IFT_DTM = 0x8c, /* Dynamic synchronous Transfer Mode */
+ IFT_DCN = 0x8d, /* Data Communications Network */
+ IFT_IPFORWARD = 0x8e, /* IP Forwarding Interface */
+ IFT_MSDSL = 0x8f, /* Multi-rate Symmetric DSL */
+ IFT_IEEE1394 = 0x90, /* IEEE1394 High Performance SerialBus*/
+ IFT_IFGSN = 0x91, /* HIPPI-6400 */
+ IFT_DVBRCCMACLAYER = 0x92, /* DVB-RCC MAC Layer */
+ IFT_DVBRCCDOWNSTREAM = 0x93, /* DVB-RCC Downstream Channel */
+ IFT_DVBRCCUPSTREAM = 0x94, /* DVB-RCC Upstream Channel */
+ IFT_ATMVIRTUAL = 0x95, /* ATM Virtual Interface */
+ IFT_MPLSTUNNEL = 0x96, /* MPLS Tunnel Virtual Interface */
+ IFT_SRP = 0x97, /* Spatial Reuse Protocol */
+ IFT_VOICEOVERATM = 0x98, /* Voice over ATM */
+ IFT_VOICEOVERFRAMERELAY = 0x99, /* Voice Over Frame Relay */
+ IFT_IDSL = 0x9a, /* Digital Subscriber Loop over ISDN */
+ IFT_COMPOSITELINK = 0x9b, /* Avici Composite Link Interface */
+ IFT_SS7SIGLINK = 0x9c, /* SS7 Signaling Link */
+ IFT_PROPWIRELESSP2P = 0x9d, /* Prop. P2P wireless interface */
+ IFT_FRFORWARD = 0x9e, /* Frame forward Interface */
+ IFT_RFC1483 = 0x9f, /* Multiprotocol over ATM AAL5 */
+ IFT_USB = 0xa0, /* USB Interface */
+ IFT_IEEE8023ADLAG = 0xa1, /* IEEE 802.3ad Link Aggregate*/
+ IFT_BGPPOLICYACCOUNTING = 0xa2, /* BGP Policy Accounting */
+ IFT_FRF16MFRBUNDLE = 0xa3, /* FRF.16 Multilink Frame Relay */
+ IFT_H323GATEKEEPER = 0xa4, /* H323 Gatekeeper */
+ IFT_H323PROXY = 0xa5, /* H323 Voice and Video Proxy */
+ IFT_MPLS = 0xa6, /* MPLS */
+ IFT_MFSIGLINK = 0xa7, /* Multi-frequency signaling link */
+ IFT_HDSL2 = 0xa8, /* High Bit-Rate DSL, 2nd gen. */
+ IFT_SHDSL = 0xa9, /* Multirate HDSL2 */
+ IFT_DS1FDL = 0xaa, /* Facility Data Link (4Kbps) on a DS1*/
+ IFT_POS = 0xab, /* Packet over SONET/SDH Interface */
+ IFT_DVBASILN = 0xac, /* DVB-ASI Input */
+ IFT_DVBASIOUT = 0xad, /* DVB-ASI Output */
+ IFT_PLC = 0xae, /* Power Line Communications */
+ IFT_NFAS = 0xaf, /* Non-Facility Associated Signaling */
+ IFT_TR008 = 0xb0, /* TR008 */
+ IFT_GR303RDT = 0xb1, /* Remote Digital Terminal */
+ IFT_GR303IDT = 0xb2, /* Integrated Digital Terminal */
+ IFT_ISUP = 0xb3, /* ISUP */
+ IFT_PROPDOCSWIRELESSMACLAYER = 0xb4, /* prop/Wireless MAC Layer */
+ IFT_PROPDOCSWIRELESSDOWNSTREAM = 0xb5, /* prop/Wireless Downstream */
+ IFT_PROPDOCSWIRELESSUPSTREAM = 0xb6, /* prop/Wireless Upstream */
+ IFT_HIPERLAN2 = 0xb7, /* HIPERLAN Type 2 Radio Interface */
+ IFT_PROPBWAP2MP = 0xb8, /* PropBroadbandWirelessAccess P2MP*/
+ IFT_SONETOVERHEADCHANNEL = 0xb9, /* SONET Overhead Channel */
+ IFT_DIGITALWRAPPEROVERHEADCHANNEL = 0xba, /* Digital Wrapper Overhead */
+ IFT_AAL2 = 0xbb, /* ATM adaptation layer 2 */
+ IFT_RADIOMAC = 0xbc, /* MAC layer over radio links */
+ IFT_ATMRADIO = 0xbd, /* ATM over radio links */
+ IFT_IMT = 0xbe, /* Inter-Machine Trunks */
+ IFT_MVL = 0xbf, /* Multiple Virtual Lines DSL */
+ IFT_REACHDSL = 0xc0, /* Long Reach DSL */
+ IFT_FRDLCIENDPT = 0xc1, /* Frame Relay DLCI End Point */
+ IFT_ATMVCIENDPT = 0xc2, /* ATM VCI End Point */
+ IFT_OPTICALCHANNEL = 0xc3, /* Optical Channel */
+ IFT_OPTICALTRANSPORT = 0xc4, /* Optical Transport */
+ IFT_INFINIBAND = 0xc7, /* Infiniband */
+ IFT_BRIDGE = 0xd1, /* Transparent bridge interface */
+ IFT_STF = 0xd7, /* 6to4 interface */
-#define IFT_STF 0xd7 /* 6to4 interface */
+ /*
+ * Not based on IANA assignments, and conflicting with IANA
+ * assignments. They should probably be made negative, which
+ * would require changes to struct if_data.
+ */
+ IFT_GIF = 0xf0, /* Generic tunnel interface */
+ IFT_PVC = 0xf1, /* Unused */
+ IFT_ENC = 0xf4, /* Encapsulating interface */
+ IFT_PFLOG = 0xf6, /* PF packet filter logging */
+ IFT_PFSYNC = 0xf7, /* PF packet filter synchronization */
+} ifType;
+
+/*
+ * Some (broken) software uses #ifdef IFT_TYPE to check whether an
+ * operating system supports a certain interface type. Lack of the
+ * ifdef leads to that functionality being compiled out.
+ */
+#ifndef BURN_BRIDGES
+#define IFT_BRIDGE IFT_BRIDGE
+#define IFT_PPP IFT_PPP
+#define IFT_PROPVIRTUAL IFT_PROPVIRTUAL
+#define IFT_L2VLAN IFT_L2VLAN
+#define IFT_L3IPVLAN IFT_L3IPVLAN
+#define IFT_IEEE1394 IFT_IEEE1394
+#define IFT_INFINIBAND IFT_INFINIBAND
+#endif
-/* not based on IANA assignments */
-#define IFT_GIF 0xf0
-#define IFT_PVC 0xf1
-#define IFT_FAITH 0xf2
-#define IFT_ENC 0xf4
-#define IFT_PFLOG 0xf6
-#define IFT_PFSYNC 0xf7
-#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */
-#define IFT_IPXIP 0xf9 /* IPX over IP tunneling; no longer used. */
#endif /* !_NET_IF_TYPES_H_ */
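Converting the IFT_* constants from preprocessor defines to an enum takes them out of the #ifdef namespace, so the self-referential defines in the BURN_BRIDGES block above are what keep feature probes in third-party code working:

    /* The probe still succeeds because the header defines the name ... */
    #ifdef IFT_BRIDGE
            /* bridge-aware code path */
    #endif

    /* ... via a define that expands to the enum constant itself: */
    #define IFT_BRIDGE IFT_BRIDGE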
diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h
index ee4db195..ec3719d4 100644
--- a/freebsd/sys/net/if_var.h
+++ b/freebsd/sys/net/if_var.h
@@ -58,58 +58,75 @@
* interfaces. These routines live in the files if.c and route.c
*/
-#ifdef __STDC__
-/*
- * Forward structure declarations for function prototypes [sic].
- */
-struct mbuf;
-struct thread;
-struct rtentry;
-struct rt_addrinfo;
+struct rtentry; /* ifa_rtrequest */
+struct rt_addrinfo; /* ifa_rtrequest */
struct socket;
-struct ether_header;
struct carp_if;
+struct carp_softc;
struct ifvlantrunk;
-struct route;
+struct route; /* if_output */
struct vnet;
-#endif
-
-#include <sys/queue.h> /* get TAILQ macros */
+struct ifmedia;
+struct netmap_adapter;
#ifdef _KERNEL
-#include <sys/mbuf.h>
-#include <sys/eventhandler.h>
+#include <sys/mbuf.h> /* ifqueue only? */
#include <sys/buf_ring.h>
#include <net/vnet.h>
#endif /* _KERNEL */
+#include <sys/counter.h>
#include <rtems/bsd/sys/lock.h> /* XXX */
-#include <sys/mutex.h> /* XXX */
+#include <sys/mutex.h> /* struct ifqueue */
#include <sys/rwlock.h> /* XXX */
#include <sys/sx.h> /* XXX */
-#include <sys/event.h> /* XXX */
-#include <sys/_task.h>
+#include <sys/_task.h> /* if_link_task */
#define IF_DUNIT_NONE -1
-#include <altq/if_altq.h>
+#include <net/altq/if_altq.h>
TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */
TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */
-TAILQ_HEAD(ifprefixhead, ifprefix);
TAILQ_HEAD(ifmultihead, ifmultiaddr);
TAILQ_HEAD(ifgrouphead, ifg_group);
-/*
- * Structure defining a queue for a network interface.
- */
-struct ifqueue {
- struct mbuf *ifq_head;
- struct mbuf *ifq_tail;
- int ifq_len;
- int ifq_maxlen;
- int ifq_drops;
- struct mtx ifq_mtx;
-};
+#ifdef _KERNEL
+VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */
+#define V_link_pfil_hook VNET(link_pfil_hook)
+
+#define HHOOK_IPSEC_INET 0
+#define HHOOK_IPSEC_INET6 1
+#define HHOOK_IPSEC_COUNT 2
+VNET_DECLARE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
+VNET_DECLARE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
+#define V_ipsec_hhh_in VNET(ipsec_hhh_in)
+#define V_ipsec_hhh_out VNET(ipsec_hhh_out)
+#endif /* _KERNEL */
+
+typedef enum {
+ IFCOUNTER_IPACKETS = 0,
+ IFCOUNTER_IERRORS,
+ IFCOUNTER_OPACKETS,
+ IFCOUNTER_OERRORS,
+ IFCOUNTER_COLLISIONS,
+ IFCOUNTER_IBYTES,
+ IFCOUNTER_OBYTES,
+ IFCOUNTER_IMCASTS,
+ IFCOUNTER_OMCASTS,
+ IFCOUNTER_IQDROPS,
+ IFCOUNTER_OQDROPS,
+ IFCOUNTER_NOPROTO,
+ IFCOUNTERS /* Array size. */
+} ift_counter;
+
+typedef struct ifnet * if_t;
+
+typedef void (*if_start_fn_t)(if_t);
+typedef int (*if_ioctl_fn_t)(if_t, u_long, caddr_t);
+typedef void (*if_init_fn_t)(void *);
+typedef void (*if_qflush_fn_t)(if_t);
+typedef int (*if_transmit_fn_t)(if_t, struct mbuf *);
+typedef uint64_t (*if_get_counter_t)(if_t, ift_counter);
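if_get_counter_t is the driver-side half of the same ift_counter namespace: a driver that keeps private statistics exports them through this method rather than writing ifnet fields directly, falling back to the stack-maintained values for everything else. A hedged sketch (the 'foo' softc and its fields are illustrative):

    static uint64_t
    foo_get_counter(if_t ifp, ift_counter cnt)
    {
            struct foo_softc *sc = if_getsoftc(ifp);

            switch (cnt) {
            case IFCOUNTER_IPACKETS:
                    return (sc->foo_rx_packets);    /* hardware RX count */
            default:
                    return (if_get_counter_default(ifp, cnt));
            }
    }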
struct ifnet_hw_tsomax {
u_int tsomaxbytes; /* TSO total burst length limit in bytes */
@@ -117,22 +134,99 @@ struct ifnet_hw_tsomax {
u_int tsomaxsegsize; /* TSO maximum segment size in bytes */
};
+/* Interface encap request types */
+typedef enum {
+ IFENCAP_LL = 1 /* pre-calculate link-layer header */
+} ife_type;
+
/*
- * Structure defining a network interface.
+ * The structure below allows requesting various pre-calculated L2/L3 headers
+ * for different media. Requests vary by type (rtype field).
+ *
+ * IFENCAP_LL type: pre-calculates link header based on address family
+ * and destination lladdr.
*
- * (Would like to call this struct ``if'', but C isn't PL/1.)
+ * Input data fields:
+ * buf: pointer to destination buffer
+ * bufsize: buffer size
+ * flags: IFENCAP_FLAG_BROADCAST if destination is broadcast
+ * family: address family defined by AF_ constant.
+ * lladdr: pointer to link-layer address
+ * lladdr_len: length of link-layer address
+ * hdata: pointer to L3 header (optional, used for ARP requests).
+ * Output data fields:
+ * buf: encap data is stored here
+ * bufsize: resulting encap length is stored here
+ * lladdr_off: offset of link-layer address from encap hdr start
+ * hdata: L3 header may be altered if necessary
*/
+struct if_encap_req {
+ u_char *buf; /* Destination buffer (w) */
+ size_t bufsize; /* size of provided buffer (r) */
+ ife_type rtype; /* request type (r) */
+ uint32_t flags; /* Request flags (r) */
+ int family; /* Address family AF_* (r) */
+ int lladdr_off; /* offset from header start (w) */
+ int lladdr_len; /* lladdr length (r) */
+ char *lladdr; /* link-level address pointer (r) */
+ char *hdata; /* Upper layer header data (rw) */
+};
+
+#define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */
+
+
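
[Note: a sketch of the caller side, assuming a resolved neighbor whose
link-layer address "lladdr" is already known; the buffer size and variable
names are illustrative only. The field semantics follow the comment above.]

    struct if_encap_req ereq;
    u_char linkhdr[64];
    int error;

    bzero(&ereq, sizeof(ereq));
    ereq.buf = linkhdr;
    ereq.bufsize = sizeof(linkhdr);
    ereq.rtype = IFENCAP_LL;
    ereq.flags = 0;			/* or IFENCAP_FLAG_BROADCAST */
    ereq.family = AF_INET;
    ereq.lladdr = lladdr;
    ereq.lladdr_len = ETHER_ADDR_LEN;

    error = ifp->if_requestencap(ifp, &ereq);
    /*
     * On success, linkhdr holds the prepend-ready header, ereq.bufsize
     * its length, and ereq.lladdr_off the offset of the destination
     * address within it.
     */
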
+/*
+ * Structure defining a network interface.
+ *
+ * Size ILP32: 592 (approx)
+ * LP64: 1048 (approx)
+ */
struct ifnet {
+	/* General bookkeeping of interface lists. */
+ TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
+ LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
+ TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
+ /* protected by if_addr_lock */
+ u_char if_alloctype; /* if_type at time of allocation */
+
+ /* Driver and protocol specific information that remains stable. */
void *if_softc; /* pointer to driver state */
+ void *if_llsoftc; /* link layer softc */
void *if_l2com; /* pointer to protocol bits */
- struct vnet *if_vnet; /* pointer to network stack instance */
- TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
- char if_xname[IFNAMSIZ]; /* external name (name + unit) */
const char *if_dname; /* driver name */
int if_dunit; /* unit or IF_DUNIT_NONE */
+ u_short if_index; /* numeric abbreviation for this if */
+ short if_index_reserved; /* spare space to grow if_index */
+ char if_xname[IFNAMSIZ]; /* external name (name + unit) */
+ char *if_description; /* interface description */
+
+ /* Variable fields that are touched by the stack and drivers. */
+ int if_flags; /* up/down, broadcast, etc. */
+ int if_drv_flags; /* driver-managed status flags */
+ int if_capabilities; /* interface features & capabilities */
+ int if_capenable; /* enabled features & capabilities */
+ void *if_linkmib; /* link-type-specific MIB data */
+ size_t if_linkmiblen; /* length of above data */
u_int if_refcount; /* reference count */
- struct ifaddrhead if_addrhead; /* linked list of addresses per if */
+
+ /* These fields are shared with struct if_data. */
+ uint8_t if_type; /* ethernet, tokenring, etc */
+ uint8_t if_addrlen; /* media address length */
+ uint8_t if_hdrlen; /* media header length */
+ uint8_t if_link_state; /* current link state */
+ uint32_t if_mtu; /* maximum transmission unit */
+ uint32_t if_metric; /* routing metric (external only) */
+ uint64_t if_baudrate; /* linespeed */
+ uint64_t if_hwassist; /* HW offload capabilities, see IFCAP */
+ time_t if_epoch; /* uptime at attach or stat reset */
+ struct timeval if_lastchange; /* time of last administrative change */
+
+ struct ifaltq if_snd; /* output queue (includes altq) */
+ struct task if_linktask; /* task for link change events */
+
+ /* Addresses of different protocol families assigned to this if. */
+ struct rwlock if_addr_lock; /* lock to protect address lists */
/*
* if_addrhead is the list of all addresses associated to
* an interface.
@@ -143,74 +237,53 @@ struct ifnet {
* However, access to the AF_LINK address through this
* field is deprecated. Use if_addr or ifaddr_byindex() instead.
*/
- int if_pcount; /* number of promiscuous listeners */
- struct carp_if *if_carp; /* carp interface structure */
- struct bpf_if *if_bpf; /* packet filter structure */
- u_short if_index; /* numeric abbreviation for this if */
- short if_index_reserved; /* spare space to grow if_index */
- struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
- int if_flags; /* up/down, broadcast, etc. */
- int if_capabilities; /* interface features & capabilities */
- int if_capenable; /* enabled features & capabilities */
- void *if_linkmib; /* link-type-specific MIB data */
- size_t if_linkmiblen; /* length of above data */
- struct if_data if_data;
+ struct ifaddrhead if_addrhead; /* linked list of addresses per if */
struct ifmultihead if_multiaddrs; /* multicast addresses configured */
int if_amcount; /* number of all-multicast requests */
-/* procedure handles */
+ struct ifaddr *if_addr; /* pointer to link-level address */
+ const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */
+ struct rwlock if_afdata_lock;
+ void *if_afdata[AF_MAX];
+ int if_afdata_initialized;
+
+ /* Additional features hung off the interface. */
+ u_int if_fib; /* interface FIB */
+ struct vnet *if_vnet; /* pointer to network stack instance */
+ struct vnet *if_home_vnet; /* where this ifnet originates from */
+ struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
+ struct bpf_if *if_bpf; /* packet filter structure */
+ int if_pcount; /* number of promiscuous listeners */
+ void *if_bridge; /* bridge glue */
+ void *if_lagg; /* lagg glue */
+ void *if_pf_kif; /* pf glue */
+ struct carp_if *if_carp; /* carp interface structure */
+ struct label *if_label; /* interface MAC label */
+ struct netmap_adapter *if_netmap; /* netmap(4) softc */
+
+ /* Various procedures of the layer2 encapsulation and drivers. */
int (*if_output) /* output routine (enqueue) */
- (struct ifnet *, struct mbuf *, struct sockaddr *,
+ (struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
void (*if_input) /* input routine (from h/w driver) */
(struct ifnet *, struct mbuf *);
- void (*if_start) /* initiate output routine */
- (struct ifnet *);
- int (*if_ioctl) /* ioctl routine */
- (struct ifnet *, u_long, caddr_t);
- void (*if_init) /* Init routine */
- (void *);
+ if_start_fn_t if_start; /* initiate output routine */
+ if_ioctl_fn_t if_ioctl; /* ioctl routine */
+ if_init_fn_t if_init; /* Init routine */
int (*if_resolvemulti) /* validate/resolve multicast */
(struct ifnet *, struct sockaddr **, struct sockaddr *);
- void (*if_qflush) /* flush any queues */
- (struct ifnet *);
- int (*if_transmit) /* initiate output routine */
- (struct ifnet *, struct mbuf *);
+ if_qflush_fn_t if_qflush; /* flush any queue */
+ if_transmit_fn_t if_transmit; /* initiate output routine */
+
void (*if_reassign) /* reassign to vnet routine */
(struct ifnet *, struct vnet *, char *);
- struct vnet *if_home_vnet; /* where this ifnet originates from */
- struct ifaddr *if_addr; /* pointer to link-level address */
- void *if_llsoftc; /* link layer softc */
- int if_drv_flags; /* driver-managed status flags */
- struct ifaltq if_snd; /* output queue (includes altq) */
- const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */
+ if_get_counter_t if_get_counter; /* get counter values */
+ int (*if_requestencap) /* make link header from request */
+ (struct ifnet *, struct if_encap_req *);
- void *if_bridge; /* bridge glue */
+ /* Statistics. */
+ counter_u64_t if_counters[IFCOUNTERS];
- struct label *if_label; /* interface MAC label */
-
- /* these are only used by IPv6 */
- struct ifprefixhead if_prefixhead; /* list of prefixes per if */
- void *if_afdata[AF_MAX];
- int if_afdata_initialized;
- struct rwlock if_afdata_lock;
- struct task if_linktask; /* task for link change events */
- struct mtx if_addr_mtx; /* mutex to protect address lists */
-
- LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
- TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
- /* protected by if_addr_mtx */
- void *if_pf_kif;
- void *if_lagg; /* lagg glue */
- char *if_description; /* interface description */
- u_int if_fib; /* interface FIB */
- u_char if_alloctype; /* if_type at time of allocation */
-
- /*
- * Spare fields are added so that we can modify sensitive data
- * structures without changing the kernel binary interface, and must
- * be used with care where binary compatibility is required.
- */
- char if_cspare[3];
+ /* Stuff that's only temporary and doesn't belong here. */
/*
* Network adapter TSO limits:
@@ -222,50 +295,25 @@ struct ifnet {
* count limit does not apply. If all three fields are zero,
* there is no TSO limit.
*
- * NOTE: The TSO limits only apply to the data payload part of
- * a TCP/IP packet. That means there is no need to subtract
- * space for ethernet-, vlan-, IP- or TCP- headers from the
- * TSO limits unless the hardware driver in question requires
- * so.
- */
- u_int if_hw_tsomax;
- int if_ispare[1];
- /*
- * TSO fields for segment limits. If a field is zero below,
- * there is no limit:
+ * NOTE: The TSO limits should reflect the values used in the
+ * BUSDMA tag a network adapter is using to load a mbuf chain
+ * for transmission. The TCP/IP network stack will subtract
+ * space for all linklevel and protocol level headers and
+ * ensure that the full mbuf chain passed to the network
+ * adapter fits within the given limits.
*/
+ u_int if_hw_tsomax; /* TSO maximum size in bytes */
u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */
u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */
- void *if_pspare[8]; /* 1 netmap, 7 TDB */
-};
-
-typedef void if_init_f_t(void *);
-/*
- * XXX These aliases are terribly dangerous because they could apply
- * to anything.
- */
-#define if_mtu if_data.ifi_mtu
-#define if_type if_data.ifi_type
-#define if_physical if_data.ifi_physical
-#define if_addrlen if_data.ifi_addrlen
-#define if_hdrlen if_data.ifi_hdrlen
-#define if_metric if_data.ifi_metric
-#define if_link_state if_data.ifi_link_state
-#define if_baudrate if_data.ifi_baudrate
-#define if_hwassist if_data.ifi_hwassist
-#define if_ipackets if_data.ifi_ipackets
-#define if_ierrors if_data.ifi_ierrors
-#define if_opackets if_data.ifi_opackets
-#define if_oerrors if_data.ifi_oerrors
-#define if_collisions if_data.ifi_collisions
-#define if_ibytes if_data.ifi_ibytes
-#define if_obytes if_data.ifi_obytes
-#define if_imcasts if_data.ifi_imcasts
-#define if_omcasts if_data.ifi_omcasts
-#define if_iqdrops if_data.ifi_iqdrops
-#define if_noproto if_data.ifi_noproto
-#define if_lastchange if_data.ifi_lastchange
+ /*
+ * Spare fields to be added before branching a stable branch, so
+ * that structure can be enhanced without changing the kernel
+ * binary interface.
+ */
+ void *if_pspare[4]; /* packet pacing / general use */
+ int if_ispare[4]; /* packet pacing / general use */
+};
/* for compatibility with other BSDs */
#define if_addrlist if_addrhead
@@ -275,18 +323,14 @@ typedef void if_init_f_t(void *);
/*
* Locks for address lists on the network interface.
*/
-#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_mtx, \
- "if_addr_mtx", NULL, MTX_DEF)
-#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_mtx)
-#define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_mtx)
-#define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
-#define IF_ADDR_RLOCK(if) mtx_lock(&(if)->if_addr_mtx)
-#define IF_ADDR_RUNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
-#define IF_ADDR_LOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
-#define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
-/* XXX: Compat. */
-#define IF_ADDR_LOCK(if) IF_ADDR_WLOCK(if)
-#define IF_ADDR_UNLOCK(if) IF_ADDR_WUNLOCK(if)
+#define IF_ADDR_LOCK_INIT(if) rw_init(&(if)->if_addr_lock, "if_addr_lock")
+#define IF_ADDR_LOCK_DESTROY(if) rw_destroy(&(if)->if_addr_lock)
+#define IF_ADDR_WLOCK(if) rw_wlock(&(if)->if_addr_lock)
+#define IF_ADDR_WUNLOCK(if) rw_wunlock(&(if)->if_addr_lock)
+#define IF_ADDR_RLOCK(if) rw_rlock(&(if)->if_addr_lock)
+#define IF_ADDR_RUNLOCK(if) rw_runlock(&(if)->if_addr_lock)
+#define IF_ADDR_LOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_LOCKED)
+#define IF_ADDR_WLOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_WLOCKED)
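
[Note: the address-list lock changes from a mutex to an rwlock, so pure
readers no longer serialize against each other. A read-side walk of the
per-interface address list now looks like this sketch:]

    struct ifaddr *ifa;

    IF_ADDR_RLOCK(ifp);
    TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
    	if (ifa->ifa_addr->sa_family == AF_INET)
    		break;		/* first IPv4 address, if any */
    }
    IF_ADDR_RUNLOCK(ifp);
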
/*
* Function variations on locking macros intended to be used by loadable
@@ -295,100 +339,11 @@ typedef void if_init_f_t(void *);
*/
void if_addr_rlock(struct ifnet *ifp); /* if_addrhead */
void if_addr_runlock(struct ifnet *ifp); /* if_addrhead */
-void if_maddr_rlock(struct ifnet *ifp); /* if_multiaddrs */
-void if_maddr_runlock(struct ifnet *ifp); /* if_multiaddrs */
-
-/*
- * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
- * are queues of messages stored on ifqueue structures
- * (defined above). Entries are added to and deleted from these structures
- * by these macros, which should be called with ipl raised to splimp().
- */
-#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx)
-#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx)
-#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED)
-#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
-#define _IF_DROP(ifq) ((ifq)->ifq_drops++)
-#define _IF_QLEN(ifq) ((ifq)->ifq_len)
-
-#define _IF_ENQUEUE(ifq, m) do { \
- (m)->m_nextpkt = NULL; \
- if ((ifq)->ifq_tail == NULL) \
- (ifq)->ifq_head = m; \
- else \
- (ifq)->ifq_tail->m_nextpkt = m; \
- (ifq)->ifq_tail = m; \
- (ifq)->ifq_len++; \
-} while (0)
-
-#define IF_ENQUEUE(ifq, m) do { \
- IF_LOCK(ifq); \
- _IF_ENQUEUE(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define _IF_PREPEND(ifq, m) do { \
- (m)->m_nextpkt = (ifq)->ifq_head; \
- if ((ifq)->ifq_tail == NULL) \
- (ifq)->ifq_tail = (m); \
- (ifq)->ifq_head = (m); \
- (ifq)->ifq_len++; \
-} while (0)
-
-#define IF_PREPEND(ifq, m) do { \
- IF_LOCK(ifq); \
- _IF_PREPEND(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define _IF_DEQUEUE(ifq, m) do { \
- (m) = (ifq)->ifq_head; \
- if (m) { \
- if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \
- (ifq)->ifq_tail = NULL; \
- (m)->m_nextpkt = NULL; \
- (ifq)->ifq_len--; \
- } \
-} while (0)
-
-#define IF_DEQUEUE(ifq, m) do { \
- IF_LOCK(ifq); \
- _IF_DEQUEUE(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define _IF_DEQUEUE_ALL(ifq, m) do { \
- (m) = (ifq)->ifq_head; \
- (ifq)->ifq_head = (ifq)->ifq_tail = NULL; \
- (ifq)->ifq_len = 0; \
-} while (0)
-
-#define IF_DEQUEUE_ALL(ifq, m) do { \
- IF_LOCK(ifq); \
- _IF_DEQUEUE_ALL(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head)
-#define IF_POLL(ifq, m) _IF_POLL(ifq, m)
-
-#define _IF_DRAIN(ifq) do { \
- struct mbuf *m; \
- for (;;) { \
- _IF_DEQUEUE(ifq, m); \
- if (m == NULL) \
- break; \
- m_freem(m); \
- } \
-} while (0)
-
-#define IF_DRAIN(ifq) do { \
- IF_LOCK(ifq); \
- _IF_DRAIN(ifq); \
- IF_UNLOCK(ifq); \
-} while(0)
+void if_maddr_rlock(if_t ifp); /* if_multiaddrs */
+void if_maddr_runlock(if_t ifp); /* if_multiaddrs */
#ifdef _KERNEL
+#ifdef _SYS_EVENTHANDLER_H_
/* interface link layer address change event */
typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t);
@@ -404,6 +359,7 @@ EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);
/* Interface link state change event */
typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int);
EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t);
+#endif /* _SYS_EVENTHANDLER_H_ */
/*
* interface groups
@@ -426,6 +382,7 @@ struct ifg_list {
TAILQ_ENTRY(ifg_list) ifgl_next;
};
+#ifdef _SYS_EVENTHANDLER_H_
/* group attach event */
typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
@@ -435,6 +392,7 @@ EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
/* group change event */
typedef void (*group_change_event_handler_t)(void *, const char *);
EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
+#endif /* _SYS_EVENTHANDLER_H_ */
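
[Note: the event declarations are now guarded by _SYS_EVENTHANDLER_H_, so a
consumer only sees them if it included sys/eventhandler.h before
net/if_var.h. A sketch of a hypothetical consumer registering a link-state
handler; all foo_* names are assumptions:]

    #include <sys/eventhandler.h>	/* must now precede net/if_var.h */
    #include <net/if_var.h>

    static eventhandler_tag foo_link_tag;

    static void
    foo_link_event(void *arg, struct ifnet *ifp, int link_state)
    {
    	/* React to LINK_STATE_UP / LINK_STATE_DOWN. */
    }

    static void
    foo_init(void *arg __unused)
    {
    	foo_link_tag = EVENTHANDLER_REGISTER(ifnet_link_event,
    	    foo_link_event, NULL, EVENTHANDLER_PRI_ANY);
    }
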
#define IF_AFDATA_LOCK_INIT(ifp) \
rw_init(&(ifp)->if_afdata_lock, "if_afdata")
@@ -453,331 +411,6 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
#define IF_AFDATA_WLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_WLOCKED)
#define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED)
-int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
- int adjust);
-#define IF_HANDOFF(ifq, m, ifp) \
- if_handoff((struct ifqueue *)ifq, m, ifp, 0)
-#define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \
- if_handoff((struct ifqueue *)ifq, m, ifp, adj)
-
-void if_start(struct ifnet *);
-
-#define IFQ_ENQUEUE(ifq, m, err) \
-do { \
- IF_LOCK(ifq); \
- if (ALTQ_IS_ENABLED(ifq)) \
- ALTQ_ENQUEUE(ifq, m, NULL, err); \
- else { \
- if (_IF_QFULL(ifq)) { \
- m_freem(m); \
- (err) = ENOBUFS; \
- } else { \
- _IF_ENQUEUE(ifq, m); \
- (err) = 0; \
- } \
- } \
- if (err) \
- (ifq)->ifq_drops++; \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define IFQ_DEQUEUE_NOLOCK(ifq, m) \
-do { \
- if (TBR_IS_ENABLED(ifq)) \
- (m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \
- else if (ALTQ_IS_ENABLED(ifq)) \
- ALTQ_DEQUEUE(ifq, m); \
- else \
- _IF_DEQUEUE(ifq, m); \
-} while (0)
-
-#define IFQ_DEQUEUE(ifq, m) \
-do { \
- IF_LOCK(ifq); \
- IFQ_DEQUEUE_NOLOCK(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define IFQ_POLL_NOLOCK(ifq, m) \
-do { \
- if (TBR_IS_ENABLED(ifq)) \
- (m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \
- else if (ALTQ_IS_ENABLED(ifq)) \
- ALTQ_POLL(ifq, m); \
- else \
- _IF_POLL(ifq, m); \
-} while (0)
-
-#define IFQ_POLL(ifq, m) \
-do { \
- IF_LOCK(ifq); \
- IFQ_POLL_NOLOCK(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define IFQ_PURGE_NOLOCK(ifq) \
-do { \
- if (ALTQ_IS_ENABLED(ifq)) { \
- ALTQ_PURGE(ifq); \
- } else \
- _IF_DRAIN(ifq); \
-} while (0)
-
-#define IFQ_PURGE(ifq) \
-do { \
- IF_LOCK(ifq); \
- IFQ_PURGE_NOLOCK(ifq); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define IFQ_SET_READY(ifq) \
- do { ((ifq)->altq_flags |= ALTQF_READY); } while (0)
-
-#define IFQ_LOCK(ifq) IF_LOCK(ifq)
-#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq)
-#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq)
-#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
-#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++)
-#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len)
-#define IFQ_INC_DROPS(ifq) ((ifq)->ifq_drops++)
-#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len))
-
-/*
- * The IFF_DRV_OACTIVE test should really occur in the device driver, not in
- * the handoff logic, as that flag is locked by the device driver.
- */
-#define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \
-do { \
- int len; \
- short mflags; \
- \
- len = (m)->m_pkthdr.len; \
- mflags = (m)->m_flags; \
- IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \
- if ((err) == 0) { \
- (ifp)->if_obytes += len + (adj); \
- if (mflags & M_MCAST) \
- (ifp)->if_omcasts++; \
- if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \
- if_start(ifp); \
- } \
-} while (0)
-
-#define IFQ_HANDOFF(ifp, m, err) \
- IFQ_HANDOFF_ADJ(ifp, m, 0, err)
-
-#define IFQ_DRV_DEQUEUE(ifq, m) \
-do { \
- (m) = (ifq)->ifq_drv_head; \
- if (m) { \
- if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \
- (ifq)->ifq_drv_tail = NULL; \
- (m)->m_nextpkt = NULL; \
- (ifq)->ifq_drv_len--; \
- } else { \
- IFQ_LOCK(ifq); \
- IFQ_DEQUEUE_NOLOCK(ifq, m); \
- while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \
- struct mbuf *m0; \
- IFQ_DEQUEUE_NOLOCK(ifq, m0); \
- if (m0 == NULL) \
- break; \
- m0->m_nextpkt = NULL; \
- if ((ifq)->ifq_drv_tail == NULL) \
- (ifq)->ifq_drv_head = m0; \
- else \
- (ifq)->ifq_drv_tail->m_nextpkt = m0; \
- (ifq)->ifq_drv_tail = m0; \
- (ifq)->ifq_drv_len++; \
- } \
- IFQ_UNLOCK(ifq); \
- } \
-} while (0)
-
-#define IFQ_DRV_PREPEND(ifq, m) \
-do { \
- (m)->m_nextpkt = (ifq)->ifq_drv_head; \
- if ((ifq)->ifq_drv_tail == NULL) \
- (ifq)->ifq_drv_tail = (m); \
- (ifq)->ifq_drv_head = (m); \
- (ifq)->ifq_drv_len++; \
-} while (0)
-
-#define IFQ_DRV_IS_EMPTY(ifq) \
- (((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0))
-
-#define IFQ_DRV_PURGE(ifq) \
-do { \
- struct mbuf *m, *n = (ifq)->ifq_drv_head; \
- while((m = n) != NULL) { \
- n = m->m_nextpkt; \
- m_freem(m); \
- } \
- (ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \
- (ifq)->ifq_drv_len = 0; \
- IFQ_PURGE(ifq); \
-} while (0)
-
-#ifdef _KERNEL
-static __inline int
-drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
-{
- int error = 0;
-
-#ifdef ALTQ
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_ENQUEUE(&ifp->if_snd, m, error);
- return (error);
- }
-#endif
- error = buf_ring_enqueue(br, m);
- if (error)
- m_freem(m);
-
- return (error);
-}
-
-static __inline void
-drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new_mbuf)
-{
- /*
- * The top of the list needs to be swapped
- * for this one.
- */
-#ifdef ALTQ
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
- /*
- * Peek in altq case dequeued it
- * so put it back.
- */
- IFQ_DRV_PREPEND(&ifp->if_snd, new_mbuf);
- return;
- }
-#endif
- buf_ring_putback_sc(br, new_mbuf);
-}
-
-static __inline struct mbuf *
-drbr_peek(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- struct mbuf *m;
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
- /*
- * Pull it off like a dequeue
- * since drbr_advance() does nothing
- * for altq and drbr_putback() will
- * use the old prepend function.
- */
- IFQ_DEQUEUE(&ifp->if_snd, m);
- return (m);
- }
-#endif
- return ((struct mbuf *)buf_ring_peek(br));
-}
-
-static __inline void
-drbr_flush(struct ifnet *ifp, struct buf_ring *br)
-{
- struct mbuf *m;
-
-#ifdef ALTQ
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
- IFQ_PURGE(&ifp->if_snd);
-#endif
- while ((m = (struct mbuf *)buf_ring_dequeue_sc(br)) != NULL)
- m_freem(m);
-}
-
-static __inline void
-drbr_free(struct buf_ring *br, struct malloc_type *type)
-{
-
- drbr_flush(NULL, br);
- buf_ring_free(br, type);
-}
-
-static __inline struct mbuf *
-drbr_dequeue(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- struct mbuf *m;
-
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- return (m);
- }
-#endif
- return ((struct mbuf *)buf_ring_dequeue_sc(br));
-}
-
-static __inline void
-drbr_advance(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- /* Nothing to do here since peek dequeues in altq case */
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
- return;
-#endif
- return (buf_ring_advance_sc(br));
-}
-
-
-static __inline struct mbuf *
-drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
- int (*func) (struct mbuf *, void *), void *arg)
-{
- struct mbuf *m;
-#ifdef ALTQ
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, m);
- if (m != NULL && func(m, arg) == 0) {
- IFQ_UNLOCK(&ifp->if_snd);
- return (NULL);
- }
- IFQ_DEQUEUE_NOLOCK(&ifp->if_snd, m);
- IFQ_UNLOCK(&ifp->if_snd);
- return (m);
- }
-#endif
- m = (struct mbuf *)buf_ring_peek(br);
- if (m == NULL || func(m, arg) == 0)
- return (NULL);
-
- return ((struct mbuf *)buf_ring_dequeue_sc(br));
-}
-
-static __inline int
-drbr_empty(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- if (ALTQ_IS_ENABLED(&ifp->if_snd))
- return (IFQ_IS_EMPTY(&ifp->if_snd));
-#endif
- return (buf_ring_empty(br));
-}
-
-static __inline int
-drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- if (ALTQ_IS_ENABLED(&ifp->if_snd))
- return (1);
-#endif
- return (!buf_ring_empty(br));
-}
-
-static __inline int
-drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- if (ALTQ_IS_ENABLED(&ifp->if_snd))
- return (ifp->if_snd.ifq_len);
-#endif
- return (buf_ring_count(br));
-}
-#endif
/*
* 72 was chosen below because it is the size of a TCP/IP
* header (40) + the minimum mss (32).
@@ -787,8 +420,6 @@ drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
#define TOEDEV(ifp) ((ifp)->if_llsoftc)
-#endif /* _KERNEL */
-
/*
* The ifaddr structure contains information about one address
* of an interface. They are maintained by the different address families,
@@ -804,46 +435,28 @@ struct ifaddr {
struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */
#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */
struct sockaddr *ifa_netmask; /* used to determine subnet */
- struct if_data if_data; /* not all members are meaningful */
struct ifnet *ifa_ifp; /* back-pointer to interface */
+ struct carp_softc *ifa_carp; /* pointer to CARP data */
TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */
(int, struct rtentry *, struct rt_addrinfo *);
u_short ifa_flags; /* mostly rt_flags for cloning */
+#define IFA_ROUTE RTF_UP /* route installed */
+#define IFA_RTSELF RTF_HOST /* loopback route to self installed */
u_int ifa_refcnt; /* references to this structure */
- int ifa_metric; /* cost of going out this interface */
- int (*ifa_claim_addr) /* check if an addr goes to this if */
- (struct ifaddr *, struct sockaddr *);
- struct mtx ifa_mtx;
+
+ counter_u64_t ifa_ipackets;
+ counter_u64_t ifa_opackets;
+ counter_u64_t ifa_ibytes;
+ counter_u64_t ifa_obytes;
};
-#define IFA_ROUTE RTF_UP /* route installed */
-#define IFA_RTSELF RTF_HOST /* loopback route to self installed */
-/* for compatibility with other BSDs */
+/* For compatibility with other BSDs. SCTP uses it. */
#define ifa_list ifa_link
-#ifdef _KERNEL
-#define IFA_LOCK(ifa) mtx_lock(&(ifa)->ifa_mtx)
-#define IFA_UNLOCK(ifa) mtx_unlock(&(ifa)->ifa_mtx)
-
+struct ifaddr * ifa_alloc(size_t size, int flags);
void ifa_free(struct ifaddr *ifa);
-void ifa_init(struct ifaddr *ifa);
void ifa_ref(struct ifaddr *ifa);
-#endif
-
-/*
- * The prefix structure contains information about one prefix
- * of an interface. They are maintained by the different address families,
- * are allocated and attached when a prefix or an address is set,
- * and are linked together so all prefixes for an interface can be located.
- */
-struct ifprefix {
- struct sockaddr *ifpr_prefix; /* prefix of interface */
- struct ifnet *ifpr_ifp; /* back-pointer to interface */
- TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */
- u_char ifpr_plen; /* prefix length in bits */
- u_char ifpr_type; /* protocol dependent prefix type */
-};
/*
* Multicast address structure. This is analogous to the ifaddr
@@ -859,16 +472,9 @@ struct ifmultiaddr {
struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */
};
-#ifdef _KERNEL
-
extern struct rwlock ifnet_rwlock;
extern struct sx ifnet_sxlock;
-#define IFNET_LOCK_INIT() do { \
- rw_init_flags(&ifnet_rwlock, "ifnet_rw", RW_RECURSE); \
- sx_init_flags(&ifnet_sxlock, "ifnet_sx", SX_RECURSE); \
-} while(0)
-
#define IFNET_WLOCK() do { \
sx_xlock(&ifnet_sxlock); \
rw_wlock(&ifnet_rwlock); \
@@ -915,15 +521,11 @@ VNET_DECLARE(struct ifnethead, ifnet);
VNET_DECLARE(struct ifgrouphead, ifg_head);
VNET_DECLARE(int, if_index);
VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */
-VNET_DECLARE(int, useloopback);
#define V_ifnet VNET(ifnet)
#define V_ifg_head VNET(ifg_head)
#define V_if_index VNET(if_index)
#define V_loif VNET(loif)
-#define V_useloopback VNET(useloopback)
-
-extern int ifqmaxlen;
int if_addgroup(struct ifnet *, const char *);
int if_delgroup(struct ifnet *, const char *);
@@ -935,18 +537,15 @@ void if_dead(struct ifnet *);
int if_delmulti(struct ifnet *, struct sockaddr *);
void if_delmulti_ifma(struct ifmultiaddr *);
void if_detach(struct ifnet *);
-void if_vmove(struct ifnet *, struct vnet *);
void if_purgeaddrs(struct ifnet *);
void if_delallmulti(struct ifnet *);
void if_down(struct ifnet *);
struct ifmultiaddr *
- if_findmulti(struct ifnet *, struct sockaddr *);
+ if_findmulti(struct ifnet *, const struct sockaddr *);
void if_free(struct ifnet *);
-void if_free_type(struct ifnet *, u_char);
void if_initname(struct ifnet *, const char *, int);
void if_link_state_change(struct ifnet *, int);
int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
-void if_qflush(struct ifnet *);
void if_ref(struct ifnet *);
void if_rele(struct ifnet *);
int if_setlladdr(struct ifnet *, const u_char *, int);
@@ -956,23 +555,19 @@ int ifpromisc(struct ifnet *, int);
struct ifnet *ifunit(const char *);
struct ifnet *ifunit_ref(const char *);
-void ifq_init(struct ifaltq *, struct ifnet *ifp);
-void ifq_delete(struct ifaltq *);
-
int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *);
int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *);
-
-struct ifaddr *ifa_ifwithaddr(struct sockaddr *);
-int ifa_ifwithaddr_check(struct sockaddr *);
-struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
-struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
-struct ifaddr *ifa_ifwithdstaddr_fib(struct sockaddr *, int);
-struct ifaddr *ifa_ifwithnet(struct sockaddr *, int);
-struct ifaddr *ifa_ifwithnet_fib(struct sockaddr *, int, int);
-struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
-struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int);
-
-struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);
+int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *);
+
+struct ifaddr *ifa_ifwithaddr(const struct sockaddr *);
+int ifa_ifwithaddr_check(const struct sockaddr *);
+struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int);
+struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int);
+struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int);
+struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, struct sockaddr *,
+ u_int);
+struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *);
+int ifa_preferred(struct ifaddr *, struct ifaddr *);
int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen);
@@ -980,22 +575,92 @@ typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp);
typedef void if_com_free_t(void *com, u_char type);
void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
void if_deregister_com_alloc(u_char type);
+void if_data_copy(struct ifnet *, struct if_data *);
+uint64_t if_get_counter_default(struct ifnet *, ift_counter);
+void if_inc_counter(struct ifnet *, ift_counter, int64_t);
#define IF_LLADDR(ifp) \
LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))
+uint64_t if_setbaudrate(if_t ifp, uint64_t baudrate);
+uint64_t if_getbaudrate(if_t ifp);
+int if_setcapabilities(if_t ifp, int capabilities);
+int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit);
+int if_getcapabilities(if_t ifp);
+int if_togglecapenable(if_t ifp, int togglecap);
+int if_setcapenable(if_t ifp, int capenable);
+int if_setcapenablebit(if_t ifp, int setcap, int clearcap);
+int if_getcapenable(if_t ifp);
+const char *if_getdname(if_t ifp);
+int if_setdev(if_t ifp, void *dev);
+int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags);
+int if_getdrvflags(if_t ifp);
+int if_setdrvflags(if_t ifp, int flags);
+int if_clearhwassist(if_t ifp);
+int if_sethwassistbits(if_t ifp, int toset, int toclear);
+int if_sethwassist(if_t ifp, int hwassist_bit);
+int if_gethwassist(if_t ifp);
+int if_setsoftc(if_t ifp, void *softc);
+void *if_getsoftc(if_t ifp);
+int if_setflags(if_t ifp, int flags);
+int if_setmtu(if_t ifp, int mtu);
+int if_getmtu(if_t ifp);
+int if_getmtu_family(if_t ifp, int family);
+int if_setflagbits(if_t ifp, int set, int clear);
+int if_getflags(if_t ifp);
+int if_sendq_empty(if_t ifp);
+int if_setsendqready(if_t ifp);
+int if_setsendqlen(if_t ifp, int tx_desc_count);
+int if_input(if_t ifp, struct mbuf* sendmp);
+int if_sendq_prepend(if_t ifp, struct mbuf *m);
+struct mbuf *if_dequeue(if_t ifp);
+int if_setifheaderlen(if_t ifp, int len);
+void if_setrcvif(struct mbuf *m, if_t ifp);
+void if_setvtag(struct mbuf *m, u_int16_t tag);
+u_int16_t if_getvtag(struct mbuf *m);
+int if_vlantrunkinuse(if_t ifp);
+caddr_t if_getlladdr(if_t ifp);
+void *if_gethandle(u_char);
+void if_bpfmtap(if_t ifp, struct mbuf *m);
+void if_etherbpfmtap(if_t ifp, struct mbuf *m);
+void if_vlancap(if_t ifp);
+
+int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max);
+int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max);
+int if_multiaddr_count(if_t ifp, int max);
+
+int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg);
+int if_getamcount(if_t ifp);
+struct ifaddr * if_getifaddr(if_t ifp);
+
+/* Functions */
+void if_setinitfn(if_t ifp, void (*)(void *));
+void if_setioctlfn(if_t ifp, int (*)(if_t, u_long, caddr_t));
+void if_setstartfn(if_t ifp, void (*)(if_t));
+void if_settransmitfn(if_t ifp, if_transmit_fn_t);
+void if_setqflushfn(if_t ifp, if_qflush_fn_t);
+void if_setgetcounterfn(if_t ifp, if_get_counter_t);
+
+/* Revisit the below. These were originally inline functions. */
+int drbr_inuse_drv(if_t ifp, struct buf_ring *br);
+struct mbuf* drbr_dequeue_drv(if_t ifp, struct buf_ring *br);
+int drbr_needs_enqueue_drv(if_t ifp, struct buf_ring *br);
+int drbr_enqueue_drv(if_t ifp, struct buf_ring *br, struct mbuf *m);
+
+/* TSO */
+void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *);
+int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *);
+
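
[Note: a minimal attach-time sketch using the new opaque if_t accessors
instead of poking struct ifnet fields directly; the foo_* handlers and the
TSO numbers are assumptions, not values from the diff.]

    if_t ifp;
    struct ifnet_hw_tsomax tsomax;

    ifp = if_gethandle(IFT_ETHER);
    if_setsoftc(ifp, sc);
    if_setflagbits(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST, 0);
    if_setioctlfn(ifp, foo_ioctl);	/* int foo_ioctl(if_t, u_long, caddr_t) */
    if_setstartfn(ifp, foo_start);	/* void foo_start(if_t) */
    if_setgetcounterfn(ifp, foo_get_counter);

    /* Advertise TSO limits matching the BUSDMA tag the driver uses. */
    tsomax.tsomaxbytes = 65536;
    tsomax.tsomaxsegcount = 32;
    tsomax.tsomaxsegsize = 4096;
    if_hw_tsomax_update(ifp, &tsomax);
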
#ifdef DEVICE_POLLING
-enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };
+enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };
-typedef int poll_handler_t(struct ifnet *ifp, enum poll_cmd cmd, int count);
-int ether_poll_register(poll_handler_t *h, struct ifnet *ifp);
-int ether_poll_deregister(struct ifnet *ifp);
+typedef int poll_handler_t(if_t ifp, enum poll_cmd cmd, int count);
+int ether_poll_register(poll_handler_t *h, if_t ifp);
+int ether_poll_deregister(if_t ifp);
#endif /* DEVICE_POLLING */
-/* TSO */
-void if_hw_tsomax_common(struct ifnet *, struct ifnet_hw_tsomax *);
-int if_hw_tsomax_update(struct ifnet *, struct ifnet_hw_tsomax *);
-
#endif /* _KERNEL */
+#include <net/ifq.h> /* XXXAO: temporary unconditional include */
+
#endif /* !_NET_IF_VAR_H_ */
diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c
index 7d08e298..8a93565b 100644
--- a/freebsd/sys/net/if_vlan.c
+++ b/freebsd/sys/net/if_vlan.c
@@ -2,6 +2,10 @@
/*-
* Copyright 1998 Massachusetts Institute of Technology
+ * Copyright 2012 ADARA Networks, Inc.
+ *
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to ADARA Networks, Inc.
*
* Permission to use, copy, modify, and distribute this software and
* its documentation for any purpose and without fee is hereby
@@ -31,8 +35,7 @@
/*
* if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs.
- * Might be extended some day to also handle IEEE 802.1p priority
- * tagging. This is sort of sneaky in the implementation, since
+ * This is sort of sneaky in the implementation, since
* we need to pretend to be enough of an Ethernet implementation
* to make arp work. The way we do this is by telling everyone
* that we are an Ethernet, and then catch the packets that
@@ -47,12 +50,14 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_vlan.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
+#include <sys/priv.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -63,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_types.h>
@@ -74,7 +80,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/if_ether.h>
#endif
-#define VLANNAME "vlan"
#define VLAN_DEF_HWIDTH 4
#define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST)
@@ -85,7 +90,7 @@ LIST_HEAD(ifvlanhead, ifvlan);
struct ifvlantrunk {
struct ifnet *parent; /* parent interface of this trunk */
- struct rwlock rw;
+ struct rmlock lock;
#ifdef VLAN_ARRAY
#define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1)
struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */
@@ -105,9 +110,9 @@ struct vlan_mc_entry {
struct ifvlan {
struct ifvlantrunk *ifv_trunk;
struct ifnet *ifv_ifp;
- void *ifv_cookie;
#define TRUNK(ifv) ((ifv)->ifv_trunk)
#define PARENT(ifv) ((ifv)->ifv_trunk->parent)
+ void *ifv_cookie;
int ifv_pflags; /* special flags we have set on parent */
struct ifv_linkmib {
int ifvm_encaplen; /* encapsulation length */
@@ -115,6 +120,8 @@ struct ifvlan {
int ifvm_mintu; /* min transmission unit */
uint16_t ifvm_proto; /* encapsulation ethertype */
uint16_t ifvm_tag; /* tag to apply on packets leaving if */
+ uint16_t ifvm_vid; /* VLAN ID */
+ uint8_t ifvm_pcp; /* Priority Code Point (PCP). */
} ifv_mib;
SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
#ifndef VLAN_ARRAY
@@ -123,6 +130,8 @@ struct ifvlan {
};
#define ifv_proto ifv_mib.ifvm_proto
#define ifv_tag ifv_mib.ifvm_tag
+#define ifv_vid ifv_mib.ifvm_vid
+#define ifv_pcp ifv_mib.ifvm_pcp
#define ifv_encaplen ifv_mib.ifvm_encaplen
#define ifv_mtufudge ifv_mib.ifvm_mtufudge
#define ifv_mintu ifv_mib.ifvm_mintu
@@ -143,11 +152,22 @@ static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
"for consistency");
-static int soft_pad = 0;
-SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
- "pad short frames before tagging");
+static VNET_DEFINE(int, soft_pad);
+#define V_soft_pad VNET(soft_pad)
+SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(soft_pad), 0, "pad short frames before tagging");
+
+/*
+ * For now, make preserving PCP via an mbuf tag optional, as it increases
+ * per-packet memory allocations and frees. In the future, it would be
+ * preferable to reuse ether_vtag for this, or similar.
+ */
+static int vlan_mtag_pcp = 0;
+SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW, &vlan_mtag_pcp, 0,
+ "Retain VLAN PCP information as packets are passed up the stack");
-static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface");
+static const char vlanname[] = "vlan";
+static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");
static eventhandler_tag ifdetach_tag;
static eventhandler_tag iflladdr_tag;
@@ -156,7 +176,7 @@ static eventhandler_tag iflladdr_tag;
* We have a global mutex, that is used to serialize configuration
* changes and isn't used in normal packet delivery.
*
- * We also have a per-trunk rwlock, that is locked shared on packet
+ * We also have a per-trunk rmlock(9), which is locked shared on packet
* processing and exclusive when configuration is changed.
*
* The VLAN_ARRAY substitutes the dynamic hash with a static array
@@ -170,14 +190,15 @@ static struct sx ifv_lock;
#define VLAN_LOCK_ASSERT() sx_assert(&ifv_lock, SA_LOCKED)
#define VLAN_LOCK() sx_xlock(&ifv_lock)
#define VLAN_UNLOCK() sx_xunlock(&ifv_lock)
-#define TRUNK_LOCK_INIT(trunk) rw_init(&(trunk)->rw, VLANNAME)
-#define TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
-#define TRUNK_LOCK(trunk) rw_wlock(&(trunk)->rw)
-#define TRUNK_UNLOCK(trunk) rw_wunlock(&(trunk)->rw)
-#define TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED)
-#define TRUNK_RLOCK(trunk) rw_rlock(&(trunk)->rw)
-#define TRUNK_RUNLOCK(trunk) rw_runlock(&(trunk)->rw)
-#define TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED)
+#define TRUNK_LOCK_INIT(trunk) rm_init(&(trunk)->lock, vlanname)
+#define TRUNK_LOCK_DESTROY(trunk) rm_destroy(&(trunk)->lock)
+#define TRUNK_LOCK(trunk) rm_wlock(&(trunk)->lock)
+#define TRUNK_UNLOCK(trunk) rm_wunlock(&(trunk)->lock)
+#define TRUNK_LOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_WLOCKED)
+#define TRUNK_RLOCK(trunk) rm_rlock(&(trunk)->lock, &tracker)
+#define TRUNK_RUNLOCK(trunk) rm_runlock(&(trunk)->lock, &tracker)
+#define TRUNK_LOCK_RASSERT(trunk) rm_assert(&(trunk)->lock, RA_RLOCKED)
+#define TRUNK_LOCK_READER struct rm_priotracker tracker
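
[Note: rmlock(9) makes the read path nearly free but requires stack-local
tracker storage per reader, declared via TRUNK_LOCK_READER. A read-side
sketch following the same pattern as vlan_devat() below; "example_lookup"
is hypothetical:]

    static struct ifvlan *
    example_lookup(struct ifvlantrunk *trunk, uint16_t vid)
    {
    	struct ifvlan *ifv;
    	TRUNK_LOCK_READER;		/* declares the rm_priotracker */

    	TRUNK_RLOCK(trunk);
    	ifv = vlan_gethash(trunk, vid);
    	TRUNK_RUNLOCK(trunk);
    	return (ifv);
    }
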
#ifndef VLAN_ARRAY
static void vlan_inithash(struct ifvlantrunk *trunk);
@@ -186,7 +207,7 @@ static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
- uint16_t tag);
+ uint16_t vid);
#endif
static void trunk_destroy(struct ifvlantrunk *trunk);
@@ -206,8 +227,7 @@ static void vlan_link_state(struct ifnet *ifp);
static void vlan_capabilities(struct ifvlan *ifv);
static void vlan_trunk_capabilities(struct ifnet *ifp);
-static struct ifnet *vlan_clone_match_ethertag(struct if_clone *,
- const char *, int *);
+static struct ifnet *vlan_clone_match_ethervid(const char *, int *);
static int vlan_clone_match(struct if_clone *, const char *);
static int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int vlan_clone_destroy(struct if_clone *, struct ifnet *);
@@ -215,11 +235,10 @@ static int vlan_clone_destroy(struct if_clone *, struct ifnet *);
static void vlan_ifdetach(void *arg, struct ifnet *ifp);
static void vlan_iflladdr(void *arg, struct ifnet *ifp);
-static struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL,
- IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy);
+static struct if_clone *vlan_cloner;
#ifdef VIMAGE
-static VNET_DEFINE(struct if_clone, vlan_cloner);
+static VNET_DEFINE(struct if_clone *, vlan_cloner);
#define V_vlan_cloner VNET(vlan_cloner)
#endif
@@ -274,9 +293,9 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
b = 1 << trunk->hwidth;
- i = HASH(ifv->ifv_tag, trunk->hmask);
+ i = HASH(ifv->ifv_vid, trunk->hmask);
LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
- if (ifv->ifv_tag == ifv2->ifv_tag)
+ if (ifv->ifv_vid == ifv2->ifv_vid)
return (EEXIST);
/*
@@ -286,7 +305,7 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
*/
if (trunk->refcnt > (b * b) / 2) {
vlan_growhash(trunk, 1);
- i = HASH(ifv->ifv_tag, trunk->hmask);
+ i = HASH(ifv->ifv_vid, trunk->hmask);
}
LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
trunk->refcnt++;
@@ -304,7 +323,7 @@ vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
b = 1 << trunk->hwidth;
- i = HASH(ifv->ifv_tag, trunk->hmask);
+ i = HASH(ifv->ifv_vid, trunk->hmask);
LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
if (ifv2 == ifv) {
trunk->refcnt--;
@@ -356,7 +375,7 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
for (i = 0; i < n; i++)
while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
LIST_REMOVE(ifv, ifv_list);
- j = HASH(ifv->ifv_tag, n2 - 1);
+ j = HASH(ifv->ifv_vid, n2 - 1);
LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
}
free(trunk->hash, M_VLAN);
@@ -370,14 +389,14 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
}
static __inline struct ifvlan *
-vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
{
struct ifvlan *ifv;
TRUNK_LOCK_RASSERT(trunk);
- LIST_FOREACH(ifv, &trunk->hash[HASH(tag, trunk->hmask)], ifv_list)
- if (ifv->ifv_tag == tag)
+ LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
+ if (ifv->ifv_vid == vid)
return (ifv);
return (NULL);
}
@@ -401,19 +420,19 @@ vlan_dumphash(struct ifvlantrunk *trunk)
#else
static __inline struct ifvlan *
-vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
{
- return trunk->vlans[tag];
+ return trunk->vlans[vid];
}
static __inline int
vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
{
- if (trunk->vlans[ifv->ifv_tag] != NULL)
+ if (trunk->vlans[ifv->ifv_vid] != NULL)
return EEXIST;
- trunk->vlans[ifv->ifv_tag] = ifv;
+ trunk->vlans[ifv->ifv_vid] = ifv;
trunk->refcnt++;
return (0);
@@ -423,7 +442,7 @@ static __inline int
vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
{
- trunk->vlans[ifv->ifv_tag] = NULL;
+ trunk->vlans[ifv->ifv_vid] = NULL;
trunk->refcnt--;
return (0);
@@ -461,48 +480,48 @@ trunk_destroy(struct ifvlantrunk *trunk)
* traffic that it doesn't really want, which ends up being discarded
* later by the upper protocol layers. Unfortunately, there's no way
* to avoid this: there really is only one physical interface.
- *
- * XXX: There is a possible race here if more than one thread is
- * modifying the multicast state of the vlan interface at the same time.
*/
static int
vlan_setmulti(struct ifnet *ifp)
{
struct ifnet *ifp_p;
- struct ifmultiaddr *ifma, *rifma = NULL;
+ struct ifmultiaddr *ifma;
struct ifvlan *sc;
struct vlan_mc_entry *mc;
int error;
- /*VLAN_LOCK_ASSERT();*/
-
/* Find the parent. */
sc = ifp->if_softc;
+ TRUNK_LOCK_ASSERT(TRUNK(sc));
ifp_p = PARENT(sc);
CURVNET_SET_QUIET(ifp_p->if_vnet);
/* First, remove any existing filter entries. */
while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
- error = if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
- if (error)
- return (error);
SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
+ (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
free(mc, M_VLAN);
}
/* Now program new ones. */
+ IF_ADDR_WLOCK(ifp);
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
- if (mc == NULL)
+ if (mc == NULL) {
+ IF_ADDR_WUNLOCK(ifp);
return (ENOMEM);
+ }
bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
mc->mc_addr.sdl_index = ifp_p->if_index;
SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
+ }
+ IF_ADDR_WUNLOCK(ifp);
+ SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) {
error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
- &rifma);
+ NULL);
if (error)
return (error);
}
@@ -625,17 +644,21 @@ vlan_trunkdev(struct ifnet *ifp)
}
/*
- * Return the 16bit vlan tag for this interface.
+ * Return the 12-bit VLAN VID for this interface, for use by external
+ * components such as Infiniband.
+ *
+ * XXXRW: Note that the function name here is historical; it should be named
+ * vlan_vid().
*/
static int
-vlan_tag(struct ifnet *ifp, uint16_t *tagp)
+vlan_tag(struct ifnet *ifp, uint16_t *vidp)
{
struct ifvlan *ifv;
if (ifp->if_type != IFT_L2VLAN)
return (EINVAL);
ifv = ifp->if_softc;
- *tagp = ifv->ifv_tag;
+ *vidp = ifv->ifv_vid;
return (0);
}
@@ -671,20 +694,21 @@ vlan_setcookie(struct ifnet *ifp, void *cookie)
}
/*
- * Return the vlan device present at the specific tag.
+ * Return the vlan device present at the specific VID.
*/
static struct ifnet *
-vlan_devat(struct ifnet *ifp, uint16_t tag)
+vlan_devat(struct ifnet *ifp, uint16_t vid)
{
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
+ TRUNK_LOCK_READER;
trunk = ifp->if_vlantrunk;
if (trunk == NULL)
return (NULL);
ifp = NULL;
TRUNK_RLOCK(trunk);
- ifv = vlan_gethash(trunk, tag);
+ ifv = vlan_gethash(trunk, vid);
if (ifv)
ifp = ifv->ifv_ifp;
TRUNK_RUNLOCK(trunk);
@@ -692,10 +716,20 @@ vlan_devat(struct ifnet *ifp, uint16_t tag)
}
/*
+ * Recalculate the cached VLAN tag exposed via the MIB.
+ */
+static void
+vlan_tag_recalculate(struct ifvlan *ifv)
+{
+
+ ifv->ifv_tag = EVL_MAKETAG(ifv->ifv_vid, ifv->ifv_pcp, 0);
+}
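
[Note: ifv_tag now caches the full 16-bit TCI, recomputed whenever the VID
or PCP changes. As a worked example of the layout:]

    /*
     * TCI = PCP(3 bits) | DEI(1 bit) | VID(12 bits), so for vid 100
     * and pcp 5:
     *
     *     EVL_MAKETAG(100, 5, 0) == (5 << 13) | (0 << 12) | 100
     *                            == 0xa000 | 0x0064 == 0xa064
     */
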
+
+/*
* VLAN support can be loaded as a module. The only place in the
* system that's intimately aware of this is ether_input. We hook
* into this code through vlan_input_p which is defined there and
- * set here. Noone else in the system should be aware of this so
+ * set here. No one else in the system should be aware of this so
* we use an explicit reference here.
*/
extern void (*vlan_input_p)(struct ifnet *, struct mbuf *);
@@ -727,7 +761,8 @@ vlan_modevent(module_t mod, int type, void *data)
vlan_tag_p = vlan_tag;
vlan_devat_p = vlan_devat;
#ifndef VIMAGE
- if_clone_attach(&vlan_cloner);
+ vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
+ vlan_clone_create, vlan_clone_destroy);
#endif
if (bootverbose)
printf("vlan: initialized, using "
@@ -741,7 +776,7 @@ vlan_modevent(module_t mod, int type, void *data)
break;
case MOD_UNLOAD:
#ifndef VIMAGE
- if_clone_detach(&vlan_cloner);
+ if_clone_detach(vlan_cloner);
#endif
EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
@@ -777,8 +812,9 @@ static void
vnet_vlan_init(const void *unused __unused)
{
+ vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
+ vlan_clone_create, vlan_clone_destroy);
V_vlan_cloner = vlan_cloner;
- if_clone_attach(&V_vlan_cloner);
}
VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_vlan_init, NULL);
@@ -787,46 +823,39 @@ static void
vnet_vlan_uninit(const void *unused __unused)
{
- if_clone_detach(&V_vlan_cloner);
+ if_clone_detach(V_vlan_cloner);
}
-VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
+VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
vnet_vlan_uninit, NULL);
#endif
+/*
+ * Check for <etherif>.<vlan> style interface names.
+ */
static struct ifnet *
-vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag)
+vlan_clone_match_ethervid(const char *name, int *vidp)
{
- const char *cp;
+ char ifname[IFNAMSIZ];
+ char *cp;
struct ifnet *ifp;
- int t;
+ int vid;
- /* Check for <etherif>.<vlan> style interface names. */
- IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
- /*
- * We can handle non-ethernet hardware types as long as
- * they handle the tagging and headers themselves.
- */
- if (ifp->if_type != IFT_ETHER &&
- (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
- continue;
- if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0)
- continue;
- cp = name + strlen(ifp->if_xname);
- if (*cp++ != '.')
- continue;
- if (*cp == '\0')
- continue;
- t = 0;
- for(; *cp >= '0' && *cp <= '9'; cp++)
- t = (t * 10) + (*cp - '0');
- if (*cp != '\0')
- continue;
- if (tag != NULL)
- *tag = t;
- break;
- }
- IFNET_RUNLOCK_NOSLEEP();
+ strlcpy(ifname, name, IFNAMSIZ);
+ if ((cp = strchr(ifname, '.')) == NULL)
+ return (NULL);
+ *cp = '\0';
+ if ((ifp = ifunit(ifname)) == NULL)
+ return (NULL);
+ /* Parse VID. */
+ if (*++cp == '\0')
+ return (NULL);
+ vid = 0;
+ for(; *cp >= '0' && *cp <= '9'; cp++)
+ vid = (vid * 10) + (*cp - '0');
+ if (*cp != '\0')
+ return (NULL);
+ if (vidp != NULL)
+ *vidp = vid;
return (ifp);
}
@@ -836,10 +865,10 @@ vlan_clone_match(struct if_clone *ifc, const char *name)
{
const char *cp;
- if (vlan_clone_match_ethertag(ifc, name, NULL) != NULL)
+ if (vlan_clone_match_ethervid(name, NULL) != NULL)
return (1);
- if (strncmp(VLANNAME, name, strlen(VLANNAME)) != 0)
+ if (strncmp(vlanname, name, strlen(vlanname)) != 0)
return (0);
for (cp = name + 4; *cp != '\0'; cp++) {
if (*cp < '0' || *cp > '9')
@@ -856,7 +885,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
int wildcard;
int unit;
int error;
- int tag;
+ int vid;
int ethertag;
struct ifvlan *ifv;
struct ifnet *ifp;
@@ -873,7 +902,10 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
* o specify no parameters and get an unattached device that
* must be configured separately.
* The first technique is preferred; the latter two are
- * supported for backwards compatibilty.
+ * supported for backwards compatibility.
+ *
+ * XXXRW: Note historic use of the word "tag" here. New ioctls may be
+ * called for.
*/
if (params) {
error = copyin(params, &vlr, sizeof(vlr));
@@ -881,31 +913,18 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
return error;
p = ifunit(vlr.vlr_parent);
if (p == NULL)
- return ENXIO;
- /*
- * Don't let the caller set up a VLAN tag with
- * anything except VLID bits.
- */
- if (vlr.vlr_tag & ~EVL_VLID_MASK)
- return (EINVAL);
+ return (ENXIO);
error = ifc_name2unit(name, &unit);
if (error != 0)
return (error);
ethertag = 1;
- tag = vlr.vlr_tag;
+ vid = vlr.vlr_tag;
wildcard = (unit < 0);
- } else if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) {
+ } else if ((p = vlan_clone_match_ethervid(name, &vid)) != NULL) {
ethertag = 1;
unit = -1;
wildcard = 0;
-
- /*
- * Don't let the caller set up a VLAN tag with
- * anything except VLID bits.
- */
- if (tag & ~EVL_VLID_MASK)
- return (EINVAL);
} else {
ethertag = 0;
@@ -937,14 +956,13 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
return (ENOSPC);
}
SLIST_INIT(&ifv->vlan_mc_listhead);
-
ifp->if_softc = ifv;
/*
* Set the name manually rather than using if_initname because
* we don't conform to the default naming convention for interfaces.
*/
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = vlanname;
ifp->if_dunit = unit;
/* NB: flags are not set here */
ifp->if_linkmib = &ifv->ifv_mib;
@@ -966,7 +984,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
sdl->sdl_type = IFT_L2VLAN;
if (ethertag) {
- error = vlan_config(ifv, p, tag);
+ error = vlan_config(ifv, p, vid);
if (error != 0) {
/*
* Since we've partially failed, we need to back
@@ -975,7 +993,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
*/
ether_ifdetach(ifp);
vlan_unconfig(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
ifc_free_unit(ifc, unit);
free(ifv, M_VLAN);
@@ -997,7 +1015,7 @@ vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
ether_ifdetach(ifp); /* first, remove it from system-wide lists */
vlan_unconfig(ifp); /* now it can be unconfigured and freed */
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
free(ifv, M_VLAN);
ifc_free_unit(ifc, unit);
@@ -1020,6 +1038,8 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct ifvlan *ifv;
struct ifnet *p;
+ struct m_tag *mtag;
+ uint16_t tag;
int error, len, mcast;
ifv = ifp->if_softc;
@@ -1035,7 +1055,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
*/
if (!UP_AND_RUNNING(p)) {
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (ENETDOWN);
}
@@ -1051,7 +1071,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
* devices that just discard such runts instead or mishandle
* them somehow.
*/
- if (soft_pad && p->if_type == IFT_ETHER) {
+ if (V_soft_pad && p->if_type == IFT_ETHER) {
static char pad[8]; /* just zeros */
int n;
@@ -1062,7 +1082,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
if (n > 0) {
if_printf(ifp, "cannot pad short frame\n");
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
m_freem(m);
return (0);
}
@@ -1075,14 +1095,19 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
* knows how to find the VLAN tag to use, so we attach a
* packet tag that holds it.
*/
+ if (vlan_mtag_pcp && (mtag = m_tag_locate(m, MTAG_8021Q,
+ MTAG_8021Q_PCP_OUT, NULL)) != NULL)
+ tag = EVL_MAKETAG(ifv->ifv_vid, *(uint8_t *)(mtag + 1), 0);
+ else
+ tag = ifv->ifv_tag;
if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
- m->m_pkthdr.ether_vtag = ifv->ifv_tag;
+ m->m_pkthdr.ether_vtag = tag;
m->m_flags |= M_VLANTAG;
} else {
- m = ether_vlanencap(m, ifv->ifv_tag);
+ m = ether_vlanencap(m, tag);
if (m == NULL) {
if_printf(ifp, "unable to prepend VLAN header\n");
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (0);
}
}
@@ -1091,12 +1116,12 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
* Send it, precisely as ether_output() would have.
*/
error = (p->if_transmit)(p, m);
- if (!error) {
- ifp->if_opackets++;
- ifp->if_omcasts += mcast;
- ifp->if_obytes += len;
+ if (error == 0) {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast);
} else
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (error);
}
@@ -1113,7 +1138,9 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
{
struct ifvlantrunk *trunk = ifp->if_vlantrunk;
struct ifvlan *ifv;
- uint16_t tag;
+ TRUNK_LOCK_READER;
+ struct m_tag *mtag;
+ uint16_t vid, tag;
KASSERT(trunk != NULL, ("%s: no trunk", __func__));
@@ -1122,7 +1149,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
* Packet is tagged, but m contains a normal
* Ethernet frame; the tag is stored out-of-band.
*/
- tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
+ tag = m->m_pkthdr.ether_vtag;
m->m_flags &= ~M_VLANTAG;
} else {
struct ether_vlan_header *evl;
@@ -1138,7 +1165,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
return;
}
evl = mtod(m, struct ether_vlan_header *);
- tag = EVL_VLANOFTAG(ntohs(evl->evl_tag));
+ tag = ntohs(evl->evl_tag);
/*
* Remove the 802.1q header by copying the Ethernet
@@ -1157,43 +1184,75 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
__func__, ifp->if_xname, ifp->if_type);
#endif
m_freem(m);
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
return;
}
}
+ vid = EVL_VLANOFTAG(tag);
+
TRUNK_RLOCK(trunk);
- ifv = vlan_gethash(trunk, tag);
+ ifv = vlan_gethash(trunk, vid);
if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
TRUNK_RUNLOCK(trunk);
m_freem(m);
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
return;
}
TRUNK_RUNLOCK(trunk);
+ if (vlan_mtag_pcp) {
+ /*
+ * While uncommon, it is possible that we will find a 802.1q
+ * packet encapsulated inside another packet that also had an
+ * 802.1q header. For example, ethernet tunneled over IPSEC
+ * arriving over ethernet. In that case, we replace the
+ * existing 802.1q PCP m_tag value.
+ */
+ mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL);
+ if (mtag == NULL) {
+ mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN,
+ sizeof(uint8_t), M_NOWAIT);
+ if (mtag == NULL) {
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ return;
+ }
+ m_tag_prepend(m, mtag);
+ }
+ *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag);
+ }
+
m->m_pkthdr.rcvif = ifv->ifv_ifp;
- ifv->ifv_ifp->if_ipackets++;
+ if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1);
/* Pass it back through the parent's input routine. */
(*ifp->if_input)(ifv->ifv_ifp, m);
}
static int
-vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
+vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
{
struct ifvlantrunk *trunk;
struct ifnet *ifp;
int error = 0;
- /* VID numbers 0x0 and 0xFFF are reserved */
- if (tag == 0 || tag == 0xFFF)
- return (EINVAL);
+ /*
+ * We can handle non-ethernet hardware types as long as
+ * they handle the tagging and headers themselves.
+ */
if (p->if_type != IFT_ETHER &&
(p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
return (EPROTONOSUPPORT);
if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
return (EPROTONOSUPPORT);
+ /*
+ * Don't let the caller set up a VLAN VID with
+ * anything except VLID bits.
+ * VID numbers 0x0 and 0xFFF are reserved.
+ */
+ if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK))
+ return (EINVAL);
if (ifv->ifv_trunk)
return (EBUSY);
@@ -1203,7 +1262,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
vlan_inithash(trunk);
VLAN_LOCK();
if (p->if_vlantrunk != NULL) {
- /* A race that that is very unlikely to be hit. */
+ /* A race that is very unlikely to be hit. */
vlan_freehash(trunk);
free(trunk, M_VLAN);
goto exists;
@@ -1219,7 +1278,9 @@ exists:
TRUNK_LOCK(trunk);
}
- ifv->ifv_tag = tag; /* must set this before vlan_inshash() */
+ ifv->ifv_vid = vid; /* must set this before vlan_inshash() */
+ ifv->ifv_pcp = 0; /* Default: best effort delivery. */
+ vlan_tag_recalculate(ifv);
error = vlan_inshash(trunk, ifv);
if (error)
goto done;
@@ -1297,7 +1358,7 @@ exists:
done:
TRUNK_UNLOCK(trunk);
if (error == 0)
- EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_tag);
+ EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid);
VLAN_UNLOCK();
return (error);
@@ -1366,7 +1427,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
* Check if we were the last.
*/
if (trunk->refcnt == 0) {
- trunk->parent->if_vlantrunk = NULL;
+ parent->if_vlantrunk = NULL;
/*
* XXXGL: If some ithread has already entered
* vlan_input() and is now blocked on the trunk
@@ -1393,7 +1454,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
* to cleanup anyway.
*/
if (parent != NULL)
- EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_tag);
+ EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid);
}
/* Handle a reference counted flag that should be set on the parent as well */
@@ -1494,7 +1555,7 @@ vlan_capabilities(struct ifvlan *ifv)
p->if_capenable & IFCAP_VLAN_HWTAGGING) {
ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
- CSUM_UDP | CSUM_SCTP | CSUM_FRAGMENT);
+ CSUM_UDP | CSUM_SCTP);
} else {
ifp->if_capenable = 0;
ifp->if_hwassist = 0;
@@ -1562,6 +1623,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct ifreq *ifr;
struct ifaddr *ifa;
struct ifvlan *ifv;
+ struct ifvlantrunk *trunk;
struct vlanreq vlr;
int error = 0;
@@ -1633,6 +1695,13 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCSETVLAN:
#ifdef VIMAGE
+ /*
+ * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN
+ * interface to be delegated to a jail without allowing the
+ * jail to change what underlying interface/VID it is
+ * associated with. We are not entirely convinced that this
+ * is the right way to accomplish that policy goal.
+ */
if (ifp->if_vnet != ifp->if_home_vnet) {
error = EPERM;
break;
@@ -1650,14 +1719,6 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = ENOENT;
break;
}
- /*
- * Don't let the caller set up a VLAN tag with
- * anything except VLID bits.
- */
- if (vlr.vlr_tag & ~EVL_VLID_MASK) {
- error = EINVAL;
- break;
- }
error = vlan_config(ifv, p, vlr.vlr_tag);
if (error)
break;
@@ -1678,7 +1739,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if (TRUNK(ifv) != NULL) {
strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
sizeof(vlr.vlr_parent));
- vlr.vlr_tag = ifv->ifv_tag;
+ vlr.vlr_tag = ifv->ifv_vid;
}
VLAN_UNLOCK();
error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
@@ -1699,8 +1760,40 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
* If we don't have a parent, just remember the membership for
* when we do.
*/
- if (TRUNK(ifv) != NULL)
+ trunk = TRUNK(ifv);
+ if (trunk != NULL) {
+ TRUNK_LOCK(trunk);
error = vlan_setmulti(ifp);
+ TRUNK_UNLOCK(trunk);
+ }
+ break;
+
+ case SIOCGVLANPCP:
+#ifdef VIMAGE
+ if (ifp->if_vnet != ifp->if_home_vnet) {
+ error = EPERM;
+ break;
+ }
+#endif
+ ifr->ifr_vlan_pcp = ifv->ifv_pcp;
+ break;
+
+ case SIOCSVLANPCP:
+#ifdef VIMAGE
+ if (ifp->if_vnet != ifp->if_home_vnet) {
+ error = EPERM;
+ break;
+ }
+#endif
+ error = priv_check(curthread, PRIV_NET_SETVLANPCP);
+ if (error)
+ break;
+ if (ifr->ifr_vlan_pcp > 7) {
+ error = EINVAL;
+ break;
+ }
+ ifv->ifv_pcp = ifr->ifr_vlan_pcp;
+ vlan_tag_recalculate(ifv);
break;
default:
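
The two hunks above give 802.1p priority an out-of-band path: vlan_input() records the received PCP in an MTAG_8021Q_PCP_IN packet tag, and vlan_transmit() honors an MTAG_8021Q_PCP_OUT tag when it builds the outgoing tag with EVL_MAKETAG(). A minimal sketch (not part of this diff) of how another kernel consumer could request an egress priority, assuming the vlan_mtag_pcp knob is enabled; the helper name is hypothetical:

    static int
    set_mbuf_pcp_out(struct mbuf *m, uint8_t pcp)
    {
        struct m_tag *mtag;

        /* Reuse an existing tag if one is already attached. */
        mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL);
        if (mtag == NULL) {
            mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_OUT,
                sizeof(uint8_t), M_NOWAIT);
            if (mtag == NULL)
                return (ENOBUFS);
            m_tag_prepend(m, mtag);
        }
        *(uint8_t *)(mtag + 1) = pcp;  /* 0..7, see IEEE8021Q_PCP_* */
        return (0);
    }
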
diff --git a/freebsd/sys/net/if_vlan_var.h b/freebsd/sys/net/if_vlan_var.h
index fd3fc4f3..6b20d142 100644
--- a/freebsd/sys/net/if_vlan_var.h
+++ b/freebsd/sys/net/if_vlan_var.h
@@ -32,22 +32,6 @@
#ifndef _NET_IF_VLAN_VAR_H_
#define _NET_IF_VLAN_VAR_H_ 1
-struct ether_vlan_header {
- u_char evl_dhost[ETHER_ADDR_LEN];
- u_char evl_shost[ETHER_ADDR_LEN];
- u_int16_t evl_encap_proto;
- u_int16_t evl_tag;
- u_int16_t evl_proto;
-};
-
-#define EVL_VLID_MASK 0x0FFF
-#define EVL_PRI_MASK 0xE000
-#define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK)
-#define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7)
-#define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1)
-#define EVL_MAKETAG(vlid, pri, cfi) \
- ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK))
-
/* Set the VLAN ID in an mbuf packet header non-destructively. */
#define EVL_APPLY_VLID(m, vlid) \
do { \
@@ -89,6 +73,23 @@ struct vlanreq {
#define SIOCSETVLAN SIOCSIFGENERIC
#define SIOCGETVLAN SIOCGIFGENERIC
+#define SIOCGVLANPCP _IOWR('i', 152, struct ifreq) /* Get VLAN PCP */
+#define SIOCSVLANPCP _IOW('i', 153, struct ifreq) /* Set VLAN PCP */
+
+/*
+ * Names for 802.1q priorities ("802.1p"). Notice that in this scheme,
+ * (0 < 1), allowing default 0-tagged traffic to take priority over background
+ * tagged traffic.
+ */
+#define IEEE8021Q_PCP_BK 1 /* Background (lowest) */
+#define IEEE8021Q_PCP_BE 0 /* Best effort (default) */
+#define IEEE8021Q_PCP_EE 2 /* Excellent effort */
+#define IEEE8021Q_PCP_CA 3 /* Critical applications */
+#define IEEE8021Q_PCP_VI 4 /* Video, < 100ms latency */
+#define IEEE8021Q_PCP_VO 5 /* Voice, < 10ms latency */
+#define IEEE8021Q_PCP_IC 6 /* Internetwork control */
+#define IEEE8021Q_PCP_NC 7 /* Network control (highest) */
+
#ifdef _KERNEL
/*
* Drivers that are capable of adding and removing the VLAN header
@@ -108,7 +109,7 @@ struct vlanreq {
* received VLAN tag (containing both vlan and priority information)
* into the ether_vtag mbuf packet header field:
*
- * m->m_pkthdr.ether_vtag = vlan_id; // ntohs()?
+ * m->m_pkthdr.ether_vtag = vtag; // ntohs()?
* m->m_flags |= M_VLANTAG;
*
* to mark the packet m with the specified VLAN tag.
@@ -126,6 +127,16 @@ struct vlanreq {
* if_capabilities.
*/
+/*
+ * The 802.1q code may also tag mbufs with the PCP (priority) field for use in
+ * other layers of the stack, in which case an m_tag will be used. This is
+ * semantically quite different from use of the ether_vtag field, which is
+ * defined only between the device driver and VLAN layer.
+ */
+#define MTAG_8021Q 1326104895
+#define MTAG_8021Q_PCP_IN 0 /* Input priority. */
+#define MTAG_8021Q_PCP_OUT 1 /* Output priority. */
+
#define VLAN_CAPABILITIES(_ifp) do { \
if ((_ifp)->if_vlantrunk != NULL) \
(*vlan_trunk_cap_p)(_ifp); \
@@ -133,15 +144,15 @@ struct vlanreq {
#define VLAN_TRUNKDEV(_ifp) \
(_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL
-#define VLAN_TAG(_ifp, _tag) \
- (_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_tag)) : EINVAL
+#define VLAN_TAG(_ifp, _vid) \
+ (_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_vid)) : EINVAL
#define VLAN_COOKIE(_ifp) \
(_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL
#define VLAN_SETCOOKIE(_ifp, _cookie) \
(_ifp)->if_type == IFT_L2VLAN ? \
(*vlan_setcookie_p)((_ifp), (_cookie)) : EINVAL
-#define VLAN_DEVAT(_ifp, _tag) \
- (_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_tag)) : NULL
+#define VLAN_DEVAT(_ifp, _vid) \
+ (_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_vid)) : NULL
extern void (*vlan_trunk_cap_p)(struct ifnet *);
extern struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
@@ -150,6 +161,14 @@ extern int (*vlan_tag_p)(struct ifnet *, uint16_t *);
extern int (*vlan_setcookie_p)(struct ifnet *, void *);
extern void *(*vlan_cookie_p)(struct ifnet *);
+#ifdef _SYS_EVENTHANDLER_H_
+/* VLAN state change events */
+typedef void (*vlan_config_fn)(void *, struct ifnet *, uint16_t);
+typedef void (*vlan_unconfig_fn)(void *, struct ifnet *, uint16_t);
+EVENTHANDLER_DECLARE(vlan_config, vlan_config_fn);
+EVENTHANDLER_DECLARE(vlan_unconfig, vlan_unconfig_fn);
+#endif /* _SYS_EVENTHANDLER_H_ */
+
#endif /* _KERNEL */
#endif /* _NET_IF_VLAN_VAR_H_ */
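
The block removed above (struct ether_vlan_header and the EVL_* macros) is not deleted outright; this FreeBSD update relocates it to net/ethernet.h. The bit layout those macros encode is worth keeping in mind: PCP in bits 13-15, CFI in bit 12, VID in bits 0-11. An illustrative round trip:

    uint16_t tag = EVL_MAKETAG(100, IEEE8021Q_PCP_VO, 0); /* VID 100, PCP 5 */
    uint16_t vid = EVL_VLANOFTAG(tag);  /* == 100 (bits 0-11)  */
    uint8_t  pcp = EVL_PRIOFTAG(tag);   /* == 5   (bits 13-15) */
    uint8_t  cfi = EVL_CFIOFTAG(tag);   /* == 0   (bit 12)     */
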
diff --git a/freebsd/sys/net/ifq.h b/freebsd/sys/net/ifq.h
new file mode 100644
index 00000000..f0d206d8
--- /dev/null
+++ b/freebsd/sys/net/ifq.h
@@ -0,0 +1,484 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From: @(#)if.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IFQ_H_
+#define _NET_IFQ_H_
+
+#ifdef _KERNEL
+#include <sys/mbuf.h> /* ifqueue only? */
+#include <sys/buf_ring.h>
+#include <net/vnet.h>
+#endif /* _KERNEL */
+#include <rtems/bsd/sys/lock.h> /* XXX */
+#include <sys/mutex.h> /* struct ifqueue */
+
+/*
+ * A couple of ugly extra definitions that are required since ifq.h
+ * was split out of if_var.h.
+ */
+#define IF_DUNIT_NONE -1
+
+#include <net/altq/if_altq.h>
+
+/*
+ * Structure defining a queue for a network interface.
+ */
+struct ifqueue {
+ struct mbuf *ifq_head;
+ struct mbuf *ifq_tail;
+ int ifq_len;
+ int ifq_maxlen;
+ struct mtx ifq_mtx;
+};
+
+#ifdef _KERNEL
+/*
+ * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
+ * are queues of messages stored on ifqueue structures
+ * (defined above). Entries are added to and deleted from these structures
+ * by these macros.
+ */
+#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx)
+#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx)
+#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED)
+#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
+#define _IF_QLEN(ifq) ((ifq)->ifq_len)
+
+#define _IF_ENQUEUE(ifq, m) do { \
+ (m)->m_nextpkt = NULL; \
+ if ((ifq)->ifq_tail == NULL) \
+ (ifq)->ifq_head = m; \
+ else \
+ (ifq)->ifq_tail->m_nextpkt = m; \
+ (ifq)->ifq_tail = m; \
+ (ifq)->ifq_len++; \
+} while (0)
+
+#define IF_ENQUEUE(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_ENQUEUE(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_PREPEND(ifq, m) do { \
+ (m)->m_nextpkt = (ifq)->ifq_head; \
+ if ((ifq)->ifq_tail == NULL) \
+ (ifq)->ifq_tail = (m); \
+ (ifq)->ifq_head = (m); \
+ (ifq)->ifq_len++; \
+} while (0)
+
+#define IF_PREPEND(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_PREPEND(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_DEQUEUE(ifq, m) do { \
+ (m) = (ifq)->ifq_head; \
+ if (m) { \
+ if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \
+ (ifq)->ifq_tail = NULL; \
+ (m)->m_nextpkt = NULL; \
+ (ifq)->ifq_len--; \
+ } \
+} while (0)
+
+#define IF_DEQUEUE(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_DEQUEUE(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_DEQUEUE_ALL(ifq, m) do { \
+ (m) = (ifq)->ifq_head; \
+ (ifq)->ifq_head = (ifq)->ifq_tail = NULL; \
+ (ifq)->ifq_len = 0; \
+} while (0)
+
+#define IF_DEQUEUE_ALL(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_DEQUEUE_ALL(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head)
+#define IF_POLL(ifq, m) _IF_POLL(ifq, m)
+
+#define _IF_DRAIN(ifq) do { \
+ struct mbuf *m; \
+ for (;;) { \
+ _IF_DEQUEUE(ifq, m); \
+ if (m == NULL) \
+ break; \
+ m_freem(m); \
+ } \
+} while (0)
+
+#define IF_DRAIN(ifq) do { \
+ IF_LOCK(ifq); \
+ _IF_DRAIN(ifq); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
+ int adjust);
+#define IF_HANDOFF(ifq, m, ifp) \
+ if_handoff((struct ifqueue *)ifq, m, ifp, 0)
+#define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \
+ if_handoff((struct ifqueue *)ifq, m, ifp, adj)
+
+void if_start(struct ifnet *);
+
+#define IFQ_ENQUEUE(ifq, m, err) \
+do { \
+ IF_LOCK(ifq); \
+ if (ALTQ_IS_ENABLED(ifq)) \
+ ALTQ_ENQUEUE(ifq, m, NULL, err); \
+ else { \
+ if (_IF_QFULL(ifq)) { \
+ m_freem(m); \
+ (err) = ENOBUFS; \
+ } else { \
+ _IF_ENQUEUE(ifq, m); \
+ (err) = 0; \
+ } \
+ } \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_DEQUEUE_NOLOCK(ifq, m) \
+do { \
+ if (TBR_IS_ENABLED(ifq)) \
+ (m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \
+ else if (ALTQ_IS_ENABLED(ifq)) \
+ ALTQ_DEQUEUE(ifq, m); \
+ else \
+ _IF_DEQUEUE(ifq, m); \
+} while (0)
+
+#define IFQ_DEQUEUE(ifq, m) \
+do { \
+ IF_LOCK(ifq); \
+ IFQ_DEQUEUE_NOLOCK(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_POLL_NOLOCK(ifq, m) \
+do { \
+ if (TBR_IS_ENABLED(ifq)) \
+ (m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \
+ else if (ALTQ_IS_ENABLED(ifq)) \
+ ALTQ_POLL(ifq, m); \
+ else \
+ _IF_POLL(ifq, m); \
+} while (0)
+
+#define IFQ_POLL(ifq, m) \
+do { \
+ IF_LOCK(ifq); \
+ IFQ_POLL_NOLOCK(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_PURGE_NOLOCK(ifq) \
+do { \
+ if (ALTQ_IS_ENABLED(ifq)) { \
+ ALTQ_PURGE(ifq); \
+ } else \
+ _IF_DRAIN(ifq); \
+} while (0)
+
+#define IFQ_PURGE(ifq) \
+do { \
+ IF_LOCK(ifq); \
+ IFQ_PURGE_NOLOCK(ifq); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_SET_READY(ifq) \
+ do { ((ifq)->altq_flags |= ALTQF_READY); } while (0)
+
+#define IFQ_LOCK(ifq) IF_LOCK(ifq)
+#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq)
+#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq)
+#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
+#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++)
+#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len)
+#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len))
+
+/*
+ * The IFF_DRV_OACTIVE test should really occur in the device driver, not in
+ * the handoff logic, as that flag is locked by the device driver.
+ */
+#define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \
+do { \
+ int len; \
+ short mflags; \
+ \
+ len = (m)->m_pkthdr.len; \
+ mflags = (m)->m_flags; \
+ IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \
+ if ((err) == 0) { \
+ if_inc_counter((ifp), IFCOUNTER_OBYTES, len + (adj)); \
+ if (mflags & M_MCAST) \
+ if_inc_counter((ifp), IFCOUNTER_OMCASTS, 1); \
+ if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \
+ if_start(ifp); \
+ } else \
+ if_inc_counter((ifp), IFCOUNTER_OQDROPS, 1); \
+} while (0)
+
+#define IFQ_HANDOFF(ifp, m, err) \
+ IFQ_HANDOFF_ADJ(ifp, m, 0, err)
+
+#define IFQ_DRV_DEQUEUE(ifq, m) \
+do { \
+ (m) = (ifq)->ifq_drv_head; \
+ if (m) { \
+ if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \
+ (ifq)->ifq_drv_tail = NULL; \
+ (m)->m_nextpkt = NULL; \
+ (ifq)->ifq_drv_len--; \
+ } else { \
+ IFQ_LOCK(ifq); \
+ IFQ_DEQUEUE_NOLOCK(ifq, m); \
+ while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \
+ struct mbuf *m0; \
+ IFQ_DEQUEUE_NOLOCK(ifq, m0); \
+ if (m0 == NULL) \
+ break; \
+ m0->m_nextpkt = NULL; \
+ if ((ifq)->ifq_drv_tail == NULL) \
+ (ifq)->ifq_drv_head = m0; \
+ else \
+ (ifq)->ifq_drv_tail->m_nextpkt = m0; \
+ (ifq)->ifq_drv_tail = m0; \
+ (ifq)->ifq_drv_len++; \
+ } \
+ IFQ_UNLOCK(ifq); \
+ } \
+} while (0)
+
+#define IFQ_DRV_PREPEND(ifq, m) \
+do { \
+ (m)->m_nextpkt = (ifq)->ifq_drv_head; \
+ if ((ifq)->ifq_drv_tail == NULL) \
+ (ifq)->ifq_drv_tail = (m); \
+ (ifq)->ifq_drv_head = (m); \
+ (ifq)->ifq_drv_len++; \
+} while (0)
+
+#define IFQ_DRV_IS_EMPTY(ifq) \
+ (((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0))
+
+#define IFQ_DRV_PURGE(ifq) \
+do { \
+ struct mbuf *m, *n = (ifq)->ifq_drv_head; \
+ while ((m = n) != NULL) { \
+ n = m->m_nextpkt; \
+ m_freem(m); \
+ } \
+ (ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \
+ (ifq)->ifq_drv_len = 0; \
+ IFQ_PURGE(ifq); \
+} while (0)
+
+static __inline int
+drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
+{
+ int error = 0;
+
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_ENQUEUE(&ifp->if_snd, m, error);
+ if (error)
+ if_inc_counter((ifp), IFCOUNTER_OQDROPS, 1);
+ return (error);
+ }
+#endif
+ error = buf_ring_enqueue(br, m);
+ if (error)
+ m_freem(m);
+
+ return (error);
+}
+
+static __inline void
+drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new)
+{
+ /*
+ * The top of the list needs to be swapped
+ * for this one.
+ */
+#ifdef ALTQ
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ /*
+ * The earlier peek in the ALTQ case already dequeued
+ * the mbuf, so put it back at the head.
+ */
+ IFQ_DRV_PREPEND(&ifp->if_snd, new);
+ return;
+ }
+#endif
+ buf_ring_putback_sc(br, new);
+}
+
+static __inline struct mbuf *
+drbr_peek(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ struct mbuf *m;
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ /*
+ * Pull it off like a dequeue
+ * since drbr_advance() does nothing
+ * for altq and drbr_putback() will
+ * use the old prepend function.
+ */
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ return (m);
+ }
+#endif
+ return(buf_ring_peek_clear_sc(br));
+}
+
+static __inline void
+drbr_flush(struct ifnet *ifp, struct buf_ring *br)
+{
+ struct mbuf *m;
+
+#ifdef ALTQ
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
+ IFQ_PURGE(&ifp->if_snd);
+#endif
+ while ((m = buf_ring_dequeue_sc(br)) != NULL)
+ m_freem(m);
+}
+
+static __inline void
+drbr_free(struct buf_ring *br, struct malloc_type *type)
+{
+
+ drbr_flush(NULL, br);
+ buf_ring_free(br, type);
+}
+
+static __inline struct mbuf *
+drbr_dequeue(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ struct mbuf *m;
+
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ return (m);
+ }
+#endif
+ return (buf_ring_dequeue_sc(br));
+}
+
+static __inline void
+drbr_advance(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ /* Nothing to do here since peek dequeues in altq case */
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
+ return;
+#endif
+ return (buf_ring_advance_sc(br));
+}
+
+
+static __inline struct mbuf *
+drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
+ int (*func) (struct mbuf *, void *), void *arg)
+{
+ struct mbuf *m;
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, m);
+ if (m != NULL && func(m, arg) == 0) {
+ IFQ_UNLOCK(&ifp->if_snd);
+ return (NULL);
+ }
+ IFQ_DEQUEUE_NOLOCK(&ifp->if_snd, m);
+ IFQ_UNLOCK(&ifp->if_snd);
+ return (m);
+ }
+#endif
+ m = buf_ring_peek(br);
+ if (m == NULL || func(m, arg) == 0)
+ return (NULL);
+
+ return (buf_ring_dequeue_sc(br));
+}
+
+static __inline int
+drbr_empty(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (IFQ_IS_EMPTY(&ifp->if_snd));
+#endif
+ return (buf_ring_empty(br));
+}
+
+static __inline int
+drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (1);
+#endif
+ return (!buf_ring_empty(br));
+}
+
+static __inline int
+drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (ifp->if_snd.ifq_len);
+#endif
+ return (buf_ring_count(br));
+}
+
+extern int ifqmaxlen;
+
+void if_qflush(struct ifnet *);
+void ifq_init(struct ifaltq *, struct ifnet *ifp);
+void ifq_delete(struct ifaltq *);
+
+#endif /* _KERNEL */
+#endif /* !_NET_IFQ_H_ */
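
The drbr_* inline functions above wrap either a lock-free buf_ring or, when ALTQ is enabled on the interface, the legacy locked ifqueue; the peek/advance/putback triple exists so a driver can commit a packet to hardware before consuming it from the ring. A sketch of the usual transmit drain loop (hw_enqueue() is a hypothetical driver detail, not an API from this header):

    static void
    example_txq_drain(struct ifnet *ifp, struct buf_ring *br)
    {
        struct mbuf *m;

        while ((m = drbr_peek(ifp, br)) != NULL) {
            if (hw_enqueue(ifp->if_softc, m) != 0) {
                /* Out of descriptors: leave the packet queued. */
                drbr_putback(ifp, br, m);
                break;
            }
            /* Committed to hardware; consume the ring slot. */
            drbr_advance(ifp, br);
        }
    }
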
diff --git a/freebsd/sys/net/iso88025.h b/freebsd/sys/net/iso88025.h
index 6edd2e0b..11bd6ec4 100644
--- a/freebsd/sys/net/iso88025.h
+++ b/freebsd/sys/net/iso88025.h
@@ -162,11 +162,13 @@ struct iso88025_addr {
#define ISO88025_BPF_UNSUPPORTED 0
#define ISO88025_BPF_SUPPORTED 1
+#ifdef _KERNEL
void iso88025_ifattach (struct ifnet *, const u_int8_t *, int);
void iso88025_ifdetach (struct ifnet *, int);
int iso88025_ioctl (struct ifnet *, u_long, caddr_t );
-int iso88025_output (struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
+int iso88025_output (struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
void iso88025_input (struct ifnet *, struct mbuf *);
+#endif /* _KERNEL */
-#endif
+#endif /* !_NET_ISO88025_H_ */
diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c
index f43cffa1..f14b2e95 100644
--- a/freebsd/sys/net/netisr.c
+++ b/freebsd/sys/net/netisr.c
@@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
+#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mbuf.h>
@@ -131,7 +132,7 @@ static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");
/*-
* Three global direct dispatch policies are supported:
*
- * NETISR_DISPATCH_QUEUED: All work is deferred for a netisr, regardless of
+ * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
* context (may be overridden by protocols).
*
* NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
@@ -151,37 +152,25 @@ static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");
#define NETISR_DISPATCH_POLICY_MAXSTR 20 /* Used for temporary buffers. */
static u_int netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
-SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RW |
- CTLFLAG_TUN, 0, 0, sysctl_netisr_dispatch_policy, "A",
+SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RWTUN,
+ 0, 0, sysctl_netisr_dispatch_policy, "A",
"netisr dispatch policy");
/*
- * These sysctls were used in previous versions to control and export
- * dispatch policy state. Now, we provide read-only export via them so that
- * older netstat binaries work. At some point they can be garbage collected.
- */
-static int netisr_direct_force;
-SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RD,
- &netisr_direct_force, 0, "compat: force direct dispatch");
-
-static int netisr_direct;
-SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RD, &netisr_direct, 0,
- "compat: enable direct dispatch");
-
-/*
* Allow the administrator to limit the number of threads (CPUs) to use for
* netisr. We don't check netisr_maxthreads before creating the thread for
- * CPU 0, so in practice we ignore values <= 1. This must be set at boot.
- * We will create at most one thread per CPU.
+ * CPU 0. This must be set at boot. We will create at most one thread per CPU.
+ * By default we initialize this to 1, which assigns just one CPU (cpu0) and
+ * therefore only one workstream. If set to -1, netisr uses all CPUs
+ * (mp_ncpus) and therefore has that many workstreams, one workstream
+ * per thread (CPU).
*/
-static int netisr_maxthreads = -1; /* Max number of threads. */
-TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
+static int netisr_maxthreads = 1; /* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
&netisr_maxthreads, 0,
"Use at most this many CPUs for netisr processing");
static int netisr_bindthreads = 0; /* Bind threads to CPUs. */
-TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads);
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
&netisr_bindthreads, 0, "Bind netisr threads to CPUs.");
@@ -192,7 +181,6 @@ SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
*/
#define NETISR_DEFAULT_MAXQLIMIT 10240
static u_int netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
-TUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit);
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
&netisr_maxqlimit, 0,
"Maximum netisr per-protocol, per-CPU queue depth.");
@@ -204,7 +192,6 @@ SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
*/
#define NETISR_DEFAULT_DEFAULTQLIMIT 256
static u_int netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
-TUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit);
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
&netisr_defaultqlimit, 0,
"Default netisr per-protocol, per-CPU queue limit if not set by protocol");
@@ -225,6 +212,23 @@ SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
*/
static struct netisr_proto netisr_proto[NETISR_MAXPROT];
+#ifdef VIMAGE
+/*
+ * The netisr_enable array describes a per-VNET flag for registered
+ * protocols on whether this netisr is active in this VNET or not.
+ * netisr_register() will automatically enable the netisr for the
+ * default VNET and all currently active instances.
+ * netisr_unregister() will disable all active VNETs, including vnet0.
+ * Individual network stack instances can be enabled/disabled by the
+ * netisr_(un)register_vnet() functions.
+ * With this we keep the one netisr_proto per protocol but add a
+ * mechanism to stop netisr processing for vnet teardown.
+ * Apart from that we expect a VNET to always be enabled.
+ */
+static VNET_DEFINE(u_int, netisr_enable[NETISR_MAXPROT]);
+#define V_netisr_enable VNET(netisr_enable)
+#endif
+
#ifndef __rtems__
/*
* Per-CPU workstream data. See netisr_internal.h for more details.
@@ -275,10 +279,7 @@ u_int
netisr_get_cpuid(u_int cpunumber)
{
- KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber,
- nws_count));
-
- return (nws_array[cpunumber]);
+ return (nws_array[cpunumber % nws_count]);
}
/*
@@ -308,8 +309,6 @@ static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
{ NETISR_DISPATCH_HYBRID, "hybrid" },
{ NETISR_DISPATCH_DIRECT, "direct" },
};
-static const u_int netisr_dispatch_table_len =
- (sizeof(netisr_dispatch_table) / sizeof(netisr_dispatch_table[0]));
static void
netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
@@ -320,7 +319,7 @@ netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
u_int i;
str = "unknown";
- for (i = 0; i < netisr_dispatch_table_len; i++) {
+ for (i = 0; i < nitems(netisr_dispatch_table); i++) {
ndtep = &netisr_dispatch_table[i];
if (ndtep->ndte_policy == dispatch_policy) {
str = ndtep->ndte_policy_str;
@@ -336,7 +335,7 @@ netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
const struct netisr_dispatch_table_entry *ndtep;
u_int i;
- for (i = 0; i < netisr_dispatch_table_len; i++) {
+ for (i = 0; i < nitems(netisr_dispatch_table); i++) {
ndtep = &netisr_dispatch_table[i];
if (strcmp(ndtep->ndte_policy_str, str) == 0) {
*dispatch_policyp = ndtep->ndte_policy;
@@ -346,32 +345,6 @@ netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
return (EINVAL);
}
-static void
-netisr_dispatch_policy_compat(void)
-{
-
- switch (netisr_dispatch_policy) {
- case NETISR_DISPATCH_DEFERRED:
- netisr_direct_force = 0;
- netisr_direct = 0;
- break;
-
- case NETISR_DISPATCH_HYBRID:
- netisr_direct_force = 0;
- netisr_direct = 1;
- break;
-
- case NETISR_DISPATCH_DIRECT:
- netisr_direct_force = 1;
- netisr_direct = 1;
- break;
-
- default:
- panic("%s: unknown policy %u", __func__,
- netisr_dispatch_policy);
- }
-}
-
static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
@@ -387,10 +360,8 @@ sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
&dispatch_policy);
if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
error = EINVAL;
- if (error == 0) {
+ if (error == 0)
netisr_dispatch_policy = dispatch_policy;
- netisr_dispatch_policy_compat();
- }
}
return (error);
}
@@ -403,6 +374,7 @@ sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
void
netisr_register(const struct netisr_handler *nhp)
{
+ VNET_ITERATOR_DECL(vnet_iter);
struct netisr_work *npwp;
const char *name;
u_int i, proto;
@@ -475,6 +447,22 @@ netisr_register(const struct netisr_handler *nhp)
bzero(npwp, sizeof(*npwp));
npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
}
+
+#ifdef VIMAGE
+ /*
+ * Test that we are in vnet0 and have a curvnet set.
+ */
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p",
+ __func__, curvnet, vnet0));
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_netisr_enable[proto] = 1;
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+#endif
NETISR_WUNLOCK();
}
@@ -651,6 +639,7 @@ netisr_drain_proto(struct netisr_work *npwp)
void
netisr_unregister(const struct netisr_handler *nhp)
{
+ VNET_ITERATOR_DECL(vnet_iter);
struct netisr_work *npwp;
#ifdef INVARIANTS
const char *name;
@@ -669,6 +658,16 @@ netisr_unregister(const struct netisr_handler *nhp)
("%s(%u): protocol not registered for %s", __func__, proto,
name));
+#ifdef VIMAGE
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_netisr_enable[proto] = 0;
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+#endif
+
netisr_proto[proto].np_name = NULL;
netisr_proto[proto].np_handler = NULL;
netisr_proto[proto].np_m2flow = NULL;
@@ -687,6 +686,97 @@ netisr_unregister(const struct netisr_handler *nhp)
NETISR_WUNLOCK();
}
+#ifdef VIMAGE
+void
+netisr_register_vnet(const struct netisr_handler *nhp)
+{
+ u_int proto;
+
+ proto = nhp->nh_proto;
+
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
+ NETISR_WLOCK();
+ KASSERT(netisr_proto[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ nhp->nh_name));
+
+ V_netisr_enable[proto] = 1;
+ NETISR_WUNLOCK();
+}
+
+static void
+netisr_drain_proto_vnet(struct vnet *vnet, u_int proto)
+{
+ struct netisr_workstream *nwsp;
+ struct netisr_work *npwp;
+ struct mbuf *m, *mp, *n, *ne;
+ u_int i;
+
+ KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__));
+ NETISR_LOCK_ASSERT();
+
+ CPU_FOREACH(i) {
+ nwsp = DPCPU_ID_PTR(i, nws);
+ if (nwsp->nws_intr_event == NULL)
+ continue;
+ npwp = &nwsp->nws_work[proto];
+ NWS_LOCK(nwsp);
+
+ /*
+ * Rather than dissecting and removing mbufs from the middle
+ * of the chain, we build a new chain if the packet stays and
+ * update the head and tail pointers at the end. All packets
+ * matching the given vnet are freed.
+ */
+ m = npwp->nw_head;
+ n = ne = NULL;
+ while (m != NULL) {
+ mp = m;
+ m = m->m_nextpkt;
+ mp->m_nextpkt = NULL;
+ if (mp->m_pkthdr.rcvif->if_vnet != vnet) {
+ if (n == NULL) {
+ n = ne = mp;
+ } else {
+ ne->m_nextpkt = mp;
+ ne = mp;
+ }
+ continue;
+ }
+ /* This is a packet in the selected vnet. Free it. */
+ npwp->nw_len--;
+ m_freem(mp);
+ }
+ npwp->nw_head = n;
+ npwp->nw_tail = ne;
+ NWS_UNLOCK(nwsp);
+ }
+}
+
+void
+netisr_unregister_vnet(const struct netisr_handler *nhp)
+{
+ u_int proto;
+
+ proto = nhp->nh_proto;
+
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
+ NETISR_WLOCK();
+ KASSERT(netisr_proto[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ nhp->nh_name));
+
+ V_netisr_enable[proto] = 0;
+
+ netisr_drain_proto_vnet(curvnet, proto);
+ NETISR_WUNLOCK();
+}
+#endif
+
/*
* Compose the global and per-protocol policies on dispatch, and return the
* dispatch policy to use.
@@ -746,22 +836,25 @@ netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
* dispatch. In the queued case, fall back on the SOURCE
* policy.
*/
- if (*cpuidp != NETISR_CPUID_NONE)
+ if (*cpuidp != NETISR_CPUID_NONE) {
+ *cpuidp = netisr_get_cpuid(*cpuidp);
return (m);
+ }
if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
- *cpuidp = curcpu;
+ *cpuidp = netisr_get_cpuid(curcpu);
return (m);
}
policy = NETISR_POLICY_SOURCE;
}
if (policy == NETISR_POLICY_FLOW) {
- if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) {
+ if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE &&
+ npp->np_m2flow != NULL) {
m = npp->np_m2flow(m, source);
if (m == NULL)
return (NULL);
}
- if (m->m_flags & M_FLOWID) {
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
*cpuidp =
netisr_default_flow2cpu(m->m_pkthdr.flowid);
return (m);
@@ -984,6 +1077,13 @@ netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
KASSERT(netisr_proto[proto].np_handler != NULL,
("%s: invalid proto %u", __func__, proto));
+#ifdef VIMAGE
+ if (V_netisr_enable[proto] == 0) {
+ m_freem(m);
+ return (ENOPROTOOPT);
+ }
+#endif
+
m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
source, m, &cpuid);
if (m != NULL) {
@@ -1030,6 +1130,13 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
proto));
+#ifdef VIMAGE
+ if (V_netisr_enable[proto] == 0) {
+ m_freem(m);
+ return (ENOPROTOOPT);
+ }
+#endif
+
dispatch_policy = netisr_get_dispatch(npp);
if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
return (netisr_queue_src(proto, source, m));
@@ -1215,15 +1322,15 @@ netisr_start_swi(u_int cpuid, struct pcpu *pc)
static void
netisr_init(void *arg)
{
- char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
- u_int dispatch_policy;
- int error;
-
- KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));
+#ifdef EARLY_AP_STARTUP
+ struct pcpu *pc;
+#endif
NETISR_LOCK_INIT();
- if (netisr_maxthreads < 1)
- netisr_maxthreads = 1;
+ if (netisr_maxthreads == 0 || netisr_maxthreads < -1)
+ netisr_maxthreads = 1; /* default behavior */
+ else if (netisr_maxthreads == -1)
+ netisr_maxthreads = mp_ncpus; /* use max cpus */
if (netisr_maxthreads > mp_ncpus) {
printf("netisr_init: forcing maxthreads from %d to %d\n",
netisr_maxthreads, mp_ncpus);
@@ -1248,31 +1355,24 @@ netisr_init(void *arg)
}
#endif
-#ifndef __rtems__
- if (TUNABLE_STR_FETCH("net.isr.dispatch", tmp, sizeof(tmp))) {
- error = netisr_dispatch_policy_from_str(tmp,
- &dispatch_policy);
- if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
- error = EINVAL;
- if (error == 0) {
- netisr_dispatch_policy = dispatch_policy;
- netisr_dispatch_policy_compat();
- } else
- printf(
- "%s: invalid dispatch policy %s, using default\n",
- __func__, tmp);
+#ifdef EARLY_AP_STARTUP
+ STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
+ if (nws_count >= netisr_maxthreads)
+ break;
+ netisr_start_swi(pc->pc_cpuid, pc);
}
-#endif /* __rtems__ */
-
+#else
#ifndef __rtems__
netisr_start_swi(curcpu, pcpu_find(curcpu));
#else /* __rtems__ */
netisr_start_swi(0, NULL);
#endif /* __rtems__ */
+#endif
}
SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);
#ifndef __rtems__
+#ifndef EARLY_AP_STARTUP
/*
* Start worker threads for additional CPUs. No attempt to gracefully handle
* work reassignment, we don't yet support dynamic reconfiguration.
@@ -1285,9 +1385,6 @@ netisr_start(void *arg)
STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
if (nws_count >= netisr_maxthreads)
break;
- /* XXXRW: Is skipping absent CPUs still required here? */
- if (CPU_ABSENT(pc->pc_cpuid))
- continue;
/* Worker will already be present for boot CPU. */
if (pc->pc_netisr != NULL)
continue;
@@ -1295,6 +1392,7 @@ netisr_start(void *arg)
}
}
SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
+#endif
#endif /* __rtems__ */
/*
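
With the compat sysctls gone, the thread count is controlled purely by the boot-time tunables above: net.isr.maxthreads now defaults to 1, and -1 selects one workstream per CPU. On a stock FreeBSD system these CTLFLAG_RDTUN knobs would be set from loader.conf, for example (illustrative config fragment; an RTEMS application sets tunables by other means):

    net.isr.maxthreads=-1   # one netisr workstream per CPU
    net.isr.bindthreads=1   # pin each netisr thread to its CPU
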
diff --git a/freebsd/sys/net/netisr.h b/freebsd/sys/net/netisr.h
index 83bf9ce5..63764a74 100644
--- a/freebsd/sys/net/netisr.h
+++ b/freebsd/sys/net/netisr.h
@@ -52,15 +52,13 @@
#define NETISR_IP 1
#define NETISR_IGMP 2 /* IGMPv3 output queue */
#define NETISR_ROUTE 3 /* routing socket */
-#define NETISR_AARP 4 /* Appletalk ARP */
-#define NETISR_ATALK2 5 /* Appletalk phase 2 */
-#define NETISR_ATALK1 6 /* Appletalk phase 1 */
-#define NETISR_ARP 7 /* same as AF_LINK */
-#define NETISR_IPX 8 /* same as AF_IPX */
-#define NETISR_ETHER 9 /* ethernet input */
-#define NETISR_IPV6 10
-#define NETISR_NATM 11
-#define NETISR_EPAIR 12 /* if_epair(4) */
+#define NETISR_ARP 4 /* same as AF_LINK */
+#define NETISR_ETHER 5 /* ethernet input */
+#define NETISR_IPV6 6
+#define NETISR_NATM 7
+#define NETISR_EPAIR 8 /* if_epair(4) */
+#define NETISR_IP_DIRECT 9 /* direct-dispatch IPv4 */
+#define NETISR_IPV6_DIRECT 10 /* direct-dispatch IPv6 */
/*
* Protocol ordering and affinity policy constants. See the detailed
@@ -212,6 +210,10 @@ void netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp);
void netisr_register(const struct netisr_handler *nhp);
int netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit);
void netisr_unregister(const struct netisr_handler *nhp);
+#ifdef VIMAGE
+void netisr_register_vnet(const struct netisr_handler *nhp);
+void netisr_unregister_vnet(const struct netisr_handler *nhp);
+#endif
/*
* Process a packet destined for a protocol, and attempt direct dispatch.
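
The renumbering above compacts the protocol ID space after the AppleTalk/IPX removal and adds direct-dispatch variants for IPv4 and IPv6. For reference, a hedged sketch of how a protocol registers against this API (the name, handler, and chosen ID slot are purely illustrative):

    static void
    example_handler(struct mbuf *m)
    {
        m_freem(m);    /* a real handler would process the packet */
    }

    static const struct netisr_handler example_nh = {
        .nh_name = "example",
        .nh_handler = example_handler,
        .nh_proto = NETISR_EPAIR,    /* illustrative; use your own ID */
        .nh_qlimit = 0,              /* 0 = use net.isr.defaultqlimit */
        .nh_policy = NETISR_POLICY_SOURCE,
        .nh_dispatch = NETISR_DISPATCH_DEFAULT,
    };

    static void
    example_attach(void)
    {
        netisr_register(&example_nh);
        /* Under VIMAGE, later vnets enable it via netisr_register_vnet(). */
    }
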
diff --git a/freebsd/sys/net/pfil.c b/freebsd/sys/net/pfil.c
index 123d03c4..7fcecc88 100644
--- a/freebsd/sys/net/pfil.c
+++ b/freebsd/sys/net/pfil.c
@@ -47,6 +47,7 @@
#include <sys/queue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/pfil.h>
static struct mtx pfil_global_lock;
@@ -54,18 +55,18 @@ static struct mtx pfil_global_lock;
MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock",
MTX_DEF);
-static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int);
-
-static int pfil_list_remove(pfil_list_t *,
- int (*)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
- void *);
+static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *);
+static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int);
+static int pfil_chain_remove(pfil_chain_t *, pfil_func_t, void *);
LIST_HEAD(pfilheadhead, pfil_head);
VNET_DEFINE(struct pfilheadhead, pfil_head_list);
#define V_pfil_head_list VNET(pfil_head_list)
+VNET_DEFINE(struct rmlock, pfil_lock);
+#define V_pfil_lock VNET(pfil_lock)
/*
- * pfil_run_hooks() runs the specified packet filter hooks.
+ * pfil_run_hooks() runs the specified packet filter hook chain.
*/
int
pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
@@ -78,8 +79,8 @@ pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
PFIL_RLOCK(ph, &rmpt);
KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0"));
- for (pfh = pfil_hook_get(dir, ph); pfh != NULL;
- pfh = TAILQ_NEXT(pfh, pfil_link)) {
+ for (pfh = pfil_chain_get(dir, ph); pfh != NULL;
+ pfh = TAILQ_NEXT(pfh, pfil_chain)) {
if (pfh->pfil_func != NULL) {
rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir,
inp);
@@ -92,6 +93,80 @@ pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
return (rv);
}
+static struct packet_filter_hook *
+pfil_chain_get(int dir, struct pfil_head *ph)
+{
+
+ if (dir == PFIL_IN)
+ return (TAILQ_FIRST(&ph->ph_in));
+ else if (dir == PFIL_OUT)
+ return (TAILQ_FIRST(&ph->ph_out));
+ else
+ return (NULL);
+}
+
+/*
+ * pfil_try_rlock() acquires rm reader lock for specified head
+ * if this is immediately possible.
+ */
+int
+pfil_try_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+ return (PFIL_TRY_RLOCK(ph, tracker));
+}
+
+/*
+ * pfil_rlock() acquires rm reader lock for specified head.
+ */
+void
+pfil_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+ PFIL_RLOCK(ph, tracker);
+}
+
+/*
+ * pfil_runlock() releases reader lock for specified head.
+ */
+void
+pfil_runlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+ PFIL_RUNLOCK(ph, tracker);
+}
+
+/*
+ * pfil_wlock() acquires writer lock for specified head.
+ */
+void
+pfil_wlock(struct pfil_head *ph)
+{
+
+ PFIL_WLOCK(ph);
+}
+
+/*
+ * pfil_wunlock() releases writer lock for specified head.
+ */
+void
+pfil_wunlock(struct pfil_head *ph)
+{
+
+ PFIL_WUNLOCK(ph);
+}
+
+/*
+ * pfil_wowned() returns a non-zero value if the current thread owns
+ * an exclusive lock.
+ */
+int
+pfil_wowned(struct pfil_head *ph)
+{
+
+ return (PFIL_WOWNED(ph));
+}
+
/*
* pfil_head_register() registers a pfil_head with the packet filter hook
* mechanism.
@@ -101,11 +176,11 @@ pfil_head_register(struct pfil_head *ph)
{
struct pfil_head *lph;
- PFIL_LIST_LOCK();
+ PFIL_HEADLIST_LOCK();
LIST_FOREACH(lph, &V_pfil_head_list, ph_list) {
if (ph->ph_type == lph->ph_type &&
ph->ph_un.phu_val == lph->ph_un.phu_val) {
- PFIL_LIST_UNLOCK();
+ PFIL_HEADLIST_UNLOCK();
return (EEXIST);
}
}
@@ -114,7 +189,7 @@ pfil_head_register(struct pfil_head *ph)
TAILQ_INIT(&ph->ph_in);
TAILQ_INIT(&ph->ph_out);
LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list);
- PFIL_LIST_UNLOCK();
+ PFIL_HEADLIST_UNLOCK();
return (0);
}
@@ -128,12 +203,12 @@ pfil_head_unregister(struct pfil_head *ph)
{
struct packet_filter_hook *pfh, *pfnext;
- PFIL_LIST_LOCK();
+ PFIL_HEADLIST_LOCK();
LIST_REMOVE(ph, ph_list);
- PFIL_LIST_UNLOCK();
- TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_link, pfnext)
+ PFIL_HEADLIST_UNLOCK();
+ TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_chain, pfnext)
free(pfh, M_IFADDR);
- TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_link, pfnext)
+ TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_chain, pfnext)
free(pfh, M_IFADDR);
PFIL_LOCK_DESTROY(ph);
return (0);
@@ -147,11 +222,11 @@ pfil_head_get(int type, u_long val)
{
struct pfil_head *ph;
- PFIL_LIST_LOCK();
+ PFIL_HEADLIST_LOCK();
LIST_FOREACH(ph, &V_pfil_head_list, ph_list)
if (ph->ph_type == type && ph->ph_un.phu_val == val)
break;
- PFIL_LIST_UNLOCK();
+ PFIL_HEADLIST_UNLOCK();
return (ph);
}
@@ -164,8 +239,7 @@ pfil_head_get(int type, u_long val)
* PFIL_WAITOK OK to call malloc with M_WAITOK.
*/
int
-pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
{
struct packet_filter_hook *pfh1 = NULL;
struct packet_filter_hook *pfh2 = NULL;
@@ -191,7 +265,7 @@ pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
if (flags & PFIL_IN) {
pfh1->pfil_func = func;
pfh1->pfil_arg = arg;
- err = pfil_list_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
+ err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
if (err)
goto locked_error;
ph->ph_nhooks++;
@@ -199,10 +273,10 @@ pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
if (flags & PFIL_OUT) {
pfh2->pfil_func = func;
pfh2->pfil_arg = arg;
- err = pfil_list_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
+ err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
if (err) {
if (flags & PFIL_IN)
- pfil_list_remove(&ph->ph_in, func, arg);
+ pfil_chain_remove(&ph->ph_in, func, arg);
goto locked_error;
}
ph->ph_nhooks++;
@@ -221,22 +295,21 @@ error:
/*
* pfil_remove_hook removes a specific function from the packet filter hook
- * list.
+ * chain.
*/
int
-pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
{
int err = 0;
PFIL_WLOCK(ph);
if (flags & PFIL_IN) {
- err = pfil_list_remove(&ph->ph_in, func, arg);
+ err = pfil_chain_remove(&ph->ph_in, func, arg);
if (err == 0)
ph->ph_nhooks--;
}
if ((err == 0) && (flags & PFIL_OUT)) {
- err = pfil_list_remove(&ph->ph_out, func, arg);
+ err = pfil_chain_remove(&ph->ph_out, func, arg);
if (err == 0)
ph->ph_nhooks--;
}
@@ -244,15 +317,18 @@ pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
return (err);
}
+/*
+ * Internal: Add a new pfil hook into a hook chain.
+ */
static int
-pfil_list_add(pfil_list_t *list, struct packet_filter_hook *pfh1, int flags)
+pfil_chain_add(pfil_chain_t *chain, struct packet_filter_hook *pfh1, int flags)
{
struct packet_filter_hook *pfh;
/*
* First make sure the hook is not already there.
*/
- TAILQ_FOREACH(pfh, list, pfil_link)
+ TAILQ_FOREACH(pfh, chain, pfil_chain)
if (pfh->pfil_func == pfh1->pfil_func &&
pfh->pfil_arg == pfh1->pfil_arg)
return (EEXIST);
@@ -262,26 +338,23 @@ pfil_list_add(pfil_list_t *list, struct packet_filter_hook *pfh1, int flags)
* the same path is followed in or out of the kernel.
*/
if (flags & PFIL_IN)
- TAILQ_INSERT_HEAD(list, pfh1, pfil_link);
+ TAILQ_INSERT_HEAD(chain, pfh1, pfil_chain);
else
- TAILQ_INSERT_TAIL(list, pfh1, pfil_link);
+ TAILQ_INSERT_TAIL(chain, pfh1, pfil_chain);
return (0);
}
/*
- * pfil_list_remove is an internal function that takes a function off the
- * specified list.
+ * Internal: Remove a pfil hook from a hook chain.
*/
static int
-pfil_list_remove(pfil_list_t *list,
- int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
- void *arg)
+pfil_chain_remove(pfil_chain_t *chain, pfil_func_t func, void *arg)
{
struct packet_filter_hook *pfh;
- TAILQ_FOREACH(pfh, list, pfil_link)
+ TAILQ_FOREACH(pfh, chain, pfil_chain)
if (pfh->pfil_func == func && pfh->pfil_arg == arg) {
- TAILQ_REMOVE(list, pfh, pfil_link);
+ TAILQ_REMOVE(chain, pfh, pfil_chain);
free(pfh, M_IFADDR);
return (0);
}
@@ -292,36 +365,34 @@ pfil_list_remove(pfil_list_t *list,
* Stuff that must be initialized for every instance (including the first of
* course).
*/
-static int
-vnet_pfil_init(const void *unused)
+static void
+vnet_pfil_init(const void *unused __unused)
{
LIST_INIT(&V_pfil_head_list);
- return (0);
+ PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared");
}
/*
* Called for the removal of each instance.
*/
-static int
-vnet_pfil_uninit(const void *unused)
+static void
+vnet_pfil_uninit(const void *unused __unused)
{
- /* XXX should panic if list is not empty */
- return (0);
+ KASSERT(LIST_EMPTY(&V_pfil_head_list),
+ ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list));
+ PFIL_LOCK_DESTROY_REAL(&V_pfil_lock);
}
-/* Define startup order. */
-#define PFIL_SYSINIT_ORDER SI_SUB_PROTO_BEGIN
-#define PFIL_MODEVENT_ORDER (SI_ORDER_FIRST) /* On boot slot in here. */
-#define PFIL_VNET_ORDER (PFIL_MODEVENT_ORDER + 2) /* Later still. */
-
/*
* Starting up.
*
* VNET_SYSINIT is called for each existing vnet and each new vnet.
+ * Make sure the pfil bits are initialized before any subsystem that
+ * might piggyback on the SI_SUB_PROTO_PFIL slot.
*/
-VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER,
+VNET_SYSINIT(vnet_pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST,
vnet_pfil_init, NULL);
/*
@@ -329,5 +400,5 @@ VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER,
*
* VNET_SYSUNINIT is called for each exiting vnet as it exits.
*/
-VNET_SYSUNINIT(vnet_pfil_uninit, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER,
+VNET_SYSUNINIT(vnet_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST,
vnet_pfil_uninit, NULL);
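
Taken together, the renames make the structure explicit: filters hang off per-direction chains and run in order via pfil_run_hooks(). A minimal sketch of a consumer of this API, using only calls shown in this diff (the counter and function names are hypothetical; returning 0 passes the packet on, while a filter that drops it must free *mp, set it to NULL, and return non-zero):

    static unsigned long example_seen;

    static int
    example_pfil_func(void *arg, struct mbuf **mp, struct ifnet *ifp,
        int dir, struct inpcb *inp)
    {
        example_seen++;    /* observe only; never drop */
        return (0);
    }

    static void
    example_pfil_attach(void)
    {
        struct pfil_head *ph;

        ph = pfil_head_get(PFIL_TYPE_AF, AF_INET);
        if (ph != NULL)
            (void)pfil_add_hook(example_pfil_func, NULL,
                PFIL_IN | PFIL_OUT | PFIL_WAITOK, ph);
    }
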
diff --git a/freebsd/sys/net/pfil.h b/freebsd/sys/net/pfil.h
index da06dedf..b78023b7 100644
--- a/freebsd/sys/net/pfil.h
+++ b/freebsd/sys/net/pfil.h
@@ -43,15 +43,18 @@ struct mbuf;
struct ifnet;
struct inpcb;
+typedef int (*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *);
+
/*
* The packet filter hooks are designed for anything to call them to
- * possibly intercept the packet.
+ * possibly intercept the packet. Multiple filter hooks are chained
+ * together and run one after another in the specified order.
*/
struct packet_filter_hook {
- TAILQ_ENTRY(packet_filter_hook) pfil_link;
- int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *);
- void *pfil_arg;
+ TAILQ_ENTRY(packet_filter_hook) pfil_chain;
+ pfil_func_t pfil_func;
+ void *pfil_arg;
};
#define PFIL_IN 0x00000001
@@ -59,63 +62,87 @@ struct packet_filter_hook {
#define PFIL_WAITOK 0x00000004
#define PFIL_ALL (PFIL_IN|PFIL_OUT)
-typedef TAILQ_HEAD(pfil_list, packet_filter_hook) pfil_list_t;
+typedef TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t;
#define PFIL_TYPE_AF 1 /* key is AF_* type */
#define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */
+#define PFIL_FLAG_PRIVATE_LOCK 0x01 /* Personal lock instead of global */
+
+/*
+ * A pfil head is created by each protocol or packet intercept point.
+ * Each packet is then run through the hook chain for inspection.
+ */
struct pfil_head {
- pfil_list_t ph_in;
- pfil_list_t ph_out;
- int ph_type;
- int ph_nhooks;
+ pfil_chain_t ph_in;
+ pfil_chain_t ph_out;
+ int ph_type;
+ int ph_nhooks;
#if defined( __linux__ ) || defined( _WIN32 )
- rwlock_t ph_mtx;
+ rwlock_t ph_mtx;
#else
- struct rmlock ph_lock;
+ struct rmlock *ph_plock; /* Pointer to the used lock */
+ struct rmlock ph_lock; /* Private lock storage */
+ int flags;
#endif
union {
- u_long phu_val;
- void *phu_ptr;
+ u_long phu_val;
+ void *phu_ptr;
} ph_un;
-#define ph_af ph_un.phu_val
-#define ph_ifnet ph_un.phu_ptr
+#define ph_af ph_un.phu_val
+#define ph_ifnet ph_un.phu_ptr
LIST_ENTRY(pfil_head) ph_list;
};
-int pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
- int, struct inpcb *), void *, int, struct pfil_head *);
-int pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
- int, struct inpcb *), void *, int, struct pfil_head *);
+/* Public functions for pfil hook management by packet filters. */
+struct pfil_head *pfil_head_get(int, u_long);
+int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *);
+int pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *);
+#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
+
+/* Public functions to run the packet inspection by protocols. */
int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *,
int, struct inpcb *inp);
+/* Public functions for pfil head management by protocols. */
int pfil_head_register(struct pfil_head *);
int pfil_head_unregister(struct pfil_head *);
-struct pfil_head *pfil_head_get(int, u_long);
-
-#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
-#define PFIL_LOCK_INIT(p) \
- rm_init_flags(&(p)->ph_lock, "PFil hook read/write mutex", RM_RECURSE)
-#define PFIL_LOCK_DESTROY(p) rm_destroy(&(p)->ph_lock)
-#define PFIL_RLOCK(p, t) rm_rlock(&(p)->ph_lock, (t))
-#define PFIL_WLOCK(p) rm_wlock(&(p)->ph_lock)
-#define PFIL_RUNLOCK(p, t) rm_runlock(&(p)->ph_lock, (t))
-#define PFIL_WUNLOCK(p) rm_wunlock(&(p)->ph_lock)
-#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
-#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)
-
-static __inline struct packet_filter_hook *
-pfil_hook_get(int dir, struct pfil_head *ph)
-{
-
- if (dir == PFIL_IN)
- return (TAILQ_FIRST(&ph->ph_in));
- else if (dir == PFIL_OUT)
- return (TAILQ_FIRST(&ph->ph_out));
- else
- return (NULL);
-}
+/* Public pfil locking functions for self managed locks by packet filters. */
+struct rm_priotracker; /* Do not require including rmlock header */
+int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *);
+void pfil_rlock(struct pfil_head *, struct rm_priotracker *);
+void pfil_runlock(struct pfil_head *, struct rm_priotracker *);
+void pfil_wlock(struct pfil_head *);
+void pfil_wunlock(struct pfil_head *);
+int pfil_wowned(struct pfil_head *ph);
+
+/* Internal pfil locking functions. */
+#define PFIL_LOCK_INIT_REAL(l, t) \
+ rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE)
+#define PFIL_LOCK_DESTROY_REAL(l) \
+ rm_destroy(l)
+#define PFIL_LOCK_INIT(p) do { \
+ if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) { \
+ PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private"); \
+ (p)->ph_plock = &(p)->ph_lock; \
+ } else \
+ (p)->ph_plock = &V_pfil_lock; \
+} while (0)
+#define PFIL_LOCK_DESTROY(p) do { \
+ if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) \
+ PFIL_LOCK_DESTROY_REAL((p)->ph_plock); \
+} while (0)
+
+#define PFIL_TRY_RLOCK(p, t) rm_try_rlock((p)->ph_plock, (t))
+#define PFIL_RLOCK(p, t) rm_rlock((p)->ph_plock, (t))
+#define PFIL_WLOCK(p) rm_wlock((p)->ph_plock)
+#define PFIL_RUNLOCK(p, t) rm_runlock((p)->ph_plock, (t))
+#define PFIL_WUNLOCK(p) rm_wunlock((p)->ph_plock)
+#define PFIL_WOWNED(p) rm_wowned((p)->ph_plock)
+
+/* Internal locking macros for global/vnet pfil_head_list. */
+#define PFIL_HEADLIST_LOCK() mtx_lock(&pfil_global_lock)
+#define PFIL_HEADLIST_UNLOCK() mtx_unlock(&pfil_global_lock)
#endif /* _NET_PFIL_H_ */
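
The PFIL_FLAG_PRIVATE_LOCK plumbing above lets a head opt out of the shared per-vnet rmlock. A sketch of how a protocol might set up such a head before registering it (field values are illustrative; PFIL_LOCK_INIT(), invoked during registration, picks the private or shared lock based on the flag):

    struct pfil_head example_pfil_head;

    static void
    example_head_init(void)
    {
        example_pfil_head.ph_type = PFIL_TYPE_AF;
        example_pfil_head.ph_af = AF_INET6;    /* key used by pfil_head_get() */
        example_pfil_head.flags = PFIL_FLAG_PRIVATE_LOCK;
        (void)pfil_head_register(&example_pfil_head);
    }
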
diff --git a/freebsd/sys/net/pfkeyv2.h b/freebsd/sys/net/pfkeyv2.h
index c45f8b05..c9b27695 100644
--- a/freebsd/sys/net/pfkeyv2.h
+++ b/freebsd/sys/net/pfkeyv2.h
@@ -218,7 +218,6 @@ struct sadb_x_sa2 {
};
/* XXX Policy Extension */
-/* sizeof(struct sadb_x_policy) == 16 */
struct sadb_x_policy {
u_int16_t sadb_x_policy_len;
u_int16_t sadb_x_policy_exttype;
@@ -226,8 +225,10 @@ struct sadb_x_policy {
u_int8_t sadb_x_policy_dir; /* direction, see ipsec.h */
u_int8_t sadb_x_policy_reserved;
u_int32_t sadb_x_policy_id;
- u_int32_t sadb_x_policy_reserved2;
+ u_int32_t sadb_x_policy_priority;
};
+_Static_assert(sizeof(struct sadb_x_policy) == 16, "struct size mismatch");
+
/*
* When policy_type == IPSEC, it is followed by some of
* the ipsec policy request.
@@ -256,31 +257,31 @@ struct sadb_x_ipsecrequest {
};
/* NAT-Traversal type, see RFC 3948 (and drafts). */
-/* sizeof(struct sadb_x_nat_t_type) == 8 */
struct sadb_x_nat_t_type {
u_int16_t sadb_x_nat_t_type_len;
u_int16_t sadb_x_nat_t_type_exttype;
u_int8_t sadb_x_nat_t_type_type;
u_int8_t sadb_x_nat_t_type_reserved[3];
};
+_Static_assert(sizeof(struct sadb_x_nat_t_type) == 8, "struct size mismatch");
/* NAT-Traversal source or destination port. */
-/* sizeof(struct sadb_x_nat_t_port) == 8 */
struct sadb_x_nat_t_port {
u_int16_t sadb_x_nat_t_port_len;
u_int16_t sadb_x_nat_t_port_exttype;
u_int16_t sadb_x_nat_t_port_port;
u_int16_t sadb_x_nat_t_port_reserved;
};
+_Static_assert(sizeof(struct sadb_x_nat_t_port) == 8, "struct size mismatch");
/* ESP fragmentation size. */
-/* sizeof(struct sadb_x_nat_t_frag) == 8 */
struct sadb_x_nat_t_frag {
u_int16_t sadb_x_nat_t_frag_len;
u_int16_t sadb_x_nat_t_frag_exttype;
u_int16_t sadb_x_nat_t_frag_fraglen;
u_int16_t sadb_x_nat_t_frag_reserved;
};
+_Static_assert(sizeof(struct sadb_x_nat_t_frag) == 8, "struct size mismatch");
#define SADB_EXT_RESERVED 0
@@ -332,39 +333,47 @@ struct sadb_x_nat_t_frag {
#define SADB_SAFLAGS_PFS 1
-/* RFC2367 numbers - meets RFC2407 */
+/*
+ * Though some of these numbers (both _AALG and _EALG) appear to be
+ * IKEv2 numbers and others original IKE numbers, they have no meaning.
+ * These are constants that the various IKE daemons use to tell the kernel
+ * what cipher to use.
+ *
+ * Do not use these constants directly to decide which Transformation ID
+ * to send. You are responsible for mapping them yourself.
+ */
#define SADB_AALG_NONE 0
#define SADB_AALG_MD5HMAC 2
#define SADB_AALG_SHA1HMAC 3
#define SADB_AALG_MAX 252
-/* private allocations - based on RFC2407/IANA assignment */
#define SADB_X_AALG_SHA2_256 5
#define SADB_X_AALG_SHA2_384 6
#define SADB_X_AALG_SHA2_512 7
#define SADB_X_AALG_RIPEMD160HMAC 8
-#define SADB_X_AALG_AES_XCBC_MAC 9 /* draft-ietf-ipsec-ciph-aes-xcbc-mac-04 */
-/* private allocations should use 249-255 (RFC2407) */
+#define SADB_X_AALG_AES_XCBC_MAC 9 /* RFC3566 */
+#define SADB_X_AALG_AES128GMAC 11 /* RFC4543 + Errata1821 */
+#define SADB_X_AALG_AES192GMAC 12
+#define SADB_X_AALG_AES256GMAC 13
#define SADB_X_AALG_MD5 249 /* Keyed MD5 */
#define SADB_X_AALG_SHA 250 /* Keyed SHA */
#define SADB_X_AALG_NULL 251 /* null authentication */
#define SADB_X_AALG_TCP_MD5 252 /* Keyed TCP-MD5 (RFC2385) */
-/* RFC2367 numbers - meets RFC2407 */
#define SADB_EALG_NONE 0
#define SADB_EALG_DESCBC 2
#define SADB_EALG_3DESCBC 3
-#define SADB_EALG_NULL 11
-#define SADB_EALG_MAX 250
-/* private allocations - based on RFC2407/IANA assignment */
#define SADB_X_EALG_CAST128CBC 6
#define SADB_X_EALG_BLOWFISHCBC 7
+#define SADB_EALG_NULL 11
#define SADB_X_EALG_RIJNDAELCBC 12
#define SADB_X_EALG_AES 12
-/* private allocations - based on RFC4312/IANA assignment */
-#define SADB_X_EALG_CAMELLIACBC 22
-/* private allocations should use 249-255 (RFC2407) */
-#define SADB_X_EALG_SKIPJACK 249 /*250*/ /* for IPSEC */
-#define SADB_X_EALG_AESCTR 250 /*249*/ /* draft-ietf-ipsec-ciph-aes-ctr-03 */
+#define SADB_X_EALG_AESCTR 13
+#define SADB_X_EALG_AESGCM8 18 /* RFC4106 */
+#define SADB_X_EALG_AESGCM12 19
+#define SADB_X_EALG_AESGCM16 20
+#define SADB_X_EALG_CAMELLIACBC 22
+#define SADB_X_EALG_AESGMAC 23 /* RFC4543 + Errata1821 */
+#define SADB_EALG_MAX 23 /* !!! keep updated !!! */
/* private allocations - based on RFC2407/IANA assignment */
#define SADB_X_CALG_NONE 0
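
The rewritten comment block above is the operative change in this hunk: the SADB_*ALG values are only selectors that IKE daemons hand the kernel, not IKE transform IDs, so a daemon needs an explicit mapping table. A minimal sketch of one direction (the helper name is hypothetical; the return values are IKEv2 ENCR transform IDs from the IANA registry / RFC 7296):

    /* Hypothetical helper: translate a PF_KEY encryption constant to an
     * IKEv2 ENCR transform ID.  Returns -1 when there is no direct
     * equivalent. */
    static int
    sadb_ealg_to_ikev2(int ealg)
    {
            switch (ealg) {
            case SADB_EALG_3DESCBC:         return (3);     /* ENCR_3DES */
            case SADB_EALG_NULL:            return (11);    /* ENCR_NULL */
            case SADB_X_EALG_AES:           return (12);    /* ENCR_AES_CBC */
            case SADB_X_EALG_AESCTR:        return (13);    /* ENCR_AES_CTR */
            case SADB_X_EALG_AESGCM16:      return (20);    /* ENCR_AES_GCM_16 */
            default:                        return (-1);
            }
    }

Several of the newly added values happen to coincide with their IKEv2 counterparts (AESCTR, the AESGCM family), which is exactly the coincidence the comment warns against relying on.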
diff --git a/freebsd/sys/net/pfvar.h b/freebsd/sys/net/pfvar.h
new file mode 100644
index 00000000..17768e96
--- /dev/null
+++ b/freebsd/sys/net/pfvar.h
@@ -0,0 +1,1757 @@
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _NET_PFVAR_H_
+#define _NET_PFVAR_H_
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/queue.h>
+#include <sys/counter.h>
+#include <sys/malloc.h>
+#include <sys/refcount.h>
+#include <sys/tree.h>
+#include <vm/uma.h>
+
+#include <net/radix.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+
+struct pf_addr {
+ union {
+ struct in_addr v4;
+ struct in6_addr v6;
+ u_int8_t addr8[16];
+ u_int16_t addr16[8];
+ u_int32_t addr32[4];
+ } pfa; /* 128-bit address */
+#define v4 pfa.v4
+#define v6 pfa.v6
+#define addr8 pfa.addr8
+#define addr16 pfa.addr16
+#define addr32 pfa.addr32
+};
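
pf_addr overlays one 128-bit buffer with IPv4, IPv6, and 8/16/32-bit array views, and the #defines after the union let code name the members directly. A small usage sketch (hypothetical demo function; assumes <string.h> and <netinet/in.h> are in scope):

    /* Sketch: the same storage viewed as an IPv4 address and as
     * 32-bit words. */
    static void
    pf_addr_demo(void)
    {
            struct pf_addr a;

            memset(&a, 0, sizeof(a));               /* also clears the v6 view */
            a.v4.s_addr = htonl(0xc0000201);        /* 192.0.2.1 */
            /* a.addr32[0] now equals htonl(0xc0000201) through the overlay */
    }

Note that the bare-name #defines (v4, addr32, ...) apply to every file that includes this header, so unrelated identifiers with those names get rewritten by the preprocessor; that is a long-standing quirk of pfvar.h.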
+
+#define PFI_AFLAG_NETWORK 0x01
+#define PFI_AFLAG_BROADCAST 0x02
+#define PFI_AFLAG_PEER 0x04
+#define PFI_AFLAG_MODEMASK 0x07
+#define PFI_AFLAG_NOALIAS 0x08
+
+struct pf_addr_wrap {
+ union {
+ struct {
+ struct pf_addr addr;
+ struct pf_addr mask;
+ } a;
+ char ifname[IFNAMSIZ];
+ char tblname[PF_TABLE_NAME_SIZE];
+ } v;
+ union {
+ struct pfi_dynaddr *dyn;
+ struct pfr_ktable *tbl;
+ int dyncnt;
+ int tblcnt;
+ } p;
+ u_int8_t type; /* PF_ADDR_* */
+ u_int8_t iflags; /* PFI_AFLAG_* */
+};
+
+#ifdef _KERNEL
+
+struct pfi_dynaddr {
+ TAILQ_ENTRY(pfi_dynaddr) entry;
+ struct pf_addr pfid_addr4;
+ struct pf_addr pfid_mask4;
+ struct pf_addr pfid_addr6;
+ struct pf_addr pfid_mask6;
+ struct pfr_ktable *pfid_kt;
+ struct pfi_kif *pfid_kif;
+ int pfid_net; /* mask or 128 */
+ int pfid_acnt4; /* address count IPv4 */
+ int pfid_acnt6; /* address count IPv6 */
+ sa_family_t pfid_af; /* rule af */
+ u_int8_t pfid_iflags; /* PFI_AFLAG_* */
+};
+
+/*
+ * Address manipulation macros
+ */
+#define HTONL(x) (x) = htonl((__uint32_t)(x))
+#define HTONS(x) (x) = htons((__uint16_t)(x))
+#define NTOHL(x) (x) = ntohl((__uint32_t)(x))
+#define NTOHS(x) (x) = ntohs((__uint16_t)(x))
+
+#define PF_NAME "pf"
+
+#define PF_HASHROW_ASSERT(h) mtx_assert(&(h)->lock, MA_OWNED)
+#define PF_HASHROW_LOCK(h) mtx_lock(&(h)->lock)
+#define PF_HASHROW_UNLOCK(h) mtx_unlock(&(h)->lock)
+
+#define PF_STATE_LOCK(s) \
+ do { \
+ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \
+ PF_HASHROW_LOCK(_ih); \
+ } while (0)
+
+#define PF_STATE_UNLOCK(s) \
+ do { \
+ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH((s))]; \
+ PF_HASHROW_UNLOCK(_ih); \
+ } while (0)
+
+#ifdef INVARIANTS
+#define PF_STATE_LOCK_ASSERT(s) \
+ do { \
+ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \
+ PF_HASHROW_ASSERT(_ih); \
+ } while (0)
+#else /* !INVARIANTS */
+#define PF_STATE_LOCK_ASSERT(s) do {} while (0)
+#endif /* INVARIANTS */
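
PF_STATE_LOCK does not lock the state itself: it hashes the state id with PF_IDHASH (defined further down in this header) and takes the mutex of that row in V_pf_idhash, so all states in the same row are serialized together. A minimal usage sketch (hypothetical function; kernel context):

    /* Sketch: update a state field under its hash-row lock.  Assumes
     * the caller already holds a reference on s. */
    static void
    pf_touch_state(struct pf_state *s, u_int32_t now)
    {
            PF_STATE_LOCK(s);
            s->expire = now;
            PF_STATE_UNLOCK(s);
    }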
+
+extern struct mtx pf_unlnkdrules_mtx;
+#define PF_UNLNKDRULES_LOCK() mtx_lock(&pf_unlnkdrules_mtx)
+#define PF_UNLNKDRULES_UNLOCK() mtx_unlock(&pf_unlnkdrules_mtx)
+
+extern struct rwlock pf_rules_lock;
+#define PF_RULES_RLOCK() rw_rlock(&pf_rules_lock)
+#define PF_RULES_RUNLOCK() rw_runlock(&pf_rules_lock)
+#define PF_RULES_WLOCK() rw_wlock(&pf_rules_lock)
+#define PF_RULES_WUNLOCK() rw_wunlock(&pf_rules_lock)
+#define PF_RULES_ASSERT() rw_assert(&pf_rules_lock, RA_LOCKED)
+#define PF_RULES_RASSERT() rw_assert(&pf_rules_lock, RA_RLOCKED)
+#define PF_RULES_WASSERT() rw_assert(&pf_rules_lock, RA_WLOCKED)
+
+#define PF_MODVER 1
+#define PFLOG_MODVER 1
+#define PFSYNC_MODVER 1
+
+#define PFLOG_MINVER 1
+#define PFLOG_PREFVER PFLOG_MODVER
+#define PFLOG_MAXVER 1
+#define PFSYNC_MINVER 1
+#define PFSYNC_PREFVER PFSYNC_MODVER
+#define PFSYNC_MAXVER 1
+
+#ifdef INET
+#ifndef INET6
+#define PF_INET_ONLY
+#endif /* ! INET6 */
+#endif /* INET */
+
+#ifdef INET6
+#ifndef INET
+#define PF_INET6_ONLY
+#endif /* ! INET */
+#endif /* INET6 */
+
+#ifdef INET
+#ifdef INET6
+#define PF_INET_INET6
+#endif /* INET6 */
+#endif /* INET */
+
+#else
+
+#define PF_INET_INET6
+
+#endif /* _KERNEL */
+
+/* Both IPv4 and IPv6 */
+#ifdef PF_INET_INET6
+
+#define PF_AEQ(a, b, c) \
+ ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \
+ (c == AF_INET6 && (a)->addr32[3] == (b)->addr32[3] && \
+ (a)->addr32[2] == (b)->addr32[2] && \
+ (a)->addr32[1] == (b)->addr32[1] && \
+ (a)->addr32[0] == (b)->addr32[0])) \
+
+#define PF_ANEQ(a, b, c) \
+ ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \
+ (c == AF_INET6 && ((a)->addr32[0] != (b)->addr32[0] || \
+ (a)->addr32[1] != (b)->addr32[1] || \
+ (a)->addr32[2] != (b)->addr32[2] || \
+ (a)->addr32[3] != (b)->addr32[3]))) \
+
+#define PF_AZERO(a, c) \
+ ((c == AF_INET && !(a)->addr32[0]) || \
+ (c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \
+ !(a)->addr32[2] && !(a)->addr32[3] )) \
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ pf_addrcpy(a, b, f)
+
+#define PF_AINC(a, f) \
+ pf_addr_inc(a, f)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ pf_poolmask(a, b, c, d, f)
+
+#else
+
+/* Just IPv6 */
+
+#ifdef PF_INET6_ONLY
+
+#define PF_AEQ(a, b, c) \
+ ((a)->addr32[3] == (b)->addr32[3] && \
+ (a)->addr32[2] == (b)->addr32[2] && \
+ (a)->addr32[1] == (b)->addr32[1] && \
+ (a)->addr32[0] == (b)->addr32[0]) \
+
+#define PF_ANEQ(a, b, c) \
+ ((a)->addr32[3] != (b)->addr32[3] || \
+ (a)->addr32[2] != (b)->addr32[2] || \
+ (a)->addr32[1] != (b)->addr32[1] || \
+ (a)->addr32[0] != (b)->addr32[0]) \
+
+#define PF_AZERO(a, c) \
+ (!(a)->addr32[0] && \
+ !(a)->addr32[1] && \
+ !(a)->addr32[2] && \
+ !(a)->addr32[3] ) \
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ pf_addrcpy(a, b, f)
+
+#define PF_AINC(a, f) \
+ pf_addr_inc(a, f)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ pf_poolmask(a, b, c, d, f)
+
+#else
+
+/* Just IPv4 */
+#ifdef PF_INET_ONLY
+
+#define PF_AEQ(a, b, c) \
+ ((a)->addr32[0] == (b)->addr32[0])
+
+#define PF_ANEQ(a, b, c) \
+ ((a)->addr32[0] != (b)->addr32[0])
+
+#define PF_AZERO(a, c) \
+ (!(a)->addr32[0])
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ (a)->v4.s_addr = (b)->v4.s_addr
+
+#define PF_AINC(a, f) \
+ do { \
+ (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \
+ } while (0)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ do { \
+ (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \
+ (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \
+ } while (0)
+
+#endif /* PF_INET_ONLY */
+#endif /* PF_INET6_ONLY */
+#endif /* PF_INET_INET6 */
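
The three variants above compile to the cheapest comparison the configured address families allow; the dual-stack PF_AEQ tests addr32[3] first, since the low word of two IPv6 addresses is the one most likely to differ. Because these are macros over the addr32 overlay, they work on any pf_addr regardless of which union member was last written. A trivial wrapper showing the intended call shape (hypothetical function name):

    /* Sketch: family-aware address equality via PF_AEQ. */
    static int
    pf_addr_equal(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
    {
            return (PF_AEQ(a, b, af));
    }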
+
+/*
+ * XXX callers are not FIB-aware in our version of pf yet.
+ * OpenBSD apparently fixed this later (2010/05/07 13:33:16, claudio).
+ */
+#define PF_MISMATCHAW(aw, x, af, neg, ifp, rtid) \
+ ( \
+ (((aw)->type == PF_ADDR_NOROUTE && \
+ pf_routable((x), (af), NULL, (rtid))) || \
+ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \
+ pf_routable((x), (af), (ifp), (rtid))) || \
+ ((aw)->type == PF_ADDR_TABLE && \
+ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \
+ ((aw)->type == PF_ADDR_DYNIFTL && \
+ !pfi_match_addr((aw)->p.dyn, (x), (af))) || \
+ ((aw)->type == PF_ADDR_RANGE && \
+ !pf_match_addr_range(&(aw)->v.a.addr, \
+ &(aw)->v.a.mask, (x), (af))) || \
+ ((aw)->type == PF_ADDR_ADDRMASK && \
+ !PF_AZERO(&(aw)->v.a.mask, (af)) && \
+ !PF_MATCHA(0, &(aw)->v.a.addr, \
+ &(aw)->v.a.mask, (x), (af))))) != \
+ (neg) \
+ )
+
+
+struct pf_rule_uid {
+ uid_t uid[2];
+ u_int8_t op;
+};
+
+struct pf_rule_gid {
+	gid_t			 gid[2];
+ u_int8_t op;
+};
+
+struct pf_rule_addr {
+ struct pf_addr_wrap addr;
+ u_int16_t port[2];
+ u_int8_t neg;
+ u_int8_t port_op;
+};
+
+struct pf_pooladdr {
+ struct pf_addr_wrap addr;
+ TAILQ_ENTRY(pf_pooladdr) entries;
+ char ifname[IFNAMSIZ];
+ struct pfi_kif *kif;
+};
+
+TAILQ_HEAD(pf_palist, pf_pooladdr);
+
+struct pf_poolhashkey {
+ union {
+ u_int8_t key8[16];
+ u_int16_t key16[8];
+ u_int32_t key32[4];
+ } pfk; /* 128-bit hash key */
+#define key8 pfk.key8
+#define key16 pfk.key16
+#define key32 pfk.key32
+};
+
+struct pf_pool {
+ struct pf_palist list;
+ struct pf_pooladdr *cur;
+ struct pf_poolhashkey key;
+ struct pf_addr counter;
+ int tblidx;
+ u_int16_t proxy_port[2];
+ u_int8_t opts;
+};
+
+
+/* A packed Operating System description for fingerprinting */
+typedef u_int32_t pf_osfp_t;
+#define PF_OSFP_ANY ((pf_osfp_t)0)
+#define PF_OSFP_UNKNOWN ((pf_osfp_t)-1)
+#define PF_OSFP_NOMATCH ((pf_osfp_t)-2)
+
+struct pf_osfp_entry {
+ SLIST_ENTRY(pf_osfp_entry) fp_entry;
+ pf_osfp_t fp_os;
+ int fp_enflags;
+#define PF_OSFP_EXPANDED 0x001 /* expanded entry */
+#define PF_OSFP_GENERIC 0x002 /* generic signature */
+#define PF_OSFP_NODETAIL 0x004 /* no p0f details */
+#define PF_OSFP_LEN 32
+ char fp_class_nm[PF_OSFP_LEN];
+ char fp_version_nm[PF_OSFP_LEN];
+ char fp_subtype_nm[PF_OSFP_LEN];
+};
+#define PF_OSFP_ENTRY_EQ(a, b) \
+ ((a)->fp_os == (b)->fp_os && \
+ memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \
+ memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \
+ memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0)
+
+/* handle pf_osfp_t packing */
+#define _FP_RESERVED_BIT 1 /* For the special negative #defines */
+#define _FP_UNUSED_BITS 1
+#define _FP_CLASS_BITS 10 /* OS Class (Windows, Linux) */
+#define _FP_VERSION_BITS 10 /* OS version (95, 98, NT, 2.4.54, 3.2) */
+#define _FP_SUBTYPE_BITS 10 /* patch level (NT SP4, SP3, ECN patch) */
+#define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \
+ (class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \
+ ((1 << _FP_CLASS_BITS) - 1); \
+ (version) = ((osfp) >> _FP_SUBTYPE_BITS) & \
+ ((1 << _FP_VERSION_BITS) - 1);\
+ (subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \
+} while(0)
+#define PF_OSFP_PACK(osfp, class, version, subtype) do { \
+ (osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \
+ + _FP_SUBTYPE_BITS); \
+ (osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \
+ _FP_SUBTYPE_BITS; \
+ (osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \
+} while(0)
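
The packing is three 10-bit fields in the low 30 bits: class in bits 29-20, version in 19-10, subtype in 9-0. Worked example: class 5, version 3, subtype 1 packs to (5 << 20) | (3 << 10) | 1 == 0x500c01, and unpacking recovers the fields (hypothetical demo function):

    /* Worked example of the 10/10/10-bit fingerprint packing. */
    static void
    osfp_pack_demo(void)
    {
            pf_osfp_t fp;
            int class, version, subtype;

            PF_OSFP_PACK(fp, 5, 3, 1);              /* fp == 0x500c01 */
            PF_OSFP_UNPACK(fp, class, version, subtype);
            /* class == 5, version == 3, subtype == 1 again */
    }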
+
+/* the fingerprint of an OS's TCP SYN packet */
+typedef u_int64_t pf_tcpopts_t;
+struct pf_os_fingerprint {
+ SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */
+ pf_tcpopts_t fp_tcpopts; /* packed TCP options */
+ u_int16_t fp_wsize; /* TCP window size */
+ u_int16_t fp_psize; /* ip->ip_len */
+ u_int16_t fp_mss; /* TCP MSS */
+ u_int16_t fp_flags;
+#define PF_OSFP_WSIZE_MOD 0x0001 /* Window modulus */
+#define PF_OSFP_WSIZE_DC 0x0002 /* Window don't care */
+#define PF_OSFP_WSIZE_MSS 0x0004 /* Window multiple of MSS */
+#define PF_OSFP_WSIZE_MTU 0x0008 /* Window multiple of MTU */
+#define PF_OSFP_PSIZE_MOD 0x0010 /* packet size modulus */
+#define PF_OSFP_PSIZE_DC 0x0020 /* packet size don't care */
+#define PF_OSFP_WSCALE 0x0040 /* TCP window scaling */
+#define PF_OSFP_WSCALE_MOD 0x0080 /* TCP window scale modulus */
+#define PF_OSFP_WSCALE_DC 0x0100 /* TCP window scale don't care */
+#define PF_OSFP_MSS 0x0200 /* TCP MSS */
+#define PF_OSFP_MSS_MOD 0x0400 /* TCP MSS modulus */
+#define PF_OSFP_MSS_DC 0x0800 /* TCP MSS don't care */
+#define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */
+#define PF_OSFP_TS0 0x2000 /* Zero timestamp */
+#define PF_OSFP_INET6 0x4000 /* IPv6 */
+ u_int8_t fp_optcnt; /* TCP option count */
+ u_int8_t fp_wscale; /* TCP window scaling */
+ u_int8_t fp_ttl; /* IPv4 TTL */
+#define PF_OSFP_MAXTTL_OFFSET 40
+/* TCP options packing */
+#define PF_OSFP_TCPOPT_NOP 0x0 /* TCP NOP option */
+#define PF_OSFP_TCPOPT_WSCALE 0x1 /* TCP window scaling option */
+#define PF_OSFP_TCPOPT_MSS 0x2 /* TCP max segment size opt */
+#define PF_OSFP_TCPOPT_SACK 0x3 /* TCP SACK OK option */
+#define PF_OSFP_TCPOPT_TS 0x4 /* TCP timestamp option */
+#define PF_OSFP_TCPOPT_BITS 3 /* bits used by each option */
+#define PF_OSFP_MAX_OPTS \
+ (sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \
+ / PF_OSFP_TCPOPT_BITS
+
+ SLIST_ENTRY(pf_os_fingerprint) fp_next;
+};
+
+struct pf_osfp_ioctl {
+ struct pf_osfp_entry fp_os;
+ pf_tcpopts_t fp_tcpopts; /* packed TCP options */
+ u_int16_t fp_wsize; /* TCP window size */
+ u_int16_t fp_psize; /* ip->ip_len */
+ u_int16_t fp_mss; /* TCP MSS */
+ u_int16_t fp_flags;
+ u_int8_t fp_optcnt; /* TCP option count */
+ u_int8_t fp_wscale; /* TCP window scaling */
+ u_int8_t fp_ttl; /* IPv4 TTL */
+
+ int fp_getnum; /* DIOCOSFPGET number */
+};
+
+
+union pf_rule_ptr {
+ struct pf_rule *ptr;
+ u_int32_t nr;
+};
+
+#define PF_ANCHOR_NAME_SIZE 64
+
+struct pf_rule {
+ struct pf_rule_addr src;
+ struct pf_rule_addr dst;
+#define PF_SKIP_IFP 0
+#define PF_SKIP_DIR 1
+#define PF_SKIP_AF 2
+#define PF_SKIP_PROTO 3
+#define PF_SKIP_SRC_ADDR 4
+#define PF_SKIP_SRC_PORT 5
+#define PF_SKIP_DST_ADDR 6
+#define PF_SKIP_DST_PORT 7
+#define PF_SKIP_COUNT 8
+ union pf_rule_ptr skip[PF_SKIP_COUNT];
+#define PF_RULE_LABEL_SIZE 64
+ char label[PF_RULE_LABEL_SIZE];
+ char ifname[IFNAMSIZ];
+ char qname[PF_QNAME_SIZE];
+ char pqname[PF_QNAME_SIZE];
+#define PF_TAG_NAME_SIZE 64
+ char tagname[PF_TAG_NAME_SIZE];
+ char match_tagname[PF_TAG_NAME_SIZE];
+
+ char overload_tblname[PF_TABLE_NAME_SIZE];
+
+ TAILQ_ENTRY(pf_rule) entries;
+ struct pf_pool rpool;
+
+ u_int64_t evaluations;
+ u_int64_t packets[2];
+ u_int64_t bytes[2];
+
+ struct pfi_kif *kif;
+ struct pf_anchor *anchor;
+ struct pfr_ktable *overload_tbl;
+
+ pf_osfp_t os_fingerprint;
+
+ int rtableid;
+ u_int32_t timeout[PFTM_MAX];
+ u_int32_t max_states;
+ u_int32_t max_src_nodes;
+ u_int32_t max_src_states;
+ u_int32_t max_src_conn;
+ struct {
+ u_int32_t limit;
+ u_int32_t seconds;
+ } max_src_conn_rate;
+ u_int32_t qid;
+ u_int32_t pqid;
+ u_int32_t rt_listid;
+ u_int32_t nr;
+ u_int32_t prob;
+ uid_t cuid;
+ pid_t cpid;
+
+ counter_u64_t states_cur;
+ counter_u64_t states_tot;
+ counter_u64_t src_nodes;
+
+ u_int16_t return_icmp;
+ u_int16_t return_icmp6;
+ u_int16_t max_mss;
+ u_int16_t tag;
+ u_int16_t match_tag;
+ u_int16_t scrub_flags;
+
+ struct pf_rule_uid uid;
+ struct pf_rule_gid gid;
+
+ u_int32_t rule_flag;
+ u_int8_t action;
+ u_int8_t direction;
+ u_int8_t log;
+ u_int8_t logif;
+ u_int8_t quick;
+ u_int8_t ifnot;
+ u_int8_t match_tag_not;
+ u_int8_t natpass;
+
+#define PF_STATE_NORMAL 0x1
+#define PF_STATE_MODULATE 0x2
+#define PF_STATE_SYNPROXY 0x3
+ u_int8_t keep_state;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t type;
+ u_int8_t code;
+ u_int8_t flags;
+ u_int8_t flagset;
+ u_int8_t min_ttl;
+ u_int8_t allow_opts;
+ u_int8_t rt;
+ u_int8_t return_ttl;
+ u_int8_t tos;
+ u_int8_t set_tos;
+ u_int8_t anchor_relative;
+ u_int8_t anchor_wildcard;
+
+#define PF_FLUSH 0x01
+#define PF_FLUSH_GLOBAL 0x02
+ u_int8_t flush;
+#define PF_PRIO_ZERO 0xff /* match "prio 0" packets */
+#define PF_PRIO_MAX 7
+ u_int8_t prio;
+ u_int8_t set_prio[2];
+
+ struct {
+ struct pf_addr addr;
+ u_int16_t port;
+ } divert;
+
+ uint64_t u_states_cur;
+ uint64_t u_states_tot;
+ uint64_t u_src_nodes;
+};
+
+/* rule flags */
+#define PFRULE_DROP 0x0000
+#define PFRULE_RETURNRST 0x0001
+#define PFRULE_FRAGMENT 0x0002
+#define PFRULE_RETURNICMP 0x0004
+#define PFRULE_RETURN 0x0008
+#define PFRULE_NOSYNC 0x0010
+#define PFRULE_SRCTRACK 0x0020 /* track source states */
+#define PFRULE_RULESRCTRACK 0x0040 /* per rule */
+#define PFRULE_REFS 0x0080 /* rule has references */
+
+/* scrub flags */
+#define PFRULE_NODF 0x0100
+#define PFRULE_RANDOMID 0x0800
+#define PFRULE_REASSEMBLE_TCP 0x1000
+#define PFRULE_SET_TOS 0x2000
+
+/* rule flags again */
+#define PFRULE_IFBOUND 0x00010000 /* if-bound */
+#define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */
+
+#define PFSTATE_HIWAT 10000 /* default state table size */
+#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */
+#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */
+
+
+struct pf_threshold {
+ u_int32_t limit;
+#define PF_THRESHOLD_MULT 1000
+#define PF_THRESHOLD_MAX (0xffffffff / PF_THRESHOLD_MULT)
+ u_int32_t seconds;
+ u_int32_t count;
+ u_int32_t last;
+};
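
The counter is kept pre-scaled by PF_THRESHOLD_MULT so the average event rate can decay in pure integer arithmetic; that is also why PF_THRESHOLD_MAX divides 0xffffffff by the multiplier. A sketch of the decay this scaling enables, modeled on the source-tracking logic in pf.c (a sketch under that assumption, not the kernel routine verbatim; limit is taken as pre-multiplied by PF_THRESHOLD_MULT, as pf does at init time):

    /* Sketch: record one event with linear decay of the scaled count. */
    static void
    threshold_add(struct pf_threshold *th, u_int32_t now)
    {
            u_int32_t diff = now - th->last;

            if (diff >= th->seconds)
                    th->count = 0;                  /* window fully elapsed */
            else
                    th->count -= th->count * diff / th->seconds;
            th->count += PF_THRESHOLD_MULT;         /* one event, scaled */
            th->last = now;
            /* the rate is exceeded once th->count > th->limit */
    }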
+
+struct pf_src_node {
+ LIST_ENTRY(pf_src_node) entry;
+ struct pf_addr addr;
+ struct pf_addr raddr;
+ union pf_rule_ptr rule;
+ struct pfi_kif *kif;
+ u_int64_t bytes[2];
+ u_int64_t packets[2];
+ u_int32_t states;
+ u_int32_t conn;
+ struct pf_threshold conn_rate;
+ u_int32_t creation;
+ u_int32_t expire;
+ sa_family_t af;
+ u_int8_t ruletype;
+};
+
+#define PFSNODE_HIWAT 10000 /* default source node table size */
+
+struct pf_state_scrub {
+ struct timeval pfss_last; /* time received last packet */
+ u_int32_t pfss_tsecr; /* last echoed timestamp */
+ u_int32_t pfss_tsval; /* largest timestamp */
+ u_int32_t pfss_tsval0; /* original timestamp */
+ u_int16_t pfss_flags;
+#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */
+#define PFSS_PAWS 0x0010 /* stricter PAWS checks */
+#define PFSS_PAWS_IDLED 0x0020 /* was idle too long. no PAWS */
+#define PFSS_DATA_TS 0x0040 /* timestamp on data packets */
+#define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */
+ u_int8_t pfss_ttl; /* stashed TTL */
+ u_int8_t pad;
+ u_int32_t pfss_ts_mod; /* timestamp modulation */
+};
+
+struct pf_state_host {
+ struct pf_addr addr;
+ u_int16_t port;
+ u_int16_t pad;
+};
+
+struct pf_state_peer {
+ struct pf_state_scrub *scrub; /* state is scrubbed */
+ u_int32_t seqlo; /* Max sequence number sent */
+ u_int32_t seqhi; /* Max the other end ACKd + win */
+ u_int32_t seqdiff; /* Sequence number modulator */
+ u_int16_t max_win; /* largest window (pre scaling) */
+ u_int16_t mss; /* Maximum segment size option */
+ u_int8_t state; /* active state level */
+ u_int8_t wscale; /* window scaling factor */
+ u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */
+ u_int8_t pad[1];
+};
+
+/* Keep synced with struct pf_state_key. */
+struct pf_state_key_cmp {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t pad[2];
+};
+
+struct pf_state_key {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t pad[2];
+
+ LIST_ENTRY(pf_state_key) entry;
+ TAILQ_HEAD(, pf_state) states[2];
+};
+
+/* Keep synced with struct pf_state. */
+struct pf_state_cmp {
+ u_int64_t id;
+ u_int32_t creatorid;
+ u_int8_t direction;
+ u_int8_t pad[3];
+};
+
+struct pf_state {
+ u_int64_t id;
+ u_int32_t creatorid;
+ u_int8_t direction;
+ u_int8_t pad[3];
+
+ u_int refs;
+ TAILQ_ENTRY(pf_state) sync_list;
+ TAILQ_ENTRY(pf_state) key_list[2];
+ LIST_ENTRY(pf_state) entry;
+ struct pf_state_peer src;
+ struct pf_state_peer dst;
+ union pf_rule_ptr rule;
+ union pf_rule_ptr anchor;
+ union pf_rule_ptr nat_rule;
+ struct pf_addr rt_addr;
+ struct pf_state_key *key[2]; /* addresses stack and wire */
+ struct pfi_kif *kif;
+ struct pfi_kif *rt_kif;
+ struct pf_src_node *src_node;
+ struct pf_src_node *nat_src_node;
+ u_int64_t packets[2];
+ u_int64_t bytes[2];
+ u_int32_t creation;
+ u_int32_t expire;
+ u_int32_t pfsync_time;
+ u_int16_t tag;
+ u_int8_t log;
+ u_int8_t state_flags;
+#define PFSTATE_ALLOWOPTS 0x01
+#define PFSTATE_SLOPPY 0x02
+/* was PFSTATE_PFLOW 0x04 */
+#define PFSTATE_NOSYNC 0x08
+#define PFSTATE_ACK 0x10
+#define PFSTATE_SETPRIO 0x0200
+#define PFSTATE_SETMASK (PFSTATE_SETPRIO)
+ u_int8_t timeout;
+ u_int8_t sync_state; /* PFSYNC_S_x */
+
+ /* XXX */
+ u_int8_t sync_updates;
+ u_int8_t _tail[3];
+};
+
+/*
+ * Unified state structures for pulling states out of the kernel
+ * used by pfsync(4) and the pf(4) ioctl.
+ */
+struct pfsync_state_scrub {
+ u_int16_t pfss_flags;
+ u_int8_t pfss_ttl; /* stashed TTL */
+#define PFSYNC_SCRUB_FLAG_VALID 0x01
+ u_int8_t scrub_flag;
+ u_int32_t pfss_ts_mod; /* timestamp modulation */
+} __packed;
+
+struct pfsync_state_peer {
+ struct pfsync_state_scrub scrub; /* state is scrubbed */
+ u_int32_t seqlo; /* Max sequence number sent */
+ u_int32_t seqhi; /* Max the other end ACKd + win */
+ u_int32_t seqdiff; /* Sequence number modulator */
+ u_int16_t max_win; /* largest window (pre scaling) */
+ u_int16_t mss; /* Maximum segment size option */
+ u_int8_t state; /* active state level */
+ u_int8_t wscale; /* window scaling factor */
+ u_int8_t pad[6];
+} __packed;
+
+struct pfsync_state_key {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+};
+
+struct pfsync_state {
+ u_int64_t id;
+ char ifname[IFNAMSIZ];
+ struct pfsync_state_key key[2];
+ struct pfsync_state_peer src;
+ struct pfsync_state_peer dst;
+ struct pf_addr rt_addr;
+ u_int32_t rule;
+ u_int32_t anchor;
+ u_int32_t nat_rule;
+ u_int32_t creation;
+ u_int32_t expire;
+ u_int32_t packets[2][2];
+ u_int32_t bytes[2][2];
+ u_int32_t creatorid;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t direction;
+ u_int8_t __spare[2];
+ u_int8_t log;
+ u_int8_t state_flags;
+ u_int8_t timeout;
+ u_int8_t sync_flags;
+ u_int8_t updates;
+} __packed;
+
+#ifdef _KERNEL
+/* pfsync */
+typedef int pfsync_state_import_t(struct pfsync_state *, u_int8_t);
+typedef void pfsync_insert_state_t(struct pf_state *);
+typedef void pfsync_update_state_t(struct pf_state *);
+typedef void pfsync_delete_state_t(struct pf_state *);
+typedef void pfsync_clear_states_t(u_int32_t, const char *);
+typedef int pfsync_defer_t(struct pf_state *, struct mbuf *);
+
+extern pfsync_state_import_t *pfsync_state_import_ptr;
+extern pfsync_insert_state_t *pfsync_insert_state_ptr;
+extern pfsync_update_state_t *pfsync_update_state_ptr;
+extern pfsync_delete_state_t *pfsync_delete_state_ptr;
+extern pfsync_clear_states_t *pfsync_clear_states_ptr;
+extern pfsync_defer_t *pfsync_defer_ptr;
+
+void pfsync_state_export(struct pfsync_state *,
+ struct pf_state *);
+
+/* pflog */
+struct pf_ruleset;
+struct pf_pdesc;
+typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t,
+ u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *,
+ struct pf_ruleset *, struct pf_pdesc *, int);
+extern pflog_packet_t *pflog_packet_ptr;
+
+#endif /* _KERNEL */
+
+#define PFSYNC_FLAG_SRCNODE 0x04
+#define PFSYNC_FLAG_NATSRCNODE 0x08
+
+/* for copies to/from network byte order */
+/* ioctl interface also uses network byte order */
+#define pf_state_peer_hton(s,d) do { \
+ (d)->seqlo = htonl((s)->seqlo); \
+ (d)->seqhi = htonl((s)->seqhi); \
+ (d)->seqdiff = htonl((s)->seqdiff); \
+ (d)->max_win = htons((s)->max_win); \
+ (d)->mss = htons((s)->mss); \
+ (d)->state = (s)->state; \
+ (d)->wscale = (s)->wscale; \
+ if ((s)->scrub) { \
+ (d)->scrub.pfss_flags = \
+ htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \
+ (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \
+ (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\
+ (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \
+ } \
+} while (0)
+
+#define pf_state_peer_ntoh(s,d) do { \
+ (d)->seqlo = ntohl((s)->seqlo); \
+ (d)->seqhi = ntohl((s)->seqhi); \
+ (d)->seqdiff = ntohl((s)->seqdiff); \
+ (d)->max_win = ntohs((s)->max_win); \
+ (d)->mss = ntohs((s)->mss); \
+ (d)->state = (s)->state; \
+ (d)->wscale = (s)->wscale; \
+ if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \
+ (d)->scrub != NULL) { \
+ (d)->scrub->pfss_flags = \
+ ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \
+ (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \
+ (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\
+ } \
+} while (0)
+
+#define pf_state_counter_hton(s,d) do { \
+ d[0] = htonl((s>>32)&0xffffffff); \
+ d[1] = htonl(s&0xffffffff); \
+} while (0)
+
+#define pf_state_counter_from_pfsync(s) \
+ (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1]))
+
+#define pf_state_counter_ntoh(s,d) do { \
+ d = ntohl(s[0]); \
+ d = d<<32; \
+ d += ntohl(s[1]); \
+} while (0)
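
These helpers move 64-bit counters across the wire as two big-endian 32-bit words, high word first; note that pf_state_counter_from_pfsync applies no ntohl, so it expects words already converted to host order. A worked round trip (hypothetical demo function):

    /* Worked example: 0x0000000100000002 crosses the wire as
     * htonl(1), htonl(2). */
    static void
    counter_roundtrip_demo(void)
    {
            u_int64_t c = 0x0000000100000002ULL, back;
            u_int32_t wire[2];

            pf_state_counter_hton(c, wire);  /* wire[0] = htonl(1), wire[1] = htonl(2) */
            pf_state_counter_ntoh(wire, back);       /* back == c on either endianness */
    }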
+
+TAILQ_HEAD(pf_rulequeue, pf_rule);
+
+struct pf_anchor;
+
+struct pf_ruleset {
+ struct {
+ struct pf_rulequeue queues[2];
+ struct {
+ struct pf_rulequeue *ptr;
+ struct pf_rule **ptr_array;
+ u_int32_t rcount;
+ u_int32_t ticket;
+ int open;
+ } active, inactive;
+ } rules[PF_RULESET_MAX];
+ struct pf_anchor *anchor;
+ u_int32_t tticket;
+ int tables;
+ int topen;
+};
+
+RB_HEAD(pf_anchor_global, pf_anchor);
+RB_HEAD(pf_anchor_node, pf_anchor);
+struct pf_anchor {
+ RB_ENTRY(pf_anchor) entry_global;
+ RB_ENTRY(pf_anchor) entry_node;
+ struct pf_anchor *parent;
+ struct pf_anchor_node children;
+ char name[PF_ANCHOR_NAME_SIZE];
+ char path[MAXPATHLEN];
+ struct pf_ruleset ruleset;
+ int refcnt; /* anchor rules */
+ int match; /* XXX: used for pfctl black magic */
+};
+RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare);
+RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare);
+
+#define PF_RESERVED_ANCHOR "_pf"
+
+#define PFR_TFLAG_PERSIST 0x00000001
+#define PFR_TFLAG_CONST 0x00000002
+#define PFR_TFLAG_ACTIVE 0x00000004
+#define PFR_TFLAG_INACTIVE 0x00000008
+#define PFR_TFLAG_REFERENCED 0x00000010
+#define PFR_TFLAG_REFDANCHOR 0x00000020
+#define PFR_TFLAG_COUNTERS 0x00000040
+/* Adjust masks below when adding flags. */
+#define PFR_TFLAG_USRMASK (PFR_TFLAG_PERSIST | \
+ PFR_TFLAG_CONST | \
+ PFR_TFLAG_COUNTERS)
+#define PFR_TFLAG_SETMASK (PFR_TFLAG_ACTIVE | \
+ PFR_TFLAG_INACTIVE | \
+ PFR_TFLAG_REFERENCED | \
+ PFR_TFLAG_REFDANCHOR)
+#define PFR_TFLAG_ALLMASK (PFR_TFLAG_PERSIST | \
+ PFR_TFLAG_CONST | \
+ PFR_TFLAG_ACTIVE | \
+ PFR_TFLAG_INACTIVE | \
+ PFR_TFLAG_REFERENCED | \
+ PFR_TFLAG_REFDANCHOR | \
+ PFR_TFLAG_COUNTERS)
+
+struct pf_anchor_stackframe;
+
+struct pfr_table {
+ char pfrt_anchor[MAXPATHLEN];
+ char pfrt_name[PF_TABLE_NAME_SIZE];
+ u_int32_t pfrt_flags;
+ u_int8_t pfrt_fback;
+};
+
+enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED,
+ PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE,
+ PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX };
+
+struct pfr_addr {
+ union {
+ struct in_addr _pfra_ip4addr;
+ struct in6_addr _pfra_ip6addr;
+ } pfra_u;
+ u_int8_t pfra_af;
+ u_int8_t pfra_net;
+ u_int8_t pfra_not;
+ u_int8_t pfra_fback;
+};
+#define pfra_ip4addr pfra_u._pfra_ip4addr
+#define pfra_ip6addr pfra_u._pfra_ip6addr
+
+enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX };
+enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX };
+#define PFR_OP_XPASS PFR_OP_ADDR_MAX
+
+struct pfr_astats {
+ struct pfr_addr pfras_a;
+ u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ long pfras_tzero;
+};
+
+enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX };
+
+struct pfr_tstats {
+ struct pfr_table pfrts_t;
+ u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ u_int64_t pfrts_match;
+ u_int64_t pfrts_nomatch;
+ long pfrts_tzero;
+ int pfrts_cnt;
+ int pfrts_refcnt[PFR_REFCNT_MAX];
+};
+#define pfrts_name pfrts_t.pfrt_name
+#define pfrts_flags pfrts_t.pfrt_flags
+
+#ifndef _SOCKADDR_UNION_DEFINED
+#define _SOCKADDR_UNION_DEFINED
+union sockaddr_union {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+};
+#endif /* _SOCKADDR_UNION_DEFINED */
+
+struct pfr_kcounters {
+ u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ u_int64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+};
+
+SLIST_HEAD(pfr_kentryworkq, pfr_kentry);
+struct pfr_kentry {
+ struct radix_node pfrke_node[2];
+ union sockaddr_union pfrke_sa;
+ SLIST_ENTRY(pfr_kentry) pfrke_workq;
+ struct pfr_kcounters *pfrke_counters;
+ long pfrke_tzero;
+ u_int8_t pfrke_af;
+ u_int8_t pfrke_net;
+ u_int8_t pfrke_not;
+ u_int8_t pfrke_mark;
+};
+
+SLIST_HEAD(pfr_ktableworkq, pfr_ktable);
+RB_HEAD(pfr_ktablehead, pfr_ktable);
+struct pfr_ktable {
+ struct pfr_tstats pfrkt_ts;
+ RB_ENTRY(pfr_ktable) pfrkt_tree;
+ SLIST_ENTRY(pfr_ktable) pfrkt_workq;
+ struct radix_node_head *pfrkt_ip4;
+ struct radix_node_head *pfrkt_ip6;
+ struct pfr_ktable *pfrkt_shadow;
+ struct pfr_ktable *pfrkt_root;
+ struct pf_ruleset *pfrkt_rs;
+ long pfrkt_larg;
+ int pfrkt_nflags;
+};
+#define pfrkt_t pfrkt_ts.pfrts_t
+#define pfrkt_name pfrkt_t.pfrt_name
+#define pfrkt_anchor pfrkt_t.pfrt_anchor
+#define pfrkt_ruleset pfrkt_t.pfrt_ruleset
+#define pfrkt_flags pfrkt_t.pfrt_flags
+#define pfrkt_cnt pfrkt_ts.pfrts_cnt
+#define pfrkt_refcnt pfrkt_ts.pfrts_refcnt
+#define pfrkt_packets pfrkt_ts.pfrts_packets
+#define pfrkt_bytes pfrkt_ts.pfrts_bytes
+#define pfrkt_match pfrkt_ts.pfrts_match
+#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch
+#define pfrkt_tzero pfrkt_ts.pfrts_tzero
+
+/* keep synced with pfi_kif, used in RB_FIND */
+struct pfi_kif_cmp {
+ char pfik_name[IFNAMSIZ];
+};
+
+struct pfi_kif {
+ char pfik_name[IFNAMSIZ];
+ union {
+ RB_ENTRY(pfi_kif) _pfik_tree;
+ LIST_ENTRY(pfi_kif) _pfik_list;
+ } _pfik_glue;
+#define pfik_tree _pfik_glue._pfik_tree
+#define pfik_list _pfik_glue._pfik_list
+ u_int64_t pfik_packets[2][2][2];
+ u_int64_t pfik_bytes[2][2][2];
+ u_int32_t pfik_tzero;
+ u_int pfik_flags;
+ struct ifnet *pfik_ifp;
+ struct ifg_group *pfik_group;
+ u_int pfik_rulerefs;
+ TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs;
+};
+
+#define PFI_IFLAG_REFS 0x0001 /* has state references */
+#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */
+
+struct pf_pdesc {
+ struct {
+ int done;
+ uid_t uid;
+ gid_t gid;
+ } lookup;
+	u_int64_t	 tot_len;	/* total packet length */
+ union {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct icmp *icmp;
+#ifdef INET6
+ struct icmp6_hdr *icmp6;
+#endif /* INET6 */
+ void *any;
+ } hdr;
+
+ struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */
+ struct pf_addr *src; /* src address */
+ struct pf_addr *dst; /* dst address */
+ u_int16_t *sport;
+ u_int16_t *dport;
+ struct pf_mtag *pf_mtag;
+
+ u_int32_t p_len; /* total length of payload */
+
+ u_int16_t *ip_sum;
+ u_int16_t *proto_sum;
+ u_int16_t flags; /* Let SCRUB trigger behavior in
+ * state code. Easier than tags */
+#define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */
+#define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t tos;
+ u_int8_t dir; /* direction */
+ u_int8_t sidx; /* key index for source */
+ u_int8_t didx; /* key index for destination */
+};
+
+/* flags for RDR options */
+#define PF_DPORT_RANGE 0x01 /* Dest port uses range */
+#define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */
+
+/* UDP state enumeration */
+#define PFUDPS_NO_TRAFFIC 0
+#define PFUDPS_SINGLE 1
+#define PFUDPS_MULTIPLE 2
+
+#define PFUDPS_NSTATES 3 /* number of state levels */
+
+#define PFUDPS_NAMES { \
+ "NO_TRAFFIC", \
+ "SINGLE", \
+ "MULTIPLE", \
+ NULL \
+}
+
+/* Other protocol state enumeration */
+#define PFOTHERS_NO_TRAFFIC 0
+#define PFOTHERS_SINGLE 1
+#define PFOTHERS_MULTIPLE 2
+
+#define PFOTHERS_NSTATES 3 /* number of state levels */
+
+#define PFOTHERS_NAMES { \
+ "NO_TRAFFIC", \
+ "SINGLE", \
+ "MULTIPLE", \
+ NULL \
+}
+
+#define ACTION_SET(a, x) \
+ do { \
+ if ((a) != NULL) \
+ *(a) = (x); \
+ } while (0)
+
+#define REASON_SET(a, x) \
+ do { \
+ if ((a) != NULL) \
+ *(a) = (x); \
+ if (x < PFRES_MAX) \
+ counter_u64_add(V_pf_status.counters[x], 1); \
+ } while (0)
+
+struct pf_kstatus {
+ counter_u64_t counters[PFRES_MAX]; /* reason for passing/dropping */
+ counter_u64_t lcounters[LCNT_MAX]; /* limit counters */
+ counter_u64_t fcounters[FCNT_MAX]; /* state operation counters */
+ counter_u64_t scounters[SCNT_MAX]; /* src_node operation counters */
+ uint32_t states;
+ uint32_t src_nodes;
+ uint32_t running;
+ uint32_t since;
+ uint32_t debug;
+ uint32_t hostid;
+ char ifname[IFNAMSIZ];
+ uint8_t pf_chksum[PF_MD5_DIGEST_LENGTH];
+};
+
+struct pf_divert {
+ union {
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ } addr;
+ u_int16_t port;
+};
+
+#define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */
+#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */
+
+/*
+ * ioctl parameter structures
+ */
+
+struct pfioc_pooladdr {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t nr;
+ u_int32_t r_num;
+ u_int8_t r_action;
+ u_int8_t r_last;
+ u_int8_t af;
+ char anchor[MAXPATHLEN];
+ struct pf_pooladdr addr;
+};
+
+struct pfioc_rule {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t pool_ticket;
+ u_int32_t nr;
+ char anchor[MAXPATHLEN];
+ char anchor_call[MAXPATHLEN];
+ struct pf_rule rule;
+};
+
+struct pfioc_natlook {
+ struct pf_addr saddr;
+ struct pf_addr daddr;
+ struct pf_addr rsaddr;
+ struct pf_addr rdaddr;
+ u_int16_t sport;
+ u_int16_t dport;
+ u_int16_t rsport;
+ u_int16_t rdport;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t direction;
+};
+
+struct pfioc_state {
+ struct pfsync_state state;
+};
+
+struct pfioc_src_node_kill {
+ sa_family_t psnk_af;
+ struct pf_rule_addr psnk_src;
+ struct pf_rule_addr psnk_dst;
+ u_int psnk_killed;
+};
+
+struct pfioc_state_kill {
+ struct pf_state_cmp psk_pfcmp;
+ sa_family_t psk_af;
+ int psk_proto;
+ struct pf_rule_addr psk_src;
+ struct pf_rule_addr psk_dst;
+ char psk_ifname[IFNAMSIZ];
+ char psk_label[PF_RULE_LABEL_SIZE];
+ u_int psk_killed;
+};
+
+struct pfioc_states {
+ int ps_len;
+ union {
+ caddr_t psu_buf;
+ struct pfsync_state *psu_states;
+ } ps_u;
+#define ps_buf ps_u.psu_buf
+#define ps_states ps_u.psu_states
+};
+
+struct pfioc_src_nodes {
+ int psn_len;
+ union {
+ caddr_t psu_buf;
+ struct pf_src_node *psu_src_nodes;
+ } psn_u;
+#define psn_buf psn_u.psu_buf
+#define psn_src_nodes psn_u.psu_src_nodes
+};
+
+struct pfioc_if {
+ char ifname[IFNAMSIZ];
+};
+
+struct pfioc_tm {
+ int timeout;
+ int seconds;
+};
+
+struct pfioc_limit {
+ int index;
+ unsigned limit;
+};
+
+struct pfioc_altq {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t nr;
+ struct pf_altq altq;
+};
+
+struct pfioc_qstats {
+ u_int32_t ticket;
+ u_int32_t nr;
+ void *buf;
+ int nbytes;
+ u_int8_t scheduler;
+};
+
+struct pfioc_ruleset {
+ u_int32_t nr;
+ char path[MAXPATHLEN];
+ char name[PF_ANCHOR_NAME_SIZE];
+};
+
+#define PF_RULESET_ALTQ (PF_RULESET_MAX)
+#define PF_RULESET_TABLE (PF_RULESET_MAX+1)
+struct pfioc_trans {
+ int size; /* number of elements */
+ int esize; /* size of each element in bytes */
+ struct pfioc_trans_e {
+ int rs_num;
+ char anchor[MAXPATHLEN];
+ u_int32_t ticket;
+ } *array;
+};
+
+#define PFR_FLAG_ATOMIC 0x00000001 /* unused */
+#define PFR_FLAG_DUMMY 0x00000002
+#define PFR_FLAG_FEEDBACK 0x00000004
+#define PFR_FLAG_CLSTATS 0x00000008
+#define PFR_FLAG_ADDRSTOO 0x00000010
+#define PFR_FLAG_REPLACE 0x00000020
+#define PFR_FLAG_ALLRSETS 0x00000040
+#define PFR_FLAG_ALLMASK 0x0000007F
+#ifdef _KERNEL
+#define PFR_FLAG_USERIOCTL 0x10000000
+#endif
+
+struct pfioc_table {
+ struct pfr_table pfrio_table;
+ void *pfrio_buffer;
+ int pfrio_esize;
+ int pfrio_size;
+ int pfrio_size2;
+ int pfrio_nadd;
+ int pfrio_ndel;
+ int pfrio_nchange;
+ int pfrio_flags;
+ u_int32_t pfrio_ticket;
+};
+#define pfrio_exists pfrio_nadd
+#define pfrio_nzero pfrio_nadd
+#define pfrio_nmatch pfrio_nadd
+#define pfrio_naddr pfrio_size2
+#define pfrio_setflag pfrio_size2
+#define pfrio_clrflag pfrio_nadd
+
+struct pfioc_iface {
+ char pfiio_name[IFNAMSIZ];
+ void *pfiio_buffer;
+ int pfiio_esize;
+ int pfiio_size;
+ int pfiio_nzero;
+ int pfiio_flags;
+};
+
+
+/*
+ * ioctl operations
+ */
+
+#define DIOCSTART _IO ('D', 1)
+#define DIOCSTOP _IO ('D', 2)
+#define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule)
+#define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule)
+#define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule)
+/* XXX cut 8 - 17 */
+#define DIOCCLRSTATES _IOWR('D', 18, struct pfioc_state_kill)
+#define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state)
+#define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if)
+#define DIOCGETSTATUS _IOWR('D', 21, struct pf_status)
+#define DIOCCLRSTATUS _IO ('D', 22)
+#define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook)
+#define DIOCSETDEBUG _IOWR('D', 24, u_int32_t)
+#define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states)
+#define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule)
+/* XXX cut 26 - 28 */
+#define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm)
+#define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm)
+#define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state)
+#define DIOCCLRRULECTRS _IO ('D', 38)
+#define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit)
+#define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit)
+#define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill)
+#define DIOCSTARTALTQ _IO ('D', 42)
+#define DIOCSTOPALTQ _IO ('D', 43)
+#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq)
+#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq)
+#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq)
+#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq)
+#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats)
+#define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr)
+#define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr)
+#define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr)
+#define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr)
+#define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr)
+/* XXX cut 55 - 57 */
+#define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset)
+#define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset)
+#define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table)
+#define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table)
+#define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table)
+#define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table)
+#define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table)
+#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table)
+#define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table)
+#define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table)
+#define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table)
+#define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table)
+#define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table)
+#define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table)
+#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table)
+#define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table)
+#define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table)
+#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table)
+#define DIOCOSFPFLUSH _IO('D', 78)
+#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl)
+#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl)
+#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans)
+#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans)
+#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans)
+#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes)
+#define DIOCCLRSRCNODES _IO('D', 85)
+#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t)
+#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface)
+#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface)
+#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface)
+#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill)
+struct pf_ifspeed {
+ char ifname[IFNAMSIZ];
+ u_int32_t baudrate;
+};
+#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed)
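
The bulk getters among these (DIOCGETSTATES, DIOCRGETADDRS, ...) use a probe-then-fetch protocol: call once with a zero length so the kernel reports the required buffer size, allocate, then call again. A minimal userland sketch for DIOCGETSTATES (hypothetical helper; error handling trimmed, and note that pfctl actually retries in a loop because the state table can grow between the two calls):

    /* Sketch: fetch the state table via DIOCGETSTATES.  Assumes fd is an
     * open descriptor on /dev/pf and that <string.h>, <stdlib.h>, and
     * <sys/ioctl.h> are in scope. */
    static struct pfsync_state *
    get_states(int fd, int *count)
    {
            struct pfioc_states ps;

            memset(&ps, 0, sizeof(ps));
            if (ioctl(fd, DIOCGETSTATES, &ps) == -1)
                    return (NULL);          /* probe: kernel fills ps_len */
            if ((ps.ps_buf = malloc(ps.ps_len)) == NULL)
                    return (NULL);
            if (ioctl(fd, DIOCGETSTATES, &ps) == -1) {
                    free(ps.ps_buf);
                    return (NULL);
            }
            *count = ps.ps_len / sizeof(struct pfsync_state);
            return (ps.ps_states);
    }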
+
+#ifdef _KERNEL
+LIST_HEAD(pf_src_node_list, pf_src_node);
+struct pf_srchash {
+ struct pf_src_node_list nodes;
+ struct mtx lock;
+};
+
+struct pf_keyhash {
+ LIST_HEAD(, pf_state_key) keys;
+ struct mtx lock;
+};
+
+struct pf_idhash {
+ LIST_HEAD(, pf_state) states;
+ struct mtx lock;
+};
+
+extern u_long pf_hashmask;
+extern u_long pf_srchashmask;
+#define PF_HASHSIZ (32768)
+VNET_DECLARE(struct pf_keyhash *, pf_keyhash);
+VNET_DECLARE(struct pf_idhash *, pf_idhash);
+#define V_pf_keyhash VNET(pf_keyhash)
+#define V_pf_idhash VNET(pf_idhash)
+VNET_DECLARE(struct pf_srchash *, pf_srchash);
+#define V_pf_srchash VNET(pf_srchash)
+
+#define PF_IDHASH(s) (be64toh((s)->id) % (pf_hashmask + 1))
+
+VNET_DECLARE(void *, pf_swi_cookie);
+#define V_pf_swi_cookie VNET(pf_swi_cookie)
+
+VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]);
+#define V_pf_stateid VNET(pf_stateid)
+
+TAILQ_HEAD(pf_altqqueue, pf_altq);
+VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]);
+#define V_pf_altqs VNET(pf_altqs)
+VNET_DECLARE(struct pf_palist, pf_pabuf);
+#define V_pf_pabuf VNET(pf_pabuf)
+
+VNET_DECLARE(u_int32_t, ticket_altqs_active);
+#define V_ticket_altqs_active VNET(ticket_altqs_active)
+VNET_DECLARE(u_int32_t, ticket_altqs_inactive);
+#define V_ticket_altqs_inactive VNET(ticket_altqs_inactive)
+VNET_DECLARE(int, altqs_inactive_open);
+#define V_altqs_inactive_open VNET(altqs_inactive_open)
+VNET_DECLARE(u_int32_t, ticket_pabuf);
+#define V_ticket_pabuf VNET(ticket_pabuf)
+VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active);
+#define V_pf_altqs_active VNET(pf_altqs_active)
+VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive);
+#define V_pf_altqs_inactive VNET(pf_altqs_inactive)
+
+VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules);
+#define V_pf_unlinked_rules VNET(pf_unlinked_rules)
+
+void pf_initialize(void);
+void pf_mtag_initialize(void);
+void pf_mtag_cleanup(void);
+void pf_cleanup(void);
+
+struct pf_mtag *pf_get_mtag(struct mbuf *);
+
+extern void pf_calc_skip_steps(struct pf_rulequeue *);
+#ifdef ALTQ
+extern void pf_altq_ifnet_event(struct ifnet *, int);
+#endif
+VNET_DECLARE(uma_zone_t, pf_state_z);
+#define V_pf_state_z VNET(pf_state_z)
+VNET_DECLARE(uma_zone_t, pf_state_key_z);
+#define V_pf_state_key_z VNET(pf_state_key_z)
+VNET_DECLARE(uma_zone_t, pf_state_scrub_z);
+#define V_pf_state_scrub_z VNET(pf_state_scrub_z)
+
+extern void pf_purge_thread(void *);
+extern void pf_unload_vnet_purge(void);
+extern void pf_intr(void *);
+extern void pf_purge_expired_src_nodes(void);
+
+extern int pf_unlink_state(struct pf_state *, u_int);
+#define PF_ENTER_LOCKED 0x00000001
+#define PF_RETURN_LOCKED 0x00000002
+extern int pf_state_insert(struct pfi_kif *,
+ struct pf_state_key *,
+ struct pf_state_key *,
+ struct pf_state *);
+extern void pf_free_state(struct pf_state *);
+
+static __inline void
+pf_ref_state(struct pf_state *s)
+{
+
+ refcount_acquire(&s->refs);
+}
+
+static __inline int
+pf_release_state(struct pf_state *s)
+{
+
+ if (refcount_release(&s->refs)) {
+ pf_free_state(s);
+ return (1);
+ } else
+ return (0);
+}
+
+extern struct pf_state *pf_find_state_byid(uint64_t, uint32_t);
+extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *,
+ u_int, int *);
+extern struct pf_src_node *pf_find_src_node(struct pf_addr *,
+ struct pf_rule *, sa_family_t, int);
+extern void pf_unlink_src_node(struct pf_src_node *);
+extern u_int pf_free_src_nodes(struct pf_src_node_list *);
+extern void pf_print_state(struct pf_state *);
+extern void pf_print_flags(u_int8_t);
+extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
+ u_int8_t);
+extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t,
+ u_int16_t, u_int16_t, u_int8_t);
+
+VNET_DECLARE(struct ifnet *, sync_ifp);
+#define V_sync_ifp		 VNET(sync_ifp)
+VNET_DECLARE(struct pf_rule, pf_default_rule);
+#define V_pf_default_rule VNET(pf_default_rule)
+extern void pf_addrcpy(struct pf_addr *, struct pf_addr *,
+ u_int8_t);
+void pf_free_rule(struct pf_rule *);
+
+#ifdef INET
+int pf_test(int, struct ifnet *, struct mbuf **, struct inpcb *);
+int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *,
+ struct pf_pdesc *);
+#endif /* INET */
+
+#ifdef INET6
+int pf_test6(int, struct ifnet *, struct mbuf **, struct inpcb *);
+int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *,
+ struct pf_pdesc *);
+void pf_poolmask(struct pf_addr *, struct pf_addr*,
+ struct pf_addr *, struct pf_addr *, u_int8_t);
+void pf_addr_inc(struct pf_addr *, sa_family_t);
+int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *);
+#endif /* INET6 */
+
+u_int32_t pf_new_isn(struct pf_state *);
+void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
+ sa_family_t);
+void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
+void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t,
+ u_int8_t);
+void pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t);
+void pf_send_deferred_syn(struct pf_state *);
+int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, sa_family_t);
+int pf_match_addr_range(struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, sa_family_t);
+int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t);
+
+void pf_normalize_init(void);
+void pf_normalize_cleanup(void);
+int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *,
+ struct pf_pdesc *);
+void pf_normalize_tcp_cleanup(struct pf_state *);
+int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *,
+ struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *);
+int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *,
+ u_short *, struct tcphdr *, struct pf_state *,
+ struct pf_state_peer *, struct pf_state_peer *, int *);
+u_int32_t
+ pf_state_expires(const struct pf_state *);
+void pf_purge_expired_fragments(void);
+int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *,
+ int);
+int pf_socket_lookup(int, struct pf_pdesc *, struct mbuf *);
+struct pf_state_key *pf_alloc_state_key(int);
+void pfr_initialize(void);
+void pfr_cleanup(void);
+int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t);
+void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t,
+ u_int64_t, int, int, int);
+int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t);
+void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *);
+struct pfr_ktable *
+ pfr_attach_table(struct pf_ruleset *, char *);
+void pfr_detach_table(struct pfr_ktable *);
+int pfr_clr_tables(struct pfr_table *, int *, int);
+int pfr_add_tables(struct pfr_table *, int, int *, int);
+int pfr_del_tables(struct pfr_table *, int, int *, int);
+int pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int);
+int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int);
+int pfr_clr_tstats(struct pfr_table *, int, int *, int);
+int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int);
+int pfr_clr_addrs(struct pfr_table *, int *, int);
+int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long);
+int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int *, int *, int *, int, u_int32_t);
+int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int);
+int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int);
+int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int);
+int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int);
+int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int);
+int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *,
+ int *, u_int32_t, int);
+
+MALLOC_DECLARE(PFI_MTYPE);
+VNET_DECLARE(struct pfi_kif *, pfi_all);
+#define V_pfi_all VNET(pfi_all)
+
+void pfi_initialize(void);
+void pfi_initialize_vnet(void);
+void pfi_cleanup(void);
+void pfi_cleanup_vnet(void);
+void pfi_kif_ref(struct pfi_kif *);
+void pfi_kif_unref(struct pfi_kif *);
+struct pfi_kif *pfi_kif_find(const char *);
+struct pfi_kif *pfi_kif_attach(struct pfi_kif *, const char *);
+int pfi_kif_match(struct pfi_kif *, struct pfi_kif *);
+void pfi_kif_purge(void);
+int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *,
+ sa_family_t);
+int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t);
+void pfi_dynaddr_remove(struct pfi_dynaddr *);
+void pfi_dynaddr_copyout(struct pf_addr_wrap *);
+void pfi_update_status(const char *, struct pf_status *);
+void pfi_get_ifaces(const char *, struct pfi_kif *, int *);
+int pfi_set_flags(const char *, int);
+int pfi_clear_flags(const char *, int);
+
+int pf_match_tag(struct mbuf *, struct pf_rule *, int *, int);
+int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int);
+int pf_addr_cmp(struct pf_addr *, struct pf_addr *,
+ sa_family_t);
+void pf_qid2qname(u_int32_t, char *);
+
+VNET_DECLARE(struct pf_kstatus, pf_status);
+#define V_pf_status VNET(pf_status)
+
+struct pf_limit {
+ uma_zone_t zone;
+ u_int limit;
+};
+VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
+#define V_pf_limits VNET(pf_limits)
+
+#endif /* _KERNEL */
+
+#ifdef _KERNEL
+VNET_DECLARE(struct pf_anchor_global, pf_anchors);
+#define V_pf_anchors VNET(pf_anchors)
+VNET_DECLARE(struct pf_anchor, pf_main_anchor);
+#define V_pf_main_anchor VNET(pf_main_anchor)
+#define pf_main_ruleset V_pf_main_anchor.ruleset
+#endif
+
+/* these ruleset functions can be linked into userland programs (pfctl) */
+int pf_get_ruleset_number(u_int8_t);
+void pf_init_ruleset(struct pf_ruleset *);
+int pf_anchor_setup(struct pf_rule *,
+ const struct pf_ruleset *, const char *);
+int pf_anchor_copyout(const struct pf_ruleset *,
+ const struct pf_rule *, struct pfioc_rule *);
+void pf_anchor_remove(struct pf_rule *);
+void pf_remove_if_empty_ruleset(struct pf_ruleset *);
+struct pf_ruleset *pf_find_ruleset(const char *);
+struct pf_ruleset *pf_find_or_create_ruleset(const char *);
+void pf_rs_initialize(void);
+
+/* The fingerprint functions can be linked into userland programs (tcpdump) */
+int pf_osfp_add(struct pf_osfp_ioctl *);
+#ifdef _KERNEL
+struct pf_osfp_enlist *
+ pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int,
+ const struct tcphdr *);
+#endif /* _KERNEL */
+void pf_osfp_flush(void);
+int pf_osfp_get(struct pf_osfp_ioctl *);
+int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t);
+
+#ifdef _KERNEL
+void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
+
+void pf_step_into_anchor(struct pf_anchor_stackframe *, int *,
+ struct pf_ruleset **, int, struct pf_rule **,
+ struct pf_rule **, int *);
+int pf_step_out_of_anchor(struct pf_anchor_stackframe *, int *,
+ struct pf_ruleset **, int, struct pf_rule **,
+ struct pf_rule **, int *);
+
+int pf_map_addr(u_int8_t, struct pf_rule *,
+ struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, struct pf_src_node **);
+struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *,
+ int, int, struct pfi_kif *, struct pf_src_node **,
+ struct pf_state_key **, struct pf_state_key **,
+ struct pf_addr *, struct pf_addr *,
+ uint16_t, uint16_t, struct pf_anchor_stackframe *);
+
+struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *,
+ struct pf_addr *, u_int16_t, u_int16_t);
+struct pf_state_key *pf_state_key_clone(struct pf_state_key *);
+#endif /* _KERNEL */
+
+#endif /* _NET_PFVAR_H_ */
diff --git a/freebsd/sys/net/ppp_defs.h b/freebsd/sys/net/ppp_defs.h
index 386a1763..5f6d4106 100644
--- a/freebsd/sys/net/ppp_defs.h
+++ b/freebsd/sys/net/ppp_defs.h
@@ -31,6 +31,8 @@
#ifndef _PPP_DEFS_H_
#define _PPP_DEFS_H_
+#include <sys/_types.h>
+
/*
* The basic PPP frame.
*/
@@ -83,7 +85,7 @@
/*
* Extended asyncmap - allows any character to be escaped.
*/
-typedef u_int32_t ext_accm[8];
+typedef __uint32_t ext_accm[8];
/*
* What to do with network protocol (NP) packets.
@@ -143,8 +145,8 @@ struct ppp_comp_stats {
* the last NP packet was sent or received.
*/
struct ppp_idle {
- time_t xmit_idle; /* time since last NP packet sent */
- time_t recv_idle; /* time since last NP packet received */
+ __time_t xmit_idle; /* time since last NP packet sent */
+ __time_t recv_idle; /* time since last NP packet received */
};
#ifndef __P
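
The switch to the underscored __uint32_t/__time_t types is the point of this hunk, but ext_accm itself deserves a note: an extended asyncmap is a 256-bit set, one bit per octet value, stored as eight 32-bit words. Marking a character for escaping is a word-index/bit-index split (hypothetical helper):

    /* Sketch: mark octet c (0-255) for escaping in an extended
     * async control-character map. */
    static void
    accm_escape(ext_accm map, unsigned char c)
    {
            map[c >> 5] |= (__uint32_t)1 << (c & 0x1f);
    }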
diff --git a/freebsd/sys/net/radix.c b/freebsd/sys/net/radix.c
index ba15eb51..2615de65 100644
--- a/freebsd/sys/net/radix.c
+++ b/freebsd/sys/net/radix.c
@@ -58,18 +58,15 @@
#include <net/radix.h>
#endif /* !_KERNEL */
-static int rn_walktree_from(struct radix_node_head *h, void *a, void *m,
- walktree_f_t *f, void *w);
-static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
static struct radix_node
- *rn_insert(void *, struct radix_node_head *, int *,
+ *rn_insert(void *, struct radix_head *, int *,
struct radix_node [2]),
*rn_newpair(void *, int, struct radix_node[2]),
*rn_search(void *, struct radix_node *),
*rn_search_m(void *, struct radix_node *, void *);
+static struct radix_node *rn_addmask(void *, struct radix_mask_head *, int,int);
-static void rn_detachhead_internal(void **head);
-static int rn_inithead_internal(void **head, int off);
+static void rn_detachhead_internal(struct radix_head *);
#define RADIX_MAX_KEY_LEN 32
@@ -81,14 +78,6 @@ static char rn_ones[RADIX_MAX_KEY_LEN] = {
-1, -1, -1, -1, -1, -1, -1, -1,
};
-/*
- * XXX: Compat stuff for old rn_addmask() users
- */
-static struct radix_node_head *mask_rnhead_compat;
-#ifdef _KERNEL
-static struct mtx mask_mtx;
-#endif
-
static int rn_lexobetter(void *m_arg, void *n_arg);
static struct radix_mask *
@@ -225,7 +214,7 @@ rn_refines(void *m_arg, void *n_arg)
* from host routes.
*/
struct radix_node *
-rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
+rn_lookup(void *v_arg, void *m_arg, struct radix_head *head)
{
struct radix_node *x;
caddr_t netmask;
@@ -234,7 +223,7 @@ rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
/*
* Most common case: search exact prefix/mask
*/
- x = rn_addmask_r(m_arg, head->rnh_masks, 1,
+ x = rn_addmask(m_arg, head->rnh_masks, 1,
head->rnh_treetop->rn_offset);
if (x == NULL)
return (NULL);
@@ -287,7 +276,7 @@ rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip)
* Search for longest-prefix match in given @head
*/
struct radix_node *
-rn_match(void *v_arg, struct radix_node_head *head)
+rn_match(void *v_arg, struct radix_head *head)
{
caddr_t v = v_arg;
struct radix_node *t = head->rnh_treetop, *x;
@@ -436,7 +425,7 @@ rn_newpair(void *v, int b, struct radix_node nodes[2])
}
static struct radix_node *
-rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
+rn_insert(void *v_arg, struct radix_head *head, int *dupentry,
struct radix_node nodes[2])
{
caddr_t v = v_arg;
@@ -500,9 +489,9 @@ on1:
}
struct radix_node *
-rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip)
+rn_addmask(void *n_arg, struct radix_mask_head *maskhead, int search, int skip)
{
- unsigned char *netmask = arg;
+ unsigned char *netmask = n_arg;
unsigned char *cp, *cplim;
struct radix_node *x;
int b = 0, mlen, j;
@@ -515,7 +504,7 @@ rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip)
if (skip == 0)
skip = 1;
if (mlen <= skip)
- return (maskhead->rnh_nodes);
+ return (maskhead->mask_nodes);
bzero(addmask_key, RADIX_MAX_KEY_LEN);
if (skip > 1)
@@ -528,22 +517,22 @@ rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip)
cp--;
mlen = cp - addmask_key;
if (mlen <= skip)
- return (maskhead->rnh_nodes);
+ return (maskhead->mask_nodes);
*addmask_key = mlen;
- x = rn_search(addmask_key, maskhead->rnh_treetop);
+ x = rn_search(addmask_key, maskhead->head.rnh_treetop);
if (bcmp(addmask_key, x->rn_key, mlen) != 0)
- x = 0;
+ x = NULL;
if (x || search)
return (x);
R_Zalloc(x, struct radix_node *, RADIX_MAX_KEY_LEN + 2 * sizeof (*x));
- if ((saved_x = x) == 0)
+ if ((saved_x = x) == NULL)
return (0);
- netmask = cp = (caddr_t)(x + 2);
+ netmask = cp = (unsigned char *)(x + 2);
bcopy(addmask_key, cp, mlen);
- x = rn_insert(cp, maskhead, &maskduplicated, x);
+ x = rn_insert(cp, &maskhead->head, &maskduplicated, x);
if (maskduplicated) {
log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
- Free(saved_x);
+ R_Free(saved_x);
return (x);
}
/*
@@ -571,23 +560,6 @@ rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip)
return (x);
}
-struct radix_node *
-rn_addmask(void *n_arg, int search, int skip)
-{
- struct radix_node *tt;
-
-#ifdef _KERNEL
- mtx_lock(&mask_mtx);
-#endif
- tt = rn_addmask_r(&mask_rnhead_compat, n_arg, search, skip);
-
-#ifdef _KERNEL
- mtx_unlock(&mask_mtx);
-#endif
-
- return (tt);
-}
-
static int /* XXX: arbitrary ordering for non-contiguous masks */
rn_lexobetter(void *m_arg, void *n_arg)
{
@@ -625,11 +597,11 @@ rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
}
struct radix_node *
-rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
+rn_addroute(void *v_arg, void *n_arg, struct radix_head *head,
struct radix_node treenodes[2])
{
caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
- struct radix_node *t, *x = 0, *tt;
+ struct radix_node *t, *x = NULL, *tt;
struct radix_node *saved_tt, *top = head->rnh_treetop;
short b = 0, b_leaf = 0;
int keyduplicated;
@@ -644,7 +616,7 @@ rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
* nodes and possibly save time in calculating indices.
*/
if (netmask) {
- x = rn_addmask_r(netmask, head->rnh_masks, 0, top->rn_offset);
+ x = rn_addmask(netmask, head->rnh_masks, 0, top->rn_offset);
if (x == NULL)
return (0);
b_leaf = x->rn_bit;
@@ -752,7 +724,7 @@ rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
if (m->rm_bit >= b_leaf)
break;
- t->rn_mklist = m; *mp = 0;
+ t->rn_mklist = m; *mp = NULL;
}
on2:
/* Add new route to highest possible ancestor's list */
@@ -799,7 +771,7 @@ on2:
}
struct radix_node *
-rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
+rn_delete(void *v_arg, void *netmask_arg, struct radix_head *head)
{
struct radix_node *t, *p, *x, *tt;
struct radix_mask *m, *saved_m, **mp;
@@ -815,22 +787,22 @@ rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
vlen = LEN(v);
saved_tt = tt;
top = x;
- if (tt == 0 ||
+ if (tt == NULL ||
bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
return (0);
/*
* Delete our route from mask lists.
*/
if (netmask) {
- x = rn_addmask_r(netmask, head->rnh_masks, 1, head_off);
+ x = rn_addmask(netmask, head->rnh_masks, 1, head_off);
if (x == NULL)
return (0);
netmask = x->rn_key;
while (tt->rn_mask != netmask)
- if ((tt = tt->rn_dupedkey) == 0)
+ if ((tt = tt->rn_dupedkey) == NULL)
return (0);
}
- if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
+ if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == NULL)
goto on1;
if (tt->rn_flags & RNF_NORMAL) {
if (m->rm_leaf != tt || m->rm_refs > 0) {
@@ -856,10 +828,10 @@ rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
if (m == saved_m) {
*mp = m->rm_mklist;
- Free(m);
+ R_Free(m);
break;
}
- if (m == 0) {
+ if (m == NULL) {
log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
if (tt->rn_flags & RNF_NORMAL)
return (0); /* Dangling ref to us */
@@ -947,7 +919,7 @@ on1:
struct radix_mask *mm = m->rm_mklist;
x->rn_mklist = 0;
if (--(m->rm_refs) < 0)
- Free(m);
+ R_Free(m);
m = mm;
}
if (m)
@@ -986,8 +958,8 @@ out:
* This is the same as rn_walktree() except for the parameters and the
* exit.
*/
-static int
-rn_walktree_from(struct radix_node_head *h, void *a, void *m,
+int
+rn_walktree_from(struct radix_head *h, void *a, void *m,
walktree_f_t *f, void *w)
{
int error;
@@ -998,6 +970,8 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m,
int stopping = 0;
int lastb;
+ KASSERT(m != NULL, ("%s: mask needs to be specified", __func__));
+
/*
* rn_search_m is sort-of-open-coded here. We cannot use the
* function because we need to keep track of the last node seen.
@@ -1021,11 +995,11 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m,
/*
* Two cases: either we stepped off the end of our mask,
* in which case last == rn, or we reached a leaf, in which
- * case we want to start from the last node we looked at.
- * Either way, last is the node we want to start from.
+ * case we want to start from the leaf.
*/
- rn = last;
- lastb = rn->rn_bit;
+ if (rn->rn_bit >= 0)
+ rn = last;
+ lastb = last->rn_bit;
/* printf("rn %p, lastb %d\n", rn, lastb);*/
@@ -1072,7 +1046,7 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m,
rn = rn->rn_left;
next = rn;
/* Process leaves */
- while ((rn = base) != 0) {
+ while ((rn = base) != NULL) {
base = rn->rn_dupedkey;
/* printf("leaf %p\n", rn); */
if (!(rn->rn_flags & RNF_ROOT)
@@ -1090,8 +1064,8 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m,
return (0);
}
-static int
-rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
+int
+rn_walktree(struct radix_head *h, walktree_f_t *f, void *w)
{
int error;
struct radix_node *base, *next;
@@ -1130,82 +1104,94 @@ rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
}
/*
- * Allocate and initialize an empty tree. This has 3 nodes, which are
- * part of the radix_node_head (in the order <left,root,right>) and are
+ * Initialize an empty tree. This has 3 nodes, which are passed
+ * via base_nodes (in the order <left,root,right>) and are
* marked RNF_ROOT so they cannot be freed.
* The leaves have all-zero and all-one keys, with significant
* bits starting at 'off'.
- * Return 1 on success, 0 on error.
*/
-static int
-rn_inithead_internal(void **head, int off)
+void
+rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes, int off)
{
- struct radix_node_head *rnh;
struct radix_node *t, *tt, *ttt;
- if (*head)
- return (1);
- R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
- if (rnh == 0)
- return (0);
-#ifdef _KERNEL
- RADIX_NODE_HEAD_LOCK_INIT(rnh);
-#endif
- *head = rnh;
- t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
- ttt = rnh->rnh_nodes + 2;
+
+ t = rn_newpair(rn_zeros, off, base_nodes);
+ ttt = base_nodes + 2;
t->rn_right = ttt;
t->rn_parent = t;
- tt = t->rn_left; /* ... which in turn is rnh->rnh_nodes */
+ tt = t->rn_left; /* ... which in turn is base_nodes */
tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
tt->rn_bit = -1 - off;
*ttt = *tt;
ttt->rn_key = rn_ones;
- rnh->rnh_addaddr = rn_addroute;
- rnh->rnh_deladdr = rn_delete;
- rnh->rnh_matchaddr = rn_match;
- rnh->rnh_lookup = rn_lookup;
- rnh->rnh_walktree = rn_walktree;
- rnh->rnh_walktree_from = rn_walktree_from;
- rnh->rnh_treetop = t;
- return (1);
+
+ rh->rnh_treetop = t;
}
static void
-rn_detachhead_internal(void **head)
+rn_detachhead_internal(struct radix_head *head)
{
- struct radix_node_head *rnh;
- KASSERT((head != NULL && *head != NULL),
+ KASSERT((head != NULL),
("%s: head already freed", __func__));
- rnh = *head;
/* Free <left,root,right> nodes. */
- Free(rnh);
-
- *head = NULL;
+ R_Free(head);
}
+/* Functions used by 'struct radix_node_head' users */
+
int
rn_inithead(void **head, int off)
{
struct radix_node_head *rnh;
+ struct radix_mask_head *rmh;
+
+ rnh = *head;
+ rmh = NULL;
if (*head != NULL)
return (1);
- if (rn_inithead_internal(head, off) == 0)
+ R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
+ R_Zalloc(rmh, struct radix_mask_head *, sizeof (*rmh));
+ if (rnh == NULL || rmh == NULL) {
+ if (rnh != NULL)
+ R_Free(rnh);
+ if (rmh != NULL)
+ R_Free(rmh);
return (0);
+ }
- rnh = (struct radix_node_head *)(*head);
+ /* Init trees */
+ rn_inithead_internal(&rnh->rh, rnh->rnh_nodes, off);
+ rn_inithead_internal(&rmh->head, rmh->mask_nodes, 0);
+ *head = rnh;
+ rnh->rh.rnh_masks = rmh;
- if (rn_inithead_internal((void **)&rnh->rnh_masks, 0) == 0) {
- rn_detachhead_internal(head);
- return (0);
- }
+ /* Finally, set base callbacks */
+ rnh->rnh_addaddr = rn_addroute;
+ rnh->rnh_deladdr = rn_delete;
+ rnh->rnh_matchaddr = rn_match;
+ rnh->rnh_lookup = rn_lookup;
+ rnh->rnh_walktree = rn_walktree;
+ rnh->rnh_walktree_from = rn_walktree_from;
return (1);
}
+static int
+rn_freeentry(struct radix_node *rn, void *arg)
+{
+ struct radix_head * const rnh = arg;
+ struct radix_node *x;
+
+ x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh);
+ if (x != NULL)
+ R_Free(x);
+ return (0);
+}
+
int
rn_detachhead(void **head)
{
@@ -1214,29 +1200,14 @@ rn_detachhead(void **head)
KASSERT((head != NULL && *head != NULL),
("%s: head already freed", __func__));
- rnh = *head;
+ rnh = (struct radix_node_head *)(*head);
- rn_detachhead_internal((void **)&rnh->rnh_masks);
- rn_detachhead_internal(head);
- return (1);
-}
+ rn_walktree(&rnh->rh.rnh_masks->head, rn_freeentry, rnh->rh.rnh_masks);
+ rn_detachhead_internal(&rnh->rh.rnh_masks->head);
+ rn_detachhead_internal(&rnh->rh);
-void
-rn_init(int maxk)
-{
- if ((maxk <= 0) || (maxk > RADIX_MAX_KEY_LEN)) {
- log(LOG_ERR,
- "rn_init: max_keylen must be within 1..%d\n",
- RADIX_MAX_KEY_LEN);
- return;
- }
+ *head = NULL;
- /*
- * XXX: Compat for old rn_addmask() users
- */
- if (rn_inithead((void **)(void *)&mask_rnhead_compat, 0) == 0)
- panic("rn_init 2");
-#ifdef _KERNEL
- mtx_init(&mask_mtx, "radix_mask", NULL, MTX_DEF);
-#endif
+ return (1);
}
+
diff --git a/freebsd/sys/net/radix.h b/freebsd/sys/net/radix.h
index 3554c77c..69aad831 100644
--- a/freebsd/sys/net/radix.h
+++ b/freebsd/sys/net/radix.h
@@ -101,52 +101,61 @@ struct radix_mask {
#define rm_mask rm_rmu.rmu_mask
#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */
+struct radix_head;
+
typedef int walktree_f_t(struct radix_node *, void *);
+typedef struct radix_node *rn_matchaddr_f_t(void *v,
+ struct radix_head *head);
+typedef struct radix_node *rn_addaddr_f_t(void *v, void *mask,
+ struct radix_head *head, struct radix_node nodes[]);
+typedef struct radix_node *rn_deladdr_f_t(void *v, void *mask,
+ struct radix_head *head);
+typedef struct radix_node *rn_lookup_f_t(void *v, void *mask,
+ struct radix_head *head);
+typedef int rn_walktree_t(struct radix_head *head, walktree_f_t *f,
+ void *w);
+typedef int rn_walktree_from_t(struct radix_head *head,
+ void *a, void *m, walktree_f_t *f, void *w);
+typedef void rn_close_t(struct radix_node *rn, struct radix_head *head);
+
+struct radix_mask_head;
+
+struct radix_head {
+ struct radix_node *rnh_treetop;
+ struct radix_mask_head *rnh_masks; /* Storage for our masks */
+};
struct radix_node_head {
- struct radix_node *rnh_treetop;
- u_int rnh_gen; /* generation counter */
- int rnh_multipath; /* multipath capable ? */
- int rnh_addrsize; /* permit, but not require fixed keys */
- int rnh_pktsize; /* permit, but not require fixed keys */
- struct radix_node *(*rnh_addaddr) /* add based on sockaddr */
- (void *v, void *mask,
- struct radix_node_head *head, struct radix_node nodes[]);
- struct radix_node *(*rnh_addpkt) /* add based on packet hdr */
- (void *v, void *mask,
- struct radix_node_head *head, struct radix_node nodes[]);
- struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */
- (void *v, void *mask, struct radix_node_head *head);
- struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
- (void *v, void *mask, struct radix_node_head *head);
- struct radix_node *(*rnh_matchaddr) /* longest match for sockaddr */
- (void *v, struct radix_node_head *head);
- struct radix_node *(*rnh_lookup) /*exact match for sockaddr*/
- (void *v, void *mask, struct radix_node_head *head);
- struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
- (void *v, struct radix_node_head *head);
- int (*rnh_walktree) /* traverse tree */
- (struct radix_node_head *head, walktree_f_t *f, void *w);
- int (*rnh_walktree_from) /* traverse tree below a */
- (struct radix_node_head *head, void *a, void *m,
- walktree_f_t *f, void *w);
- void (*rnh_close) /* do something when the last ref drops */
- (struct radix_node *rn, struct radix_node_head *head);
+ struct radix_head rh;
+ rn_matchaddr_f_t *rnh_matchaddr; /* longest match for sockaddr */
+ rn_addaddr_f_t *rnh_addaddr; /* add based on sockaddr*/
+ rn_deladdr_f_t *rnh_deladdr; /* remove based on sockaddr */
+ rn_lookup_f_t *rnh_lookup; /* exact match for sockaddr */
+ rn_walktree_t *rnh_walktree; /* traverse tree */
+ rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */
+ rn_close_t *rnh_close; /*do something when the last ref drops*/
struct radix_node rnh_nodes[3]; /* empty tree for common case */
#ifdef _KERNEL
struct rwlock rnh_lock; /* locks entire radix tree */
#endif
- struct radix_node_head *rnh_masks; /* Storage for our masks */
};
+struct radix_mask_head {
+ struct radix_head head;
+ struct radix_node mask_nodes[3];
+};
+
+void rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes,
+ int off);
+
#ifndef _KERNEL
#define R_Malloc(p, t, n) (p = (t) malloc((unsigned int)(n)))
#define R_Zalloc(p, t, n) (p = (t) calloc(1,(unsigned int)(n)))
-#define Free(p) free((char *)p);
+#define R_Free(p) free((char *)p);
#else
#define R_Malloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT))
#define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO))
-#define Free(p) free((caddr_t)p, M_RTABLE);
+#define R_Free(p) free((caddr_t)p, M_RTABLE);
#define RADIX_NODE_HEAD_LOCK_INIT(rnh) \
rw_init_flags(&(rnh)->rnh_lock, "radix node head", 0)
@@ -162,18 +171,17 @@ struct radix_node_head {
#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED)
#endif /* _KERNEL */
-void rn_init(int);
int rn_inithead(void **, int);
int rn_detachhead(void **);
int rn_refines(void *, void *);
-struct radix_node
- *rn_addmask(void *, int, int),
- *rn_addmask_r(void *, struct radix_node_head *, int, int),
- *rn_addroute (void *, void *, struct radix_node_head *,
- struct radix_node [2]),
- *rn_delete(void *, void *, struct radix_node_head *),
- *rn_lookup (void *v_arg, void *m_arg,
- struct radix_node_head *head),
- *rn_match(void *, struct radix_node_head *);
+struct radix_node *rn_addroute(void *, void *, struct radix_head *,
+ struct radix_node[2]);
+struct radix_node *rn_delete(void *, void *, struct radix_head *);
+struct radix_node *rn_lookup (void *v_arg, void *m_arg,
+ struct radix_head *head);
+struct radix_node *rn_match(void *, struct radix_head *);
+int rn_walktree_from(struct radix_head *h, void *a, void *m,
+ walktree_f_t *f, void *w);
+int rn_walktree(struct radix_head *, walktree_f_t *, void *);
#endif /* _RADIX_H_ */
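The net effect of the header split is that the callback table stays in struct radix_node_head while every callback now operates on the embedded struct radix_head. A minimal sketch of a hypothetical consumer, assuming kernel context and the usual rtentry-embeds-radix_node layout:

static struct rtentry *
example_lookup(struct radix_node_head *rnh, struct sockaddr *dst,
    struct sockaddr *mask)
{
        struct radix_node *rn;

        /* Callbacks hang off the outer head but take the inner one. */
        rn = rnh->rnh_lookup(dst, mask, &rnh->rh);
        if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0)
                return (NULL);
        return ((struct rtentry *)rn);
}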
diff --git a/freebsd/sys/net/radix_mpath.c b/freebsd/sys/net/radix_mpath.c
index 1bce388e..f5215205 100644
--- a/freebsd/sys/net/radix_mpath.c
+++ b/freebsd/sys/net/radix_mpath.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <net/radix.h>
#include <net/radix_mpath.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -59,12 +60,19 @@ __FBSDID("$FreeBSD$");
static uint32_t hashjitter;
int
-rn_mpath_capable(struct radix_node_head *rnh)
+rt_mpath_capable(struct rib_head *rnh)
{
return rnh->rnh_multipath;
}
+int
+rn_mpath_capable(struct radix_head *rh)
+{
+
+ return (rt_mpath_capable((struct rib_head *)rh));
+}
+
struct radix_node *
rn_mpath_next(struct radix_node *rn)
{
@@ -91,7 +99,7 @@ rn_mpath_count(struct radix_node *rn)
while (rn != NULL) {
rt = (struct rtentry *)rn;
- i += rt->rt_rmx.rmx_weight;
+ i += rt->rt_weight;
rn = rn_mpath_next(rn);
}
return (i);
@@ -165,14 +173,14 @@ rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt)
* Assume @rt rt_key host bits are cleared according to @netmask
*/
int
-rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
+rt_mpath_conflict(struct rib_head *rnh, struct rtentry *rt,
struct sockaddr *netmask)
{
struct radix_node *rn, *rn1;
struct rtentry *rt1;
rn = (struct radix_node *)rt;
- rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
+ rn1 = rnh->rnh_lookup(rt_key(rt), netmask, &rnh->head);
if (!rn1 || rn1->rn_flags & RNF_ROOT)
return (0);
@@ -203,18 +211,50 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
return (0);
}
-void
-#ifndef __rtems__
-rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
-#else /* __rtems__ */
-rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum)
-#endif /* __rtems__ */
+static struct rtentry *
+rt_mpath_selectrte(struct rtentry *rte, uint32_t hash)
{
struct radix_node *rn0, *rn;
- u_int32_t n;
+ uint32_t total_weight;
struct rtentry *rt;
int64_t weight;
+ /* beyond here, we use rn as the master copy */
+ rn0 = rn = (struct radix_node *)rte;
+ rt = rte;
+
+ /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
+ total_weight = rn_mpath_count(rn0);
+ hash += hashjitter;
+ hash %= total_weight;
+ for (weight = abs((int32_t)hash);
+ rt != NULL && weight >= rt->rt_weight;
+ weight -= (rt == NULL) ? 0 : rt->rt_weight) {
+
+ /* stay within the multipath routes */
+ if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
+ break;
+ rn = rn->rn_dupedkey;
+ rt = (struct rtentry *)rn;
+ }
+
+ return (rt);
+}
+
+struct rtentry *
+rt_mpath_select(struct rtentry *rte, uint32_t hash)
+{
+ if (rn_mpath_next((struct radix_node *)rte) == NULL)
+ return (rte);
+
+ return (rt_mpath_selectrte(rte, hash));
+}
+
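rt_mpath_selectrte() above implements weighted Modulo-N hashing (RFC 2991): hash % total_weight picks a slot, and the loop subtracts per-path weights until the slot falls inside one path's share. A standalone arithmetic sketch of the same idea, with made-up weights:

#include <stdint.h>
#include <stdio.h>

/*
 * Weighted Modulo-N selection over three paths with weights 2, 1, 1:
 * hash % total picks a slot, then we walk the paths subtracting
 * weights until the slot falls inside one path's share.
 */
int main(void)
{
        const uint32_t weight[] = { 2, 1, 1 };
        uint32_t total = 4, hash, i, w;

        for (hash = 0; hash < 8; hash++) {
                w = hash % total;
                for (i = 0; w >= weight[i]; i++)
                        w -= weight[i];
                printf("hash %u -> path %u\n", hash, i);
        }
        return (0);
}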
+void
+rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
+{
+ struct rtentry *rt;
+
/*
* XXX we don't attempt to lookup cached route again; what should
* be done for sendto(3) case?
@@ -232,34 +272,18 @@ rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum)
return;
}
- /* beyond here, we use rn as the master copy */
- rn0 = rn = (struct radix_node *)ro->ro_rt;
- n = rn_mpath_count(rn0);
-
- /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
- hash += hashjitter;
- hash %= n;
- for (weight = abs((int32_t)hash), rt = ro->ro_rt;
- weight >= rt->rt_rmx.rmx_weight && rn;
- weight -= rt->rt_rmx.rmx_weight) {
-
- /* stay within the multipath routes */
- if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
- break;
- rn = rn->rn_dupedkey;
- rt = (struct rtentry *)rn;
- }
+ rt = rt_mpath_selectrte(ro->ro_rt, hash);
/* XXX try filling rt_gwroute and avoid unreachable gw */
/* gw selection has failed - there must be only zero weight routes */
- if (!rn) {
+ if (!rt) {
RT_UNLOCK(ro->ro_rt);
ro->ro_rt = NULL;
return;
}
if (ro->ro_rt != rt) {
RTFREE_LOCKED(ro->ro_rt);
- ro->ro_rt = (struct rtentry *)rn;
+ ro->ro_rt = rt;
RT_LOCK(ro->ro_rt);
RT_ADDREF(ro->ro_rt);
@@ -274,11 +298,11 @@ extern int in_inithead(void **head, int off);
int
rn4_mpath_inithead(void **head, int off)
{
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
hashjitter = arc4random();
if (in_inithead(head, off) == 1) {
- rnh = (struct radix_node_head *)*head;
+ rnh = (struct rib_head *)*head;
rnh->rnh_multipath = 1;
return 1;
} else
@@ -290,11 +314,11 @@ rn4_mpath_inithead(void **head, int off)
int
rn6_mpath_inithead(void **head, int off)
{
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
hashjitter = arc4random();
if (in6_inithead(head, off) == 1) {
- rnh = (struct radix_node_head *)*head;
+ rnh = (struct rib_head *)*head;
rnh->rnh_multipath = 1;
return 1;
} else
diff --git a/freebsd/sys/net/radix_mpath.h b/freebsd/sys/net/radix_mpath.h
index bcb210e3..2b0d442e 100644
--- a/freebsd/sys/net/radix_mpath.h
+++ b/freebsd/sys/net/radix_mpath.h
@@ -44,16 +44,16 @@
struct route;
struct rtentry;
struct sockaddr;
-int rn_mpath_capable(struct radix_node_head *);
+struct rib_head;
+int rt_mpath_capable(struct rib_head *);
+int rn_mpath_capable(struct radix_head *);
struct radix_node *rn_mpath_next(struct radix_node *);
u_int32_t rn_mpath_count(struct radix_node *);
struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *);
-int rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
+int rt_mpath_conflict(struct rib_head *, struct rtentry *,
struct sockaddr *);
void rtalloc_mpath_fib(struct route *, u_int32_t, u_int);
-#define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0)
-struct radix_node *rn_mpath_lookup(void *, void *,
- struct radix_node_head *);
+struct rtentry *rt_mpath_select(struct rtentry *, uint32_t);
int rt_mpath_deldup(struct rtentry *, struct rtentry *);
int rn4_mpath_inithead(void **, int);
int rn6_mpath_inithead(void **, int);
diff --git a/freebsd/sys/net/raw_cb.c b/freebsd/sys/net/raw_cb.c
index 10db8bba..00a199f3 100644
--- a/freebsd/sys/net/raw_cb.c
+++ b/freebsd/sys/net/raw_cb.c
@@ -46,8 +46,8 @@
#include <sys/systm.h>
#include <net/if.h>
-#include <net/raw_cb.h>
#include <net/vnet.h>
+#include <net/raw_cb.h>
/*
* Routines to manage the raw protocol control blocks.
diff --git a/freebsd/sys/net/raw_usrreq.c b/freebsd/sys/net/raw_usrreq.c
index 1030526f..e170ad74 100644
--- a/freebsd/sys/net/raw_usrreq.c
+++ b/freebsd/sys/net/raw_usrreq.c
@@ -48,8 +48,8 @@
#include <sys/systm.h>
#include <net/if.h>
-#include <net/raw_cb.h>
#include <net/vnet.h>
+#include <net/raw_cb.h>
MTX_SYSINIT(rawcb_mtx, &rawcb_mtx, "rawcb", MTX_DEF);
@@ -85,7 +85,7 @@ raw_input_ext(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src,
struct mbuf *m = m0;
struct socket *last;
- last = 0;
+ last = NULL;
mtx_lock(&rawcb_mtx);
LIST_FOREACH(rp, &V_rawcb_list, list) {
if (rp->rcb_proto.sp_family != proto->sp_family)
diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c
index 781d8bb9..3eb05b94 100644
--- a/freebsd/sys/net/route.c
+++ b/freebsd/sys/net/route.c
@@ -45,7 +45,6 @@
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
-#include <sys/syslog.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -57,8 +56,10 @@
#include <sys/kernel.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <net/vnet.h>
#include <net/flowtable.h>
@@ -75,8 +76,7 @@
#include <sys/file.h>
#endif /* __rtems__ */
-/* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBS. */
-#define RT_MAXFIBS 16
+#define RT_MAXFIBS UINT16_MAX
/* Kernel config default option. */
#ifdef ROUTETABLES
@@ -102,17 +102,7 @@ extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
/* This is read-only.. */
u_int rt_numfibs = RT_NUMFIBS;
-SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
-/*
- * Allow the boot code to allow LESS than RT_MAXFIBS to be used.
- * We can't do more because storage is statically allocated for now.
- * (for compatibility reasons.. this will change. When this changes, code should
- * be refactored to protocol independent parts and protocol dependent parts,
- * probably hanging of domain(9) specific storage to not need the full
- * fib * af RNH allocation etc. but allow tuning the number of tables per
- * address family).
- */
-TUNABLE_INT("net.fibs", &rt_numfibs);
+SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, "");
/*
* By default add routes to all fibs for new interfaces.
@@ -124,25 +114,20 @@ TUNABLE_INT("net.fibs", &rt_numfibs);
* always work given the fib can be overridden and prefixes can be added
* from the network stack context.
*/
-u_int rt_add_addr_allfibs = 1;
-SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
- &rt_add_addr_allfibs, 0, "");
-TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);
+VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1;
+SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET,
+ &VNET_NAME(rt_add_addr_allfibs), 0, "");
VNET_DEFINE(struct rtstat, rtstat);
#define V_rtstat VNET(rtstat)
-VNET_DEFINE(struct radix_node_head *, rt_tables);
+VNET_DEFINE(struct rib_head *, rt_tables);
#define V_rt_tables VNET(rt_tables)
VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
#define V_rttrash VNET(rttrash)
-/* compare two sockaddr structures */
-#define sa_equal(a1, a2) (((a1)->sa_len == (a2)->sa_len) && \
- (bcmp((a1), (a2), (a1)->sa_len) == 0))
-
/*
* Convert a 'struct radix_node *' to a 'struct rtentry *'.
* The operation can be done safely (in this code) because a
@@ -158,6 +143,28 @@ VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */
#define V_rtzone VNET(rtzone)
+static int rtrequest1_fib_change(struct rib_head *, struct rt_addrinfo *,
+ struct rtentry **, u_int);
+static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *);
+static int rt_ifdelroute(const struct rtentry *rt, void *arg);
+static struct rtentry *rt_unlinkrte(struct rib_head *rnh,
+ struct rt_addrinfo *info, int *perror);
+static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info);
+#ifdef RADIX_MPATH
+static struct radix_node *rt_mpath_unlink(struct rib_head *rnh,
+ struct rt_addrinfo *info, struct rtentry *rto, int *perror);
+#endif
+static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info,
+ int flags);
+
+struct if_mtuinfo
+{
+ struct ifnet *ifp;
+ int mtu;
+};
+
+static int if_updatemtu_cb(struct radix_node *, void *);
+
/*
* handler for net.my_fibnum
*/
@@ -179,10 +186,10 @@ sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
-static __inline struct radix_node_head **
+static __inline struct rib_head **
rt_tables_get_rnh_ptr(int table, int fam)
{
- struct radix_node_head **rnh;
+ struct rib_head **rnh;
KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.",
__func__));
@@ -190,20 +197,32 @@ rt_tables_get_rnh_ptr(int table, int fam)
__func__));
/* rnh is [fib=0][af=0]. */
- rnh = (struct radix_node_head **)V_rt_tables;
+ rnh = (struct rib_head **)V_rt_tables;
/* Get the offset to the requested table and fam. */
rnh += table * (AF_MAX+1) + fam;
return (rnh);
}
-struct radix_node_head *
+struct rib_head *
rt_tables_get_rnh(int table, int fam)
{
return (*rt_tables_get_rnh_ptr(table, fam));
}
+u_int
+rt_tables_get_gen(int table, int fam)
+{
+ struct rib_head *rnh;
+
+ rnh = *rt_tables_get_rnh_ptr(table, fam);
+ KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d",
+ __func__, table, fam));
+ return (rnh->rnh_gen);
+}
+
+
/*
* route initialization must occur before ip6_init2(), which happens at
* SI_ORDER_MIDDLE.
@@ -211,36 +230,72 @@ rt_tables_get_rnh(int table, int fam)
static void
route_init(void)
{
- struct domain *dom;
- int max_keylen = 0;
/* whack the tunable ints into line. */
if (rt_numfibs > RT_MAXFIBS)
rt_numfibs = RT_MAXFIBS;
if (rt_numfibs == 0)
rt_numfibs = 1;
+}
+SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
- for (dom = domains; dom; dom = dom->dom_next)
- if (dom->dom_maxrtkey > max_keylen)
- max_keylen = dom->dom_maxrtkey;
+static int
+rtentry_zinit(void *mem, int size, int how)
+{
+ struct rtentry *rt = mem;
+
+ rt->rt_pksent = counter_u64_alloc(how);
+ if (rt->rt_pksent == NULL)
+ return (ENOMEM);
- rn_init(max_keylen); /* init all zeroes, all ones, mask table */
+ RT_LOCK_INIT(rt);
+
+ return (0);
+}
+
+static void
+rtentry_zfini(void *mem, int size)
+{
+ struct rtentry *rt = mem;
+
+ RT_LOCK_DESTROY(rt);
+ counter_u64_free(rt->rt_pksent);
+}
+
+static int
+rtentry_ctor(void *mem, int size, void *arg, int how)
+{
+ struct rtentry *rt = mem;
+
+ bzero(rt, offsetof(struct rtentry, rt_endzero));
+ counter_u64_zero(rt->rt_pksent);
+ rt->rt_chain = NULL;
+
+ return (0);
+}
+
+static void
+rtentry_dtor(void *mem, int size, void *arg)
+{
+ struct rtentry *rt = mem;
+
+ RT_UNLOCK_COND(rt);
}
-SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
static void
vnet_route_init(const void *unused __unused)
{
struct domain *dom;
- struct radix_node_head **rnh;
+ struct rib_head **rnh;
int table;
int fam;
V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
- sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO);
+ sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO);
- V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, 0);
+ V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
+ rtentry_ctor, rtentry_dtor,
+ rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0);
for (dom = domains; dom; dom = dom->dom_next) {
if (dom->dom_rtattach == NULL)
continue;
@@ -250,15 +305,10 @@ vnet_route_init(const void *unused __unused)
if (table != 0 && fam != AF_INET6 && fam != AF_INET)
break;
- /*
- * XXX MRT rtattach will be also called from
- * vfs_export.c but the offset will be 0 (only for
- * AF_INET and AF_INET6 which don't need it anyhow).
- */
rnh = rt_tables_get_rnh_ptr(table, fam);
if (rnh == NULL)
panic("%s: rnh NULL", __func__);
- dom->dom_rtattach((void **)rnh, dom->dom_rtoffset);
+ dom->dom_rtattach((void **)rnh, 0);
}
}
}
@@ -272,7 +322,7 @@ vnet_route_uninit(const void *unused __unused)
int table;
int fam;
struct domain *dom;
- struct radix_node_head **rnh;
+ struct rib_head **rnh;
for (dom = domains; dom; dom = dom->dom_next) {
if (dom->dom_rtdetach == NULL)
@@ -287,14 +337,68 @@ vnet_route_uninit(const void *unused __unused)
rnh = rt_tables_get_rnh_ptr(table, fam);
if (rnh == NULL)
panic("%s: rnh NULL", __func__);
- dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset);
+ dom->dom_rtdetach((void **)rnh, 0);
}
}
+
+ free(V_rt_tables, M_RTABLE);
+ uma_zdestroy(V_rtzone);
}
-VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
vnet_route_uninit, 0);
#endif
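A minimal sketch of what the UMA hook split above buys at allocation time, assuming it sits in route.c: state owned by rtentry_zinit() (the lock and the rt_pksent counter) survives across alloc/free cycles of the same backing item, while rtentry_ctor() merely re-zeroes the fields before rt_endzero.

static struct rtentry *
example_alloc_rte(void)
{
        struct rtentry *rt;

        rt = uma_zalloc(V_rtzone, M_NOWAIT);
        if (rt == NULL)
                return (NULL);
        /* rt_lock is initialized and rt_pksent is allocated and zeroed. */
        return (rt);
}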
+struct rib_head *
+rt_table_init(int offset)
+{
+ struct rib_head *rh;
+
+ rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO);
+
+ /* TODO: These details should be hidden inside radix.c */
+ /* Init the main and mask radix trees */
+ rn_inithead_internal(&rh->head, rh->rnh_nodes, offset);
+ rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0);
+ rh->head.rnh_masks = &rh->rmhead;
+
+ /* Init locks */
+ rw_init(&rh->rib_lock, "rib head lock");
+
+ /* Finally, set base callbacks */
+ rh->rnh_addaddr = rn_addroute;
+ rh->rnh_deladdr = rn_delete;
+ rh->rnh_matchaddr = rn_match;
+ rh->rnh_lookup = rn_lookup;
+ rh->rnh_walktree = rn_walktree;
+ rh->rnh_walktree_from = rn_walktree_from;
+
+ return (rh);
+}
+
+static int
+rt_freeentry(struct radix_node *rn, void *arg)
+{
+ struct radix_head * const rnh = arg;
+ struct radix_node *x;
+
+ x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh);
+ if (x != NULL)
+ R_Free(x);
+ return (0);
+}
+
+void
+rt_table_destroy(struct rib_head *rh)
+{
+
+ rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head);
+
+ /* Assume table is already empty */
+ rw_destroy(&rh->rib_lock);
+ free(rh, M_RTABLE);
+}
+
+
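rt_table_init() and rt_table_destroy() are the building blocks a per-domain attach path can use in place of the old rn_inithead(). A hypothetical attach/detach pair in that style (the example_* names are illustrative, not from this patch):

static int
example_rtattach(void **head, int off)
{

        *head = rt_table_init(off);     /* M_WAITOK: does not fail */
        return (1);
}

static void
example_rtdetach(void **head, int off __unused)
{

        rt_table_destroy((struct rib_head *)*head);
        *head = NULL;
}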
#ifndef _SYS_SYSPROTO_H_
struct setfib_args {
int fibnum;
@@ -335,35 +439,6 @@ setfib(int fibnum)
* Packet routing routines.
*/
void
-rtalloc(struct route *ro)
-{
-
- rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB);
-}
-
-void
-rtalloc_fib(struct route *ro, u_int fibnum)
-{
- rtalloc_ign_fib(ro, 0UL, fibnum);
-}
-
-void
-rtalloc_ign(struct route *ro, u_long ignore)
-{
- struct rtentry *rt;
-
- if ((rt = ro->ro_rt) != NULL) {
- if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
- return;
- RTFREE(rt);
- ro->ro_rt = NULL;
- }
- ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB);
- if (ro->ro_rt)
- RT_UNLOCK(ro->ro_rt);
-}
-
-void
rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
{
struct rtentry *rt;
@@ -396,49 +471,32 @@ struct rtentry *
rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
u_int fibnum)
{
- struct radix_node_head *rnh;
+ struct rib_head *rh;
struct radix_node *rn;
struct rtentry *newrt;
struct rt_addrinfo info;
int err = 0, msgtype = RTM_MISS;
- int needlock;
KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
- switch (dst->sa_family) {
- case AF_INET6:
- case AF_INET:
- /* We support multiple FIBs. */
- break;
- default:
- fibnum = RT_DEFAULT_FIB;
- break;
- }
- rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
+ rh = rt_tables_get_rnh(fibnum, dst->sa_family);
newrt = NULL;
- if (rnh == NULL)
+ if (rh == NULL)
goto miss;
/*
* Look up the address in the table for that Address Family
*/
- needlock = !(ignflags & RTF_RNH_LOCKED);
- if (needlock)
- RADIX_NODE_HEAD_RLOCK(rnh);
-#ifdef INVARIANTS
- else
- RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
-#endif
- rn = rnh->rnh_matchaddr(dst, rnh);
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr(dst, &rh->head);
if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
newrt = RNTORT(rn);
RT_LOCK(newrt);
RT_ADDREF(newrt);
- if (needlock)
- RADIX_NODE_HEAD_RUNLOCK(rnh);
- goto done;
+ RIB_RUNLOCK(rh);
+ return (newrt);
- } else if (needlock)
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ } else
+ RIB_RUNLOCK(rh);
/*
* Either we hit the root or couldn't find any match,
@@ -457,10 +515,7 @@ miss:
bzero(&info, sizeof(info));
info.rti_info[RTAX_DST] = dst;
rt_missmsg_fib(msgtype, &info, 0, err, fibnum);
- }
-done:
- if (newrt)
- RT_LOCK_ASSERT(newrt);
+ }
return (newrt);
}
@@ -471,7 +526,7 @@ done:
void
rtfree(struct rtentry *rt)
{
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
KASSERT(rt != NULL,("%s: NULL rt", __func__));
rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
@@ -499,7 +554,7 @@ rtfree(struct rtentry *rt)
* on the entry so that the code below reclaims the storage.
*/
if (rt->rt_refcnt == 0 && rnh->rnh_close)
- rnh->rnh_close((struct radix_node *)rt, rnh);
+ rnh->rnh_close((struct radix_node *)rt, &rnh->head);
/*
* If we are no longer "up" (and ref == 0)
@@ -531,12 +586,11 @@ rtfree(struct rtentry *rt)
* This also frees the gateway, as they are always malloc'd
* together.
*/
- Free(rt_key(rt));
+ R_Free(rt_key(rt));
/*
* and the rtentry itself of course
*/
- RT_LOCK_DESTROY(rt);
uma_zfree(V_rtzone, rt);
return;
}
@@ -552,17 +606,6 @@ done:
* message from the network layer.
*/
void
-rtredirect(struct sockaddr *dst,
- struct sockaddr *gateway,
- struct sockaddr *netmask,
- int flags,
- struct sockaddr *src)
-{
-
- rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB);
-}
-
-void
rtredirect_fib(struct sockaddr *dst,
struct sockaddr *gateway,
struct sockaddr *netmask,
@@ -570,12 +613,12 @@ rtredirect_fib(struct sockaddr *dst,
struct sockaddr *src,
u_int fibnum)
{
- struct rtentry *rt, *rt0 = NULL;
+ struct rtentry *rt;
int error = 0;
short *stat = NULL;
struct rt_addrinfo info;
struct ifaddr *ifa;
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
ifa = NULL;
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
@@ -585,7 +628,7 @@ rtredirect_fib(struct sockaddr *dst,
}
/* verify the gateway is directly reachable */
- if ((ifa = ifa_ifwithnet_fib(gateway, 0, fibnum)) == NULL) {
+ if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) {
error = ENETUNREACH;
goto out;
}
@@ -596,13 +639,20 @@ rtredirect_fib(struct sockaddr *dst,
* we have a routing loop, perhaps as a result of an interface
* going down recently.
*/
- if (!(flags & RTF_DONE) && rt &&
- (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
- error = EINVAL;
- else if (ifa_ifwithaddr_check(gateway))
+ if (!(flags & RTF_DONE) && rt) {
+ if (!sa_equal(src, rt->rt_gateway)) {
+ error = EINVAL;
+ goto done;
+ }
+ if (rt->rt_ifa != ifa && ifa->ifa_addr->sa_family != AF_LINK) {
+ error = EINVAL;
+ goto done;
+ }
+ }
+ if ((flags & RTF_GATEWAY) && ifa_ifwithaddr_check(gateway)) {
error = EHOSTUNREACH;
- if (error)
goto done;
+ }
/*
* Create a new entry if we just got back a wildcard entry
* or the lookup failed. This is necessary for hosts
@@ -622,36 +672,31 @@ rtredirect_fib(struct sockaddr *dst,
* Create new route, rather than smashing route to net.
*/
create:
- rt0 = rt;
- rt = NULL;
+ if (rt != NULL)
+ RTFREE_LOCKED(rt);
- flags |= RTF_GATEWAY | RTF_DYNAMIC;
+ flags |= RTF_DYNAMIC;
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_DST] = dst;
info.rti_info[RTAX_GATEWAY] = gateway;
info.rti_info[RTAX_NETMASK] = netmask;
info.rti_ifa = ifa;
info.rti_flags = flags;
- if (rt0 != NULL)
- RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */
error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
if (rt != NULL) {
RT_LOCK(rt);
- if (rt0 != NULL)
- EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
flags = rt->rt_flags;
}
- if (rt0 != NULL)
- RTFREE(rt0);
stat = &V_rtstat.rts_dynamic;
} else {
- struct rtentry *gwrt;
/*
* Smash the current notion of the gateway to
* this destination. Should check about netmask!!!
*/
+ if ((flags & RTF_GATEWAY) == 0)
+ rt->rt_flags &= ~RTF_GATEWAY;
rt->rt_flags |= RTF_MODIFIED;
flags |= RTF_MODIFIED;
stat = &V_rtstat.rts_newgateway;
@@ -659,13 +704,10 @@ rtredirect_fib(struct sockaddr *dst,
* add the key and gateway (in one malloc'd chunk).
*/
RT_UNLOCK(rt);
- RADIX_NODE_HEAD_LOCK(rnh);
+ RIB_WLOCK(rnh);
RT_LOCK(rt);
rt_setgate(rt, rt_key(rt), gateway);
- gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
- RTFREE_LOCKED(gwrt);
+ RIB_WUNLOCK(rnh);
}
} else
error = EHOSTUNREACH;
@@ -687,13 +729,6 @@ out:
ifa_free(ifa);
}
-int
-rtioctl(u_long req, caddr_t data)
-{
-
- return (rtioctl_fib(req, data, RT_DEFAULT_FIB));
-}
-
/*
* Routing table ioctl interface.
*/
@@ -715,21 +750,11 @@ rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
#endif /* INET */
}
-/*
- * For both ifa_ifwithroute() routines, 'ifa' is returned referenced.
- */
struct ifaddr *
-ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
-{
-
- return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB));
-}
-
-struct ifaddr *
-ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
+ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway,
u_int fibnum)
{
- register struct ifaddr *ifa;
+ struct ifaddr *ifa;
int not_found = 0;
if ((flags & RTF_GATEWAY) == 0) {
@@ -742,7 +767,7 @@ ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
*/
ifa = NULL;
if (flags & RTF_HOST)
- ifa = ifa_ifwithdstaddr_fib(dst, fibnum);
+ ifa = ifa_ifwithdstaddr(dst, fibnum);
if (ifa == NULL)
ifa = ifa_ifwithaddr(gateway);
} else {
@@ -751,12 +776,12 @@ ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
* or host, the gateway may still be on the
* other end of a pt to pt link.
*/
- ifa = ifa_ifwithdstaddr_fib(gateway, fibnum);
+ ifa = ifa_ifwithdstaddr(gateway, fibnum);
}
if (ifa == NULL)
- ifa = ifa_ifwithnet_fib(gateway, 0, fibnum);
+ ifa = ifa_ifwithnet(gateway, 0, fibnum);
if (ifa == NULL) {
- struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum);
+ struct rtentry *rt = rtalloc1_fib(gateway, 0, 0, fibnum);
if (rt == NULL)
return (NULL);
/*
@@ -800,19 +825,6 @@ ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
* all the bits of info needed
*/
int
-rtrequest(int req,
- struct sockaddr *dst,
- struct sockaddr *gateway,
- struct sockaddr *netmask,
- int flags,
- struct rtentry **ret_nrt)
-{
-
- return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt,
- RT_DEFAULT_FIB));
-}
-
-int
rtrequest_fib(int req,
struct sockaddr *dst,
struct sockaddr *gateway,
@@ -834,6 +846,443 @@ rtrequest_fib(int req,
return rtrequest1_fib(req, &info, ret_nrt, fibnum);
}
+
+/*
+ * Copy most of @rt data into @info.
+ *
+ * If @flags contains NHR_COPY, copies dst, netmask and gw to the
+ * pointers specified by the @info structure. Assumes such pointers
+ * are zeroed sockaddr-like structures with the sa_len field initialized
+ * to reflect the size of the provided buffer. If NHR_COPY is not
+ * specified, points the dst, netmask and gw @info fields to the
+ * appropriate @rt values.
+ *
+ * If @flags contains NHR_REF, does refcounting on rt_ifp.
+ *
+ * Returns 0 on success.
+ */
+int
+rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
+{
+ struct rt_metrics *rmx;
+ struct sockaddr *src, *dst;
+ int sa_len;
+
+ if (flags & NHR_COPY) {
+ /* Copy destination if dst is non-zero */
+ src = rt_key(rt);
+ dst = info->rti_info[RTAX_DST];
+ sa_len = src->sa_len;
+ if (dst != NULL) {
+ if (src->sa_len > dst->sa_len)
+ return (ENOMEM);
+ memcpy(dst, src, src->sa_len);
+ info->rti_addrs |= RTA_DST;
+ }
+
+ /* Copy mask if set && dst is non-zero */
+ src = rt_mask(rt);
+ dst = info->rti_info[RTAX_NETMASK];
+ if (src != NULL && dst != NULL) {
+
+ /*
+ * Radix stores a different value in sa_len,
+ * so assume rt_mask() has the same length
+ * as rt_key().
+ */
+ if (sa_len > dst->sa_len)
+ return (ENOMEM);
+ memcpy(dst, src, src->sa_len);
+ info->rti_addrs |= RTA_NETMASK;
+ }
+
+ /* Copy gateway if set && dst is non-zero */
+ src = rt->rt_gateway;
+ dst = info->rti_info[RTAX_GATEWAY];
+ if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){
+ if (src->sa_len > dst->sa_len)
+ return (ENOMEM);
+ memcpy(dst, src, src->sa_len);
+ info->rti_addrs |= RTA_GATEWAY;
+ }
+ } else {
+ info->rti_info[RTAX_DST] = rt_key(rt);
+ info->rti_addrs |= RTA_DST;
+ if (rt_mask(rt) != NULL) {
+ info->rti_info[RTAX_NETMASK] = rt_mask(rt);
+ info->rti_addrs |= RTA_NETMASK;
+ }
+ if (rt->rt_flags & RTF_GATEWAY) {
+ info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info->rti_addrs |= RTA_GATEWAY;
+ }
+ }
+
+ rmx = info->rti_rmx;
+ if (rmx != NULL) {
+ info->rti_mflags |= RTV_MTU;
+ rmx->rmx_mtu = rt->rt_mtu;
+ }
+
+ info->rti_flags = rt->rt_flags;
+ info->rti_ifp = rt->rt_ifp;
+ info->rti_ifa = rt->rt_ifa;
+
+ if (flags & NHR_REF) {
+ /* Do 'traditional' refcounting */
+ if_ref(info->rti_ifp);
+ }
+
+ return (0);
+}
+
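As the contract above requires, an NHR_COPY caller must pass zeroed sockaddr-like buffers whose sa_len records their capacity. A minimal sketch of that preparation, assuming it lives in route.c next to the static rt_exportinfo(); the example_* name is illustrative:

static int
example_export(struct rtentry *rt, struct rt_addrinfo *info,
    struct sockaddr_storage *dst, struct sockaddr_storage *mask)
{

        bzero(dst, sizeof(*dst));
        bzero(mask, sizeof(*mask));
        dst->ss_len = sizeof(*dst);     /* capacity, per the contract */
        mask->ss_len = sizeof(*mask);

        bzero(info, sizeof(*info));
        info->rti_info[RTAX_DST] = (struct sockaddr *)dst;
        info->rti_info[RTAX_NETMASK] = (struct sockaddr *)mask;

        return (rt_exportinfo(rt, info, NHR_COPY));
}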
+/*
+ * Looks up the route entry for @dst in the RIB database for fib @fibnum.
+ * Exports entry data to @info using rt_exportinfo().
+ *
+ * If @flags contains NHR_REF, refcounting is performed on rt_ifp.
+ * All references can be released later by calling rib_free_info().
+ *
+ * Returns 0 on success.
+ * Returns ENOENT for lookup failure, ENOMEM for export failure.
+ */
+int
+rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
+ uint32_t flowid, struct rt_addrinfo *info)
+{
+ struct rib_head *rh;
+ struct radix_node *rn;
+ struct rtentry *rt;
+ int error;
+
+ KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum"));
+ rh = rt_tables_get_rnh(fibnum, dst->sa_family);
+ if (rh == NULL)
+ return (ENOENT);
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr(__DECONST(void *, dst), &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rt = RNTORT(rn);
+ /* Ensure route & ifp are UP */
+ if (RT_LINK_IS_UP(rt->rt_ifp)) {
+ flags = (flags & NHR_REF) | NHR_COPY;
+ error = rt_exportinfo(rt, info, flags);
+ RIB_RUNLOCK(rh);
+
+ return (error);
+ }
+ }
+ RIB_RUNLOCK(rh);
+
+ return (ENOENT);
+}
+
+/*
+ * Releases all references acquired by rib_lookup_info() when
+ * called with NHR_REF flags.
+ */
+void
+rib_free_info(struct rt_addrinfo *info)
+{
+
+ if_rele(info->rti_ifp);
+}
+
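Putting rib_lookup_info() and rib_free_info() together, a hedged sketch of a lookup that keeps rt_ifp referenced while it is used (kernel context assumed, error handling trimmed; the example_* name is illustrative):

static int
example_lookup_ifp(uint32_t fibnum, const struct sockaddr *dst)
{
        struct rt_addrinfo info;
        struct sockaddr_storage sdst, smask;
        int error;

        bzero(&info, sizeof(info));
        bzero(&sdst, sizeof(sdst));
        bzero(&smask, sizeof(smask));
        sdst.ss_len = sizeof(sdst);
        smask.ss_len = sizeof(smask);
        info.rti_info[RTAX_DST] = (struct sockaddr *)&sdst;
        info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&smask;

        error = rib_lookup_info(fibnum, dst, NHR_REF, 0, &info);
        if (error != 0)
                return (error);

        /* ... info.rti_ifp is safe to use here ... */

        rib_free_info(&info);   /* drops the NHR_REF reference */
        return (0);
}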
+/*
+ * Iterates over all existing fibs in system calling
+ * @setwa_f function prior to traversing each fib.
+ * Calls @wa_f function for each element in current fib.
+ * If @af is not AF_UNSPEC, iterates only over fibs in that
+ * address family.
+ */
+void
+rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f,
+ void *arg)
+{
+ struct rib_head *rnh;
+ uint32_t fibnum;
+ int i;
+
+ for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ /* Do we want some specific family? */
+ if (af != AF_UNSPEC) {
+ rnh = rt_tables_get_rnh(fibnum, af);
+ if (rnh == NULL)
+ continue;
+ if (setwa_f != NULL)
+ setwa_f(rnh, fibnum, af, arg);
+
+ RIB_WLOCK(rnh);
+ rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg);
+ RIB_WUNLOCK(rnh);
+ continue;
+ }
+
+ for (i = 1; i <= AF_MAX; i++) {
+ rnh = rt_tables_get_rnh(fibnum, i);
+ if (rnh == NULL)
+ continue;
+ if (setwa_f != NULL)
+ setwa_f(rnh, fibnum, i, arg);
+
+ RIB_WLOCK(rnh);
+ rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg);
+ RIB_WUNLOCK(rnh);
+ }
+ }
+}
+
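Note that the walker callback runs with the per-fib RIB_WLOCK held, so it must not sleep or re-enter the routing table. A minimal sketch of a read-only walker, assuming the rt_walktree_f_t callback shape used above; the example_* names are illustrative:

static int
example_count_cb(struct rtentry *rt __unused, void *arg)
{

        (*(unsigned long *)arg)++;
        return (0);             /* non-zero aborts the walk */
}

static unsigned long
example_count_inet_routes(void)
{
        unsigned long n = 0;

        rt_foreach_fib_walk(AF_INET, NULL, example_count_cb, &n);
        return (n);
}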
+struct rt_delinfo
+{
+ struct rt_addrinfo info;
+ struct rib_head *rnh;
+ struct rtentry *head;
+};
+
+/*
+ * Conditionally unlinks @rn from radix tree based
+ * on info data passed in @arg.
+ */
+static int
+rt_checkdelroute(struct radix_node *rn, void *arg)
+{
+ struct rt_delinfo *di;
+ struct rt_addrinfo *info;
+ struct rtentry *rt;
+ int error;
+
+ di = (struct rt_delinfo *)arg;
+ rt = (struct rtentry *)rn;
+ info = &di->info;
+ error = 0;
+
+ info->rti_info[RTAX_DST] = rt_key(rt);
+ info->rti_info[RTAX_NETMASK] = rt_mask(rt);
+ info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+
+ rt = rt_unlinkrte(di->rnh, info, &error);
+ if (rt == NULL) {
+ /* Either not allowed or not matched. Skip entry */
+ return (0);
+ }
+
+ /* Entry was unlinked. Add to the list and return */
+ rt->rt_chain = di->head;
+ di->head = rt;
+
+ return (0);
+}
+
+/*
+ * Iterates over all existing fibs in system.
+ * Deletes each element for which @filter_f function returned
+ * non-zero value.
+ * If @af is not AF_UNSPEC, iterates only over fibs in that
+ * address family.
+ */
+void
+rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg)
+{
+ struct rib_head *rnh;
+ struct rt_delinfo di;
+ struct rtentry *rt;
+ uint32_t fibnum;
+ int i, start, end;
+
+ bzero(&di, sizeof(di));
+ di.info.rti_filter = filter_f;
+ di.info.rti_filterdata = arg;
+
+ for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ /* Do we want some specific family? */
+ if (af != AF_UNSPEC) {
+ start = af;
+ end = af;
+ } else {
+ start = 1;
+ end = AF_MAX;
+ }
+
+ for (i = start; i <= end; i++) {
+ rnh = rt_tables_get_rnh(fibnum, i);
+ if (rnh == NULL)
+ continue;
+ di.rnh = rnh;
+
+ RIB_WLOCK(rnh);
+ rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
+ RIB_WUNLOCK(rnh);
+
+ if (di.head == NULL)
+ continue;
+
+ /* We might have something to reclaim */
+ while (di.head != NULL) {
+ rt = di.head;
+ di.head = rt->rt_chain;
+ rt->rt_chain = NULL;
+
+ /* TODO std rt -> rt_addrinfo export */
+ di.info.rti_info[RTAX_DST] = rt_key(rt);
+ di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+
+ rt_notifydelete(rt, &di.info);
+ RTFREE_LOCKED(rt);
+ }
+
+ }
+ }
+}
+
+/*
+ * Delete Routes for a Network Interface
+ *
+ * Called for each routing entry via the rnh->rnh_walktree() call above
+ * to delete all route entries referencing a detaching network interface.
+ *
+ * Arguments:
+ * rt pointer to rtentry
+ * arg argument passed to rnh->rnh_walktree() - detaching interface
+ *
+ * Returns:
+ * 0 successful
+ * errno failed - reason indicated
+ */
+static int
+rt_ifdelroute(const struct rtentry *rt, void *arg)
+{
+ struct ifnet *ifp = arg;
+
+ if (rt->rt_ifp != ifp)
+ return (0);
+
+ /*
+ * Protect (sorta) against walktree recursion problems
+ * with cloned routes
+ */
+ if ((rt->rt_flags & RTF_UP) == 0)
+ return (0);
+
+ return (1);
+}
+
+/*
+ * Delete all remaining routes using this interface
+ * Unfortunately the only way to do this is to slog through
+ * the entire routing table looking for routes which point
+ * to this interface...oh well...
+ */
+void
+rt_flushifroutes_af(struct ifnet *ifp, int af)
+{
+ KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d",
+ __func__, af, AF_MAX));
+
+ rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp);
+}
+
+void
+rt_flushifroutes(struct ifnet *ifp)
+{
+
+ rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp);
+}
+
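rt_ifdelroute() above is one instance of the filter contract: return non-zero and the entry is unlinked and reclaimed. A hypothetical filter in the same style that flushes redirect-installed routes, assuming RTF_DYNAMIC marks them:

static int
example_deldynamic(const struct rtentry *rt, void *arg __unused)
{

        return ((rt->rt_flags & RTF_DYNAMIC) != 0);
}

static void
example_flush_dynamic(void)
{

        rt_foreach_fib_walk_del(AF_UNSPEC, example_deldynamic, NULL);
}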
+/*
+ * Conditionally unlinks rtentry matching data inside @info from @rnh.
+ * Returns unlinked, locked and referenced @rtentry on success,
+ * Returns NULL and sets @perror to:
+ * ESRCH - if prefix was not found,
+ * EADDRINUSE - if trying to delete PINNED route without appropriate flag.
+ * ENOENT - if supplied filter function returned 0 (not matched).
+ */
+static struct rtentry *
+rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror)
+{
+ struct sockaddr *dst, *netmask;
+ struct rtentry *rt;
+ struct radix_node *rn;
+
+ dst = info->rti_info[RTAX_DST];
+ netmask = info->rti_info[RTAX_NETMASK];
+
+ rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
+ if (rt == NULL) {
+ *perror = ESRCH;
+ return (NULL);
+ }
+
+ if ((info->rti_flags & RTF_PINNED) == 0) {
+ /* Check if target route can be deleted */
+ if (rt->rt_flags & RTF_PINNED) {
+ *perror = EADDRINUSE;
+ return (NULL);
+ }
+ }
+
+ if (info->rti_filter != NULL) {
+ if (info->rti_filter(rt, info->rti_filterdata) == 0) {
+ /* Not matched */
+ *perror = ENOENT;
+ return (NULL);
+ }
+
+ /*
+ * Filter function requested rte deletion.
+ * Ease the caller's work by filling in the remaining info
+ * from that particular entry.
+ */
+ info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ }
+
+ /*
+ * Remove the item from the tree and return it.
+ * Complain if it is not there and do no more processing.
+ */
+ *perror = ESRCH;
+#ifdef RADIX_MPATH
+ if (rt_mpath_capable(rnh))
+ rn = rt_mpath_unlink(rnh, info, rt, perror);
+ else
+#endif
+ rn = rnh->rnh_deladdr(dst, netmask, &rnh->head);
+ if (rn == NULL)
+ return (NULL);
+
+ if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
+ panic ("rtrequest delete");
+
+ rt = RNTORT(rn);
+ RT_LOCK(rt);
+ RT_ADDREF(rt);
+ rt->rt_flags &= ~RTF_UP;
+
+ *perror = 0;
+
+ return (rt);
+}
+
+static void
+rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info)
+{
+ struct ifaddr *ifa;
+
+ /*
+ * give the protocol a chance to keep things in sync.
+ */
+ ifa = rt->rt_ifa;
+ if (ifa != NULL && ifa->ifa_rtrequest != NULL)
+ ifa->ifa_rtrequest(RTM_DELETE, rt, info);
+
+ /*
+ * One more rtentry floating around that is not
+ * linked to the routing table. rttrash will be decremented
+ * when RTFREE(rt) is eventually called.
+ */
+ V_rttrash++;
+}
+
+
/*
* These (questionable) definitions of apparent local variables apply
* to the next two functions. XXXXXX!!!
@@ -845,13 +1294,6 @@ rtrequest_fib(int req,
#define ifpaddr info->rti_info[RTAX_IFP]
#define flags info->rti_flags
-int
-rt_getifa(struct rt_addrinfo *info)
-{
-
- return (rt_getifa_fib(info, RT_DEFAULT_FIB));
-}
-
/*
* Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined,
* it will be referenced so the caller must free it.
@@ -868,7 +1310,7 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
*/
if (info->rti_ifp == NULL && ifpaddr != NULL &&
ifpaddr->sa_family == AF_LINK &&
- (ifa = ifa_ifwithnet_fib(ifpaddr, 0, fibnum)) != NULL) {
+ (ifa = ifa_ifwithnet(ifpaddr, 0, fibnum)) != NULL) {
info->rti_ifp = ifa->ifa_ifp;
ifa_free(ifa);
}
@@ -882,10 +1324,10 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
if (sa != NULL && info->rti_ifp != NULL)
info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
else if (dst != NULL && gateway != NULL)
- info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway,
+ info->rti_ifa = ifa_ifwithroute(flags, dst, gateway,
fibnum);
else if (sa != NULL)
- info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa,
+ info->rti_ifa = ifa_ifwithroute(flags, sa, sa,
fibnum);
}
if ((ifa = info->rti_ifa) != NULL) {
@@ -896,94 +1338,70 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
return (error);
}
-/*
- * Expunges references to a route that's about to be reclaimed.
- * The route must be locked.
- */
-int
-rtexpunge(struct rtentry *rt)
+static int
+if_updatemtu_cb(struct radix_node *rn, void *arg)
{
-#if !defined(RADIX_MPATH)
- struct radix_node *rn;
-#else
- struct rt_addrinfo info;
- int fib;
- struct rtentry *rt0;
-#endif
- struct radix_node_head *rnh;
- struct ifaddr *ifa;
- int error = 0;
-
- /*
- * Find the correct routing tree to use for this Address Family
- */
- rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
- RT_LOCK_ASSERT(rt);
- if (rnh == NULL)
- return (EAFNOSUPPORT);
- RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
+ struct rtentry *rt;
+ struct if_mtuinfo *ifmtu;
-#ifdef RADIX_MPATH
- fib = rt->rt_fibnum;
- bzero(&info, sizeof(info));
- info.rti_ifp = rt->rt_ifp;
- info.rti_flags = RTF_RNH_LOCKED;
- info.rti_info[RTAX_DST] = rt_key(rt);
- info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr;
+ rt = (struct rtentry *)rn;
+ ifmtu = (struct if_mtuinfo *)arg;
- RT_UNLOCK(rt);
- error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib);
+ if (rt->rt_ifp != ifmtu->ifp)
+ return (0);
- if (error == 0 && rt0 != NULL) {
- rt = rt0;
- RT_LOCK(rt);
- } else if (error != 0) {
- RT_LOCK(rt);
- return (error);
+ if (rt->rt_mtu >= ifmtu->mtu) {
+ /* We have to decrease mtu regardless of flags */
+ rt->rt_mtu = ifmtu->mtu;
+ return (0);
}
-#else
+
/*
- * Remove the item from the tree; it should be there,
- * but when callers invoke us blindly it may not (sigh).
+ * New MTU is bigger. Check if we are allowed to alter it.
*/
- rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
- if (rn == NULL) {
- error = ESRCH;
- goto bad;
+ if ((rt->rt_flags & (RTF_FIXEDMTU | RTF_GATEWAY | RTF_HOST)) != 0) {
+
+ /*
+ * Skip routes with user-supplied MTU and
+ * non-interface routes
+ */
+ return (0);
}
- KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
- ("unexpected flags 0x%x", rn->rn_flags));
- KASSERT(rt == RNTORT(rn),
- ("lookup mismatch, rt %p rn %p", rt, rn));
-#endif /* RADIX_MPATH */
- rt->rt_flags &= ~RTF_UP;
+ /* We are safe to update route MTU */
+ rt->rt_mtu = ifmtu->mtu;
- /*
- * Give the protocol a chance to keep things in sync.
- */
- if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
- struct rt_addrinfo info;
+ return (0);
+}
- bzero((caddr_t)&info, sizeof(info));
- info.rti_flags = rt->rt_flags;
- info.rti_info[RTAX_DST] = rt_key(rt);
- info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- info.rti_info[RTAX_NETMASK] = rt_mask(rt);
- ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
- }
+void
+rt_updatemtu(struct ifnet *ifp)
+{
+ struct if_mtuinfo ifmtu;
+ struct rib_head *rnh;
+ int i, j;
+
+ ifmtu.ifp = ifp;
/*
- * one more rtentry floating around that is not
- * linked to the routing table.
+ * Try to update rt_mtu for all routes using this interface
+ * Unfortunately the only way to do this is to traverse all
+ * routing tables in all fibs/domains.
*/
- V_rttrash++;
-#if !defined(RADIX_MPATH)
-bad:
-#endif
- return (error);
+ for (i = 1; i <= AF_MAX; i++) {
+ ifmtu.mtu = if_getmtu_family(ifp, i);
+ for (j = 0; j < rt_numfibs; j++) {
+ rnh = rt_tables_get_rnh(j, i);
+ if (rnh == NULL)
+ continue;
+ RIB_WLOCK(rnh);
+ rnh->rnh_walktree(&rnh->head, if_updatemtu_cb, &ifmtu);
+ RIB_WUNLOCK(rnh);
+ }
+ }
}
+
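A sketch of the intended call site, as a hypothetical driver MTU-change path (not from this patch; direct if_mtu assignment is assumed valid here): after the interface MTU changes, rt_updatemtu() pushes the new value out to every route over the interface.

static int
example_set_mtu(struct ifnet *ifp, int mtu)
{

        ifp->if_mtu = mtu;      /* validation assumed done by caller */
        rt_updatemtu(ifp);      /* walks all fibs/families over ifp */
        return (0);
}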
#if 0
int p_sockaddr(char *buf, int buflen, struct sockaddr *s);
int rt_print(char *buf, int buflen, struct rtentry *rt);
@@ -1036,26 +1454,32 @@ rt_print(char *buf, int buflen, struct rtentry *rt)
#endif
#ifdef RADIX_MPATH
-static int
-rn_mpath_update(int req, struct rt_addrinfo *info,
- struct radix_node_head *rnh, struct rtentry **ret_nrt)
+/*
+ * Deletes key for single-path routes, unlinks rtentry with
+ * gateway specified in @info from multi-path routes.
+ *
+ * Returns the unlinked entry. In case of failure, returns NULL
+ * and sets @perror to ESRCH.
+ */
+static struct radix_node *
+rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info,
+ struct rtentry *rto, int *perror)
{
/*
* if we got multipath routes, we require users to specify
* a matching RTAX_GATEWAY.
*/
- struct rtentry *rt, *rto = NULL;
- register struct radix_node *rn;
- int error = 0;
+ struct rtentry *rt;
+ struct radix_node *rn;
+ struct sockaddr *gw;
- rn = rnh->rnh_lookup(dst, netmask, rnh);
- if (rn == NULL)
- return (ESRCH);
- rto = rt = RNTORT(rn);
+ gw = info->rti_info[RTAX_GATEWAY];
+ rt = rt_mpath_matchgate(rto, gw);
+ if (rt == NULL) {
+ *perror = ESRCH;
+ return (NULL);
+ }
- rt = rt_mpath_matchgate(rt, gateway);
- if (rt == NULL)
- return (ESRCH);
/*
* this is the first entry in the chain
*/
@@ -1078,67 +1502,95 @@ rn_mpath_update(int req, struct rt_addrinfo *info,
* check the case when there is only
* one route in the chain.
*/
- if (gateway &&
- (rt->rt_gateway->sa_len != gateway->sa_len ||
- memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
- error = ESRCH;
- else {
- /*
- * remove from tree before returning it
- * to the caller
- */
- rn = rnh->rnh_deladdr(dst, netmask, rnh);
- KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
- goto gwdelete;
+ if (gw &&
+ (rt->rt_gateway->sa_len != gw->sa_len ||
+ memcmp(rt->rt_gateway, gw, gw->sa_len))) {
+ *perror = ESRCH;
+ return (NULL);
}
-
}
+
/*
* use the normal delete code to remove
* the first entry
*/
- if (req != RTM_DELETE)
- goto nondelete;
-
- error = ENOENT;
- goto done;
+ rn = rnh->rnh_deladdr(dst, netmask, &rnh->head);
+ *perror = 0;
+ return (rn);
}
/*
* if the entry is 2nd and on up
*/
- if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
+ if (rt_mpath_deldup(rto, rt) == 0)
panic ("rtrequest1: rt_mpath_deldup");
-gwdelete:
- RT_LOCK(rt);
- RT_ADDREF(rt);
- if (req == RTM_DELETE) {
- rt->rt_flags &= ~RTF_UP;
- /*
- * One more rtentry floating around that is not
- * linked to the routing table. rttrash will be decremented
- * when RTFREE(rt) is eventually called.
- */
- V_rttrash++;
+ *perror = 0;
+ rn = (struct radix_node *)rt;
+ return (rn);
+}
+#endif
+
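On a RADIX_MPATH kernel the unlink above means a delete request must name the gateway of the specific path; a minimal caller sketch, assuming dst/netmask/gw sockaddrs have already been built:

	struct rt_addrinfo info;
	int error;

	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_GATEWAY] = gw;	/* selects the path to unlink */
	error = rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
	/* error == ESRCH if no path with that gateway exists */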
+#ifdef FLOWTABLE
+static struct rtentry *
+rt_flowtable_check_route(struct rib_head *rnh, struct rt_addrinfo *info)
+{
+#if defined(INET6) || defined(INET)
+ struct radix_node *rn;
+#endif
+ struct rtentry *rt0;
+
+ rt0 = NULL;
+ /* "flow-table" only supports IPv6 and IPv4 at the moment. */
+ switch (dst->sa_family) {
+#ifdef INET6
+ case AF_INET6:
+#endif
+#ifdef INET
+ case AF_INET:
+#endif
+#if defined(INET6) || defined(INET)
+ rn = rnh->rnh_matchaddr(dst, &rnh->head);
+ if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ struct sockaddr *mask;
+ u_char *m, *n;
+ int len;
+
+ /*
+ * compare mask to see if the new route is
+ * more specific than the existing one
+ */
+ rt0 = RNTORT(rn);
+ RT_LOCK(rt0);
+ RT_ADDREF(rt0);
+ RT_UNLOCK(rt0);
+ /*
+ * A host route is already present, so
+ * leave the flow-table entries as is.
+ */
+ if (rt0->rt_flags & RTF_HOST) {
+ RTFREE(rt0);
+ rt0 = NULL;
+ } else if (!(flags & RTF_HOST) && netmask) {
+ mask = rt_mask(rt0);
+ len = mask->sa_len;
+ m = (u_char *)mask;
+ n = (u_char *)netmask;
+ while (len-- > 0) {
+ if (*n != *m)
+ break;
+ n++;
+ m++;
+ }
+ if (len == 0 || (*n < *m)) {
+ RTFREE(rt0);
+ rt0 = NULL;
+ }
+ }
+ }
+#endif/* INET6 || INET */
}
-
-nondelete:
- if (req != RTM_DELETE)
- panic("unrecognized request %d", req);
-
- /*
- * If the caller wants it, then it can have it,
- * but it's up to it to free the rtentry as we won't be
- * doing it.
- */
- if (ret_nrt) {
- *ret_nrt = rt;
- RT_UNLOCK(rt);
- } else
- RTFREE_LOCKED(rt);
-done:
- return (error);
+ return (rt0);
}
#endif
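The byte-wise mask walk above decides specificity at the first differing byte; a worked illustration (addresses are examples only):

	/*
	 * new netmask  = 255.255.255.0 (/24):  ff ff ff 00
	 * rt_mask(rt0) = 255.255.0.0   (/16):  ff ff 00 00
	 * First difference: *n = 0xff > *m = 0x00, so rt0 is kept and
	 * its now-too-coarse flow-table entries are flushed later; had
	 * the new mask been shorter (*n < *m) or identical (len == 0),
	 * rt0 would be dropped and the flow table left untouched.
	 */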
@@ -1146,19 +1598,19 @@ int
rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
u_int fibnum)
{
- int error = 0, needlock = 0;
- register struct rtentry *rt;
+ int error = 0;
+ struct rtentry *rt, *rt_old;
#ifdef FLOWTABLE
- register struct rtentry *rt0;
+ struct rtentry *rt0;
#endif
- register struct radix_node *rn;
- register struct radix_node_head *rnh;
+ struct radix_node *rn;
+ struct rib_head *rnh;
struct ifaddr *ifa;
struct sockaddr *ndst;
struct sockaddr_storage mdst;
-#define senderr(x) { error = x ; goto bad; }
KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
+ KASSERT((flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked"));
switch (dst->sa_family) {
case AF_INET6:
case AF_INET:
@@ -1175,12 +1627,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
if (rnh == NULL)
return (EAFNOSUPPORT);
- needlock = ((flags & RTF_RNH_LOCKED) == 0);
- flags &= ~RTF_RNH_LOCKED;
- if (needlock)
- RADIX_NODE_HEAD_LOCK(rnh);
- else
- RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
+
/*
* If we are adding a host route then we don't want to put
* a netmask in the tree, nor do we want to clone it.
@@ -1194,52 +1641,14 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
dst = (struct sockaddr *)&mdst;
}
-#ifdef RADIX_MPATH
- if (rn_mpath_capable(rnh)) {
- error = rn_mpath_update(req, info, rnh, ret_nrt);
- /*
- * "bad" holds true for the success case
- * as well
- */
- if (error != ENOENT)
- goto bad;
- error = 0;
- }
-#endif
- if ((flags & RTF_PINNED) == 0) {
- /* Check if target route can be deleted */
- rt = (struct rtentry *)rnh->rnh_lookup(dst,
- netmask, rnh);
- if ((rt != NULL) && (rt->rt_flags & RTF_PINNED))
- senderr(EADDRINUSE);
- }
- /*
- * Remove the item from the tree and return it.
- * Complain if it is not there and do no more processing.
- */
- rn = rnh->rnh_deladdr(dst, netmask, rnh);
- if (rn == NULL)
- senderr(ESRCH);
- if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
- panic ("rtrequest delete");
- rt = RNTORT(rn);
- RT_LOCK(rt);
- RT_ADDREF(rt);
- rt->rt_flags &= ~RTF_UP;
+ RIB_WLOCK(rnh);
+ rt = rt_unlinkrte(rnh, info, &error);
+ RIB_WUNLOCK(rnh);
+ if (error != 0)
+ return (error);
- /*
- * give the protocol a chance to keep things in sync.
- */
- if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
- ifa->ifa_rtrequest(RTM_DELETE, rt, info);
-
- /*
- * One more rtentry floating around that is not
- * linked to the routing table. rttrash will be decremented
- * when RTFREE(rt) is eventually called.
- */
- V_rttrash++;
+ rt_notifydelete(rt, info);
/*
* If the caller wants it, then it can have it,
@@ -1260,37 +1669,32 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
break;
case RTM_ADD:
if ((flags & RTF_GATEWAY) && !gateway)
- senderr(EINVAL);
+ return (EINVAL);
if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
(gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
- senderr(EINVAL);
+ return (EINVAL);
if (info->rti_ifa == NULL) {
error = rt_getifa_fib(info, fibnum);
if (error)
- senderr(error);
+ return (error);
} else
ifa_ref(info->rti_ifa);
ifa = info->rti_ifa;
- rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
+ rt = uma_zalloc(V_rtzone, M_NOWAIT);
if (rt == NULL) {
- if (ifa != NULL)
- ifa_free(ifa);
- senderr(ENOBUFS);
+ ifa_free(ifa);
+ return (ENOBUFS);
}
- RT_LOCK_INIT(rt);
rt->rt_flags = RTF_UP | flags;
rt->rt_fibnum = fibnum;
/*
* Add the gateway. Possibly re-malloc-ing the storage for it.
*/
- RT_LOCK(rt);
if ((error = rt_setgate(rt, dst, gateway)) != 0) {
- RT_LOCK_DESTROY(rt);
- if (ifa != NULL)
- ifa_free(ifa);
+ ifa_free(ifa);
uma_zfree(V_rtzone, rt);
- senderr(error);
+ return (error);
}
/*
@@ -1313,111 +1717,81 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
*/
rt->rt_ifa = ifa;
rt->rt_ifp = ifa->ifa_ifp;
- rt->rt_rmx.rmx_weight = 1;
+ rt->rt_weight = 1;
+ rt_setmetrics(info, rt);
+
+ RIB_WLOCK(rnh);
+ RT_LOCK(rt);
#ifdef RADIX_MPATH
/* do not permit exactly the same dst/mask/gw pair */
- if (rn_mpath_capable(rnh) &&
+ if (rt_mpath_capable(rnh) &&
rt_mpath_conflict(rnh, rt, netmask)) {
- if (rt->rt_ifa) {
- ifa_free(rt->rt_ifa);
- }
- Free(rt_key(rt));
- RT_LOCK_DESTROY(rt);
+ RIB_WUNLOCK(rnh);
+
+ ifa_free(rt->rt_ifa);
+ R_Free(rt_key(rt));
uma_zfree(V_rtzone, rt);
- senderr(EEXIST);
+ return (EEXIST);
}
#endif
#ifdef FLOWTABLE
- rt0 = NULL;
- /* "flow-table" only supports IPv6 and IPv4 at the moment. */
- switch (dst->sa_family) {
-#ifdef INET6
- case AF_INET6:
-#endif
-#ifdef INET
- case AF_INET:
-#endif
-#if defined(INET6) || defined(INET)
- rn = rnh->rnh_matchaddr(dst, rnh);
- if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
- struct sockaddr *mask;
- u_char *m, *n;
- int len;
-
- /*
- * compare mask to see if the new route is
- * more specific than the existing one
- */
- rt0 = RNTORT(rn);
- RT_LOCK(rt0);
- RT_ADDREF(rt0);
- RT_UNLOCK(rt0);
- /*
- * A host route is already present, so
- * leave the flow-table entries as is.
- */
- if (rt0->rt_flags & RTF_HOST) {
- RTFREE(rt0);
- rt0 = NULL;
- } else if (!(flags & RTF_HOST) && netmask) {
- mask = rt_mask(rt0);
- len = mask->sa_len;
- m = (u_char *)mask;
- n = (u_char *)netmask;
- while (len-- > 0) {
- if (*n != *m)
- break;
- n++;
- m++;
- }
- if (len == 0 || (*n < *m)) {
- RTFREE(rt0);
- rt0 = NULL;
- }
- }
- }
-#endif/* INET6 || INET */
- }
+ rt0 = rt_flowtable_check_route(rnh, info);
#endif /* FLOWTABLE */
/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
- rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
+ rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);
+
+ rt_old = NULL;
+ if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) {
+
+ /*
+			 * Force removal and retry the addition.
+			 * TODO: better multipath & pinned support
+ */
+ struct sockaddr *info_dst = info->rti_info[RTAX_DST];
+ info->rti_info[RTAX_DST] = ndst;
+ /* Do not delete existing PINNED(interface) routes */
+ info->rti_flags &= ~RTF_PINNED;
+ rt_old = rt_unlinkrte(rnh, info, &error);
+ info->rti_flags |= RTF_PINNED;
+ info->rti_info[RTAX_DST] = info_dst;
+ if (rt_old != NULL)
+ rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head,
+ rt->rt_nodes);
+ }
+ RIB_WUNLOCK(rnh);
+
+ if (rt_old != NULL)
+ RT_UNLOCK(rt_old);
+
/*
* If it still failed to go into the tree,
* then un-make it (this should be a function)
*/
if (rn == NULL) {
- if (rt->rt_ifa)
- ifa_free(rt->rt_ifa);
- Free(rt_key(rt));
- RT_LOCK_DESTROY(rt);
+ ifa_free(rt->rt_ifa);
+ R_Free(rt_key(rt));
uma_zfree(V_rtzone, rt);
#ifdef FLOWTABLE
if (rt0 != NULL)
RTFREE(rt0);
#endif
- senderr(EEXIST);
+ return (EEXIST);
}
#ifdef FLOWTABLE
else if (rt0 != NULL) {
- switch (dst->sa_family) {
-#ifdef INET6
- case AF_INET6:
- flowtable_route_flush(V_ip6_ft, rt0);
- break;
-#endif
-#ifdef INET
- case AF_INET:
- flowtable_route_flush(V_ip_ft, rt0);
- break;
-#endif
- }
+ flowtable_route_flush(dst->sa_family, rt0);
RTFREE(rt0);
}
#endif
+ if (rt_old != NULL) {
+ rt_notifydelete(rt_old, info);
+ RTFREE(rt_old);
+ }
+
/*
* If this protocol has something to add to this then
* allow it to do that as well.
@@ -1433,16 +1807,19 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
*ret_nrt = rt;
RT_ADDREF(rt);
}
+ rnh->rnh_gen++; /* Routing table updated */
RT_UNLOCK(rt);
break;
+ case RTM_CHANGE:
+ RIB_WLOCK(rnh);
+ error = rtrequest1_fib_change(rnh, info, ret_nrt, fibnum);
+ RIB_WUNLOCK(rnh);
+ break;
default:
error = EOPNOTSUPP;
}
-bad:
- if (needlock)
- RADIX_NODE_HEAD_UNLOCK(rnh);
+
return (error);
-#undef senderr
}
#undef dst
@@ -1452,20 +1829,147 @@ bad:
#undef ifpaddr
#undef flags
+static int
+rtrequest1_fib_change(struct rib_head *rnh, struct rt_addrinfo *info,
+ struct rtentry **ret_nrt, u_int fibnum)
+{
+ struct rtentry *rt = NULL;
+ int error = 0;
+ int free_ifa = 0;
+ int family, mtu;
+ struct if_mtuinfo ifmtu;
+
+ rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
+ info->rti_info[RTAX_NETMASK], &rnh->head);
+
+ if (rt == NULL)
+ return (ESRCH);
+
+#ifdef RADIX_MPATH
+ /*
+ * If we got multipath routes,
+ * we require users to specify a matching RTAX_GATEWAY.
+ */
+ if (rt_mpath_capable(rnh)) {
+ rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
+ if (rt == NULL)
+ return (ESRCH);
+ }
+#endif
+
+ RT_LOCK(rt);
+
+ rt_setmetrics(info, rt);
+
+ /*
+ * New gateway could require new ifaddr, ifp;
+ * flags may also be different; ifp may be specified
+ * by ll sockaddr when protocol address is ambiguous
+ */
+ if (((rt->rt_flags & RTF_GATEWAY) &&
+ info->rti_info[RTAX_GATEWAY] != NULL) ||
+ info->rti_info[RTAX_IFP] != NULL ||
+ (info->rti_info[RTAX_IFA] != NULL &&
+ !sa_equal(info->rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) {
+
+ error = rt_getifa_fib(info, fibnum);
+ if (info->rti_ifa != NULL)
+ free_ifa = 1;
+
+ if (error != 0)
+ goto bad;
+ }
+
+ /* Check if outgoing interface has changed */
+ if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa &&
+ rt->rt_ifa != NULL && rt->rt_ifa->ifa_rtrequest != NULL) {
+ rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, info);
+ ifa_free(rt->rt_ifa);
+ }
+ /* Update gateway address */
+ if (info->rti_info[RTAX_GATEWAY] != NULL) {
+ error = rt_setgate(rt, rt_key(rt), info->rti_info[RTAX_GATEWAY]);
+ if (error != 0)
+ goto bad;
+
+ rt->rt_flags &= ~RTF_GATEWAY;
+ rt->rt_flags |= (RTF_GATEWAY & info->rti_flags);
+ }
+
+ if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa) {
+ ifa_ref(info->rti_ifa);
+ rt->rt_ifa = info->rti_ifa;
+ rt->rt_ifp = info->rti_ifp;
+ }
+ /* Allow some flags to be toggled on change. */
+ rt->rt_flags &= ~RTF_FMASK;
+ rt->rt_flags |= info->rti_flags & RTF_FMASK;
+
+ if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest != NULL)
+ rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info);
+
+ /* Alter route MTU if necessary */
+ if (rt->rt_ifp != NULL) {
+ family = info->rti_info[RTAX_DST]->sa_family;
+ mtu = if_getmtu_family(rt->rt_ifp, family);
+ /* Set default MTU */
+ if (rt->rt_mtu == 0)
+ rt->rt_mtu = mtu;
+ if (rt->rt_mtu != mtu) {
+ /* Check if we really need to update */
+ ifmtu.ifp = rt->rt_ifp;
+ ifmtu.mtu = mtu;
+ if_updatemtu_cb(rt->rt_nodes, &ifmtu);
+ }
+ }
+
+ if (ret_nrt) {
+ *ret_nrt = rt;
+ RT_ADDREF(rt);
+ }
+bad:
+ RT_UNLOCK(rt);
+ if (free_ifa != 0)
+ ifa_free(info->rti_ifa);
+ return (error);
+}
+
+static void
+rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt)
+{
+
+ if (info->rti_mflags & RTV_MTU) {
+ if (info->rti_rmx->rmx_mtu != 0) {
+
+ /*
+ * MTU was explicitly provided by user.
+ * Keep it.
+ */
+ rt->rt_flags |= RTF_FIXEDMTU;
+ } else {
+
+ /*
+ * User explicitly sets MTU to 0.
+ * Assume rollback to default.
+ */
+ rt->rt_flags &= ~RTF_FIXEDMTU;
+ }
+ rt->rt_mtu = info->rti_rmx->rmx_mtu;
+ }
+ if (info->rti_mflags & RTV_WEIGHT)
+ rt->rt_weight = info->rti_rmx->rmx_weight;
+	/* Userland -> kernel timebase conversion. */
+ if (info->rti_mflags & RTV_EXPIRE)
+ rt->rt_expire = info->rti_rmx->rmx_expire ?
+ info->rti_rmx->rmx_expire - time_second + time_uptime : 0;
+}
+
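A caller-side sketch exercising rtrequest1_fib_change() and rt_setmetrics() together, assuming pre-built sockaddrs; note rt_setmetrics() only consumes the rti_rmx members whose RTV_ bit is set in rti_mflags:

	struct rt_addrinfo info;
	struct rt_metrics rmx;
	int error;

	bzero(&info, sizeof(info));
	bzero(&rmx, sizeof(rmx));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_GATEWAY] = new_gw;	/* optional new gateway */
	info.rti_flags = RTF_GATEWAY | RTF_STATIC;
	rmx.rmx_weight = 2;
	info.rti_rmx = &rmx;
	info.rti_mflags = RTV_WEIGHT;		/* only rmx_weight is valid */
	error = rtrequest1_fib(RTM_CHANGE, &info, NULL, fibnum);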
int
rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
{
/* XXX dst may be overwritten, can we move this to below */
int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
-#ifdef INVARIANTS
- struct radix_node_head *rnh;
- rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family);
-#endif
-
- RT_LOCK_ASSERT(rt);
- RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
-
/*
* Prepare to store the gateway in rt->rt_gateway.
* Both dst and gateway are stored one after the other in the same
@@ -1487,7 +1991,7 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
* Free()/free() handle a NULL argument just fine.
*/
bcopy(dst, new, dlen);
- Free(rt_key(rt)); /* free old block, if any */
+ R_Free(rt_key(rt)); /* free old block, if any */
rt_key(rt) = (struct sockaddr *)new;
rt->rt_gateway = (struct sockaddr *)(new + dlen);
}
@@ -1503,9 +2007,9 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
{
- register u_char *cp1 = (u_char *)src;
- register u_char *cp2 = (u_char *)dst;
- register u_char *cp3 = (u_char *)netmask;
+ u_char *cp1 = (u_char *)src;
+ u_char *cp2 = (u_char *)dst;
+ u_char *cp3 = (u_char *)netmask;
u_char *cplim = cp2 + *cp3;
u_char *cplim2 = cp2 + *cp1;
@@ -1537,7 +2041,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
int didwork = 0;
int a_failure = 0;
static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
if (flags & RTF_HOST) {
dst = ifa->ifa_dstaddr;
@@ -1558,13 +2062,13 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
break;
}
if (fibnum == RT_ALL_FIBS) {
- if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
+ if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD)
#ifndef __rtems__
startfib = endfib = ifa->ifa_ifp->if_fib;
#else /* __rtems__ */
startfib = endfib = BSD_DEFAULT_FIB;
#endif /* __rtems__ */
- } else {
+ else {
startfib = 0;
endfib = rt_numfibs - 1;
}
@@ -1609,10 +2113,10 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
if (rnh == NULL)
/* this table doesn't exist but others might */
continue;
- RADIX_NODE_HEAD_RLOCK(rnh);
- rn = rnh->rnh_lookup(dst, netmask, rnh);
+ RIB_RLOCK(rnh);
+ rn = rnh->rnh_lookup(dst, netmask, &rnh->head);
#ifdef RADIX_MPATH
- if (rn_mpath_capable(rnh)) {
+ if (rt_mpath_capable(rnh)) {
if (rn == NULL)
error = ESRCH;
@@ -1635,7 +2139,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
error = (rn == NULL ||
(rn->rn_flags & RNF_ROOT) ||
RNTORT(rn)->rt_ifa != ifa);
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
if (error) {
/* this is only an error if bad on ALL tables */
continue;
@@ -1660,32 +2164,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
info.rti_info[RTAX_NETMASK] = netmask;
error = rtrequest1_fib(cmd, &info, &rt, fibnum);
- if ((error == EEXIST) && (cmd == RTM_ADD)) {
- /*
- * Interface route addition failed.
- * Atomically delete current prefix generating
- * RTM_DELETE message, and retry adding
- * interface prefix.
- */
- rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
- RADIX_NODE_HEAD_LOCK(rnh);
-
- /* Delete old prefix */
- info.rti_ifa = NULL;
- info.rti_flags = RTF_RNH_LOCKED;
-
- error = rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
- if (error == 0) {
- info.rti_ifa = ifa;
- info.rti_flags = flags | RTF_RNH_LOCKED |
- (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED;
- error = rtrequest1_fib(cmd, &info, &rt, fibnum);
- }
-
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
-
-
if (error == 0 && rt != NULL) {
/*
* notify any listening routing agents of the change
@@ -1760,15 +2238,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
return (error);
}
-#ifndef BURN_BRIDGES
-/* special one for inet internal use. may not use. */
-int
-rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
-{
- return (rtinit1(ifa, cmd, flags, RT_ALL_FIBS));
-}
-#endif
-
/*
* Set up a routing table entry, normally
* for an interface.
diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h
index 0baa9a4c..d44dc9d5 100644
--- a/freebsd/sys/net/route.h
+++ b/freebsd/sys/net/route.h
@@ -33,6 +33,9 @@
#ifndef _NET_ROUTE_H_
#define _NET_ROUTE_H_
+#include <sys/counter.h>
+#include <net/vnet.h>
+
/*
* Kernel resident routing tables.
*
@@ -41,32 +44,39 @@
*/
/*
- * A route consists of a destination address, a reference
- * to a routing entry, and a reference to an llentry.
- * These are often held by protocols in their control
- * blocks, e.g. inpcb.
+ * Struct route consists of a destination address, a route entry
+ * pointer, and a link-layer prepend data pointer along with its
+ * length.
*/
struct route {
struct rtentry *ro_rt;
struct llentry *ro_lle;
- struct in_ifaddr *ro_ia;
- int ro_flags;
+ /*
+ * ro_prepend and ro_plen are only used for bpf to pass in a
+ * preformed header. They are not cacheable.
+ */
+ char *ro_prepend;
+ uint16_t ro_plen;
+ uint16_t ro_flags;
+ uint16_t ro_mtu; /* saved ro_rt mtu */
+ uint16_t spare;
struct sockaddr ro_dst;
};
+#define RT_L2_ME_BIT 2 /* dst L2 addr is our address */
+#define RT_MAY_LOOP_BIT 3 /* dst may require loop copy */
+#define RT_HAS_HEADER_BIT	4	/* mbuf already has its header prepended */
+
#define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */
#define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */
+#define RT_L2_ME (1 << RT_L2_ME_BIT) /* 0x0004 */
+#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT) /* 0x0008 */
+#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT) /* 0x0010 */
-/*
- * These numbers are used by reliable protocols for determining
- * retransmission behavior and are included in the routing structure.
- */
-struct rt_metrics_lite {
- u_long rmx_mtu; /* MTU for this path */
- u_long rmx_expire; /* lifetime for route, e.g. redirect */
- u_long rmx_pksent; /* packets sent using this route */
- u_long rmx_weight; /* absolute weight */
-};
+#define RT_REJECT 0x0020 /* Destination is reject */
+#define RT_BLACKHOLE 0x0040 /* Destination is blackhole */
+#define RT_HAS_GW 0x0080 /* Destination has GW */
+#define RT_LLE_CACHE 0x0100 /* Cache link layer */
struct rt_metrics {
u_long rmx_locks; /* Kernel must leave these values alone */
@@ -91,14 +101,24 @@ struct rt_metrics {
#define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */
#define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ))
+/* lle state is exported in rmx_state rt_metrics field */
+#define rmx_state rmx_weight
+
+/*
+ * Keep a generation count of the routing table, incremented on route addition,
+ * so we can invalidate caches. This is accessed without a lock, as precision
+ * is not required.
+ */
+typedef volatile u_int rt_gen_t; /* tree generation (for adds) */
+#define RT_GEN(fibnum, af) rt_tables_get_gen(fibnum, af)
+
#define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */
#define RT_ALL_FIBS -1 /* Announce event for every fib */
+#ifdef _KERNEL
extern u_int rt_numfibs; /* number of usable routing tables */
-extern u_int rt_add_addr_allfibs; /* Announce interfaces to all fibs */
-/*
- * XXX kernel function pointer `rt_output' is visible to applications.
- */
-struct mbuf;
+VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */
+#define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs)
+#endif
/*
* We distinguish between routes to hosts and routes to networks,
@@ -114,6 +134,8 @@ struct mbuf;
#include <net/radix_mpath.h>
#endif
#endif
+
+#if defined(_KERNEL) || defined(_WANT_RTENTRY)
struct rtentry {
struct radix_node rt_nodes[2]; /* tree glue, and other values */
/*
@@ -124,33 +146,20 @@ struct rtentry {
#define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key)))
#define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask)))
struct sockaddr *rt_gateway; /* value */
- int rt_flags; /* up/down?, host/net */
- int rt_refcnt; /* # held references */
struct ifnet *rt_ifp; /* the answer: interface to use */
struct ifaddr *rt_ifa; /* the answer: interface address to use */
- struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */
- u_int rt_fibnum; /* which FIB */
-#ifdef _KERNEL
- /* XXX ugly, user apps use this definition but don't have a mtx def */
- struct mtx rt_mtx; /* mutex for routing entry */
-#endif
+ int rt_flags; /* up/down?, host/net */
+ int rt_refcnt; /* # held references */
+ u_int rt_fibnum; /* which FIB */
+ u_long rt_mtu; /* MTU for this path */
+ u_long rt_weight; /* absolute weight */
+ u_long rt_expire; /* lifetime for route, e.g. redirect */
+#define rt_endzero rt_pksent
+ counter_u64_t rt_pksent; /* packets sent using this route */
+ struct mtx rt_mtx; /* mutex for routing entry */
+ struct rtentry *rt_chain; /* pointer to next rtentry to delete */
};
-
-/*
- * Following structure necessary for 4.3 compatibility;
- * We should eventually move it to a compat file.
- */
-struct ortentry {
- u_long rt_hash; /* to speed lookups */
- struct sockaddr rt_dst; /* key */
- struct sockaddr rt_gateway; /* value */
- short rt_flags; /* up/down?, host/net */
- short rt_refcnt; /* # held references */
- u_long rt_use; /* raw # packets forwarded */
- struct ifnet *rt_ifp; /* the answer: interface to use */
-};
-
-#define rt_use rt_rmx.rmx_pksent
+#endif /* _KERNEL || _WANT_RTENTRY */
#define RTF_UP 0x1 /* route usable */
#define RTF_GATEWAY 0x2 /* destination is a gateway */
@@ -169,15 +178,10 @@ struct ortentry {
#define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */
#define RTF_PROTO2 0x4000 /* protocol specific routing flag */
#define RTF_PROTO1 0x8000 /* protocol specific routing flag */
-
-/* XXX: temporary to stay API/ABI compatible with userland */
-#ifndef _KERNEL
-#define RTF_PRCLONING 0x10000 /* unused, for compatibility */
-#endif
-
+/* 0x10000 unused, was RTF_PRCLONING */
/* 0x20000 unused, was RTF_WASCLONED */
#define RTF_PROTO3 0x40000 /* protocol specific routing flag */
-/* 0x80000 unused */
+#define RTF_FIXEDMTU 0x80000 /* MTU was explicitly specified */
#define RTF_PINNED 0x100000 /* route is immutable */
#define RTF_LOCAL 0x200000 /* route represents a local address */
#define RTF_BROADCAST 0x400000 /* route represents a bcast address */
@@ -185,7 +189,10 @@ struct ortentry {
/* 0x8000000 and up unassigned */
#define RTF_STICKY 0x10000000 /* always route dst->src */
-#define RTF_RNH_LOCKED 0x40000000 /* radix node head is locked */
+#define RTF_RNH_LOCKED 0x40000000 /* unused */
+
+#define RTF_GWFLAG_COMPAT 0x80000000 /* a compatibility bit for interacting
+ with existing routing apps */
/* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
#define RTF_FMASK \
@@ -193,6 +200,40 @@ struct ortentry {
RTF_REJECT | RTF_STATIC | RTF_STICKY)
/*
+ * fib_ nexthop API flags.
+ */
+
+/* Consumer-visible nexthop info flags */
+#define NHF_REJECT 0x0010 /* RTF_REJECT */
+#define NHF_BLACKHOLE 0x0020 /* RTF_BLACKHOLE */
+#define NHF_REDIRECT 0x0040 /* RTF_DYNAMIC|RTF_MODIFIED */
+#define NHF_DEFAULT 0x0080 /* Default route */
+#define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */
+#define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */
+
+/* Nexthop request flags */
+#define NHR_IFAIF 0x01 /* Return ifa_ifp interface */
+#define NHR_REF 0x02 /* For future use */
+
+/* Control plane route request flags */
+#define NHR_COPY 0x100 /* Copy rte data */
+
+#ifdef _KERNEL
+/* rte<>ro_flags translation */
+static inline void
+rt_update_ro_flags(struct route *ro)
+{
+ int rt_flags = ro->ro_rt->rt_flags;
+
+	ro->ro_flags &= ~(RT_REJECT|RT_BLACKHOLE|RT_HAS_GW);
+
+ ro->ro_flags |= (rt_flags & RTF_REJECT) ? RT_REJECT : 0;
+ ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? RT_BLACKHOLE : 0;
+ ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0;
+}
+#endif
+
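A sketch of the intended call site for rt_update_ro_flags(), assuming the caller already holds a reference on ro->ro_rt from an earlier lookup and m is the outgoing mbuf:

	if (ro->ro_rt != NULL) {
		rt_update_ro_flags(ro);
		if (ro->ro_flags & RT_BLACKHOLE) {
			m_freem(m);		/* discard silently */
			return (0);
		}
		if (ro->ro_flags & RT_REJECT)
			return (EHOSTUNREACH);
	}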
+/*
* Routing statistics.
*/
struct rtstat {
@@ -233,8 +274,8 @@ struct rt_msghdr {
#define RTM_REDIRECT 0x6 /* Told to use different route */
#define RTM_MISS 0x7 /* Lookup failed on this address */
#define RTM_LOCK 0x8 /* fix specified metrics */
-#define RTM_OLDADD 0x9 /* caused by SIOCADDRT */
-#define RTM_OLDDEL 0xa /* caused by SIOCDELRT */
+ /* 0x9 */
+ /* 0xa */
#define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */
#define RTM_NEWADDR 0xc /* address being added to iface */
#define RTM_DELADDR 0xd /* address being removed from iface */
@@ -282,12 +323,19 @@ struct rt_msghdr {
#define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */
#define RTAX_MAX 8 /* size of array to allocate */
+typedef int rt_filter_f_t(const struct rtentry *, void *);
+
struct rt_addrinfo {
- int rti_addrs;
- struct sockaddr *rti_info[RTAX_MAX];
- int rti_flags;
- struct ifaddr *rti_ifa;
- struct ifnet *rti_ifp;
+	int	rti_addrs;		/* Route RTA_* address bitmask */
+ int rti_flags; /* Route RTF_ flags */
+ struct sockaddr *rti_info[RTAX_MAX]; /* Sockaddr data */
+ struct ifaddr *rti_ifa; /* value of rt_ifa addr */
+ struct ifnet *rti_ifp; /* route interface */
+ rt_filter_f_t *rti_filter; /* filter function */
+	void	*rti_filterdata;	/* filter parameters */
+ u_long rti_mflags; /* metrics RTV_ flags */
+ u_long rti_spare; /* Will be used for fib */
+ struct rt_metrics *rti_rmx; /* Pointer to route metrics */
};
/*
@@ -302,17 +350,25 @@ struct rt_addrinfo {
sizeof(long) : \
1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) )
+#define sa_equal(a, b) ( \
+ (((const struct sockaddr *)(a))->sa_len == ((const struct sockaddr *)(b))->sa_len) && \
+ (bcmp((a), (b), ((const struct sockaddr *)(b))->sa_len) == 0))
+
#ifdef _KERNEL
#define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \
|| (ifp)->if_link_state == LINK_STATE_UP)
#define RT_LOCK_INIT(_rt) \
- mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
+ mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW)
#define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx)
#define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx)
#define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx)
#define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
+#define RT_UNLOCK_COND(_rt) do { \
+ if (mtx_owned(&(_rt)->rt_mtx)) \
+ mtx_unlock(&(_rt)->rt_mtx); \
+} while (0)
#define RT_ADDREF(_rt) do { \
RT_LOCK_ASSERT(_rt); \
@@ -349,6 +405,7 @@ struct rt_addrinfo {
if ((_ro)->ro_flags & RT_NORTREF) { \
(_ro)->ro_flags &= ~RT_NORTREF; \
(_ro)->ro_rt = NULL; \
+ (_ro)->ro_lle = NULL; \
} else { \
RT_LOCK((_ro)->ro_rt); \
RTFREE_LOCKED((_ro)->ro_rt); \
@@ -356,9 +413,24 @@ struct rt_addrinfo {
} \
} while (0)
-struct radix_node_head *rt_tables_get_rnh(int, int);
+/*
+ * Validate a cached route based on a supplied cookie. If there is an
+ * out-of-date cache, simply free it. Update the generation number
+ * for the new allocation.
+ */
+#define RT_VALIDATE(ro, cookiep, fibnum) do { \
+ rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family); \
+ if (*(cookiep) != cookie) { \
+ if ((ro)->ro_rt != NULL) { \
+ RTFREE((ro)->ro_rt); \
+ (ro)->ro_rt = NULL; \
+ } \
+ *(cookiep) = cookie; \
+ } \
+} while (0)
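Usage sketch for RT_VALIDATE, assuming a protocol control block that caches a struct route together with a rt_gen_t cookie (the pcb field names are hypothetical):

	RT_VALIDATE(&pcb->pcb_route, &pcb->pcb_rt_cookie, fibnum);
	if (pcb->pcb_route.ro_rt == NULL)
		/* cache was stale or empty: redo the lookup */
		rtalloc_ign_fib(&pcb->pcb_route, 0, fibnum);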
struct ifmultiaddr;
+struct rib_head;
void rt_ieee80211msg(struct ifnet *, int, void *, size_t);
void rt_ifannouncemsg(struct ifnet *, int);
@@ -372,6 +444,9 @@ int rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
void rt_newmaddrmsg(int, struct ifmultiaddr *);
int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
+struct rib_head *rt_table_init(int);
+void rt_table_destroy(struct rib_head *);
+u_int rt_tables_get_gen(int table, int fam);
int rtsock_addrmsg(int, struct ifaddr *, int);
int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
@@ -379,8 +454,6 @@ int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
/*
* Note the following locking behavior:
*
- * rtalloc_ign() and rtalloc() return ro->ro_rt unlocked
- *
* rtalloc1() returns a locked rtentry
*
* rtfree() and RTFREE_LOCKED() require a locked rtentry
@@ -388,27 +461,20 @@ int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
* RTFREE() uses an unlocked entry.
*/
-int rtexpunge(struct rtentry *);
void rtfree(struct rtentry *);
-int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *);
+void rt_updatemtu(struct ifnet *);
+
+typedef int rt_walktree_f_t(struct rtentry *, void *);
+typedef void rt_setwarg_t(struct rib_head *, uint32_t, int, void *);
+void rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *);
+void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg);
+void rt_flushifroutes_af(struct ifnet *, int);
+void rt_flushifroutes(struct ifnet *ifp);
/* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */
/* These are used by old code not yet converted to use multiple FIBS */
-int rt_getifa(struct rt_addrinfo *);
-void rtalloc_ign(struct route *ro, u_long ignflags);
-void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */
struct rtentry *rtalloc1(struct sockaddr *, int, u_long);
int rtinit(struct ifaddr *, int, int);
-int rtioctl(u_long, caddr_t);
-void rtredirect(struct sockaddr *, struct sockaddr *,
- struct sockaddr *, int, struct sockaddr *);
-int rtrequest(int, struct sockaddr *,
- struct sockaddr *, struct sockaddr *, int, struct rtentry **);
-
-#ifndef BURN_BRIDGES
-/* defaults to "all" FIBs */
-int rtinit_fib(struct ifaddr *, int, int);
-#endif
/* XXX MRT NEW VERSIONS THAT USE FIBs
 * For now the protocol independent versions are the same as the AF_INET ones
@@ -416,7 +482,6 @@ int rtinit_fib(struct ifaddr *, int, int);
*/
int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum);
void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum);
-void rtalloc_fib(struct route *ro, u_int fibnum);
struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int);
int rtioctl_fib(u_long, caddr_t, u_int);
void rtredirect_fib(struct sockaddr *, struct sockaddr *,
@@ -424,13 +489,10 @@ void rtredirect_fib(struct sockaddr *, struct sockaddr *,
int rtrequest_fib(int, struct sockaddr *,
struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int);
+int rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t,
+ struct rt_addrinfo *);
+void rib_free_info(struct rt_addrinfo *info);
-#include <sys/eventhandler.h>
-typedef void (*rtevent_arp_update_fn)(void *, struct rtentry *, uint8_t *, struct sockaddr *);
-typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *);
-/* route_arp_update_event is no longer generated; see arp_update_event */
-EVENTHANDLER_DECLARE(route_arp_update_event, rtevent_arp_update_fn);
-EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn);
#endif
#endif
diff --git a/freebsd/sys/net/route_var.h b/freebsd/sys/net/route_var.h
new file mode 100644
index 00000000..a8ef56a5
--- /dev/null
+++ b/freebsd/sys/net/route_var.h
@@ -0,0 +1,76 @@
+/*-
+ * Copyright (c) 2015-2016
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_ROUTE_VAR_H_
+#define _NET_ROUTE_VAR_H_
+
+struct rib_head {
+ struct radix_head head;
+ rn_matchaddr_f_t *rnh_matchaddr; /* longest match for sockaddr */
+ rn_addaddr_f_t *rnh_addaddr; /* add based on sockaddr*/
+ rn_deladdr_f_t *rnh_deladdr; /* remove based on sockaddr */
+ rn_lookup_f_t *rnh_lookup; /* exact match for sockaddr */
+ rn_walktree_t *rnh_walktree; /* traverse tree */
+	rn_walktree_from_t	*rnh_walktree_from; /* traverse tree below a given root */
+	rn_close_t	*rnh_close;	/* do something when the last ref drops */
+ rt_gen_t rnh_gen; /* generation counter */
+ int rnh_multipath; /* multipath capable ? */
+ struct radix_node rnh_nodes[3]; /* empty tree for common case */
+ struct rwlock rib_lock; /* config/data path lock */
+ struct radix_mask_head rmhead; /* masks radix head */
+};
+
+#define RIB_RLOCK(rh) rw_rlock(&(rh)->rib_lock)
+#define RIB_RUNLOCK(rh) rw_runlock(&(rh)->rib_lock)
+#define RIB_WLOCK(rh) rw_wlock(&(rh)->rib_lock)
+#define RIB_WUNLOCK(rh) rw_wunlock(&(rh)->rib_lock)
+#define RIB_LOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_LOCKED)
+#define RIB_WLOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_WLOCKED)
+
+struct rib_head *rt_tables_get_rnh(int fib, int family);
+
+/* rte<>nhop translation */
+static inline uint16_t
+fib_rte_to_nh_flags(int rt_flags)
+{
+ uint16_t res;
+
+ res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0;
+ res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0;
+ res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0;
+ res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0;
+ res |= (rt_flags & RTF_GATEWAY) ? NHF_GATEWAY : 0;
+
+ return (res);
+}
+
+
+#endif
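A consumer-side sketch for fib_rte_to_nh_flags(), assuming rt is a referenced rtentry and m the packet being routed:

	uint16_t nhf;

	nhf = fib_rte_to_nh_flags(rt->rt_flags);
	if (nhf & NHF_BLACKHOLE)
		m_freem(m);			/* drop silently */
	else if (nhf & NHF_REJECT)
		error = EHOSTUNREACH;		/* report unreachable */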
diff --git a/freebsd/sys/net/rss_config.h b/freebsd/sys/net/rss_config.h
new file mode 100644
index 00000000..2ab32a43
--- /dev/null
+++ b/freebsd/sys/net/rss_config.h
@@ -0,0 +1,138 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_RSS_CONFIG_H_
+#define _NET_RSS_CONFIG_H_
+
+#include <netinet/in.h> /* in_addr_t */
+
+/*
+ * Supported RSS hash functions.
+ */
+#define RSS_HASH_NAIVE 0x00000001 /* Poor but fast hash. */
+#define RSS_HASH_TOEPLITZ 0x00000002 /* Required by RSS. */
+#define RSS_HASH_CRC32 0x00000004 /* Future; some NICs do it. */
+
+#define RSS_HASH_MASK (RSS_HASH_NAIVE | RSS_HASH_TOEPLITZ)
+
+/*
+ * Instances of struct inpcbinfo declare an RSS hash type indicating what
+ * header fields are covered.
+ */
+#define RSS_HASHFIELDS_NONE 0
+#define RSS_HASHFIELDS_4TUPLE 1
+#define RSS_HASHFIELDS_2TUPLE 2
+
+/*
+ * Define RSS representations of the M_HASHTYPE_* values, representing
+ * which particular bits are supported. The NICs can then use this to
+ * calculate which hash types to enable and which not to enable.
+ *
+ * The fact that these line up with M_HASHTYPE_* is not to be relied
+ * upon.
+ */
+#define RSS_HASHTYPE_RSS_IPV4 (1 << 1) /* IPv4 2-tuple */
+#define RSS_HASHTYPE_RSS_TCP_IPV4 (1 << 2) /* TCPv4 4-tuple */
+#define RSS_HASHTYPE_RSS_IPV6 (1 << 3) /* IPv6 2-tuple */
+#define RSS_HASHTYPE_RSS_TCP_IPV6 (1 << 4) /* TCPv6 4-tuple */
+#define RSS_HASHTYPE_RSS_IPV6_EX (1 << 5) /* IPv6 2-tuple + ext hdrs */
+#define	RSS_HASHTYPE_RSS_TCP_IPV6_EX	(1 << 6)	/* TCPv6 4-tuple + ext hdrs */
+#define RSS_HASHTYPE_RSS_UDP_IPV4 (1 << 7) /* IPv4 UDP 4-tuple */
+#define RSS_HASHTYPE_RSS_UDP_IPV4_EX (1 << 8) /* IPv4 UDP 4-tuple + ext hdrs */
+#define RSS_HASHTYPE_RSS_UDP_IPV6 (1 << 9) /* IPv6 UDP 4-tuple */
+#define RSS_HASHTYPE_RSS_UDP_IPV6_EX (1 << 10) /* IPv6 UDP 4-tuple + ext hdrs */
+
+/*
+ * Compile-time limits on the size of the indirection table.
+ */
+#define RSS_MAXBITS 7
+#define RSS_TABLE_MAXLEN (1 << RSS_MAXBITS)
+
+/*
+ * Maximum key size used throughout. It's OK for hardware to use only the
+ * first 16 bytes, which is all that's required for IPv4.
+ */
+#define RSS_KEYSIZE 40
+
+/*
+ * For RSS hash methods that do a software hash on an mbuf, the packet
+ * direction (ingress / egress) is required.
+ *
+ * The default direction (INGRESS) is "receive into the NIC", i.e.,
+ * what the hardware is hashing on.
+ */
+#define RSS_HASH_PKT_INGRESS 0
+#define RSS_HASH_PKT_EGRESS 1
+
+/*
+ * Rate limited debugging routines.
+ */
+#define RSS_DEBUG(format, ...) do { \
+ if (rss_debug) { \
+ static struct timeval lastfail; \
+ static int curfail; \
+ if (ppsratecheck(&lastfail, &curfail, 5)) \
+ printf("RSS (%s:%u): " format, __func__, __LINE__,\
+ ##__VA_ARGS__); \
+ } \
+} while (0)
+
+extern int rss_debug;
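For example, a hash-to-bucket mapping path might emit (rate-limited to 5 pps while rss_debug is non-zero; the variables are illustrative):

	RSS_DEBUG("hash 0x%08x -> bucket %u\n", hash_val, bucket_id);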
+
+/*
+ * Device driver interfaces to query RSS properties that must be programmed
+ * into hardware.
+ */
+u_int rss_getbits(void);
+u_int rss_getbucket(u_int hash);
+u_int rss_get_indirection_to_bucket(u_int index);
+u_int rss_getcpu(u_int bucket);
+void rss_getkey(uint8_t *key);
+u_int rss_gethashalgo(void);
+u_int rss_getnumbuckets(void);
+u_int rss_getnumcpus(void);
+u_int rss_gethashconfig(void);
+
+/*
+ * Hash calculation functions.
+ */
+uint32_t rss_hash(u_int datalen, const uint8_t *data);
+
+/*
+ * Network stack interface to query desired CPU affinity of a packet.
+ */
+struct mbuf * rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid);
+u_int rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type);
+int rss_hash2bucket(uint32_t hash_val, uint32_t hash_type,
+ uint32_t *bucket_id);
+int rss_m2bucket(struct mbuf *m, uint32_t *bucket_id);
+
+#endif /* !_NET_RSS_CONFIG_H_ */
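A hedged driver-side sketch combining the queries above; hw_set_key()/hw_set_indir() and sc stand in for device-specific register writes and are not real APIs:

	uint8_t key[RSS_KEYSIZE];
	u_int i, bucket;

	rss_getkey(key);			/* kernel's global Toeplitz key */
	hw_set_key(sc, key, sizeof(key));	/* hypothetical */
	for (i = 0; i < RSS_TABLE_MAXLEN; i++) {
		bucket = rss_get_indirection_to_bucket(i);
		hw_set_indir(sc, i, bucket % sc->nqueues);	/* hypothetical */
	}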
diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c
index e768e17b..1e69bcdf 100644
--- a/freebsd/sys/net/rtsock.c
+++ b/freebsd/sys/net/rtsock.c
@@ -54,17 +54,21 @@
#include <sys/systm.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/netisr.h>
#include <net/raw_cb.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
+#include <netinet/ip_carp.h>
#ifdef INET6
+#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
@@ -72,34 +76,6 @@
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
-struct if_data32 {
- uint8_t ifi_type;
- uint8_t ifi_physical;
- uint8_t ifi_addrlen;
- uint8_t ifi_hdrlen;
- uint8_t ifi_link_state;
- uint8_t ifi_spare_char1;
- uint8_t ifi_spare_char2;
- uint8_t ifi_datalen;
- uint32_t ifi_mtu;
- uint32_t ifi_metric;
- uint32_t ifi_baudrate;
- uint32_t ifi_ipackets;
- uint32_t ifi_ierrors;
- uint32_t ifi_opackets;
- uint32_t ifi_oerrors;
- uint32_t ifi_collisions;
- uint32_t ifi_ibytes;
- uint32_t ifi_obytes;
- uint32_t ifi_imcasts;
- uint32_t ifi_omcasts;
- uint32_t ifi_iqdrops;
- uint32_t ifi_noproto;
- uint32_t ifi_hwassist;
- int32_t ifi_epoch;
- struct timeval32 ifi_lastchange;
-};
-
struct if_msghdr32 {
uint16_t ifm_msglen;
uint8_t ifm_version;
@@ -107,7 +83,7 @@ struct if_msghdr32 {
int32_t ifm_addrs;
int32_t ifm_flags;
uint16_t ifm_index;
- struct if_data32 ifm_data;
+ struct if_data ifm_data;
};
struct if_msghdrl32 {
@@ -120,7 +96,7 @@ struct if_msghdrl32 {
uint16_t _ifm_spare1;
uint16_t ifm_len;
uint16_t ifm_data_off;
- struct if_data32 ifm_data;
+ struct if_data ifm_data;
};
struct ifa_msghdrl32 {
@@ -134,7 +110,7 @@ struct ifa_msghdrl32 {
uint16_t ifam_len;
uint16_t ifam_data_off;
int32_t ifam_metric;
- struct if_data32 ifam_data;
+ struct if_data ifam_data;
};
#endif /* COMPAT_FREEBSD32 */
@@ -144,18 +120,22 @@ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
static struct sockaddr route_src = { 2, PF_ROUTE, };
static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
+/* These are external hooks for CARP. */
+int (*carp_get_vhid_p)(struct ifaddr *);
+
/*
* Used by rtsock/raw_input callback code to decide whether to filter the update
* notification to a socket bound to a particular FIB.
*/
#define RTS_FILTER_FIB M_PROTO8
-static struct {
+typedef struct {
int ip_count; /* attached w/ AF_INET */
int ip6_count; /* attached w/ AF_INET6 */
- int ipx_count; /* attached w/ AF_IPX */
int any_count; /* total attached */
-} route_cb;
+} route_cb_t;
+static VNET_DEFINE(route_cb_t, route_cb);
+#define V_route_cb VNET(route_cb)
struct mtx rtsock_mtx;
MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
@@ -174,20 +154,19 @@ struct walkarg {
};
static void rts_input(struct mbuf *m);
-static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
-static int rt_msg2(int type, struct rt_addrinfo *rtinfo,
- caddr_t cp, struct walkarg *w);
+static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo);
+static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
+ struct walkarg *w, int *plen);
static int rt_xaddrs(caddr_t cp, caddr_t cplim,
struct rt_addrinfo *rtinfo);
static int sysctl_dumpentry(struct radix_node *rn, void *vw);
static int sysctl_iflist(int af, struct walkarg *w);
static int sysctl_ifmalist(int af, struct walkarg *w);
-static int route_output(struct mbuf *m, struct socket *so);
-static void rt_setmetrics(u_long which, const struct rt_metrics *in,
- struct rt_metrics_lite *out);
-static void rt_getmetrics(const struct rt_metrics_lite *in,
- struct rt_metrics *out);
+static int route_output(struct mbuf *m, struct socket *so, ...);
+static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out);
static void rt_dispatch(struct mbuf *, sa_family_t);
+static struct sockaddr *rtsock_fix_netmask(struct sockaddr *dst,
+ struct sockaddr *smask, struct sockaddr_storage *dmask);
static struct netisr_handler rtsock_nh = {
.nh_name = "rtsock",
@@ -214,17 +193,35 @@ SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
"maximum routing socket dispatch queue length");
static void
-rts_init(void)
+vnet_rts_init(void)
{
int tmp;
+ if (IS_DEFAULT_VNET(curvnet)) {
#ifndef __rtems__
- if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
- rtsock_nh.nh_qlimit = tmp;
+ if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
+ rtsock_nh.nh_qlimit = tmp;
+#endif /* __rtems__ */
+ netisr_register(&rtsock_nh);
+ }
+#ifdef VIMAGE
+ else
+ netisr_register_vnet(&rtsock_nh);
#endif
- netisr_register(&rtsock_nh);
}
-SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
+VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_rts_init, 0);
+
+#ifdef VIMAGE
+static void
+vnet_rts_uninit(void)
+{
+
+ netisr_unregister_vnet(&rtsock_nh);
+}
+VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_rts_uninit, 0);
+#endif
static int
raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
@@ -294,23 +291,13 @@ static int
rts_attach(struct socket *so, int proto, struct thread *td)
{
struct rawcb *rp;
- int s, error;
+ int error;
KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
/* XXX */
rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
- if (rp == NULL)
- return ENOBUFS;
- /*
- * The splnet() is necessary to block protocols from sending
- * error notifications (like RTM_REDIRECT or RTM_LOSING) while
- * this PCB is extant but incompletely initialized.
- * Probably we should try to do more of this work beforehand and
- * eliminate the spl.
- */
- s = splnet();
so->so_pcb = (caddr_t)rp;
#ifndef __rtems__
so->so_fibnum = td->td_proc->p_fibnum;
@@ -320,7 +307,6 @@ rts_attach(struct socket *so, int proto, struct thread *td)
error = raw_attach(so, proto);
rp = sotorawcb(so);
if (error) {
- splx(s);
so->so_pcb = NULL;
free(rp, M_PCB);
return error;
@@ -328,20 +314,16 @@ rts_attach(struct socket *so, int proto, struct thread *td)
RTSOCK_LOCK();
switch(rp->rcb_proto.sp_protocol) {
case AF_INET:
- route_cb.ip_count++;
+ V_route_cb.ip_count++;
break;
case AF_INET6:
- route_cb.ip6_count++;
- break;
- case AF_IPX:
- route_cb.ipx_count++;
+ V_route_cb.ip6_count++;
break;
}
- route_cb.any_count++;
+ V_route_cb.any_count++;
RTSOCK_UNLOCK();
soisconnected(so);
so->so_options |= SO_USELOOPBACK;
- splx(s);
return 0;
}
@@ -372,16 +354,13 @@ rts_detach(struct socket *so)
RTSOCK_LOCK();
switch(rp->rcb_proto.sp_protocol) {
case AF_INET:
- route_cb.ip_count--;
+ V_route_cb.ip_count--;
break;
case AF_INET6:
- route_cb.ip6_count--;
- break;
- case AF_IPX:
- route_cb.ipx_count--;
+ V_route_cb.ip6_count--;
break;
}
- route_cb.any_count--;
+ V_route_cb.any_count--;
RTSOCK_UNLOCK();
raw_usrreqs.pru_detach(so);
}
@@ -562,17 +541,25 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
/*ARGSUSED*/
static int
-route_output(struct mbuf *m, struct socket *so)
+route_output(struct mbuf *m, struct socket *so, ...)
{
-#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
struct rt_msghdr *rtm = NULL;
struct rtentry *rt = NULL;
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
struct rt_addrinfo info;
- int len, error = 0;
+ struct sockaddr_storage ss;
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ int i, rti_need_deembed = 0;
+#endif
+ int alloc_len = 0, len, error = 0, fibnum;
struct ifnet *ifp = NULL;
union sockaddr_union saun;
sa_family_t saf = AF_UNSPEC;
+ struct rawcb *rp = NULL;
+ struct walkarg w;
+
+ fibnum = so->so_fibnum;
#define senderr(e) { error = e; goto flush;}
if (m == NULL || ((m->m_len < sizeof(long)) &&
@@ -582,31 +569,53 @@ route_output(struct mbuf *m, struct socket *so)
panic("route_output");
len = m->m_pkthdr.len;
if (len < sizeof(*rtm) ||
- len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
- info.rti_info[RTAX_DST] = NULL;
+ len != mtod(m, struct rt_msghdr *)->rtm_msglen)
senderr(EINVAL);
- }
- R_Malloc(rtm, struct rt_msghdr *, len);
- if (rtm == NULL) {
- info.rti_info[RTAX_DST] = NULL;
+
+ /*
+	 * Most current messages are in the 200-240 byte range, so minimize
+	 * possible re-allocation on reply by using a larger buffer aligned
+	 * on a 1k boundary.
+ */
+ alloc_len = roundup2(len, 1024);
+ if ((rtm = malloc(alloc_len, M_TEMP, M_NOWAIT)) == NULL)
senderr(ENOBUFS);
- }
+
m_copydata(m, 0, len, (caddr_t)rtm);
+ bzero(&info, sizeof(info));
+ bzero(&w, sizeof(w));
+
if (rtm->rtm_version != RTM_VERSION) {
- info.rti_info[RTAX_DST] = NULL;
+ /* Do not touch message since format is unknown */
+ free(rtm, M_TEMP);
+ rtm = NULL;
senderr(EPROTONOSUPPORT);
}
+
+ /*
+	 * Starting from here, it is possible to alter the original
+	 * message and to insert the caller PID and an error value.
+ */
+
#ifndef __rtems__
rtm->rtm_pid = curproc->p_pid;
#else /* __rtems__ */
rtm->rtm_pid = BSD_DEFAULT_PID;
#endif /* __rtems__ */
- bzero(&info, sizeof(info));
info.rti_addrs = rtm->rtm_addrs;
- if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
- info.rti_info[RTAX_DST] = NULL;
+
+ info.rti_mflags = rtm->rtm_inits;
+ info.rti_rmx = &rtm->rtm_rmx;
+
+ /*
+ * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
+	 * link-local addresses because rtrequest requires addresses with
+ * embedded scope id.
+ */
+ if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info))
senderr(EINVAL);
- }
+
info.rti_flags = rtm->rtm_flags;
if (info.rti_info[RTAX_DST] == NULL ||
info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
@@ -634,11 +643,16 @@ route_output(struct mbuf *m, struct socket *so)
*/
if (info.rti_info[RTAX_GATEWAY] != NULL &&
info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
- struct route gw_ro;
+ struct rt_addrinfo ginfo;
+ struct sockaddr *gdst;
+
+ bzero(&ginfo, sizeof(ginfo));
+ bzero(&ss, sizeof(ss));
+ ss.ss_len = sizeof(ss);
+
+ ginfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&ss;
+ gdst = info.rti_info[RTAX_GATEWAY];
- bzero(&gw_ro, sizeof(gw_ro));
- gw_ro.ro_dst = *info.rti_info[RTAX_GATEWAY];
- rtalloc_ign_fib(&gw_ro, 0, so->so_fibnum);
/*
* A host route through the loopback interface is
 * installed for each interface address. In pre 8.0
@@ -649,18 +663,21 @@ route_output(struct mbuf *m, struct socket *so)
* AF_LINK sa_family type of the rt_gateway, and the
* rt_ifp has the IFF_LOOPBACK flag set.
*/
- if (gw_ro.ro_rt != NULL &&
- gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK &&
- gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)
- info.rti_flags &= ~RTF_GATEWAY;
- if (gw_ro.ro_rt != NULL)
- RTFREE(gw_ro.ro_rt);
+ if (rib_lookup_info(fibnum, gdst, NHR_REF, 0, &ginfo) == 0) {
+ if (ss.ss_family == AF_LINK &&
+ ginfo.rti_ifp->if_flags & IFF_LOOPBACK) {
+ info.rti_flags &= ~RTF_GATEWAY;
+ info.rti_flags |= RTF_GWFLAG_COMPAT;
+ }
+ rib_free_info(&ginfo);
+ }
}
switch (rtm->rtm_type) {
struct rtentry *saved_nrt;
case RTM_ADD:
+ case RTM_CHANGE:
if (info.rti_info[RTAX_GATEWAY] == NULL)
senderr(EINVAL);
saved_nrt = NULL;
@@ -669,14 +686,19 @@ route_output(struct mbuf *m, struct socket *so)
if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
(rtm->rtm_flags & RTF_LLDATA) != 0) {
error = lla_rt_output(rtm, &info);
+#ifdef INET6
+ if (error == 0)
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
break;
}
- error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
- so->so_fibnum);
- if (error == 0 && saved_nrt) {
+ error = rtrequest1_fib(rtm->rtm_type, &info, &saved_nrt,
+ fibnum);
+ if (error == 0 && saved_nrt != NULL) {
+#ifdef INET6
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
RT_LOCK(saved_nrt);
- rt_setmetrics(rtm->rtm_inits,
- &rtm->rtm_rmx, &saved_nrt->rt_rmx);
rtm->rtm_index = saved_nrt->rt_ifp->if_index;
RT_REMREF(saved_nrt);
RT_UNLOCK(saved_nrt);
@@ -690,26 +712,30 @@ route_output(struct mbuf *m, struct socket *so)
(info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
(rtm->rtm_flags & RTF_LLDATA) != 0) {
error = lla_rt_output(rtm, &info);
+#ifdef INET6
+ if (error == 0)
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
break;
}
- error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
- so->so_fibnum);
+ error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, fibnum);
if (error == 0) {
RT_LOCK(saved_nrt);
rt = saved_nrt;
goto report;
}
+#ifdef INET6
+ /* rt_msg2() will not be used when RTM_DELETE fails. */
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
break;
case RTM_GET:
- case RTM_CHANGE:
- case RTM_LOCK:
- rnh = rt_tables_get_rnh(so->so_fibnum,
- info.rti_info[RTAX_DST]->sa_family);
+ rnh = rt_tables_get_rnh(fibnum, saf);
if (rnh == NULL)
senderr(EAFNOSUPPORT);
- RADIX_NODE_HEAD_RLOCK(rnh);
+ RIB_RLOCK(rnh);
if (info.rti_info[RTAX_NETMASK] == NULL &&
rtm->rtm_type == RTM_GET) {
@@ -719,14 +745,14 @@ route_output(struct mbuf *m, struct socket *so)
* 'route -n get addr'
*/
rt = (struct rtentry *) rnh->rnh_matchaddr(
- info.rti_info[RTAX_DST], rnh);
+ info.rti_info[RTAX_DST], &rnh->head);
} else
rt = (struct rtentry *) rnh->rnh_lookup(
info.rti_info[RTAX_DST],
- info.rti_info[RTAX_NETMASK], rnh);
+ info.rti_info[RTAX_NETMASK], &rnh->head);
if (rt == NULL) {
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
senderr(ESRCH);
}
#ifdef RADIX_MPATH
@@ -738,11 +764,11 @@ route_output(struct mbuf *m, struct socket *so)
* if gate == NULL the first match is returned.
* (no need to call rt_mpath_matchgate if gate == NULL)
*/
- if (rn_mpath_capable(rnh) &&
+ if (rt_mpath_capable(rnh) &&
(rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
if (!rt) {
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
senderr(ESRCH);
}
}
@@ -760,7 +786,8 @@ route_output(struct mbuf *m, struct socket *so)
rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
struct ifaddr *ifa;
- ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1);
+ ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1,
+ RT_ALL_FIBS);
if (ifa != NULL)
rt_maskedcopy(ifa->ifa_addr,
&laddr,
@@ -772,139 +799,81 @@ route_output(struct mbuf *m, struct socket *so)
/*
* refactor rt and no lock operation necessary
*/
- rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, rnh);
+ rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr,
+ &rnh->head);
if (rt == NULL) {
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
senderr(ESRCH);
}
}
RT_LOCK(rt);
RT_ADDREF(rt);
- RADIX_NODE_HEAD_RUNLOCK(rnh);
-
- switch(rtm->rtm_type) {
-
- case RTM_GET:
- report:
- RT_LOCK_ASSERT(rt);
- if ((rt->rt_flags & RTF_HOST) == 0
- ? jailed_without_vnet(curthread->td_ucred)
- : prison_if(curthread->td_ucred,
- rt_key(rt)) != 0) {
- RT_UNLOCK(rt);
- senderr(ESRCH);
- }
- info.rti_info[RTAX_DST] = rt_key(rt);
- info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- info.rti_info[RTAX_NETMASK] = rt_mask(rt);
- info.rti_info[RTAX_GENMASK] = 0;
- if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
- ifp = rt->rt_ifp;
- if (ifp) {
- info.rti_info[RTAX_IFP] =
- ifp->if_addr->ifa_addr;
- error = rtm_get_jailed(&info, ifp, rt,
- &saun, curthread->td_ucred);
- if (error != 0) {
- RT_UNLOCK(rt);
- senderr(error);
- }
- if (ifp->if_flags & IFF_POINTOPOINT)
- info.rti_info[RTAX_BRD] =
- rt->rt_ifa->ifa_dstaddr;
- rtm->rtm_index = ifp->if_index;
- } else {
- info.rti_info[RTAX_IFP] = NULL;
- info.rti_info[RTAX_IFA] = NULL;
- }
- } else if ((ifp = rt->rt_ifp) != NULL) {
- rtm->rtm_index = ifp->if_index;
- }
- len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
- if (len > rtm->rtm_msglen) {
- struct rt_msghdr *new_rtm;
- R_Malloc(new_rtm, struct rt_msghdr *, len);
- if (new_rtm == NULL) {
- RT_UNLOCK(rt);
- senderr(ENOBUFS);
- }
- bcopy(rtm, new_rtm, rtm->rtm_msglen);
- Free(rtm); rtm = new_rtm;
- }
- (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
- rtm->rtm_flags = rt->rt_flags;
- rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
- rtm->rtm_addrs = info.rti_addrs;
- break;
-
- case RTM_CHANGE:
- /*
- * New gateway could require new ifaddr, ifp;
- * flags may also be different; ifp may be specified
- * by ll sockaddr when protocol address is ambiguous
- */
- if (((rt->rt_flags & RTF_GATEWAY) &&
- info.rti_info[RTAX_GATEWAY] != NULL) ||
- info.rti_info[RTAX_IFP] != NULL ||
- (info.rti_info[RTAX_IFA] != NULL &&
- !sa_equal(info.rti_info[RTAX_IFA],
- rt->rt_ifa->ifa_addr))) {
- RT_UNLOCK(rt);
- RADIX_NODE_HEAD_LOCK(rnh);
- error = rt_getifa_fib(&info, rt->rt_fibnum);
- /*
- * XXXRW: Really we should release this
- * reference later, but this maintains
- * historical behavior.
- */
- if (info.rti_ifa != NULL)
- ifa_free(info.rti_ifa);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- if (error != 0)
- senderr(error);
- RT_LOCK(rt);
- }
- if (info.rti_ifa != NULL &&
- info.rti_ifa != rt->rt_ifa &&
- rt->rt_ifa != NULL &&
- rt->rt_ifa->ifa_rtrequest != NULL) {
- rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
- &info);
- ifa_free(rt->rt_ifa);
- }
- if (info.rti_info[RTAX_GATEWAY] != NULL) {
- RT_UNLOCK(rt);
- RADIX_NODE_HEAD_LOCK(rnh);
- RT_LOCK(rt);
-
- error = rt_setgate(rt, rt_key(rt),
- info.rti_info[RTAX_GATEWAY]);
- RADIX_NODE_HEAD_UNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
+
+report:
+ RT_LOCK_ASSERT(rt);
+ if ((rt->rt_flags & RTF_HOST) == 0
+ ? jailed_without_vnet(curthread->td_ucred)
+ : prison_if(curthread->td_ucred,
+ rt_key(rt)) != 0) {
+ RT_UNLOCK(rt);
+ senderr(ESRCH);
+ }
+ info.rti_info[RTAX_DST] = rt_key(rt);
+ info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
+ rt_mask(rt), &ss);
+ info.rti_info[RTAX_GENMASK] = 0;
+ if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
+ ifp = rt->rt_ifp;
+ if (ifp) {
+ info.rti_info[RTAX_IFP] =
+ ifp->if_addr->ifa_addr;
+ error = rtm_get_jailed(&info, ifp, rt,
+ &saun, curthread->td_ucred);
if (error != 0) {
RT_UNLOCK(rt);
senderr(error);
}
- rt->rt_flags |= (RTF_GATEWAY & info.rti_flags);
+ if (ifp->if_flags & IFF_POINTOPOINT)
+ info.rti_info[RTAX_BRD] =
+ rt->rt_ifa->ifa_dstaddr;
+ rtm->rtm_index = ifp->if_index;
+ } else {
+ info.rti_info[RTAX_IFP] = NULL;
+ info.rti_info[RTAX_IFA] = NULL;
}
- if (info.rti_ifa != NULL &&
- info.rti_ifa != rt->rt_ifa) {
- ifa_ref(info.rti_ifa);
- rt->rt_ifa = info.rti_ifa;
- rt->rt_ifp = info.rti_ifp;
+ } else if ((ifp = rt->rt_ifp) != NULL) {
+ rtm->rtm_index = ifp->if_index;
+ }
+
+ /* Check if we need to realloc storage */
+ rtsock_msg_buffer(rtm->rtm_type, &info, NULL, &len);
+ if (len > alloc_len) {
+ struct rt_msghdr *new_rtm;
+ new_rtm = malloc(len, M_TEMP, M_NOWAIT);
+ if (new_rtm == NULL) {
+ RT_UNLOCK(rt);
+ senderr(ENOBUFS);
}
- /* Allow some flags to be toggled on change. */
- rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
- (rtm->rtm_flags & RTF_FMASK);
- rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
- &rt->rt_rmx);
- rtm->rtm_index = rt->rt_ifp->if_index;
- if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
- rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
- /* FALLTHROUGH */
- case RTM_LOCK:
- /* We don't support locks anymore */
- break;
+ bcopy(rtm, new_rtm, rtm->rtm_msglen);
+ free(rtm, M_TEMP);
+ rtm = new_rtm;
+ alloc_len = len;
}
+
+ w.w_tmem = (caddr_t)rtm;
+ w.w_tmemsize = alloc_len;
+ rtsock_msg_buffer(rtm->rtm_type, &info, &w, &len);
+
+ if (rt->rt_flags & RTF_GWFLAG_COMPAT)
+ rtm->rtm_flags = RTF_GATEWAY |
+ (rt->rt_flags & ~RTF_GWFLAG_COMPAT);
+ else
+ rtm->rtm_flags = rt->rt_flags;
+ rt_getmetrics(rt, &rtm->rtm_rmx);
+ rtm->rtm_addrs = info.rti_addrs;
+
RT_UNLOCK(rt);
break;
@@ -913,39 +882,55 @@ route_output(struct mbuf *m, struct socket *so)
}
flush:
- if (rtm) {
- if (error)
- rtm->rtm_errno = error;
- else
- rtm->rtm_flags |= RTF_DONE;
- }
- if (rt) /* XXX can this be true? */
+ if (rt != NULL)
RTFREE(rt);
- {
- struct rawcb *rp = NULL;
/*
* Check to see if we don't want our own messages.
*/
if ((so->so_options & SO_USELOOPBACK) == 0) {
- if (route_cb.any_count <= 1) {
- if (rtm)
- Free(rtm);
+ if (V_route_cb.any_count <= 1) {
+ if (rtm != NULL)
+ free(rtm, M_TEMP);
m_freem(m);
return (error);
}
/* There is another listener, so construct message */
rp = sotorawcb(so);
}
- if (rtm) {
+
+ if (rtm != NULL) {
+#ifdef INET6
+ if (rti_need_deembed) {
+ /* sin6_scope_id is recovered before sending rtm. */
+ sin6 = (struct sockaddr_in6 *)&ss;
+ for (i = 0; i < RTAX_MAX; i++) {
+ if (info.rti_info[i] == NULL)
+ continue;
+ if (info.rti_info[i]->sa_family != AF_INET6)
+ continue;
+ bcopy(info.rti_info[i], sin6, sizeof(*sin6));
+ if (sa6_recoverscope(sin6) == 0)
+ bcopy(sin6, info.rti_info[i],
+ sizeof(*sin6));
+ }
+ }
+#endif
+ if (error != 0)
+ rtm->rtm_errno = error;
+ else
+ rtm->rtm_flags |= RTF_DONE;
+
m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
if (m->m_pkthdr.len < rtm->rtm_msglen) {
m_freem(m);
m = NULL;
} else if (m->m_pkthdr.len > rtm->rtm_msglen)
m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
+
+ free(rtm, M_TEMP);
}
- if (m) {
- M_SETFIB(m, so->so_fibnum);
+ if (m != NULL) {
+ M_SETFIB(m, fibnum);
m->m_flags |= RTS_FILTER_FIB;
if (rp) {
/*
@@ -959,43 +944,21 @@ flush:
} else
rt_dispatch(m, saf);
}
- /* info.rti_info[RTAX_DST] (used above) can point inside of rtm */
- if (rtm)
- Free(rtm);
- }
+
return (error);
-#undef sa_equal
}
static void
-rt_setmetrics(u_long which, const struct rt_metrics *in,
- struct rt_metrics_lite *out)
+rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
{
-#define metric(f, e) if (which & (f)) out->e = in->e;
- /*
- * Only these are stored in the routing entry since introduction
- * of tcp hostcache. The rest is ignored.
- */
- metric(RTV_MTU, rmx_mtu);
- metric(RTV_WEIGHT, rmx_weight);
- /* Userland -> kernel timebase conversion. */
- if (which & RTV_EXPIRE)
- out->rmx_expire = in->rmx_expire ?
- in->rmx_expire - time_second + time_uptime : 0;
-#undef metric
-}
-static void
-rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
-{
-#define metric(e) out->e = in->e;
bzero(out, sizeof(*out));
- metric(rmx_mtu);
- metric(rmx_weight);
+ out->rmx_mtu = rt->rt_mtu;
+ out->rmx_weight = rt->rt_weight;
+ out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
/* Kernel -> userland timebase conversion. */
- out->rmx_expire = in->rmx_expire ?
- in->rmx_expire - time_uptime + time_second : 0;
-#undef metric
+ out->rmx_expire = rt->rt_expire ?
+ rt->rt_expire - time_uptime + time_second : 0;
}
/*
@@ -1030,6 +993,11 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
return (0); /* should be EINVAL but for compat */
}
/* accept it */
+#ifdef INET6
+ if (sa->sa_family == AF_INET6)
+ sa6_embedscope((struct sockaddr_in6 *)sa,
+ V_ip6_use_defzone);
+#endif
rtinfo->rti_info[i] = sa;
cp += SA_SIZE(sa);
}
@@ -1037,15 +1005,42 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
}
/*
- * Used by the routing socket.
+ * Fill in @dmask with valid netmask leaving original @smask
+ * intact. Mostly used with radix netmasks.
+ */
+static struct sockaddr *
+rtsock_fix_netmask(struct sockaddr *dst, struct sockaddr *smask,
+ struct sockaddr_storage *dmask)
+{
+ if (dst == NULL || smask == NULL)
+ return (NULL);
+
+ memset(dmask, 0, dst->sa_len);
+ memcpy(dmask, smask, smask->sa_len);
+ dmask->ss_len = dst->sa_len;
+ dmask->ss_family = dst->sa_family;
+
+ return ((struct sockaddr *)dmask);
+}
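
For context (not part of the patch): radix-tree netmasks may be stored with a shortened sa_len and an unset family, so handing them to userland unmodified would confuse rtsock consumers; this helper produces a copy shaped like the destination key. A minimal usage sketch, mirroring the calls this patch adds elsewhere (any locked rtentry `rt` will do):

	struct sockaddr_storage ss;	/* scratch copy; the radix mask stays intact */
	struct rt_addrinfo info;

	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	/* Mask copy whose sa_len/sa_family are taken from the key. */
	info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
	    rt_mask(rt), &ss);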
+
+/*
+ * Writes information related to @rtinfo object to newly-allocated mbuf.
+ * Assumes MCLBYTES is enough to construct any message.
+ * Used for OS notifications of various events (if/ifa announces, etc.).
+ *
+ * Returns allocated mbuf or NULL on failure.
*/
static struct mbuf *
-rt_msg1(int type, struct rt_addrinfo *rtinfo)
+rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
{
struct rt_msghdr *rtm;
struct mbuf *m;
int i;
struct sockaddr *sa;
+#ifdef INET6
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 *sin6;
+#endif
int len, dlen;
switch (type) {
@@ -1072,20 +1067,17 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
default:
len = sizeof(struct rt_msghdr);
}
- if (len > MCLBYTES)
- panic("rt_msg1");
- m = m_gethdr(M_DONTWAIT, MT_DATA);
- if (m && len > MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- m = NULL;
- }
- }
+
+ /* XXXGL: can we use MJUMPAGESIZE cluster here? */
+ KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
+ if (len > MHLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (m);
+
m->m_pkthdr.len = m->m_len = len;
- m->m_pkthdr.rcvif = NULL;
rtm = mtod(m, struct rt_msghdr *);
bzero((caddr_t)rtm, len);
for (i = 0; i < RTAX_MAX; i++) {
@@ -1093,6 +1085,14 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
continue;
rtinfo->rti_addrs |= (1 << i);
dlen = SA_SIZE(sa);
+#ifdef INET6
+ if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)&ss;
+ bcopy(sa, sin6, sizeof(*sin6));
+ if (sa6_recoverscope(sin6) == 0)
+ sa = (struct sockaddr *)sin6;
+ }
+#endif
m_copyback(m, len, dlen, (caddr_t)sa);
len += dlen;
}
@@ -1107,17 +1107,26 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
}
/*
- * Used by the sysctl code and routing socket.
+ * Writes information related to @rtinfo object to preallocated buffer.
+ * Stores needed size in @plen. If @w is NULL, calculates size without
+ * writing.
+ * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
+ *
+ * Returns 0 on success.
+ *
*/
static int
-rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
+rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
{
int i;
- int len, dlen, second_time = 0;
- caddr_t cp0;
+ int len, buflen = 0, dlen;
+ caddr_t cp = NULL;
+ struct rt_msghdr *rtm = NULL;
+#ifdef INET6
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 *sin6;
+#endif
- rtinfo->rti_addrs = 0;
-again:
switch (type) {
case RTM_DELADDR:
@@ -1156,9 +1165,14 @@ again:
default:
len = sizeof(struct rt_msghdr);
}
- cp0 = cp;
- if (cp0)
- cp += len;
+
+ if (w != NULL) {
+ rtm = (struct rt_msghdr *)w->w_tmem;
+ buflen = w->w_tmemsize - len;
+ cp = (caddr_t)w->w_tmem + len;
+ }
+
+ rtinfo->rti_addrs = 0;
for (i = 0; i < RTAX_MAX; i++) {
struct sockaddr *sa;
@@ -1166,45 +1180,56 @@ again:
continue;
rtinfo->rti_addrs |= (1 << i);
dlen = SA_SIZE(sa);
- if (cp) {
+ if (cp != NULL && buflen >= dlen) {
+#ifdef INET6
+ if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)&ss;
+ bcopy(sa, sin6, sizeof(*sin6));
+ if (sa6_recoverscope(sin6) == 0)
+ sa = (struct sockaddr *)sin6;
+ }
+#endif
bcopy((caddr_t)sa, cp, (unsigned)dlen);
cp += dlen;
+ buflen -= dlen;
+ } else if (cp != NULL) {
+ /*
+			 * Buffer too small. Keep counting the
+			 * needed size and return an error.
+ */
+ cp = NULL;
}
+
len += dlen;
}
- len = ALIGN(len);
- if (cp == NULL && w != NULL && !second_time) {
- struct walkarg *rw = w;
-
- if (rw->w_req) {
- if (rw->w_tmemsize < len) {
- if (rw->w_tmem)
- free(rw->w_tmem, M_RTABLE);
- rw->w_tmem = (caddr_t)
- malloc(len, M_RTABLE, M_NOWAIT);
- if (rw->w_tmem)
- rw->w_tmemsize = len;
- }
- if (rw->w_tmem) {
- cp = rw->w_tmem;
- second_time = 1;
- goto again;
- }
- }
+
+ if (cp != NULL) {
+ dlen = ALIGN(len) - len;
+ if (buflen < dlen)
+ cp = NULL;
+ else
+ buflen -= dlen;
}
- if (cp) {
- struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
+ len = ALIGN(len);
+ if (cp != NULL) {
+ /* fill header iff buffer is large enough */
rtm->rtm_version = RTM_VERSION;
rtm->rtm_type = type;
rtm->rtm_msglen = len;
}
- return (len);
+
+ *plen = len;
+
+ if (w != NULL && cp == NULL)
+ return (ENOBUFS);
+
+ return (0);
}
/*
* This routine is called to generate a message from the routing
- * socket indicating that a redirect has occured, a routing lookup
+ * socket indicating that a redirect has occurred, a routing lookup
* has failed, or that a protocol has detected timeouts to a particular
* destination.
*/
@@ -1216,9 +1241,9 @@ rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
struct mbuf *m;
struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return;
- m = rt_msg1(type, rtinfo);
+ m = rtsock_msg_mbuf(type, rtinfo);
if (m == NULL)
return;
@@ -1254,16 +1279,16 @@ rt_ifmsg(struct ifnet *ifp)
struct mbuf *m;
struct rt_addrinfo info;
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return;
bzero((caddr_t)&info, sizeof(info));
- m = rt_msg1(RTM_IFINFO, &info);
+ m = rtsock_msg_mbuf(RTM_IFINFO, &info);
if (m == NULL)
return;
ifm = mtod(m, struct if_msghdr *);
ifm->ifm_index = ifp->if_index;
ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
- ifm->ifm_data = ifp->if_data;
+ if_data_copy(ifp, &ifm->ifm_data);
ifm->ifm_addrs = 0;
rt_dispatch(m, AF_UNSPEC);
}
@@ -1283,8 +1308,9 @@ rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
struct mbuf *m;
struct ifa_msghdr *ifam;
struct ifnet *ifp = ifa->ifa_ifp;
+ struct sockaddr_storage ss;
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return (0);
ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
@@ -1292,13 +1318,14 @@ rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
- info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+ info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
+ info.rti_info[RTAX_IFP], ifa->ifa_netmask, &ss);
info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
- if ((m = rt_msg1(ncmd, &info)) == NULL)
+ if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL)
return (ENOBUFS);
ifam = mtod(m, struct ifa_msghdr *);
ifam->ifam_index = ifp->if_index;
- ifam->ifam_metric = ifa->ifa_metric;
+ ifam->ifam_metric = ifa->ifa_ifp->if_metric;
ifam->ifam_flags = ifa->ifa_flags;
ifam->ifam_addrs = info.rti_addrs;
@@ -1331,15 +1358,16 @@ rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt,
struct sockaddr *sa;
struct mbuf *m;
struct rt_msghdr *rtm;
+ struct sockaddr_storage ss;
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return (0);
bzero((caddr_t)&info, sizeof(info));
- info.rti_info[RTAX_NETMASK] = rt_mask(rt);
info.rti_info[RTAX_DST] = sa = rt_key(rt);
+ info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(sa, rt_mask(rt), &ss);
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- if ((m = rt_msg1(cmd, &info)) == NULL)
+ if ((m = rtsock_msg_mbuf(cmd, &info)) == NULL)
return (ENOBUFS);
rtm = mtod(m, struct rt_msghdr *);
rtm->rtm_index = ifp->if_index;
@@ -1370,7 +1398,7 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
struct ifnet *ifp = ifma->ifma_ifp;
struct ifma_msghdr *ifmam;
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return;
bzero((caddr_t)&info, sizeof(info));
@@ -1381,7 +1409,7 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
* (similarly to how ARP entries, e.g., are presented).
*/
info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
- m = rt_msg1(cmd, &info);
+ m = rtsock_msg_mbuf(cmd, &info);
if (m == NULL)
return;
ifmam = mtod(m, struct ifma_msghdr *);
@@ -1399,10 +1427,10 @@ rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
struct if_announcemsghdr *ifan;
struct mbuf *m;
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return NULL;
bzero((caddr_t)info, sizeof(*info));
- m = rt_msg1(type, info);
+ m = rtsock_msg_mbuf(type, info);
if (m != NULL) {
ifan = mtod(m, struct if_announcemsghdr *);
ifan->ifan_index = ifp->if_index;
@@ -1509,6 +1537,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
struct rtentry *rt = (struct rtentry *)rn;
int error = 0, size;
struct rt_addrinfo info;
+ struct sockaddr_storage ss;
if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
return 0;
@@ -1519,7 +1548,8 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+ info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
+ rt_mask(rt), &ss);
info.rti_info[RTAX_GENMASK] = 0;
if (rt->rt_ifp) {
info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
@@ -1527,16 +1557,17 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
}
- size = rt_msg2(RTM_GET, &info, NULL, w);
+ if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
+ return (error);
if (w->w_req && w->w_tmem) {
struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
- rtm->rtm_flags = rt->rt_flags;
- /*
- * let's be honest about this being a retarded hack
- */
- rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
- rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+ if (rt->rt_flags & RTF_GWFLAG_COMPAT)
+ rtm->rtm_flags = RTF_GATEWAY |
+ (rt->rt_flags & ~RTF_GWFLAG_COMPAT);
+ else
+ rtm->rtm_flags = rt->rt_flags;
+ rt_getmetrics(rt, &rtm->rtm_rmx);
rtm->rtm_index = rt->rt_ifp->if_index;
rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
rtm->rtm_addrs = info.rti_addrs;
@@ -1546,70 +1577,40 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
return (error);
}
-#ifdef COMPAT_FREEBSD32
-static void
-copy_ifdata32(struct if_data *src, struct if_data32 *dst)
-{
-
- bzero(dst, sizeof(*dst));
- CP(*src, *dst, ifi_type);
- CP(*src, *dst, ifi_physical);
- CP(*src, *dst, ifi_addrlen);
- CP(*src, *dst, ifi_hdrlen);
- CP(*src, *dst, ifi_link_state);
- dst->ifi_datalen = sizeof(struct if_data32);
- CP(*src, *dst, ifi_mtu);
- CP(*src, *dst, ifi_metric);
- CP(*src, *dst, ifi_baudrate);
- CP(*src, *dst, ifi_ipackets);
- CP(*src, *dst, ifi_ierrors);
- CP(*src, *dst, ifi_opackets);
- CP(*src, *dst, ifi_oerrors);
- CP(*src, *dst, ifi_collisions);
- CP(*src, *dst, ifi_ibytes);
- CP(*src, *dst, ifi_obytes);
- CP(*src, *dst, ifi_imcasts);
- CP(*src, *dst, ifi_omcasts);
- CP(*src, *dst, ifi_iqdrops);
- CP(*src, *dst, ifi_noproto);
- CP(*src, *dst, ifi_hwassist);
- CP(*src, *dst, ifi_epoch);
- TV_CP(*src, *dst, ifi_lastchange);
-}
-#endif
-
static int
sysctl_iflist_ifml(struct ifnet *ifp, struct rt_addrinfo *info,
struct walkarg *w, int len)
{
struct if_msghdrl *ifm;
+ struct if_data *ifd;
+
+ ifm = (struct if_msghdrl *)w->w_tmem;
#ifdef COMPAT_FREEBSD32
if (w->w_req->flags & SCTL_MASK32) {
struct if_msghdrl32 *ifm32;
- ifm32 = (struct if_msghdrl32 *)w->w_tmem;
+ ifm32 = (struct if_msghdrl32 *)ifm;
ifm32->ifm_addrs = info->rti_addrs;
ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
ifm32->ifm_index = ifp->if_index;
ifm32->_ifm_spare1 = 0;
ifm32->ifm_len = sizeof(*ifm32);
ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
-
- copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
-
- return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
- }
+ ifd = &ifm32->ifm_data;
+ } else
#endif
- ifm = (struct if_msghdrl *)w->w_tmem;
- ifm->ifm_addrs = info->rti_addrs;
- ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
- ifm->ifm_index = ifp->if_index;
- ifm->_ifm_spare1 = 0;
- ifm->ifm_len = sizeof(*ifm);
- ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
+ {
+ ifm->ifm_addrs = info->rti_addrs;
+ ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm->ifm_index = ifp->if_index;
+ ifm->_ifm_spare1 = 0;
+ ifm->ifm_len = sizeof(*ifm);
+ ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
+ ifd = &ifm->ifm_data;
+ }
- ifm->ifm_data = ifp->if_data;
+ if_data_copy(ifp, ifd);
return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
}
@@ -1619,27 +1620,29 @@ sysctl_iflist_ifm(struct ifnet *ifp, struct rt_addrinfo *info,
struct walkarg *w, int len)
{
struct if_msghdr *ifm;
+ struct if_data *ifd;
+
+ ifm = (struct if_msghdr *)w->w_tmem;
#ifdef COMPAT_FREEBSD32
if (w->w_req->flags & SCTL_MASK32) {
struct if_msghdr32 *ifm32;
- ifm32 = (struct if_msghdr32 *)w->w_tmem;
+ ifm32 = (struct if_msghdr32 *)ifm;
ifm32->ifm_addrs = info->rti_addrs;
ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
ifm32->ifm_index = ifp->if_index;
-
- copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
-
- return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
- }
+ ifd = &ifm32->ifm_data;
+ } else
#endif
- ifm = (struct if_msghdr *)w->w_tmem;
- ifm->ifm_addrs = info->rti_addrs;
- ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
- ifm->ifm_index = ifp->if_index;
+ {
+ ifm->ifm_addrs = info->rti_addrs;
+ ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm->ifm_index = ifp->if_index;
+ ifd = &ifm->ifm_data;
+ }
- ifm->ifm_data = ifp->if_data;
+ if_data_copy(ifp, ifd);
return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
}
@@ -1649,12 +1652,15 @@ sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
struct walkarg *w, int len)
{
struct ifa_msghdrl *ifam;
+ struct if_data *ifd;
+
+ ifam = (struct ifa_msghdrl *)w->w_tmem;
#ifdef COMPAT_FREEBSD32
if (w->w_req->flags & SCTL_MASK32) {
struct ifa_msghdrl32 *ifam32;
- ifam32 = (struct ifa_msghdrl32 *)w->w_tmem;
+ ifam32 = (struct ifa_msghdrl32 *)ifam;
ifam32->ifam_addrs = info->rti_addrs;
ifam32->ifam_flags = ifa->ifa_flags;
ifam32->ifam_index = ifa->ifa_ifp->if_index;
@@ -1662,24 +1668,31 @@ sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
ifam32->ifam_len = sizeof(*ifam32);
ifam32->ifam_data_off =
offsetof(struct ifa_msghdrl32, ifam_data);
- ifam32->ifam_metric = ifa->ifa_metric;
-
- copy_ifdata32(&ifa->ifa_ifp->if_data, &ifam32->ifam_data);
-
- return (SYSCTL_OUT(w->w_req, (caddr_t)ifam32, len));
- }
+ ifam32->ifam_metric = ifa->ifa_ifp->if_metric;
+ ifd = &ifam32->ifam_data;
+ } else
#endif
+ {
+ ifam->ifam_addrs = info->rti_addrs;
+ ifam->ifam_flags = ifa->ifa_flags;
+ ifam->ifam_index = ifa->ifa_ifp->if_index;
+ ifam->_ifam_spare1 = 0;
+ ifam->ifam_len = sizeof(*ifam);
+ ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
+ ifam->ifam_metric = ifa->ifa_ifp->if_metric;
+ ifd = &ifam->ifam_data;
+ }
- ifam = (struct ifa_msghdrl *)w->w_tmem;
- ifam->ifam_addrs = info->rti_addrs;
- ifam->ifam_flags = ifa->ifa_flags;
- ifam->ifam_index = ifa->ifa_ifp->if_index;
- ifam->_ifam_spare1 = 0;
- ifam->ifam_len = sizeof(*ifam);
- ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
- ifam->ifam_metric = ifa->ifa_metric;
+ bzero(ifd, sizeof(*ifd));
+ ifd->ifi_datalen = sizeof(struct if_data);
+ ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
+ ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
+ ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
+ ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
- ifam->ifam_data = ifa->if_data;
+ /* Fixup if_data carp(4) vhid. */
+ if (carp_get_vhid_p != NULL)
+ ifd->ifi_vhid = (*carp_get_vhid_p)(ifa);
return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
}
@@ -1694,7 +1707,7 @@ sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
ifam->ifam_addrs = info->rti_addrs;
ifam->ifam_flags = ifa->ifa_flags;
ifam->ifam_index = ifa->ifa_ifp->if_index;
- ifam->ifam_metric = ifa->ifa_metric;
+ ifam->ifam_metric = ifa->ifa_ifp->if_metric;
return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
}
@@ -1706,16 +1719,19 @@ sysctl_iflist(int af, struct walkarg *w)
struct ifaddr *ifa;
struct rt_addrinfo info;
int len, error = 0;
+ struct sockaddr_storage ss;
bzero((caddr_t)&info, sizeof(info));
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
IF_ADDR_RLOCK(ifp);
ifa = ifp->if_addr;
info.rti_info[RTAX_IFP] = ifa->ifa_addr;
- len = rt_msg2(RTM_IFINFO, &info, NULL, w);
+ error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
+ if (error != 0)
+ goto done;
info.rti_info[RTAX_IFP] = NULL;
if (w->w_req && w->w_tmem) {
if (w->w_op == NET_RT_IFLISTL)
@@ -1732,9 +1748,12 @@ sysctl_iflist(int af, struct walkarg *w)
ifa->ifa_addr) != 0)
continue;
info.rti_info[RTAX_IFA] = ifa->ifa_addr;
- info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+ info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
+ ifa->ifa_addr, ifa->ifa_netmask, &ss);
info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
- len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
+ error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
+ if (error != 0)
+ goto done;
if (w->w_req && w->w_tmem) {
if (w->w_op == NET_RT_IFLISTL)
error = sysctl_iflist_ifaml(ifa, &info,
@@ -1747,13 +1766,14 @@ sysctl_iflist(int af, struct walkarg *w)
}
}
IF_ADDR_RUNLOCK(ifp);
- info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
- info.rti_info[RTAX_BRD] = NULL;
+ info.rti_info[RTAX_IFA] = NULL;
+ info.rti_info[RTAX_NETMASK] = NULL;
+ info.rti_info[RTAX_BRD] = NULL;
}
done:
if (ifp != NULL)
IF_ADDR_RUNLOCK(ifp);
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (error);
}
@@ -1767,7 +1787,7 @@ sysctl_ifmalist(int af, struct walkarg *w)
struct ifaddr *ifa;
bzero((caddr_t)&info, sizeof(info));
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
@@ -1784,7 +1804,9 @@ sysctl_ifmalist(int af, struct walkarg *w)
info.rti_info[RTAX_GATEWAY] =
(ifma->ifma_addr->sa_family != AF_LINK) ?
ifma->ifma_lladdr : NULL;
- len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
+ error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
+ if (error != 0)
+ goto done;
if (w->w_req && w->w_tmem) {
struct ifma_msghdr *ifmam;
@@ -1802,7 +1824,7 @@ sysctl_ifmalist(int af, struct walkarg *w)
IF_ADDR_RUNLOCK(ifp);
}
done:
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (error);
}
@@ -1811,7 +1833,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
{
int *name = (int *)arg1;
u_int namelen = arg2;
- struct radix_node_head *rnh = NULL; /* silence compiler. */
+ struct rib_head *rnh = NULL; /* silence compiler. */
int i, lim, error = EINVAL;
int fib = 0;
u_char af;
@@ -1852,6 +1874,14 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
error = sysctl_wire_old_buffer(req, 0);
if (error)
return (error);
+
+ /*
+ * Allocate reply buffer in advance.
+	 * All rtsock messages have a maximum length of u_short.
+ */
+ w.w_tmemsize = 65536;
+ w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
+
switch (w.w_op) {
case NET_RT_DUMP:
@@ -1880,10 +1910,10 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
for (error = 0; error == 0 && i <= lim; i++) {
rnh = rt_tables_get_rnh(fib, i);
if (rnh != NULL) {
- RADIX_NODE_HEAD_RLOCK(rnh);
- error = rnh->rnh_walktree(rnh,
+ RIB_RLOCK(rnh);
+ error = rnh->rnh_walktree(&rnh->head,
sysctl_dumpentry, &w);
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
} else if (af != 0)
error = EAFNOSUPPORT;
}
@@ -1898,8 +1928,8 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
error = sysctl_ifmalist(af, &w);
break;
}
- if (w.w_tmem)
- free(w.w_tmem, M_RTABLE);
+
+ free(w.w_tmem, M_TEMP);
return (error);
}
@@ -1927,7 +1957,7 @@ static struct domain routedomain = {
.dom_family = PF_ROUTE,
.dom_name = "route",
.dom_protosw = routesw,
- .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])]
+ .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
};
VNET_DOMAIN_SET(route);
diff --git a/freebsd/sys/net/sff8436.h b/freebsd/sys/net/sff8436.h
new file mode 100644
index 00000000..3399cce5
--- /dev/null
+++ b/freebsd/sys/net/sff8436.h
@@ -0,0 +1,213 @@
+/*-
+ * Copyright (c) 2014 Yandex LLC.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * The following set of constants are from Document SFF-8436
+ * "QSFP+ 10 Gbs 4X PLUGGABLE TRANSCEIVER" revision 4.8 dated October 31, 2013
+ *
+ * This SFF standard defines the following QSFP+ module memory layout:
+ *
+ * 1) 256-byte addressable block and 128-byte pages
+ * 2) Addresses in the lower 128 bytes always refer to the same page
+ * 3) Upper address space may refer to different pages depending on
+ * "page select" byte value.
+ *
+ * Map description:
+ *
+ * Serial address 0xA0:
+ *
+ * Lower bytes:
+ * 0-127 Monitoring data & page select byte
+ * 128-255:
+ *
+ * Page 00:
+ * 128-191 Base ID Fields
+ *    192-223  Extended ID
+ *    224-255  Vendor Specific ID
+ *
+ * Page 01 (optional):
+ * 128-255 App-specific data
+ *
+ * Page 02 (optional):
+ * 128-255 User EEPROM Data
+ *
+ * Page 03 (optional for Cable Assemblies)
+ * 128-223 Thresholds
+ * 225-237 Vendor Specific
+ * 238-253 Channel Controls/Monitor
+ *    254-255  Reserved
+ *
+ * All these values are read across an I2C (i squared C) bus.
+ */
+
+#define SFF_8436_BASE 0xA0 /* Base address for all requests */
+
+/* Table 17 - Lower Memory Map */
+enum {
+ SFF_8436_MID = 0, /* Copy of SFF_8436_ID field */
+ SFF_8436_STATUS = 1, /* 2-bytes status (Table 18) */
+ SFF_8436_INTR_START = 3, /* Interrupt flags (Tables 19-21) */
+ SFF_8436_INTR_END = 21,
+	SFF_8436_MODMON_START	= 22,	/* Module monitors (Table 22) */
+ SFF_8436_TEMP = 22, /* Internally measured module temp */
+	SFF_8436_VCC		= 26,	/* Internally measured module
+					 * supply voltage */
+ SFF_8436_MODMON_END = 33,
+ SFF_8436_CHMON_START = 34, /* Channel monitors (Table 23) */
+ SFF_8436_RX_CH1_MSB = 34, /* Internally measured RX input power */
+ SFF_8436_RX_CH1_LSB = 35, /* for channel 1 */
+ SFF_8436_RX_CH2_MSB = 36, /* Internally measured RX input power */
+ SFF_8436_RX_CH2_LSB = 37, /* for channel 2 */
+ SFF_8436_RX_CH3_MSB = 38, /* Internally measured RX input power */
+ SFF_8436_RX_CH3_LSB = 39, /* for channel 3 */
+ SFF_8436_RX_CH4_MSB = 40, /* Internally measured RX input power */
+ SFF_8436_RX_CH4_LSB = 41, /* for channel 4 */
+ SFF_8436_TX_CH1_MSB = 42, /* Internally measured TX bias */
+ SFF_8436_TX_CH1_LSB = 43, /* for channel 1 */
+ SFF_8436_TX_CH2_MSB = 44, /* Internally measured TX bias */
+ SFF_8436_TX_CH2_LSB = 45, /* for channel 2 */
+ SFF_8436_TX_CH3_MSB = 46, /* Internally measured TX bias */
+ SFF_8436_TX_CH3_LSB = 47, /* for channel 3 */
+ SFF_8436_TX_CH4_MSB = 48, /* Internally measured TX bias */
+ SFF_8436_TX_CH4_LSB = 49, /* for channel 4 */
+ SFF_8436_CHANMON_END = 81,
+ SFF_8436_CONTROL_START = 86, /* Control (Table 24) */
+ SFF_8436_CONTROL_END = 97,
+ SFF_8436_MASKS_START = 100, /* Module/channel masks (Table 25) */
+ SFF_8436_MASKS_END = 106,
+ SFF_8436_CHPASSWORD = 119, /* Password change entry (4 bytes) */
+ SFF_8436_PASSWORD = 123, /* Password entry area (4 bytes) */
+ SFF_8436_PAGESEL = 127, /* Page select byte */
+};
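+
+/*
+ * Editorial sketch, not part of the patch: the page-select byte at
+ * offset 127 determines which page appears in the upper half of the
+ * map.  i2c_write()/i2c_read() below are hypothetical placeholders
+ * for whatever bus interface a real driver provides.
+ *
+ *	// Read byte `off' (128-255) of upper page `page'.
+ *	static int
+ *	qsfp_read_upper(int page, uint8_t off, uint8_t *val)
+ *	{
+ *		uint8_t sel = page;
+ *
+ *		// Select the page by writing the page-select byte...
+ *		if (i2c_write(SFF_8436_BASE, SFF_8436_PAGESEL, &sel, 1) != 0)
+ *			return (-1);
+ *		// ...then read from the remapped upper address space.
+ *		return (i2c_read(SFF_8436_BASE, off, val, 1));
+ *	}
+ */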
+
+/* Table 18 - Status Indicators bits */
+/* Byte 1: all bits reserved */
+
+/* Byte 2 bits */
+#define SFF_8436_STATUS_FLATMEM (1 << 2) /* Upper memory flat or paged
+ * 0 = paging, 1=Page 0 only */
+#define SFF_8436_STATUS_INTL (1 << 1) /* Digital state of the intL
+ * Interrupt output pin */
+#define SFF_8436_STATUS_NOTREADY 1 /* Module has not yet achieved
+ * power up and memory data is not
+ * ready. 0=data is ready */
+/*
+ * Upper page 0 definitions:
+ * Table 29 - Serial ID: Data fields.
+ *
+ * Note that this table is mostly the same as used in SFF-8472.
+ * The only difference is the address shift: +128 bytes.
+ */
+enum {
+ SFF_8436_ID = 128, /* Module Type (defined in sff8472.h) */
+ SFF_8436_EXT_ID = 129, /* Extended transceiver type
+ * (Table 31) */
+ SFF_8436_CONNECTOR = 130, /* Connector type (Table 32) */
+ SFF_8436_TRANS_START = 131, /* Electric or Optical Compatibility
+ * (Table 33) */
+ SFF_8436_CODE_E1040100G = 131, /* 10/40/100G Ethernet Compliance Code */
+ SFF_8436_CODE_SONET = 132, /* SONET Compliance codes */
+ SFF_8436_CODE_SATA = 133, /* SAS/SATA compliance codes */
+ SFF_8436_CODE_E1G = 134, /* Gigabit Ethernet Compliant codes */
+ SFF_8436_CODE_FC_START = 135, /* FC link/media/speed */
+ SFF_8436_CODE_FC_END = 138,
+ SFF_8436_TRANS_END = 138,
+ SFF_8436_ENCODING = 139, /* Encoding Code for high speed
+ * serial encoding algorithm (see
+ * Table 34) */
+ SFF_8436_BITRATE = 140, /* Nominal signaling rate, units
+ * of 100MBd. */
+ SFF_8436_RATEID = 141, /* Extended RateSelect Compliance
+ * (see Table 35) */
+ SFF_8436_LEN_SMF_KM = 142, /* Link length supported for single
+ * mode fiber, units of km */
+ SFF_8436_LEN_OM3 = 143, /* Link length supported for 850nm
+ * 50um multimode fiber, units of 2 m */
+ SFF_8436_LEN_OM2 = 144, /* Link length supported for 50 um
+ * OM2 fiber, units of 1 m */
+ SFF_8436_LEN_OM1 = 145, /* Link length supported for 1310 nm
+ * 50um multi-mode fiber, units of 1m*/
+ SFF_8436_LEN_ASM = 144, /* Link length of passive cable assembly
+ * Length is specified as in the INF
+					* 8074, units of 1 m. 0 means this is
+					* not a valid assembly. A value of 255
+					* means that the module supports lengths
+					* greater than 254 m. */
+ SFF_8436_DEV_TECH = 147, /* Device/transmitter technology,
+ * see Table 36/37 */
+ SFF_8436_VENDOR_START = 148, /* Vendor name, 16 bytes, padded
+ * right with 0x20 */
+ SFF_8436_VENDOR_END = 163,
+ SFF_8436_EXTMODCODE = 164, /* Extended module code, Table 164 */
+	SFF_8436_VENDOR_OUI_START = 165,	/* Vendor OUI SFP vendor IEEE
+ * company ID */
+ SFF_8436_VENDOR_OUI_END = 167,
+ SFF_8436_PN_START = 168, /* Vendor PN, padded right with 0x20 */
+ SFF_8436_PN_END = 183,
+ SFF_8436_REV_START = 184, /* Vendor Revision, padded right 0x20 */
+ SFF_8436_REV_END = 185,
+ SFF_8436_WAVELEN_START = 186, /* Wavelength Laser wavelength
+ * (Passive/Active Cable
+ * Specification Compliance) */
+ SFF_8436_WAVELEN_END = 189,
+	SFF_8436_MAX_CASE_TEMP = 190,	/* Allows specifying a maximum case
+					 * temperature above 70C. The maximum case
+					 * temperature is an 8-bit value in degrees
+					 * C. A value of 0 implies the standard
+					 * 70C rating. */
+ SFF_8436_CC_BASE = 191, /* CC_BASE Check code for Base ID
+ * Fields (first 63 bytes) */
+ /* Extended ID fields */
+ SFF_8436_OPTIONS_START = 192, /* Options Indicates which optional
+ * transceiver signals are
+ * implemented (see Table 39) */
+ SFF_8436_OPTIONS_END = 195,
+	SFF_8436_SN_START 	= 196,	/* Vendor SN, right padded with 0x20 */
+ SFF_8436_SN_END = 211,
+ SFF_8436_DATE_START = 212, /* Vendor’s manufacturing date code
+ * (see Table 40) */
+ SFF_8436_DATE_END = 219,
+ SFF_8436_DIAG_TYPE = 220, /* Diagnostic Monitoring Type
+ * Indicates which type of
+ * diagnostic monitoring is
+ * implemented (if any) in the
+ * transceiver (see Table 41) */
+
+ SFF_8436_ENHANCED = 221, /* Enhanced Options Indicates which
+ * optional features are implemented
+ * (if any) in the transceiver
+ * (see Table 42) */
+ SFF_8636_BITRATE = 222, /* Nominal bit rate per channel, units
+ * of 250 Mbps */
+ SFF_8436_CC_EXT = 223, /* Check code for the Extended ID
+ * Fields (bytes 192-222 incl) */
+ SFF_8436_VENDOR_RSRVD_START = 224,
+ SFF_8436_VENDOR_RSRVD_END = 255,
+};
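+
+/*
+ * Editorial sketch, not part of the patch: the fixed-width string
+ * fields above (vendor name, PN, SN, ...) are right-padded with 0x20
+ * rather than NUL-terminated.  A userland-flavored extractor, assuming
+ * `page0' holds the 256-byte 0xA0 image and `buf' has len + 1 bytes:
+ *
+ *	#include <string.h>
+ *
+ *	// Copy an inclusive [start, end] field and strip 0x20 padding.
+ *	static void
+ *	qsfp_get_string(const uint8_t *page0, int start, int end, char *buf)
+ *	{
+ *		int len = end - start + 1;
+ *
+ *		memcpy(buf, page0 + start, len);
+ *		buf[len] = '\0';
+ *		while (len > 0 && buf[len - 1] == ' ')
+ *			buf[--len] = '\0';
+ *	}
+ *
+ * e.g. qsfp_get_string(page0, SFF_8436_VENDOR_START,
+ * SFF_8436_VENDOR_END, vendor).
+ */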
+
+
diff --git a/freebsd/sys/net/sff8472.h b/freebsd/sys/net/sff8472.h
new file mode 100644
index 00000000..5c50ea46
--- /dev/null
+++ b/freebsd/sys/net/sff8472.h
@@ -0,0 +1,508 @@
+/*-
+ * Copyright (c) 2013 George V. Neville-Neil
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * The following set of constants are from Document SFF-8472
+ * "Diagnostic Monitoring Interface for Optical Transceivers" revision
+ * 11.3 published by the SFF Committee on June 11, 2013
+ *
+ * The SFF standard defines two ranges of addresses, each 255 bytes
+ * long for the storage of data and diagnostics on cables, such as
+ * SFP+ optics and TwinAx cables. The ranges are defined in the
+ * following way:
+ *
+ * Base Address 0xa0 (Identification Data)
+ * 0-95 Serial ID Defined by SFP MSA
+ * 96-127 Vendor Specific Data
+ * 128-255 Reserved
+ *
+ * Base Address 0xa2 (Diagnostic Data)
+ * 0-55 Alarm and Warning Thresholds
+ * 56-95 Cal Constants
+ * 96-119 Real Time Diagnostic Interface
+ * 120-127 Vendor Specific
+ * 128-247 User Writable EEPROM
+ * 248-255 Vendor Specific
+ *
+ * Note that not all addresses are supported. Where support is
+ * optional this is noted and instructions for checking for the
+ * support are supplied.
+ *
+ * All these values are read across an I2C (i squared C) bus. Any
+ * device wishing to read these addresses must first have support for
+ * i2c calls. The Chelsio T4/T5 driver (dev/cxgbe) is one such
+ * driver.
+ */
+
+
+/* Table 3.1 Two-wire interface ID: Data Fields */
+
+enum {
+ SFF_8472_BASE = 0xa0, /* Base address for all our queries. */
+ SFF_8472_ID = 0, /* Transceiver Type (Table 3.2) */
+ SFF_8472_EXT_ID = 1, /* Extended transceiver type (Table 3.3) */
+ SFF_8472_CONNECTOR = 2, /* Connector type (Table 3.4) */
+ SFF_8472_TRANS_START = 3, /* Elec or Optical Compatibility
+ * (Table 3.5) */
+ SFF_8472_TRANS_END = 10,
+ SFF_8472_ENCODING = 11, /* Encoding Code for high speed
+ * serial encoding algorithm (see
+ * Table 3.6) */
+ SFF_8472_BITRATE = 12, /* Nominal signaling rate, units
+ * of 100MBd. (see details for
+ * rates > 25.0Gb/s) */
+ SFF_8472_RATEID = 13, /* Type of rate select
+ * functionality (see Table
+ * 3.6a) */
+ SFF_8472_LEN_SMF_KM = 14, /* Link length supported for single
+ * mode fiber, units of km */
+ SFF_8472_LEN_SMF = 15, /* Link length supported for single
+ * mode fiber, units of 100 m */
+ SFF_8472_LEN_50UM = 16, /* Link length supported for 50 um
+ * OM2 fiber, units of 10 m */
+ SFF_8472_LEN_625UM = 17, /* Link length supported for 62.5
+ * um OM1 fiber, units of 10 m */
+ SFF_8472_LEN_OM4 = 18, /* Link length supported for 50um
+ * OM4 fiber, units of 10m.
+ * Alternatively copper or direct
+ * attach cable, units of m */
+ SFF_8472_LEN_OM3 = 19, /* Link length supported for 50 um OM3 fiber, units of 10 m */
+ SFF_8472_VENDOR_START = 20, /* Vendor name [Address A0h, Bytes
+ * 20-35] */
+ SFF_8472_VENDOR_END = 35,
+ SFF_8472_TRANS = 36, /* Transceiver Code for electronic
+ * or optical compatibility (see
+ * Table 3.5) */
+ SFF_8472_VENDOR_OUI_START = 37, /* Vendor OUI SFP vendor IEEE
+ * company ID */
+ SFF_8472_VENDOR_OUI_END = 39,
+ SFF_8472_PN_START = 40, /* Vendor PN */
+ SFF_8472_PN_END = 55,
+ SFF_8472_REV_START = 56, /* Vendor Revision */
+ SFF_8472_REV_END = 59,
+ SFF_8472_WAVELEN_START = 60, /* Wavelength Laser wavelength
+ * (Passive/Active Cable
+ * Specification Compliance) */
+ SFF_8472_WAVELEN_END = 61,
+ SFF_8472_CC_BASE = 63, /* CC_BASE Check code for Base ID
+ * Fields (addresses 0 to 62) */
+
+/*
+ * Extension Fields (optional) check the options before reading other
+ * addresses.
+ */
+ SFF_8472_OPTIONS_MSB = 64, /* Options Indicates which optional
+ * transceiver signals are
+ * implemented */
+ SFF_8472_OPTIONS_LSB = 65, /* (see Table 3.7) */
+ SFF_8472_BR_MAX = 66, /* BR max Upper bit rate margin,
+ * units of % (see details for
+ * rates > 25.0Gb/s) */
+ SFF_8472_BR_MIN = 67, /* Lower bit rate margin, units of
+ * % (see details for rates >
+ * 25.0Gb/s) */
+ SFF_8472_SN_START = 68, /* Vendor SN [Address A0h, Bytes 68-83] */
+ SFF_8472_SN_END = 83,
+ SFF_8472_DATE_START = 84, /* Date code Vendor’s manufacturing
+ * date code (see Table 3.8) */
+ SFF_8472_DATE_END = 91,
+ SFF_8472_DIAG_TYPE = 92, /* Diagnostic Monitoring Type
+ * Indicates which type of
+ * diagnostic monitoring is
+ * implemented (if any) in the
+ * transceiver (see Table 3.9)
+ */
+
+ SFF_8472_ENHANCED = 93, /* Enhanced Options Indicates which
+ * optional enhanced features are
+ * implemented (if any) in the
+ * transceiver (see Table 3.10) */
+ SFF_8472_COMPLIANCE = 94, /* SFF-8472 Compliance Indicates
+ * which revision of SFF-8472 the
+ * transceiver complies with. (see
+ * Table 3.12)*/
+ SFF_8472_CC_EXT = 95, /* Check code for the Extended ID
+ * Fields (addresses 64 to 94)
+ */
+
+ SFF_8472_VENDOR_RSRVD_START = 96,
+ SFF_8472_VENDOR_RSRVD_END = 127,
+
+ SFF_8472_RESERVED_START = 128,
+ SFF_8472_RESERVED_END = 255
+};
+
+#define SFF_8472_DIAG_IMPL (1 << 6) /* Required to be 1 */
+#define SFF_8472_DIAG_INTERNAL (1 << 5) /* Internal measurements. */
+#define SFF_8472_DIAG_EXTERNAL (1 << 4) /* External measurements. */
+#define SFF_8472_DIAG_POWER (1 << 3) /* Power measurement type */
+#define SFF_8472_DIAG_ADDR_CHG (1 << 2) /* Address change required.
+ * See SFF-8472 doc. */
+
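+/*
+ * Editorial sketch, not part of the patch: byte 92 of the 0xa0 page
+ * advertises whether the 0xa2 diagnostics below exist at all.
+ * `eeprom' is assumed to already hold the 0xa0 identification page.
+ *
+ *	// Nonzero if the 0xa2 diagnostic page may be read directly.
+ *	static int
+ *	sfp_has_diagnostics(const uint8_t *eeprom)
+ *	{
+ *		uint8_t dmt = eeprom[SFF_8472_DIAG_TYPE];
+ *
+ *		if ((dmt & SFF_8472_DIAG_IMPL) == 0)
+ *			return (0);
+ *		// Address-change modules need the SFF-8472 dance first.
+ *		if (dmt & SFF_8472_DIAG_ADDR_CHG)
+ *			return (0);
+ *		return (1);
+ *	}
+ */
+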
+ /*
+ * Diagnostics are available at the two wire address 0xa2. All
+ * diagnostics are OPTIONAL, so you should check 0xa0 register 92 to
+ * see which, if any, are supported.
+ */
+
+enum {SFF_8472_DIAG = 0xa2}; /* Base address for diagnostics. */
+
+ /*
+ * Table 3.15 Alarm and Warning Thresholds All values are 2 bytes
+ * and MUST be read in a single read operation starting at the MSB
+ */
+
+enum {
+ SFF_8472_TEMP_HIGH_ALM = 0, /* Temp High Alarm */
+ SFF_8472_TEMP_LOW_ALM = 2, /* Temp Low Alarm */
+ SFF_8472_TEMP_HIGH_WARN = 4, /* Temp High Warning */
+ SFF_8472_TEMP_LOW_WARN = 6, /* Temp Low Warning */
+ SFF_8472_VOLTAGE_HIGH_ALM = 8, /* Voltage High Alarm */
+ SFF_8472_VOLTAGE_LOW_ALM = 10, /* Voltage Low Alarm */
+ SFF_8472_VOLTAGE_HIGH_WARN = 12, /* Voltage High Warning */
+ SFF_8472_VOLTAGE_LOW_WARN = 14, /* Voltage Low Warning */
+ SFF_8472_BIAS_HIGH_ALM = 16, /* Bias High Alarm */
+ SFF_8472_BIAS_LOW_ALM = 18, /* Bias Low Alarm */
+ SFF_8472_BIAS_HIGH_WARN = 20, /* Bias High Warning */
+ SFF_8472_BIAS_LOW_WARN = 22, /* Bias Low Warning */
+ SFF_8472_TX_POWER_HIGH_ALM = 24, /* TX Power High Alarm */
+ SFF_8472_TX_POWER_LOW_ALM = 26, /* TX Power Low Alarm */
+ SFF_8472_TX_POWER_HIGH_WARN = 28, /* TX Power High Warning */
+ SFF_8472_TX_POWER_LOW_WARN = 30, /* TX Power Low Warning */
+ SFF_8472_RX_POWER_HIGH_ALM = 32, /* RX Power High Alarm */
+ SFF_8472_RX_POWER_LOW_ALM = 34, /* RX Power Low Alarm */
+ SFF_8472_RX_POWER_HIGH_WARN = 36, /* RX Power High Warning */
+ SFF_8472_RX_POWER_LOW_WARN = 38, /* RX Power Low Warning */
+
+ SFF_8472_RX_POWER4 = 56, /* Rx_PWR(4) Single precision
+ * floating point calibration data
+ * - Rx optical power. Bit 7 of
+ * byte 56 is MSB. Bit 0 of byte
+ * 59 is LSB. Rx_PWR(4) should be
+ * set to zero for “internally
+ * calibrated” devices. */
+ SFF_8472_RX_POWER3 = 60, /* Rx_PWR(3) Single precision
+ * floating point calibration data
+ * - Rx optical power. Bit 7 of
+ * byte 60 is MSB. Bit 0 of byte 63
+ * is LSB. Rx_PWR(3) should be set
+ * to zero for “internally
+ * calibrated” devices.*/
+ SFF_8472_RX_POWER2 = 64, /* Rx_PWR(2) Single precision
+ * floating point calibration data,
+ * Rx optical power. Bit 7 of byte
+ * 64 is MSB, bit 0 of byte 67 is
+ * LSB. Rx_PWR(2) should be set to
+ * zero for “internally calibrated”
+ * devices. */
+ SFF_8472_RX_POWER1 = 68, /* Rx_PWR(1) Single precision
+ * floating point calibration data,
+ * Rx optical power. Bit 7 of byte
+ * 68 is MSB, bit 0 of byte 71 is
+ * LSB. Rx_PWR(1) should be set to
+ * 1 for “internally calibrated”
+ * devices. */
+ SFF_8472_RX_POWER0 = 72, /* Rx_PWR(0) Single precision
+ * floating point calibration data,
+ * Rx optical power. Bit 7 of byte
+ * 72 is MSB, bit 0 of byte 75 is
+ * LSB. Rx_PWR(0) should be set to
+ * zero for “internally calibrated”
+ * devices. */
+ SFF_8472_TX_I_SLOPE = 76, /* Tx_I(Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * laser bias current. Bit 7 of
+ * byte 76 is MSB, bit 0 of byte 77
+ * is LSB. Tx_I(Slope) should be
+ * set to 1 for “internally
+ * calibrated” devices. */
+ SFF_8472_TX_I_OFFSET = 78, /* Tx_I(Offset) Fixed decimal
+ * (signed two’s complement)
+ * calibration data, laser bias
+ * current. Bit 7 of byte 78 is
+ * MSB, bit 0 of byte 79 is
+ * LSB. Tx_I(Offset) should be set
+ * to zero for “internally
+ * calibrated” devices. */
+ SFF_8472_TX_POWER_SLOPE = 80, /* Tx_PWR(Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * transmitter coupled output
+ * power. Bit 7 of byte 80 is MSB,
+ * bit 0 of byte 81 is LSB.
+ * Tx_PWR(Slope) should be set to 1
+ * for “internally calibrated”
+ * devices. */
+ SFF_8472_TX_POWER_OFFSET = 82, /* Tx_PWR(Offset) Fixed decimal
+ * (signed two’s complement)
+ * calibration data, transmitter
+ * coupled output power. Bit 7 of
+ * byte 82 is MSB, bit 0 of byte 83
+ * is LSB. Tx_PWR(Offset) should be
+ * set to zero for “internally
+ * calibrated” devices. */
+ SFF_8472_T_SLOPE = 84, /* T (Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * internal module temperature. Bit
+ * 7 of byte 84 is MSB, bit 0 of
+ * byte 85 is LSB. T(Slope) should
+ * be set to 1 for “internally
+ * calibrated” devices. */
+ SFF_8472_T_OFFSET = 86, /* T (Offset) Fixed decimal (signed
+ * two’s complement) calibration
+ * data, internal module
+ * temperature. Bit 7 of byte 86 is
+ * MSB, bit 0 of byte 87 is LSB.
+ * T(Offset) should be set to zero
+ * for “internally calibrated”
+ * devices. */
+ SFF_8472_V_SLOPE = 88, /* V (Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * internal module supply
+ * voltage. Bit 7 of byte 88 is
+ * MSB, bit 0 of byte 89 is
+ * LSB. V(Slope) should be set to 1
+ * for “internally calibrated”
+ * devices. */
+ SFF_8472_V_OFFSET = 90, /* V (Offset) Fixed decimal (signed
+ * two’s complement) calibration
+ * data, internal module supply
+ * voltage. Bit 7 of byte 90 is
+ * MSB. Bit 0 of byte 91 is
+ * LSB. V(Offset) should be set to
+ * zero for “internally calibrated”
+ * devices. */
+ SFF_8472_CHECKSUM = 95, /* Checksum Byte 95 contains the
+ * low order 8 bits of the sum of
+ * bytes 0 – 94. */
+ /* Internal measurements. */
+
+ SFF_8472_TEMP = 96, /* Internally measured module temperature. */
+ SFF_8472_VCC = 98, /* Internally measured supply
+ * voltage in transceiver.
+ */
+ SFF_8472_TX_BIAS = 100, /* Internally measured TX Bias Current. */
+ SFF_8472_TX_POWER = 102, /* Measured TX output power. */
+ SFF_8472_RX_POWER = 104, /* Measured RX input power. */
+
+ SFF_8472_STATUS = 110 /* See below */
+};
+ /* Status Bits Described */
+
+/*
+ * TX Disable State Digital state of the TX Disable Input Pin. Updated
+ * within 100ms of change on pin.
+ */
+#define SFF_8472_STATUS_TX_DISABLE (1 << 7)
+
+/*
+ * Select Read/write bit that allows software disable of
+ * laser. Writing ‘1’ disables laser. See Table 3.11 for
+ * enable/disable timing requirements. This bit is “OR”d with the hard
+ * TX_DISABLE pin value. Note, per SFP MSA TX_DISABLE pin is default
+ * enabled unless pulled low by hardware. If Soft TX Disable is not
+ * implemented, the transceiver ignores the value of this bit. Default
+ * power up value is zero/low.
+ */
+#define SFF_8472_STATUS_SOFT_TX_DISABLE (1 << 6)
+
+/*
+ * RS(1) State Digital state of SFP input pin AS(1) per SFF-8079 or
+ * RS(1) per SFF-8431. Updated within 100ms of change on pin. See A2h
+ * Byte 118, Bit 3 for Soft RS(1) Select control information.
+ */
+#define SFF_8472_RS_STATE (1 << 5)
+
+/*
+ * Rate_Select State [aka. “RS(0)”] Digital state of the SFP
+ * Rate_Select Input Pin. Updated within 100ms of change on pin. Note:
+ * This pin is also known as AS(0) in SFF-8079 and RS(0) in SFF-8431.
+ */
+#define SFF_8472_STATUS_SELECT_STATE (1 << 4)
+
+/*
+ * Read/write bit that allows software rate select control. Writing
+ * ‘1’ selects full bandwidth operation. This bit is “OR’d with the
+ * hard Rate_Select, AS(0) or RS(0) pin value. See Table 3.11 for
+ * timing requirements. Default at power up is logic zero/low. If Soft
+ * Rate Select is not implemented, the transceiver ignores the value
+ * of this bit. Note: Specific transceiver behaviors of this bit are
+ * identified in Table 3.6a and referenced documents. See Table 3.18a,
+ * byte 118, bit 3 for Soft RS(1) Select.
+ */
+#define SFF_8472_STATUS_SOFT_RATE_SELECT (1 << 3)
+
+/*
+ * TX Fault State Digital state of the TX Fault Output Pin. Updated
+ * within 100ms of change on pin.
+ */
+#define SFF_8472_STATUS_TX_FAULT_STATE (1 << 2)
+
+/*
+ * Digital state of the RX_LOS Output Pin. Updated within 100ms of
+ * change on pin.
+ */
+#define SFF_8472_STATUS_RX_LOS (1 << 1)
+
+/*
+ * Indicates transceiver has achieved power up and data is ready. Bit
+ * remains high until data is ready to be read at which time the
+ * device sets the bit low.
+ */
+#define SFF_8472_STATUS_DATA_READY (1 << 0)
+
+/*
+ * Table 3.2 Identifier values.
+ * Identifier constants are taken from SFF-8024 rev 2.9 table 4.1
+ * (as referenced by the table 3.2 footer).
+ */
+enum {
+ SFF_8024_ID_UNKNOWN = 0x0, /* Unknown or unspecified */
+ SFF_8024_ID_GBIC = 0x1, /* GBIC */
+ SFF_8024_ID_SFF = 0x2, /* Module soldered to motherboard (ex: SFF)*/
+ SFF_8024_ID_SFP = 0x3, /* SFP or SFP “Plus” */
+ SFF_8024_ID_XBI = 0x4, /* 300 pin XBI */
+ SFF_8024_ID_XENPAK = 0x5, /* Xenpak */
+ SFF_8024_ID_XFP = 0x6, /* XFP */
+ SFF_8024_ID_XFF = 0x7, /* XFF */
+ SFF_8024_ID_XFPE = 0x8, /* XFP-E */
+ SFF_8024_ID_XPAK = 0x9, /* XPAk */
+ SFF_8024_ID_X2 = 0xA, /* X2 */
+ SFF_8024_ID_DWDM_SFP = 0xB, /* DWDM-SFP */
+ SFF_8024_ID_QSFP = 0xC, /* QSFP */
+ SFF_8024_ID_QSFPPLUS = 0xD, /* QSFP+ */
+ SFF_8024_ID_CXP = 0xE, /* CXP */
+ SFF_8024_ID_HD4X = 0xF, /* Shielded Mini Multilane HD 4X */
+ SFF_8024_ID_HD8X = 0x10, /* Shielded Mini Multilane HD 8X */
+ SFF_8024_ID_QSFP28 = 0x11, /* QSFP28 */
+ SFF_8024_ID_CXP2 = 0x12, /* CXP2 (aka CXP28) */
+ SFF_8024_ID_CDFP = 0x13, /* CDFP (Style 1/Style 2) */
+	SFF_8024_ID_SMM4	= 0x14,	/* Shielded Mini Multilane HD 4X Fanout */
+	SFF_8024_ID_SMM8	= 0x15,	/* Shielded Mini Multilane HD 8X Fanout */
+ SFF_8024_ID_CDFP3 = 0x16, /* CDFP (Style3) */
+ SFF_8024_ID_LAST = SFF_8024_ID_CDFP3
+ };
+
+static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = {"Unknown",
+ "GBIC",
+ "SFF",
+ "SFP/SFP+/SFP28",
+ "XBI",
+ "Xenpak",
+ "XFP",
+ "XFF",
+ "XFP-E",
+ "XPAK",
+ "X2",
+ "DWDM-SFP/SFP+",
+ "QSFP",
+ "QSFP+",
+ "CXP",
+ "HD4X",
+ "HD8X",
+ "QSFP28",
+ "CXP2",
+ "CDFP",
+ "SMM4",
+ "SMM8",
+ "CDFP3"};
+
+/* Keep compatibility with old definitions */
+#define SFF_8472_ID_UNKNOWN SFF_8024_ID_UNKNOWN
+#define SFF_8472_ID_GBIC SFF_8024_ID_GBIC
+#define SFF_8472_ID_SFF SFF_8024_ID_SFF
+#define SFF_8472_ID_SFP SFF_8024_ID_SFP
+#define SFF_8472_ID_XBI SFF_8024_ID_XBI
+#define SFF_8472_ID_XENPAK SFF_8024_ID_XENPAK
+#define SFF_8472_ID_XFP SFF_8024_ID_XFP
+#define SFF_8472_ID_XFF SFF_8024_ID_XFF
+#define SFF_8472_ID_XFPE SFF_8024_ID_XFPE
+#define SFF_8472_ID_XPAK SFF_8024_ID_XPAK
+#define SFF_8472_ID_X2 SFF_8024_ID_X2
+#define SFF_8472_ID_DWDM_SFP SFF_8024_ID_DWDM_SFP
+#define SFF_8472_ID_QSFP SFF_8024_ID_QSFP
+#define SFF_8472_ID_LAST SFF_8024_ID_LAST
+
+#define sff_8472_id sff_8024_id
+
+/*
+ * Table 3.9 Diagnostic Monitoring Type (byte 92)
+ * bits described.
+ */
+
+/*
+ * Digital diagnostic monitoring implemented.
+ * Set to 1 for transceivers implementing DDM.
+ */
+#define SFF_8472_DDM_DONE (1 << 6)
+
+/*
+ * Measurements are internally calibrated.
+ */
+#define SFF_8472_DDM_INTERNAL (1 << 5)
+
+/*
+ * Measurements are externally calibrated.
+ */
+#define SFF_8472_DDM_EXTERNAL (1 << 4)
+
+/*
+ * Received power measurement type
+ * 0 = OMA, 1 = average power
+ */
+#define SFF_8472_DDM_PMTYPE (1 << 3)
+
+/* Table 3.13 and 3.14 Temperature Conversion Values */
+#define SFF_8472_TEMP_SIGN (1 << 15)
+#define SFF_8472_TEMP_SHIFT 8
+#define SFF_8472_TEMP_MSK 0xEF00
+#define SFF_8472_TEMP_FRAC 0x00FF
+
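+/*
+ * Editorial sketch, not part of the patch: per Tables 3.13/3.14 the
+ * measured temperature (0xa2 bytes 96-97) is a two's-complement 16-bit
+ * value with 1/256 degree resolution.  Conversion for an internally
+ * calibrated module (slope 1, offset 0 assumed):
+ *
+ *	// Raw big-endian temperature reading -> degrees Celsius.
+ *	static double
+ *	sfp_temp_celsius(uint8_t msb, uint8_t lsb)
+ *	{
+ *		int16_t raw = (int16_t)(((uint16_t)msb << 8) | lsb);
+ *
+ *		return ((double)raw / 256.0);
+ *	}
+ */
+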
+/* Internal Calibration Conversion factors */
+
+/*
+ * Represented as a 16 bit unsigned integer with the voltage defined
+ * as the full 16 bit value (0 – 65535) with LSB equal to 100 uVolt,
+ * yielding a total range of 0 to +6.55 Volts.
+ */
+#define SFF_8472_VCC_FACTOR 10000.0
+
+/*
+ * Represented as a 16 bit unsigned integer with the current defined
+ * as the full 16 bit value (0 – 65535) with LSB equal to 2 uA,
+ * yielding a total range of 0 to 131 mA.
+ */
+
+#define SFF_8472_BIAS_FACTOR 2000.0
+
+/*
+ * Represented as a 16 bit unsigned integer with the power defined as
+ * the full 16 bit value (0 – 65535) with LSB equal to 0.1 uW,
+ * yielding a total range of 0 to 6.5535 mW (~ -40 to +8.2 dBm).
+ */
+
+#define SFF_8472_POWER_FACTOR 10000.0
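+
+/*
+ * Editorial sketch, not part of the patch: for internally calibrated
+ * modules the supply-voltage and power factors above map raw readings
+ * directly to engineering units (the bias factor is omitted here; its
+ * LSB is documented as 2 uA above).
+ *
+ *	// Raw 16-bit 0xa2 readings -> volts and milliwatts.
+ *	static double
+ *	sfp_vcc_volts(uint16_t raw)
+ *	{
+ *		return ((double)raw / SFF_8472_VCC_FACTOR);  // 100 uV LSB
+ *	}
+ *
+ *	static double
+ *	sfp_power_mw(uint16_t raw)
+ *	{
+ *		return ((double)raw / SFF_8472_POWER_FACTOR); // 0.1 uW LSB
+ *	}
+ */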
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h
index 01e26cdb..3e186c12 100644
--- a/freebsd/sys/net/vnet.h
+++ b/freebsd/sys/net/vnet.h
@@ -70,6 +70,7 @@ struct vnet {
u_int vnet_magic_n;
u_int vnet_ifcnt;
u_int vnet_sockcnt;
+ u_int vnet_state; /* SI_SUB_* */
void *vnet_data_mem;
uintptr_t vnet_data_base;
};
@@ -85,6 +86,61 @@ struct vnet {
#ifdef _KERNEL
+#define VNET_PCPUSTAT_DECLARE(type, name) \
+ VNET_DECLARE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)])
+
+#define VNET_PCPUSTAT_DEFINE(type, name) \
+ VNET_DEFINE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)])
+
+#define VNET_PCPUSTAT_ALLOC(name, wait) \
+ COUNTER_ARRAY_ALLOC(VNET(name), \
+ sizeof(VNET(name)) / sizeof(counter_u64_t), (wait))
+
+#define VNET_PCPUSTAT_FREE(name) \
+ COUNTER_ARRAY_FREE(VNET(name), sizeof(VNET(name)) / sizeof(counter_u64_t))
+
+#define VNET_PCPUSTAT_ADD(type, name, f, v) \
+ counter_u64_add(VNET(name)[offsetof(type, f) / sizeof(uint64_t)], (v))
+
+#define VNET_PCPUSTAT_FETCH(type, name, f) \
+ counter_u64_fetch(VNET(name)[offsetof(type, f) / sizeof(uint64_t)])
+
+#define VNET_PCPUSTAT_SYSINIT(name) \
+static void \
+vnet_##name##_init(const void *unused) \
+{ \
+ VNET_PCPUSTAT_ALLOC(name, M_WAITOK); \
+} \
+VNET_SYSINIT(vnet_ ## name ## _init, SI_SUB_INIT_IF, \
+ SI_ORDER_FIRST, vnet_ ## name ## _init, NULL)
+
+#define VNET_PCPUSTAT_SYSUNINIT(name) \
+static void \
+vnet_##name##_uninit(const void *unused) \
+{ \
+ VNET_PCPUSTAT_FREE(name); \
+} \
+VNET_SYSUNINIT(vnet_ ## name ## _uninit, SI_SUB_INIT_IF, \
+ SI_ORDER_FIRST, vnet_ ## name ## _uninit, NULL)
+
+#ifdef SYSCTL_OID
+#define SYSCTL_VNET_PCPUSTAT(parent, nbr, name, type, array, desc) \
+static int \
+array##_sysctl(SYSCTL_HANDLER_ARGS) \
+{ \
+ type s; \
+ CTASSERT((sizeof(type) / sizeof(uint64_t)) == \
+ (sizeof(VNET(array)) / sizeof(counter_u64_t))); \
+ COUNTER_ARRAY_COPY(VNET(array), &s, sizeof(type) / sizeof(uint64_t));\
+ if (req->newptr) \
+ COUNTER_ARRAY_ZERO(VNET(array), \
+ sizeof(type) / sizeof(uint64_t)); \
+ return (SYSCTL_OUT(req, &s, sizeof(type))); \
+} \
+SYSCTL_PROC(parent, nbr, name, CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_RW, \
+ NULL, 0, array ## _sysctl, "I", desc)
+#endif /* SYSCTL_OID */
+
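+/*
+ * Editorial sketch, not part of the patch: the VNET_PCPUSTAT macros
+ * above virtualize a statistics structure made solely of uint64_t
+ * fields as an array of per-CPU counters, one array per vnet.  A usage
+ * sketch with a hypothetical `struct foostat' (names are illustrative
+ * only):
+ *
+ *	struct foostat {		// every field must be uint64_t
+ *		uint64_t foos_in;
+ *		uint64_t foos_drop;
+ *	};
+ *
+ *	VNET_PCPUSTAT_DEFINE(struct foostat, foostat);
+ *	VNET_PCPUSTAT_SYSINIT(foostat);    // allocate counters per vnet
+ *	VNET_PCPUSTAT_SYSUNINIT(foostat);  // free them on teardown
+ *
+ *	// Datapath: bump one field of the current vnet's counters.
+ *	VNET_PCPUSTAT_ADD(struct foostat, foostat, foos_drop, 1);
+ */
+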
#ifdef VIMAGE
#include <rtems/bsd/sys/lock.h>
#include <sys/proc.h> /* for struct thread */
@@ -233,53 +289,6 @@ void vnet_data_copy(void *start, int size);
void vnet_data_free(void *start_arg, int size);
/*
- * Sysctl variants for vnet-virtualized global variables. Include
- * <sys/sysctl.h> to expose these definitions.
- *
- * Note: SYSCTL_PROC() handler functions will need to resolve pointer
- * arguments themselves, if required.
- */
-#ifdef SYSCTL_OID
-int vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS);
-int vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS);
-int vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS);
-int vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS);
-
-#define SYSCTL_VNET_INT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access), \
- ptr, val, vnet_sysctl_handle_int, "I", descr)
-#define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \
- fmt, descr) \
- CTASSERT(((access) & CTLTYPE) != 0); \
- SYSCTL_OID(parent, nbr, name, CTLFLAG_VNET|(access), ptr, arg, \
- handler, fmt, descr)
-#define SYSCTL_VNET_OPAQUE(parent, nbr, name, access, ptr, len, fmt, \
- descr) \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, len, \
- vnet_sysctl_handle_opaque, fmt, descr)
-#define SYSCTL_VNET_STRING(parent, nbr, name, access, arg, len, descr) \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_STRING|CTLFLAG_VNET|(access), \
- arg, len, vnet_sysctl_handle_string, "A", descr)
-#define SYSCTL_VNET_STRUCT(parent, nbr, name, access, ptr, type, descr) \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, \
- sizeof(struct type), vnet_sysctl_handle_opaque, "S," #type, \
- descr)
-#define SYSCTL_VNET_UINT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_UINT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access), \
- ptr, val, vnet_sysctl_handle_uint, "IU", descr)
-#define VNET_SYSCTL_ARG(req, arg1) do { \
- if (arg1 != NULL) \
- arg1 = (void *)(TD_TO_VNET((req)->td)->vnet_data_base + \
- (uintptr_t)(arg1)); \
-} while (0)
-#endif /* SYSCTL_OID */
-
-/*
* Virtual sysinit mechanism, allowing network stack components to declare
* startup and shutdown methods to be run when virtual network stack
* instances are created and destroyed.
@@ -402,29 +411,6 @@ do { \
#define VNET(n) (n)
/*
- * When VIMAGE isn't compiled into the kernel, virtaulized SYSCTLs simply
- * become normal SYSCTLs.
- */
-#ifdef SYSCTL_OID
-#define SYSCTL_VNET_INT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_INT(parent, nbr, name, access, ptr, val, descr)
-#define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \
- fmt, descr) \
- SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, \
- descr)
-#define SYSCTL_VNET_OPAQUE(parent, nbr, name, access, ptr, len, fmt, \
- descr) \
- SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr)
-#define SYSCTL_VNET_STRING(parent, nbr, name, access, arg, len, descr) \
- SYSCTL_STRING(parent, nbr, name, access, arg, len, descr)
-#define SYSCTL_VNET_STRUCT(parent, nbr, name, access, ptr, type, descr) \
- SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr)
-#define SYSCTL_VNET_UINT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr)
-#define VNET_SYSCTL_ARG(req, arg1)
-#endif /* SYSCTL_OID */
-
-/*
* When VIMAGE isn't compiled into the kernel, VNET_SYSINIT/VNET_SYSUNINIT
* map into normal sysinits, which have the same ordering properties.
*/