summaryrefslogtreecommitdiffstats
path: root/freebsd/sys/net/altq/altq_rmclass.c
diff options
context:
space:
mode:
Diffstat (limited to 'freebsd/sys/net/altq/altq_rmclass.c')
-rw-r--r--freebsd/sys/net/altq/altq_rmclass.c1841
1 files changed, 1841 insertions, 0 deletions
diff --git a/freebsd/sys/net/altq/altq_rmclass.c b/freebsd/sys/net/altq/altq_rmclass.c
new file mode 100644
index 00000000..160884e2
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_rmclass.c
@@ -0,0 +1,1841 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * LBL code modified by speer@eng.sun.com, May 1977.
+ * For questions and/or comments, please send mail to cbq@ee.lbl.gov
+ *
+ * @(#)rm_class.c 1.48 97/12/05 SMI
+ * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $
+ * $FreeBSD$
+ */
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/time.h>
+#ifdef ALTQ3_COMPAT
+#include <sys/kernel.h>
+#endif
+
+#include <net/if.h>
+#include <net/if_var.h>
+#ifdef ALTQ3_COMPAT
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#endif
+
+#include <net/altq/if_altq.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_rmclass.h>
+#include <net/altq/altq_rmclass_debug.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+
+/*
+ * Local Macros
+ */
+
+#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }
+
+/*
+ * Local routines.
+ */
+
+static int rmc_satisfied(struct rm_class *, struct timeval *);
+static void rmc_wrr_set_weights(struct rm_ifdat *);
+static void rmc_depth_compute(struct rm_class *);
+static void rmc_depth_recompute(rm_class_t *);
+
+static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
+static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int);
+
+static int _rmc_addq(rm_class_t *, mbuf_t *);
+static void _rmc_dropq(rm_class_t *);
+static mbuf_t *_rmc_getq(rm_class_t *);
+static mbuf_t *_rmc_pollq(rm_class_t *);
+
+static int rmc_under_limit(struct rm_class *, struct timeval *);
+static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
+static void rmc_drop_action(struct rm_class *);
+static void rmc_restart(struct rm_class *);
+static void rmc_root_overlimit(struct rm_class *, struct rm_class *);
+
+#define BORROW_OFFTIME
+/*
+ * BORROW_OFFTIME (experimental):
+ * borrow the offtime of the class borrowing from.
+ * the reason is that when its own offtime is set, the class is unable
+ * to borrow much, especially when cutoff is taking effect.
+ * but when the borrowed class is overloaded (advidle is close to minidle),
+ * use the borrowing class's offtime to avoid overload.
+ */
+#define ADJUST_CUTOFF
+/*
+ * ADJUST_CUTOFF (experimental):
+ * if no underlimit class is found due to cutoff, increase cutoff and
+ * retry the scheduling loop.
+ * also, don't invoke delay_actions while cutoff is taking effect,
+ * since a sleeping class won't have a chance to be scheduled in the
+ * next loop.
+ *
+ * now heuristics for setting the top-level variable (cutoff_) becomes:
+ * 1. if a packet arrives for a not-overlimit class, set cutoff
+ * to the depth of the class.
+ * 2. if cutoff is i, and a packet arrives for an overlimit class
+ * with an underlimit ancestor at a lower level than i (say j),
+ * then set cutoff to j.
+ * 3. at scheduling a packet, if there is no underlimit class
+ * due to the current cutoff level, increase cutoff by 1 and
+ * then try to schedule again.
+ */
+
+/*
+ * rm_class_t *
+ * rmc_newclass(...) - Create a new resource management class at priority
+ * 'pri' on the interface given by 'ifd'.
+ *
+ * nsecPerByte is the data rate of the interface in nanoseconds/byte.
+ * E.g., 800 for a 10Mb/s ethernet. If the class gets less
+ * than 100% of the bandwidth, this number should be the
+ * 'effective' rate for the class. Let f be the
+ * bandwidth fraction allocated to this class, and let
+ * nsPerByte be the data rate of the output link in
+ * nanoseconds/byte. Then nsecPerByte is set to
+ * nsPerByte / f. E.g., 1600 (= 800 / .5)
+ * for a class that gets 50% of an ethernet's bandwidth.
+ *
+ * action the routine to call when the class is over limit.
+ *
+ * maxq max allowable queue size for class (in packets).
+ *
+ * parent parent class pointer.
+ *
+ * borrow class to borrow from (should be either 'parent' or null).
+ *
+ * maxidle max value allowed for class 'idle' time estimate (this
+ * parameter determines how large an initial burst of packets
+ * can be before overlimit action is invoked.
+ *
+ * offtime how long 'delay' action will delay when class goes over
+ * limit (this parameter determines the steady-state burst
+ * size when a class is running over its limit).
+ *
+ * Maxidle and offtime have to be computed from the following: If the
+ * average packet size is s, the bandwidth fraction allocated to this
+ * class is f, we want to allow b packet bursts, and the gain of the
+ * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
+ *
+ * ptime = s * nsPerByte * (1 - f) / f
+ * maxidle = ptime * (1 - g^b) / g^b
+ * minidle = -ptime * (1 / (f - 1))
+ * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1)
+ *
+ * Operationally, it's convenient to specify maxidle & offtime in units
+ * independent of the link bandwidth so the maxidle & offtime passed to
+ * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
+ * (The constant factor is a scale factor needed to make the parameters
+ * integers. This scaling also means that the 'unscaled' values of
+ * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
+ * not nanoseconds.) Also note that the 'idle' filter computation keeps
+ * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
+ * maxidle also must be scaled upward by this value. Thus, the passed
+ * values for maxidle and offtime can be computed as follows:
+ *
+ * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
+ * offtime = offtime * 8 / (1000 * nsecPerByte)
+ *
+ * When USE_HRTIME is employed, then maxidle and offtime become:
+ * maxidle = maxilde * (8.0 / nsecPerByte);
+ * offtime = offtime * (8.0 / nsecPerByte);
+ */
+struct rm_class *
+rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
+ void (*action)(rm_class_t *, rm_class_t *), int maxq,
+ struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
+ int minidle, u_int offtime, int pktsize, int flags)
+{
+ struct rm_class *cl;
+ struct rm_class *peer;
+ int s;
+
+ if (pri >= RM_MAXPRIO)
+ return (NULL);
+#ifndef ALTQ_RED
+ if (flags & RMCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: RED not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_RIO
+ if (flags & RMCF_RIO) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: RIO not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_CODEL
+ if (flags & RMCF_CODEL) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: CODEL not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cl == NULL)
+ return (NULL);
+ CALLOUT_INIT(&cl->callout_);
+ cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cl->q_ == NULL) {
+ free(cl, M_DEVBUF);
+ return (NULL);
+ }
+
+ /*
+ * Class initialization.
+ */
+ cl->children_ = NULL;
+ cl->parent_ = parent;
+ cl->borrow_ = borrow;
+ cl->leaf_ = 1;
+ cl->ifdat_ = ifd;
+ cl->pri_ = pri;
+ cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+ cl->depth_ = 0;
+ cl->qthresh_ = 0;
+ cl->ns_per_byte_ = nsecPerByte;
+
+ qlimit(cl->q_) = maxq;
+ qtype(cl->q_) = Q_DROPHEAD;
+ qlen(cl->q_) = 0;
+ cl->flags_ = flags;
+
+#if 1 /* minidle is also scaled in ALTQ */
+ cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
+ if (cl->minidle_ > 0)
+ cl->minidle_ = 0;
+#else
+ cl->minidle_ = minidle;
+#endif
+ cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+ if (cl->maxidle_ == 0)
+ cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+ cl->avgidle_ = cl->maxidle_;
+ cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+ if (cl->offtime_ == 0)
+ cl->offtime_ = 1;
+#else
+ cl->avgidle_ = 0;
+ cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+ cl->overlimit = action;
+
+#ifdef ALTQ_RED
+ if (flags & (RMCF_RED|RMCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & RMCF_ECN)
+ red_flags |= REDF_ECN;
+ if (flags & RMCF_FLOWVALVE)
+ red_flags |= REDF_FLOWVALVE;
+#ifdef ALTQ_RIO
+ if (flags & RMCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ red_pkttime = nsecPerByte * pktsize / 1000;
+
+ if (flags & RMCF_RED) {
+ cl->red_ = red_alloc(0, 0,
+ qlimit(cl->q_) * 10/100,
+ qlimit(cl->q_) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->red_ != NULL)
+ qtype(cl->q_) = Q_RED;
+ }
+#ifdef ALTQ_RIO
+ else {
+ cl->red_ = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->red_ != NULL)
+ qtype(cl->q_) = Q_RIO;
+ }
+#endif
+ }
+#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (flags & RMCF_CODEL) {
+ cl->codel_ = codel_alloc(5, 100, 0);
+ if (cl->codel_ != NULL)
+ qtype(cl->q_) = Q_CODEL;
+ }
+#endif
+
+ /*
+ * put the class into the class tree
+ */
+ s = splnet();
+ IFQ_LOCK(ifd->ifq_);
+ if ((peer = ifd->active_[pri]) != NULL) {
+ /* find the last class at this pri */
+ cl->peer_ = peer;
+ while (peer->peer_ != ifd->active_[pri])
+ peer = peer->peer_;
+ peer->peer_ = cl;
+ } else {
+ ifd->active_[pri] = cl;
+ cl->peer_ = cl;
+ }
+
+ if (cl->parent_) {
+ cl->next_ = parent->children_;
+ parent->children_ = cl;
+ parent->leaf_ = 0;
+ }
+
+ /*
+ * Compute the depth of this class and its ancestors in the class
+ * hierarchy.
+ */
+ rmc_depth_compute(cl);
+
+ /*
+ * If CBQ's WRR is enabled, then initialize the class WRR state.
+ */
+ if (ifd->wrr_) {
+ ifd->num_[pri]++;
+ ifd->alloc_[pri] += cl->allotment_;
+ rmc_wrr_set_weights(ifd);
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+ return (cl);
+}
+
+int
+rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
+ int minidle, u_int offtime, int pktsize)
+{
+ struct rm_ifdat *ifd;
+ u_int old_allotment;
+ int s;
+
+ ifd = cl->ifdat_;
+ old_allotment = cl->allotment_;
+
+ s = splnet();
+ IFQ_LOCK(ifd->ifq_);
+ cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+ cl->qthresh_ = 0;
+ cl->ns_per_byte_ = nsecPerByte;
+
+ qlimit(cl->q_) = maxq;
+
+#if 1 /* minidle is also scaled in ALTQ */
+ cl->minidle_ = (minidle * nsecPerByte) / 8;
+ if (cl->minidle_ > 0)
+ cl->minidle_ = 0;
+#else
+ cl->minidle_ = minidle;
+#endif
+ cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+ if (cl->maxidle_ == 0)
+ cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+ cl->avgidle_ = cl->maxidle_;
+ cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+ if (cl->offtime_ == 0)
+ cl->offtime_ = 1;
+#else
+ cl->avgidle_ = 0;
+ cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+
+ /*
+ * If CBQ's WRR is enabled, then initialize the class WRR state.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
+ rmc_wrr_set_weights(ifd);
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
+ * the appropriate run robin weights for the CBQ weighted round robin
+ * algorithm.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_wrr_set_weights(struct rm_ifdat *ifd)
+{
+ int i;
+ struct rm_class *cl, *clh;
+
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ /*
+ * This is inverted from that of the simulator to
+ * maintain precision.
+ */
+ if (ifd->num_[i] == 0)
+ ifd->M_[i] = 0;
+ else
+ ifd->M_[i] = ifd->alloc_[i] /
+ (ifd->num_[i] * ifd->maxpkt_);
+ /*
+ * Compute the weighted allotment for each class.
+ * This takes the expensive div instruction out
+ * of the main loop for the wrr scheduling path.
+ * These only get recomputed when a class comes or
+ * goes.
+ */
+ if (ifd->active_[i] != NULL) {
+ clh = cl = ifd->active_[i];
+ do {
+ /* safe-guard for slow link or alloc_ == 0 */
+ if (ifd->M_[i] == 0)
+ cl->w_allotment_ = 0;
+ else
+ cl->w_allotment_ = cl->allotment_ /
+ ifd->M_[i];
+ cl = cl->peer_;
+ } while ((cl != NULL) && (cl != clh));
+ }
+ }
+}
+
+int
+rmc_get_weight(struct rm_ifdat *ifd, int pri)
+{
+ if ((pri >= 0) && (pri < RM_MAXPRIO))
+ return (ifd->M_[pri]);
+ else
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_depth_compute(struct rm_class *cl) - This function computes the
+ * appropriate depth of class 'cl' and its ancestors.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_compute(struct rm_class *cl)
+{
+ rm_class_t *t = cl, *p;
+
+ /*
+ * Recompute the depth for the branch of the tree.
+ */
+ while (t != NULL) {
+ p = t->parent_;
+ if (p && (t->depth_ >= p->depth_)) {
+ p->depth_ = t->depth_ + 1;
+ t = p;
+ } else
+ t = NULL;
+ }
+}
+
+/*
+ * static void
+ * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
+ * the depth of the tree after a class has been deleted.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_recompute(rm_class_t *cl)
+{
+#if 1 /* ALTQ */
+ rm_class_t *p, *t;
+
+ p = cl;
+ while (p != NULL) {
+ if ((t = p->children_) == NULL) {
+ p->depth_ = 0;
+ } else {
+ int cdepth = 0;
+
+ while (t != NULL) {
+ if (t->depth_ > cdepth)
+ cdepth = t->depth_;
+ t = t->next_;
+ }
+
+ if (p->depth_ == cdepth + 1)
+ /* no change to this parent */
+ return;
+
+ p->depth_ = cdepth + 1;
+ }
+
+ p = p->parent_;
+ }
+#else
+ rm_class_t *t;
+
+ if (cl->depth_ >= 1) {
+ if (cl->children_ == NULL) {
+ cl->depth_ = 0;
+ } else if ((t = cl->children_) != NULL) {
+ while (t != NULL) {
+ if (t->children_ != NULL)
+ rmc_depth_recompute(t);
+ t = t->next_;
+ }
+ } else
+ rmc_depth_compute(cl);
+ }
+#endif
+}
+
+/*
+ * void
+ * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
+ * function deletes a class from the link-sharing structure and frees
+ * all resources associated with the class.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
+{
+ struct rm_class *p, *head, *previous;
+ int s;
+
+ ASSERT(cl->children_ == NULL);
+
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+
+ s = splnet();
+ IFQ_LOCK(ifd->ifq_);
+ /*
+ * Free packets in the packet queue.
+ * XXX - this may not be a desired behavior. Packets should be
+ * re-queued.
+ */
+ rmc_dropall(cl);
+
+ /*
+ * If the class has a parent, then remove the class from the
+ * class from the parent's children chain.
+ */
+ if (cl->parent_ != NULL) {
+ head = cl->parent_->children_;
+ p = previous = head;
+ if (head->next_ == NULL) {
+ ASSERT(head == cl);
+ cl->parent_->children_ = NULL;
+ cl->parent_->leaf_ = 1;
+ } else while (p != NULL) {
+ if (p == cl) {
+ if (cl == head)
+ cl->parent_->children_ = cl->next_;
+ else
+ previous->next_ = cl->next_;
+ cl->next_ = NULL;
+ p = NULL;
+ } else {
+ previous = p;
+ p = p->next_;
+ }
+ }
+ }
+
+ /*
+ * Delete class from class priority peer list.
+ */
+ if ((p = ifd->active_[cl->pri_]) != NULL) {
+ /*
+ * If there is more than one member of this priority
+ * level, then look for class(cl) in the priority level.
+ */
+ if (p != p->peer_) {
+ while (p->peer_ != cl)
+ p = p->peer_;
+ p->peer_ = cl->peer_;
+
+ if (ifd->active_[cl->pri_] == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ } else {
+ ASSERT(p == cl);
+ ifd->active_[cl->pri_] = NULL;
+ }
+ }
+
+ /*
+ * Recompute the WRR weights.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] -= cl->allotment_;
+ ifd->num_[cl->pri_]--;
+ rmc_wrr_set_weights(ifd);
+ }
+
+ /*
+ * Re-compute the depth of the tree.
+ */
+#if 1 /* ALTQ */
+ rmc_depth_recompute(cl->parent_);
+#else
+ rmc_depth_recompute(ifd->root_);
+#endif
+
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+
+ /*
+ * Free the class structure.
+ */
+ if (cl->red_ != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ rio_destroy((rio_t *)cl->red_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ red_destroy(cl->red_);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ codel_destroy(cl->codel_);
+#endif
+ }
+ free(cl->q_, M_DEVBUF);
+ free(cl, M_DEVBUF);
+}
+
+
+/*
+ * void
+ * rmc_init(...) - Initialize the resource management data structures
+ * associated with the output portion of interface 'ifp'. 'ifd' is
+ * where the structures will be built (for backwards compatibility, the
+ * structures aren't kept in the ifnet struct). 'nsecPerByte'
+ * gives the link speed (inverse of bandwidth) in nanoseconds/byte.
+ * 'restart' is the driver-specific routine that the generic 'delay
+ * until under limit' action will call to restart output. `maxq'
+ * is the queue size of the 'link' & 'default' classes. 'maxqueued'
+ * is the maximum number of packets that the resource management
+ * code will allow to be queued 'downstream' (this is typically 1).
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
+ void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
+ int minidle, u_int offtime, int flags)
+{
+ int i, mtu;
+
+ /*
+ * Initialize the CBQ tracing/debug facility.
+ */
+ CBQTRACEINIT();
+
+ bzero((char *)ifd, sizeof (*ifd));
+ mtu = ifq->altq_ifp->if_mtu;
+ ifd->ifq_ = ifq;
+ ifd->restart = restart;
+ ifd->maxqueued_ = maxqueued;
+ ifd->ns_per_byte_ = nsecPerByte;
+ ifd->maxpkt_ = mtu;
+ ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
+ ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
+#if 1
+ ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
+ if (mtu * nsecPerByte > 10 * 1000000)
+ ifd->maxiftime_ /= 4;
+#endif
+
+ reset_cutoff(ifd);
+ CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
+
+ /*
+ * Initialize the CBQ's WRR state.
+ */
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ ifd->alloc_[i] = 0;
+ ifd->M_[i] = 0;
+ ifd->num_[i] = 0;
+ ifd->na_[i] = 0;
+ ifd->active_[i] = NULL;
+ }
+
+ /*
+ * Initialize current packet state.
+ */
+ ifd->qi_ = 0;
+ ifd->qo_ = 0;
+ for (i = 0; i < RM_MAXQUEUED; i++) {
+ ifd->class_[i] = NULL;
+ ifd->curlen_[i] = 0;
+ ifd->borrowed_[i] = NULL;
+ }
+
+ /*
+ * Create the root class of the link-sharing structure.
+ */
+ if ((ifd->root_ = rmc_newclass(0, ifd,
+ nsecPerByte,
+ rmc_root_overlimit, maxq, 0, 0,
+ maxidle, minidle, offtime,
+ 0, 0)) == NULL) {
+ printf("rmc_init: root class not allocated\n");
+ return ;
+ }
+ ifd->root_->depth_ = 0;
+}
+
+/*
+ * void
+ * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
+ * mbuf 'm' to queue for resource class 'cl'. This routine is called
+ * by a driver's if_output routine. This routine must be called with
+ * output packet completion interrupts locked out (to avoid racing with
+ * rmc_dequeue_next).
+ *
+ * Returns: 0 on successful queueing
+ * -1 when packet drop occurs
+ */
+int
+rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
+{
+ struct timeval now;
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int cpri = cl->pri_;
+ int is_empty = qempty(cl->q_);
+
+ RM_GETTIME(now);
+ if (ifd->cutoff_ > 0) {
+ if (TV_LT(&cl->undertime_, &now)) {
+ if (ifd->cutoff_ > cl->depth_)
+ ifd->cutoff_ = cl->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
+ }
+#if 1 /* ALTQ */
+ else {
+ /*
+ * the class is overlimit. if the class has
+ * underlimit ancestors, set cutoff to the lowest
+ * depth among them.
+ */
+ struct rm_class *borrow = cl->borrow_;
+
+ while (borrow != NULL &&
+ borrow->depth_ < ifd->cutoff_) {
+ if (TV_LT(&borrow->undertime_, &now)) {
+ ifd->cutoff_ = borrow->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
+ break;
+ }
+ borrow = borrow->borrow_;
+ }
+ }
+#else /* !ALTQ */
+ else if ((ifd->cutoff_ > 1) && cl->borrow_) {
+ if (TV_LT(&cl->borrow_->undertime_, &now)) {
+ ifd->cutoff_ = cl->borrow_->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob',
+ cl->borrow_->depth_);
+ }
+ }
+#endif /* !ALTQ */
+ }
+
+ if (_rmc_addq(cl, m) < 0)
+ /* failed */
+ return (-1);
+
+ if (is_empty) {
+ CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
+ ifd->na_[cpri]++;
+ }
+
+ if (qlen(cl->q_) > qlimit(cl->q_)) {
+ /* note: qlimit can be set to 0 or 1 */
+ rmc_drop_action(cl);
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * void
+ * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
+ * classes to see if there are satified.
+ */
+
+static void
+rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
+{
+ int i;
+ rm_class_t *p, *bp;
+
+ for (i = RM_MAXPRIO - 1; i >= 0; i--) {
+ if ((bp = ifd->active_[i]) != NULL) {
+ p = bp;
+ do {
+ if (!rmc_satisfied(p, now)) {
+ ifd->cutoff_ = p->depth_;
+ return;
+ }
+ p = p->peer_;
+ } while (p != bp);
+ }
+ }
+
+ reset_cutoff(ifd);
+}
+
+/*
+ * rmc_satisfied - Return 1 of the class is satisfied. O, otherwise.
+ */
+
+static int
+rmc_satisfied(struct rm_class *cl, struct timeval *now)
+{
+ rm_class_t *p;
+
+ if (cl == NULL)
+ return (1);
+ if (TV_LT(now, &cl->undertime_))
+ return (1);
+ if (cl->depth_ == 0) {
+ if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
+ return (0);
+ else
+ return (1);
+ }
+ if (cl->children_ != NULL) {
+ p = cl->children_;
+ while (p != NULL) {
+ if (!rmc_satisfied(p, now))
+ return (0);
+ p = p->next_;
+ }
+ }
+
+ return (1);
+}
+
+/*
+ * Return 1 if class 'cl' is under limit or can borrow from a parent,
+ * 0 if overlimit. As a side-effect, this routine will invoke the
+ * class overlimit action if the class if overlimit.
+ */
+
+static int
+rmc_under_limit(struct rm_class *cl, struct timeval *now)
+{
+ rm_class_t *p = cl;
+ rm_class_t *top;
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ifd->borrowed_[ifd->qi_] = NULL;
+ /*
+ * If cl is the root class, then always return that it is
+ * underlimit. Otherwise, check to see if the class is underlimit.
+ */
+ if (cl->parent_ == NULL)
+ return (1);
+
+ if (cl->sleeping_) {
+ if (TV_LT(now, &cl->undertime_))
+ return (0);
+
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+ return (1);
+ }
+
+ top = NULL;
+ while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
+ if (((cl = cl->borrow_) == NULL) ||
+ (cl->depth_ > ifd->cutoff_)) {
+#ifdef ADJUST_CUTOFF
+ if (cl != NULL)
+ /* cutoff is taking effect, just
+ return false without calling
+ the delay action. */
+ return (0);
+#endif
+#ifdef BORROW_OFFTIME
+ /*
+ * check if the class can borrow offtime too.
+ * borrow offtime from the top of the borrow
+ * chain if the top class is not overloaded.
+ */
+ if (cl != NULL) {
+ /* cutoff is taking effect, use this class as top. */
+ top = cl;
+ CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
+ }
+ if (top != NULL && top->avgidle_ == top->minidle_)
+ top = NULL;
+ p->overtime_ = *now;
+ (p->overlimit)(p, top);
+#else
+ p->overtime_ = *now;
+ (p->overlimit)(p, NULL);
+#endif
+ return (0);
+ }
+ top = cl;
+ }
+
+ if (cl != p)
+ ifd->borrowed_[ifd->qi_] = cl;
+ return (1);
+}
+
+/*
+ * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
+ * Packet-by-packet round robin.
+ *
+ * The heart of the weighted round-robin scheduler, which decides which
+ * class next gets to send a packet. Highest priority first, then
+ * weighted round-robin within priorites.
+ *
+ * Each able-to-send class gets to send until its byte allocation is
+ * exhausted. Thus, the active pointer is only changed after a class has
+ * exhausted its allocation.
+ *
+ * If the scheduler finds no class that is underlimit or able to borrow,
+ * then the first class found that had a nonzero queue and is allowed to
+ * borrow gets to send.
+ */
+
+static mbuf_t *
+_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
+{
+ struct rm_class *cl = NULL, *first = NULL;
+ u_int deficit;
+ int cpri;
+ mbuf_t *m;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ if (ifd->efficient_) {
+ /* check if this class is overlimit */
+ if (cl->undertime_.tv_sec != 0 &&
+ rmc_under_limit(cl, &now) == 0)
+ first = cl;
+ }
+ ifd->pollcache_ = NULL;
+ goto _wrr_out;
+ }
+ else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ deficit = 0;
+ /*
+ * Loop through twice for a priority level, if some class
+ * was unable to send a packet the first round because
+ * of the weighted round-robin mechanism.
+ * During the second loop at this level, deficit==2.
+ * (This second loop is not needed if for every class,
+ * "M[cl->pri_])" times "cl->allotment" is greater than
+ * the byte size for the largest packet in the class.)
+ */
+ _wrr_loop:
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
+ cl->bytes_alloc_ += cl->w_allotment_;
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now)) {
+ if (cl->bytes_alloc_ > 0 || deficit > 1)
+ goto _wrr_out;
+
+ /* underlimit but no alloc */
+ deficit = 1;
+#if 1
+ ifd->borrowed_[ifd->qi_] = NULL;
+#endif
+ }
+ else if (first == NULL && cl->borrow_ != NULL)
+ first = cl; /* borrowing candidate */
+ }
+
+ cl->bytes_alloc_ = 0;
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+
+ if (deficit == 1) {
+ /* first loop found an underlimit class with deficit */
+ /* Loop on same priority level, with new deficit. */
+ deficit = 2;
+ goto _wrr_loop;
+ }
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect,
+ * increase cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
+
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+ * Deque the packet and do the book keeping...
+ */
+ _wrr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_wrr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ /*
+ * Update class statistics and link data.
+ */
+ if (cl->bytes_alloc_ > 0)
+ cl->bytes_alloc_ -= m_pktlen(m);
+
+ if ((cl->bytes_alloc_ <= 0) || first == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ else
+ ifd->active_[cl->pri_] = cl;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+ /* mode == ALTDQ_PPOLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * Dequeue & return next packet from the highest priority class that
+ * has a packet to send & has enough allocation to send it. This
+ * routine is called by a driver whenever it needs a new packet to
+ * output.
+ */
+static mbuf_t *
+_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
+{
+ mbuf_t *m;
+ int cpri;
+ struct rm_class *cl, *first = NULL;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ ifd->pollcache_ = NULL;
+ goto _prr_out;
+ } else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now))
+ goto _prr_out;
+ if (first == NULL && cl->borrow_ != NULL)
+ first = cl;
+ }
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect, increase
+ * cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+ * Deque the packet and do the book keeping...
+ */
+ _prr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_prr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ ifd->active_[cpri] = cl->peer_;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+ /* mode == ALTDQ_POLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * mbuf_t *
+ * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
+ * is invoked by the packet driver to get the next packet to be
+ * dequeued and output on the link. If WRR is enabled, then the
+ * WRR dequeue next routine will determine the next packet to sent.
+ * Otherwise, packet-by-packet round robin is invoked.
+ *
+ * Returns: NULL, if a packet is not available or if all
+ * classes are overlimit.
+ *
+ * Otherwise, Pointer to the next packet.
+ */
+
+mbuf_t *
+rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
+{
+ if (ifd->queued_ >= ifd->maxqueued_)
+ return (NULL);
+ else if (ifd->wrr_)
+ return (_rmc_wrr_dequeue_next(ifd, mode));
+ else
+ return (_rmc_prr_dequeue_next(ifd, mode));
+}
+
+/*
+ * Update the utilization estimate for the packet that just completed.
+ * The packet's class & the parent(s) of that class all get their
+ * estimators updated. This routine is called by the driver's output-
+ * packet-completion interrupt service routine.
+ */
+
+/*
+ * a macro to approximate "divide by 1000" that gives 0.000999,
+ * if a value has enough effective digits.
+ * (on pentium, mul takes 9 cycles but div takes 46!)
+ */
+#define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
+void
+rmc_update_class_util(struct rm_ifdat *ifd)
+{
+ int idle, avgidle, pktlen;
+ int pkt_time, tidle;
+ rm_class_t *cl, *borrowed;
+ rm_class_t *borrows;
+ struct timeval *nowp;
+
+ /*
+ * Get the most recent completed class.
+ */
+ if ((cl = ifd->class_[ifd->qo_]) == NULL)
+ return;
+
+ pktlen = ifd->curlen_[ifd->qo_];
+ borrowed = ifd->borrowed_[ifd->qo_];
+ borrows = borrowed;
+
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+
+ /*
+ * Run estimator on class and its ancestors.
+ */
+ /*
+ * rm_update_class_util is designed to be called when the
+ * transfer is completed from a xmit complete interrupt,
+ * but most drivers don't implement an upcall for that.
+ * so, just use estimated completion time.
+ * as a result, ifd->qi_ and ifd->qo_ are always synced.
+ */
+ nowp = &ifd->now_[ifd->qo_];
+ /* get pkt_time (for link) in usec */
+#if 1 /* use approximation */
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
+#endif
+#if 1 /* ALTQ4PPP */
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ int iftime;
+
+ /*
+ * make sure the estimated completion time does not go
+ * too far. it can happen when the link layer supports
+ * data compression or the interface speed is set to
+ * a much lower value.
+ */
+ TV_DELTA(&ifd->ifnow_, nowp, iftime);
+ if (iftime+pkt_time < ifd->maxiftime_) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
+ }
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#else
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#endif
+
+ while (cl != NULL) {
+ TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
+ if (idle >= 2000000)
+ /*
+ * this class is idle enough, reset avgidle.
+ * (TV_DELTA returns 2000000 us when delta is large.)
+ */
+ cl->avgidle_ = cl->maxidle_;
+
+ /* get pkt_time (for class) in usec */
+#if 1 /* use approximation */
+ pkt_time = pktlen * cl->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = pktlen * cl->ns_per_byte_ / 1000;
+#endif
+ idle -= pkt_time;
+
+ avgidle = cl->avgidle_;
+ avgidle += idle - (avgidle >> RM_FILTER_GAIN);
+ cl->avgidle_ = avgidle;
+
+ /* Are we overlimit ? */
+ if (avgidle <= 0) {
+ CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
+#if 1 /* ALTQ */
+ /*
+ * need some lower bound for avgidle, otherwise
+ * a borrowing class gets unbounded penalty.
+ */
+ if (avgidle < cl->minidle_)
+ avgidle = cl->avgidle_ = cl->minidle_;
+#endif
+ /* set next idle to make avgidle 0 */
+ tidle = pkt_time +
+ (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
+ TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
+ ++cl->stats_.over;
+ } else {
+ cl->avgidle_ =
+ (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
+ cl->undertime_.tv_sec = 0;
+ if (cl->sleeping_) {
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ }
+ }
+
+ if (borrows != NULL) {
+ if (borrows != cl)
+ ++cl->stats_.borrows;
+ else
+ borrows = NULL;
+ }
+ cl->last_ = ifd->ifnow_;
+ cl->last_pkttime_ = pkt_time;
+
+#if 1
+ if (cl->parent_ == NULL) {
+ /* take stats of root class */
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+ }
+#endif
+
+ cl = cl->parent_;
+ }
+
+ /*
+ * Check to see if cutoff needs to set to a new level.
+ */
+ cl = ifd->class_[ifd->qo_];
+ if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
+#if 1 /* ALTQ */
+ if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
+ rmc_tl_satisfied(ifd, nowp);
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#else /* !ALTQ */
+ if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
+ reset_cutoff(ifd);
+#ifdef notdef
+ rmc_tl_satisfied(ifd, &now);
+#endif
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#endif /* !ALTQ */
+ }
+
+ /*
+ * Release class slot
+ */
+ ifd->borrowed_[ifd->qo_] = NULL;
+ ifd->class_[ifd->qo_] = NULL;
+ ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
+ ifd->queued_--;
+}
+
+/*
+ * void
+ * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
+ * over-limit action routines. These get invoked by rmc_under_limit()
+ * if a class with packets to send if over its bandwidth limit & can't
+ * borrow from a parent class.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_drop_action(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ASSERT(qlen(cl->q_) > 0);
+ _rmc_dropq(cl);
+ if (qempty(cl->q_))
+ ifd->na_[cl->pri_]--;
+}
+
+void rmc_dropall(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ if (!qempty(cl->q_)) {
+ _flushq(cl->q_);
+
+ ifd->na_[cl->pri_]--;
+ }
+}
+
+#if (__FreeBSD_version > 300000)
+/* hzto() is removed from FreeBSD-3.0 */
+static int hzto(struct timeval *);
+
+static int
+hzto(tv)
+ struct timeval *tv;
+{
+ struct timeval t2;
+
+ getmicrotime(&t2);
+ t2.tv_sec = tv->tv_sec - t2.tv_sec;
+ t2.tv_usec = tv->tv_usec - t2.tv_usec;
+ return (tvtohz(&t2));
+}
+#endif /* __FreeBSD_version > 300000 */
+
+/*
+ * void
+ * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
+ * delay action routine. It is invoked via rmc_under_limit when the
+ * packet is discoverd to be overlimit.
+ *
+ * If the delay action is result of borrow class being overlimit, then
+ * delay for the offtime of the borrowing class that is overlimit.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
+{
+ int delay, t, extradelay;
+
+ cl->stats_.overactions++;
+ TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
+#ifndef BORROW_OFFTIME
+ delay += cl->offtime_;
+#endif
+
+ if (!cl->sleeping_) {
+ CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
+#ifdef BORROW_OFFTIME
+ if (borrow != NULL)
+ extradelay = borrow->offtime_;
+ else
+#endif
+ extradelay = cl->offtime_;
+
+#ifdef ALTQ
+ /*
+ * XXX recalculate suspend time:
+ * current undertime is (tidle + pkt_time) calculated
+ * from the last transmission.
+ * tidle: time required to bring avgidle back to 0
+ * pkt_time: target waiting time for this class
+ * we need to replace pkt_time by offtime
+ */
+ extradelay -= cl->last_pkttime_;
+#endif
+ if (extradelay > 0) {
+ TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
+ delay += extradelay;
+ }
+
+ cl->sleeping_ = 1;
+ cl->stats_.delays++;
+
+ /*
+ * Since packets are phased randomly with respect to the
+ * clock, 1 tick (the next clock tick) can be an arbitrarily
+ * short time so we have to wait for at least two ticks.
+ * NOTE: If there's no other traffic, we need the timer as
+ * a 'backstop' to restart this class.
+ */
+ if (delay > tick * 2) {
+ /* FreeBSD rounds up the tick */
+ t = hzto(&cl->undertime_);
+ } else
+ t = 2;
+ CALLOUT_RESET(&cl->callout_, t,
+ (timeout_t *)rmc_restart, (caddr_t)cl);
+ }
+}
+
+/*
+ * void
+ * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
+ * called by the system timer code & is responsible checking if the
+ * class is still sleeping (it might have been restarted as a side
+ * effect of the queue scan on a packet arrival) and, if so, restarting
+ * output for the class. Inspecting the class state & restarting output
+ * require locking the class structure. In general the driver is
+ * responsible for locking but this is the only routine that is not
+ * called directly or indirectly from the interface driver so it has
+ * know about system locking conventions. Under bsd, locking is done
+ * by raising IPL to splimp so that's what's implemented here. On a
+ * different system this would probably need to be changed.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_restart(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int s;
+
+ s = splnet();
+ IFQ_LOCK(ifd->ifq_);
+ if (cl->sleeping_) {
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+
+ if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
+ CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
+ (ifd->restart)(ifd->ifq_);
+ }
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+}
+
+/*
+ * void
+ * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit
+ * handling routine for the root class of the link sharing structure.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
+{
+ panic("rmc_root_overlimit");
+}
+
+/*
+ * Packet Queue handling routines. Eventually, this is to localize the
+ * effects on the code whether queues are red queues or droptail
+ * queues.
+ */
+
+static int
+_rmc_addq(rm_class_t *cl, mbuf_t *m)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
+#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ return codel_addq(cl->codel_, cl->q_, m);
+#endif
+
+ if (cl->flags_ & RMCF_CLEARDSCP)
+ write_dsfield(m, cl->pktattr_, 0);
+
+ _addq(cl->q_, m);
+ return (0);
+}
+
+/* note: _rmc_dropq is not called for red */
+static void
+_rmc_dropq(rm_class_t *cl)
+{
+ mbuf_t *m;
+
+ if ((m = _getq(cl->q_)) != NULL)
+ m_freem(m);
+}
+
+static mbuf_t *
+_rmc_getq(rm_class_t *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_getq((rio_t *)cl->red_, cl->q_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_getq(cl->red_, cl->q_);
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ return codel_getq(cl->codel_, cl->q_);
+#endif
+ return _getq(cl->q_);
+}
+
+static mbuf_t *
+_rmc_pollq(rm_class_t *cl)
+{
+ return qhead(cl->q_);
+}
+
+#ifdef CBQ_TRACE
+
+struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
+struct cbqtrace *cbqtrace_ptr = NULL;
+int cbqtrace_count;
+
+/*
+ * DDB hook to trace cbq events:
+ * the last 1024 events are held in a circular buffer.
+ * use "call cbqtrace_dump(N)" to display 20 events from Nth event.
+ */
+void cbqtrace_dump(int);
+static char *rmc_funcname(void *);
+
+static struct rmc_funcs {
+ void *func;
+ char *name;
+} rmc_funcs[] =
+{
+ rmc_init, "rmc_init",
+ rmc_queue_packet, "rmc_queue_packet",
+ rmc_under_limit, "rmc_under_limit",
+ rmc_update_class_util, "rmc_update_class_util",
+ rmc_delay_action, "rmc_delay_action",
+ rmc_restart, "rmc_restart",
+ _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",
+ NULL, NULL
+};
+
+static char *rmc_funcname(void *func)
+{
+ struct rmc_funcs *fp;
+
+ for (fp = rmc_funcs; fp->func != NULL; fp++)
+ if (fp->func == func)
+ return (fp->name);
+ return ("unknown");
+}
+
+void cbqtrace_dump(int counter)
+{
+ int i, *p;
+ char *cp;
+
+ counter = counter % NCBQTRACE;
+ p = (int *)&cbqtrace_buffer[counter];
+
+ for (i=0; i<20; i++) {
+ printf("[0x%x] ", *p++);
+ printf("%s: ", rmc_funcname((void *)*p++));
+ cp = (char *)p++;
+ printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
+ printf("%d\n",*p++);
+
+ if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
+ p = (int *)cbqtrace_buffer;
+ }
+}
+#endif /* CBQ_TRACE */
+#endif /* ALTQ_CBQ */
+
+#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \
+ defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL)
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
+void
+_addq(class_queue_t *q, mbuf_t *m)
+{
+ mbuf_t *m0;
+
+ if ((m0 = qtail(q)) != NULL)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ m0 = m;
+ m0->m_nextpkt = m;
+ qtail(q) = m;
+ qlen(q)++;
+}
+
+mbuf_t *
+_getq(class_queue_t *q)
+{
+ mbuf_t *m, *m0;
+
+ if ((m = qtail(q)) == NULL)
+ return (NULL);
+ if ((m0 = m->m_nextpkt) != m)
+ m->m_nextpkt = m0->m_nextpkt;
+ else {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ }
+ qlen(q)--;
+ m0->m_nextpkt = NULL;
+ return (m0);
+}
+
+/* drop a packet at the tail of the queue */
+mbuf_t *
+_getq_tail(class_queue_t *q)
+{
+ mbuf_t *m, *m0, *prev;
+
+ if ((m = m0 = qtail(q)) == NULL)
+ return NULL;
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m) {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ } else
+ qtail(q) = prev;
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+/* randomly select a packet in the queue */
+mbuf_t *
+_getq_random(class_queue_t *q)
+{
+ struct mbuf *m;
+ int i, n;
+
+ if ((m = qtail(q)) == NULL)
+ return NULL;
+ if (m->m_nextpkt == m) {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ } else {
+ struct mbuf *prev = NULL;
+
+ n = arc4random() % qlen(q) + 1;
+ for (i = 0; i < n; i++) {
+ prev = m;
+ m = m->m_nextpkt;
+ }
+ prev->m_nextpkt = m->m_nextpkt;
+ if (m == qtail(q))
+ qtail(q) = prev;
+ }
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+void
+_removeq(class_queue_t *q, mbuf_t *m)
+{
+ mbuf_t *m0, *prev;
+
+ m0 = qtail(q);
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else if (qtail(q) == m)
+ qtail(q) = prev;
+ qlen(q)--;
+}
+
+void
+_flushq(class_queue_t *q)
+{
+ mbuf_t *m;
+
+ while ((m = _getq(q)) != NULL)
+ m_freem(m);
+ ASSERT(qlen(q) == 0);
+}
+
+#endif /* !__GNUC__ || ALTQ_DEBUG */
+#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */