author      Vijay Kumar Banerjee <vijay@rtems.org>  2021-02-03 19:46:50 -0700
committer   Vijay Kumar Banerjee <vijay@rtems.org>  2021-02-24 19:01:17 -0700
commit      fbc7459c2c3a86a4d94e19463417b759900ff51c (patch)
tree        19d5451b8cee3d0c51a432c23bd598eafe9a53b0 /netinet
download    rtems-net-legacy-fbc7459c2c3a86a4d94e19463417b759900ff51c.tar.bz2
Initial Commit: Add all files from RTEMS libnetworking directory
Diffstat (limited to 'netinet')
-rw-r--r--  netinet/icmp_var.h          81
-rw-r--r--  netinet/if_ether.c         644
-rw-r--r--  netinet/if_ether.h         176
-rw-r--r--  netinet/igmp.c             473
-rw-r--r--  netinet/igmp.h              97
-rw-r--r--  netinet/igmp_var.h         105
-rw-r--r--  netinet/in.c               711
-rw-r--r--  netinet/in_cksum.c         186
-rw-r--r--  netinet/in_cksum_arm.h     236
-rw-r--r--  netinet/in_cksum_i386.h    202
-rw-r--r--  netinet/in_cksum_m68k.h    221
-rw-r--r--  netinet/in_cksum_nios2.h   248
-rw-r--r--  netinet/in_cksum_powerpc.h 172
-rw-r--r--  netinet/in_cksum_sparc.h   269
-rw-r--r--  netinet/in_pcb.c           733
-rw-r--r--  netinet/in_pcb.h           152
-rw-r--r--  netinet/in_proto.c         217
-rw-r--r--  netinet/in_rmx.c           386
-rw-r--r--  netinet/in_systm.h          58
-rw-r--r--  netinet/in_var.h           240
-rw-r--r--  netinet/ip.h               230
-rw-r--r--  netinet/ip_divert.c        387
-rw-r--r--  netinet/ip_fw.c           1082
-rw-r--r--  netinet/ip_fw.h            183
-rw-r--r--  netinet/ip_icmp.c          771
-rw-r--r--  netinet/ip_icmp.h          212
-rw-r--r--  netinet/ip_input.c        1510
-rw-r--r--  netinet/ip_mroute.c       2232
-rw-r--r--  netinet/ip_mroute.h        271
-rw-r--r--  netinet/ip_output.c       1336
-rw-r--r--  netinet/ip_var.h           215
-rw-r--r--  netinet/raw_ip.c           492
-rw-r--r--  netinet/tcp_debug.c        172
-rw-r--r--  netinet/tcp_debug.h         69
-rw-r--r--  netinet/tcp_fsm.h           91
-rw-r--r--  netinet/tcp_input.c       2151
-rw-r--r--  netinet/tcp_output.c       757
-rw-r--r--  netinet/tcp_seq.h           99
-rw-r--r--  netinet/tcp_subr.c         729
-rw-r--r--  netinet/tcp_timer.c        385
-rw-r--r--  netinet/tcp_timer.h        133
-rw-r--r--  netinet/tcp_usrreq.c       845
-rw-r--r--  netinet/tcp_var.h          414
-rw-r--r--  netinet/tcpip.h             67
-rw-r--r--  netinet/udp.h               50
-rw-r--r--  netinet/udp_usrreq.c       736
-rw-r--r--  netinet/udp_var.h          106
47 files changed, 21332 insertions, 0 deletions
diff --git a/netinet/icmp_var.h b/netinet/icmp_var.h
new file mode 100644
index 0000000..31eea3b
--- /dev/null
+++ b/netinet/icmp_var.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)icmp_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _NETINET_ICMP_VAR_H_
+#define _NETINET_ICMP_VAR_H_
+
+#include <netinet/ip_icmp.h> /* ICMP_MAXTYPE */
+
+/*
+ * Variables related to this implementation
+ * of the internet control message protocol.
+ */
+struct icmpstat {
+/* statistics related to icmp packets generated */
+ u_long icps_error; /* # of calls to icmp_error */
+ u_long icps_oldshort; /* no error 'cuz old ip too short */
+ u_long icps_oldicmp; /* no error 'cuz old was icmp */
+ u_long icps_outhist[ICMP_MAXTYPE + 1];
+/* statistics related to input messages processed */
+ u_long icps_badcode; /* icmp_code out of range */
+ u_long icps_tooshort; /* packet < ICMP_MINLEN */
+ u_long icps_checksum; /* bad checksum */
+ u_long icps_badlen; /* calculated bound mismatch */
+ u_long icps_reflect; /* number of responses */
+ u_long icps_inhist[ICMP_MAXTYPE + 1];
+ u_long icps_allecho; /* all echo requests dropped */
+ u_long icps_bmcastecho; /* b/mcast echo requests dropped */
+ u_long icps_bmcasttstamp; /* b/mcast tstamp requests dropped */
+};
+
+/*
+ * Names for ICMP sysctl objects
+ */
+#define ICMPCTL_MASKREPL 1 /* allow replies to netmask requests */
+#define ICMPCTL_STATS 2 /* statistics (read-only) */
+#define ICMPCTL_MAXID 3
+
+#define ICMPCTL_NAMES { \
+ { 0, 0 }, \
+ { "maskrepl", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+}
+
+#ifdef _KERNEL
+SYSCTL_DECL(_net_inet_icmp);
+extern struct icmpstat icmpstat;
+#endif
+
+#endif
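Since the counters above are ordinary globals in this stack, a diagnostic routine can read them straight from the exported icmpstat structure. A minimal sketch, assuming code built as part of the stack (so _KERNEL is defined and printf() is available); the helper name is hypothetical:

#include <stdio.h>
#include <netinet/icmp_var.h>

/* Hypothetical helper, not part of this commit. */
void
example_dump_icmp_counters(void)
{
	printf("icmp: %lu errors generated, %lu bad checksums received\n",
	    icmpstat.icps_error, icmpstat.icps_checksum);
}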
diff --git a/netinet/if_ether.c b/netinet/if_ether.c
new file mode 100644
index 0000000..e068674
--- /dev/null
+++ b/netinet/if_ether.c
@@ -0,0 +1,644 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_ether.c 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/netinet/if_ether.c,v 1.136 2005/03/13 11:23:22 glebius Exp $
+ */
+
+
+/*
+ * Ethernet address resolution protocol.
+ * TODO:
+ * add "inuse/lock" bit (or ref. count) along with valid bit
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/route.h>
+#include <net/netisr.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/if_ether.h>
+
+#define SIN(s) ((struct sockaddr_in *)s)
+#define SDL(s) ((struct sockaddr_dl *)s)
+
+SYSCTL_DECL(_net_link_ether);
+SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
+
+/* timer values */
+static int arpt_prune = (5*60*1); /* walk list every 5 minutes */
+static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
+static int arpt_down = 20; /* once declared down, don't send for 20 sec */
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, CTLFLAG_RW,
+ &arpt_prune, 0, "");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW,
+ &arpt_keep, 0, "");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time, CTLFLAG_RW,
+ &arpt_down, 0, "");
+
+#define rt_expire rt_rmx.rmx_expire
+
+struct llinfo_arp {
+ LIST_ENTRY(llinfo_arp) la_le;
+ struct rtentry *la_rt;
+ struct mbuf *la_hold; /* last packet until resolved/timeout */
+ long la_asked; /* last time we QUERIED for this addr */
+#define la_timer la_rt->rt_rmx.rmx_expire /* deletion time in seconds */
+};
+
+static LIST_HEAD(, llinfo_arp) llinfo_arp;
+
+struct ifqueue arpintrq = {0, 0, 0, 50, 0};
+static int arp_inuse, arp_allocated;
+
+static int arp_maxtries = 5;
+static int useloopback = 1; /* use loopback interface for local traffic */
+static int arp_proxyall = 0;
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW,
+ &arp_maxtries, 0, "");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW,
+ &useloopback, 0, "");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
+ &arp_proxyall, 0, "");
+
+static void arp_rtrequest(int, struct rtentry *, struct sockaddr *);
+static void arprequest(struct arpcom *, struct in_addr *, struct in_addr *, u_char *);
+void arpintr(void);
+static void arptfree(struct llinfo_arp *);
+static void arptimer(void *);
+static struct llinfo_arp
+ *arplookup(u_long, int, int);
+#ifdef INET
+static void in_arpinput(struct mbuf *);
+#endif
+
+/*
+ * Timeout routine. Age arp_tab entries periodically.
+ */
+/* ARGSUSED */
+static void
+arptimer(void *ignored_arg)
+{
+ int s = splnet();
+ struct llinfo_arp *la, *ola;
+
+ la = llinfo_arp.lh_first;
+ timeout(arptimer, (caddr_t)0, arpt_prune * hz);
+ while ((ola = la) != 0) {
+ register struct rtentry *rt = la->la_rt;
+ la = la->la_le.le_next;
+ if (rt->rt_expire && rt->rt_expire <= rtems_bsdnet_seconds_since_boot())
+ arptfree(ola); /* timer has expired, clear */
+ }
+ splx(s);
+}
+
+/*
+ * Parallel to llc_rtrequest.
+ */
+static void
+arp_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
+{
+ struct sockaddr *gate;
+ struct llinfo_arp *la;
+ static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, { 0 } };
+ static int arpinit_done;
+
+ if (!arpinit_done) {
+ arpinit_done = 1;
+ LIST_INIT(&llinfo_arp);
+ timeout(arptimer, (caddr_t)0, hz);
+ }
+ if (rt->rt_flags & RTF_GATEWAY)
+ return;
+ gate = rt->rt_gateway;
+ la = (struct llinfo_arp *)rt->rt_llinfo;
+ switch (req) {
+
+ case RTM_ADD:
+ /*
+ * XXX: If this is a manually added route to interface
+ * such as older version of routed or gated might provide,
+ * restore cloning bit.
+ */
+ if ((rt->rt_flags & RTF_HOST) == 0 &&
+ rt_mask(rt) != NULL &&
+ SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
+ rt->rt_flags |= RTF_CLONING;
+ if (rt->rt_flags & RTF_CLONING) {
+ /*
+ * Case 1: This route should come from a route to iface.
+ */
+ rt_setgate(rt, rt_key(rt),
+ (struct sockaddr *)&null_sdl);
+ gate = rt->rt_gateway;
+ SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+ SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+ rt->rt_expire = rtems_bsdnet_seconds_since_boot();
+ break;
+ }
+ /* Announce a new entry if requested. */
+ if (rt->rt_flags & RTF_ANNOUNCE)
+ arprequest((struct arpcom *)rt->rt_ifp,
+ &SIN(rt_key(rt))->sin_addr,
+ &SIN(rt_key(rt))->sin_addr,
+ (u_char *)LLADDR(SDL(gate)));
+ /*FALLTHROUGH*/
+ case RTM_RESOLVE:
+ if (gate->sa_family != AF_LINK ||
+ gate->sa_len < sizeof(null_sdl)) {
+ log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n");
+ break;
+ }
+ SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+ SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+ if (la != 0)
+ break; /* This happens on a route change */
+ /*
+ * Case 2: This route may come from cloning, or a manual route
+ * add with a LL address.
+ */
+ R_Malloc(la, struct llinfo_arp *, sizeof(*la));
+ rt->rt_llinfo = (caddr_t)la;
+ if (la == 0) {
+ log(LOG_DEBUG, "arp_rtrequest: malloc failed\n");
+ break;
+ }
+ arp_inuse++;
+ arp_allocated++;
+ Bzero(la, sizeof(*la));
+ la->la_rt = rt;
+ rt->rt_flags |= RTF_LLINFO;
+ LIST_INSERT_HEAD(&llinfo_arp, la, la_le);
+
+ /*
+ * This keeps the multicast addresses from showing up
+ * in `arp -a' listings as unresolved. It's not actually
+ * functional. Then the same for broadcast.
+ */
+ if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
+ ETHER_MAP_IP_MULTICAST(&SIN(rt_key(rt))->sin_addr,
+ LLADDR(SDL(gate)));
+ SDL(gate)->sdl_alen = 6;
+ rt->rt_expire = 0;
+ }
+ if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) {
+ memcpy(LLADDR(SDL(gate)), etherbroadcastaddr, 6);
+ SDL(gate)->sdl_alen = 6;
+ rt->rt_expire = 0;
+ }
+
+ if (SIN(rt_key(rt))->sin_addr.s_addr ==
+ (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
+ /*
+ * This test used to be
+ * if (loif.if_flags & IFF_UP)
+ * It allowed local traffic to be forced
+ * through the hardware by configuring the loopback down.
+ * However, it causes problems during network configuration
+ * for boards that can't receive packets they send.
+ * It is now necessary to clear "useloopback" and remove
+ * the route to force traffic out to the hardware.
+ */
+ rt->rt_expire = 0;
+ Bcopy(((struct arpcom *)rt->rt_ifp)->ac_enaddr,
+ LLADDR(SDL(gate)), SDL(gate)->sdl_alen = 6);
+ if (useloopback)
+ rt->rt_ifp = loif;
+
+ }
+ break;
+
+ case RTM_DELETE:
+ if (la == 0)
+ break;
+ arp_inuse--;
+ LIST_REMOVE(la, la_le);
+ rt->rt_llinfo = 0;
+ rt->rt_flags &= ~RTF_LLINFO;
+ if (la->la_hold)
+ m_freem(la->la_hold);
+ Free((caddr_t)la);
+ }
+}
+
+/*
+ * Broadcast an ARP request. Caller specifies:
+ * - arp header source ip address
+ * - arp header target ip address
+ * - arp header source ethernet address
+ */
+static void
+arprequest(struct arpcom *ac, struct in_addr *sip, struct in_addr *tip, u_char *enaddr)
+{
+ struct mbuf *m;
+ struct ether_header *eh;
+ struct ether_arp *ea;
+ struct sockaddr sa;
+
+ if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+ return;
+ m->m_len = sizeof(*ea);
+ m->m_pkthdr.len = sizeof(*ea);
+ MH_ALIGN(m, sizeof(*ea));
+ ea = mtod(m, struct ether_arp *);
+ eh = (struct ether_header *)sa.sa_data;
+ bzero((caddr_t)ea, sizeof (*ea));
+ (void)memcpy(eh->ether_dhost, etherbroadcastaddr, sizeof(eh->ether_dhost));
+ eh->ether_type = htons(ETHERTYPE_ARP); /* if_output will not swap */
+ ea->arp_hrd = htons(ARPHRD_ETHER);
+ ea->arp_pro = htons(ETHERTYPE_IP);
+ ea->arp_hln = sizeof(ea->arp_sha); /* hardware address length */
+ ea->arp_pln = sizeof(ea->arp_spa); /* protocol address length */
+ ea->arp_op = htons(ARPOP_REQUEST);
+ (void)memcpy(ea->arp_sha, enaddr, sizeof(ea->arp_sha));
+ (void)memcpy(ea->arp_spa, sip, sizeof(ea->arp_spa));
+ (void)memcpy(ea->arp_tpa, tip, sizeof(ea->arp_tpa));
+ sa.sa_family = AF_UNSPEC;
+ sa.sa_len = sizeof(sa);
+ (*ac->ac_if.if_output)(&ac->ac_if, m, &sa, (struct rtentry *)0);
+}
+
+/*
+ * Resolve an IP address into an ethernet address. If success,
+ * desten is filled in. If there is no entry in arptab,
+ * set one up and broadcast a request for the IP address.
+ * Hold onto this mbuf and resend it once the address
+ * is finally resolved. A return value of 1 indicates
+ * that desten has been filled in and the packet should be sent
+ * normally; a 0 return indicates that the packet has been
+ * taken over here, either now or for later transmission.
+ */
+int
+arpresolve(
+ struct arpcom *ac,
+ struct rtentry *rt,
+ struct mbuf *m,
+ struct sockaddr *dst,
+ u_char *desten,
+ struct rtentry *rt0)
+{
+ struct llinfo_arp *la = 0;
+ struct sockaddr_dl *sdl;
+
+ if (m->m_flags & M_BCAST) { /* broadcast */
+ (void)memcpy(desten, etherbroadcastaddr, sizeof(etherbroadcastaddr));
+ return (1);
+ }
+ if (m->m_flags & M_MCAST) { /* multicast */
+ ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
+ return(1);
+ }
+ if (rt)
+ la = (struct llinfo_arp *)rt->rt_llinfo;
+ else {
+ la = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0);
+ if (la)
+ rt = la->la_rt;
+ }
+ if (la == 0 || rt == 0) {
+ char addrbuf[INET_ADDRSTRLEN];
+ log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s\n",
+ inet_ntoa_r(SIN(dst)->sin_addr, addrbuf));
+ m_freem(m);
+ return (0);
+ }
+ sdl = SDL(rt->rt_gateway);
+ /*
+ * Check the address family and length is valid, the address
+ * is resolved; otherwise, try to resolve.
+ */
+ if ((rt->rt_expire == 0 || rt->rt_expire > rtems_bsdnet_seconds_since_boot()) &&
+ sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
+ bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
+ return 1;
+ }
+ /*
+ * There is an arptab entry, but no ethernet address
+ * response yet. Replace the held mbuf with this
+ * latest one.
+ */
+ if (la->la_hold)
+ m_freem(la->la_hold);
+ la->la_hold = m;
+ if (rt->rt_expire) {
+ rt->rt_flags &= ~RTF_REJECT;
+ if (la->la_asked == 0 || rt->rt_expire != rtems_bsdnet_seconds_since_boot()) {
+ rt->rt_expire = rtems_bsdnet_seconds_since_boot();
+ if (la->la_asked++ < arp_maxtries)
+ arprequest(ac,
+ &(SIN(rt->rt_ifa->ifa_addr)->sin_addr),
+ &(SIN(dst)->sin_addr),
+ ac->ac_enaddr);
+ else {
+ rt->rt_flags |= RTF_REJECT;
+ rt->rt_expire += arpt_down;
+ la->la_asked = 0;
+ }
+
+ }
+ }
+ return (0);
+}
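The contract documented above is easiest to see from the caller's side. A minimal caller sketch, not code from this commit (the function name is hypothetical): an Ethernet output path transmits only when arpresolve() returns 1; on 0 the mbuf now belongs to ARP, which holds it in la_hold and resends it once the reply arrives.

/* Hypothetical caller -- illustrates the return-value contract only. */
static int
example_if_output(struct arpcom *ac, struct mbuf *m,
    struct sockaddr *dst, struct rtentry *rt)
{
	u_char edst[ETHER_ADDR_LEN];

	if (!arpresolve(ac, rt, m, dst, edst, rt))
		return (0);	/* mbuf held (or freed) by ARP; do not touch it again */

	/*
	 * edst[] now contains a valid destination MAC address:
	 * prepend the Ethernet header and hand m to the hardware.
	 */
	return (1);
}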
+
+/*
+ * Common length and type checks are done here,
+ * then the protocol-specific routine is called.
+ */
+void
+arpintr(void)
+{
+ struct mbuf *m;
+ struct arphdr *ar;
+ int s;
+
+ while (arpintrq.ifq_head) {
+ s = splimp();
+ IF_DEQUEUE(&arpintrq, m);
+ splx(s);
+ if (m == 0 || (m->m_flags & M_PKTHDR) == 0)
+ panic("arpintr");
+ if (m->m_len >= sizeof(struct arphdr) &&
+ (ar = mtod(m, struct arphdr *)) &&
+ ntohs(ar->ar_hrd) == ARPHRD_ETHER &&
+ m->m_len >=
+ sizeof(struct arphdr) + 2 * ar->ar_hln + 2 * ar->ar_pln)
+
+ switch (ntohs(ar->ar_pro)) {
+
+ case ETHERTYPE_IP:
+ in_arpinput(m);
+ continue;
+ }
+ m_freem(m);
+ }
+}
+
+NETISR_SET(NETISR_ARP, arpintr);
+
+/*
+ * ARP for Internet protocols on 10 Mb/s Ethernet.
+ * Algorithm is that given in RFC 826.
+ * In addition, a sanity check is performed on the sender
+ * protocol address, to catch impersonators.
+ * We no longer handle negotiations for use of trailer protocol:
+ * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
+ * along with IP replies if we wanted trailers sent to us,
+ * and also sent them in response to IP replies.
+ * This allowed either end to announce the desire to receive
+ * trailer packets.
+ * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
+ * but formerly didn't normally send requests.
+ */
+static void
+in_arpinput(struct mbuf *m)
+{
+ struct ether_arp *ea;
+ struct arpcom *ac = (struct arpcom *)m->m_pkthdr.rcvif;
+ struct ether_header *eh;
+ struct llinfo_arp *la = 0;
+ struct rtentry *rt;
+ struct in_ifaddr *ia;
+ struct in_ifaddr *maybe_ia = 0;
+ struct sockaddr_dl *sdl;
+ struct sockaddr sa;
+ struct in_addr isaddr, itaddr, myaddr;
+ int op;
+ char addrbuf[INET_ADDRSTRLEN];
+
+ ea = mtod(m, struct ether_arp *);
+ op = ntohs(ea->arp_op);
+ (void)memcpy(&isaddr, ea->arp_spa, sizeof (isaddr));
+ (void)memcpy(&itaddr, ea->arp_tpa, sizeof (itaddr));
+ for (ia = in_ifaddr; ia; ia = ia->ia_next)
+ if (ia->ia_ifp == &ac->ac_if) {
+ maybe_ia = ia;
+ if ((itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) ||
+ (isaddr.s_addr == ia->ia_addr.sin_addr.s_addr))
+ break;
+ }
+ if (maybe_ia == 0) {
+ m_freem(m);
+ return;
+ }
+ myaddr = ia ? ia->ia_addr.sin_addr : maybe_ia->ia_addr.sin_addr;
+ if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)ac->ac_enaddr,
+ sizeof (ea->arp_sha))) {
+ m_freem(m); /* it's from me, ignore it. */
+ return;
+ }
+ if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)etherbroadcastaddr,
+ sizeof (ea->arp_sha))) {
+ log(LOG_ERR,
+ "arp: ether address is broadcast for IP address %s!\n",
+ inet_ntoa_r(isaddr, addrbuf));
+ m_freem(m);
+ return;
+ }
+ if (isaddr.s_addr == myaddr.s_addr) {
+ log(LOG_ERR,
+ "arp: %6D is using my IP address %s!\n",
+ ea->arp_sha, ":", inet_ntoa_r(isaddr, addrbuf));
+ itaddr = myaddr;
+ goto reply;
+ }
+ la = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0);
+ if (la && (rt = la->la_rt) && (sdl = SDL(rt->rt_gateway))) {
+ if (sdl->sdl_alen &&
+ bcmp((caddr_t)ea->arp_sha, LLADDR(sdl), sdl->sdl_alen))
+ log(LOG_INFO, "arp: %s moved from %6D to %6D\n",
+ inet_ntoa_r(isaddr, addrbuf),
+ (u_char *)LLADDR(sdl), ":", ea->arp_sha, ":");
+ (void)memcpy(LLADDR(sdl), ea->arp_sha, sizeof(ea->arp_sha));
+ sdl->sdl_alen = sizeof(ea->arp_sha);
+ if (rt->rt_expire)
+ rt->rt_expire = rtems_bsdnet_seconds_since_boot() + arpt_keep;
+ rt->rt_flags &= ~RTF_REJECT;
+ la->la_asked = 0;
+ if (la->la_hold) {
+ (*ac->ac_if.if_output)(&ac->ac_if, la->la_hold,
+ rt_key(rt), rt);
+ la->la_hold = 0;
+ }
+ }
+reply:
+ if (op != ARPOP_REQUEST) {
+ m_freem(m);
+ return;
+ }
+ if (itaddr.s_addr == myaddr.s_addr) {
+ /* I am the target */
+ (void)memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha));
+ (void)memcpy(ea->arp_sha, ac->ac_enaddr, sizeof(ea->arp_sha));
+ } else {
+ la = arplookup(itaddr.s_addr, 0, SIN_PROXY);
+ if (la == NULL) {
+ struct sockaddr_in sin;
+
+ if (!arp_proxyall) {
+ m_freem(m);
+ return;
+ }
+
+ bzero(&sin, sizeof sin);
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof sin;
+ sin.sin_addr = itaddr;
+
+ rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+ if (!rt) {
+ m_freem(m);
+ return;
+ }
+ /*
+ * Don't send proxies for nodes on the same interface
+ * as this one came out of, or we'll get into a fight
+ * over who claims what Ether address.
+ */
+ if (rt->rt_ifp == &ac->ac_if) {
+ rtfree(rt);
+ m_freem(m);
+ return;
+ }
+ (void)memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha));
+ (void)memcpy(ea->arp_sha, ac->ac_enaddr, sizeof(ea->arp_sha));
+ rtfree(rt);
+#ifdef DEBUG_PROXY
+ printf("arp: proxying for %s\n",
+ inet_ntoa_r(itaddr, addrbuf));
+#endif
+ } else {
+ rt = la->la_rt;
+ (void)memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha));
+ sdl = SDL(rt->rt_gateway);
+ (void)memcpy(ea->arp_sha, LLADDR(sdl), sizeof(ea->arp_sha));
+ }
+ }
+
+ (void)memcpy(ea->arp_tpa, ea->arp_spa, sizeof(ea->arp_spa));
+ (void)memcpy(ea->arp_spa, &itaddr, sizeof(ea->arp_spa));
+ ea->arp_op = htons(ARPOP_REPLY);
+ ea->arp_pro = htons(ETHERTYPE_IP); /* let's be sure! */
+ eh = (struct ether_header *)sa.sa_data;
+ (void)memcpy(eh->ether_dhost, ea->arp_tha, sizeof(eh->ether_dhost));
+ eh->ether_type = htons(ETHERTYPE_ARP);
+ sa.sa_family = AF_UNSPEC;
+ sa.sa_len = sizeof(sa);
+ (*ac->ac_if.if_output)(&ac->ac_if, m, &sa, (struct rtentry *)0);
+ return;
+}
+
+/*
+ * Free an arp entry.
+ */
+static void
+arptfree(struct llinfo_arp *la)
+{
+ struct rtentry *rt = la->la_rt;
+ struct sockaddr_dl *sdl;
+ if (rt == 0)
+ panic("arptfree");
+ if (rt->rt_refcnt > 0 && (sdl = SDL(rt->rt_gateway)) &&
+ sdl->sdl_family == AF_LINK) {
+ sdl->sdl_alen = 0;
+ la->la_asked = 0;
+ rt->rt_flags &= ~RTF_REJECT;
+ return;
+ }
+ rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt),
+ 0, (struct rtentry **)0);
+}
+/*
+ * Lookup or enter a new address in arptab.
+ */
+static struct llinfo_arp *
+arplookup(u_long addr, int create, int proxy)
+{
+ struct rtentry *rt;
+ static struct sockaddr_inarp sin = {sizeof(sin), AF_INET, 0, { 0 }, { 0 }, 0, 0 };
+ const char *why = 0;
+
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = addr;
+ sin.sin_other = proxy ? SIN_PROXY : 0;
+ rt = rtalloc1((struct sockaddr *)&sin, create, 0UL);
+ if (rt == 0)
+ return (0);
+ rt->rt_refcnt--;
+
+ if (rt->rt_flags & RTF_GATEWAY)
+ why = "host is not on local network";
+ else if ((rt->rt_flags & RTF_LLINFO) == 0)
+ why = "could not allocate llinfo";
+ else if (rt->rt_gateway->sa_family != AF_LINK)
+ why = "gateway route is not ours";
+
+ if (why && create) {
+ char addrbuf[INET_ADDRSTRLEN];
+ log(LOG_DEBUG, "arplookup %s failed: %s\n",
+ inet_ntoa_r(sin.sin_addr, addrbuf), why);
+ return 0;
+ } else if (why) {
+ return 0;
+ }
+ return ((struct llinfo_arp *)rt->rt_llinfo);
+}
+
+void
+arp_ifinit(struct arpcom *ac, struct ifaddr *ifa)
+{
+ if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
+ arprequest(ac, &(IA_SIN(ifa)->sin_addr),
+ &(IA_SIN(ifa)->sin_addr), ac->ac_enaddr);
+ ifa->ifa_rtrequest = arp_rtrequest;
+ ifa->ifa_flags |= RTF_CLONING;
+}
diff --git a/netinet/if_ether.h b/netinet/if_ether.h
new file mode 100644
index 0000000..f45c414
--- /dev/null
+++ b/netinet/if_ether.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_ether.h 8.3 (Berkeley) 5/2/95
+ * $FreeBSD: src/sys/netinet/if_ether.h,v 1.32 2005/02/22 13:04:03 glebius Exp $
+ */
+
+
+#ifndef _NETINET_IF_ETHER_H_
+#define _NETINET_IF_ETHER_H_
+
+#include <netinet/in.h> /* struct in_addr */
+#include <net/ethernet.h>
+#include <net/if_arp.h>
+
+#ifdef _KERNEL
+/*
+ * Macro to map an IP multicast address to an Ethernet multicast address.
+ * The high-order 25 bits of the Ethernet address are statically assigned,
+ * and the low-order 23 bits are taken from the low end of the IP address.
+ */
+#define ETHER_MAP_IP_MULTICAST(ipaddr, enaddr) \
+ /* struct in_addr *ipaddr; */ \
+ /* u_char enaddr[ETHER_ADDR_LEN]; */ \
+{ \
+ (enaddr)[0] = 0x01; \
+ (enaddr)[1] = 0x00; \
+ (enaddr)[2] = 0x5e; \
+ (enaddr)[3] = ((u_char *)ipaddr)[1] & 0x7f; \
+ (enaddr)[4] = ((u_char *)ipaddr)[2]; \
+ (enaddr)[5] = ((u_char *)ipaddr)[3]; \
+}
+#endif
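A worked example of the mapping above (illustrative only, not part of the original header): because just the low-order 23 bits of the group address survive, distinct IP groups can share one Ethernet multicast address.

/* Hypothetical demonstration of ETHER_MAP_IP_MULTICAST. */
static void
example_map_groups(void)
{
	struct in_addr grp;
	u_char mac[ETHER_ADDR_LEN];

	grp.s_addr = htonl(0xE0000101);		/* 224.0.1.1 */
	ETHER_MAP_IP_MULTICAST(&grp, mac);	/* mac = 01:00:5e:00:01:01 */

	grp.s_addr = htonl(0xE1800101);		/* 225.128.1.1 */
	ETHER_MAP_IP_MULTICAST(&grp, mac);	/* also 01:00:5e:00:01:01 -- top 9 bits dropped */
	(void)mac;
}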
+
+/*
+ * Ethernet Address Resolution Protocol.
+ *
+ * See RFC 826 for protocol description. Structure below is adapted
+ * to resolving internet addresses. Field names used correspond to
+ * RFC 826.
+ */
+struct ether_arp {
+ struct arphdr ea_hdr; /* fixed-size header */
+ u_char arp_sha[ETHER_ADDR_LEN]; /* sender hardware address */
+ u_char arp_spa[4]; /* sender protocol address */
+ u_char arp_tha[ETHER_ADDR_LEN]; /* target hardware address */
+ u_char arp_tpa[4]; /* target protocol address */
+};
+#define arp_hrd ea_hdr.ar_hrd
+#define arp_pro ea_hdr.ar_pro
+#define arp_hln ea_hdr.ar_hln
+#define arp_pln ea_hdr.ar_pln
+#define arp_op ea_hdr.ar_op
+
+struct sockaddr_inarp {
+ u_char sin_len;
+ u_char sin_family;
+ u_short sin_port;
+ struct in_addr sin_addr;
+ struct in_addr sin_srcaddr;
+ u_short sin_tos;
+ u_short sin_other;
+#define SIN_PROXY 1
+};
+/*
+ * IP and ethernet specific routing flags
+ */
+#define RTF_USETRAILERS RTF_PROTO1 /* use trailers */
+#define RTF_ANNOUNCE RTF_PROTO2 /* announce new arp entry */
+
+#ifdef _KERNEL
+extern u_char etherbroadcastaddr[ETHER_ADDR_LEN];
+extern u_char ether_ipmulticast_min[ETHER_ADDR_LEN];
+extern u_char ether_ipmulticast_max[ETHER_ADDR_LEN];
+extern struct ifqueue arpintrq;
+
+int arpresolve(struct arpcom *, struct rtentry *, struct mbuf *,
+ struct sockaddr *, u_char *, struct rtentry *);
+void arp_ifinit(struct arpcom *, struct ifaddr *);
+int ether_addmulti(struct ifreq *, struct arpcom *);
+int ether_delmulti(struct ifreq *, struct arpcom *);
+
+/*
+ * Ethernet multicast address structure. There is one of these for each
+ * multicast address or range of multicast addresses that we are supposed
+ * to listen to on a particular interface. They are kept in a linked list,
+ * rooted in the interface's arpcom structure. (This really has nothing to
+ * do with ARP, or with the Internet address family, but this appears to be
+ * the minimally-disrupting place to put it.)
+ */
+struct ether_multi {
+ u_char enm_addrlo[ETHER_ADDR_LEN]; /* low or only address of range */
+ u_char enm_addrhi[ETHER_ADDR_LEN]; /* high or only address of range */
+ struct arpcom *enm_ac; /* back pointer to arpcom */
+ u_int enm_refcount; /* no. claims to this addr/range */
+ struct ether_multi *enm_next; /* ptr to next ether_multi */
+};
+
+/*
+ * Structure used by macros below to remember position when stepping through
+ * all of the ether_multi records.
+ */
+struct ether_multistep {
+ struct ether_multi *e_enm;
+};
+
+/*
+ * Macro for looking up the ether_multi record for a given range of Ethernet
+ * multicast addresses connected to a given arpcom structure. If no matching
+ * record is found, "enm" returns NULL.
+ */
+#define ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm) \
+ /* u_char addrlo[ETHER_ADDR_LEN]; */ \
+ /* u_char addrhi[ETHER_ADDR_LEN]; */ \
+ /* struct arpcom *ac; */ \
+ /* struct ether_multi *enm; */ \
+{ \
+ for ((enm) = (ac)->ac_multiaddrs; \
+ (enm) != NULL && \
+ (bcmp((enm)->enm_addrlo, (addrlo), ETHER_ADDR_LEN) != 0 || \
+ bcmp((enm)->enm_addrhi, (addrhi), ETHER_ADDR_LEN) != 0); \
+ (enm) = (enm)->enm_next); \
+}
+
+/*
+ * Macro to step through all of the ether_multi records, one at a time.
+ * The current position is remembered in "step", which the caller must
+ * provide. ETHER_FIRST_MULTI(), below, must be called to initialize "step"
+ * and get the first record. Both macros return a NULL "enm" when there
+ * are no remaining records.
+ */
+#define ETHER_NEXT_MULTI(step, enm) \
+ /* struct ether_multistep step; */ \
+ /* struct ether_multi *enm; */ \
+{ \
+ if (((enm) = (step).e_enm) != NULL) \
+ (step).e_enm = (enm)->enm_next; \
+}
+
+#define ETHER_FIRST_MULTI(step, ac, enm) \
+ /* struct ether_multistep step; */ \
+ /* struct arpcom *ac; */ \
+ /* struct ether_multi *enm; */ \
+{ \
+ (step).e_enm = (ac)->ac_multiaddrs; \
+ ETHER_NEXT_MULTI((step), (enm)); \
+}
+
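The stepping macros above are meant to be used together; a minimal sketch of the intended enumeration pattern (not code from this commit, and "ac" below stands for some existing struct arpcom pointer):

/* Walk every multicast address range registered on an arpcom "ac". */
struct ether_multistep step;
struct ether_multi *enm;

ETHER_FIRST_MULTI(step, ac, enm);
while (enm != NULL) {
	/* enm->enm_addrlo .. enm->enm_addrhi is one configured range */
	ETHER_NEXT_MULTI(step, enm);
}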
+#endif
+
+#endif
diff --git a/netinet/igmp.c b/netinet/igmp.c
new file mode 100644
index 0000000..7078914
--- /dev/null
+++ b/netinet/igmp.c
@@ -0,0 +1,473 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)igmp.c 8.1 (Berkeley) 7/19/93
+ */
+
+/*
+ * Internet Group Management Protocol (IGMP) routines.
+ *
+ * Written by Steve Deering, Stanford, May 1988.
+ * Modified by Rosen Sharma, Stanford, Aug 1994.
+ * Modified by Bill Fenner, Xerox PARC, Feb 1995.
+ * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
+ *
+ * MULTICAST Revision: 3.5.1.4
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/igmp.h>
+#include <netinet/igmp_var.h>
+
+static struct router_info *
+ find_rti (struct ifnet *ifp);
+
+static struct igmpstat igmpstat;
+
+SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD,
+ &igmpstat, igmpstat, "");
+
+static int igmp_timers_are_running;
+static u_long igmp_all_hosts_group;
+static u_long igmp_all_rtrs_group;
+static struct mbuf *router_alert;
+static struct router_info *Head;
+
+static void igmp_sendpkt(struct in_multi *, int, unsigned long);
+
+void
+igmp_init(void)
+{
+ struct ipoption *ra;
+
+ /*
+ * To avoid byte-swapping the same value over and over again.
+ */
+ igmp_all_hosts_group = htonl(INADDR_ALLHOSTS_GROUP);
+ igmp_all_rtrs_group = htonl(INADDR_ALLRTRS_GROUP);
+
+ igmp_timers_are_running = 0;
+
+ /*
+ * Construct a Router Alert option to use in outgoing packets
+ */
+ MGET(router_alert, M_DONTWAIT, MT_DATA);
+ ra = mtod(router_alert, struct ipoption *);
+ ra->ipopt_dst.s_addr = 0;
+ ra->ipopt_list[0] = IPOPT_RA; /* Router Alert Option */
+ ra->ipopt_list[1] = 0x04; /* 4 bytes long */
+ ra->ipopt_list[2] = 0x00;
+ ra->ipopt_list[3] = 0x00;
+ router_alert->m_len = sizeof(ra->ipopt_dst) + ra->ipopt_list[1];
+
+ Head = (struct router_info *) 0;
+}
+
+static struct router_info *
+find_rti(struct ifnet *ifp)
+{
+ register struct router_info *rti = Head;
+
+#ifdef IGMP_DEBUG
+ printf("[igmp.c, _find_rti] --> entering \n");
+#endif
+ while (rti) {
+ if (rti->rti_ifp == ifp) {
+#ifdef IGMP_DEBUG
+ printf("[igmp.c, _find_rti] --> found old entry \n");
+#endif
+ return rti;
+ }
+ rti = rti->rti_next;
+ }
+ MALLOC(rti, struct router_info *, sizeof *rti, M_MRTABLE, M_NOWAIT);
+ rti->rti_ifp = ifp;
+ rti->rti_type = IGMP_V2_ROUTER;
+ rti->rti_time = 0;
+ rti->rti_next = Head;
+ Head = rti;
+#ifdef IGMP_DEBUG
+ printf("[igmp.c, _find_rti] --> created an entry \n");
+#endif
+ return rti;
+}
+
+void
+igmp_input(struct mbuf *m, int iphlen)
+{
+ register struct igmp *igmp;
+ register struct ip *ip;
+ register int igmplen;
+ register struct ifnet *ifp = m->m_pkthdr.rcvif;
+ register int minlen;
+ register struct in_multi *inm;
+ register struct in_ifaddr *ia;
+ struct in_multistep step;
+ struct router_info *rti;
+
+ int timer; /** timer value in the igmp query header **/
+
+ ++igmpstat.igps_rcv_total;
+
+ ip = mtod(m, struct ip *);
+ igmplen = ip->ip_len;
+
+ /*
+ * Validate lengths
+ */
+ if (igmplen < IGMP_MINLEN) {
+ ++igmpstat.igps_rcv_tooshort;
+ m_freem(m);
+ return;
+ }
+ minlen = iphlen + IGMP_MINLEN;
+ if ((m->m_flags & M_EXT || m->m_len < minlen) &&
+ (m = m_pullup(m, minlen)) == 0) {
+ ++igmpstat.igps_rcv_tooshort;
+ return;
+ }
+
+ /*
+ * Validate checksum
+ */
+ m->m_data += iphlen;
+ m->m_len -= iphlen;
+ igmp = mtod(m, struct igmp *);
+ if (in_cksum(m, igmplen)) {
+ ++igmpstat.igps_rcv_badsum;
+ m_freem(m);
+ return;
+ }
+ m->m_data -= iphlen;
+ m->m_len += iphlen;
+
+ ip = mtod(m, struct ip *);
+ timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
+ rti = find_rti(ifp);
+
+ /*
+ * In the IGMPv2 specification, there are 3 states and a flag.
+ *
+ * In Non-Member state, we simply don't have a membership record.
+ * In Delaying Member state, our timer is running (inm->inm_timer)
+ * In Idle Member state, our timer is not running (inm->inm_timer==0)
+ *
+ * The flag is inm->inm_state, it is set to IGMP_OTHERMEMBER if
+ * we have heard a report from another member, or IGMP_IREPORTEDLAST
+ * if I sent the last report.
+ */
+ switch (igmp->igmp_type) {
+
+ case IGMP_MEMBERSHIP_QUERY:
+ ++igmpstat.igps_rcv_queries;
+
+ if (ifp->if_flags & IFF_LOOPBACK)
+ break;
+
+ if (igmp->igmp_code == 0) {
+ /*
+ * Old router. Remember that the querier on this
+ * interface is old, and set the timer to the
+ * value in RFC 1112.
+ */
+
+ rti->rti_type = IGMP_V1_ROUTER;
+ rti->rti_time = 0;
+
+ timer = IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ;
+
+ if (ip->ip_dst.s_addr != igmp_all_hosts_group ||
+ igmp->igmp_group.s_addr != 0) {
+ ++igmpstat.igps_rcv_badqueries;
+ m_freem(m);
+ return;
+ }
+ } else {
+ /*
+ * New router. Simply do the new validity check.
+ */
+
+ if (igmp->igmp_group.s_addr != 0 &&
+ !IN_MULTICAST(ntohl(igmp->igmp_group.s_addr))) {
+ ++igmpstat.igps_rcv_badqueries;
+ m_freem(m);
+ return;
+ }
+ }
+
+ /*
+ * - Start the timers in all of our membership records
+ * that the query applies to for the interface on
+ * which the query arrived excl. those that belong
+ * to the "all-hosts" group (224.0.0.1).
+ * - Restart any timer that is already running but has
+ * a value longer than the requested timeout.
+ * - Use the value specified in the query message as
+ * the maximum timeout.
+ */
+ IN_FIRST_MULTI(step, inm);
+ while (inm != NULL) {
+ if (inm->inm_ifp == ifp &&
+ inm->inm_addr.s_addr != igmp_all_hosts_group &&
+ (igmp->igmp_group.s_addr == 0 ||
+ igmp->igmp_group.s_addr == inm->inm_addr.s_addr)) {
+ if (inm->inm_timer == 0 ||
+ inm->inm_timer > timer) {
+ inm->inm_timer =
+ IGMP_RANDOM_DELAY(timer);
+ igmp_timers_are_running = 1;
+ }
+ }
+ IN_NEXT_MULTI(step, inm);
+ }
+
+ break;
+
+ case IGMP_V1_MEMBERSHIP_REPORT:
+ case IGMP_V2_MEMBERSHIP_REPORT:
+ /*
+ * For fast leave to work, we have to know that we are the
+ * last person to send a report for this group. Reports
+ * can potentially get looped back if we are a multicast
+ * router, so discard reports sourced by me.
+ */
+ IFP_TO_IA(ifp, ia);
+ if (ia && ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr)
+ break;
+
+ ++igmpstat.igps_rcv_reports;
+
+ if (ifp->if_flags & IFF_LOOPBACK)
+ break;
+
+ if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr))) {
+ ++igmpstat.igps_rcv_badreports;
+ m_freem(m);
+ return;
+ }
+
+ /*
+ * KLUDGE: if the IP source address of the report has an
+ * unspecified (i.e., zero) subnet number, as is allowed for
+ * a booting host, replace it with the correct subnet number
+ * so that a process-level multicast routing demon can
+ * determine which subnet it arrived from. This is necessary
+ * to compensate for the lack of any way for a process to
+ * determine the arrival interface of an incoming packet.
+ */
+ if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0)
+ if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet);
+
+ /*
+ * If we belong to the group being reported, stop
+ * our timer for that group.
+ */
+ IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
+
+ if (inm != NULL) {
+ inm->inm_timer = 0;
+ ++igmpstat.igps_rcv_ourreports;
+
+ inm->inm_state = IGMP_OTHERMEMBER;
+ }
+
+ break;
+ }
+
+ /*
+ * Pass all valid IGMP packets up to any process(es) listening
+ * on a raw IGMP socket.
+ */
+ rip_input(m, iphlen);
+}
+
+void
+igmp_joingroup(struct in_multi *inm)
+{
+ int s = splnet();
+
+ if (inm->inm_addr.s_addr == igmp_all_hosts_group
+ || inm->inm_ifp->if_flags & IFF_LOOPBACK) {
+ inm->inm_timer = 0;
+ inm->inm_state = IGMP_OTHERMEMBER;
+ } else {
+ inm->inm_rti = find_rti(inm->inm_ifp);
+ igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
+ inm->inm_timer = IGMP_RANDOM_DELAY(
+ IGMP_MAX_HOST_REPORT_DELAY*PR_FASTHZ);
+ inm->inm_state = IGMP_IREPORTEDLAST;
+ igmp_timers_are_running = 1;
+ }
+ splx(s);
+}
+
+void
+igmp_leavegroup(struct in_multi *inm)
+{
+ if (inm->inm_state == IGMP_IREPORTEDLAST &&
+ inm->inm_addr.s_addr != igmp_all_hosts_group &&
+ !(inm->inm_ifp->if_flags & IFF_LOOPBACK) &&
+ inm->inm_rti->rti_type != IGMP_V1_ROUTER)
+ igmp_sendpkt(inm, IGMP_V2_LEAVE_GROUP, igmp_all_rtrs_group);
+}
+
+void
+igmp_fasttimo(void)
+{
+ register struct in_multi *inm;
+ struct in_multistep step;
+ int s;
+
+ /*
+ * Quick check to see if any work needs to be done, in order
+ * to minimize the overhead of fasttimo processing.
+ */
+
+ if (!igmp_timers_are_running)
+ return;
+
+ s = splnet();
+ igmp_timers_are_running = 0;
+ IN_FIRST_MULTI(step, inm);
+ while (inm != NULL) {
+ if (inm->inm_timer == 0) {
+ /* do nothing */
+ } else if (--inm->inm_timer == 0) {
+ igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
+ inm->inm_state = IGMP_IREPORTEDLAST;
+ } else {
+ igmp_timers_are_running = 1;
+ }
+ IN_NEXT_MULTI(step, inm);
+ }
+ splx(s);
+}
+
+void
+igmp_slowtimo(void)
+{
+ int s = splnet();
+ register struct router_info *rti = Head;
+
+#ifdef IGMP_DEBUG
+ printf("[igmp.c,_slowtimo] -- > entering \n");
+#endif
+ while (rti) {
+ if (rti->rti_type == IGMP_V1_ROUTER) {
+ rti->rti_time++;
+ if (rti->rti_time >= IGMP_AGE_THRESHOLD) {
+ rti->rti_type = IGMP_V2_ROUTER;
+ }
+ }
+ rti = rti->rti_next;
+ }
+#ifdef IGMP_DEBUG
+ printf("[igmp.c,_slowtimo] -- > exiting \n");
+#endif
+ splx(s);
+}
+
+static struct route igmprt;
+
+static void
+igmp_sendpkt(struct in_multi *inm, int type, unsigned long addr)
+{
+ struct mbuf *m;
+ struct igmp *igmp;
+ struct ip *ip;
+ struct ip_moptions imo;
+
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ return;
+
+ m->m_pkthdr.rcvif = loif;
+ m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN;
+ MH_ALIGN(m, IGMP_MINLEN + sizeof(struct ip));
+ m->m_data += sizeof(struct ip);
+ m->m_len = IGMP_MINLEN;
+ igmp = mtod(m, struct igmp *);
+ igmp->igmp_type = type;
+ igmp->igmp_code = 0;
+ igmp->igmp_group = inm->inm_addr;
+ igmp->igmp_cksum = 0;
+ igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN);
+
+ m->m_data -= sizeof(struct ip);
+ m->m_len += sizeof(struct ip);
+ ip = mtod(m, struct ip *);
+ ip->ip_tos = 0;
+ ip->ip_len = sizeof(struct ip) + IGMP_MINLEN;
+ ip->ip_off = 0;
+ ip->ip_p = IPPROTO_IGMP;
+ ip->ip_src.s_addr = INADDR_ANY;
+ ip->ip_dst.s_addr = addr ? addr : igmp->igmp_group.s_addr;
+
+ imo.imo_multicast_ifp = inm->inm_ifp;
+ imo.imo_multicast_ttl = 1;
+ imo.imo_multicast_vif = -1;
+ /*
+ * Request loopback of the report if we are acting as a multicast
+ * router, so that the process-level routing demon can hear it.
+ */
+ imo.imo_multicast_loop = (ip_mrouter != NULL);
+
+ /*
+ * XXX
+ * Do we have to worry about reentrancy here? Don't think so.
+ */
+ ip_output(m, router_alert, &igmprt, 0, &imo);
+
+ ++igmpstat.igps_snd_reports;
+}
diff --git a/netinet/igmp.h b/netinet/igmp.h
new file mode 100644
index 0000000..b397ccd
--- /dev/null
+++ b/netinet/igmp.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)igmp.h 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _NETINET_IGMP_H_
+#define _NETINET_IGMP_H_
+
+#include <netinet/in.h> /* struct in_addr */
+
+/*
+ * Internet Group Management Protocol (IGMP) definitions.
+ *
+ * Written by Steve Deering, Stanford, May 1988.
+ *
+ * MULTICAST Revision: 3.5.1.2
+ */
+
+/*
+ * IGMP packet format.
+ */
+struct igmp {
+ u_char igmp_type; /* version & type of IGMP message */
+ u_char igmp_code; /* subtype for routing msgs */
+ u_short igmp_cksum; /* IP-style checksum */
+ struct in_addr igmp_group; /* group address being reported */
+}; /* (zero for queries) */
+
+#define IGMP_MINLEN 8
+
+/*
+ * Message types, including version number.
+ */
+#define IGMP_MEMBERSHIP_QUERY 0x11 /* membership query */
+#define IGMP_V1_MEMBERSHIP_REPORT 0x12 /* Ver. 1 membership report */
+#define IGMP_V2_MEMBERSHIP_REPORT 0x16 /* Ver. 2 membership report */
+#define IGMP_V2_LEAVE_GROUP 0x17 /* Leave-group message */
+
+#define IGMP_DVMRP 0x13 /* DVMRP routing message */
+#define IGMP_PIM 0x14 /* PIM routing message */
+
+#define IGMP_MTRACE_RESP 0x1e /* traceroute resp.(to sender)*/
+#define IGMP_MTRACE 0x1f /* mcast traceroute messages */
+
+#define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */
+ /* query (in seconds) according */
+ /* to RFC1112 */
+
+
+#define IGMP_TIMER_SCALE 10 /* denotes that the igmp code field */
+ /* specifies time in 10th of seconds*/
+
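As a worked example (assuming the usual BSD fast-timeout rate of PR_FASTHZ == 5 ticks per second), a query whose igmp_code is 100 asks for a response within 10.0 seconds, which igmp_input() converts to 100 * PR_FASTHZ / IGMP_TIMER_SCALE = 50 fast-timer ticks.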
+/*
+ * The following four definitions are for backwards compatibility.
+ * They should be removed as soon as all applications are updated to
+ * use the new constant names.
+ */
+#define IGMP_HOST_MEMBERSHIP_QUERY IGMP_MEMBERSHIP_QUERY
+#define IGMP_HOST_MEMBERSHIP_REPORT IGMP_V1_MEMBERSHIP_REPORT
+#define IGMP_HOST_NEW_MEMBERSHIP_REPORT IGMP_V2_MEMBERSHIP_REPORT
+#define IGMP_HOST_LEAVE_MESSAGE IGMP_V2_LEAVE_GROUP
+
+#endif /* _NETINET_IGMP_H_ */
diff --git a/netinet/igmp_var.h b/netinet/igmp_var.h
new file mode 100644
index 0000000..d1454e0
--- /dev/null
+++ b/netinet/igmp_var.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)igmp_var.h 8.1 (Berkeley) 7/19/93
+ * $FreeBSD: src/sys/netinet/igmp_var.h,v 1.20 2004/04/07 20:46:13 imp Exp $
+ */
+
+#ifndef _NETINET_IGMP_VAR_H_
+#define _NETINET_IGMP_VAR_H_
+
+/*
+ * Internet Group Management Protocol (IGMP),
+ * implementation-specific definitions.
+ *
+ * Written by Steve Deering, Stanford, May 1988.
+ *
+ * MULTICAST Revision: 3.5.1.3
+ */
+
+struct igmpstat {
+ u_int igps_rcv_total; /* total IGMP messages received */
+ u_int igps_rcv_tooshort; /* received with too few bytes */
+ u_int igps_rcv_badsum; /* received with bad checksum */
+ u_int igps_rcv_queries; /* received membership queries */
+ u_int igps_rcv_badqueries; /* received invalid queries */
+ u_int igps_rcv_reports; /* received membership reports */
+ u_int igps_rcv_badreports; /* received invalid reports */
+ u_int igps_rcv_ourreports; /* received reports for our groups */
+ u_int igps_snd_reports; /* sent membership reports */
+};
+
+#ifdef _KERNEL
+#define IGMP_RANDOM_DELAY(X) (random() % (X) + 1)
+
+/*
+ * States for IGMPv2's leave processing
+ */
+#define IGMP_OTHERMEMBER 0
+#define IGMP_IREPORTEDLAST 1
+
+/*
+ * We must remember what version the subnet's querier is.
+ * We conveniently use the IGMP message type for the proper
+ * membership report to keep this state.
+ */
+#define IGMP_V1_ROUTER IGMP_V1_MEMBERSHIP_REPORT
+#define IGMP_V2_ROUTER IGMP_V2_MEMBERSHIP_REPORT
+
+/*
+ * Revert to new router if we haven't heard from an old router in
+ * this amount of time.
+ */
+#define IGMP_AGE_THRESHOLD 540
+
+void igmp_init(void);
+void igmp_input(struct mbuf *, int);
+void igmp_joingroup(struct in_multi *);
+void igmp_leavegroup(struct in_multi *);
+void igmp_fasttimo(void);
+void igmp_slowtimo(void);
+
+SYSCTL_DECL(_net_inet_igmp);
+
+#endif
+
+/*
+ * Names for IGMP sysctl objects
+ */
+#define IGMPCTL_STATS 1 /* statistics (read-only) */
+#define IGMPCTL_MAXID 2
+
+#define IGMPCTL_NAMES { \
+ { 0, 0 }, \
+ { "stats", CTLTYPE_STRUCT }, \
+}
+#endif
diff --git a/netinet/in.c b/netinet/in.c
new file mode 100644
index 0000000..cb08232
--- /dev/null
+++ b/netinet/in.c
@@ -0,0 +1,711 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in.c 8.4 (Berkeley) 1/9/95
+ * $FreeBSD: src/sys/netinet/in.c,v 1.75 2004/04/07 20:46:13 imp Exp $
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <errno.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/if_ether.h>
+
+#include <netinet/igmp_var.h>
+
+/*
+ * This structure is used to keep track of in_multi chains which belong to
+ * deleted interface addresses.
+ */
+static LIST_HEAD(, multi_kludge) in_mk; /* XXX BSS initialization */
+
+struct multi_kludge {
+ LIST_ENTRY(multi_kludge) mk_entry;
+ struct ifnet *mk_ifp;
+ struct in_multihead mk_head;
+};
+
+static void in_socktrim(struct sockaddr_in *);
+static int in_ifinit(struct ifnet *,
+ struct in_ifaddr *, struct sockaddr_in *, int);
+static void in_ifscrub(struct ifnet *, struct in_ifaddr *);
+
+static int subnetsarelocal = 0;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW,
+ &subnetsarelocal, 0, "");
+/*
+ * Return 1 if an internet address is for a ``local'' host
+ * (one to which we have a connection). If subnetsarelocal
+ * is true, this includes other subnets of the local net.
+ * Otherwise, it includes only the directly-connected (sub)nets.
+ */
+int
+in_localaddr(struct in_addr in)
+{
+ register u_long i = ntohl(in.s_addr);
+ register struct in_ifaddr *ia;
+
+ if (subnetsarelocal) {
+ for (ia = in_ifaddr; ia; ia = ia->ia_next)
+ if ((i & ia->ia_netmask) == ia->ia_net)
+ return (1);
+ } else {
+ for (ia = in_ifaddr; ia; ia = ia->ia_next)
+ if ((i & ia->ia_subnetmask) == ia->ia_subnet)
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * Determine whether an IP address is in a reserved set of addresses
+ * that may not be forwarded, or whether datagrams to that destination
+ * may be forwarded.
+ */
+int
+in_canforward(struct in_addr in)
+{
+ register u_long i = ntohl(in.s_addr);
+ register u_long net;
+
+ if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i))
+ return (0);
+ if (IN_CLASSA(i)) {
+ net = i & IN_CLASSA_NET;
+ if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
+ return (0);
+ }
+ return (1);
+}
+
+/*
+ * Trim a mask in a sockaddr
+ */
+static void
+in_socktrim(struct sockaddr_in *ap)
+{
+ register char *cplim = (char *) &ap->sin_addr;
+ register char *cp = (char *) (&ap->sin_addr + 1);
+
+ ap->sin_len = 0;
+ while (--cp >= cplim)
+ if (*cp) {
+ (ap)->sin_len = cp - (char *) (ap) + 1;
+ break;
+ }
+}
+
+static int in_interfaces; /* number of external internet interfaces */
+
+/*
+ * Generic internet control operations (ioctl's).
+ * Ifp is 0 if not an interface-specific ioctl.
+ */
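For orientation, these requests normally reach in_control() through ioctl() on an AF_INET socket. A minimal application-side sketch, assuming the usual BSD in_aliasreq layout from <netinet/in_var.h>; the helper and interface names are hypothetical:

#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>

/* Hypothetical helper: add an alias address to interface "ifname". */
static int
example_add_alias(const char *ifname, struct in_addr addr, struct in_addr mask)
{
	struct in_aliasreq ifra;
	int s, rv;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		return (-1);
	memset(&ifra, 0, sizeof(ifra));
	strncpy(ifra.ifra_name, ifname, sizeof(ifra.ifra_name) - 1);
	ifra.ifra_addr.sin_len = sizeof(ifra.ifra_addr);
	ifra.ifra_addr.sin_family = AF_INET;
	ifra.ifra_addr.sin_addr = addr;
	ifra.ifra_mask.sin_len = sizeof(ifra.ifra_mask);
	ifra.ifra_mask.sin_family = AF_INET;
	ifra.ifra_mask.sin_addr = mask;
	rv = ioctl(s, SIOCAIFADDR, &ifra);	/* dispatched to in_control() below */
	close(s);
	return (rv);
}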
+int
+in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp)
+{
+ register struct ifreq *ifr = (struct ifreq *)data;
+ register struct in_ifaddr *ia = 0, *iap;
+ register struct ifaddr *ifa;
+ struct in_ifaddr *oia;
+ struct in_aliasreq *ifra = (struct in_aliasreq *)data;
+ struct sockaddr_in oldaddr;
+ int error, hostIsNew, maskIsNew, s;
+ struct multi_kludge *mk;
+
+ /*
+ * Find address for this interface, if it exists.
+ *
+ * If an alias address was specified, find that one instead of
+ * the first one on the interface.
+ */
+ if (ifp)
+ for (iap = in_ifaddr; iap; iap = iap->ia_next)
+ if (iap->ia_ifp == ifp) {
+ if (((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr.s_addr ==
+ iap->ia_addr.sin_addr.s_addr) {
+ ia = iap;
+ break;
+ } else if (ia == NULL) {
+ ia = iap;
+ if (ifr->ifr_addr.sa_family != AF_INET)
+ break;
+ }
+ }
+
+ switch (cmd) {
+
+ case SIOCAIFADDR:
+ case SIOCDIFADDR:
+ if (ifra->ifra_addr.sin_family == AF_INET) {
+ for (oia = ia; ia; ia = ia->ia_next) {
+ if (ia->ia_ifp == ifp &&
+ ia->ia_addr.sin_addr.s_addr ==
+ ifra->ifra_addr.sin_addr.s_addr)
+ break;
+ }
+ if ((ifp->if_flags & IFF_POINTOPOINT)
+ && (cmd == SIOCAIFADDR)
+ && (ifra->ifra_dstaddr.sin_addr.s_addr
+ == INADDR_ANY)) {
+ return EDESTADDRREQ;
+ }
+ }
+ if (cmd == SIOCDIFADDR && ia == 0)
+ return (EADDRNOTAVAIL);
+ /* FALLTHROUGH */
+ case SIOCSIFADDR:
+ case SIOCSIFNETMASK:
+ case SIOCSIFDSTADDR:
+ if ((so->so_state & SS_PRIV) == 0)
+ return (EPERM);
+
+ if (ifp == 0)
+ panic("in_control");
+ if (ia == (struct in_ifaddr *)0) {
+ oia = (struct in_ifaddr *)
+ malloc(sizeof *oia, M_IFADDR, M_WAITOK);
+ if (oia == (struct in_ifaddr *)NULL)
+ return (ENOBUFS);
+ bzero((caddr_t)oia, sizeof *oia);
+ ia = in_ifaddr;
+ /*
+ * Protect from ipintr() traversing address list
+ * while we're modifying it.
+ */
+ s = splnet();
+
+ if (ia) {
+ for ( ; ia->ia_next; ia = ia->ia_next)
+ continue;
+ ia->ia_next = oia;
+ } else
+ in_ifaddr = oia;
+ ia = oia;
+ ifa = ifp->if_addrlist;
+ if (ifa) {
+ for ( ; ifa->ifa_next; ifa = ifa->ifa_next)
+ continue;
+ ifa->ifa_next = (struct ifaddr *) ia;
+ } else
+ ifp->if_addrlist = (struct ifaddr *) ia;
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ ia->ia_ifa.ifa_dstaddr
+ = (struct sockaddr *)&ia->ia_dstaddr;
+ ia->ia_ifa.ifa_netmask
+ = (struct sockaddr *)&ia->ia_sockmask;
+ ia->ia_sockmask.sin_len = 8;
+ if (ifp->if_flags & IFF_BROADCAST) {
+ ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
+ ia->ia_broadaddr.sin_family = AF_INET;
+ }
+ ia->ia_ifp = ifp;
+ if (!(ifp->if_flags & IFF_LOOPBACK))
+ in_interfaces++;
+ splx(s);
+ }
+ break;
+
+ case SIOCSIFBRDADDR:
+ if ((so->so_state & SS_PRIV) == 0)
+ return (EPERM);
+ /* FALLTHROUGH */
+
+ case SIOCGIFADDR:
+ case SIOCGIFNETMASK:
+ case SIOCGIFDSTADDR:
+ case SIOCGIFBRDADDR:
+ if (ia == (struct in_ifaddr *)0)
+ return (EADDRNOTAVAIL);
+ break;
+ }
+ switch (cmd) {
+
+ case SIOCGIFADDR:
+ *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
+ break;
+
+ case SIOCGIFBRDADDR:
+ if ((ifp->if_flags & IFF_BROADCAST) == 0)
+ return (EINVAL);
+ *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
+ break;
+
+ case SIOCGIFDSTADDR:
+ if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+ return (EINVAL);
+ *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
+ break;
+
+ case SIOCGIFNETMASK:
+ *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
+ break;
+
+ case SIOCSIFDSTADDR:
+ if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+ return (EINVAL);
+ oldaddr = ia->ia_dstaddr;
+ ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
+ if (ifp->if_ioctl && (error = (*ifp->if_ioctl)
+ (ifp, SIOCSIFDSTADDR, (caddr_t)ia))) {
+ ia->ia_dstaddr = oldaddr;
+ return (error);
+ }
+ if (ia->ia_flags & IFA_ROUTE) {
+ ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
+ rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+ ia->ia_ifa.ifa_dstaddr =
+ (struct sockaddr *)&ia->ia_dstaddr;
+ rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+ }
+ break;
+
+ case SIOCSIFBRDADDR:
+ if ((ifp->if_flags & IFF_BROADCAST) == 0)
+ return (EINVAL);
+ ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
+ break;
+
+ case SIOCSIFADDR:
+ return (in_ifinit(ifp, ia,
+ (struct sockaddr_in *) &ifr->ifr_addr, 1));
+
+ case SIOCSIFNETMASK:
+ ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr;
+ ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
+ break;
+
+ case SIOCAIFADDR:
+ maskIsNew = 0;
+ hostIsNew = 1;
+ error = 0;
+ if (ia->ia_addr.sin_family == AF_INET) {
+ if (ifra->ifra_addr.sin_len == 0) {
+ ifra->ifra_addr = ia->ia_addr;
+ hostIsNew = 0;
+ } else if (ifra->ifra_addr.sin_addr.s_addr ==
+ ia->ia_addr.sin_addr.s_addr)
+ hostIsNew = 0;
+ }
+ if (ifra->ifra_mask.sin_len) {
+ in_ifscrub(ifp, ia);
+ ia->ia_sockmask = ifra->ifra_mask;
+ ia->ia_subnetmask =
+ ntohl(ia->ia_sockmask.sin_addr.s_addr);
+ maskIsNew = 1;
+ }
+ if ((ifp->if_flags & IFF_POINTOPOINT) &&
+ (ifra->ifra_dstaddr.sin_family == AF_INET)) {
+ in_ifscrub(ifp, ia);
+ ia->ia_dstaddr = ifra->ifra_dstaddr;
+ maskIsNew = 1; /* We lie; but the effect's the same */
+ }
+ if (ifra->ifra_addr.sin_family == AF_INET &&
+ (hostIsNew || maskIsNew))
+ error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+ if ((ifp->if_flags & IFF_BROADCAST) &&
+ (ifra->ifra_broadaddr.sin_family == AF_INET))
+ ia->ia_broadaddr = ifra->ifra_broadaddr;
+ return (error);
+
+ case SIOCDIFADDR:
+ mk = malloc(sizeof *mk, M_IPMADDR, M_WAITOK);
+ if (!mk)
+ return ENOBUFS;
+
+ in_ifscrub(ifp, ia);
+ /*
+ * Protect from ipintr() traversing address list
+ * while we're modifying it.
+ */
+ s = splnet();
+
+ if ((ifa = ifp->if_addrlist) == (struct ifaddr *)ia)
+ ifp->if_addrlist = ifa->ifa_next;
+ else {
+ while (ifa->ifa_next &&
+ (ifa->ifa_next != (struct ifaddr *)ia))
+ ifa = ifa->ifa_next;
+ if (ifa->ifa_next)
+ ifa->ifa_next = ((struct ifaddr *)ia)->ifa_next;
+ else
+ printf("Couldn't unlink inifaddr from ifp\n");
+ }
+ oia = ia;
+ if (oia == (ia = in_ifaddr))
+ in_ifaddr = ia->ia_next;
+ else {
+ while (ia->ia_next && (ia->ia_next != oia))
+ ia = ia->ia_next;
+ if (ia->ia_next)
+ ia->ia_next = oia->ia_next;
+ else
+				printf("Didn't unlink inifaddr from list\n");
+ }
+
+ if (!oia->ia_multiaddrs.lh_first) {
+ IFAFREE(&oia->ia_ifa);
+ FREE(mk, M_IPMADDR);
+ splx(s);
+ break;
+ }
+
+ /*
+ * Multicast address kludge:
+ * If there were any multicast addresses attached to this
+ * interface address, either move them to another address
+ * on this interface, or save them until such time as this
+ * interface is reconfigured for IP.
+ */
+ IFP_TO_IA(oia->ia_ifp, ia);
+ if (ia) { /* there is another address */
+ struct in_multi *inm;
+ for(inm = oia->ia_multiaddrs.lh_first; inm;
+ inm = inm->inm_entry.le_next) {
+ IFAFREE(&inm->inm_ia->ia_ifa);
+ ia->ia_ifa.ifa_refcnt++;
+ inm->inm_ia = ia;
+ LIST_INSERT_HEAD(&ia->ia_multiaddrs, inm,
+ inm_entry);
+ }
+ FREE(mk, M_IPMADDR);
+ } else { /* last address on this if deleted, save */
+ struct in_multi *inm;
+
+ LIST_INIT(&mk->mk_head);
+ mk->mk_ifp = ifp;
+
+ for(inm = oia->ia_multiaddrs.lh_first; inm;
+ inm = inm->inm_entry.le_next) {
+ LIST_INSERT_HEAD(&mk->mk_head, inm, inm_entry);
+ }
+
+ if (mk->mk_head.lh_first) {
+ LIST_INSERT_HEAD(&in_mk, mk, mk_entry);
+ } else {
+ FREE(mk, M_IPMADDR);
+ }
+ }
+
+ IFAFREE((&oia->ia_ifa));
+ splx(s);
+ break;
+
+ default:
+ if (ifp == 0 || ifp->if_ioctl == 0)
+ return (EOPNOTSUPP);
+ return ((*ifp->if_ioctl)(ifp, cmd, data));
+ }
+ return (0);
+}
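+
+/*
+ * A minimal sketch of how in_control() is typically reached from
+ * application code: an ioctl such as SIOCAIFADDR issued on an
+ * AF_INET socket.  The interface name and addresses below are
+ * placeholders only.
+ *
+ *	struct in_aliasreq ifra;
+ *	int s = socket(AF_INET, SOCK_DGRAM, 0);
+ *
+ *	memset(&ifra, 0, sizeof(ifra));
+ *	strncpy(ifra.ifra_name, "eth0", sizeof(ifra.ifra_name));
+ *	ifra.ifra_addr.sin_len = sizeof(struct sockaddr_in);
+ *	ifra.ifra_addr.sin_family = AF_INET;
+ *	ifra.ifra_addr.sin_addr.s_addr = inet_addr("192.168.1.10");
+ *	ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
+ *	ifra.ifra_mask.sin_family = AF_INET;
+ *	ifra.ifra_mask.sin_addr.s_addr = inet_addr("255.255.255.0");
+ *	ioctl(s, SIOCAIFADDR, &ifra);
+ */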
+
+/*
+ * Delete any existing route for an interface.
+ */
+static void
+in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia)
+{
+
+ if ((ia->ia_flags & IFA_ROUTE) == 0)
+ return;
+ if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
+ rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+ else
+ rtinit(&(ia->ia_ifa), (int)RTM_DELETE, 0);
+ ia->ia_flags &= ~IFA_ROUTE;
+}
+
+/*
+ * Initialize an interface's internet address
+ * and routing table entry.
+ */
+static int
+in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
+ int scrub)
+{
+ register u_long i = ntohl(sin->sin_addr.s_addr);
+ struct sockaddr_in oldaddr;
+ int s = splimp(), flags = RTF_UP, error;
+ struct multi_kludge *mk;
+
+ oldaddr = ia->ia_addr;
+ ia->ia_addr = *sin;
+ /*
+ * Give the interface a chance to initialize
+ * if this is its first address,
+ * and to validate the address if necessary.
+ */
+ if (ifp->if_ioctl &&
+ (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
+ splx(s);
+ ia->ia_addr = oldaddr;
+ return (error);
+ }
+ splx(s);
+ if (scrub) {
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
+ in_ifscrub(ifp, ia);
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ }
+ if (IN_CLASSA(i))
+ ia->ia_netmask = IN_CLASSA_NET;
+ else if (IN_CLASSB(i))
+ ia->ia_netmask = IN_CLASSB_NET;
+ else
+ ia->ia_netmask = IN_CLASSC_NET;
+ /*
+ * The subnet mask usually includes at least the standard network part,
+	 * but may be smaller in the case of supernetting.
+ * If it is set, we believe it.
+ */
+ if (ia->ia_subnetmask == 0) {
+ ia->ia_subnetmask = ia->ia_netmask;
+ ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
+ } else
+ ia->ia_netmask &= ia->ia_subnetmask;
+ ia->ia_net = i & ia->ia_netmask;
+ ia->ia_subnet = i & ia->ia_subnetmask;
+ in_socktrim(&ia->ia_sockmask);
+ /*
+ * Add route for the network.
+ */
+ ia->ia_ifa.ifa_metric = ifp->if_metric;
+ if (ifp->if_flags & IFF_BROADCAST) {
+ ia->ia_broadaddr.sin_addr.s_addr =
+ htonl(ia->ia_subnet | ~ia->ia_subnetmask);
+ ia->ia_netbroadcast.s_addr =
+ htonl(ia->ia_net | ~ ia->ia_netmask);
+ } else if (ifp->if_flags & IFF_LOOPBACK) {
+ ia->ia_ifa.ifa_dstaddr = ia->ia_ifa.ifa_addr;
+ flags |= RTF_HOST;
+ } else if (ifp->if_flags & IFF_POINTOPOINT) {
+ if (ia->ia_dstaddr.sin_family != AF_INET)
+ return (0);
+ flags |= RTF_HOST;
+ }
+ if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, flags)) == 0)
+ ia->ia_flags |= IFA_ROUTE;
+
+ LIST_INIT(&ia->ia_multiaddrs);
+ /*
+ * If the interface supports multicast, join the "all hosts"
+ * multicast group on that interface.
+ */
+ if (ifp->if_flags & IFF_MULTICAST) {
+ struct in_addr addr;
+
+ /*
+ * Continuation of multicast address hack:
+ * If there was a multicast group list previously saved
+ * for this interface, then we re-attach it to the first
+ * address configured on the i/f.
+ */
+ for(mk = in_mk.lh_first; mk; mk = mk->mk_entry.le_next) {
+ if(mk->mk_ifp == ifp) {
+ struct in_multi *inm;
+
+ for(inm = mk->mk_head.lh_first; inm;
+ inm = inm->inm_entry.le_next) {
+ IFAFREE(&inm->inm_ia->ia_ifa);
+ ia->ia_ifa.ifa_refcnt++;
+ inm->inm_ia = ia;
+ LIST_INSERT_HEAD(&ia->ia_multiaddrs,
+ inm, inm_entry);
+ }
+ LIST_REMOVE(mk, mk_entry);
+ free(mk, M_IPMADDR);
+ break;
+ }
+ }
+
+ addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
+ in_addmulti(&addr, ifp);
+ }
+ return (error);
+}
+
+
+/*
+ * Return 1 if the address might be a local broadcast address.
+ */
+int
+in_broadcast(struct in_addr in, struct ifnet *ifp)
+{
+ register struct ifaddr *ifa;
+ u_long t;
+
+ if (in.s_addr == INADDR_BROADCAST ||
+ in.s_addr == INADDR_ANY)
+ return 1;
+ if ((ifp->if_flags & IFF_BROADCAST) == 0)
+ return 0;
+ t = ntohl(in.s_addr);
+ /*
+ * Look through the list of addresses for a match
+ * with a broadcast address.
+ */
+#define ia ((struct in_ifaddr *)ifa)
+ for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next)
+ if (ifa->ifa_addr->sa_family == AF_INET &&
+ (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
+ in.s_addr == ia->ia_netbroadcast.s_addr ||
+ /*
+ * Check for old-style (host 0) broadcast.
+ */
+ t == ia->ia_subnet || t == ia->ia_net) &&
+ /*
+		      * Check for an all-ones subnetmask. These
+ * only exist when an interface gets a secondary
+ * address.
+ */
+ ia->ia_subnetmask != (u_long)0xffffffff)
+ return 1;
+ return (0);
+#undef ia
+}
+/*
+ * Add an address to the list of IP multicast addresses for a given interface.
+ */
+struct in_multi *
+in_addmulti(struct in_addr *ap, struct ifnet *ifp)
+{
+ register struct in_multi *inm;
+ struct ifreq ifr;
+ struct in_ifaddr *ia;
+ int s = splnet();
+
+ /*
+ * See if address already in list.
+ */
+ IN_LOOKUP_MULTI(*ap, ifp, inm);
+ if (inm != NULL) {
+ /*
+ * Found it; just increment the reference count.
+ */
+ ++inm->inm_refcount;
+ }
+ else {
+ /*
+ * New address; allocate a new multicast record
+ * and link it into the interface's multicast list.
+ */
+ inm = (struct in_multi *)malloc(sizeof(*inm),
+ M_IPMADDR, M_NOWAIT);
+ if (inm == NULL) {
+ splx(s);
+ return (NULL);
+ }
+ inm->inm_addr = *ap;
+ inm->inm_ifp = ifp;
+ inm->inm_refcount = 1;
+ IFP_TO_IA(ifp, ia);
+ if (ia == NULL) {
+ free(inm, M_IPMADDR);
+ splx(s);
+ return (NULL);
+ }
+ inm->inm_ia = ia;
+ ia->ia_ifa.ifa_refcnt++; /* gain a reference */
+ LIST_INSERT_HEAD(&ia->ia_multiaddrs, inm, inm_entry);
+
+ /*
+ * Ask the network driver to update its multicast reception
+ * filter appropriately for the new address.
+ */
+ ((struct sockaddr_in *)&ifr.ifr_addr)->sin_family = AF_INET;
+ ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr = *ap;
+ if ((ifp->if_ioctl == NULL) ||
+ (*ifp->if_ioctl)(ifp, SIOCADDMULTI,(caddr_t)&ifr) != 0) {
+ LIST_REMOVE(inm, inm_entry);
+ IFAFREE(&ia->ia_ifa); /* release reference */
+ free(inm, M_IPMADDR);
+ splx(s);
+ return (NULL);
+ }
+ /*
+ * Let IGMP know that we have joined a new IP multicast group.
+ */
+ igmp_joingroup(inm);
+ }
+ splx(s);
+ return (inm);
+}
+
+/*
+ * Delete a multicast address record.
+ */
+void
+in_delmulti(struct in_multi *inm)
+{
+ struct ifreq ifr;
+ int s = splnet();
+
+ if (--inm->inm_refcount == 0) {
+ /*
+ * No remaining claims to this record; let IGMP know that
+ * we are leaving the multicast group.
+ */
+ igmp_leavegroup(inm);
+ /*
+ * Unlink from list.
+ */
+ LIST_REMOVE(inm, inm_entry);
+ IFAFREE(&inm->inm_ia->ia_ifa); /* release reference */
+
+ /*
+ * Notify the network driver to update its multicast reception
+ * filter.
+ */
+ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
+ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr =
+ inm->inm_addr;
+ (*inm->inm_ifp->if_ioctl)(inm->inm_ifp, SIOCDELMULTI,
+ (caddr_t)&ifr);
+ free(inm, M_IPMADDR);
+ }
+ splx(s);
+}
diff --git a/netinet/in_cksum.c b/netinet/in_cksum.c
new file mode 100644
index 0000000..39dc9ac
--- /dev/null
+++ b/netinet/in_cksum.c
@@ -0,0 +1,186 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1988, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+/*
+ * Try to use a CPU specific version, then punt to the portable C one.
+ */
+
+
+#if (defined(__GNUC__) && (defined(__arm__) && !defined(__thumb__)))
+
+/* This currently does not support Thumb assembly */
+#include "in_cksum_arm.h"
+
+#elif (defined(__GNUC__) && defined(__i386__))
+
+#include "in_cksum_i386.h"
+
+#elif (defined(__GNUC__) && (defined(__mc68000__) || defined(__m68k__)))
+
+#include "in_cksum_m68k.h"
+#elif (defined(__GNUC__) && defined(__PPC__))
+
+#include "in_cksum_powerpc.h"
+
+#elif (defined(__GNUC__) && defined(__nios2__))
+
+#include "in_cksum_nios2.h"
+
+#elif (defined(__GNUC__) && defined(__sparc__))
+
+#include "in_cksum_sparc.h"
+
+#else
+
+#include <stdio.h> /* for puts */
+
+/*
+ * Checksum routine for Internet Protocol family headers (Portable Version).
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ */
+
+#define ADDCARRY(x) (x > 65535L ? x -= 65535L : x)
+#define REDUCE \
+ {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
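+
+/*
+ * Worked example of the fold above (illustrative only): a running
+ * 32-bit sum of 0x0001a2b3 is split into the 16-bit halves 0x0001
+ * and 0xa2b3, which add to 0xa2b4.  If the halves were to add to
+ * more than 0xffff, ADDCARRY would subtract 0xffff, i.e. perform
+ * the end-around carry required by the ones-complement checksum.
+ */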
+
+int
+in_cksum(
+ struct mbuf *m,
+ uint32_t len )
+{
+ register u_short *w;
+ register int32_t sum = 0;
+ register int32_t mlen = 0;
+ int byte_swapped = 0;
+
+ union {
+ char c[2];
+ u_short s;
+ } s_util;
+ union {
+ u_short s[2];
+ long l;
+ } l_util;
+
+ for (;m && len; m = m->m_next) {
+ if (m->m_len == 0)
+ continue;
+ w = mtod(m, u_short *);
+ if (mlen == -1) {
+ /*
+ * The first byte of this mbuf is the continuation
+ * of a word spanning between this mbuf and the
+ * last mbuf.
+ *
+ * s_util.c[0] is already saved when scanning previous
+ * mbuf.
+ */
+ s_util.c[1] = *(char *)w;
+ sum += s_util.s;
+ w = (u_short *)((char *)w + 1);
+ mlen = m->m_len - 1;
+ len--;
+ } else
+ mlen = m->m_len;
+ if (len < mlen)
+ mlen = len;
+ len -= mlen;
+ /*
+ * Force to even boundary.
+ */
+ if ((1 & (intptr_t) w) && (mlen > 0)) {
+ REDUCE;
+ sum <<= 8;
+ s_util.c[0] = *(u_char *)w;
+ w = (u_short *)((char *)w + 1);
+ mlen--;
+ byte_swapped = 1;
+ }
+ /*
+ * Unroll the loop to make overhead from
+ * branches &c small.
+ */
+ while ((mlen -= 32) >= 0) {
+ sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
+ sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
+ sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11];
+ sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15];
+ w += 16;
+ }
+ mlen += 32;
+ while ((mlen -= 8) >= 0) {
+ sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
+ w += 4;
+ }
+ mlen += 8;
+ if (mlen == 0 && byte_swapped == 0)
+ continue;
+ REDUCE;
+ while ((mlen -= 2) >= 0) {
+ sum += *w++;
+ }
+ if (byte_swapped) {
+ REDUCE;
+ sum <<= 8;
+ byte_swapped = 0;
+ if (mlen == -1) {
+ s_util.c[1] = *(char *)w;
+ sum += s_util.s;
+ mlen = 0;
+ } else
+ mlen = -1;
+ } else if (mlen == -1)
+ s_util.c[0] = *(char *)w;
+ }
+ if (len)
+ puts("cksum: out of data");
+ if (mlen == -1) {
+ /* The last mbuf has odd # of bytes. Follow the
+ standard (the odd byte may be shifted left by 8 bits
+ or not as determined by endian-ness of the machine) */
+ s_util.c[1] = 0;
+ sum += s_util.s;
+ }
+ REDUCE;
+ return (~sum & 0xffff);
+}
+#endif
diff --git a/netinet/in_cksum_arm.h b/netinet/in_cksum_arm.h
new file mode 100644
index 0000000..d884a0f
--- /dev/null
+++ b/netinet/in_cksum_arm.h
@@ -0,0 +1,236 @@
+/* $NetBSD: in_cksum_arm.c,v 1.3 2001/12/08 21:18:50 chris Exp $ */
+
+/*
+ * ARM version:
+ *
+ * Copyright (c) 1997 Mark Brinicome
+ * Copyright (c) 1997 Causality Limited
+ *
+ * Based on the sparc version.
+ */
+
+/*
+ * Sparc version:
+ *
+ * Copyright (c) 1995 Zubin Dittia.
+ * Copyright (c) 1995 Matthew R. Green.
+ * Copyright (c) 1994 Charles M. Hannum.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/11/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+
+/*
+ * Checksum routine for Internet Protocol family headers.
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ *
+ * ARM version.
+ */
+
+#define ADD64 __asm __volatile(" \n\
+ ldmia %0!, {%2, %3, %4, %5} \n\
+ adds %1,%7,%2; adcs %1,%1,%3 \n\
+ adcs %1,%1,%4; adcs %1,%1,%5 \n\
+ ldmia %0!, {%2, %3, %4, %5} \n\
+ adcs %1,%1,%2; adcs %1,%1,%3 \n\
+ adcs %1,%1,%4; adcs %1,%1,%5 \n\
+ ldmia %0!, {%2, %3, %4, %5} \n\
+ adcs %1,%1,%2; adcs %1,%1,%3 \n\
+ adcs %1,%1,%4; adcs %1,%1,%5 \n\
+ ldmia %0!, {%2, %3, %4, %5} \n\
+ adcs %1,%1,%2; adcs %1,%1,%3 \n\
+ adcs %1,%1,%4; adcs %1,%1,%5 \n\
+ adcs %1,%1,#0\n" \
+ : "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
+ : "0" (w), "r" (sum) \
+ : "cc")
+
+#define ADD32 __asm __volatile(" \n\
+ ldmia %0!, {%2, %3, %4, %5} \n\
+ adds %1,%7,%2; adcs %1,%1,%3 \n\
+ adcs %1,%1,%4; adcs %1,%1,%5 \n\
+ ldmia %0!, {%2, %3, %4, %5} \n\
+ adcs %1,%1,%2; adcs %1,%1,%3 \n\
+ adcs %1,%1,%4; adcs %1,%1,%5 \n\
+ adcs %1,%1,#0\n" \
+ : "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
+ : "0" (w), "r" (sum) \
+ : "cc")
+
+#define ADD16 __asm __volatile(" \n\
+ ldmia %0!, {%2, %3, %4, %5} \n\
+ adds %1,%7,%2; adcs %1,%1,%3 \n\
+ adcs %1,%1,%4; adcs %1,%1,%5 \n\
+ adcs %1,%1,#0\n" \
+ : "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
+ : "0" (w), "r" (sum) \
+ : "cc")
+
+#define ADD8 __asm __volatile(" \n\
+ ldmia %0!, {%2, %3} \n\
+ adds %1,%5,%2; adcs %1,%1,%3 \n\
+ adcs %1,%1,#0\n" \
+ : "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2) \
+ : "0" (w), "r" (sum) \
+ : "cc" )
+
+#define ADD4 __asm __volatile(" \n\
+ ldr %2,[%0],#4 \n\
+ adds %1,%4,%2 \n\
+ adcs %1,%1,#0\n" \
+ : "=r" (w), "=r" (sum), "=&r" (tmp1) \
+ : "0" (w), "r" (sum) \
+ : "cc")
+
+/*#define REDUCE {sum = (sum & 0xffff) + (sum >> 16);}*/
+#define REDUCE __asm __volatile(" \n\
+ mov %2, #0x00ff \n\
+ orr %2, %2, #0xff00 \n\
+ and %2, %0, %2 \n\
+ add %0, %2, %0, lsr #16\n" \
+ : "=r" (sum) \
+ : "0" (sum), "r" (tmp1))
+
+#define ADDCARRY {if (sum > 0xffff) sum -= 0xffff;}
+#define ROL {sum = sum << 8;} /* depends on recent REDUCE */
+#define ADDBYTE {ROL; sum += (*w << 8); byte_swapped ^= 1;}
+#define ADDSHORT {sum += *(u_short *)w;}
+#define ADVANCE(n) {w += n; mlen -= n;}
+#define ADVANCEML(n) {mlen -= n;}
+
+static __inline__ int
+in_cksum_internal(struct mbuf *m, int off, int len, u_int sum)
+{
+ u_char *w;
+ int mlen = 0;
+ int byte_swapped = 0;
+
+ /*
+ * Declare four temporary registers for use by the asm code. We
+ * allow the compiler to pick which specific machine registers to
+ * use, instead of hard-coding this in the asm code above.
+ */
+ register u_int tmp1=0, tmp2, tmp3, tmp4;
+
+ for (; m && len; m = m->m_next) {
+ if (m->m_len == 0)
+ continue;
+ w = mtod(m, u_char *) + off;
+ mlen = m->m_len - off;
+ off = 0;
+ if (len < mlen)
+ mlen = len;
+ len -= mlen;
+
+ /*
+ * Ensure that we're aligned on a word boundary here so
+ * that we can do 32 bit operations below.
+ */
+ if ((3 & (long)w) != 0) {
+ REDUCE;
+ if ((1 & (long)w) != 0 && mlen >= 1) {
+ ADDBYTE;
+ ADVANCE(1);
+ }
+ if ((2 & (long)w) != 0 && mlen >= 2) {
+ ADDSHORT;
+ ADVANCE(2);
+ }
+ }
+
+ /*
+		 * Do as many 32-bit operations as possible using the
+		 * 64/32/16/8/4 macros above, taking the largest
+		 * chunks first.
+ */
+
+ while (mlen >= 64) {
+ ADD64;
+ ADVANCEML(64);
+ }
+ if (mlen >= 32) {
+ ADD32;
+ ADVANCEML(32);
+ }
+ if (mlen >= 16) {
+ ADD16;
+ ADVANCEML(16);
+ }
+ if (mlen >= 8) {
+ ADD8;
+ ADVANCEML(8);
+ }
+ if (mlen >= 4) {
+ ADD4;
+ ADVANCEML(4)
+ }
+ if (mlen == 0)
+ continue;
+
+ REDUCE;
+ if (mlen >= 2) {
+ ADDSHORT;
+ ADVANCE(2);
+ }
+ if (mlen == 1) {
+ ADDBYTE;
+ }
+ }
+ if (byte_swapped) {
+ REDUCE;
+ ROL;
+ }
+ REDUCE;
+ ADDCARRY;
+
+ return (0xffff ^ sum);
+}
+
+int
+in_cksum(
+ struct mbuf *m,
+ int len)
+{
+ int cksum;
+	cksum = in_cksum_internal(m, 0, len, 0);
+ return cksum;
+}
diff --git a/netinet/in_cksum_i386.h b/netinet/in_cksum_i386.h
new file mode 100644
index 0000000..1174287
--- /dev/null
+++ b/netinet/in_cksum_i386.h
@@ -0,0 +1,202 @@
+/*
+ * Checksum routine for Internet Protocol family headers.
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ *
+ * This implementation is the 386 version.
+ */
+
+#include <stdio.h> /* for puts */
+
+#undef ADDCARRY
+#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff
+#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}
+
+/*
+ * Thanks to gcc we don't have to guess
+ * which registers contain sum & w.
+ */
+#define ADD(n) __asm__ volatile \
+ ("addl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w))
+#define ADDC(n) __asm__ volatile \
+ ("adcl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w))
+#define LOAD(n) __asm__ volatile \
+ ("movb " #n "(%1), %0" : "=q" (junk) : "r" (w))
+#define MOP __asm__ volatile \
+ ("adcl $0, %0" : "=r" (sum) : "0" (sum))
+
+int
+in_cksum(
+ struct mbuf *m,
+ int len )
+{
+ register u_short *w;
+ register unsigned sum = 0;
+ register int mlen = 0;
+ int byte_swapped = 0;
+ union { char c[2]; u_short s; } su;
+
+ for (;m && len; m = m->m_next) {
+ if (m->m_len == 0)
+ continue;
+ w = mtod(m, u_short *);
+ if (mlen == -1) {
+ /*
+ * The first byte of this mbuf is the continuation
+ * of a word spanning between this mbuf and the
+ * last mbuf.
+ */
+
+ /* su.c[0] is already saved when scanning previous
+ * mbuf. sum was REDUCEd when we found mlen == -1
+ */
+ su.c[1] = *(u_char *)w;
+ sum += su.s;
+ w = (u_short *)((char *)w + 1);
+ mlen = m->m_len - 1;
+ len--;
+ } else
+ mlen = m->m_len;
+ if (len < mlen)
+ mlen = len;
+ len -= mlen;
+ /*
+ * Force to long boundary so we do longword aligned
+ * memory operations
+ */
+ if (3 & (int) w) {
+ REDUCE;
+ if ((1 & (int) w) && (mlen > 0)) {
+ sum <<= 8;
+ su.c[0] = *(char *)w;
+ w = (u_short *)((char *)w + 1);
+ mlen--;
+ byte_swapped = 1;
+ }
+ if ((2 & (int) w) && (mlen >= 2)) {
+ sum += *w++;
+ mlen -= 2;
+ }
+ }
+ /*
+ * Advance to a 486 cache line boundary.
+ */
+ if (4 & (int) w && mlen >= 4) {
+ ADD(0);
+ MOP;
+ w += 2;
+ mlen -= 4;
+ }
+ if (8 & (int) w && mlen >= 8) {
+ ADD(0);
+ ADDC(4);
+ MOP;
+ w += 4;
+ mlen -= 8;
+ }
+ /*
+		 * Do as much of the checksum as possible 32 bits at a time.
+ * In fact, this loop is unrolled to make overhead from
+ * branches &c small.
+ */
+ mlen -= 1;
+ while ((mlen -= 32) >= 0) {
+ u_char junk;
+ /*
+ * Add with carry 16 words and fold in the last
+ * carry by adding a 0 with carry.
+ *
+ * The early ADD(16) and the LOAD(32) are to load
+ * the next 2 cache lines in advance on 486's. The
+ * 486 has a penalty of 2 clock cycles for loading
+ * a cache line, plus whatever time the external
+ * memory takes to load the first word(s) addressed.
+ * These penalties are unavoidable. Subsequent
+ * accesses to a cache line being loaded (and to
+ * other external memory?) are delayed until the
+ * whole load finishes. These penalties are mostly
+ * avoided by not accessing external memory for
+ * 8 cycles after the ADD(16) and 12 cycles after
+			 * the LOAD(32). The loop body runs only while at
+			 * least 33 (not 32) bytes remain, to guarantee that
+ * the LOAD(32) is within bounds.
+ */
+ ADD(16);
+ ADDC(0);
+ ADDC(4);
+ ADDC(8);
+ ADDC(12);
+ LOAD(32);
+ ADDC(20);
+ ADDC(24);
+ ADDC(28);
+ MOP;
+ w += 16;
+ }
+ mlen += 32 + 1;
+ if (mlen >= 32) {
+ ADD(16);
+ ADDC(0);
+ ADDC(4);
+ ADDC(8);
+ ADDC(12);
+ ADDC(20);
+ ADDC(24);
+ ADDC(28);
+ MOP;
+ w += 16;
+ mlen -= 32;
+ }
+ if (mlen >= 16) {
+ ADD(0);
+ ADDC(4);
+ ADDC(8);
+ ADDC(12);
+ MOP;
+ w += 8;
+ mlen -= 16;
+ }
+ if (mlen >= 8) {
+ ADD(0);
+ ADDC(4);
+ MOP;
+ w += 4;
+ mlen -= 8;
+ }
+ if (mlen == 0 && byte_swapped == 0)
+ continue; /* worth 1% maybe ?? */
+ REDUCE;
+ while ((mlen -= 2) >= 0) {
+ sum += *w++;
+ }
+ if (byte_swapped) {
+ sum <<= 8;
+ byte_swapped = 0;
+ if (mlen == -1) {
+ su.c[1] = *(char *)w;
+ sum += su.s;
+ mlen = 0;
+ } else
+ mlen = -1;
+ } else if (mlen == -1)
+ /*
+			 * This mbuf has an odd number of bytes.
+			 * There could be a word split between
+			 * this mbuf and the next mbuf.
+			 * Save the last byte (to prepend to the next mbuf).
+ */
+ su.c[0] = *(char *)w;
+ }
+
+ if (len)
+ puts("cksum: out of data");
+ if (mlen == -1) {
+ /* The last mbuf has odd # of bytes. Follow the
+ standard (the odd byte is shifted left by 8 bits) */
+ su.c[1] = 0;
+ sum += su.s;
+ }
+ REDUCE;
+ return (~sum & 0xffff);
+}
diff --git a/netinet/in_cksum_m68k.h b/netinet/in_cksum_m68k.h
new file mode 100644
index 0000000..32e6978
--- /dev/null
+++ b/netinet/in_cksum_m68k.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 1988, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+#if defined(__mcoldfire__)
+# define IS_COLDFIRE 1
+#else
+# define IS_COLDFIRE 0
+#endif
+
+#define REDUCE { sum = (sum & 0xFFFF) + (sum >> 16); if (sum > 0xFFFF) sum -= 0xFFFF; }
+
+/*
+ * Motorola 68k version of Internet Protocol Checksum routine
+ * W. Eric Norum
+ * Saskatchewan Accelerator Laboratory
+ * August, 1998
+ */
+int
+in_cksum( struct mbuf *m,
+ int len )
+{
+ unsigned short *w;
+ unsigned long sum = 0;
+ int mlen = 0;
+ int byte_swapped = 0;
+ union {
+ char c[2];
+ u_short s;
+ } s_util;
+
+ for ( ; m && len ; m = m->m_next) {
+ if (m->m_len == 0)
+ continue;
+ w = mtod(m, u_short *);
+ if (mlen == -1) {
+ /*
+ * The first byte of this mbuf is the continuation
+ * of a word spanning between this mbuf and the
+ * last mbuf.
+ *
+ * s_util.c[0] is already saved when scanning previous
+ * mbuf.
+ */
+ s_util.c[1] = *(char *)w;
+ sum += s_util.s;
+ w = (u_short *)((char *)w + 1);
+ mlen = m->m_len - 1;
+ len--;
+ } else
+ mlen = m->m_len;
+ if (len < mlen)
+ mlen = len;
+ len -= mlen;
+
+ /*
+ * Force to longword boundary.
+ */
+ if (3 & (int)w) {
+ REDUCE;
+ if ((1 & (int) w) && (mlen > 0)) {
+ sum <<= 8;
+ s_util.c[0] = *(u_char *)w;
+ w = (u_short *)((char *)w + 1);
+ mlen--;
+ byte_swapped = 1;
+ }
+ if ((2 & (int) w) && (mlen >= 2)) {
+ sum += *w++;
+ mlen -= 2;
+ }
+ }
+
+ /*
+ * Sum all the longwords in the buffer.
+ * See RFC 1071 -- Computing the Internet Checksum.
+ * It should work for all 68k family members.
+ */
+ {
+ unsigned long tcnt = mlen, t1;
+ __asm__ volatile (
+ "movel %2,%3\n\t"
+ "lsrl #6,%2 | count/64 = # loop traversals\n\t"
+ "andl #0x3c,%3 | Then find fractions of a chunk\n\t"
+ "negl %3\n\t | Each long uses 4 instruction bytes\n\t"
+#if IS_COLDFIRE
+ "addql #1,%2 | Clear X (extended carry flag)\n\t"
+ "subql #1,%2 | \n\t"
+#else
+ "andi #0xf,%%cc | Clear X (extended carry flag)\n\t"
+#endif
+ "jmp %%pc@(2f-.-2:b,%3) | Jump into loop\n"
+ "1: | Begin inner loop...\n\t"
+ "movel %1@+,%3 | 0: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | 1: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | 2: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | 3: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | 4: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | 5: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | 6: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | 7: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | 8: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | 9: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | A: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | B: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | C: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | D: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | E: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n\t"
+ "movel %1@+,%3 | F: Fetch 32-bit word\n\t"
+ "addxl %3,%0 | Add word + previous carry\n"
+ "2: | End of unrolled loop\n\t"
+#if IS_COLDFIRE
+ "moveq #0,%3 | Add in last carry\n\t"
+ "addxl %3,%0 |\n\t"
+ "subql #1,%2 | Update loop count\n\t"
+ "bplb 1b | Loop (with X clear) if not done\n\t"
+ "movel #0xffff,%2 | Get word mask\n\t"
+ "movel %0,%3 | Fold 32 bit sum to 16 bits\n\t"
+ "swap %3 |\n\t"
+ "andl %2,%0 | Mask to 16-bit sum\n\t"
+ "andl %2,%3 | Mask to 16-bit sum\n\t"
+ "addl %3,%0 |\n\t"
+ "movel %0,%3 | Add in last carry\n\t"
+ "swap %3 |\n\t"
+ "addl %3,%0 |\n\t"
+ "andl %2,%0 | Mask to 16-bit sum\n\t"
+#else
+ "dbf %2,1b | (NB- dbf doesn't affect X)\n\t"
+ "movel %0,%3 | Fold 32 bit sum to 16 bits\n\t"
+ "swap %3 | (NB- swap doesn't affect X)\n\t"
+ "addxw %3,%0 |\n\t"
+ "moveq #0,%3 | Add in last carry\n\t"
+ "addxw %3,%0 |\n\t"
+ "andl #0xffff,%0 | Mask to 16-bit sum\n"
+#endif
+ :
+ "=d" (sum), "=a" (w), "=d" (tcnt) , "=d" (t1) :
+ "0" (sum), "1" (w), "2" (tcnt) :
+ "cc", "memory");
+ }
+ mlen &= 3;
+
+ /*
+ * Soak up the last 1, 2 or 3 bytes
+ */
+ while ((mlen -= 2) >= 0)
+ sum += *w++;
+ if (byte_swapped) {
+ REDUCE;
+ sum <<= 8;
+ byte_swapped = 0;
+ if (mlen == -1) {
+ s_util.c[1] = *(char *)w;
+ sum += s_util.s;
+ mlen = 0;
+ } else
+ mlen = -1;
+ } else if (mlen == -1)
+ s_util.c[0] = *(char *)w;
+ }
+ if (len)
+ sum = 0xDEAD;
+ if (mlen == -1) {
+ /* The last mbuf has odd # of bytes. Follow the
+ standard (the odd byte may be shifted left by 8 bits
+ or not as determined by endian-ness of the machine) */
+ s_util.c[1] = 0;
+ sum += s_util.s;
+ }
+ REDUCE;
+ return (~sum & 0xffff);
+}
diff --git a/netinet/in_cksum_nios2.h b/netinet/in_cksum_nios2.h
new file mode 100644
index 0000000..1b34a28
--- /dev/null
+++ b/netinet/in_cksum_nios2.h
@@ -0,0 +1,248 @@
+
+/*
+ * Altera Nios2 Internet checksum computation
+ *
+ * Author: Jeffrey O. Hill
+ *
+ * Copyright 2012. Los Alamos National Security, LLC.
+ * This material was produced under U.S. Government contract
+ * DE-AC52-06NA25396 for Los Alamos National Laboratory (LANL),
+ * which is operated by Los Alamos National Security, LLC for
+ * the U.S. Department of Energy. The U.S. Government has rights
+ * to use, reproduce, and distribute this software. NEITHER THE
+ * GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY
+ * WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR
+ * THE USE OF THIS SOFTWARE.
+ *
+ * COPYRIGHT (c) 1989-2012.
+ * On-Line Applications Research Corporation (OAR).
+ *
+ * Copyright (c) 1997 Mark Brinicome
+ * Copyright (c) 1997 Causality Limited
+ *
+ * Copyright (c) 1995 Zubin Dittia.
+ * Copyright (c) 1995 Matthew R. Green.
+ * Copyright (c) 1994 Charles M. Hannum.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Based on the ARM / SPARC version, but mostly using inline
+ * functions in place of nasty macros.
+ *
+ * It would be a great idea to somehow detect at runtime
+ * that the Nios2 has a user-defined instruction that
+ * computes the checksum and invoke it here (we could call a
+ * function in the BSP).
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <machine/in_cksum.h>
+
+/*
+ * Checksum routine for Internet Protocol family headers.
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ */
+static inline uint32_t _NIOS2_Add_ones_complement_64
+( uint32_t sum, const uint32_t * const pWd )
+{
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[3] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[4] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[5] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[6] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[7] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[8] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[9] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[10] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[11] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[12] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[13] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[14] );
+ return _NIOS2_Add_ones_complement ( sum, pWd[15] );
+}
+
+static inline uint32_t _NIOS2_Add_ones_complement_32
+( uint32_t sum, const uint32_t * const pWd )
+{
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[3] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[4] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[5] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[6] );
+ return _NIOS2_Add_ones_complement ( sum, pWd[7] );
+}
+
+static inline uint32_t _NIOS2_Add_ones_complement_16
+( uint32_t sum, const uint32_t * const pWd )
+{
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+ return _NIOS2_Add_ones_complement ( sum, pWd[3] );
+}
+
+static inline uint32_t _NIOS2_Add_ones_complement_8
+( uint32_t sum, const uint32_t * const pWd )
+{
+ sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+ return _NIOS2_Add_ones_complement ( sum, pWd[1] );
+}
+
+static inline uint32_t _NIOS2_Add_ones_complement_4
+( uint32_t sum, const uint32_t * const pWd )
+{
+ return _NIOS2_Add_ones_complement ( sum, pWd[0] );
+}
+
+static inline uint32_t _NIOS2_Reduce_checksum ( uint32_t a )
+{
+ uint32_t tmp;
+ __asm__ __volatile__ (
+ " srli %1, %0, 16 \n" /* tmp = a >> 16 */
+ " andi %0, %0, 0xffff \n" /* a = a & 0xffff */
+ " add %0, %0, %1 \n" /* a = a + tmp */
+ : "+&r"(a), "=&r"(tmp)
+ );
+ return a;
+}
+
+#define combineTokens( A, B ) A ## B
+
+#define ADD_AND_ADVANCE( N ) \
+if ( mlen >= N ) { \
+ sum = combineTokens ( _NIOS2_Add_ones_complement_, N ) \
+ ( sum, ( uint32_t * ) w ); \
+ mlen -= N; \
+ w += N; \
+}
+
+static int
+in_cksum_internal(struct mbuf *m, int off, int len, u_int sum)
+{
+ const uint8_t * w;
+ int mlen = 0;
+ int byte_swapped = 0;
+
+ for (; m && len; m = m->m_next)
+ {
+ if (m->m_len == 0)
+ continue;
+ w = mtod(m, u_char *) + off;
+ mlen = m->m_len - off;
+ off = 0;
+ if (len < mlen)
+ mlen = len;
+ len -= mlen;
+
+ /*
+ * Ensure that we're aligned on a word boundary here so
+ * that we can do 32 bit operations below.
+ */
+ if ((3 & (uint32_t)w) != 0)
+ {
+ sum = _NIOS2_Reduce_checksum ( sum );
+ if ((1 & (uint32_t)w) != 0 && mlen >= 1)
+ {
+ sum <<= 8u;
+ sum += *w << 8u;
+ byte_swapped ^= 1;
+ w += 1;
+ mlen -= 1;
+ }
+ if ((2 & (uint32_t)w) != 0 && mlen >= 2)
+ {
+ sum += *(uint16_t *)w;
+ w += 2;
+ mlen -= 2;
+ }
+ }
+
+ /*
+ * instead of using a loop, process in unrolled chunks
+ */
+ while ( mlen >= 64 )
+ {
+ sum = _NIOS2_Add_ones_complement_64
+ ( sum, ( uint32_t * ) w );
+ mlen -= 64;
+ w += 64;
+ }
+ ADD_AND_ADVANCE ( 32 );
+ ADD_AND_ADVANCE ( 16 );
+ ADD_AND_ADVANCE ( 8 );
+ ADD_AND_ADVANCE ( 4 );
+
+ if ( mlen > 0 )
+ {
+ sum = _NIOS2_Reduce_checksum ( sum );
+ if ( mlen >= 2 )
+ {
+ sum += *(uint16_t *)w;
+ w += 2;
+ mlen -= 2;
+ }
+ if ( mlen == 1 )
+ {
+ sum <<= 8u;
+ sum += *w << 8u;
+ byte_swapped ^= 1;
+ }
+ }
+ }
+ if ( byte_swapped )
+ {
+ sum = _NIOS2_Reduce_checksum ( sum );
+ sum <<= 8u;
+ }
+ sum = _NIOS2_Add_ones_complement_word_halves ( sum );
+ sum ^= 0xffff;
+ return sum;
+}
+
+int
+in_cksum (
+ struct mbuf *m,
+ int len )
+{
+ return in_cksum_internal ( m, 0, len, 0 );
+}
diff --git a/netinet/in_cksum_powerpc.h b/netinet/in_cksum_powerpc.h
new file mode 100644
index 0000000..ad5e9e1
--- /dev/null
+++ b/netinet/in_cksum_powerpc.h
@@ -0,0 +1,172 @@
+/*
+ * Checksum routine for Internet Protocol family headers.
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ *
+ * This implementation is the PowerPC version.
+ */
+
+#include <stdio.h> /* for puts */
+
+#undef ADDCARRY
+#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff
+#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}
+
+/*
+ * Thanks to gcc we don't have to guess
+ * which registers contain sum & w.
+ */
+
+#define LDTMP(n) tmp = *((u_int *)((u_char *)w + n))
+
+#define ADD(n) \
+ LDTMP(n); \
+ __asm__ volatile("addc %0,%0,%2" : "=r" (sum) : "0" (sum), "r" (tmp))
+
+#define ADDC(n) \
+ LDTMP(n); \
+ __asm__ volatile("adde %0,%0,%2" : "=r" (sum) : "0" (sum), "r" (tmp))
+
+#define MOP \
+ tmp = 0; \
+ __asm__ volatile("adde %0,%0,%2" : "=r" (sum) : "0" (sum), "r" (tmp))
+
+#define LOAD(n) junk = (u_char) *((volatile u_char *) w + n)
+
+
+int
+in_cksum(
+ struct mbuf *m,
+ int len
+)
+{
+ u_char junk;
+ register u_short *w;
+ register unsigned sum = 0;
+ register unsigned tmp;
+ register int mlen = 0;
+ int byte_swapped = 0;
+ union { char c[2]; u_short s; } su;
+
+ for (;m && len; m = m->m_next) {
+ if (m->m_len == 0)
+ continue;
+ w = mtod(m, u_short *);
+ if (mlen == -1) {
+ /*
+ * The first byte of this mbuf is the continuation
+ * of a word spanning between this mbuf and the
+ * last mbuf.
+ */
+
+ /* su.c[0] is already saved when scanning previous
+ * mbuf. sum was REDUCEd when we found mlen == -1
+ */
+ su.c[1] = *(u_char *)w;
+ sum += su.s;
+ w = (u_short *)((char *)w + 1);
+ mlen = m->m_len - 1;
+ len--;
+ } else
+ mlen = m->m_len;
+ if (len < mlen)
+ mlen = len;
+ len -= mlen;
+ /*
+ * Force to long boundary so we do longword aligned
+ * memory operations
+ */
+ if (3 & (int) w) {
+ REDUCE;
+ if ((1 & (int) w) && (mlen > 0)) {
+ sum <<= 8;
+ su.c[0] = *(char *)w;
+ w = (u_short *)((char *)w + 1);
+ mlen--;
+ byte_swapped = 1;
+ }
+ if ((2 & (int) w) && (mlen >= 2)) {
+ sum += *w++;
+ mlen -= 2;
+ }
+ }
+
+ /*
+		 * Do as much of the checksum as possible 32 bits at a time.
+ * In fact, this loop is unrolled to keep overhead from
+ * branches small.
+ */
+ while (mlen >= 32) {
+ /*
+ * Add with carry 16 words and fold in the last
+ * carry by adding a 0 with carry.
+ *
+ * The early ADD(16) and the LOAD(32) are intended
+ * to help get the data into the cache.
+ */
+ ADD(16);
+ ADDC(0);
+ ADDC(4);
+ ADDC(8);
+ ADDC(12);
+ LOAD(32);
+ ADDC(20);
+ ADDC(24);
+ ADDC(28);
+ MOP;
+ w += 16;
+ mlen -= 32;
+ }
+ if (mlen >= 16) {
+ ADD(0);
+ ADDC(4);
+ ADDC(8);
+ ADDC(12);
+ MOP;
+ w += 8;
+ mlen -= 16;
+ }
+ if (mlen >= 8) {
+ ADD(0);
+ ADDC(4);
+ MOP;
+ w += 4;
+ mlen -= 8;
+ }
+ if (mlen == 0 && byte_swapped == 0)
+ continue; /* worth 1% maybe ?? */
+ REDUCE;
+ while ((mlen -= 2) >= 0) {
+ sum += *w++;
+ }
+ if (byte_swapped) {
+ sum <<= 8;
+ byte_swapped = 0;
+ if (mlen == -1) {
+ su.c[1] = *(char *)w;
+ sum += su.s;
+ mlen = 0;
+ } else
+ mlen = -1;
+ } else if (mlen == -1)
+ /*
+			 * This mbuf has an odd number of bytes.
+			 * There could be a word split between
+			 * this mbuf and the next mbuf.
+			 * Save the last byte (to prepend to the next mbuf).
+ */
+ su.c[0] = *(char *)w;
+ }
+
+ if (len)
+ puts("cksum: out of data");
+ if (mlen == -1) {
+ /* The last mbuf has odd # of bytes. Follow the
+ standard (the odd byte is shifted left by 8 bits) */
+ su.c[1] = 0;
+ sum += su.s;
+ }
+ REDUCE;
+ return (~sum & 0xffff);
+}
diff --git a/netinet/in_cksum_sparc.h b/netinet/in_cksum_sparc.h
new file mode 100644
index 0000000..b88ce92
--- /dev/null
+++ b/netinet/in_cksum_sparc.h
@@ -0,0 +1,269 @@
+/* $NetBSD: in_cksum.c,v 1.12.10.4 2005/12/11 10:28:36 christos Exp $ */
+
+/*
+ * Copyright (c) 1995 Matthew R. Green.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, and it's contributors.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Copyright (c) 1995 Zubin Dittia.
+ * Copyright (c) 1994, 1998 Charles M. Hannum.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, and it's contributors.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/11/93
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+
+/*
+ * Checksum routine for Internet Protocol family headers.
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ *
+ * SPARC version.
+ */
+
+/*
+ * The checksum computation code here is significantly faster than its
+ * vanilla C counterpart (by significantly, I mean 2-3 times faster if
+ * the data is in cache, and 1.5-2 times faster if the data is not in
+ * cache).
+ * We optimize on three fronts:
+ * 1. By using the add-with-carry (addxcc) instruction, we can use
+ * 32-bit operations instead of 16-bit operations.
+ * 2. By unrolling the main loop to reduce branch overheads.
+ * 3. By doing a sequence of load,load,add,add,load,load,add,add,
+ * we can avoid the extra stall cycle which is incurred if the
+ * instruction immediately following a load tries to use the
+ * target register of the load.
+ * Another possible optimization is to replace a pair of 32-bit loads
+ * with a single 64-bit load (ldd) instruction, but I found that although
+ * this improves performance somewhat on Sun4c machines, it actually
+ * reduces performance considerably on Sun4m machines (I don't know why).
+ * So I chose to leave it out.
+ *
+ * Zubin Dittia (zubin@dworkin.wustl.edu)
+ */
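+
+/*
+ * A short illustration (example values) of why adding 16-bit words
+ * 32 bits at a time with end-around carry is safe: summing 0xf000
+ * and 0xf000 word by word gives 0x1e000 -> 0xe001 after the carry
+ * is folded back in; accumulating the same words in a 32-bit sum
+ * and folding once at the end gives 0x0001e000 -> 0x0001 + 0xe000
+ * = 0xe001 as well, so the grouping does not change the result.
+ */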
+
+#define Asm __asm __volatile
+#define ADD64 Asm(" ld [%4+ 0],%1; ld [%4+ 4],%2; \
+ addcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+ 8],%1; ld [%4+12],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+16],%1; ld [%4+20],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+24],%1; ld [%4+28],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+32],%1; ld [%4+36],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+40],%1; ld [%4+44],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+48],%1; ld [%4+52],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+56],%1; ld [%4+60],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ addxcc %0,0,%0" \
+ : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
+ : "0" (sum), "r" (w))
+#define ADD32 Asm(" ld [%4+ 0],%1; ld [%4+ 4],%2; \
+ addcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+ 8],%1; ld [%4+12],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+16],%1; ld [%4+20],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+24],%1; ld [%4+28],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ addxcc %0,0,%0" \
+ : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
+ : "0" (sum), "r" (w))
+#define ADD16 Asm(" ld [%4+ 0],%1; ld [%4+ 4],%2; \
+ addcc %0,%1,%0; addxcc %0,%2,%0; \
+ ld [%4+ 8],%1; ld [%4+12],%2; \
+ addxcc %0,%1,%0; addxcc %0,%2,%0; \
+ addxcc %0,0,%0" \
+ : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
+ : "0" (sum), "r" (w))
+#define ADD8 Asm(" ld [%4+ 0],%1; ld [%4+ 4],%2; \
+ addcc %0,%1,%0; addxcc %0,%2,%0; \
+ addxcc %0,0,%0" \
+ : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
+ : "0" (sum), "r" (w))
+#define ADD4 Asm(" ld [%3+ 0],%1; \
+ addcc %0,%1,%0; \
+ addxcc %0,0,%0" \
+ : "=r" (sum), "=&r" (tmp1) \
+ : "0" (sum), "r" (w))
+
+#define REDUCE {sum = (sum & 0xffff) + (sum >> 16);}
+#define ADDCARRY {if (sum > 0xffff) sum -= 0xffff;}
+#define ROL {sum = sum << 8;} /* depends on recent REDUCE */
+#define ADDBYTE {ROL; sum += *w; byte_swapped ^= 1;}
+#define ADDSHORT {sum += *(u_short *)w;}
+#define ADVANCE(n) {w += n; mlen -= n;}
+
+static __inline__ int
+in_cksum_internal(struct mbuf *m, int off, int len, u_int sum)
+{
+ u_char *w;
+ int mlen = 0;
+ int byte_swapped = 0;
+
+ /*
+ * Declare two temporary registers for use by the asm code. We
+ * allow the compiler to pick which specific machine registers to
+ * use, instead of hard-coding this in the asm code above.
+ */
+ u_int tmp1, tmp2;
+
+ for (; m && len; m = m->m_next) {
+ if (m->m_len == 0)
+ continue;
+ w = mtod(m, u_char *) + off;
+ mlen = m->m_len - off;
+ off = 0;
+ if (len < mlen)
+ mlen = len;
+ len -= mlen;
+
+ /*
+ * Ensure that we're aligned on a word boundary here so
+ * that we can do 32 bit operations below.
+ */
+ if ((3 & (long)w) != 0) {
+ REDUCE;
+ if ((1 & (long)w) != 0 && mlen >= 1) {
+ ADDBYTE;
+ ADVANCE(1);
+ }
+ if ((2 & (long)w) != 0 && mlen >= 2) {
+ ADDSHORT;
+ ADVANCE(2);
+ }
+ }
+
+ /*
+		 * Do as many 32-bit operations as possible, using the
+		 * largest of the 64/32/16/8/4 macros above that still
+		 * fits the remaining data.
+ */
+ while (mlen >= 64) {
+ ADD64;
+ ADVANCE(64);
+ }
+ if (mlen >= 32) {
+ ADD32;
+ ADVANCE(32);
+ }
+ if (mlen >= 16) {
+ ADD16;
+ ADVANCE(16);
+ }
+ if (mlen >= 8) {
+ ADD8;
+ ADVANCE(8);
+ }
+ if (mlen >= 4) {
+ ADD4;
+			ADVANCE(4);
+ }
+ if (mlen == 0)
+ continue;
+
+ REDUCE;
+ if (mlen >= 2) {
+ ADDSHORT;
+ ADVANCE(2);
+ }
+ if (mlen == 1) {
+ ADDBYTE;
+ }
+ }
+ if (byte_swapped) {
+ REDUCE;
+ ROL;
+ }
+ REDUCE;
+ ADDCARRY;
+
+ return (0xffff ^ sum);
+}
+
+int
+in_cksum(struct mbuf *m, int len)
+{
+
+ return (in_cksum_internal(m, 0, len, 0));
+}
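The comment at the top of this file describes the 32-bit add-with-carry strategy used by the assembler macros. As a hypothetical stand-alone illustration (not part of this commit; cksum_sketch is an invented name), the same accumulate-then-fold idea can be written in portable C roughly as follows:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /*
     * Hypothetical sketch only: a portable rendering of the 32-bit
     * accumulate-then-fold idea that the assembler macros above implement,
     * with a 64-bit accumulator standing in for the addxcc carry chain.
     */
    static uint16_t
    cksum_sketch(const uint8_t *buf, size_t len)
    {
    	uint64_t sum = 0;
    	size_t i = 0;

    	/* add as many 32-bit words as possible */
    	for (; i + 4 <= len; i += 4) {
    		uint32_t w;
    		memcpy(&w, buf + i, sizeof(w));	/* avoids alignment traps */
    		sum += w;
    	}
    	/* trailing 16-bit word, if any */
    	if (i + 2 <= len) {
    		uint16_t s;
    		memcpy(&s, buf + i, sizeof(s));
    		sum += s;
    		i += 2;
    	}
    	/* trailing byte, if any (byte placement is endian-dependent; this
    	 * is the little-endian case of the ADDBYTE/ROL bookkeeping) */
    	if (i < len)
    		sum += buf[i];

    	/* fold all carries back into 16 bits, like REDUCE/ADDCARRY */
    	while (sum >> 16)
    		sum = (sum & 0xffff) + (sum >> 16);
    	return (uint16_t)~sum;
    }
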
diff --git a/netinet/in_pcb.c b/netinet/in_pcb.c
new file mode 100644
index 0000000..d9d4417
--- /dev/null
+++ b/netinet/in_pcb.c
@@ -0,0 +1,733 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
+ * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.158 2005/01/07 01:45:44 imp Exp $
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+#include <limits.h>
+
+struct in_addr zeroin_addr;
+
+static void in_pcbinshash(struct inpcb *);
+static void in_rtchange(struct inpcb *, int);
+
+/*
+ * These configure the range of local port addresses assigned to
+ * "unspecified" outgoing connections/packets/whatever.
+ */
+static int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
+static int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
+static int ipport_firstauto = IPPORT_RESERVED; /* 1024 */
+static int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */
+static int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 40000 */
+static int ipport_hilastauto = IPPORT_HILASTAUTO; /* 44999 */
+
+#define RANGECHK(var, min, max) \
+ if ((var) < (min)) { (var) = (min); } \
+ else if ((var) > (max)) { (var) = (max); }
+
+static int
+sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
+{
+ int error = sysctl_handle_int(oidp,
+ oidp->oid_arg1, oidp->oid_arg2, req);
+ if (!error) {
+ RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
+ RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
+ RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
+ RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
+ RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
+ RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
+ }
+ return error;
+}
+
+#undef RANGECHK
+
+SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
+
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
+ &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
+ &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
+ &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
+ &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
+ &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
+ &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
+
+int
+in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
+{
+ register struct inpcb *inp;
+ int s;
+
+ MALLOC(inp, struct inpcb *, sizeof(*inp), M_PCB, M_NOWAIT);
+ if (inp == NULL)
+ return (ENOBUFS);
+ bzero((caddr_t)inp, sizeof(*inp));
+ inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+ inp->inp_pcbinfo = pcbinfo;
+ inp->inp_socket = so;
+ s = splnet();
+ LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
+ pcbinfo->ipi_count++;
+ in_pcbinshash(inp);
+ splx(s);
+ so->so_pcb = (caddr_t)inp;
+ return (0);
+}
+
+int
+in_pcbbind(struct inpcb *inp, struct mbuf *nam)
+{
+ register struct socket *so = inp->inp_socket;
+ unsigned short *lastport;
+ struct sockaddr_in *sin;
+ u_short lport = 0;
+ int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
+ int error;
+
+ if (in_ifaddr == 0)
+ return (EADDRNOTAVAIL);
+ if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
+ return (EINVAL);
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
+ ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
+ (so->so_options & SO_ACCEPTCONN) == 0))
+ wild = 1;
+ if (nam) {
+ sin = mtod(nam, struct sockaddr_in *);
+ if (nam->m_len != sizeof (*sin))
+ return (EINVAL);
+#ifdef notdef
+ /*
+ * We should check the family, but old programs
+ * incorrectly fail to initialize it.
+ */
+ if (sin->sin_family != AF_INET)
+ return (EAFNOSUPPORT);
+#endif
+ lport = sin->sin_port;
+ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
+ /*
+ * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
+ * allow complete duplication of binding if
+ * SO_REUSEPORT is set, or if SO_REUSEADDR is set
+ * and a multicast address is bound on both
+ * new and duplicated sockets.
+ */
+ if (so->so_options & SO_REUSEADDR)
+ reuseport = SO_REUSEADDR|SO_REUSEPORT;
+ } else if (sin->sin_addr.s_addr != INADDR_ANY) {
+ sin->sin_port = 0; /* yech... */
+ if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
+ return (EADDRNOTAVAIL);
+ }
+ if (lport) {
+ struct inpcb *t;
+
+ /* GROSS */
+ if (ntohs(lport) < IPPORT_RESERVED &&
+ (error = suser(p->p_ucred, &p->p_acflag)))
+ return (EACCES);
+ t = in_pcblookup(inp->inp_pcbinfo, zeroin_addr, 0,
+ sin->sin_addr, lport, wild);
+ if (t && (reuseport & t->inp_socket->so_options) == 0)
+ return (EADDRINUSE);
+ }
+ inp->inp_laddr = sin->sin_addr;
+ }
+ if (lport == 0) {
+ unsigned short first, last;
+ int count;
+
+ inp->inp_flags |= INP_ANONPORT;
+
+ if (inp->inp_flags & INP_HIGHPORT) {
+ first = ipport_hifirstauto; /* sysctl */
+ last = ipport_hilastauto;
+ lastport = &inp->inp_pcbinfo->lasthi;
+ } else if (inp->inp_flags & INP_LOWPORT) {
+ if ((error = suser(p->p_ucred, &p->p_acflag)))
+ return (EACCES);
+ first = ipport_lowfirstauto; /* 1023 */
+ last = ipport_lowlastauto; /* 600 */
+ lastport = &inp->inp_pcbinfo->lastlow;
+ } else {
+ first = ipport_firstauto; /* sysctl */
+ last = ipport_lastauto;
+ lastport = &inp->inp_pcbinfo->lastport;
+ }
+ /*
+		 * A simple check to ensure that the search terminates even
+		 * when every port is in use, rather than deadlocking here.
+ *
+ * We split the two cases (up and down) so that the direction
+ * is not being tested on each round of the loop.
+ */
+ if (first > last) {
+ /*
+ * counting down
+ */
+ count = first - last;
+
+ do {
+ if (count-- <= 0) /* completely used? */
+ return (EADDRNOTAVAIL);
+ --*lastport;
+ if (*lastport > first || *lastport < last)
+ *lastport = first;
+ lport = htons(*lastport);
+ } while (in_pcblookup(inp->inp_pcbinfo,
+ zeroin_addr, 0, inp->inp_laddr, lport, wild));
+ } else {
+ /*
+ * counting up
+ */
+ count = last - first;
+
+ do {
+ if (count-- <= 0) /* completely used? */
+ return (EADDRNOTAVAIL);
+ ++*lastport;
+ if (*lastport < first || *lastport > last)
+ *lastport = first;
+ lport = htons(*lastport);
+ } while (in_pcblookup(inp->inp_pcbinfo,
+ zeroin_addr, 0, inp->inp_laddr, lport, wild));
+ }
+ }
+ inp->inp_lport = lport;
+ in_pcbrehash(inp);
+ return (0);
+}
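The anonymous-port loop above walks a cursor through the configured range and bounds the number of probes so an exhausted range cannot hang the caller. A hypothetical stand-alone sketch of the counting-down case (pick_port_down and in_use are invented names, not part of this commit):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Hypothetical sketch of the "counting down" case above: walk a cursor
     * through [last, first], wrap when it leaves the range, and give up
     * after at most first - last probes so an exhausted range cannot
     * deadlock the caller.
     */
    static int
    pick_port_down(uint16_t first, uint16_t last, uint16_t *cursor,
        bool (*in_use)(uint16_t port), uint16_t *chosen)
    {
    	int count = first - last;	/* assumes first > last */

    	do {
    		if (count-- <= 0)
    			return -1;	/* every port in the range is taken */
    		--*cursor;
    		if (*cursor > first || *cursor < last)
    			*cursor = first;	/* wrap back into the range */
    	} while (in_use(*cursor));
    	*chosen = *cursor;
    	return 0;
    }
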
+
+/*
+ * Transform old in_pcbconnect() into an inner subroutine for new
+ * in_pcbconnect(): Do some validity-checking on the remote
+ * address (in mbuf 'nam') and then determine local host address
+ * (i.e., which interface) to use to access that remote host.
+ *
+ * This preserves definition of in_pcbconnect(), while supporting a
+ * slightly different version for T/TCP. (This is more than
+ * a bit of a kludge, but cleaning up the internal interfaces would
+ * have forced minor changes in every protocol).
+ */
+
+int
+in_pcbladdr(struct inpcb *inp, struct mbuf *nam, struct sockaddr_in **plocal_sin)
+{
+ struct in_ifaddr *ia;
+ register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
+
+ if (nam->m_len != sizeof (*sin))
+ return (EINVAL);
+ if (sin->sin_family != AF_INET)
+ return (EAFNOSUPPORT);
+ if (sin->sin_port == 0)
+ return (EADDRNOTAVAIL);
+ if (in_ifaddr) {
+ /*
+ * If the destination address is INADDR_ANY,
+ * use the primary local address.
+ * If the supplied address is INADDR_BROADCAST,
+ * and the primary interface supports broadcast,
+ * choose the broadcast address for that interface.
+ */
+#define satosin(sa) ((struct sockaddr_in *)(sa))
+#define sintosa(sin) ((struct sockaddr *)(sin))
+#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ sin->sin_addr = IA_SIN(in_ifaddr)->sin_addr;
+ else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
+ (in_ifaddr->ia_ifp->if_flags & IFF_BROADCAST))
+ sin->sin_addr = satosin(&in_ifaddr->ia_broadaddr)->sin_addr;
+ }
+ if (inp->inp_laddr.s_addr == INADDR_ANY) {
+ register struct route *ro;
+
+ ia = (struct in_ifaddr *)0;
+ /*
+ * If route is known or can be allocated now,
+ * our src addr is taken from the i/f, else punt.
+ */
+ ro = &inp->inp_route;
+ if (ro->ro_rt &&
+ (satosin(&ro->ro_dst)->sin_addr.s_addr !=
+ sin->sin_addr.s_addr ||
+ inp->inp_socket->so_options & SO_DONTROUTE)) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = (struct rtentry *)0;
+ }
+ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
+ (ro->ro_rt == (struct rtentry *)0 ||
+ ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
+ /* No route yet, so try to acquire one */
+ ro->ro_dst.sa_family = AF_INET;
+ ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
+ ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+ sin->sin_addr;
+ rtalloc(ro);
+ }
+ /*
+ * If we found a route, use the address
+ * corresponding to the outgoing interface
+ * unless it is the loopback (in case a route
+ * to our address on another net goes to loopback).
+ */
+ if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
+ ia = ifatoia(ro->ro_rt->rt_ifa);
+ if (ia == 0) {
+ u_short fport = sin->sin_port;
+
+ sin->sin_port = 0;
+ ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
+ if (ia == 0)
+ ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
+ sin->sin_port = fport;
+ if (ia == 0)
+ ia = in_ifaddr;
+ if (ia == 0)
+ return (EADDRNOTAVAIL);
+ }
+ /*
+ * If the destination address is multicast and an outgoing
+ * interface has been set as a multicast option, use the
+ * address of that interface as our source address.
+ */
+ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
+ inp->inp_moptions != NULL) {
+ struct ip_moptions *imo;
+ struct ifnet *ifp;
+
+ imo = inp->inp_moptions;
+ if (imo->imo_multicast_ifp != NULL) {
+ ifp = imo->imo_multicast_ifp;
+ for (ia = in_ifaddr; ia; ia = ia->ia_next)
+ if (ia->ia_ifp == ifp)
+ break;
+ if (ia == 0)
+ return (EADDRNOTAVAIL);
+ }
+ }
+ /*
+ * Don't do pcblookup call here; return interface in plocal_sin
+ * and exit to caller, that will do the lookup.
+ */
+ *plocal_sin = &ia->ia_addr;
+
+ }
+ return(0);
+}
+
+/*
+ * Outer subroutine:
+ * Connect from a socket to a specified address.
+ * Both address and port must be specified in argument sin.
+ * If we don't have a local address for this socket yet,
+ * then pick one.
+ */
+int
+in_pcbconnect(struct inpcb *inp, struct mbuf *nam)
+{
+ struct sockaddr_in *ifaddr;
+ register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
+ int error;
+
+ /*
+ * Call inner routine, to assign local interface address.
+ */
+ if ((error = in_pcbladdr(inp, nam, &ifaddr)))
+ return(error);
+
+ if (in_pcblookuphash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
+ inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
+ inp->inp_lport, 0) != NULL)
+ return (EADDRINUSE);
+ if (inp->inp_laddr.s_addr == INADDR_ANY) {
+ if (inp->inp_lport == 0)
+ (void)in_pcbbind(inp, (struct mbuf *)0);
+ inp->inp_laddr = ifaddr->sin_addr;
+ }
+ inp->inp_faddr = sin->sin_addr;
+ inp->inp_fport = sin->sin_port;
+ in_pcbrehash(inp);
+ return (0);
+}
+
+void
+in_pcbdisconnect(struct inpcb *inp)
+{
+
+ inp->inp_faddr.s_addr = INADDR_ANY;
+ inp->inp_fport = 0;
+ in_pcbrehash(inp);
+ if (inp->inp_socket->so_state & SS_NOFDREF)
+ in_pcbdetach(inp);
+}
+
+void
+in_pcbdetach(struct inpcb *inp)
+{
+ struct socket *so = inp->inp_socket;
+ struct inpcbinfo *ipi = inp->inp_pcbinfo;
+ int s;
+
+ inp->inp_gencnt = ++ipi->ipi_gencnt;
+ so->so_pcb = 0;
+ sofree(so);
+ if (inp->inp_options)
+ (void)m_free(inp->inp_options);
+ if (inp->inp_route.ro_rt)
+ rtfree(inp->inp_route.ro_rt);
+ ip_freemoptions(inp->inp_moptions);
+ s = splnet();
+ LIST_REMOVE(inp, inp_hash);
+ LIST_REMOVE(inp, inp_list);
+ splx(s);
+ FREE(inp, M_PCB);
+}
+
+void
+in_setsockaddr(struct inpcb *inp, struct mbuf *nam)
+{
+ register struct sockaddr_in *sin;
+
+ nam->m_len = sizeof (*sin);
+ sin = mtod(nam, struct sockaddr_in *);
+ bzero((caddr_t)sin, sizeof (*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = inp->inp_lport;
+ sin->sin_addr = inp->inp_laddr;
+}
+
+void
+in_setpeeraddr(struct inpcb *inp, struct mbuf *nam)
+{
+ register struct sockaddr_in *sin;
+
+ nam->m_len = sizeof (*sin);
+ sin = mtod(nam, struct sockaddr_in *);
+ bzero((caddr_t)sin, sizeof (*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = inp->inp_fport;
+ sin->sin_addr = inp->inp_faddr;
+}
+
+/*
+ * Pass some notification to all connections of a protocol
+ * associated with address dst. The local address and/or port numbers
+ * may be specified to limit the search. The "usual action" will be
+ * taken, depending on the ctlinput cmd. The caller must filter any
+ * cmds that are uninteresting (e.g., no error in the map).
+ * Call the protocol specific routine (if any) to report
+ * any errors for each matching socket.
+ *
+ * Must be called at splnet.
+ */
+void
+in_pcbnotify(struct inpcbhead *head, struct sockaddr *dst, u_int fport_arg,
+ struct in_addr laddr, u_int lport_arg, int cmd,
+ void (*notify)(struct inpcb *, int))
+{
+ register struct inpcb *inp, *oinp;
+ struct in_addr faddr;
+ u_short fport = fport_arg, lport = lport_arg;
+ int errnum, s;
+
+ if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET)
+ return;
+ faddr = ((struct sockaddr_in *)dst)->sin_addr;
+ if (faddr.s_addr == INADDR_ANY)
+ return;
+
+ /*
+ * Redirects go to all references to the destination,
+ * and use in_rtchange to invalidate the route cache.
+ * Dead host indications: notify all references to the destination.
+ * Otherwise, if we have knowledge of the local port and address,
+ * deliver only to that socket.
+ */
+ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
+ fport = 0;
+ lport = 0;
+ laddr.s_addr = 0;
+ if (cmd != PRC_HOSTDEAD)
+ notify = in_rtchange;
+ }
+ errnum = inetctlerrmap[cmd];
+ s = splnet();
+ for (inp = head->lh_first; inp != NULL;) {
+ if (inp->inp_faddr.s_addr != faddr.s_addr ||
+ inp->inp_socket == 0 ||
+ (lport && inp->inp_lport != lport) ||
+ (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) ||
+ (fport && inp->inp_fport != fport)) {
+ inp = inp->inp_list.le_next;
+ continue;
+ }
+ oinp = inp;
+ inp = inp->inp_list.le_next;
+ if (notify)
+ (*notify)(oinp, errnum);
+ }
+ splx(s);
+}
+
+/*
+ * Check for alternatives when higher level complains
+ * about service problems. For now, invalidate cached
+ * routing information. If the route was created dynamically
+ * (by a redirect), time to try a default gateway again.
+ */
+void
+in_losing(struct inpcb *inp)
+{
+ register struct rtentry *rt;
+ struct rt_addrinfo info;
+
+ if ((rt = inp->inp_route.ro_rt)) {
+ inp->inp_route.ro_rt = 0;
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_info[RTAX_DST] =
+ (struct sockaddr *)&inp->inp_route.ro_dst;
+ info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+ rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
+ if (rt->rt_flags & RTF_DYNAMIC)
+ (void) rtrequest(RTM_DELETE, rt_key(rt),
+ rt->rt_gateway, rt_mask(rt), rt->rt_flags,
+ (struct rtentry **)0);
+ else
+ /*
+ * A new route can be allocated
+ * the next time output is attempted.
+ */
+ rtfree(rt);
+ }
+}
+
+/*
+ * After a routing change, flush old routing
+ * and allocate a (hopefully) better one.
+ */
+static void
+in_rtchange(struct inpcb *inp, int errnum)
+{
+ if (inp->inp_route.ro_rt) {
+ rtfree(inp->inp_route.ro_rt);
+ inp->inp_route.ro_rt = 0;
+ /*
+ * A new route can be allocated the next time
+ * output is attempted.
+ */
+ }
+}
+
+struct inpcb *
+in_pcblookup(struct inpcbinfo *pcbinfo,
+ struct in_addr faddr, u_int fport_arg,
+ struct in_addr laddr, u_int lport_arg,
+ int wild_okay)
+{
+ register struct inpcb *inp, *match = NULL;
+ int matchwild = 3, wildcard;
+ u_short fport = fport_arg, lport = lport_arg;
+ int s;
+
+ s = splnet();
+
+ for (inp = pcbinfo->listhead->lh_first; inp != NULL; inp = inp->inp_list.le_next) {
+ if (inp->inp_lport != lport)
+ continue;
+ wildcard = 0;
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ if (faddr.s_addr == INADDR_ANY)
+ wildcard++;
+ else if (inp->inp_faddr.s_addr != faddr.s_addr ||
+ inp->inp_fport != fport)
+ continue;
+ } else {
+ if (faddr.s_addr != INADDR_ANY)
+ wildcard++;
+ }
+ if (inp->inp_laddr.s_addr != INADDR_ANY) {
+ if (laddr.s_addr == INADDR_ANY)
+ wildcard++;
+ else if (inp->inp_laddr.s_addr != laddr.s_addr)
+ continue;
+ } else {
+ if (laddr.s_addr != INADDR_ANY)
+ wildcard++;
+ }
+ if (wildcard && wild_okay == 0)
+ continue;
+ if (wildcard < matchwild) {
+ match = inp;
+ matchwild = wildcard;
+ if (matchwild == 0) {
+ break;
+ }
+ }
+ }
+ splx(s);
+ return (match);
+}
+
+/*
+ * Lookup PCB in hash list.
+ */
+struct inpcb *
+in_pcblookuphash(struct inpcbinfo *pcbinfo,
+ struct in_addr faddr, u_int fport_arg,
+ struct in_addr laddr, u_int lport_arg,
+ int wildcard)
+{
+ struct inpcbhead *head;
+ register struct inpcb *inp;
+ u_short fport = fport_arg, lport = lport_arg;
+ int s;
+
+ s = splnet();
+ /*
+ * First look for an exact match.
+ */
+ head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
+ for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
+ if (inp->inp_faddr.s_addr == faddr.s_addr &&
+ inp->inp_laddr.s_addr == laddr.s_addr &&
+ inp->inp_fport == fport &&
+ inp->inp_lport == lport)
+ goto found;
+ }
+ if (wildcard) {
+ struct inpcb *local_wild = NULL;
+
+ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
+ for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
+ if (inp->inp_faddr.s_addr == INADDR_ANY &&
+ inp->inp_fport == 0 && inp->inp_lport == lport) {
+ if (inp->inp_laddr.s_addr == laddr.s_addr)
+ goto found;
+ else if (inp->inp_laddr.s_addr == INADDR_ANY)
+ local_wild = inp;
+ }
+ }
+ if (local_wild != NULL) {
+ inp = local_wild;
+ goto found;
+ }
+ }
+ splx(s);
+ return (NULL);
+
+found:
+ /*
+ * Move PCB to head of this hash chain so that it can be
+ * found more quickly in the future.
+ * XXX - this is a pessimization on machines with few
+ * concurrent connections.
+ */
+ if (inp != head->lh_first) {
+ LIST_REMOVE(inp, inp_hash);
+ LIST_INSERT_HEAD(head, inp, inp_hash);
+ }
+ splx(s);
+ return (inp);
+}
+
+/*
+ * Insert PCB into hash chain. Must be called at splnet.
+ */
+static void
+in_pcbinshash(struct inpcb *inp)
+{
+ struct inpcbhead *head;
+
+ head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(inp->inp_faddr.s_addr,
+ inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)];
+
+ LIST_INSERT_HEAD(head, inp, inp_hash);
+}
+
+void
+in_pcbrehash(struct inpcb *inp)
+{
+ struct inpcbhead *head;
+ int s;
+
+ s = splnet();
+ LIST_REMOVE(inp, inp_hash);
+
+ head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(inp->inp_faddr.s_addr,
+ inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)];
+
+ LIST_INSERT_HEAD(head, inp, inp_hash);
+ inp->inp_pcbinfo->ipi_count--;
+ splx(s);
+}
diff --git a/netinet/in_pcb.h b/netinet/in_pcb.h
new file mode 100644
index 0000000..ad8b99c
--- /dev/null
+++ b/netinet/in_pcb.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/netinet/in_pcb.h,v 1.89 2006/07/18 22:34:27 ups Exp $
+ */
+
+
+#ifndef _NETINET_IN_PCB_H_
+#define _NETINET_IN_PCB_H_
+
+#include <sys/queue.h>
+#include <netinet/in.h> /* struct in_addr */
+#include <net/route.h> /* struct route */
+
+/*
+ * Common protocol control block (pcb) for the internet protocol
+ * implementation.
+ * Here are stored pointers to local and foreign host table
+ * entries, local and foreign socket numbers, and pointers
+ * up (to a socket structure) and down (to a protocol-specific)
+ * control block.
+ */
+LIST_HEAD(inpcbhead, inpcb);
+
+typedef u_int64_t inp_gen_t;
+
+struct inpcb {
+ LIST_ENTRY(inpcb) inp_hash; /* hash list */
+ LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */
+ struct inpcbinfo *inp_pcbinfo; /* PCB list info */
+ struct in_addr inp_faddr; /* foreign host table entry */
+ struct in_addr inp_laddr; /* local host table entry */
+ u_short inp_fport; /* foreign port */
+ u_short inp_lport; /* local port */
+ caddr_t inp_ppcb; /* pointer to per-protocol pcb */
+ struct socket *inp_socket; /* back pointer to socket */
+ struct route inp_route; /* placeholder for routing entry */
+ int inp_flags; /* generic IP/datagram flags */
+ u_char inp_vflag; /* IP version flag (v4/v6) */
+ u_char inp_ip_ttl; /* time to live proto */
+ u_char inp_ip_p; /* protocol proto */
+ u_char inp_ip_minttl; /* minimum TTL or drop */
+
+ /* protocol dependent part; options */
+ struct {
+ u_char inp4_ip_tos; /* type of service proto */
+ struct mbuf *inp4_options; /* IP options */
+ struct ip_moptions *inp4_moptions; /* IP multicast options */
+ } inp_depend4;
+#define inp_ip_tos inp_depend4.inp4_ip_tos
+#define inp_options inp_depend4.inp4_options
+#define inp_moptions inp_depend4.inp4_moptions
+ inp_gen_t inp_gencnt; /* generation count of this instance */
+};
+
+/*
+ * Interface exported to userland by various protocols which use
+ * inpcbs. Hack alert -- only define if struct xsocket is in scope.
+ */
+#ifdef _SYS_SOCKETVAR_H_
+struct xinpcb {
+ size_t xi_len; /* length of this structure */
+ struct inpcb xi_inp;
+/* struct xsocket xi_socket; ccj removed */
+ u_int64_t xi_alignment_hack;
+};
+
+struct xinpgen {
+ size_t xig_len; /* length of this structure */
+ u_int xig_count; /* number of PCBs at this time */
+ inp_gen_t xig_gen; /* generation count at this time */
+ so_gen_t xig_sogen; /* socket generation count at this time */
+};
+#endif /* _SYS_SOCKETVAR_H_ */
+
+struct inpcbinfo { /* XXX documentation, prefixes */
+ struct inpcbhead *listhead;
+ struct inpcbhead *hashbase;
+ unsigned long hashmask;
+ unsigned short lastport;
+ unsigned short lastlow;
+ unsigned short lasthi;
+ u_int ipi_count; /* number of pcbs in this list */
+ u_int64_t ipi_gencnt; /* current generation count */
+};
+
+#define INP_PCBHASH(faddr, lport, fport, mask) \
+ (((faddr) ^ ((faddr) >> 16) ^ (lport) ^ (fport)) & (mask))
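As a rough illustration of how INP_PCBHASH spreads connections across hash buckets, here is a hypothetical walk-through with made-up values (not part of this header; the expression mirrors the macro above):

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Hypothetical walk-through of the INP_PCBHASH mixing with made-up
     * values; the expression below mirrors the macro one line up.
     */
    int
    main(void)
    {
    	uint32_t faddr = 0xc0a80164;	/* an example foreign address value */
    	uint16_t lport = 0x1f90;	/* 8080 */
    	uint16_t fport = 0x0050;	/* 80 */
    	uint32_t mask  = 127;		/* a 128-bucket hash table */
    	uint32_t h = (faddr ^ (faddr >> 16) ^ lport ^ fport) & mask;

    	printf("bucket %u\n", (unsigned)h);	/* prints "bucket 12" */
    	return 0;
    }
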
+
+/* flags in inp_flags: */
+#define INP_RECVOPTS 0x01 /* receive incoming IP options */
+#define INP_RECVRETOPTS 0x02 /* receive IP options for reply */
+#define INP_RECVDSTADDR 0x04 /* receive IP dst address */
+#define INP_HDRINCL 0x08 /* user supplies entire IP header */
+#define INP_HIGHPORT 0x10 /* user wants "high" port binding */
+#define INP_LOWPORT 0x20 /* user wants "low" port binding */
+#define INP_ANONPORT 0x40 /* port chosen for user */
+#define INP_RECVIF 0x80 /* receive incoming interface */
+#define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
+ INP_RECVIF)
+
+#define INPLOOKUP_WILDCARD 1
+#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
+
+#ifdef _KERNEL
+void in_losing(struct inpcb *);
+int in_pcballoc(struct socket *, struct inpcbinfo *);
+int in_pcbbind(struct inpcb *, struct mbuf *);
+int in_pcbconnect(struct inpcb *, struct mbuf *);
+void in_pcbdetach(struct inpcb *);
+void in_pcbdisconnect(struct inpcb *);
+int in_pcbladdr(struct inpcb *, struct mbuf *,
+ struct sockaddr_in **);
+struct inpcb *
+ in_pcblookup(struct inpcbinfo *,
+ struct in_addr, u_int, struct in_addr, u_int, int);
+struct inpcb *
+ in_pcblookuphash(struct inpcbinfo *,
+ struct in_addr, u_int, struct in_addr, u_int, int);
+void in_pcbnotify(struct inpcbhead *, struct sockaddr *,
+ u_int, struct in_addr, u_int, int, void (*)(struct inpcb *, int));
+void in_pcbrehash(struct inpcb *);
+void in_setpeeraddr(struct inpcb *, struct mbuf *);
+void in_setsockaddr(struct inpcb *, struct mbuf *);
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_IN_PCB_H_ */
diff --git a/netinet/in_proto.c b/netinet/in_proto.c
new file mode 100644
index 0000000..558c3e2
--- /dev/null
+++ b/netinet/in_proto.c
@@ -0,0 +1,217 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_proto.c 8.2 (Berkeley) 2/9/95
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/radix.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/in_pcb.h>
+#include <netinet/igmp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+/*
+ * TCP/IP protocol family: IP, ICMP, UDP, TCP.
+ */
+
+#ifdef IPXIP
+#include <netipx/ipx.h>
+#include <netipx/ipx_ip.h>
+#endif
+
+#ifdef NSIP
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#endif
+
+#ifdef TPIP
+void tpip_input(), tpip_ctlinput(), tp_init(), tp_slowtimo(), tp_drain();
+int tp_ctloutput(), tp_usrreq();
+#endif
+
+#ifdef EON
+void eoninput(), eonctlinput(), eonprotoinit();
+#endif /* EON */
+
+extern struct domain inetdomain;
+
+struct protosw inetsw[] = {
+{ 0, &inetdomain, 0, 0,
+ 0, 0, 0, 0,
+ 0,
+ ip_init, 0, ip_slowtimo, ip_drain,
+ NULL
+},
+{ SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR,
+ udp_input, 0, udp_ctlinput, ip_ctloutput,
+ udp_usrreq,
+ udp_init, 0, 0, 0,
+ NULL
+},
+{ SOCK_STREAM, &inetdomain, IPPROTO_TCP,
+ PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
+ tcp_input, 0, tcp_ctlinput, tcp_ctloutput,
+ 0,
+ tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain,
+ &tcp_usrreqs
+},
+{ SOCK_RAW, &inetdomain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR,
+ rip_input, 0, 0, rip_ctloutput,
+ rip_usrreq,
+ 0, 0, 0, 0,
+ NULL
+},
+{ SOCK_RAW, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR,
+ icmp_input, 0, 0, rip_ctloutput,
+ rip_usrreq,
+ 0, 0, 0, 0,
+ NULL
+},
+{ SOCK_RAW, &inetdomain, IPPROTO_IGMP, PR_ATOMIC|PR_ADDR,
+ igmp_input, 0, 0, rip_ctloutput,
+ rip_usrreq,
+ igmp_init, igmp_fasttimo, igmp_slowtimo, 0,
+ NULL
+},
+{ SOCK_RAW, &inetdomain, IPPROTO_RSVP, PR_ATOMIC|PR_ADDR,
+ rsvp_input, 0, 0, rip_ctloutput,
+ rip_usrreq,
+ 0, 0, 0, 0,
+ NULL
+},
+{ SOCK_RAW, &inetdomain, IPPROTO_IPIP, PR_ATOMIC|PR_ADDR,
+ ipip_input, 0, 0, rip_ctloutput,
+ rip_usrreq,
+ 0, 0, 0, 0,
+ NULL
+},
+#ifdef IPDIVERT
+{ SOCK_RAW, &inetdomain, IPPROTO_DIVERT, PR_ATOMIC|PR_ADDR,
+ div_input, 0, 0, ip_ctloutput,
+ div_usrreq,
+ div_init, 0, 0, 0,
+ NULL
+},
+#endif
+#ifdef TPIP
+{ SOCK_SEQPACKET,&inetdomain, IPPROTO_TP, PR_CONNREQUIRED|PR_WANTRCVD,
+ tpip_input, 0, tpip_ctlinput, tp_ctloutput,
+ tp_usrreq,
+ tp_init, 0, tp_slowtimo, tp_drain,
+ NULL
+},
+#endif
+/* EON (ISO CLNL over IP) */
+#ifdef EON
+{ SOCK_RAW, &inetdomain, IPPROTO_EON, 0,
+ eoninput, 0, eonctlinput, 0,
+ 0,
+ eonprotoinit, 0, 0, 0,
+ NULL
+},
+#endif
+#ifdef IPXIP
+{ SOCK_RAW, &inetdomain, IPPROTO_IDP, PR_ATOMIC|PR_ADDR,
+ ipxip_input, 0, ipxip_ctlinput, 0,
+ rip_usrreq,
+ 0, 0, 0, 0,
+ NULL
+},
+#endif
+#ifdef NSIP
+{ SOCK_RAW, &inetdomain, IPPROTO_IDP, PR_ATOMIC|PR_ADDR,
+ idpip_input, 0, nsip_ctlinput, 0,
+ rip_usrreq,
+ 0, 0, 0, 0,
+ NULL
+},
+#endif
+ /* raw wildcard */
+{ SOCK_RAW, &inetdomain, 0, PR_ATOMIC|PR_ADDR,
+ rip_input, 0, 0, rip_ctloutput,
+ rip_usrreq,
+ rip_init, 0, 0, 0,
+ NULL
+},
+};
+
+extern int in_inithead(void **, int);
+
+struct domain inetdomain =
+ { AF_INET, "internet", 0, 0, 0,
+ inetsw, &inetsw[sizeof(inetsw)/sizeof(inetsw[0])], 0,
+ in_inithead, 32, sizeof(struct sockaddr_in)
+ };
+
+DOMAIN_SET(inet);
+
+SYSCTL_NODE(_net, PF_INET, inet, CTLFLAG_RW, 0,
+ "Internet Family");
+
+SYSCTL_NODE(_net_inet, IPPROTO_IP, ip, CTLFLAG_RW, 0, "IP");
+SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW, 0, "ICMP");
+SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW, 0, "UDP");
+SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW, 0, "TCP");
+SYSCTL_NODE(_net_inet, IPPROTO_IGMP, igmp, CTLFLAG_RW, 0, "IGMP");
+#ifdef IPDIVERT
+SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, div, CTLFLAG_RW, 0, "DIVERT");
+#endif
diff --git a/netinet/in_rmx.c b/netinet/in_rmx.c
new file mode 100644
index 0000000..5dd276a
--- /dev/null
+++ b/netinet/in_rmx.c
@@ -0,0 +1,386 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright 1994, 1995 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This code does two things necessary for the enhanced TCP metrics to
+ * function in a useful manner:
+ * 1) It marks all non-host routes as `cloning', thus ensuring that
+ * every actual reference to such a route actually gets turned
+ * into a reference to a host route to the specific destination
+ * requested.
+ * 2) When such routes lose all their references, it arranges for them
+ * to be deleted in some random collection of circumstances, so that
+ * a large quantity of stale routing data is not kept in kernel memory
+ * indefinitely. See in_rtqtimo() below for the exact mechanism.
+ */
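A hypothetical sketch of the bookkeeping this comment describes (struct cached_route and the function names below are invented for illustration and are not part of this file): the last release stamps an expiry time on a cloned route, and a periodic sweep reclaims entries whose time has passed.

    #include <stdbool.h>
    #include <time.h>

    /*
     * Hypothetical sketch: stamp an expiry time when the last reference
     * goes away, and let a periodic sweep reclaim expired entries (or
     * everything when draining).
     */
    struct cached_route {
    	bool	ours;		/* like RTPRF_OURS: we manage this entry */
    	time_t	expire;		/* when the entry may be reclaimed */
    };

    static void
    on_last_release(struct cached_route *rt, time_t now, int reallyold)
    {
    	rt->ours = true;
    	rt->expire = now + reallyold;	/* e.g. one hour from now */
    }

    static bool
    sweep_should_delete(const struct cached_route *rt, time_t now, bool draining)
    {
    	return rt->ours && (draining || rt->expire <= now);
    }
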
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/mbuf.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+
+#include <netinet/tcp.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+
+extern int in_inithead(void **head, int off);
+
+#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */
+
+/*
+ * Do what we need to do when inserting a route.
+ */
+static struct radix_node *
+in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
+ struct radix_node *treenodes)
+{
+ struct rtentry *rt = (struct rtentry *)treenodes;
+ struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
+ struct radix_node *ret;
+
+ /*
+ * For IP, all unicast non-host routes are automatically cloning.
+ */
+ if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ rt->rt_flags |= RTF_MULTICAST;
+
+ if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
+ rt->rt_flags |= RTF_PRCLONING;
+ }
+
+ /*
+ * A little bit of help for both IP output and input:
+ * For host routes, we make sure that RTF_BROADCAST
+ * is set for anything that looks like a broadcast address.
+ * This way, we can avoid an expensive call to in_broadcast()
+ * in ip_output() most of the time (because the route passed
+ * to ip_output() is almost always a host route).
+ *
+ * We also do the same for local addresses, with the thought
+ * that this might one day be used to speed up ip_input().
+ *
+ * We also mark routes to multicast addresses as such, because
+ * it's easy to do and might be useful (but this is much more
+ * dubious since it's so easy to inspect the address). (This
+ * is done above.)
+ */
+ if (rt->rt_flags & RTF_HOST) {
+ if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
+ rt->rt_flags |= RTF_BROADCAST;
+ } else {
+ if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr
+ == sin->sin_addr.s_addr)
+ rt->rt_flags |= RTF_LOCAL;
+ }
+ }
+
+ /*
+ * We also specify a send and receive pipe size for every
+ * route added, to help TCP a bit. TCP doesn't actually
+ * want a true pipe size, which would be prohibitive in memory
+ * costs and is hard to compute anyway; it simply uses these
+ * values to size its buffers. So, we fill them in with the
+ * same values that TCP would have used anyway, and allow the
+ * installing program or the link layer to override these values
+ * as it sees fit. This will hopefully allow TCP more
+ * opportunities to save its ssthresh value.
+ */
+ if (!rt->rt_rmx.rmx_sendpipe && !(rt->rt_rmx.rmx_locks & RTV_SPIPE))
+ rt->rt_rmx.rmx_sendpipe = tcp_sendspace;
+
+ if (!rt->rt_rmx.rmx_recvpipe && !(rt->rt_rmx.rmx_locks & RTV_RPIPE))
+ rt->rt_rmx.rmx_recvpipe = tcp_recvspace;
+
+ if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU)
+ && rt->rt_ifp)
+ rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+
+ ret = rn_addroute(v_arg, n_arg, head, treenodes);
+ if (ret == NULL && rt->rt_flags & RTF_HOST) {
+ struct rtentry *rt2;
+ /*
+ * We are trying to add a host route, but can't.
+ * Find out if it is because of an
+ * ARP entry and delete it if so.
+ */
+ rt2 = rtalloc1((struct sockaddr *)sin, 0,
+ RTF_CLONING | RTF_PRCLONING);
+ if (rt2) {
+ if (rt2->rt_flags & RTF_LLINFO &&
+ rt2->rt_flags & RTF_HOST &&
+ rt2->rt_gateway &&
+ rt2->rt_gateway->sa_family == AF_LINK) {
+ rtrequest(RTM_DELETE,
+ (struct sockaddr *)rt_key(rt2),
+ rt2->rt_gateway,
+ rt_mask(rt2), rt2->rt_flags, 0);
+ ret = rn_addroute(v_arg, n_arg, head,
+ treenodes);
+ }
+ RTFREE(rt2);
+ }
+ }
+ return ret;
+}
+
+/*
+ * This code is the inverse of in_clsroute: on first reference, if we
+ * were managing the route, stop doing so and set the expiration timer
+ * back off again.
+ */
+static struct radix_node *
+in_matroute(void *v_arg, struct radix_node_head *head)
+{
+ struct radix_node *rn = rn_match(v_arg, head);
+ struct rtentry *rt = (struct rtentry *)rn;
+
+ if(rt && rt->rt_refcnt == 0) { /* this is first reference */
+ if(rt->rt_flags & RTPRF_OURS) {
+ rt->rt_flags &= ~RTPRF_OURS;
+ rt->rt_rmx.rmx_expire = 0;
+ }
+ }
+ return rn;
+}
+
+static int rtq_reallyold = 60*60;
+ /* one hour is ``really old'' */
+SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire,
+ CTLFLAG_RW, &rtq_reallyold , 0, "");
+
+static int rtq_minreallyold = 10;
+ /* never automatically crank down to less */
+SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire,
+ CTLFLAG_RW, &rtq_minreallyold , 0, "");
+
+static int rtq_toomany = 128;
+ /* 128 cached routes is ``too many'' */
+SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache,
+ CTLFLAG_RW, &rtq_toomany , 0, "");
+
+
+/*
+ * On last reference drop, mark the route as belonging to us so that it
+ * can be timed out.
+ */
+static void
+in_clsroute(struct radix_node *rn, struct radix_node_head *head)
+{
+ struct rtentry *rt = (struct rtentry *)rn;
+
+ if(!(rt->rt_flags & RTF_UP))
+ return; /* prophylactic measures */
+
+ if((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
+ return;
+
+ if((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS))
+ != RTF_WASCLONED)
+ return;
+
+ /*
+ * As requested by David Greenman:
+ * If rtq_reallyold is 0, just delete the route without
+ * waiting for a timeout cycle to kill it.
+ */
+ if(rtq_reallyold != 0) {
+ rt->rt_flags |= RTPRF_OURS;
+ rt->rt_rmx.rmx_expire = rtems_bsdnet_seconds_since_boot() + rtq_reallyold;
+ } else {
+ rtrequest(RTM_DELETE,
+ (struct sockaddr *)rt_key(rt),
+ rt->rt_gateway, rt_mask(rt),
+ rt->rt_flags, 0);
+ }
+}
+
+struct rtqk_arg {
+ struct radix_node_head *rnh;
+ int draining;
+ int killed;
+ int found;
+ int updating;
+ time_t nextstop;
+};
+
+/*
+ * Get rid of old routes.  When draining, this deletes everything, even when
+ * the timeout has not yet expired.  When updating, this makes sure that
+ * nothing has a timeout longer than the current value of rtq_reallyold.
+ */
+static int
+in_rtqkill(struct radix_node *rn, void *rock)
+{
+ struct rtqk_arg *ap = rock;
+ struct rtentry *rt = (struct rtentry *)rn;
+ int err;
+
+ if(rt->rt_flags & RTPRF_OURS) {
+ ap->found++;
+
+ if(ap->draining || rt->rt_rmx.rmx_expire <= rtems_bsdnet_seconds_since_boot()) {
+ if(rt->rt_refcnt > 0)
+ panic("rtqkill route really not free");
+
+ err = rtrequest(RTM_DELETE,
+ (struct sockaddr *)rt_key(rt),
+ rt->rt_gateway, rt_mask(rt),
+ rt->rt_flags, 0);
+ if(err) {
+ log(LOG_WARNING, "in_rtqkill: error %d\n", err);
+ } else {
+ ap->killed++;
+ }
+ } else {
+ if(ap->updating
+ && (rt->rt_rmx.rmx_expire - rtems_bsdnet_seconds_since_boot()
+ > rtq_reallyold)) {
+ rt->rt_rmx.rmx_expire = rtems_bsdnet_seconds_since_boot()
+ + rtq_reallyold;
+ }
+ ap->nextstop = lmin(ap->nextstop,
+ rt->rt_rmx.rmx_expire);
+ }
+ }
+
+ return 0;
+}
+
+#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */
+static int rtq_timeout = RTQ_TIMEOUT;
+
+static void
+in_rtqtimo(void *rock)
+{
+ struct radix_node_head *rnh = rock;
+ struct rtqk_arg arg;
+ struct timeval atv;
+ static time_t last_adjusted_timeout = 0;
+ int s;
+
+ arg.found = arg.killed = 0;
+ arg.rnh = rnh;
+ arg.nextstop = rtems_bsdnet_seconds_since_boot() + rtq_timeout;
+ arg.draining = arg.updating = 0;
+ s = splnet();
+ rnh->rnh_walktree(rnh, in_rtqkill, &arg);
+ splx(s);
+
+ /*
+ * Attempt to be somewhat dynamic about this:
+ * If there are ``too many'' routes sitting around taking up space,
+ * then crank down the timeout, and see if we can't make some more
+ * go away. However, we make sure that we will never adjust more
+ * than once in rtq_timeout seconds, to keep from cranking down too
+ * hard.
+ */
+ if((arg.found - arg.killed > rtq_toomany)
+ && (rtems_bsdnet_seconds_since_boot() - last_adjusted_timeout >= rtq_timeout)
+ && rtq_reallyold > rtq_minreallyold) {
+ rtq_reallyold = 2*rtq_reallyold / 3;
+ if(rtq_reallyold < rtq_minreallyold) {
+ rtq_reallyold = rtq_minreallyold;
+ }
+
+ last_adjusted_timeout = rtems_bsdnet_seconds_since_boot();
+#ifdef DIAGNOSTIC
+ log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
+ rtq_reallyold);
+#endif
+ arg.found = arg.killed = 0;
+ arg.updating = 1;
+ s = splnet();
+ rnh->rnh_walktree(rnh, in_rtqkill, &arg);
+ splx(s);
+ }
+
+ atv.tv_usec = 0;
+ atv.tv_sec = arg.nextstop;
+ timeout(in_rtqtimo, rock, hzto(&atv));
+}
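The adaptive-aging policy described in the comment inside in_rtqtimo() can be sketched in isolation as follows (adjust_reallyold is an invented name; this is an illustration, not code from this commit):

    #include <time.h>

    /*
     * Hypothetical sketch of the adaptive aging above: when more than
     * 'toomany' managed routes survive a sweep, shrink the expiry time by
     * one third (never below 'minreallyold'), at most once per interval.
     */
    static int
    adjust_reallyold(int reallyold, int minreallyold, int surviving,
        int toomany, time_t now, time_t *last_adjusted, int interval)
    {
    	if (surviving > toomany &&
    	    now - *last_adjusted >= interval &&
    	    reallyold > minreallyold) {
    		reallyold = 2 * reallyold / 3;
    		if (reallyold < minreallyold)
    			reallyold = minreallyold;
    		*last_adjusted = now;
    	}
    	return reallyold;
    }
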
+
+void
+in_rtqdrain(void)
+{
+ struct radix_node_head *rnh = rt_tables[AF_INET];
+ struct rtqk_arg arg;
+ int s;
+ arg.found = arg.killed = 0;
+ arg.rnh = rnh;
+ arg.nextstop = 0;
+ arg.draining = 1;
+ arg.updating = 0;
+ s = splnet();
+ rnh->rnh_walktree(rnh, in_rtqkill, &arg);
+ splx(s);
+}
+
+/*
+ * Initialize our routing tree.
+ */
+int
+in_inithead(void **head, int off)
+{
+ struct radix_node_head *rnh;
+
+ if(!rn_inithead(head, off))
+ return 0;
+
+ if(head != (void **)&rt_tables[AF_INET]) /* BOGUS! */
+ return 1; /* only do this for the real routing table */
+
+ rnh = *head;
+ rnh->rnh_addaddr = in_addroute;
+ rnh->rnh_matchaddr = in_matroute;
+ rnh->rnh_close = in_clsroute;
+ in_rtqtimo(rnh); /* kick off timeout first time */
+ return 1;
+}
diff --git a/netinet/in_systm.h b/netinet/in_systm.h
new file mode 100644
index 0000000..f0f3fa9
--- /dev/null
+++ b/netinet/in_systm.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_systm.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/netinet/in_systm.h,v 1.13 2009/02/13 15:14:43 luigi Exp $
+ */
+
+#ifndef _NETINET_IN_SYSTM_H_
+#define _NETINET_IN_SYSTM_H_
+
+/*
+ * Miscellaneous internetwork
+ * definitions for kernel.
+ */
+
+/*
+ * Network types.
+ *
+ * Internally the system keeps counters in the headers with the bytes
+ * swapped so that VAX instructions will work on them. It reverses
+ * the bytes before transmission at each protocol level. The n_ types
+ * represent the types with the bytes in network (big-endian) order.
+ */
+typedef u_int16_t n_short; /* short as received from the net */
+typedef u_int32_t n_long; /* long as received from the net */
+
+typedef u_int32_t n_time; /* ms since 00:00 GMT, byte rev */
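For illustration only (not part of this header): the n_ types are plain integers whose contents are kept in network byte order, and htons()/ntohs() do the conversion at the host boundary.

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Illustration only: an n_short is just a 16-bit integer kept in
     * network (big-endian) byte order.
     */
    int
    main(void)
    {
    	uint16_t host_port = 8080;		/* host byte order */
    	uint16_t net_port = htons(host_port);	/* as carried in a header */

    	printf("host 0x%04x -> wire 0x%04x\n",
    	    (unsigned)host_port, (unsigned)net_port);
    	return 0;
    }
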
+
+#ifdef _KERNEL
+uint32_t iptime(void);
+#endif
+
+#endif
diff --git a/netinet/in_var.h b/netinet/in_var.h
new file mode 100644
index 0000000..9ade9c4
--- /dev/null
+++ b/netinet/in_var.h
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 1985, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_var.h 8.2 (Berkeley) 1/9/95
+ * $FreeBSD: src/sys/netinet/in_var.h,v 1.52 2004/10/19 21:06:14 andre Exp $
+ */
+
+#ifndef _NETINET_IN_VAR_H_
+#define _NETINET_IN_VAR_H_
+
+#include <sys/queue.h>
+#include <net/if_var.h> /* struct ifaddr */
+#include <netinet/in.h> /* struct in_addr */
+
+#if !defined(__rtems__)
+#include <sys/fnv_hash.h>
+#endif
+
+/*
+ * Interface address, Internet version. One of these structures
+ * is allocated for each Internet address on an interface.
+ * The ifaddr structure contains the protocol-independent part
+ * of the structure and is assumed to be first.
+ */
+struct in_ifaddr {
+ struct ifaddr ia_ifa; /* protocol-independent info */
+#define ia_ifp ia_ifa.ifa_ifp
+#define ia_flags ia_ifa.ifa_flags
+ /* ia_{,sub}net{,mask} in host order */
+ u_long ia_net; /* network number of interface */
+ u_long ia_netmask; /* mask of net part */
+ u_long ia_subnet; /* subnet number, including net */
+ u_long ia_subnetmask; /* mask of subnet part */
+ struct in_addr ia_netbroadcast; /* to recognize net broadcasts */
+ struct in_ifaddr *ia_next; /* next in list of internet addresses */
+ struct sockaddr_in ia_addr; /* reserve space for interface name */
+ struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
+#define ia_broadaddr ia_dstaddr
+ struct sockaddr_in ia_sockmask; /* reserve space for general netmask */
+ LIST_HEAD(in_multihead, in_multi) ia_multiaddrs;
+ /* list of multicast addresses */
+};
+
+struct in_aliasreq {
+ char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct sockaddr_in ifra_addr;
+ struct sockaddr_in ifra_broadaddr;
+#define ifra_dstaddr ifra_broadaddr
+ struct sockaddr_in ifra_mask;
+};
+/*
+ * Given a pointer to an in_ifaddr (ifaddr),
+ * return a pointer to the addr as a sockaddr_in.
+ */
+#define IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr))
+#define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr))
+
+#define IN_LNAOF(in, ifa) \
+	(ntohl((in).s_addr) & ~(((struct in_ifaddr *)(ifa))->ia_subnetmask))
+
+
+#ifdef _KERNEL
+extern struct in_ifaddr *in_ifaddr;
+extern struct ifqueue ipintrq; /* ip packet input queue */
+extern struct in_addr zeroin_addr;
+extern u_char inetctlerrmap[];
+
+/*
+ * Macro for finding the interface (ifnet structure) corresponding to one
+ * of our IP addresses.
+ */
+#define INADDR_TO_IFP(addr, ifp) \
+ /* struct in_addr addr; */ \
+ /* struct ifnet *ifp; */ \
+{ \
+ register struct in_ifaddr *ia; \
+\
+ for (ia = in_ifaddr; \
+ ia != NULL && ((ia->ia_ifp->if_flags & IFF_POINTOPOINT)? \
+ IA_DSTSIN(ia):IA_SIN(ia))->sin_addr.s_addr != (addr).s_addr; \
+ ia = ia->ia_next) \
+ continue; \
+ if (ia == NULL) \
+ for (ia = in_ifaddr; \
+ ia != NULL; \
+ ia = ia->ia_next) \
+ if (ia->ia_ifp->if_flags & IFF_POINTOPOINT && \
+ IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \
+ break; \
+ (ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \
+}
+
+/*
+ * Macro for finding the internet address structure (in_ifaddr) corresponding
+ * to a given interface (ifnet structure).
+ */
+#define IFP_TO_IA(ifp, ia) \
+ /* struct ifnet *ifp; */ \
+ /* struct in_ifaddr *ia; */ \
+{ \
+ for ((ia) = in_ifaddr; \
+ (ia) != NULL && (ia)->ia_ifp != (ifp); \
+ (ia) = (ia)->ia_next) \
+ continue; \
+}
+#endif
+
+/*
+ * This information should be part of the ifnet structure but we don't wish
+ * to change that - as it might break a number of things
+ */
+
+struct router_info {
+ struct ifnet *rti_ifp;
+ int rti_type; /* type of router which is querier on this interface */
+ int rti_time; /* # of slow timeouts since last old query */
+ struct router_info *rti_next;
+};
+
+/*
+ * Internet multicast address structure. There is one of these for each IP
+ * multicast group to which this host belongs on a given network interface.
+ * They are kept in a linked list, rooted in the interface's in_ifaddr
+ * structure.
+ */
+struct in_multi {
+ LIST_ENTRY(in_multi) inm_entry; /* list glue */
+ struct in_addr inm_addr; /* IP multicast address */
+ struct ifnet *inm_ifp; /* back pointer to ifnet */
+ struct in_ifaddr *inm_ia; /* back pointer to in_ifaddr */
+ u_int inm_refcount; /* no. membership claims by sockets */
+ u_int inm_timer; /* IGMP membership report timer */
+ u_int inm_state; /* state of the membership */
+ struct router_info *inm_rti; /* router info*/
+};
+
+#ifdef _KERNEL
+
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_raw);
+#endif
+
+/*
+ * Structure used by macros below to remember position when stepping through
+ * all of the in_multi records.
+ */
+struct in_multistep {
+ struct in_ifaddr *i_ia;
+ struct in_multi *i_inm;
+};
+
+/*
+ * Macro for looking up the in_multi record for a given IP multicast address
+ * on a given interface. If no matching record is found, "inm" returns NULL.
+ */
+#define IN_LOOKUP_MULTI(addr, ifp, inm) \
+ /* struct in_addr addr; */ \
+ /* struct ifnet *ifp; */ \
+ /* struct in_multi *inm; */ \
+{ \
+ register struct in_ifaddr *ia; \
+\
+ IFP_TO_IA((ifp), ia); \
+ if (ia == NULL) \
+ (inm) = NULL; \
+ else \
+ for ((inm) = ia->ia_multiaddrs.lh_first; \
+ (inm) != NULL && (inm)->inm_addr.s_addr != (addr).s_addr; \
+ (inm) = inm->inm_entry.le_next) \
+ continue; \
+}
+
+/*
+ * Macro to step through all of the in_multi records, one at a time.
+ * The current position is remembered in "step", which the caller must
+ * provide. IN_FIRST_MULTI(), below, must be called to initialize "step"
+ * and get the first record. Both macros return a NULL "inm" when there
+ * are no remaining records.
+ */
+#define IN_NEXT_MULTI(step, inm) \
+ /* struct in_multistep step; */ \
+ /* struct in_multi *inm; */ \
+{ \
+ if (((inm) = (step).i_inm) != NULL) \
+ (step).i_inm = (inm)->inm_entry.le_next; \
+ else \
+ while ((step).i_ia != NULL) { \
+ (inm) = (step).i_ia->ia_multiaddrs.lh_first; \
+ (step).i_ia = (step).i_ia->ia_next; \
+ if ((inm) != NULL) { \
+ (step).i_inm = (inm)->inm_entry.le_next; \
+ break; \
+ } \
+ } \
+}
+
+#define IN_FIRST_MULTI(step, inm) \
+ /* struct in_multistep step; */ \
+ /* struct in_multi *inm; */ \
+{ \
+ (step).i_ia = in_ifaddr; \
+ (step).i_inm = NULL; \
+ IN_NEXT_MULTI((step), (inm)); \
+}
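A hypothetical kernel-side usage sketch of the stepping macros above (walk_all_multicast_groups is an invented name, not part of this header; it assumes the surrounding declarations are in scope):

    /*
     * Hypothetical usage sketch: visit every in_multi record on the
     * system, one group per iteration, using the macros defined above.
     */
    static void
    walk_all_multicast_groups(void (*visit)(struct in_multi *))
    {
    	struct in_multistep step;
    	struct in_multi *inm;

    	IN_FIRST_MULTI(step, inm);	/* prime the walk; may yield NULL */
    	while (inm != NULL) {
    		visit(inm);
    		IN_NEXT_MULTI(step, inm);	/* advance to the next record */
    	}
    }
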
+
+struct in_multi *in_addmulti(struct in_addr *, struct ifnet *);
+void in_delmulti(struct in_multi *);
+int in_control(struct socket *, u_long, caddr_t, struct ifnet *);
+void in_rtqdrain(void);
+void ip_input(struct mbuf *);
+
+#endif /* _KERNEL */
+
+#endif /* _NETINET_IN_VAR_H_ */
diff --git a/netinet/ip.h b/netinet/ip.h
new file mode 100644
index 0000000..6a2dcad
--- /dev/null
+++ b/netinet/ip.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip.h 8.2 (Berkeley) 6/1/94
+ * $FreeBSD: src/sys/netinet/ip.h,v 1.29 2005/01/07 01:45:44 imp Exp $
+ */
+
+
+#ifndef _NETINET_IP_H_
+#define _NETINET_IP_H_
+
+#include <sys/cdefs.h>
+#include <netinet/in.h> /* struct in_addr */
+#include <netinet/in_systm.h> /* n_long */
+
+/*
+ * Definitions for internet protocol version 4.
+ * Per RFC 791, September 1981.
+ */
+#define IPVERSION 4
+
+#ifndef __packed
+#if defined(__GNUC__)
+#define __packed __attribute__((packed))
+#define __aligned(x) __attribute__((aligned(x)))
+#else
+#define __packed
+#define __aligned(x)
+#endif
+#endif
+
+/*
+ * Structure of an internet header, naked of options.
+ */
+struct ip {
+#ifdef _IP_VHL
+ u_char ip_vhl; /* version << 4 | header length >> 2 */
+#else
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_int ip_hl:4, /* header length */
+ ip_v:4; /* version */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_int ip_v:4, /* version */
+ ip_hl:4; /* header length */
+#endif
+#endif /* not _IP_VHL */
+ u_char ip_tos; /* type of service */
+ u_short ip_len; /* total length */
+ u_short ip_id; /* identification */
+ u_short ip_off; /* fragment offset field */
+#define IP_RF 0x8000 /* reserved fragment flag */
+#define IP_DF 0x4000 /* dont fragment flag */
+#define IP_MF 0x2000 /* more fragments flag */
+#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */
+ u_char ip_ttl; /* time to live */
+ u_char ip_p; /* protocol */
+ u_short ip_sum; /* checksum */
+ struct in_addr ip_src,ip_dst; /* source and dest address */
+} __packed __aligned(4);
+
+#ifdef _IP_VHL
+#define IP_MAKE_VHL(v, hl) ((v) << 4 | (hl))
+#define IP_VHL_HL(vhl) ((vhl) & 0x0f)
+#define IP_VHL_V(vhl) ((vhl) >> 4)
+#define IP_VHL_BORING 0x45
+#endif
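A small sketch showing how the header length in bytes is recovered under either layout; the helper name is hypothetical:

static int
example_ip_hdr_len(const struct ip *ip)
{
#ifdef _IP_VHL
	return (IP_VHL_HL(ip->ip_vhl) << 2);	/* low nibble, 32-bit words */
#else
	return (ip->ip_hl << 2);		/* bit-field, 32-bit words */
#endif
}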
+
+#ifdef CTASSERT
+CTASSERT(sizeof (struct ip) == 20);
+#endif
+
+#define IP_MAXPACKET 65535L /* maximum packet size */
+
+/*
+ * Definitions for IP type of service (ip_tos)
+ */
+#define IPTOS_LOWDELAY 0x10
+#define IPTOS_THROUGHPUT 0x08
+#define IPTOS_RELIABILITY 0x04
+#define IPTOS_MINCOST 0x02
+#if 1
+/* ECN RFC3168 obsoletes RFC2481, and these will be deprecated soon. */
+#define IPTOS_CE 0x01
+#define IPTOS_ECT 0x02
+#endif
+
+/*
+ * Definitions for IP precedence (also in ip_tos) (hopefully unused)
+ */
+#define IPTOS_PREC_NETCONTROL 0xe0
+#define IPTOS_PREC_INTERNETCONTROL 0xc0
+#define IPTOS_PREC_CRITIC_ECP 0xa0
+#define IPTOS_PREC_FLASHOVERRIDE 0x80
+#define IPTOS_PREC_FLASH 0x60
+#define IPTOS_PREC_IMMEDIATE 0x40
+#define IPTOS_PREC_PRIORITY 0x20
+#define IPTOS_PREC_ROUTINE 0x00
+
+/*
+ * ECN (Explicit Congestion Notification) codepoints in RFC3168
+ * mapped to the lower 2 bits of the TOS field.
+ */
+#define IPTOS_ECN_NOTECT 0x00 /* not-ECT */
+#define IPTOS_ECN_ECT1 0x01 /* ECN-capable transport (1) */
+#define IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */
+#define IPTOS_ECN_CE 0x03 /* congestion experienced */
+#define IPTOS_ECN_MASK 0x03 /* ECN field mask */
+
+/*
+ * Definitions for options.
+ */
+#define IPOPT_COPIED(o) ((o)&0x80)
+#define IPOPT_CLASS(o) ((o)&0x60)
+#define IPOPT_NUMBER(o) ((o)&0x1f)
+
+#define IPOPT_CONTROL 0x00
+#define IPOPT_RESERVED1 0x20
+#define IPOPT_DEBMEAS 0x40
+#define IPOPT_RESERVED2 0x60
+
+#define IPOPT_EOL 0 /* end of option list */
+#define IPOPT_NOP 1 /* no operation */
+
+#define IPOPT_RR 7 /* record packet route */
+#define IPOPT_TS 68 /* timestamp */
+#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */
+#define IPOPT_LSRR 131 /* loose source route */
+#define IPOPT_ESO 133 /* extended security */
+#define IPOPT_CIPSO 134 /* commercial security */
+#define IPOPT_SATID 136 /* satnet id */
+#define IPOPT_SSRR 137 /* strict source route */
+#define IPOPT_RA 148 /* router alert */
+
+/*
+ * Offsets to fields in options other than EOL and NOP.
+ */
+#define IPOPT_OPTVAL 0 /* option ID */
+#define IPOPT_OLEN 1 /* option length */
+#define IPOPT_OFFSET 2 /* offset within option */
+#define IPOPT_MINOFF 4 /* min value of above */
+
+/*
+ * Time stamp option structure.
+ */
+struct ip_timestamp {
+ u_char ipt_code; /* IPOPT_TS */
+ u_char ipt_len; /* size of structure (variable) */
+ u_char ipt_ptr; /* index of current entry */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_int ipt_flg:4, /* flags, see below */
+ ipt_oflw:4; /* overflow counter */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_int ipt_oflw:4, /* overflow counter */
+ ipt_flg:4; /* flags, see below */
+#endif
+ union ipt_timestamp {
+ n_long ipt_time[1];
+ struct ipt_ta {
+ struct in_addr ipt_addr;
+ n_long ipt_time;
+ } ipt_ta[1];
+ } ipt_timestamp;
+};
+
+#include <machine/in_cksum.h>
+
+/* flag bits for ipt_flg */
+#define IPOPT_TS_TSONLY 0 /* timestamps only */
+#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */
+#define IPOPT_TS_PRESPEC 3 /* specified modules only */
+
+/* bits for security (not byte swapped) */
+#define IPOPT_SECUR_UNCLASS 0x0000
+#define IPOPT_SECUR_CONFID 0xf135
+#define IPOPT_SECUR_EFTO 0x789a
+#define IPOPT_SECUR_MMMM 0xbc4d
+#define IPOPT_SECUR_RESTR 0xaf13
+#define IPOPT_SECUR_SECRET 0xd788
+#define IPOPT_SECUR_TOPSECRET 0x6bc5
+
+/*
+ * Internet implementation parameters.
+ */
+#define MAXTTL 255 /* maximum time to live (seconds) */
+#define IPDEFTTL 64 /* default ttl, from RFC 1340 */
+#define IPFRAGTTL 60 /* time to live for frags, slowhz */
+#define IPTTLDEC 1 /* subtracted when forwarding */
+
+#define IP_MSS 576 /* default maximum segment size */
+
+/*
+ * This is the real IPv4 pseudo header, used for computing the TCP and UDP
+ * checksums. For the Internet checksum, struct ipovly can be used instead.
+ * For stronger checksums, the real thing must be used.
+ */
+struct ippseudo {
+ struct in_addr ippseudo_src; /* source internet address */
+ struct in_addr ippseudo_dst; /* destination internet address */
+ u_char ippseudo_pad; /* pad, must be zero */
+ u_char ippseudo_p; /* protocol */
+ u_short ippseudo_len; /* protocol length */
+};
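To make the role of this structure concrete, here is a hedged, userland-style sketch (helper names are hypothetical): the pseudo header is filled in from the real IP header and summed together with the transport segment, whose own checksum field must be zero, using the Internet ones'-complement checksum. The kernel does this over mbuf chains with in_cksum(); this standalone version only illustrates the layout:

#include <stdint.h>
#include <stddef.h>

static uint32_t
example_sum16(const void *buf, size_t len, uint32_t sum)
{
	const uint8_t *p = buf;

	for (; len > 1; len -= 2, p += 2)
		sum += (uint16_t)((p[0] << 8) | p[1]);
	if (len)
		sum += (uint16_t)(p[0] << 8);	/* odd byte, zero padded */
	return (sum);
}

/* Checksum of one UDP/TCP segment "seg" of "seglen" bytes, seeded with
 * the pseudo header "ph" (addresses and ippseudo_len in network order). */
static uint16_t
example_transport_cksum(const struct ippseudo *ph, const void *seg,
    size_t seglen)
{
	uint32_t sum;

	sum = example_sum16(ph, sizeof(*ph), 0);
	sum = example_sum16(seg, seglen, sum);
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
	return ((uint16_t)~sum);	/* value of the checksum field, big-endian */
}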
+#endif
diff --git a/netinet/ip_divert.c b/netinet/ip_divert.c
new file mode 100644
index 0000000..c34da82
--- /dev/null
+++ b/netinet/ip_divert.c
@@ -0,0 +1,387 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/netinet/ip_divert.c,v 1.113 2005/05/13 11:44:37 glebius Exp $
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <errno.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+#ifdef IPDIVERT
+
+/*
+ * Divert sockets
+ */
+
+/*
+ * Allocate enough space to hold a full IP packet
+ */
+#define DIVSNDQ (65536 + 100)
+#define DIVRCVQ (65536 + 100)
+
+/* Global variables */
+
+/*
+ * ip_input() and ip_output() set this secret value before calling us to
+ * let us know which divert port to divert a packet to; this is done so
+ * we can use the existing prototype for struct protosw's pr_input().
+ * This is stored in host order.
+ */
+u_short ip_divert_port;
+
+/*
+ * We set this value to a non-zero port number when we want the call to
+ * ip_fw_chk() in ip_input() or ip_output() to ignore ``divert <port>''
+ * chain entries. This is stored in host order.
+ */
+u_short ip_divert_ignore;
+
+/* Internal variables. */
+static struct inpcbhead divcb;
+static struct inpcbinfo divcbinfo;
+
+static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */
+static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */
+
+/* Optimization: have this preinitialized */
+static struct sockaddr_in divsrc = { sizeof(divsrc), AF_INET, 0, { 0 }, { 0 } };
+
+/* Internal functions */
+
+static int div_output(struct socket *so,
+ struct mbuf *m, struct mbuf *addr, struct mbuf *control);
+
+/*
+ * Initialize divert connection block queue.
+ */
+void
+div_init(void)
+{
+ LIST_INIT(&divcb);
+ divcbinfo.listhead = &divcb;
+ /*
+ * XXX We don't use the hash list for divert IP, but it's easier
+ * to allocate a one entry hash list than it is to check all
+ * over the place for hashbase == NULL.
+ */
+ divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask);
+}
+
+/*
+ * Set up generic address and protocol structures
+ * for div_input routine, then pass them along with
+ * mbuf chain. ip->ip_len is assumed to have had
+ * the header length (hlen) subtracted out already.
+ * We tell whether the packet was incoming or outgoing
+ * by seeing if hlen == 0, which is a hack.
+ */
+void
+div_input(struct mbuf *m, int hlen)
+{
+ struct ip *ip;
+ struct inpcb *inp;
+ struct socket *sa;
+
+ /* Sanity check */
+ if (ip_divert_port == 0)
+ panic("div_input: port is 0");
+
+ /* Assure header */
+ if (m->m_len < sizeof(struct ip) &&
+ (m = m_pullup(m, sizeof(struct ip))) == 0) {
+ return;
+ }
+ ip = mtod(m, struct ip *);
+
+ /* Record divert port */
+ divsrc.sin_port = htons(ip_divert_port);
+
+ /* Restore packet header fields */
+ ip->ip_len += hlen;
+ HTONS(ip->ip_len);
+ HTONS(ip->ip_off);
+
+ /* Record receive interface address, if any */
+ divsrc.sin_addr.s_addr = 0;
+ if (hlen) {
+ struct ifaddr *ifa;
+
+#ifdef DIAGNOSTIC
+ /* Sanity check */
+ if (!(m->m_flags & M_PKTHDR))
+ panic("div_input: no pkt hdr");
+#endif
+
+ /* More fields affected by ip_input() */
+ HTONS(ip->ip_id);
+
+ /* Find IP address for receive interface */
+ for (ifa = m->m_pkthdr.rcvif->if_addrlist;
+ ifa != NULL; ifa = ifa->ifa_next) {
+ if (ifa->ifa_addr == NULL)
+ continue;
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ divsrc.sin_addr =
+ ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr;
+ break;
+ }
+ }
+
+ /* Put packet on socket queue, if any */
+ sa = NULL;
+ for (inp = divcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
+ if (inp->inp_lport == htons(ip_divert_port))
+ sa = inp->inp_socket;
+ }
+ if (sa) {
+ if (sbappendaddr(&sa->so_rcv, (struct sockaddr *)&divsrc,
+ m, (struct mbuf *)0) == 0)
+ m_freem(m);
+ else
+ sorwakeup(sa);
+ } else {
+ m_freem(m);
+ ipstat.ips_noproto++;
+ ipstat.ips_delivered--;
+ }
+}
+
+/*
+ * Deliver packet back into the IP processing machinery.
+ *
+ * If no address specified, or address is 0.0.0.0, send to ip_output();
+ * otherwise, send to ip_input() and mark as having been received on
+ * the interface with that address.
+ *
+ * If no address specified, or dest port is 0, allow packet to divert
+ * back to this socket; otherwise, don't.
+ */
+static int
+div_output(struct socket *so, struct mbuf *m,
+ struct mbuf *addr, struct mbuf *control)
+{
+ register struct inpcb *const inp = sotoinpcb(so);
+ register struct ip *const ip = mtod(m, struct ip *);
+ struct sockaddr_in *sin = NULL;
+ int error = 0;
+
+ if (control)
+ m_freem(control); /* XXX */
+ if (addr)
+ sin = mtod(addr, struct sockaddr_in *);
+
+ /* Loopback avoidance option */
+ ip_divert_ignore = ntohs(inp->inp_lport);
+
+ /* Reinject packet into the system as incoming or outgoing */
+ if (!sin || sin->sin_addr.s_addr == 0) {
+ /* Don't allow both user specified and setsockopt options,
+ and don't allow packet lengths that will crash */
+ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) ||
+ ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
+ error = EINVAL;
+ goto cantsend;
+ }
+
+ /* Convert fields to host order for ip_output() */
+ NTOHS(ip->ip_len);
+ NTOHS(ip->ip_off);
+
+ /* Send packet to output processing */
+ ipstat.ips_rawout++; /* XXX */
+ error = ip_output(m, inp->inp_options, &inp->inp_route,
+ (so->so_options & SO_DONTROUTE) |
+ IP_ALLOWBROADCAST | IP_RAWOUTPUT, inp->inp_moptions);
+ } else {
+ struct ifaddr *ifa;
+
+ /* Find receive interface with the given IP address */
+ sin->sin_port = 0;
+ if ((ifa = ifa_ifwithaddr((struct sockaddr *) sin)) == 0) {
+ error = EADDRNOTAVAIL;
+ goto cantsend;
+ }
+ m->m_pkthdr.rcvif = ifa->ifa_ifp;
+
+ /* Send packet to input processing */
+ ip_input(m);
+ }
+
+ /* Reset for next time (and other packets) */
+ ip_divert_ignore = 0;
+ return error;
+
+cantsend:
+ ip_divert_ignore = 0;
+ m_freem(m);
+ return error;
+}
+
+/*ARGSUSED*/
+int
+div_usrreq(
+ struct socket *so,
+ int req,
+ struct mbuf *m,
+ struct mbuf *nam,
+ struct mbuf *control )
+{
+ register int error = 0;
+ register struct inpcb *inp = sotoinpcb(so);
+ int s;
+
+ if (inp == NULL && req != PRU_ATTACH) {
+ error = EINVAL;
+ goto release;
+ }
+ switch (req) {
+
+ case PRU_ATTACH:
+ if (inp)
+ panic("div_attach");
+ if ((so->so_state & SS_PRIV) == 0) {
+ error = EACCES;
+ break;
+ }
+ s = splnet();
+ error = in_pcballoc(so, &divcbinfo);
+ splx(s);
+ if (error)
+ break;
+ error = soreserve(so, div_sendspace, div_recvspace);
+ if (error)
+ break;
+ inp = (struct inpcb *)so->so_pcb;
+ inp->inp_ip_p = (intptr_t)nam; /* XXX */
+ inp->inp_flags |= INP_HDRINCL;
+ /* The socket is always "connected" because
+ we always know "where" to send the packet */
+ so->so_state |= SS_ISCONNECTED;
+ break;
+
+ case PRU_DISCONNECT:
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ error = ENOTCONN;
+ break;
+ }
+ /* FALLTHROUGH */
+ case PRU_ABORT:
+ soisdisconnected(so);
+ /* FALLTHROUGH */
+ case PRU_DETACH:
+ if (inp == 0)
+ panic("div_detach");
+ in_pcbdetach(inp);
+ break;
+
+ case PRU_BIND:
+ s = splnet();
+ error = in_pcbbind(inp, nam);
+ splx(s);
+ break;
+
+ /*
+ * Mark the connection as being incapable of further input.
+ */
+ case PRU_SHUTDOWN:
+ socantsendmore(so);
+ break;
+
+ case PRU_SEND:
+ /* Packet must have a header (but that's about it) */
+ if (m->m_len < sizeof (struct ip) ||
+ (m = m_pullup(m, sizeof (struct ip))) == 0) {
+ ipstat.ips_toosmall++;
+ error = EINVAL;
+ break;
+ }
+
+ /* Send packet */
+ error = div_output(so, m, nam, control);
+ m = NULL;
+ break;
+
+ case PRU_SOCKADDR:
+ in_setsockaddr(inp, nam);
+ break;
+
+ case PRU_SENSE:
+ /*
+ * stat: don't bother with a blocksize.
+ */
+ return (0);
+
+ /*
+ * Not supported.
+ */
+ case PRU_CONNECT:
+ case PRU_CONNECT2:
+ case PRU_CONTROL:
+ case PRU_RCVOOB:
+ case PRU_RCVD:
+ case PRU_LISTEN:
+ case PRU_ACCEPT:
+ case PRU_SENDOOB:
+ case PRU_PEERADDR:
+ error = EOPNOTSUPP;
+ break;
+
+ default:
+ panic("div_usrreq");
+ }
+release:
+ if (m)
+ m_freem(m);
+ return (error);
+}
+#endif /* IPDIVERT */
diff --git a/netinet/ip_fw.c b/netinet/ip_fw.c
new file mode 100644
index 0000000..e5f8ab2
--- /dev/null
+++ b/netinet/ip_fw.c
@@ -0,0 +1,1082 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1996 Alex Nash
+ * Copyright (c) 1993 Daniel Boulet
+ * Copyright (c) 1994 Ugen J.S.Antsilevich
+ *
+ * Redistribution and use in source forms, with and without modification,
+ * are permitted provided that this entire comment appears intact.
+ *
+ * Redistribution in binary form may occur without any restrictions.
+ * Obviously, it would be nice if you gave credit where credit is due
+ * but requiring it would be too onerous.
+ *
+ * This software is provided ``AS IS'' without any warranties of any kind.
+ */
+
+/*
+ * Implement IP packet firewall
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef IPFIREWALL_MODULE
+#include "opt_ipfw.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_fw.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/udp.h>
+#include <inttypes.h>
+
+static int fw_debug = 1;
+#ifdef IPFIREWALL_VERBOSE
+static int fw_verbose = 1;
+#else
+static int fw_verbose = 0;
+#endif
+#ifdef IPFIREWALL_VERBOSE_LIMIT
+static int fw_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
+#else
+static int fw_verbose_limit = 0;
+#endif
+
+LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain;
+
+/*
+ * ccj - No current need for firewall so have provided the MIB.
+ */
+#if 0
+#ifdef SYSCTL_NODE
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW, &fw_debug, 0, "");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW, &fw_verbose, 0, "");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, &fw_verbose_limit, 0, "");
+#endif
+#endif
+
+#define dprintf(a) if (!fw_debug); else printf a
+
+#define print_ip(a) printf("%"PRId32".%"PRId32".%"PRId32".%"PRId32,\
+ (ntohl(a.s_addr)>>24)&0xFF,\
+ (ntohl(a.s_addr)>>16)&0xFF,\
+ (ntohl(a.s_addr)>>8)&0xFF,\
+ (ntohl(a.s_addr))&0xFF);
+
+#define dprint_ip(a) if (!fw_debug); else print_ip(a)
+
+static int add_entry(struct ip_fw_head *chainptr, struct ip_fw *frwl);
+static int del_entry(struct ip_fw_head *chainptr, u_short number);
+static int zero_entry(struct mbuf *m);
+static struct ip_fw *check_ipfw_struct(struct ip_fw *m);
+static struct ip_fw *check_ipfw_mbuf(struct mbuf *fw);
+static int ipopts_match(struct ip *ip, struct ip_fw *f);
+static int port_match(u_short *portptr, int nports, u_short port,
+ int range_flag);
+static int tcpflg_match(struct tcphdr *tcp, struct ip_fw *f);
+static int icmptype_match(struct icmp * icmp, struct ip_fw * f);
+static void ipfw_report(struct ip_fw *f, struct ip *ip,
+ struct ifnet *rif, struct ifnet *oif);
+
+#ifdef IPFIREWALL_MODULE
+static ip_fw_chk_t *old_chk_ptr;
+static ip_fw_ctl_t *old_ctl_ptr;
+#endif
+
+static int ip_fw_chk(struct ip **pip, int hlen,
+ struct ifnet *oif, int ignport, struct mbuf **m);
+static int ip_fw_ctl(int stage, struct mbuf **mm);
+
+static char err_prefix[] = "ip_fw_ctl:";
+
+/*
+ * Returns 1 if the port is matched by the vector, 0 otherwise
+ */
+static inline int
+port_match(u_short *portptr, int nports, u_short port, int range_flag)
+{
+ if (!nports)
+ return 1;
+ if (range_flag) {
+ if (portptr[0] <= port && port <= portptr[1]) {
+ return 1;
+ }
+ nports -= 2;
+ portptr += 2;
+ }
+ while (nports-- > 0) {
+ if (*portptr++ == port) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int
+tcpflg_match(struct tcphdr *tcp, struct ip_fw *f)
+{
+ u_char flg_set, flg_clr;
+
+ if ((f->fw_tcpf & IP_FW_TCPF_ESTAB) &&
+ (tcp->th_flags & (IP_FW_TCPF_RST | IP_FW_TCPF_ACK)))
+ return 1;
+
+ flg_set = tcp->th_flags & f->fw_tcpf;
+ flg_clr = tcp->th_flags & f->fw_tcpnf;
+
+ if (flg_set != f->fw_tcpf)
+ return 0;
+ if (flg_clr)
+ return 0;
+
+ return 1;
+}
+
+static int
+icmptype_match(struct icmp *icmp, struct ip_fw *f)
+{
+ int type;
+
+ if (!(f->fw_flg & IP_FW_F_ICMPBIT))
+ return(1);
+
+ type = icmp->icmp_type;
+
+ /* check for matching type in the bitmap */
+ if (type < IP_FW_ICMPTYPES_DIM * sizeof(unsigned) * 8 &&
+ (f->fw_icmptypes[type / (sizeof(unsigned) * 8)] &
+ (1U << (type % (8 * sizeof(unsigned))))))
+ return(1);
+
+ return(0); /* no match */
+}
+
+static int
+ipopts_match(struct ip *ip, struct ip_fw *f)
+{
+ register u_char *cp;
+ int opt, optlen, cnt;
+ u_char opts, nopts, nopts_sve;
+
+ cp = (u_char *)(ip + 1);
+ cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+ opts = f->fw_ipopt;
+ nopts = nopts_sve = f->fw_ipnopt;
+
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ optlen = cp[IPOPT_OLEN];
+ if (optlen <= 0 || optlen > cnt) {
+ return 0; /*XXX*/
+ }
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ case IPOPT_LSRR:
+ opts &= ~IP_FW_IPOPT_LSRR;
+ nopts &= ~IP_FW_IPOPT_LSRR;
+ break;
+
+ case IPOPT_SSRR:
+ opts &= ~IP_FW_IPOPT_SSRR;
+ nopts &= ~IP_FW_IPOPT_SSRR;
+ break;
+
+ case IPOPT_RR:
+ opts &= ~IP_FW_IPOPT_RR;
+ nopts &= ~IP_FW_IPOPT_RR;
+ break;
+ case IPOPT_TS:
+ opts &= ~IP_FW_IPOPT_TS;
+ nopts &= ~IP_FW_IPOPT_TS;
+ break;
+ }
+ if (opts == nopts)
+ break;
+ }
+ if (opts == 0 && nopts == nopts_sve)
+ return 1;
+ else
+ return 0;
+}
+
+static inline int
+iface_match(struct ifnet *ifp, union ip_fw_if *ifu, int byname)
+{
+ /* Check by name or by IP address */
+ if (byname) {
+ /* Check unit number (-1 is wildcard) */
+ if (ifu->fu_via_if.unit != -1
+ && ifp->if_unit != ifu->fu_via_if.unit)
+ return(0);
+ /* Check name */
+ if (strncmp(ifp->if_name, ifu->fu_via_if.name, FW_IFNLEN))
+ return(0);
+ return(1);
+ } else if (ifu->fu_via_ip.s_addr != 0) { /* Zero == wildcard */
+ struct ifaddr *ia;
+
+ for (ia = ifp->if_addrlist; ia; ia = ia->ifa_next) {
+ if (ia->ifa_addr == NULL)
+ continue;
+ if (ia->ifa_addr->sa_family != AF_INET)
+ continue;
+ if (ifu->fu_via_ip.s_addr != ((struct sockaddr_in *)
+ (ia->ifa_addr))->sin_addr.s_addr)
+ continue;
+ return(1);
+ }
+ return(0);
+ }
+ return(1);
+}
+
+static void
+ipfw_report(struct ip_fw *f, struct ip *ip,
+ struct ifnet *rif, struct ifnet *oif)
+{
+ static int counter;
+ struct tcphdr *const tcp = (struct tcphdr *) ((u_long *) ip+ ip->ip_hl);
+ struct udphdr *const udp = (struct udphdr *) ((u_long *) ip+ ip->ip_hl);
+ struct icmp *const icmp = (struct icmp *) ((u_long *) ip + ip->ip_hl);
+ int count;
+
+ count = f ? f->fw_pcnt : ++counter;
+ if (fw_verbose_limit != 0 && count > fw_verbose_limit)
+ return;
+
+ /* Print command name */
+ printf("ipfw: %d ", f ? f->fw_number : -1);
+ if (!f)
+ printf("Refuse");
+ else
+ switch (f->fw_flg & IP_FW_F_COMMAND) {
+ case IP_FW_F_DENY:
+ printf("Deny");
+ break;
+ case IP_FW_F_REJECT:
+ if (f->fw_reject_code == IP_FW_REJECT_RST)
+ printf("Reset");
+ else
+ printf("Unreach");
+ break;
+ case IP_FW_F_ACCEPT:
+ printf("Accept");
+ break;
+ case IP_FW_F_COUNT:
+ printf("Count");
+ break;
+ case IP_FW_F_DIVERT:
+ printf("Divert %d", f->fw_divert_port);
+ break;
+ case IP_FW_F_TEE:
+ printf("Tee %d", f->fw_divert_port);
+ break;
+ case IP_FW_F_SKIPTO:
+ printf("SkipTo %d", f->fw_skipto_rule);
+ break;
+ default:
+ printf("UNKNOWN");
+ break;
+ }
+ printf(" ");
+
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ printf("TCP ");
+ print_ip(ip->ip_src);
+ if ((ip->ip_off & IP_OFFMASK) == 0)
+ printf(":%d ", ntohs(tcp->th_sport));
+ else
+ printf(" ");
+ print_ip(ip->ip_dst);
+ if ((ip->ip_off & IP_OFFMASK) == 0)
+ printf(":%d", ntohs(tcp->th_dport));
+ break;
+ case IPPROTO_UDP:
+ printf("UDP ");
+ print_ip(ip->ip_src);
+ if ((ip->ip_off & IP_OFFMASK) == 0)
+ printf(":%d ", ntohs(udp->uh_sport));
+ else
+ printf(" ");
+ print_ip(ip->ip_dst);
+ if ((ip->ip_off & IP_OFFMASK) == 0)
+ printf(":%d", ntohs(udp->uh_dport));
+ break;
+ case IPPROTO_ICMP:
+ printf("ICMP:%u.%u ", icmp->icmp_type, icmp->icmp_code);
+ print_ip(ip->ip_src);
+ printf(" ");
+ print_ip(ip->ip_dst);
+ break;
+ default:
+ printf("P:%d ", ip->ip_p);
+ print_ip(ip->ip_src);
+ printf(" ");
+ print_ip(ip->ip_dst);
+ break;
+ }
+ if (oif)
+ printf(" out via %s%d", oif->if_name, oif->if_unit);
+ else if (rif)
+ printf(" in via %s%d", rif->if_name, rif->if_unit);
+ if ((ip->ip_off & IP_OFFMASK))
+ printf(" Fragment = %d",ip->ip_off & IP_OFFMASK);
+ printf("\n");
+ if (fw_verbose_limit != 0 && count == fw_verbose_limit)
+ printf("ipfw: limit reached on rule #%d\n",
+ f ? f->fw_number : -1);
+}
+
+/*
+ * Parameters:
+ *
+ * ip Pointer to packet header (struct ip *)
+ * hlen Packet header length
+ * oif Outgoing interface, or NULL if packet is incoming
+ * ignport Ignore all divert/tee rules to this port (if non-zero)
+ * *m The packet; we set to NULL when/if we nuke it.
+ *
+ * Return value:
+ *
+ * 0 The packet is to be accepted and routed normally OR
+ * the packet was denied/rejected and has been dropped;
+ * in the latter case, *m is equal to NULL upon return.
+ * port Divert the packet to port.
+ */
+
+static int
+ip_fw_chk(struct ip **pip, int hlen,
+ struct ifnet *oif, int ignport, struct mbuf **m)
+{
+ struct ip_fw_chain *chain;
+ struct ip_fw *rule = NULL;
+ struct ip *ip = *pip;
+ struct ifnet *const rif = (*m)->m_pkthdr.rcvif;
+ u_short offset = (ip->ip_off & IP_OFFMASK);
+ u_short src_port, dst_port;
+
+ /*
+ * Go down the chain, looking for enlightenment
+ */
+ for (chain=ip_fw_chain.lh_first; chain; chain = chain->chain.le_next) {
+ register struct ip_fw *const f = chain->rule;
+
+ /* Check direction inbound */
+ if (!oif && !(f->fw_flg & IP_FW_F_IN))
+ continue;
+
+ /* Check direction outbound */
+ if (oif && !(f->fw_flg & IP_FW_F_OUT))
+ continue;
+
+ /* Fragments */
+ if ((f->fw_flg & IP_FW_F_FRAG) && !(ip->ip_off & IP_OFFMASK))
+ continue;
+
+ /* If src-addr doesn't match, not this rule. */
+ if (((f->fw_flg & IP_FW_F_INVSRC) != 0) ^ ((ip->ip_src.s_addr
+ & f->fw_smsk.s_addr) != f->fw_src.s_addr))
+ continue;
+
+ /* If dest-addr doesn't match, not this rule. */
+ if (((f->fw_flg & IP_FW_F_INVDST) != 0) ^ ((ip->ip_dst.s_addr
+ & f->fw_dmsk.s_addr) != f->fw_dst.s_addr))
+ continue;
+
+ /* Interface check */
+ if ((f->fw_flg & IF_FW_F_VIAHACK) == IF_FW_F_VIAHACK) {
+ struct ifnet *const iface = oif ? oif : rif;
+
+ /* Backwards compatibility hack for "via" */
+ if (!iface || !iface_match(iface,
+ &f->fw_in_if, f->fw_flg & IP_FW_F_OIFNAME))
+ continue;
+ } else {
+ /* Check receive interface */
+ if ((f->fw_flg & IP_FW_F_IIFACE)
+ && (!rif || !iface_match(rif,
+ &f->fw_in_if, f->fw_flg & IP_FW_F_IIFNAME)))
+ continue;
+ /* Check outgoing interface */
+ if ((f->fw_flg & IP_FW_F_OIFACE)
+ && (!oif || !iface_match(oif,
+ &f->fw_out_if, f->fw_flg & IP_FW_F_OIFNAME)))
+ continue;
+ }
+
+ /* Check IP options */
+ if (f->fw_ipopt != f->fw_ipnopt && !ipopts_match(ip, f))
+ continue;
+
+ /* Check protocol; if wildcard, match */
+ if (f->fw_prot == IPPROTO_IP)
+ goto got_match;
+
+ /* If different, don't match */
+ if (ip->ip_p != f->fw_prot)
+ continue;
+
+#define PULLUP_TO(len) do { \
+ if ((*m)->m_len < (len) \
+ && (*m = m_pullup(*m, (len))) == 0) { \
+ goto bogusfrag; \
+ } \
+ *pip = ip = mtod(*m, struct ip *); \
+ offset = (ip->ip_off & IP_OFFMASK); \
+ } while (0)
+
+ /* Protocol specific checks */
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ {
+ struct tcphdr *tcp;
+
+ if (offset == 1) /* cf. RFC 1858 */
+ goto bogusfrag;
+ if (offset != 0) {
+ /*
+ * TCP flags and ports aren't available in this
+ * packet -- if this rule specified either one,
+ * we consider the rule a non-match.
+ */
+ if (f->fw_nports != 0 ||
+ f->fw_tcpf != f->fw_tcpnf)
+ continue;
+
+ break;
+ }
+ PULLUP_TO(hlen + 14);
+ tcp = (struct tcphdr *) ((u_long *)ip + ip->ip_hl);
+ if (f->fw_tcpf != f->fw_tcpnf && !tcpflg_match(tcp, f))
+ continue;
+ src_port = ntohs(tcp->th_sport);
+ dst_port = ntohs(tcp->th_dport);
+ goto check_ports;
+ }
+
+ case IPPROTO_UDP:
+ {
+ struct udphdr *udp;
+
+ if (offset != 0) {
+ /*
+ * Port specification is unavailable -- if this
+ * rule specifies a port, we consider the rule
+ * a non-match.
+ */
+ if (f->fw_nports != 0)
+ continue;
+
+ break;
+ }
+ PULLUP_TO(hlen + 4);
+ udp = (struct udphdr *) ((u_long *)ip + ip->ip_hl);
+ src_port = ntohs(udp->uh_sport);
+ dst_port = ntohs(udp->uh_dport);
+check_ports:
+ if (!port_match(&f->fw_pts[0],
+ IP_FW_GETNSRCP(f), src_port,
+ f->fw_flg & IP_FW_F_SRNG))
+ continue;
+ if (!port_match(&f->fw_pts[IP_FW_GETNSRCP(f)],
+ IP_FW_GETNDSTP(f), dst_port,
+ f->fw_flg & IP_FW_F_DRNG))
+ continue;
+ break;
+ }
+
+ case IPPROTO_ICMP:
+ {
+ struct icmp *icmp;
+
+ if (offset != 0) /* Type isn't valid */
+ break;
+ PULLUP_TO(hlen + 2);
+ icmp = (struct icmp *) ((u_long *)ip + ip->ip_hl);
+ if (!icmptype_match(icmp, f))
+ continue;
+ break;
+ }
+#undef PULLUP_TO
+
+bogusfrag:
+ if (fw_verbose)
+ ipfw_report(NULL, ip, rif, oif);
+ goto dropit;
+ }
+
+got_match:
+ /* Ignore divert/tee rule if socket port is "ignport" */
+ switch (f->fw_flg & IP_FW_F_COMMAND) {
+ case IP_FW_F_DIVERT:
+ case IP_FW_F_TEE:
+ if (f->fw_divert_port == ignport)
+ continue; /* ignore this rule */
+ break;
+ }
+
+ /* Update statistics */
+ f->fw_pcnt += 1;
+ f->fw_bcnt += ip->ip_len;
+ f->timestamp = rtems_bsdnet_seconds_since_boot();
+
+ /* Log to console if desired */
+ if ((f->fw_flg & IP_FW_F_PRN) && fw_verbose)
+ ipfw_report(f, ip, rif, oif);
+
+ /* Take appropriate action */
+ switch (f->fw_flg & IP_FW_F_COMMAND) {
+ case IP_FW_F_ACCEPT:
+ return(0);
+ case IP_FW_F_COUNT:
+ continue;
+ case IP_FW_F_DIVERT:
+ return(f->fw_divert_port);
+ case IP_FW_F_TEE:
+ /*
+ * XXX someday tee packet here, but beware that you
+ * can't use m_copym() or m_copypacket() because
+ * the divert input routine modifies the mbuf
+ * (and these routines only increment reference
+ * counts in the case of mbuf clusters), so need
+ * to write custom routine.
+ */
+ continue;
+ case IP_FW_F_SKIPTO:
+#ifdef DIAGNOSTIC
+ while (chain->chain.le_next
+ && chain->chain.le_next->rule->fw_number
+ < f->fw_skipto_rule)
+#else
+ while (chain->chain.le_next->rule->fw_number
+ < f->fw_skipto_rule)
+#endif
+ chain = chain->chain.le_next;
+ continue;
+ }
+
+ /* Deny/reject this packet using this rule */
+ rule = f;
+ break;
+ }
+
+#ifdef DIAGNOSTIC
+ /* Rule 65535 should always be there and should always match */
+ if (!chain)
+ panic("ip_fw: chain");
+#endif
+
+ /*
+ * At this point, we're going to drop the packet.
+ * Send a reject notice if all of the following are true:
+ *
+ * - The packet matched a reject rule
+ * - The packet is not an ICMP packet
+ * - The packet is not a multicast or broadcast packet
+ */
+ if ((rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_REJECT
+ && ip->ip_p != IPPROTO_ICMP
+ && !((*m)->m_flags & (M_BCAST|M_MCAST))
+ && !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ switch (rule->fw_reject_code) {
+ case IP_FW_REJECT_RST:
+ {
+ struct tcphdr *const tcp =
+ (struct tcphdr *) ((u_long *)ip + ip->ip_hl);
+ struct tcpiphdr ti, *const tip = (struct tcpiphdr *) ip;
+
+ if (offset != 0 || (tcp->th_flags & TH_RST))
+ break;
+ ti.ti_i = *((struct ipovly *) ip);
+ ti.ti_t = *tcp;
+ bcopy(&ti, ip, sizeof(ti));
+ NTOHL(tip->ti_seq);
+ NTOHL(tip->ti_ack);
+ tip->ti_len = ip->ip_len - hlen - (tip->ti_off << 2);
+ if (tcp->th_flags & TH_ACK) {
+ tcp_respond(NULL, tip, *m,
+ (tcp_seq)0, ntohl(tcp->th_ack), TH_RST);
+ } else {
+ if (tcp->th_flags & TH_SYN)
+ tip->ti_len++;
+ tcp_respond(NULL, tip, *m, tip->ti_seq
+ + tip->ti_len, (tcp_seq)0, TH_RST|TH_ACK);
+ }
+ *m = NULL;
+ break;
+ }
+ default: /* Send an ICMP unreachable using code */
+ icmp_error(*m, ICMP_UNREACH,
+ rule->fw_reject_code, 0L, 0);
+ *m = NULL;
+ break;
+ }
+ }
+
+dropit:
+ /*
+ * Finally, drop the packet.
+ */
+ if (*m) {
+ m_freem(*m);
+ *m = NULL;
+ }
+ return(0);
+}
+
+static int
+add_entry(struct ip_fw_head *chainptr, struct ip_fw *frwl)
+{
+ struct ip_fw *ftmp = 0;
+ struct ip_fw_chain *fwc = 0, *fcp, *fcpl = 0;
+ u_short nbr = 0;
+ int s;
+
+ fwc = malloc(sizeof *fwc, M_IPFW, M_DONTWAIT);
+ ftmp = malloc(sizeof *ftmp, M_IPFW, M_DONTWAIT);
+ if (!fwc || !ftmp) {
+ dprintf(("%s malloc said no\n", err_prefix));
+ if (fwc) free(fwc, M_IPFW);
+ if (ftmp) free(ftmp, M_IPFW);
+ return (ENOSPC);
+ }
+
+ bcopy(frwl, ftmp, sizeof(struct ip_fw));
+ ftmp->fw_in_if.fu_via_if.name[FW_IFNLEN - 1] = '\0';
+ ftmp->fw_pcnt = 0L;
+ ftmp->fw_bcnt = 0L;
+ fwc->rule = ftmp;
+
+ s = splnet();
+
+ if (!chainptr->lh_first) {
+ LIST_INSERT_HEAD(chainptr, fwc, chain);
+ splx(s);
+ return(0);
+ } else if (ftmp->fw_number == (u_short)-1) {
+ if (fwc) free(fwc, M_IPFW);
+ if (ftmp) free(ftmp, M_IPFW);
+ splx(s);
+ dprintf(("%s bad rule number\n", err_prefix));
+ return (EINVAL);
+ }
+
+ /* If entry number is 0, find highest numbered rule and add 100 */
+ if (ftmp->fw_number == 0) {
+ for (fcp = chainptr->lh_first; fcp; fcp = fcp->chain.le_next) {
+ if (fcp->rule->fw_number != (u_short)-1)
+ nbr = fcp->rule->fw_number;
+ else
+ break;
+ }
+ if (nbr < (u_short)-1 - 100)
+ nbr += 100;
+ ftmp->fw_number = nbr;
+ }
+
+ /* Got a valid number; now insert it, keeping the list ordered */
+ for (fcp = chainptr->lh_first; fcp; fcp = fcp->chain.le_next) {
+ if (fcp->rule->fw_number > ftmp->fw_number) {
+ if (fcpl) {
+ LIST_INSERT_AFTER(fcpl, fwc, chain);
+ } else {
+ LIST_INSERT_HEAD(chainptr, fwc, chain);
+ }
+ break;
+ } else {
+ fcpl = fcp;
+ }
+ }
+
+ splx(s);
+ return (0);
+}
+
+static int
+del_entry(struct ip_fw_head *chainptr, u_short number)
+{
+ struct ip_fw_chain *fcp;
+ int s;
+
+ s = splnet();
+
+ fcp = chainptr->lh_first;
+ if (number != (u_short)-1) {
+ for (; fcp; fcp = fcp->chain.le_next) {
+ if (fcp->rule->fw_number == number) {
+ LIST_REMOVE(fcp, chain);
+ splx(s);
+ free(fcp->rule, M_IPFW);
+ free(fcp, M_IPFW);
+ return 0;
+ }
+ }
+ }
+
+ splx(s);
+ return (EINVAL);
+}
+
+static int
+zero_entry(struct mbuf *m)
+{
+ struct ip_fw *frwl;
+ struct ip_fw_chain *fcp;
+ int s;
+
+ if (m) {
+ if (m->m_len != sizeof(struct ip_fw))
+ return(EINVAL);
+ frwl = mtod(m, struct ip_fw *);
+ }
+ else
+ frwl = NULL;
+
+ /*
+ * It's possible to insert multiple chain entries with the
+ * same number, so we don't stop after finding the first
+ * match if zeroing a specific entry.
+ */
+ s = splnet();
+ for (fcp = ip_fw_chain.lh_first; fcp; fcp = fcp->chain.le_next)
+ if (!frwl || frwl->fw_number == fcp->rule->fw_number) {
+ fcp->rule->fw_bcnt = fcp->rule->fw_pcnt = 0;
+ fcp->rule->timestamp = 0;
+ }
+ splx(s);
+
+ if (fw_verbose) {
+ if (frwl)
+ printf("ipfw: Entry %d cleared.\n", frwl->fw_number);
+ else
+ printf("ipfw: Accounting cleared.\n");
+ }
+
+ return(0);
+}
+
+static struct ip_fw *
+check_ipfw_mbuf(struct mbuf *m)
+{
+ /* Check length */
+ if (m->m_len != sizeof(struct ip_fw)) {
+ dprintf(("%s len=%d, want %d\n", err_prefix, m->m_len,
+ (int)sizeof(struct ip_fw)));
+ return (NULL);
+ }
+ return(check_ipfw_struct(mtod(m, struct ip_fw *)));
+}
+
+static struct ip_fw *
+check_ipfw_struct(struct ip_fw *frwl)
+{
+ /* Check for invalid flag bits */
+ if ((frwl->fw_flg & ~IP_FW_F_MASK) != 0) {
+ dprintf(("%s undefined flag bits set (flags=%x)\n",
+ err_prefix, frwl->fw_flg));
+ return (NULL);
+ }
+ /* Must apply to incoming or outgoing (or both) */
+ if (!(frwl->fw_flg & (IP_FW_F_IN | IP_FW_F_OUT))) {
+ dprintf(("%s neither in nor out\n", err_prefix));
+ return (NULL);
+ }
+ /* Empty interface name is no good */
+ if (((frwl->fw_flg & IP_FW_F_IIFNAME)
+ && !*frwl->fw_in_if.fu_via_if.name)
+ || ((frwl->fw_flg & IP_FW_F_OIFNAME)
+ && !*frwl->fw_out_if.fu_via_if.name)) {
+ dprintf(("%s empty interface name\n", err_prefix));
+ return (NULL);
+ }
+ /* Sanity check interface matching */
+ if ((frwl->fw_flg & IF_FW_F_VIAHACK) == IF_FW_F_VIAHACK) {
+ ; /* allow "via" backwards compatibility */
+ } else if ((frwl->fw_flg & IP_FW_F_IN)
+ && (frwl->fw_flg & IP_FW_F_OIFACE)) {
+ dprintf(("%s outgoing interface check on incoming\n",
+ err_prefix));
+ return (NULL);
+ }
+ /* Sanity check port ranges */
+ if ((frwl->fw_flg & IP_FW_F_SRNG) && IP_FW_GETNSRCP(frwl) < 2) {
+ dprintf(("%s src range set but n_src_p=%d\n",
+ err_prefix, IP_FW_GETNSRCP(frwl)));
+ return (NULL);
+ }
+ if ((frwl->fw_flg & IP_FW_F_DRNG) && IP_FW_GETNDSTP(frwl) < 2) {
+ dprintf(("%s dst range set but n_dst_p=%d\n",
+ err_prefix, IP_FW_GETNDSTP(frwl)));
+ return (NULL);
+ }
+ if (IP_FW_GETNSRCP(frwl) + IP_FW_GETNDSTP(frwl) > IP_FW_MAX_PORTS) {
+ dprintf(("%s too many ports (%d+%d)\n",
+ err_prefix, IP_FW_GETNSRCP(frwl), IP_FW_GETNDSTP(frwl)));
+ return (NULL);
+ }
+ /*
+ * Protocols other than TCP/UDP don't use port range
+ */
+ if ((frwl->fw_prot != IPPROTO_TCP) &&
+ (frwl->fw_prot != IPPROTO_UDP) &&
+ (IP_FW_GETNSRCP(frwl) || IP_FW_GETNDSTP(frwl))) {
+ dprintf(("%s port(s) specified for non TCP/UDP rule\n",
+ err_prefix));
+ return(NULL);
+ }
+
+ /*
+ * Rather than modify the entry to make such entries work,
+ * we reject this rule and require user level utilities
+ * to enforce whatever policy they deem appropriate.
+ */
+ if ((frwl->fw_src.s_addr & (~frwl->fw_smsk.s_addr)) ||
+ (frwl->fw_dst.s_addr & (~frwl->fw_dmsk.s_addr))) {
+ dprintf(("%s rule never matches\n", err_prefix));
+ return(NULL);
+ }
+
+ if ((frwl->fw_flg & IP_FW_F_FRAG) &&
+ (frwl->fw_prot == IPPROTO_UDP || frwl->fw_prot == IPPROTO_TCP)) {
+ if (frwl->fw_nports) {
+ dprintf(("%s cannot mix 'frag' and ports\n", err_prefix));
+ return(NULL);
+ }
+ if (frwl->fw_prot == IPPROTO_TCP &&
+ frwl->fw_tcpf != frwl->fw_tcpnf) {
+ dprintf(("%s cannot mix 'frag' with TCP flags\n", err_prefix));
+ return(NULL);
+ }
+ }
+
+ /* Check command specific stuff */
+ switch (frwl->fw_flg & IP_FW_F_COMMAND)
+ {
+ case IP_FW_F_REJECT:
+ if (frwl->fw_reject_code >= 0x100
+ && !(frwl->fw_prot == IPPROTO_TCP
+ && frwl->fw_reject_code == IP_FW_REJECT_RST)) {
+ dprintf(("%s unknown reject code\n", err_prefix));
+ return(NULL);
+ }
+ break;
+ case IP_FW_F_DIVERT: /* Diverting to port zero is invalid */
+ case IP_FW_F_TEE:
+ if (frwl->fw_divert_port == 0) {
+ dprintf(("%s can't divert to port 0\n", err_prefix));
+ return (NULL);
+ }
+ break;
+ case IP_FW_F_DENY:
+ case IP_FW_F_ACCEPT:
+ case IP_FW_F_COUNT:
+ case IP_FW_F_SKIPTO:
+ break;
+ default:
+ dprintf(("%s invalid command\n", err_prefix));
+ return(NULL);
+ }
+
+ return frwl;
+}
+
+static int
+ip_fw_ctl(int stage, struct mbuf **mm)
+{
+ int error;
+ struct mbuf *m;
+
+ if (stage == IP_FW_GET) {
+ struct ip_fw_chain *fcp = ip_fw_chain.lh_first;
+ *mm = m = m_get(M_WAIT, MT_SOOPTS);
+ for (; fcp; fcp = fcp->chain.le_next) {
+ memcpy(m->m_data, fcp->rule, sizeof *(fcp->rule));
+ m->m_len = sizeof *(fcp->rule);
+ m->m_next = m_get(M_WAIT, MT_SOOPTS);
+ m = m->m_next;
+ m->m_len = 0;
+ }
+ return (0);
+ }
+ m = *mm;
+ /* when securelevel > 2, only IP_FW_GET calls are allowed */
+ if (securelevel > 2) {
+ if (m) (void)m_free(m);
+ return(EPERM);
+ }
+ if (stage == IP_FW_FLUSH) {
+ while (ip_fw_chain.lh_first != NULL &&
+ ip_fw_chain.lh_first->rule->fw_number != (u_short)-1) {
+ struct ip_fw_chain *fcp = ip_fw_chain.lh_first;
+ int s = splnet();
+ LIST_REMOVE(ip_fw_chain.lh_first, chain);
+ splx(s);
+ free(fcp->rule, M_IPFW);
+ free(fcp, M_IPFW);
+ }
+ if (m) (void)m_free(m);
+ return (0);
+ }
+ if (stage == IP_FW_ZERO) {
+ error = zero_entry(m);
+ if (m) (void)m_free(m);
+ return (error);
+ }
+ if (m == NULL) {
+ printf("%s NULL mbuf ptr\n", err_prefix);
+ return (EINVAL);
+ }
+
+ if (stage == IP_FW_ADD) {
+ struct ip_fw *frwl = check_ipfw_mbuf(m);
+
+ if (!frwl)
+ error = EINVAL;
+ else
+ error = add_entry(&ip_fw_chain, frwl);
+ if (m) (void)m_free(m);
+ return error;
+ }
+ if (stage == IP_FW_DEL) {
+ if (m->m_len != sizeof(struct ip_fw)) {
+ dprintf(("%s len=%d, want %d\n", err_prefix, m->m_len,
+ (int)sizeof(struct ip_fw)));
+ error = EINVAL;
+ } else if (mtod(m, struct ip_fw *)->fw_number == (u_short)-1) {
+ dprintf(("%s can't delete rule 65535\n", err_prefix));
+ error = EINVAL;
+ } else
+ error = del_entry(&ip_fw_chain,
+ mtod(m, struct ip_fw *)->fw_number);
+ if (m) (void)m_free(m);
+ return error;
+ }
+
+ dprintf(("%s unknown request %d\n", err_prefix, stage));
+ if (m) (void)m_free(m);
+ return (EINVAL);
+}
+
+void
+ip_fw_init(void)
+{
+ struct ip_fw default_rule;
+
+ ip_fw_chk_ptr = ip_fw_chk;
+ ip_fw_ctl_ptr = ip_fw_ctl;
+ LIST_INIT(&ip_fw_chain);
+
+ bzero(&default_rule, sizeof default_rule);
+ default_rule.fw_prot = IPPROTO_IP;
+ default_rule.fw_number = (u_short)-1;
+#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
+ default_rule.fw_flg |= IP_FW_F_ACCEPT;
+#else
+ default_rule.fw_flg |= IP_FW_F_DENY;
+#endif
+ default_rule.fw_flg |= IP_FW_F_IN | IP_FW_F_OUT;
+ if (check_ipfw_struct(&default_rule) == NULL ||
+ add_entry(&ip_fw_chain, &default_rule))
+ panic(__FUNCTION__);
+
+ printf("IP packet filtering initialized, "
+#ifdef IPDIVERT
+ "divert enabled, ");
+#else
+ "divert disabled, ");
+#endif
+#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
+ printf("default to accept, ");
+#endif
+#ifndef IPFIREWALL_VERBOSE
+ printf("logging disabled\n");
+#else
+ if (fw_verbose_limit == 0)
+ printf("unlimited logging\n");
+ else
+ printf("logging limited to %d packets/entry\n",
+ fw_verbose_limit);
+#endif
+}
+
+#ifdef IPFIREWALL_MODULE
+
+#include <sys/exec.h>
+#include <sys/sysent.h>
+#include <sys/lkm.h>
+
+MOD_MISC(ipfw);
+
+static int
+ipfw_load(struct lkm_table *lkmtp, int cmd)
+{
+ int s=splnet();
+
+ old_chk_ptr = ip_fw_chk_ptr;
+ old_ctl_ptr = ip_fw_ctl_ptr;
+
+ ip_fw_init();
+ splx(s);
+ return 0;
+}
+
+static int
+ipfw_unload(struct lkm_table *lkmtp, int cmd)
+{
+ int s=splnet();
+
+ ip_fw_chk_ptr = old_chk_ptr;
+ ip_fw_ctl_ptr = old_ctl_ptr;
+
+ while (ip_fw_chain.lh_first != NULL) {
+ struct ip_fw_chain *fcp = ip_fw_chain.lh_first;
+ LIST_REMOVE(ip_fw_chain.lh_first, chain);
+ free(fcp->rule, M_IPFW);
+ free(fcp, M_IPFW);
+ }
+
+ splx(s);
+ printf("IP firewall unloaded\n");
+ return 0;
+}
+
+int
+ipfw_mod(struct lkm_table *lkmtp, int cmd, int ver)
+{
+ DISPATCH(lkmtp, cmd, ver, ipfw_load, ipfw_unload, lkm_nullcmd);
+}
+#endif
diff --git a/netinet/ip_fw.h b/netinet/ip_fw.h
new file mode 100644
index 0000000..c511154
--- /dev/null
+++ b/netinet/ip_fw.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 1993 Daniel Boulet
+ * Copyright (c) 1994 Ugen J.S.Antsilevich
+ *
+ * Redistribution and use in source forms, with and without modification,
+ * are permitted provided that this entire comment appears intact.
+ *
+ * Redistribution in binary form may occur without any restrictions.
+ * Obviously, it would be nice if you gave credit where credit is due
+ * but requiring it would be too onerous.
+ *
+ * This software is provided ``AS IS'' without any warranties of any kind.
+ */
+
+#ifndef _IP_FW_H
+#define _IP_FW_H
+
+#include <sys/queue.h> /* LIST_ENTRY */
+#include <net/if.h>
+#include <netinet/in.h> /* struct in_addr */
+
+/*
+ * This union structure identifies an interface, either explicitly
+ * by name or implicitly by IP address. The flags IP_FW_F_IIFNAME
+ * and IP_FW_F_OIFNAME say how to interpret this structure. An
+ * interface unit number of -1 matches any unit number, while an
+ * IP address of 0.0.0.0 matches any interface.
+ *
+ * The receive and transmit interfaces are only compared against the
+ * packet if the corresponding bit (IP_FW_F_IIFACE or IP_FW_F_OIFACE)
+ * is set. Note that some packets lack a receive or transmit interface
+ * (in which case the missing "interface" never matches).
+ */
+
+union ip_fw_if {
+ struct in_addr fu_via_ip; /* Specified by IP address */
+ struct { /* Specified by interface name */
+#define FW_IFNLEN IFNAMSIZ
+ char name[FW_IFNLEN];
+ short unit; /* -1 means match any unit */
+ } fu_via_if;
+};
+
+/*
+ * Format of an IP firewall descriptor
+ *
+ * fw_src, fw_dst, fw_smsk, fw_dmsk are always stored in network byte order.
+ * fw_flg and fw_n*p are stored in host byte order (of course).
+ * Port numbers are stored in HOST byte order.
+ * Warning: setsockopt() will fail if sizeof(struct ip_fw) > MLEN (108)
+ */
+
+struct ip_fw {
+ u_long fw_pcnt,fw_bcnt; /* Packet and byte counters */
+ struct in_addr fw_src, fw_dst; /* Source and destination IP addr */
+ struct in_addr fw_smsk, fw_dmsk; /* Mask for src and dest IP addr */
+ u_short fw_number; /* Rule number */
+ u_short fw_flg; /* Flags word */
+#define IP_FW_MAX_PORTS 10 /* A reasonable maximum */
+ u_short fw_pts[IP_FW_MAX_PORTS]; /* Array of port numbers to match */
+ u_char fw_ipopt,fw_ipnopt; /* IP options set/unset */
+ u_char fw_tcpf,fw_tcpnf; /* TCP flags set/unset */
+#define IP_FW_ICMPTYPES_DIM (32 / (sizeof(unsigned) * 8))
+ unsigned fw_icmptypes[IP_FW_ICMPTYPES_DIM]; /* ICMP types bitmap */
+ long timestamp; /* timestamp (tv_sec) of last match */
+ union ip_fw_if fw_in_if, fw_out_if; /* Incoming and outgoing interfaces */
+ union {
+ u_short fu_divert_port; /* Divert/tee port (options IPDIVERT) */
+ u_short fu_skipto_rule; /* SKIPTO command rule number */
+ u_short fu_reject_code; /* REJECT response code */
+ } fw_un;
+ u_char fw_prot; /* IP protocol */
+ u_char fw_nports; /* no. of src ports and # of dst ports */
+ /* in ports array (dst ports follow */
+ /* src ports; max of 10 ports in all; */
+ /* count of 0 means match all ports) */
+};
+
+#define IP_FW_GETNSRCP(rule) ((rule)->fw_nports & 0x0f)
+#define IP_FW_SETNSRCP(rule, n) do { \
+ (rule)->fw_nports &= ~0x0f; \
+ (rule)->fw_nports |= (n); \
+ } while (0)
+#define IP_FW_GETNDSTP(rule) ((rule)->fw_nports >> 4)
+#define IP_FW_SETNDSTP(rule, n) do { \
+ (rule)->fw_nports &= ~0xf0; \
+ (rule)->fw_nports |= (n) << 4;\
+ } while (0)
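A hedged sketch of filling in a rule under the byte-order rules stated above (addresses and masks in network order; rule number, flags, and ports in host order). The helper name and the specific policy are illustrative only, and delivery of the rule to the kernel (normally a raw-IP setsockopt) is not shown:

#include <string.h>
#include <arpa/inet.h>
#include <netinet/ip_fw.h>

/* Illustrative rule: accept inbound TCP to 10.0.0.1 port 23. */
static void
example_fill_rule(struct ip_fw *rule)
{
	memset(rule, 0, sizeof(*rule));

	rule->fw_number = 1000;
	rule->fw_flg = IP_FW_F_IN | IP_FW_F_ACCEPT;
	rule->fw_prot = IPPROTO_TCP;

	rule->fw_dst.s_addr = inet_addr("10.0.0.1");		/* network order */
	rule->fw_dmsk.s_addr = inet_addr("255.255.255.255");	/* host match */

	rule->fw_pts[0] = 23;		/* dst ports follow src ports (none here) */
	IP_FW_SETNSRCP(rule, 0);
	IP_FW_SETNDSTP(rule, 1);
}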
+
+#define fw_divert_port fw_un.fu_divert_port
+#define fw_skipto_rule fw_un.fu_skipto_rule
+#define fw_reject_code fw_un.fu_reject_code
+
+struct ip_fw_chain {
+ LIST_ENTRY(ip_fw_chain) chain;
+ struct ip_fw *rule;
+};
+
+/*
+ * Values for "flags" field .
+ */
+#define IP_FW_F_IN 0x0001 /* Check inbound packets */
+#define IP_FW_F_OUT 0x0002 /* Check outbound packets */
+#define IP_FW_F_IIFACE 0x0004 /* Apply inbound interface test */
+#define IP_FW_F_OIFACE 0x0008 /* Apply outbound interface test */
+
+#define IP_FW_F_COMMAND 0x0070 /* Mask for type of chain entry: */
+#define IP_FW_F_DENY 0x0000 /* This is a deny rule */
+#define IP_FW_F_REJECT 0x0010 /* Deny and send a response packet */
+#define IP_FW_F_ACCEPT 0x0020 /* This is an accept rule */
+#define IP_FW_F_COUNT 0x0030 /* This is a count rule */
+#define IP_FW_F_DIVERT 0x0040 /* This is a divert rule */
+#define IP_FW_F_TEE 0x0050 /* This is a tee rule */
+#define IP_FW_F_SKIPTO 0x0060 /* This is a skipto rule */
+
+#define IP_FW_F_PRN 0x0080 /* Print if this rule matches */
+
+#define IP_FW_F_SRNG 0x0100 /* The first two src ports are a min *
+ * and max range (stored in host byte *
+ * order). */
+
+#define IP_FW_F_DRNG 0x0200 /* The first two dst ports are a min *
+ * and max range (stored in host byte *
+ * order). */
+
+#define IP_FW_F_IIFNAME 0x0400 /* In interface by name/unit (not IP) */
+#define IP_FW_F_OIFNAME 0x0800 /* Out interface by name/unit (not IP) */
+
+#define IP_FW_F_INVSRC 0x1000 /* Invert sense of src check */
+#define IP_FW_F_INVDST 0x2000 /* Invert sense of dst check */
+
+#define IP_FW_F_FRAG 0x4000 /* Fragment */
+
+#define IP_FW_F_ICMPBIT 0x8000 /* ICMP type bitmap is valid */
+
+#define IP_FW_F_MASK 0xFFFF /* All possible flag bits mask */
+
+/*
+ * For backwards compatibility with rules specifying "via iface" but
+ * not restricted to only "in" or "out" packets, we define this combination
+ * of bits to represent this configuration.
+ */
+
+#define IF_FW_F_VIAHACK (IP_FW_F_IN|IP_FW_F_OUT|IP_FW_F_IIFACE|IP_FW_F_OIFACE)
+
+/*
+ * Definitions for REJECT response codes.
+ * Values less than 256 correspond to ICMP unreachable codes.
+ */
+#define IP_FW_REJECT_RST 0x0100 /* TCP packets: send RST */
+
+/*
+ * Definitions for IP option names.
+ */
+#define IP_FW_IPOPT_LSRR 0x01
+#define IP_FW_IPOPT_SSRR 0x02
+#define IP_FW_IPOPT_RR 0x04
+#define IP_FW_IPOPT_TS 0x08
+
+/*
+ * Definitions for TCP flags.
+ */
+#define IP_FW_TCPF_FIN TH_FIN
+#define IP_FW_TCPF_SYN TH_SYN
+#define IP_FW_TCPF_RST TH_RST
+#define IP_FW_TCPF_PSH TH_PUSH
+#define IP_FW_TCPF_ACK TH_ACK
+#define IP_FW_TCPF_URG TH_URG
+#define IP_FW_TCPF_ESTAB 0x40
+
+/*
+ * Main firewall chains definitions and global var's definitions.
+ */
+#ifdef _KERNEL
+
+/*
+ * Function definitions.
+ */
+void ip_fw_init(void);
+
+#endif /* _KERNEL */
+
+#endif /* _IP_FW_H */
diff --git a/netinet/ip_icmp.c b/netinet/ip_icmp.c
new file mode 100644
index 0000000..e1c7518
--- /dev/null
+++ b/netinet/ip_icmp.c
@@ -0,0 +1,771 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
+ * $FreeBSD: src/sys/netinet/ip_icmp.c,v 1.101 2005/05/04 13:23:54 andre Exp $
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#define _IP_VHL
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/icmp_var.h>
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#include <netkey/key.h>
+#endif
+
+#ifdef FAST_IPSEC
+#include <netipsec/ipsec.h>
+#include <netipsec/key.h>
+#define IPSEC
+#endif
+
+#include <machine/in_cksum.h>
+
+/*
+ * ICMP routines: error generation, receive packet processing, and
+ * routines to turn packets around back to the originator, and
+ * host table maintenance routines.
+ */
+
+struct icmpstat icmpstat;
+SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD,
+ &icmpstat, icmpstat, "");
+
+static int icmpmaskrepl = 0;
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
+ &icmpmaskrepl, 0, "");
+
+static int icmpbmcastecho = 1;
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW, &icmpbmcastecho,
+ 0, "");
+
+static int icmpallecho = 1;
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, allecho, CTLFLAG_RW, &icmpallecho,
+ 0, "");
+
+#ifdef ICMPPRINTFS
+int icmpprintfs = 0;
+#endif
+
+static void icmp_reflect(struct mbuf *);
+static void icmp_send(struct mbuf *, struct mbuf *);
+
+extern struct protosw inetsw[];
+unsigned int icmplenPanicAvoided;
+
+/*
+ * Generate an error packet of type error
+ * in response to bad packet ip.
+ */
+void
+icmp_error(struct mbuf *n, int type, int code, n_long dest,
+ struct ifnet *destifp)
+{
+ register struct ip *oip = mtod(n, struct ip *), *nip;
+#ifdef _IP_VHL
+ register unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2;
+#else
+ register unsigned oiplen = oip->ip_hl << 2;
+#endif
+ register struct icmp *icp;
+ register struct mbuf *m;
+ unsigned icmplen;
+
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("icmp_error(%p, %x, %d)\n", oip, type, code);
+#endif
+ if (type != ICMP_REDIRECT)
+ icmpstat.icps_error++;
+ /*
+ * Don't send error if not the first fragment of message.
+ * Don't error if the old packet protocol was ICMP
+ * error message, only known informational types.
+ */
+ if (oip->ip_off &~ (IP_MF|IP_DF))
+ goto freeit;
+ if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
+ n->m_len >= oiplen + ICMP_MINLEN &&
+ !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
+ icmpstat.icps_oldicmp++;
+ goto freeit;
+ }
+ /* Don't send error in response to a multicast or broadcast packet */
+ if (n->m_flags & (M_BCAST|M_MCAST))
+ goto freeit;
+ /* Don't send error in response to malicious packet */
+ icmplen = min(oiplen + 8, oip->ip_len);
+ if (icmplen < sizeof(struct ip)) {
+ icmplenPanicAvoided++;
+ goto freeit;
+ }
+
+ /*
+ * First, formulate icmp message
+ */
+ m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ goto freeit;
+#ifdef MAC
+ mac_create_mbuf_netlayer(n, m);
+#endif
+ m->m_len = icmplen + ICMP_MINLEN;
+ MH_ALIGN(m, m->m_len);
+ icp = mtod(m, struct icmp *);
+ if ((u_int)type > ICMP_MAXTYPE)
+ panic("icmp_error");
+ icmpstat.icps_outhist[type]++;
+ icp->icmp_type = type;
+ if (type == ICMP_REDIRECT)
+ icp->icmp_gwaddr.s_addr = dest;
+ else {
+ icp->icmp_void = 0;
+ /*
+ * The following assignments assume an overlay with the
+ * zeroed icmp_void field.
+ */
+ if (type == ICMP_PARAMPROB) {
+ icp->icmp_pptr = code;
+ code = 0;
+ } else if (type == ICMP_UNREACH &&
+ code == ICMP_UNREACH_NEEDFRAG && destifp) {
+ icp->icmp_nextmtu = htons(destifp->if_mtu);
+ }
+ }
+
+ icp->icmp_code = code;
+ bcopy((caddr_t)oip, (caddr_t)&icp->icmp_ip, icmplen);
+ nip = &icp->icmp_ip;
+ nip->ip_len = htons((u_short)(nip->ip_len + oiplen));
+
+ /*
+ * Now, copy old ip header (without options)
+ * in front of icmp message.
+ */
+ if (m->m_data - sizeof(struct ip) < m->m_pktdat)
+ panic("icmp len");
+ m->m_data -= sizeof(struct ip);
+ m->m_len += sizeof(struct ip);
+ m->m_pkthdr.len = m->m_len;
+ m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
+ nip = mtod(m, struct ip *);
+ bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
+ nip->ip_len = m->m_len;
+#ifdef _IP_VHL
+ nip->ip_vhl = IP_VHL_BORING;
+#else
+ nip->ip_v = IPVERSION;
+ nip->ip_hl = 5;
+#endif
+ nip->ip_p = IPPROTO_ICMP;
+ nip->ip_tos = 0;
+ icmp_reflect(m);
+
+freeit:
+ m_freem(n);
+}
+
+static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET, 0, {0}, {0} };
+static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET, 0, {0}, {0} };
+static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET, 0, {0}, {0} };
+
+/*
+ * Process a received ICMP message.
+ */
+void
+icmp_input(struct mbuf *m, int off)
+{
+ struct icmp *icp;
+ struct in_ifaddr *ia;
+ struct ip *ip = mtod(m, struct ip *);
+ int hlen = off;
+ int icmplen = ip->ip_len;
+ int i, code;
+ void (*ctlfunc)(int, struct sockaddr *, void *);
+
+	/*
+	 * Locate the icmp structure in the mbuf, and check
+	 * that it is not corrupted and is at least the minimum length.
+	 */
+#ifdef ICMPPRINTFS
+ if (icmpprintfs) {
+ char buf[4 * sizeof "123"];
+ strcpy(buf, inet_ntoa(ip->ip_src));
+ printf("icmp_input from %s to %s, len %d\n",
+ buf, inet_ntoa(ip->ip_dst), icmplen);
+ }
+#endif
+ if (icmplen < ICMP_MINLEN) {
+ icmpstat.icps_tooshort++;
+ goto freeit;
+ }
+ i = hlen + min(icmplen, ICMP_ADVLENMIN);
+ if (m->m_len < i && (m = m_pullup(m, i)) == NULL) {
+ icmpstat.icps_tooshort++;
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ m->m_len -= hlen;
+ m->m_data += hlen;
+ icp = mtod(m, struct icmp *);
+ if (in_cksum(m, icmplen)) {
+ icmpstat.icps_checksum++;
+ goto freeit;
+ }
+ m->m_len += hlen;
+ m->m_data -= hlen;
+
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("icmp_input, type %d code %d\n", icp->icmp_type,
+ icp->icmp_code);
+#endif
+
+ /*
+ * Message type specific processing.
+ */
+ if (icp->icmp_type > ICMP_MAXTYPE)
+ goto raw;
+ icmpstat.icps_inhist[icp->icmp_type]++;
+ code = icp->icmp_code;
+ switch (icp->icmp_type) {
+
+ case ICMP_UNREACH:
+ switch (code) {
+ case ICMP_UNREACH_NET:
+ case ICMP_UNREACH_HOST:
+ case ICMP_UNREACH_PROTOCOL:
+ case ICMP_UNREACH_PORT:
+ case ICMP_UNREACH_SRCFAIL:
+ code = PRC_UNREACH_NET;
+ break;
+
+ case ICMP_UNREACH_NEEDFRAG:
+ code = PRC_MSGSIZE;
+ break;
+
+ case ICMP_UNREACH_NET_UNKNOWN:
+ case ICMP_UNREACH_NET_PROHIB:
+ case ICMP_UNREACH_TOSNET:
+ code = PRC_UNREACH_NET;
+ break;
+
+ case ICMP_UNREACH_HOST_UNKNOWN:
+ case ICMP_UNREACH_ISOLATED:
+ case ICMP_UNREACH_HOST_PROHIB:
+ case ICMP_UNREACH_TOSHOST:
+ code = PRC_UNREACH_HOST;
+ break;
+
+ case ICMP_UNREACH_FILTER_PROHIB:
+ case ICMP_UNREACH_HOST_PRECEDENCE:
+ case ICMP_UNREACH_PRECEDENCE_CUTOFF:
+ code = PRC_UNREACH_PORT;
+ break;
+
+ default:
+ goto badcode;
+ }
+ goto deliver;
+
+ case ICMP_TIMXCEED:
+ if (code > 1)
+ goto badcode;
+ code += PRC_TIMXCEED_INTRANS;
+ goto deliver;
+
+ case ICMP_PARAMPROB:
+ if (code > 1)
+ goto badcode;
+ code = PRC_PARAMPROB;
+ goto deliver;
+
+ case ICMP_SOURCEQUENCH:
+ if (code)
+ goto badcode;
+ code = PRC_QUENCH;
+ deliver:
+ /*
+ * Problem with datagram; advise higher level routines.
+ */
+ if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
+#ifdef _IP_VHL
+ IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
+#else
+ icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
+#endif
+ icmpstat.icps_badlen++;
+ goto freeit;
+ }
+ NTOHS(icp->icmp_ip.ip_len);
+ /* Discard ICMP's in response to multicast packets */
+ if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
+ goto badcode;
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
+#endif
+ icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+#if 1
+ /*
+ * MTU discovery:
+ * If we got a needfrag and there is a host route to the
+ * original destination, and the MTU is not locked, then
+ * set the MTU in the route to the suggested new value
+ * (if given) and then notify as usual. The ULPs will
+ * notice that the MTU has changed and adapt accordingly.
+ * If no new MTU was suggested, then we guess a new one
+ * less than the current value. If the new MTU is
+ * unreasonably small (arbitrarily set at 296), then
+ * we reset the MTU to the interface value and enable the
+ * lock bit, indicating that we are no longer doing MTU
+ * discovery.
+ */
+ if (code == PRC_MSGSIZE) {
+ struct rtentry *rt;
+ int mtu;
+
+ rt = rtalloc1((struct sockaddr *)&icmpsrc, 0,
+ RTF_CLONING | RTF_PRCLONING);
+ if (rt && (rt->rt_flags & RTF_HOST)
+ && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
+ mtu = ntohs(icp->icmp_nextmtu);
+ if (!mtu)
+ mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu,
+ 1);
+#ifdef DEBUG_MTUDISC
+ printf("MTU for %s reduced to %d\n",
+ inet_ntoa(icmpsrc.sin_addr), mtu);
+#endif
+ if (mtu < 296) {
+ /* rt->rt_rmx.rmx_mtu =
+ rt->rt_ifp->if_mtu; */
+ rt->rt_rmx.rmx_locks |= RTV_MTU;
+ } else if (rt->rt_rmx.rmx_mtu > mtu) {
+ rt->rt_rmx.rmx_mtu = mtu;
+ }
+ }
+ if (rt)
+ RTFREE(rt);
+ }
+
+#endif
+ ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
+ if (ctlfunc)
+ (*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
+ (void *)&icp->icmp_ip);
+ break;
+
+ badcode:
+ icmpstat.icps_badcode++;
+ break;
+
+ case ICMP_ECHO:
+ if (!icmpallecho) {
+ icmpstat.icps_allecho++;
+ break;
+ }
+ if (!icmpbmcastecho
+ && (m->m_flags & (M_MCAST | M_BCAST)) != 0
+ && IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ icmpstat.icps_bmcastecho++;
+ break;
+ }
+ icp->icmp_type = ICMP_ECHOREPLY;
+ goto reflect;
+
+ case ICMP_TSTAMP:
+ if (!icmpbmcastecho
+ && (m->m_flags & (M_MCAST | M_BCAST)) != 0
+ && IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ icmpstat.icps_bmcasttstamp++;
+ break;
+ }
+ if (icmplen < ICMP_TSLEN) {
+ icmpstat.icps_badlen++;
+ break;
+ }
+ icp->icmp_type = ICMP_TSTAMPREPLY;
+ icp->icmp_rtime = iptime();
+ icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
+ goto reflect;
+
+ case ICMP_MASKREQ:
+#define satosin(sa) ((struct sockaddr_in *)(sa))
+ if (icmpmaskrepl == 0)
+ break;
+ /*
+ * We are not able to respond with all ones broadcast
+ * unless we receive it over a point-to-point interface.
+ */
+ if (icmplen < ICMP_MASKLEN)
+ break;
+ switch (ip->ip_dst.s_addr) {
+
+ case INADDR_BROADCAST:
+ case INADDR_ANY:
+ icmpdst.sin_addr = ip->ip_src;
+ break;
+
+ default:
+ icmpdst.sin_addr = ip->ip_dst;
+ }
+ ia = (struct in_ifaddr *)ifaof_ifpforaddr(
+ (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
+ if (ia == NULL)
+ break;
+ if (ia->ia_ifp == NULL) {
+ break;
+ }
+ icp->icmp_type = ICMP_MASKREPLY;
+ icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
+ if (ip->ip_src.s_addr == 0) {
+ if (ia->ia_ifp->if_flags & IFF_BROADCAST)
+ ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
+ else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
+ ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
+ }
+reflect:
+ ip->ip_len += hlen; /* since ip_input deducts this */
+ icmpstat.icps_reflect++;
+ icmpstat.icps_outhist[icp->icmp_type]++;
+ icmp_reflect(m);
+ return;
+
+ case ICMP_REDIRECT:
+ if (code > 3)
+ goto badcode;
+ if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
+#ifdef _IP_VHL
+ IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
+#else
+ icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
+#endif
+ icmpstat.icps_badlen++;
+ break;
+ }
+ /*
+ * Short circuit routing redirects to force
+ * immediate change in the kernel's routing
+ * tables. The message is also handed to anyone
+ * listening on a raw socket (e.g. the routing
+ * daemon for use in updating its tables).
+ */
+ icmpgw.sin_addr = ip->ip_src;
+ icmpdst.sin_addr = icp->icmp_gwaddr;
+#ifdef ICMPPRINTFS
+ if (icmpprintfs) {
+ char buf[4 * sizeof "123"];
+ strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst));
+
+ printf("redirect dst %s to %s\n",
+ buf, inet_ntoa(icp->icmp_gwaddr));
+ }
+#endif
+ icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+ rtredirect((struct sockaddr *)&icmpsrc,
+ (struct sockaddr *)&icmpdst,
+ (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
+ (struct sockaddr *)&icmpgw, (struct rtentry **)0);
+ pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
+ break;
+
+ /*
+ * No kernel processing for the following;
+ * just fall through to send to raw listener.
+ */
+ case ICMP_ECHOREPLY:
+ case ICMP_ROUTERADVERT:
+ case ICMP_ROUTERSOLICIT:
+ case ICMP_TSTAMPREPLY:
+ case ICMP_IREQREPLY:
+ case ICMP_MASKREPLY:
+ default:
+ break;
+ }
+
+raw:
+ rip_input(m, hlen);
+ return;
+
+freeit:
+ m_freem(m);
+}
+
+/*
+ * Reflect the ip packet back to the source
+ */
+static void
+icmp_reflect(struct mbuf *m)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ struct in_ifaddr *ia;
+ struct in_addr t;
+ struct mbuf *opts = 0;
+#ifdef _IP_VHL
+ int optlen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip);
+#else
+ int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
+#endif
+ if (!in_canforward(ip->ip_src) &&
+ ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
+ (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
+ m_freem(m); /* Bad return address */
+ goto done; /* Ip_output() will check for broadcast */
+ }
+ t = ip->ip_dst;
+ ip->ip_dst = ip->ip_src;
+ /*
+ * If the incoming packet was addressed directly to us,
+ * use dst as the src for the reply. Otherwise (broadcast
+ * or anonymous), use the address which corresponds
+ * to the incoming interface.
+ */
+ for (ia = in_ifaddr; ia; ia = ia->ia_next) {
+ if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
+ break;
+ if (ia->ia_ifp && (ia->ia_ifp->if_flags & IFF_BROADCAST) &&
+ t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr)
+ break;
+ }
+ icmpdst.sin_addr = t;
+ if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif)
+ ia = (struct in_ifaddr *)ifaof_ifpforaddr(
+ (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
+ /*
+ * The following happens if the packet was not addressed to us,
+ * and was received on an interface with no IP address.
+ */
+ if (ia == (struct in_ifaddr *)0)
+ ia = in_ifaddr;
+ t = IA_SIN(ia)->sin_addr;
+ ip->ip_src = t;
+ ip->ip_ttl = MAXTTL;
+
+ if (optlen > 0) {
+ register u_char *cp;
+ int opt, cnt;
+ u_int len;
+
+ /*
+ * Retrieve any source routing from the incoming packet;
+ * add on any record-route or timestamp options.
+ */
+ cp = (u_char *) (ip + 1);
+ if ((opts = ip_srcroute()) == 0 &&
+ (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
+ opts->m_len = sizeof(struct in_addr);
+ mtod(opts, struct in_addr *)->s_addr = 0;
+ }
+ if (opts) {
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("icmp_reflect optlen %d rt %d => ",
+ optlen, opts->m_len);
+#endif
+ for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ len = 1;
+ else {
+ if (cnt < IPOPT_OLEN + sizeof(*cp))
+ break;
+ len = cp[IPOPT_OLEN];
+ if (len < IPOPT_OLEN + sizeof(*cp) ||
+ len > cnt)
+ break;
+ }
+ /*
+ * Should check for overflow, but it "can't happen"
+ */
+ if (opt == IPOPT_RR || opt == IPOPT_TS ||
+ opt == IPOPT_SECURITY) {
+ bcopy((caddr_t)cp,
+ mtod(opts, caddr_t) + opts->m_len, len);
+ opts->m_len += len;
+ }
+ }
+ /* Terminate & pad, if necessary */
+ cnt = opts->m_len % 4;
+ if (cnt) {
+ for (; cnt < 4; cnt++) {
+ *(mtod(opts, caddr_t) + opts->m_len) =
+ IPOPT_EOL;
+ opts->m_len++;
+ }
+ }
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("%d\n", opts->m_len);
+#endif
+ }
+ /*
+ * Now strip out original options by copying rest of first
+ * mbuf's data back, and adjust the IP length.
+ */
+ ip->ip_len -= optlen;
+#ifdef _IP_VHL
+ ip->ip_vhl = IP_VHL_BORING;
+#else
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = 5;
+#endif
+ m->m_len -= optlen;
+ if (m->m_flags & M_PKTHDR)
+ m->m_pkthdr.len -= optlen;
+ optlen += sizeof(struct ip);
+ bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
+ (unsigned)(m->m_len - sizeof(struct ip)));
+ }
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ icmp_send(m, opts);
+done:
+ if (opts)
+ (void)m_free(opts);
+}
+
+/*
+ * Send an icmp packet back to the ip level,
+ * after supplying a checksum.
+ */
+static void
+icmp_send(struct mbuf *m, struct mbuf *opts)
+{
+ register struct ip *ip = mtod(m, struct ip *);
+ register int hlen;
+ register struct icmp *icp;
+ struct route ro;
+
+#ifdef _IP_VHL
+ hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+#else
+ hlen = ip->ip_hl << 2;
+#endif
+ m->m_data += hlen;
+ m->m_len -= hlen;
+ icp = mtod(m, struct icmp *);
+ icp->icmp_cksum = 0;
+ icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
+ m->m_data -= hlen;
+ m->m_len += hlen;
+#ifdef ICMPPRINTFS
+ if (icmpprintfs) {
+ char buf[4 * sizeof "123"];
+ strcpy(buf, inet_ntoa(ip->ip_dst));
+ printf("icmp_send dst %s src %s\n",
+ buf, inet_ntoa(ip->ip_src));
+ }
+#endif
+ bzero(&ro, sizeof ro);
+ (void) ip_output(m, opts, &ro, 0, NULL);
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+}
+
+/*
+ * Return milliseconds since 00:00 GMT in network format.
+ */
+uint32_t
+iptime(void)
+{
+ struct timeval atv;
+ u_long t;
+
+ microtime(&atv);
+ t = (atv.tv_sec % (24L*60L*60L)) * 1000L + atv.tv_usec / 1000L;
+ return (htonl(t));
+}
+
+/*
+ * Return the next larger or smaller MTU plateau (table from RFC 1191)
+ * given current value MTU. If DIR is less than zero, a larger plateau
+ * is returned; otherwise, a smaller value is returned.
+ */
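+/*
+ * For example, with the plateau table below, ip_next_mtu(1500, 1)
+ * returns the next smaller plateau, 1492, while ip_next_mtu(1500, -1)
+ * returns the next larger one, 2002.
+ */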
+int
+ip_next_mtu(int mtu, int dir)
+{
+ static int mtutab[] = {
+ 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1280, 1006, 508,
+ 296, 68, 0
+ };
+ int i;
+
+ for (i = 0; i < (sizeof mtutab) / (sizeof mtutab[0]); i++) {
+ if (mtu >= mtutab[i])
+ break;
+ }
+
+ if (dir < 0) {
+ if (i == 0) {
+ return 0;
+ } else {
+ return mtutab[i - 1];
+ }
+ } else {
+ if (mtutab[i] == 0) {
+ return 0;
+ } else if(mtu > mtutab[i]) {
+ return mtutab[i];
+ } else {
+ return mtutab[i + 1];
+ }
+ }
+}
diff --git a/netinet/ip_icmp.h b/netinet/ip_icmp.h
new file mode 100644
index 0000000..2e66a5f
--- /dev/null
+++ b/netinet/ip_icmp.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/netinet/ip_icmp.h,v 1.26 2005/05/04 13:09:19 andre Exp $
+ */
+
+
+#ifndef _NETINET_IP_ICMP_H_
+#define _NETINET_IP_ICMP_H_
+
+#include <netinet/in.h> /* struct in_addr */
+#include <netinet/ip.h> /* struct ip */
+
+/*
+ * Internet Control Message Protocol (ICMP) definitions.
+ * Per RFC 792, September 1981.
+ */
+
+/*
+ * Internal format of an ICMP Router Advertisement address entry.
+ */
+struct icmp_ra_addr {
+ u_int32_t ira_addr;
+ u_int32_t ira_preference;
+};
+
+/*
+ * Structure of an icmp header.
+ */
+struct icmp {
+ u_char icmp_type; /* type of message, see below */
+ u_char icmp_code; /* type sub code */
+ u_short icmp_cksum; /* ones complement cksum of struct */
+ union {
+ u_char ih_pptr; /* ICMP_PARAMPROB */
+ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */
+ struct ih_idseq {
+ uint16_t icd_id; /* network format */
+ uint16_t icd_seq; /* network format */
+ } ih_idseq;
+ int ih_void;
+
+ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */
+ struct ih_pmtu {
+ uint16_t ipm_void; /* network format */
+ uint16_t ipm_nextmtu; /* network format */
+ } ih_pmtu;
+
+ struct ih_rtradv {
+ u_char irt_num_addrs;
+ u_char irt_wpa;
+ u_int16_t irt_lifetime;
+ } ih_rtradv;
+ } icmp_hun;
+#define icmp_pptr icmp_hun.ih_pptr
+#define icmp_gwaddr icmp_hun.ih_gwaddr
+#define icmp_id icmp_hun.ih_idseq.icd_id
+#define icmp_seq icmp_hun.ih_idseq.icd_seq
+#define icmp_void icmp_hun.ih_void
+#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void
+#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu
+#define icmp_num_addrs icmp_hun.ih_rtradv.irt_num_addrs
+#define icmp_wpa icmp_hun.ih_rtradv.irt_wpa
+#define icmp_lifetime icmp_hun.ih_rtradv.irt_lifetime
+ union {
+ struct id_ts { /* ICMP Timestamp */
+ /*
+ * The next 3 fields are in network format,
+ * milliseconds since 00:00 GMT
+ */
+ uint32_t its_otime; /* Originate */
+ uint32_t its_rtime; /* Receive */
+ uint32_t its_ttime; /* Transmit */
+ } id_ts;
+ struct id_ip {
+ struct ip idi_ip;
+ /* options and then 64 bits of data */
+ } id_ip;
+ struct icmp_ra_addr id_radv;
+ u_int32_t id_mask;
+ char id_data[1];
+ } icmp_dun;
+#define icmp_otime icmp_dun.id_ts.its_otime
+#define icmp_rtime icmp_dun.id_ts.its_rtime
+#define icmp_ttime icmp_dun.id_ts.its_ttime
+#define icmp_ip icmp_dun.id_ip.idi_ip
+#define icmp_radv icmp_dun.id_radv
+#define icmp_mask icmp_dun.id_mask
+#define icmp_data icmp_dun.id_data
+};
+
+/*
+ * Lower bounds on packet lengths for various types.
+ * For error advice packets we must first ensure that the
+ * packet is large enough to contain the returned ip header.
+ * Only then can we check whether 64 bits of packet data
+ * have been returned, since we need to inspect the returned
+ * ip header length.
+ */
+#define ICMP_MINLEN 8 /* abs minimum */
+#define ICMP_TSLEN (8 + 3 * sizeof (uint32_t)) /* timestamp */
+#define ICMP_MASKLEN 12 /* address mask */
+#define ICMP_ADVLENMIN (8 + sizeof (struct ip) + 8) /* min */
+#ifndef _IP_VHL
+#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8)
+ /* N.B.: must separately check that ip_hl >= 5 */
+#else
+#define ICMP_ADVLEN(p) (8 + (IP_VHL_HL((p)->icmp_ip.ip_vhl) << 2) + 8)
+ /* N.B.: must separately check that header length >= 5 */
+#endif
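+/*
+ * For example, with a minimal 20-byte IP header (header length of 5
+ * 32-bit words) ICMP_ADVLENMIN and ICMP_ADVLEN(p) both evaluate to
+ * 8 + 20 + 8 = 36 bytes: the ICMP header, the returned IP header and
+ * 64 bits of the original datagram's data.
+ */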
+
+/*
+ * Definition of type and code field values.
+ */
+#define ICMP_ECHOREPLY 0 /* echo reply */
+#define ICMP_UNREACH 3 /* dest unreachable, codes: */
+#define ICMP_UNREACH_NET 0 /* bad net */
+#define ICMP_UNREACH_HOST 1 /* bad host */
+#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */
+#define ICMP_UNREACH_PORT 3 /* bad port */
+#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */
+#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */
+#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */
+#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */
+#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */
+#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */
+#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */
+#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */
+#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */
+#define ICMP_UNREACH_FILTER_PROHIB 13 /* admin prohib */
+#define ICMP_UNREACH_HOST_PRECEDENCE 14 /* host prec vio. */
+#define ICMP_UNREACH_PRECEDENCE_CUTOFF 15 /* prec cutoff */
+#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */
+#define ICMP_REDIRECT 5 /* shorter route, codes: */
+#define ICMP_REDIRECT_NET 0 /* for network */
+#define ICMP_REDIRECT_HOST 1 /* for host */
+#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */
+#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */
+#define ICMP_ALTHOSTADDR 6 /* alternate host address */
+#define ICMP_ECHO 8 /* echo service */
+#define ICMP_ROUTERADVERT 9 /* router advertisement */
+#define ICMP_ROUTERADVERT_NORMAL 0 /* normal advertisement */
+#define ICMP_ROUTERADVERT_NOROUTE_COMMON 16 /* selective routing */
+#define ICMP_ROUTERSOLICIT 10 /* router solicitation */
+#define ICMP_TIMXCEED 11 /* time exceeded, code: */
+#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */
+#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */
+#define ICMP_PARAMPROB 12 /* ip header bad */
+#define ICMP_PARAMPROB_ERRATPTR 0 /* error at param ptr */
+#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. absent */
+#define ICMP_PARAMPROB_LENGTH 2 /* bad length */
+#define ICMP_TSTAMP 13 /* timestamp request */
+#define ICMP_TSTAMPREPLY 14 /* timestamp reply */
+#define ICMP_IREQ 15 /* information request */
+#define ICMP_IREQREPLY 16 /* information reply */
+#define ICMP_MASKREQ 17 /* address mask request */
+#define ICMP_MASKREPLY 18 /* address mask reply */
+#define ICMP_TRACEROUTE 30 /* traceroute */
+#define ICMP_DATACONVERR 31 /* data conversion error */
+#define ICMP_MOBILE_REDIRECT 32 /* mobile host redirect */
+#define ICMP_IPV6_WHEREAREYOU 33 /* IPv6 where-are-you */
+#define ICMP_IPV6_IAMHERE 34 /* IPv6 i-am-here */
+#define ICMP_MOBILE_REGREQUEST 35 /* mobile registration req */
+#define ICMP_MOBILE_REGREPLY 36 /* mobile registration reply */
+#define ICMP_SKIP 39 /* SKIP */
+#define ICMP_PHOTURIS 40 /* Photuris */
+#define ICMP_PHOTURIS_UNKNOWN_INDEX 1 /* unknown sec index */
+#define ICMP_PHOTURIS_AUTH_FAILED 2 /* auth failed */
+#define ICMP_PHOTURIS_DECRYPT_FAILED 3 /* decrypt failed */
+
+#define ICMP_MAXTYPE 40
+
+#define ICMP_INFOTYPE(type) \
+ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \
+ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \
+ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \
+ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \
+ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY)
+
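+/*
+ * ICMP_INFOTYPE(type) is non-zero for purely informational messages
+ * (e.g. ICMP_ECHO, ICMP_TSTAMP); icmp_error() uses it so that errors
+ * are never generated in response to another ICMP error message.
+ */
+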
+#ifdef _KERNEL
+void icmp_error(struct mbuf *, int, int, n_long, struct ifnet *);
+void icmp_input(struct mbuf *, int);
+int ip_next_mtu(int, int);
+#endif
+
+#endif
diff --git a/netinet/ip_input.c b/netinet/ip_input.c
new file mode 100644
index 0000000..927537f
--- /dev/null
+++ b/netinet/ip_input.c
@@ -0,0 +1,1510 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
+ * $ANA: ip_input.c,v 1.5 1996/09/18 14:34:59 wollman Exp $
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define _IP_VHL
+
+#include "opt_ipfw.h"
+
+#include <stddef.h>
+#include <inttypes.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/netisr.h>
+
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <machine/in_cksum.h>
+
+#include <sys/socketvar.h>
+
+#ifdef IPFIREWALL
+#include <netinet/ip_fw.h>
+#endif
+
+int rsvp_on = 0;
+static int ip_rsvp_on;
+struct socket *ip_rsvpd;
+
+int ipforwarding = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
+ &ipforwarding, 0, "Enable IP forwarding between interfaces");
+
+static int ipsendredirects = 1; /* XXX */
+SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
+ &ipsendredirects, 0, "Enable sending IP redirects");
+
+int ip_defttl = IPDEFTTL;
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
+ &ip_defttl, 0, "Maximum TTL on IP packets");
+
+static int ip_dosourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
+ &ip_dosourceroute, 0, "");
+
+static int ip_acceptsourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
+ CTLFLAG_RW, &ip_acceptsourceroute, 0, "");
+#ifdef DIAGNOSTIC
+static int ipprintfs = 0;
+#endif
+
+extern struct domain inetdomain;
+extern struct protosw inetsw[];
+u_char ip_protox[IPPROTO_MAX];
+static int ipqmaxlen = IFQ_MAXLEN;
+struct in_ifaddr *in_ifaddr; /* first inet address */
+struct ifqueue ipintrq;
+SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RD,
+ &ipintrq.ifq_maxlen, 0, "");
+SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
+ &ipintrq.ifq_drops, 0,
+ "Number of packets dropped from the IP input queue");
+
+struct ipstat ipstat;
+
+/* Packet reassembly stuff */
+#define IPREASS_NHASH_LOG2 6
+#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
+#define IPREASS_HMASK (IPREASS_NHASH - 1)
+#define IPREASS_HASH(x,y) \
+ (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
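+/*
+ * The hash mixes two nibbles of the source address (x) with the IP id
+ * (y) and masks the result into one of the IPREASS_NHASH buckets, so
+ * all fragments of a datagram (same source and id) hash to the same
+ * chain; the lookup then also matches the destination and protocol.
+ */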
+
+static struct ipq ipq[IPREASS_NHASH];
+static int nipq = 0; /* total # of reass queues */
+static int maxnipq;
+
+#ifdef IPCTL_DEFMTU
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
+ &ip_mtu, 0, "Default MTU");
+#endif
+
+#if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
+#undef COMPAT_IPFW
+#define COMPAT_IPFW 1
+#else
+#undef COMPAT_IPFW
+#endif
+
+#ifdef COMPAT_IPFW
+/* Firewall hooks */
+ip_fw_chk_t *ip_fw_chk_ptr;
+ip_fw_ctl_t *ip_fw_ctl_ptr;
+
+/* IP Network Address Translation (NAT) hooks */
+ip_nat_t *ip_nat_ptr;
+ip_nat_ctl_t *ip_nat_ctl_ptr;
+#endif
+
+/*
+ * We need to save the IP options in case a protocol wants to respond
+ * to an incoming packet over the same route if the packet got here
+ * using IP source routing. This allows connection establishment and
+ * maintenance when the remote end is on a network that is not known
+ * to us.
+ */
+static int ip_nhops = 0;
+static struct ip_srcrt {
+ struct in_addr dst; /* final destination */
+ char nop; /* one NOP to align */
+ char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
+ struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
+} ip_srcrt;
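+
+/*
+ * save_rte() copies a received LSRR/SSRR option into ip_srcrt (the
+ * recorded addresses overlay route[]), and ip_srcroute() later builds
+ * the reply option by walking route[] in reverse, so replies retrace
+ * the recorded source route.
+ */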
+
+#ifdef IPDIVERT
+/*
+ * Shared variable between ip_input() and ip_reass() to communicate
+ * about which packets, once assembled from fragments, get diverted,
+ * and to which port.
+ */
+static u_short frag_divert_port;
+#endif
+
+static void save_rte(u_char *, struct in_addr);
+static void ip_deq(struct ipasfrag *);
+static int ip_dooptions(struct mbuf *);
+static void ip_enq(struct ipasfrag *, struct ipasfrag *);
+static void ip_forward(struct mbuf *, int);
+static void ip_freef(struct ipq *);
+static struct ip *
+ ip_reass(struct ipasfrag *, struct ipq *, struct ipq *);
+static struct in_ifaddr *
+ ip_rtaddr(struct in_addr);
+void ipintr(void);
+/*
+ * IP initialization: fill in IP protocol switch table.
+ * All protocols not implemented in kernel go to raw IP protocol handler.
+ */
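+/*
+ * After ip_init() runs, ip_protox[proto] holds the index into inetsw[]
+ * of the protosw entry handling IP protocol number proto; protocols not
+ * implemented map to the raw IP entry.  ip_input() dispatches through
+ * this table, e.g. (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen).
+ */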
+void
+ip_init(void)
+{
+ register struct protosw *pr;
+ register int i;
+
+ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
+ if (pr == NULL)
+ panic("ip_init: PF_INET not found");
+
+ /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
+ for (i = 0; i < IPPROTO_MAX; i++)
+ ip_protox[i] = pr - inetsw;
+ /*
+ * Cycle through IP protocols and put them into the appropriate place
+ * in ip_protox[].
+ */
+ for (pr = inetdomain.dom_protosw;
+ pr < inetdomain.dom_protoswNPROTOSW; pr++)
+ if (pr->pr_domain->dom_family == PF_INET &&
+ pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
+ ip_protox[pr->pr_protocol] = pr - inetsw;
+ }
+
+ for (i = 0; i < IPREASS_NHASH; i++)
+ ipq[i].next = ipq[i].prev = &ipq[i];
+
+ maxnipq = nmbclusters/4;
+
+ /* Initialize various other remaining things. */
+ ip_id = rtems_bsdnet_seconds_since_boot() & 0xffff;
+ ipintrq.ifq_maxlen = ipqmaxlen;
+#ifdef IPFIREWALL
+ ip_fw_init();
+#endif
+#ifdef IPNAT
+ ip_nat_init();
+#endif
+
+}
+
+static struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET, 0, {0}, {0} };
+static struct route ipforward_rt;
+
+/*
+ * Ip input routine. Checksum and byte swap header. If fragmented
+ * try to reassemble. Process options. Pass to next level.
+ */
+void
+ip_input(struct mbuf *m)
+{
+ struct ip *ip = NULL;
+ struct ipq *fp;
+ struct in_ifaddr *ia = NULL;
+ int i, hlen;
+ u_short sum;
+
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ip_input no HDR");
+#endif
+ /*
+ * If no IP addresses have been set yet but the interfaces
+ * are receiving, can't do anything with incoming packets yet.
+ */
+ if (in_ifaddr == NULL)
+ goto bad;
+ ipstat.ips_total++;
+
+ if (m->m_pkthdr.len < sizeof(struct ip))
+ goto tooshort;
+
+#if defined(DIAGNOSTIC) && defined(ORIGINAL_FREEBSD_CODE)
+ if (m->m_len < sizeof(struct ip))
+ panic("ipintr mbuf too short");
+#endif
+
+ if (m->m_len < sizeof (struct ip) &&
+ (m = m_pullup(m, sizeof (struct ip))) == NULL) {
+ ipstat.ips_toosmall++;
+ return;
+ }
+ ip = mtod(m, struct ip *);
+
+#ifdef _IP_VHL
+ if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
+#else
+ if (ip->ip_v != IPVERSION) {
+#endif
+ ipstat.ips_badvers++;
+ goto bad;
+ }
+
+#ifdef _IP_VHL
+ hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+#else
+ hlen = ip->ip_hl << 2;
+#endif
+ if (hlen < sizeof(struct ip)) { /* minimum header length */
+ ipstat.ips_badhlen++;
+ goto bad;
+ }
+ if (hlen > m->m_len) {
+ if ((m = m_pullup(m, hlen)) == NULL) {
+ ipstat.ips_badhlen++;
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ if (hlen == sizeof(struct ip)) {
+ sum = in_cksum_hdr(ip);
+ } else {
+ sum = in_cksum(m, hlen);
+ }
+ if (sum) {
+ ipstat.ips_badsum++;
+ goto bad;
+ }
+
+ /*
+ * Convert fields to host representation.
+ */
+ ip->ip_len = ntohs(ip->ip_len);
+ if (ip->ip_len < hlen) {
+ ipstat.ips_badlen++;
+ goto bad;
+ }
+ NTOHS(ip->ip_id);
+ ip->ip_off = ntohs(ip->ip_off);
+
+ /*
+ * Check that the amount of data in the buffers
+	 * is at least as much as the IP header would have us expect.
+ * Trim mbufs if longer than we expect.
+ * Drop packet if shorter than we expect.
+ */
+ if (m->m_pkthdr.len < ip->ip_len) {
+tooshort:
+ ipstat.ips_tooshort++;
+ goto bad;
+ }
+ if (m->m_pkthdr.len > ip->ip_len) {
+ if (m->m_len == m->m_pkthdr.len) {
+ m->m_len = ip->ip_len;
+ m->m_pkthdr.len = ip->ip_len;
+ } else
+ m_adj(m, ip->ip_len - m->m_pkthdr.len);
+ }
+ /*
+	 * IP hook section.
+	 * Before any other processing has been done on the packet, while
+	 * it is still fresh off the network, apply the configured hooks:
+ * - Firewall: deny/allow/divert
+ * - Xlate: translate packet's addr/port (NAT).
+ * - Wrap: fake packet's addr/port <unimpl.>
+ * - Encapsulate: put it in another IP and send out. <unimp.>
+ */
+
+#ifdef COMPAT_IPFW
+ if (ip_fw_chk_ptr) {
+#ifdef IPDIVERT
+ u_short port;
+
+ port = (*ip_fw_chk_ptr)(&ip, hlen, NULL, ip_divert_ignore, &m);
+ ip_divert_ignore = 0;
+ if (port) { /* Divert packet */
+ frag_divert_port = port;
+ goto ours;
+ }
+#else
+ /* If ipfw says divert, we have to just drop packet */
+ if ((*ip_fw_chk_ptr)(&ip, hlen, NULL, 0, &m)) {
+ m_freem(m);
+ m = NULL;
+ }
+#endif
+ if (!m)
+ return;
+ }
+
+ if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, m->m_pkthdr.rcvif, IP_NAT_IN))
+ return;
+#endif
+
+ /*
+ * Process options and, if not destined for us,
+ * ship it on. ip_dooptions returns 1 when an
+ * error was detected (causing an icmp message
+ * to be sent and the original packet to be freed).
+ */
+ ip_nhops = 0; /* for source routed packets */
+ if (hlen > sizeof (struct ip) && ip_dooptions(m))
+ return;
+
+	/* Greedy RSVP: snatch any packet of the RSVP protocol, no matter
+	 * whether it is destined for another node or is a multicast packet;
+	 * RSVP wants it and prevents it from being forwarded anywhere else.
+	 * We also check that the rsvp daemon is running before grabbing
+	 * the packet.
+	 */
+ if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
+ goto ours;
+
+ /*
+ * Check our list of addresses, to see if the packet is for us.
+ */
+ for (ia = in_ifaddr; ia; ia = ia->ia_next) {
+#define satosin(sa) ((struct sockaddr_in *)(sa))
+
+ if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
+ goto ours;
+#ifdef BOOTP_COMPAT
+ if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
+ goto ours;
+#endif
+ if (ia->ia_ifp && ia->ia_ifp->if_flags & IFF_BROADCAST) {
+ if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
+ ip->ip_dst.s_addr)
+ goto ours;
+ if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
+ goto ours;
+ }
+ }
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ struct in_multi *inm;
+ if (ip_mrouter) {
+ /*
+ * If we are acting as a multicast router, all
+ * incoming multicast packets are passed to the
+ * kernel-level multicast forwarding function.
+ * The packet is returned (relatively) intact; if
+ * ip_mforward() returns a non-zero value, the packet
+ * must be discarded, else it may be accepted below.
+ *
+ * (The IP ident field is put in the same byte order
+ * as expected when ip_mforward() is called from
+ * ip_output().)
+ */
+ ip->ip_id = htons(ip->ip_id);
+ if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ return;
+ }
+ ip->ip_id = ntohs(ip->ip_id);
+
+ /*
+ * The process-level routing daemon needs to receive
+ * all multicast IGMP packets, whether or not this
+ * host belongs to their destination groups.
+ */
+ if (ip->ip_p == IPPROTO_IGMP)
+ goto ours;
+ ipstat.ips_forward++;
+ }
+ /*
+ * See if we belong to the destination multicast group on the
+ * arrival interface.
+ */
+ IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
+ if (inm == NULL) {
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ return;
+ }
+ goto ours;
+ }
+ if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
+ goto ours;
+ if (ip->ip_dst.s_addr == INADDR_ANY)
+ goto ours;
+
+ /*
+ * Not for us; forward if possible and desirable.
+ */
+ if (ipforwarding == 0) {
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ } else {
+ ip_forward(m, 0);
+ }
+ return;
+
+ours:
+
+ /*
+ * If offset or IP_MF are set, must reassemble.
+ * Otherwise, nothing need be done.
+ * (We could look in the reassembly queue to see
+ * if the packet was previously fragmented,
+ * but it's not worth the time; just let them time out.)
+ */
+ if (ip->ip_off &~ (IP_DF | IP_RF)) {
+ if (m->m_flags & M_EXT) { /* XXX */
+ if ((m = m_pullup(m, sizeof (struct ip))) == 0) {
+ ipstat.ips_toosmall++;
+#ifdef IPDIVERT
+ frag_divert_port = 0;
+#endif
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
+ /*
+ * Look for queue of fragments
+ * of this datagram.
+ */
+ for (fp = ipq[sum].next; fp != &ipq[sum]; fp = fp->next)
+ if (ip->ip_id == fp->ipq_id &&
+ ip->ip_src.s_addr == fp->ipq_src.s_addr &&
+ ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
+ ip->ip_p == fp->ipq_p)
+ goto found;
+
+ fp = 0;
+
+ /* check if there's a place for the new queue */
+ if (nipq > maxnipq) {
+ /*
+ * drop something from the tail of the current queue
+ * before proceeding further
+ */
+ if (ipq[sum].prev == &ipq[sum]) { /* gak */
+ for (i = 0; i < IPREASS_NHASH; i++) {
+ if (ipq[i].prev != &ipq[i]) {
+ ip_freef(ipq[i].prev);
+ break;
+ }
+ }
+ } else
+ ip_freef(ipq[sum].prev);
+ }
+found:
+ /*
+ * Adjust ip_len to not reflect header,
+ * set ip_mff if more fragments are expected,
+ * convert offset of this to bytes.
+ */
+ ip->ip_len -= hlen;
+ ((struct ipasfrag *)ip)->ipf_mff &= ~1;
+ if (ip->ip_off & IP_MF)
+ ((struct ipasfrag *)ip)->ipf_mff |= 1;
+ ip->ip_off <<= 3;
+
+ /*
+ * If datagram marked as having more fragments
+ * or if this is not the first fragment,
+ * attempt reassembly; if it succeeds, proceed.
+ */
+ if (((struct ipasfrag *)ip)->ipf_mff & 1 || ip->ip_off) {
+ ipstat.ips_fragments++;
+ ip = ip_reass((struct ipasfrag *)ip, fp, &ipq[sum]);
+ if (ip == 0)
+ return;
+ ipstat.ips_reassembled++;
+ m = dtom(ip);
+#ifdef IPDIVERT
+ if (frag_divert_port) {
+ ip->ip_len += hlen;
+ HTONS(ip->ip_len);
+ HTONS(ip->ip_off);
+ HTONS(ip->ip_id);
+ ip->ip_sum = 0;
+ ip->ip_sum = in_cksum_hdr(ip);
+ NTOHS(ip->ip_id);
+ NTOHS(ip->ip_off);
+ NTOHS(ip->ip_len);
+ ip->ip_len -= hlen;
+ }
+#endif
+ } else
+ if (fp)
+ ip_freef(fp);
+ } else
+ ip->ip_len -= hlen;
+
+#ifdef IPDIVERT
+ /*
+ * Divert reassembled packets to the divert protocol if required
+ */
+ if (frag_divert_port) {
+ ipstat.ips_delivered++;
+ ip_divert_port = frag_divert_port;
+ frag_divert_port = 0;
+ (*inetsw[ip_protox[IPPROTO_DIVERT]].pr_input)(m, hlen);
+ return;
+ }
+
+ /* Don't let packets divert themselves */
+ if (ip->ip_p == IPPROTO_DIVERT) {
+ ipstat.ips_noproto++;
+ goto bad;
+ }
+#endif
+
+ /*
+ * Switch out to protocol's input routine.
+ */
+ ipstat.ips_delivered++;
+
+ (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
+ return;
+bad:
+ m_freem(m);
+}
+
+/*
+ * IP software interrupt routine - to go away sometime soon
+ */
+void
+ipintr(void)
+{
+ int s;
+ struct mbuf *m;
+
+ while(1) {
+ s = splimp();
+ IF_DEQUEUE(&ipintrq, m);
+ splx(s);
+ if (m == 0)
+ return;
+ ip_input(m);
+ }
+}
+
+NETISR_SET(NETISR_IP, ipintr);
+
+/*
+ * Take incoming datagram fragment and try to
+ * reassemble it into whole datagram. If a chain for
+ * reassembly of this datagram already exists, then it
+ * is given as fp; otherwise a new chain must be created.
+ */
+static struct ip *
+ip_reass(struct ipasfrag *ip, struct ipq *fp, struct ipq *where)
+{
+ register struct mbuf *m = dtom(ip);
+ register struct ipasfrag *q;
+ struct mbuf *t;
+ int hlen = ip->ip_hl << 2;
+ int i;
+ int32_t next;
+
+ /*
+ * Presence of header sizes in mbufs
+ * would confuse code below.
+ */
+ m->m_data += hlen;
+ m->m_len -= hlen;
+
+ /*
+ * If first fragment to arrive, create a reassembly queue.
+ */
+ if (fp == NULL) {
+ if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
+ goto dropfrag;
+ fp = mtod(t, struct ipq *);
+ insque(fp, where);
+ nipq++;
+ fp->ipq_ttl = IPFRAGTTL;
+ fp->ipq_p = ip->ip_p;
+ fp->ipq_id = ip->ip_id;
+ fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
+ fp->ipq_src = ((struct ip *)ip)->ip_src;
+ fp->ipq_dst = ((struct ip *)ip)->ip_dst;
+#ifdef IPDIVERT
+ fp->ipq_divert = 0;
+#endif
+ q = (struct ipasfrag *)fp;
+ goto insert;
+ }
+
+ /*
+ * Find a segment which begins after this one does.
+ */
+ for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
+ if (q->ip_off > ip->ip_off)
+ break;
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us.
+ */
+ if (q->ipf_prev != (struct ipasfrag *)fp) {
+ i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
+ if (i > 0) {
+ if (i >= ip->ip_len)
+ goto dropfrag;
+ m_adj(dtom(ip), i);
+ ip->ip_off += i;
+ ip->ip_len -= i;
+ }
+ }
+
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ */
+ while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
+ struct mbuf *m0;
+
+ i = (ip->ip_off + ip->ip_len) - q->ip_off;
+ if (i < q->ip_len) {
+ q->ip_len -= i;
+ q->ip_off += i;
+ m_adj(dtom(q), i);
+ break;
+ }
+ m0 = dtom(q);
+ q = q->ipf_next;
+ ip_deq(q->ipf_prev);
+ m_freem(m0);
+ }
+
+insert:
+
+#ifdef IPDIVERT
+ /*
+ * Any fragment diverting causes the whole packet to divert
+ */
+ if (frag_divert_port != 0)
+ fp->ipq_divert = frag_divert_port;
+ frag_divert_port = 0;
+#endif
+
+ /*
+ * Stick new segment in its place;
+ * check for complete reassembly.
+ */
+ ip_enq(ip, q->ipf_prev);
+ next = 0;
+ for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
+ if (q->ip_off != next)
+ return (0);
+ next += q->ip_len;
+ }
+ if (q->ipf_prev->ipf_mff & 1)
+ return (0);
+
+ /*
+ * Reassembly is complete. Make sure the packet is a sane size.
+ */
+#ifdef _IP_VHL
+ if (next + (IP_VHL_HL(((struct ip *)fp->ipq_next)->ip_vhl) << 2)
+#else
+ if (next + ((((struct ip *)fp->ipq_next)->ip_hl) << 2)
+#endif
+ > IP_MAXPACKET) {
+ ipstat.ips_toolong++;
+ ip_freef(fp);
+ return (0);
+ }
+
+ /*
+ * Concatenate fragments.
+ */
+ q = fp->ipq_next;
+ m = dtom(q);
+ t = m->m_next;
+ m->m_next = NULL;
+ m_cat(m, t);
+ q = q->ipf_next;
+ while (q != (struct ipasfrag *)fp) {
+ t = dtom(q);
+ q = q->ipf_next;
+ m_cat(m, t);
+ }
+
+#ifdef IPDIVERT
+ /*
+ * Record divert port for packet, if any
+ */
+ frag_divert_port = fp->ipq_divert;
+#endif
+
+ /*
+ * Create header for new ip packet by modifying header of first
+ * packet; dequeue and discard fragment reassembly header.
+ * Make header visible.
+ */
+ ip = fp->ipq_next;
+ ip->ip_len = next;
+ ip->ipf_mff &= ~1;
+ ((struct ip *)ip)->ip_src = fp->ipq_src;
+ ((struct ip *)ip)->ip_dst = fp->ipq_dst;
+ remque(fp);
+ nipq--;
+ (void) m_free(dtom(fp));
+ m = dtom(ip);
+ m->m_len += (ip->ip_hl << 2);
+ m->m_data -= (ip->ip_hl << 2);
+ /* some debugging cruft by sklower, below, will go away soon */
+ if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
+ register int plen = 0;
+ for (t = m; m; m = m->m_next)
+ plen += m->m_len;
+ t->m_pkthdr.len = plen;
+ }
+ return ((struct ip *)ip);
+
+dropfrag:
+ ipstat.ips_fragdropped++;
+ m_freem(m);
+ return (0);
+}
+
+/*
+ * Free a fragment reassembly header and all
+ * associated datagrams.
+ */
+static void
+ip_freef(struct ipq *fp)
+{
+ register struct ipasfrag *q, *p;
+
+ for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
+ p = q->ipf_next;
+ ip_deq(q);
+ m_freem(dtom(q));
+ }
+ remque(fp);
+ (void) m_free(dtom(fp));
+ nipq--;
+}
+
+/*
+ * Put an ip fragment on a reassembly chain.
+ * Like insque, but pointers in middle of structure.
+ */
+static void
+ip_enq(struct ipasfrag *p, struct ipasfrag *prev)
+{
+
+ p->ipf_prev = prev;
+ p->ipf_next = prev->ipf_next;
+ prev->ipf_next->ipf_prev = p;
+ prev->ipf_next = p;
+}
+
+/*
+ * To ip_enq as remque is to insque.
+ */
+static void
+ip_deq(struct ipasfrag *p)
+{
+
+ p->ipf_prev->ipf_next = p->ipf_next;
+ p->ipf_next->ipf_prev = p->ipf_prev;
+}
+
+/*
+ * IP timer processing;
+ * if a timer expires on a reassembly
+ * queue, discard it.
+ */
+void
+ip_slowtimo(void)
+{
+ register struct ipq *fp;
+ int s = splnet();
+ int i;
+
+ for (i = 0; i < IPREASS_NHASH; i++) {
+ fp = ipq[i].next;
+ if (fp == 0)
+ continue;
+ while (fp != &ipq[i]) {
+ --fp->ipq_ttl;
+ fp = fp->next;
+ if (fp->prev->ipq_ttl == 0) {
+ ipstat.ips_fragtimeout++;
+ ip_freef(fp->prev);
+ }
+ }
+ }
+ splx(s);
+}
+
+/*
+ * Drain off all datagram fragments.
+ */
+void
+ip_drain(void)
+{
+ int i;
+
+ for (i = 0; i < IPREASS_NHASH; i++) {
+ while (ipq[i].next != &ipq[i]) {
+ ipstat.ips_fragdropped++;
+ ip_freef(ipq[i].next);
+ }
+ }
+ in_rtqdrain();
+}
+
+/*
+ * Do option processing on a datagram,
+ * possibly discarding it if bad options are encountered,
+ * or forwarding it if source-routed.
+ * Returns 1 if packet has been forwarded/freed,
+ * 0 if the packet should be processed further.
+ */
+static int
+ip_dooptions(struct mbuf *m)
+{
+ register struct ip *ip = mtod(m, struct ip *);
+ register u_char *cp;
+ register struct ip_timestamp *ipt;
+ register struct in_ifaddr *ia;
+ int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
+ struct in_addr *sin, dst;
+ n_time ntime;
+
+ dst = ip->ip_dst;
+ cp = (u_char *)(ip + 1);
+#ifdef _IP_VHL
+ cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
+#else
+ cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+#endif
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ optlen = cp[IPOPT_OLEN];
+ if (optlen <= 0 || optlen > cnt) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ /*
+ * Source routing with record.
+ * Find interface with current destination address.
+ * If none on this machine then drop if strictly routed,
+ * or do nothing if loosely routed.
+ * Record interface address and bring up next address
+ * component. If strictly routed make sure next
+ * address is on directly accessible net.
+ */
+ case IPOPT_LSRR:
+ case IPOPT_SSRR:
+ if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ ipaddr.sin_addr = ip->ip_dst;
+ ia = (struct in_ifaddr *)
+ ifa_ifwithaddr((struct sockaddr *)&ipaddr);
+ if (ia == 0) {
+ if (opt == IPOPT_SSRR) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ }
+ if (!ip_dosourceroute)
+ goto nosourcerouting;
+ /*
+ * Loose routing, and not at next destination
+ * yet; nothing to do except forward.
+ */
+ break;
+ }
+ off--; /* 0 origin */
+ if (off > optlen - sizeof(struct in_addr)) {
+ /*
+ * End of source route. Should be for us.
+ */
+ if (!ip_acceptsourceroute)
+ goto nosourcerouting;
+ save_rte(cp, ip->ip_src);
+ break;
+ }
+
+ if (!ip_dosourceroute) {
+ char buf0[INET_ADDRSTRLEN];
+ char buf1[INET_ADDRSTRLEN];
+
+nosourcerouting:
+ log(LOG_WARNING,
+ "attempted source route from %s to %s\n",
+ inet_ntoa_r(ip->ip_dst, buf0),
+ inet_ntoa_r(ip->ip_src, buf1));
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ }
+
+ /*
+ * locate outgoing interface
+ */
+ (void)memcpy(&ipaddr.sin_addr, cp + off,
+ sizeof(ipaddr.sin_addr));
+
+ if (opt == IPOPT_SSRR) {
+#define INA struct in_ifaddr *
+#define SA struct sockaddr *
+ if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
+ ia = (INA)ifa_ifwithnet((SA)&ipaddr);
+ } else
+ ia = ip_rtaddr(ipaddr.sin_addr);
+ if (ia == 0) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ }
+ ip->ip_dst = ipaddr.sin_addr;
+ (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+ sizeof(struct in_addr));
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ /*
+ * Let ip_intr's mcast routing check handle mcast pkts
+ */
+ forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
+ break;
+
+ case IPOPT_RR:
+ if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ /*
+ * If no space remains, ignore.
+ */
+ off--; /* 0 origin */
+ if (off > optlen - sizeof(struct in_addr))
+ break;
+ (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
+ sizeof(ipaddr.sin_addr));
+ /*
+ * locate outgoing interface; if we're the destination,
+ * use the incoming interface (should be same).
+ */
+ if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
+ (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_HOST;
+ goto bad;
+ }
+ (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+ sizeof(struct in_addr));
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ break;
+
+ case IPOPT_TS:
+ code = cp - (u_char *)ip;
+ ipt = (struct ip_timestamp *)cp;
+ if (ipt->ipt_len < 5)
+ goto bad;
+ if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
+ if (++ipt->ipt_oflw == 0)
+ goto bad;
+ break;
+ }
+ sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
+ switch (ipt->ipt_flg) {
+
+ case IPOPT_TS_TSONLY:
+ break;
+
+ case IPOPT_TS_TSANDADDR:
+ if (ipt->ipt_ptr + sizeof(n_time) +
+ sizeof(struct in_addr) > ipt->ipt_len)
+ goto bad;
+ ipaddr.sin_addr = dst;
+ ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
+ m->m_pkthdr.rcvif);
+ if (ia == 0)
+ continue;
+ (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
+ sizeof(struct in_addr));
+ ipt->ipt_ptr += sizeof(struct in_addr);
+ break;
+
+ case IPOPT_TS_PRESPEC:
+ if (ipt->ipt_ptr + sizeof(n_time) +
+ sizeof(struct in_addr) > ipt->ipt_len)
+ goto bad;
+ (void)memcpy(&ipaddr.sin_addr, sin,
+ sizeof(struct in_addr));
+ if (ifa_ifwithaddr((SA)&ipaddr) == 0)
+ continue;
+ ipt->ipt_ptr += sizeof(struct in_addr);
+ break;
+
+ default:
+ goto bad;
+ }
+ ntime = iptime();
+ (void)memcpy(cp + ipt->ipt_ptr - 1, &ntime,
+ sizeof(n_time));
+ ipt->ipt_ptr += sizeof(n_time);
+ }
+ }
+ if (forward && ipforwarding) {
+ ip_forward(m, 1);
+ return (1);
+ }
+ return (0);
+bad:
+#ifdef _IP_VHL
+ ip->ip_len -= IP_VHL_HL(ip->ip_vhl) << 2; /* XXX icmp_error adds in hdr length */
+#else
+ ip->ip_len -= ip->ip_hl << 2; /* XXX icmp_error adds in hdr length */
+#endif
+ icmp_error(m, type, code, 0, 0);
+ ipstat.ips_badoptions++;
+ return (1);
+}
+
+/*
+ * Given address of next destination (final or next hop),
+ * return internet address info of interface to be used to get there.
+ */
+static struct in_ifaddr *
+ip_rtaddr(struct in_addr dst)
+{
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
+
+ if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
+ if (ipforward_rt.ro_rt) {
+ RTFREE(ipforward_rt.ro_rt);
+ ipforward_rt.ro_rt = 0;
+ }
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = dst;
+
+ rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
+ }
+ if (ipforward_rt.ro_rt == 0)
+ return ((struct in_ifaddr *)0);
+ return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
+}
+
+/*
+ * Save incoming source route for use in replies,
+ * to be picked up later by ip_srcroute if the receiver is interested.
+ */
+void
+save_rte(u_char *option, struct in_addr dst)
+{
+ unsigned olen;
+
+ olen = option[IPOPT_OLEN];
+#ifdef DIAGNOSTIC
+ if (ipprintfs)
+ printf("save_rte: olen %d\n", olen);
+#endif
+ if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
+ return;
+ bcopy(option, ip_srcrt.srcopt, olen);
+ ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
+ ip_srcrt.dst = dst;
+}
+
+/*
+ * Retrieve incoming source route for use in replies,
+ * in the same form used by setsockopt.
+ * The first hop is placed before the options; it will be removed later.
+ */
+struct mbuf *
+ip_srcroute(void)
+{
+ register struct in_addr *p, *q;
+ register struct mbuf *m;
+
+ if (ip_nhops == 0)
+ return ((struct mbuf *)0);
+ m = m_get(M_DONTWAIT, MT_SOOPTS);
+ if (m == 0)
+ return ((struct mbuf *)0);
+
+#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
+
+ /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
+ m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
+ OPTSIZ;
+#ifdef DIAGNOSTIC
+ if (ipprintfs)
+ printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
+#endif
+
+ /*
+ * First save first hop for return route
+ */
+ p = &ip_srcrt.route[ip_nhops - 1];
+ *(mtod(m, struct in_addr *)) = *p--;
+#ifdef DIAGNOSTIC
+ if (ipprintfs)
+ printf(" hops %"PRIx32, ntohl(mtod(m, struct in_addr *)->s_addr));
+#endif
+
+ /*
+ * Copy option fields and padding (nop) to mbuf.
+ */
+ ip_srcrt.nop = IPOPT_NOP;
+ ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
+ (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
+ &ip_srcrt.nop, OPTSIZ);
+ q = (struct in_addr *)(mtod(m, caddr_t) +
+ sizeof(struct in_addr) + OPTSIZ);
+#undef OPTSIZ
+ /*
+ * Record return path as an IP source route,
+ * reversing the path (pointers are now aligned).
+ */
+ while (p >= ip_srcrt.route) {
+#ifdef DIAGNOSTIC
+ if (ipprintfs)
+ printf(" %"PRIx32, ntohl(q->s_addr));
+#endif
+ *q++ = *p--;
+ }
+ /*
+ * Last hop goes to final destination.
+ */
+ *q = ip_srcrt.dst;
+#ifdef DIAGNOSTIC
+ if (ipprintfs)
+ printf(" %"PRIx32"\n", ntohl(q->s_addr));
+#endif
+ return (m);
+}
+
+/*
+ * Strip out IP options, called from a higher-level
+ * protocol in the kernel.
+ * The second argument is the buffer to which the options
+ * would be moved; it is currently ignored.
+ * XXX the second argument should be deleted.
+ */
+void
+ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
+{
+ register int i;
+ struct ip *ip = mtod(m, struct ip *);
+ register caddr_t opts;
+ int olen;
+
+#ifdef _IP_VHL
+ olen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
+#else
+ olen = (ip->ip_hl << 2) - sizeof (struct ip);
+#endif
+ opts = (caddr_t)(ip + 1);
+ i = m->m_len - (sizeof (struct ip) + olen);
+ bcopy(opts + olen, opts, (unsigned)i);
+ m->m_len -= olen;
+ if (m->m_flags & M_PKTHDR)
+ m->m_pkthdr.len -= olen;
+#ifdef _IP_VHL
+ ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2);
+#else
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = (sizeof(struct ip) >> 2);
+#endif
+}
+
+u_char inetctlerrmap[PRC_NCMDS] = {
+ 0, 0, 0, 0,
+ 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
+ EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
+ EMSGSIZE, EHOSTUNREACH, 0, 0,
+ 0, 0, 0, 0,
+ ENOPROTOOPT
+};
+
+/*
+ * Forward a packet. If some error occurs return the sender
+ * an icmp packet. Note we can't always generate a meaningful
+ * icmp message because icmp doesn't have a large enough repertoire
+ * of codes and types.
+ *
+ * If not forwarding, just drop the packet. This could be confusing
+ * if ipforwarding was zero but some routing protocol was advancing
+ * us as a gateway to somewhere. However, we must let the routing
+ * protocol deal with that.
+ *
+ * The srcrt parameter indicates whether the packet is being forwarded
+ * via a source route.
+ */
+static void
+ip_forward(struct mbuf *m, int srcrt)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ register struct sockaddr_in *sin;
+ register struct rtentry *rt;
+ int error, type = 0, code = 0;
+ struct mbuf *mcopy;
+ n_long dest;
+ struct ifnet *destifp;
+
+ dest = 0;
+#ifdef DIAGNOSTIC
+ if (ipprintfs)
+ printf("forward: src %"PRIx32" dst %"PRIx32" ttl %x\n",
+ ip->ip_src.s_addr, ip->ip_dst.s_addr, ip->ip_ttl);
+#endif
+
+
+ if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ return;
+ }
+ HTONS(ip->ip_id);
+ if (ip->ip_ttl <= IPTTLDEC) {
+ icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
+ return;
+ }
+ ip->ip_ttl -= IPTTLDEC;
+
+ sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
+ if ((rt = ipforward_rt.ro_rt) == 0 ||
+ ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
+ if (ipforward_rt.ro_rt) {
+ RTFREE(ipforward_rt.ro_rt);
+ ipforward_rt.ro_rt = 0;
+ }
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = ip->ip_dst;
+
+ rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
+ if (ipforward_rt.ro_rt == 0) {
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
+ return;
+ }
+ rt = ipforward_rt.ro_rt;
+ }
+
+ /*
+ * Save at most 64 bytes of the packet in case
+ * we need to generate an ICMP message to the src.
+ */
+ mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
+
+ /*
+ * If forwarding packet using same interface that it came in on,
+ * perhaps should send a redirect to sender to shortcut a hop.
+ * Only send redirect if source is sending directly to us,
+ * and if packet was not source routed (or has any options).
+ * Also, don't send redirect if forwarding using a default route
+ * or a route modified by a redirect.
+ */
+#define satosin(sa) ((struct sockaddr_in *)(sa))
+ if (rt->rt_ifp == m->m_pkthdr.rcvif &&
+ (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
+ satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
+ ipsendredirects && !srcrt) {
+#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
+ u_long src = ntohl(ip->ip_src.s_addr);
+
+ if (RTA(rt) &&
+ (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
+ if (rt->rt_flags & RTF_GATEWAY)
+ dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
+ else
+ dest = ip->ip_dst.s_addr;
+			/* Router Requirements says to send only host redirects */
+ type = ICMP_REDIRECT;
+ code = ICMP_REDIRECT_HOST;
+ }
+ }
+
+ error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
+ IP_FORWARDING, 0);
+ if (error)
+ ipstat.ips_cantforward++;
+ else {
+ ipstat.ips_forward++;
+ if (type)
+ ipstat.ips_redirectsent++;
+ else {
+ if (mcopy)
+ m_freem(mcopy);
+ return;
+ }
+ }
+ if (mcopy == NULL)
+ return;
+ destifp = NULL;
+
+ switch (error) {
+
+ case 0: /* forwarded, but need redirect */
+ /* type, code set above */
+ break;
+
+ case ENETUNREACH: /* shouldn't happen, checked above */
+ case EHOSTUNREACH:
+ case ENETDOWN:
+ case EHOSTDOWN:
+ default:
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_HOST;
+ break;
+
+ case EMSGSIZE:
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_NEEDFRAG;
+ if (ipforward_rt.ro_rt)
+ destifp = ipforward_rt.ro_rt->rt_ifp;
+ ipstat.ips_cantfrag++;
+ break;
+
+ case ENOBUFS:
+ type = ICMP_SOURCEQUENCH;
+ code = 0;
+ break;
+ }
+ icmp_error(mcopy, type, code, dest, destifp);
+}
+
+void
+ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
+ struct mbuf *m)
+{
+ if (inp->inp_socket->so_options & SO_TIMESTAMP) {
+ struct timeval tv;
+
+ microtime(&tv);
+ *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
+ SCM_TIMESTAMP, SOL_SOCKET);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+ if (inp->inp_flags & INP_RECVDSTADDR) {
+ *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
+ sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+#ifdef notyet
+ /* XXX
+ * Moving these out of udp_input() made them even more broken
+ * than they already were.
+ */
+ /* options were tossed already */
+ if (inp->inp_flags & INP_RECVOPTS) {
+ *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
+ sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+ /* ip_srcroute doesn't do what we want here, need to fix */
+ if (inp->inp_flags & INP_RECVRETOPTS) {
+ *mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
+ sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+#endif
+ if (inp->inp_flags & INP_RECVIF) {
+ struct sockaddr_dl sdl;
+
+ sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
+ sdl.sdl_family = AF_LINK;
+ sdl.sdl_index = m->m_pkthdr.rcvif ?
+ m->m_pkthdr.rcvif->if_index : 0;
+ sdl.sdl_nlen = sdl.sdl_alen = sdl.sdl_slen = 0;
+ *mp = sbcreatecontrol((caddr_t) &sdl, sdl.sdl_len,
+ IP_RECVIF, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+}
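+
+/*
+ * Illustrative sketch (editor's addition, not part of the original file):
+ * how a user-level application would consume the control messages that
+ * ip_savecontrol() builds above. Assumes a BSD socket environment where
+ * SO_TIMESTAMP, IP_RECVDSTADDR and IP_RECVIF are available; the function
+ * and buffer names are placeholders.
+ */
+#if 0 /* example only -- user-space code, never compiled into the stack */
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <netinet/in.h>
+#include <net/if_dl.h>
+#include <string.h>
+
+static void
+example_recv_with_cmsgs(int fd)
+{
+ struct msghdr msg;
+ struct iovec iov;
+ struct cmsghdr *cm;
+ char data[1500];
+ char cbuf[256];
+ int on = 1;
+
+ /* ask the stack to attach the ancillary data handled above */
+ setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on));
+ setsockopt(fd, IPPROTO_IP, IP_RECVDSTADDR, &on, sizeof(on));
+ setsockopt(fd, IPPROTO_IP, IP_RECVIF, &on, sizeof(on));
+
+ memset(&msg, 0, sizeof(msg));
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ if (recvmsg(fd, &msg, 0) < 0)
+ return;
+
+ /* walk the control messages appended by ip_savecontrol() */
+ for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_TIMESTAMP)
+ ; /* CMSG_DATA(cm) is a struct timeval (arrival time) */
+ else if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVDSTADDR)
+ ; /* CMSG_DATA(cm) is the destination struct in_addr */
+ else if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVIF)
+ ; /* CMSG_DATA(cm) is a struct sockaddr_dl naming the receiving interface */
+ }
+}
+#endif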
+
+int
+ip_rsvp_init(struct socket *so)
+{
+ if (so->so_type != SOCK_RAW ||
+ so->so_proto->pr_protocol != IPPROTO_RSVP)
+ return EOPNOTSUPP;
+
+ if (ip_rsvpd != NULL)
+ return EADDRINUSE;
+
+ ip_rsvpd = so;
+ /*
+ * This may seem silly, but we need to be sure we don't over-increment
+ * the RSVP counter, in case something slips up.
+ */
+ if (!ip_rsvp_on) {
+ ip_rsvp_on = 1;
+ rsvp_on++;
+ }
+
+ return 0;
+}
+
+int
+ip_rsvp_done(void)
+{
+ ip_rsvpd = NULL;
+ /*
+ * This may seem silly, but we need to be sure we don't over-decrement
+ * the RSVP counter, in case something slips up.
+ */
+ if (ip_rsvp_on) {
+ ip_rsvp_on = 0;
+ rsvp_on--;
+ }
+ return 0;
+}
diff --git a/netinet/ip_mroute.c b/netinet/ip_mroute.c
new file mode 100644
index 0000000..3a89497
--- /dev/null
+++ b/netinet/ip_mroute.c
@@ -0,0 +1,2232 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * IP multicast forwarding procedures
+ *
+ * Written by David Waitzman, BBN Labs, August 1988.
+ * Modified by Steve Deering, Stanford, February 1989.
+ * Modified by Mark J. Steiglitz, Stanford, May, 1991
+ * Modified by Van Jacobson, LBL, January 1993
+ * Modified by Ajit Thyagarajan, PARC, August 1993
+ * Modified by Bill Fenner, PARC, April 1995
+ *
+ * MROUTING Revision: 3.5
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opt_mrouting.h"
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/sockio.h>
+#include <sys/syslog.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/igmp.h>
+#include <netinet/igmp_var.h>
+#include <netinet/ip_mroute.h>
+#include <netinet/udp.h>
+
+#ifndef NTOHL
+#if BYTE_ORDER != BIG_ENDIAN
+#define NTOHL(d) ((d) = ntohl((d)))
+#define NTOHS(d) ((d) = ntohs((u_short)(d)))
+#define HTONL(d) ((d) = htonl((d)))
+#define HTONS(d) ((d) = htons((u_short)(d)))
+#else
+#define NTOHL(d)
+#define NTOHS(d)
+#define HTONL(d)
+#define HTONS(d)
+#endif
+#endif
+
+#ifndef MROUTING
+extern u_long _ip_mcast_src(int vifi);
+extern int _ip_mforward(struct ip *ip, struct ifnet *ifp,
+ struct mbuf *m, struct ip_moptions *imo);
+extern int _ip_mrouter_done(void);
+extern int _ip_mrouter_get(int cmd, struct socket *so,
+ struct mbuf **m);
+extern int _ip_mrouter_set(int cmd, struct socket *so,
+ struct mbuf *m);
+extern int _mrt_ioctl(int req, caddr_t data, struct proc *p);
+
+/*
+ * Dummy routines and globals used when multicast routing is not compiled in.
+ */
+
+struct socket *ip_mrouter = NULL;
+/* static u_int ip_mrtproto = 0; */
+/* static struct mrtstat mrtstat; */
+u_int rsvpdebug = 0;
+
+int
+_ip_mrouter_set(int cmd, struct socket *so, struct mbuf *m)
+{
+ return(EOPNOTSUPP);
+}
+
+int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = _ip_mrouter_set;
+
+
+int
+_ip_mrouter_get(int cmd, struct socket *so, struct mbuf **m)
+{
+ return(EOPNOTSUPP);
+}
+
+int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = _ip_mrouter_get;
+
+int
+_ip_mrouter_done(void)
+{
+ return(0);
+}
+
+int (*ip_mrouter_done)(void) = _ip_mrouter_done;
+
+int
+_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
+ struct ip_moptions *imo)
+{
+ return(0);
+}
+
+int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
+ struct ip_moptions *) = _ip_mforward;
+
+int
+_mrt_ioctl(int req, caddr_t data, struct proc *p)
+{
+ return EOPNOTSUPP;
+}
+
+int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
+
+void
+rsvp_input(struct mbuf *m, int iphlen)
+{
+ /* Can still get packets with rsvp_on = 0 if there is a local member
+ * of the group to which the RSVP packet is addressed. But in this
+ * case we want to throw the packet away.
+ */
+ if (!rsvp_on) {
+ m_freem(m);
+ return;
+ }
+
+ if (ip_rsvpd != NULL) {
+ if (rsvpdebug)
+ printf("rsvp_input: Sending packet up old-style socket\n");
+ rip_input(m, iphlen);
+ return;
+ }
+ /* Drop the packet */
+ m_freem(m);
+}
+
+void ipip_input(struct mbuf *m, int iphlen)
+{ /* XXX must fixup manually */
+ rip_input(m, iphlen);
+}
+
+int (*legal_vif_num)(int) = 0;
+
+/*
+ * This should never be called, since IP_MULTICAST_VIF should fail, but
+ * just in case it does get called, the code a little lower in ip_output
+ * will assign the packet a local address.
+ */
+u_long
+_ip_mcast_src(int vifi) { return INADDR_ANY; }
+u_long (*ip_mcast_src)(int) = _ip_mcast_src;
+
+int
+ip_rsvp_vif_init(struct socket *so, struct mbuf *m)
+{
+ return(EINVAL);
+}
+
+int
+ip_rsvp_vif_done(struct socket *so, struct mbuf *m)
+{
+ return(EINVAL);
+}
+
+void
+ip_rsvp_force_done(struct socket *so)
+{
+ return;
+}
+
+#else /* MROUTING */
+
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
+
+#define INSIZ sizeof(struct in_addr)
+#define same(a1, a2) \
+ (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
+
+#define MT_MRTABLE MT_RTABLE /* since nothing else uses it */
+
+/*
+ * Globals. All but ip_mrouter and ip_mrtproto could be static,
+ * except for netstat or debugging purposes.
+ */
+#ifndef MROUTE_LKM
+struct socket *ip_mrouter = NULL;
+struct mrtstat mrtstat;
+
+int ip_mrtproto = IGMP_DVMRP; /* for netstat only */
+#else /* MROUTE_LKM */
+extern void X_ipip_input(struct mbuf *m, int iphlen);
+extern struct mrtstat mrtstat;
+static int ip_mrtproto;
+#endif
+
+#define NO_RTE_FOUND 0x1
+#define RTE_FOUND 0x2
+
+static struct mbuf *mfctable[MFCTBLSIZ];
+static u_char nexpire[MFCTBLSIZ];
+static struct vif viftable[MAXVIFS];
+static u_int mrtdebug = 0; /* debug level */
+#define DEBUG_MFC 0x02
+#define DEBUG_FORWARD 0x04
+#define DEBUG_EXPIRE 0x08
+#define DEBUG_XMIT 0x10
+static u_int tbfdebug = 0; /* tbf debug level */
+static u_int rsvpdebug = 0; /* rsvp debug level */
+
+#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
+#define UPCALL_EXPIRE 6 /* number of timeouts */
+
+/*
+ * Define the token bucket filter structures
+ * tbftable -> each vif has one of these for storing info
+ */
+
+static struct tbf tbftable[MAXVIFS];
+#define TBF_REPROCESS (hz / 100) /* 100x / second */
+
+/*
+ * 'Interfaces' associated with decapsulator (so we can tell
+ * packets that went through it from ones that get reflected
+ * by a broken gateway). These interfaces are never linked into
+ * the system ifnet list & no routes point to them. I.e., packets
+ * can't be sent this way. They only exist as a placeholder for
+ * multicast source verification.
+ */
+static struct ifnet multicast_decap_if[MAXVIFS];
+
+#define ENCAP_TTL 64
+#define ENCAP_PROTO IPPROTO_IPIP /* 4 */
+
+/* prototype IP hdr for encapsulated packets */
+static struct ip multicast_encap_iphdr = {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ sizeof(struct ip) >> 2, IPVERSION,
+#else
+ IPVERSION, sizeof(struct ip) >> 2,
+#endif
+ 0, /* tos */
+ sizeof(struct ip), /* total length */
+ 0, /* id */
+ 0, /* frag offset */
+ ENCAP_TTL, ENCAP_PROTO,
+ 0, /* checksum */
+};
+
+/*
+ * Private variables.
+ */
+static vifi_t numvifs = 0;
+static int have_encap_tunnel = 0;
+
+/*
+ * one-back cache used by ipip_input to locate a tunnel's vif
+ * given a datagram's src ip address.
+ */
+static u_long last_encap_src;
+static struct vif *last_encap_vif;
+
+static u_long X_ip_mcast_src(int vifi);
+static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo);
+static int X_ip_mrouter_done(void);
+static int X_ip_mrouter_get(int cmd, struct socket *so, struct mbuf **m);
+static int X_ip_mrouter_set(int cmd, struct socket *so, struct mbuf *m);
+static int X_legal_vif_num(int vif);
+static int X_mrt_ioctl(int cmd, caddr_t data);
+
+static int get_sg_cnt(struct sioc_sg_req *);
+static int get_vif_cnt(struct sioc_vif_req *);
+static int ip_mrouter_init(struct socket *, struct mbuf *);
+static int add_vif(struct vifctl *);
+static int del_vif(vifi_t *);
+static int add_mfc(struct mfcctl *);
+static int del_mfc(struct mfcctl *);
+static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
+static int get_version(struct mbuf *);
+static int get_assert(struct mbuf *);
+static int set_assert(int *);
+static void expire_upcalls(void *);
+static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *,
+ vifi_t);
+static void phyint_send(struct ip *, struct vif *, struct mbuf *);
+static void encap_send(struct ip *, struct vif *, struct mbuf *);
+static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long);
+static void tbf_queue(struct vif *, struct mbuf *);
+static void tbf_process_q(struct vif *);
+static void tbf_reprocess_q(void *);
+static int tbf_dq_sel(struct vif *, struct ip *);
+static void tbf_send_packet(struct vif *, struct mbuf *);
+static void tbf_update_tokens(struct vif *);
+static int priority(struct vif *, struct ip *);
+void multiencap_decap(struct mbuf *);
+
+/*
+ * whether or not special PIM assert processing is enabled.
+ */
+static int pim_assert;
+/*
+ * Rate limit for assert notification messages, in usec
+ */
+#define ASSERT_MSG_TIME 3000000
+
+/*
+ * Hash function for a source, group entry
+ */
+#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
+ ((g) >> 20) ^ ((g) >> 10) ^ (g))
+
+/*
+ * Find a route for a given origin IP address and Multicast group address
+ * Type of service parameter to be added in the future!!!
+ */
+
+#define MFCFIND(o, g, rt) { \
+ register struct mbuf *_mb_rt = mfctable[MFCHASH(o,g)]; \
+ register struct mfc *_rt = NULL; \
+ rt = NULL; \
+ ++mrtstat.mrts_mfc_lookups; \
+ while (_mb_rt) { \
+ _rt = mtod(_mb_rt, struct mfc *); \
+ if ((_rt->mfc_origin.s_addr == o) && \
+ (_rt->mfc_mcastgrp.s_addr == g) && \
+ (_mb_rt->m_act == NULL)) { \
+ rt = _rt; \
+ break; \
+ } \
+ _mb_rt = _mb_rt->m_next; \
+ } \
+ if (rt == NULL) { \
+ ++mrtstat.mrts_mfc_misses; \
+ } \
+}
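+
+/*
+ * Illustrative sketch (editor's addition, not part of the original file):
+ * how MFCHASH/MFCFIND above are typically used to look up the forwarding
+ * cache entry for a (source, group) pair. Assumes MFCHASHMOD (defined in
+ * ip_mroute.h) reduces its argument modulo MFCTBLSIZ; the function name
+ * is a placeholder.
+ */
+#if 0 /* example only */
+static struct mfc *
+example_mfc_lookup(struct in_addr origin, struct in_addr group)
+{
+ struct mfc *rt;
+
+ /* addresses are compared in network byte order, exactly as stored */
+ MFCFIND(origin.s_addr, group.s_addr, rt);
+ return rt; /* NULL on a cache miss (mrts_mfc_misses is bumped) */
+}
+#endif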
+
+
+/*
+ * Macros to compute elapsed time efficiently
+ * Borrowed from Van Jacobson's scheduling code
+ */
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a).tv_usec - (b).tv_usec; \
+ if ((xxs = (a).tv_sec - (b).tv_sec)) { \
+ switch (xxs) { \
+ case 2: \
+ delta += 1000000; \
+ /* fall through */ \
+ case 1: \
+ delta += 1000000; \
+ break; \
+ default: \
+ delta += (1000000 * xxs); \
+ } \
+ } \
+}
+
+#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
+ (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
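+
+/*
+ * Worked example (editor's addition): with a = { 5 s, 200 us } and
+ * b = { 3 s, 900000 us }, TV_DELTA(a, b, delta) first sets
+ * delta = 200 - 900000 = -899800, then sees xxs = 2 and adds 1000000
+ * twice (the "case 2" arm falls through into "case 1"), giving
+ * delta = 1100200 us, i.e. the expected 1.1002 seconds.
+ */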
+
+#ifdef UPCALL_TIMING
+u_long upcall_data[51];
+static void collate(struct timeval *);
+#endif /* UPCALL_TIMING */
+
+
+/*
+ * Handle MRT setsockopt commands to modify the multicast routing tables.
+ */
+static int
+X_ip_mrouter_set(int cmd, struct socket *so, struct mbuf *m)
+{
+ if (cmd != MRT_INIT && so != ip_mrouter) return EACCES;
+
+ switch (cmd) {
+ case MRT_INIT: return ip_mrouter_init(so, m);
+ case MRT_DONE: return ip_mrouter_done();
+ case MRT_ADD_VIF: return add_vif (mtod(m, struct vifctl *));
+ case MRT_DEL_VIF: return del_vif (mtod(m, vifi_t *));
+ case MRT_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *));
+ case MRT_DEL_MFC: return del_mfc (mtod(m, struct mfcctl *));
+ case MRT_ASSERT: return set_assert(mtod(m, int *));
+ default: return EOPNOTSUPP;
+ }
+}
+
+#ifndef MROUTE_LKM
+int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = X_ip_mrouter_set;
+#endif
+
+/*
+ * Handle MRT getsockopt commands
+ */
+static int
+X_ip_mrouter_get(int cmd, struct socket *so, struct mbuf **m)
+{
+ struct mbuf *mb;
+
+ if (so != ip_mrouter) return EACCES;
+
+ *m = mb = m_get(M_WAIT, MT_SOOPTS);
+
+ switch (cmd) {
+ case MRT_VERSION: return get_version(mb);
+ case MRT_ASSERT: return get_assert(mb);
+ default: return EOPNOTSUPP;
+ }
+}
+
+#ifndef MROUTE_LKM
+int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = X_ip_mrouter_get;
+#endif
+
+/*
+ * Handle ioctl commands to obtain information from the cache
+ */
+static int
+X_mrt_ioctl(int cmd, caddr_t data)
+{
+ int error = 0;
+
+ switch (cmd) {
+ case (SIOCGETVIFCNT):
+ return (get_vif_cnt((struct sioc_vif_req *)data));
+ break;
+ case (SIOCGETSGCNT):
+ return (get_sg_cnt((struct sioc_sg_req *)data));
+ break;
+ default:
+ return (EINVAL);
+ break;
+ }
+ return error;
+}
+
+#ifndef MROUTE_LKM
+int (*mrt_ioctl)(int, caddr_t) = X_mrt_ioctl;
+#endif
+
+/*
+ * returns the packet, byte and rpf-failure counts for the source/group provided
+ */
+static int
+get_sg_cnt(struct sioc_sg_req *req)
+{
+ register struct mfc *rt;
+ int s;
+
+ s = splnet();
+ MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
+ splx(s);
+ if (rt != NULL) {
+ req->pktcnt = rt->mfc_pkt_cnt;
+ req->bytecnt = rt->mfc_byte_cnt;
+ req->wrong_if = rt->mfc_wrong_if;
+ } else
+ req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
+
+ return 0;
+}
+
+/*
+ * returns the input and output packet and byte counts on the vif provided
+ */
+static int
+get_vif_cnt(struct sioc_vif_req *req)
+{
+ register vifi_t vifi = req->vifi;
+
+ if (vifi >= numvifs) return EINVAL;
+
+ req->icount = viftable[vifi].v_pkt_in;
+ req->ocount = viftable[vifi].v_pkt_out;
+ req->ibytes = viftable[vifi].v_bytes_in;
+ req->obytes = viftable[vifi].v_bytes_out;
+
+ return 0;
+}
+
+/*
+ * Enable multicast routing
+ */
+static int
+ip_mrouter_init(struct socket *so, struct mbuf *m)
+{
+ int *v;
+
+ if (mrtdebug)
+ log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
+ so->so_type, so->so_proto->pr_protocol);
+
+ if (so->so_type != SOCK_RAW ||
+ so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
+
+ if (!m || (m->m_len != sizeof(int *)))
+ return ENOPROTOOPT;
+
+ v = mtod(m, int *);
+ if (*v != 1)
+ return ENOPROTOOPT;
+
+ if (ip_mrouter != NULL) return EADDRINUSE;
+
+ ip_mrouter = so;
+
+ bzero((caddr_t)mfctable, sizeof(mfctable));
+ bzero((caddr_t)nexpire, sizeof(nexpire));
+
+ pim_assert = 0;
+
+ timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
+
+ if (mrtdebug)
+ log(LOG_DEBUG, "ip_mrouter_init\n");
+
+ return 0;
+}
+
+/*
+ * Disable multicast routing
+ */
+static int
+X_ip_mrouter_done(void)
+{
+ vifi_t vifi;
+ int i;
+ struct ifnet *ifp;
+ struct ifreq ifr;
+ struct mbuf *mb_rt;
+ struct mbuf *m;
+ struct rtdetq *rte;
+ int s;
+
+ s = splnet();
+
+ /*
+ * For each phyint in use, disable promiscuous reception of all IP
+ * multicasts.
+ */
+ for (vifi = 0; vifi < numvifs; vifi++) {
+ if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
+ !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
+ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
+ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
+ = INADDR_ANY;
+ ifp = viftable[vifi].v_ifp;
+ (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
+ }
+ }
+ bzero((caddr_t)tbftable, sizeof(tbftable));
+ bzero((caddr_t)viftable, sizeof(viftable));
+ numvifs = 0;
+ pim_assert = 0;
+
+ untimeout(expire_upcalls, (caddr_t)NULL);
+
+ /*
+ * Free all multicast forwarding cache entries.
+ */
+ for (i = 0; i < MFCTBLSIZ; i++) {
+ mb_rt = mfctable[i];
+ while (mb_rt) {
+ if (mb_rt->m_act != NULL) {
+ while (mb_rt->m_act) {
+ m = mb_rt->m_act;
+ mb_rt->m_act = m->m_act;
+ rte = mtod(m, struct rtdetq *);
+ m_freem(rte->m);
+ m_free(m);
+ }
+ }
+ mb_rt = m_free(mb_rt);
+ }
+ }
+
+ bzero((caddr_t)mfctable, sizeof(mfctable));
+
+ /*
+ * Reset de-encapsulation cache
+ */
+ last_encap_src = 0;
+ last_encap_vif = NULL;
+ have_encap_tunnel = 0;
+
+ ip_mrouter = NULL;
+
+ splx(s);
+
+ if (mrtdebug)
+ log(LOG_DEBUG, "ip_mrouter_done\n");
+
+ return 0;
+}
+
+#ifndef MROUTE_LKM
+int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
+#endif
+
+static int
+get_version(struct mbuf *mb)
+{
+ int *v;
+
+ v = mtod(mb, int *);
+
+ *v = 0x0305; /* XXX !!!! */
+ mb->m_len = sizeof(int);
+
+ return 0;
+}
+
+/*
+ * Set PIM assert processing global
+ */
+static int
+set_assert(int *i)
+{
+ if ((*i != 1) && (*i != 0))
+ return EINVAL;
+
+ pim_assert = *i;
+
+ return 0;
+}
+
+/*
+ * Get PIM assert processing global
+ */
+static int
+get_assert(struct mbuf *m)
+{
+ int *i;
+
+ i = mtod(m, int *);
+
+ *i = pim_assert;
+
+ return 0;
+}
+
+/*
+ * Add a vif to the vif table
+ */
+static int
+add_vif(struct vifctl *vifcp)
+{
+ register struct vif *vifp = viftable + vifcp->vifc_vifi;
+ static struct sockaddr_in sin = {sizeof sin, AF_INET};
+ struct ifaddr *ifa;
+ struct ifnet *ifp;
+ struct ifreq ifr;
+ int error, s;
+ struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
+
+ if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL;
+ if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
+
+ /* Find the interface with an address in AF_INET family */
+ sin.sin_addr = vifcp->vifc_lcl_addr;
+ ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
+ if (ifa == 0) return EADDRNOTAVAIL;
+ ifp = ifa->ifa_ifp;
+
+ if (vifcp->vifc_flags & VIFF_TUNNEL) {
+ if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
+ /*
+ * An encapsulating tunnel is wanted. Tell ipip_input() to
+ * start paying attention to encapsulated packets.
+ */
+ if (have_encap_tunnel == 0) {
+ have_encap_tunnel = 1;
+ for (s = 0; s < MAXVIFS; ++s) {
+ multicast_decap_if[s].if_name = "mdecap";
+ multicast_decap_if[s].if_unit = s;
+ }
+ }
+ /*
+ * Set interface to fake encapsulator interface
+ */
+ ifp = &multicast_decap_if[vifcp->vifc_vifi];
+ /*
+ * Prepare cached route entry
+ */
+ bzero(&vifp->v_route, sizeof(vifp->v_route));
+ } else {
+ log(LOG_ERR, "source routed tunnels not supported\n");
+ return EOPNOTSUPP;
+ }
+ } else {
+ /* Make sure the interface supports multicast */
+ if ((ifp->if_flags & IFF_MULTICAST) == 0)
+ return EOPNOTSUPP;
+
+ /* Enable promiscuous reception of all IP multicasts from the if */
+ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
+ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
+ s = splnet();
+ error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
+ splx(s);
+ if (error)
+ return error;
+ }
+
+ s = splnet();
+ /* define parameters for the tbf structure */
+ vifp->v_tbf = v_tbf;
+ GET_TIME(vifp->v_tbf->tbf_last_pkt_t);
+ vifp->v_tbf->tbf_n_tok = 0;
+ vifp->v_tbf->tbf_q_len = 0;
+ vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
+ vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
+
+ vifp->v_flags = vifcp->vifc_flags;
+ vifp->v_threshold = vifcp->vifc_threshold;
+ vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
+ vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
+ vifp->v_ifp = ifp;
+ /* scaling up here allows division by 1024 in critical code */
+ vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000;
+ vifp->v_rsvp_on = 0;
+ vifp->v_rsvpd = NULL;
+ /* initialize per vif pkt counters */
+ vifp->v_pkt_in = 0;
+ vifp->v_pkt_out = 0;
+ vifp->v_bytes_in = 0;
+ vifp->v_bytes_out = 0;
+ splx(s);
+
+ /* Adjust numvifs up if the vifi is higher than numvifs */
+ if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
+
+ if (mrtdebug)
+ log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n",
+ vifcp->vifc_vifi,
+ ntohl(vifcp->vifc_lcl_addr.s_addr),
+ (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
+ ntohl(vifcp->vifc_rmt_addr.s_addr),
+ vifcp->vifc_threshold,
+ vifcp->vifc_rate_limit);
+
+ return 0;
+}
+
+/*
+ * Delete a vif from the vif table
+ */
+static int
+del_vif(vifi_t *vifip)
+{
+ register struct vif *vifp = viftable + *vifip;
+ register vifi_t vifi;
+ register struct mbuf *m;
+ struct ifnet *ifp;
+ struct ifreq ifr;
+ int s;
+
+ if (*vifip >= numvifs) return EINVAL;
+ if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
+
+ s = splnet();
+
+ if (!(vifp->v_flags & VIFF_TUNNEL)) {
+ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
+ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
+ ifp = vifp->v_ifp;
+ (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
+ }
+
+ if (vifp == last_encap_vif) {
+ last_encap_vif = 0;
+ last_encap_src = 0;
+ }
+
+ /*
+ * Free packets queued at the interface
+ */
+ while (vifp->v_tbf->tbf_q) {
+ m = vifp->v_tbf->tbf_q;
+ vifp->v_tbf->tbf_q = m->m_act;
+ m_freem(m);
+ }
+
+ bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
+ bzero((caddr_t)vifp, sizeof (*vifp));
+
+ /* Adjust numvifs down */
+ for (vifi = numvifs; vifi > 0; vifi--)
+ if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
+ numvifs = vifi;
+
+ splx(s);
+
+ if (mrtdebug)
+ log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs);
+
+ return 0;
+}
+
+/*
+ * Add an mfc entry
+ */
+static int
+add_mfc(struct mfcctl *mfccp)
+{
+ struct mfc *rt;
+ register struct mbuf *mb_rt;
+ u_long hash;
+ struct mbuf *mb_ntry;
+ struct rtdetq *rte;
+ register u_short nstl;
+ int s;
+ int i;
+
+ MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
+
+ /* If an entry already exists, just update the fields */
+ if (rt) {
+ if (mrtdebug & DEBUG_MFC)
+ log(LOG_DEBUG,"add_mfc update o %x g %x p %x\n",
+ ntohl(mfccp->mfcc_origin.s_addr),
+ ntohl(mfccp->mfcc_mcastgrp.s_addr),
+ mfccp->mfcc_parent);
+
+ s = splnet();
+ rt->mfc_parent = mfccp->mfcc_parent;
+ for (i = 0; i < numvifs; i++)
+ rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
+ splx(s);
+ return 0;
+ }
+
+ /*
+ * Find the entry for which the upcall was made and update
+ */
+ s = splnet();
+ hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
+ for (mb_rt = mfctable[hash], nstl = 0; mb_rt; mb_rt = mb_rt->m_next) {
+
+ rt = mtod(mb_rt, struct mfc *);
+ if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
+ (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
+ (mb_rt->m_act != NULL)) {
+
+ if (nstl++)
+ log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %x\n",
+ "multiple kernel entries",
+ ntohl(mfccp->mfcc_origin.s_addr),
+ ntohl(mfccp->mfcc_mcastgrp.s_addr),
+ mfccp->mfcc_parent, mb_rt->m_act);
+
+ if (mrtdebug & DEBUG_MFC)
+ log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %x\n",
+ ntohl(mfccp->mfcc_origin.s_addr),
+ ntohl(mfccp->mfcc_mcastgrp.s_addr),
+ mfccp->mfcc_parent, mb_rt->m_act);
+
+ rt->mfc_origin = mfccp->mfcc_origin;
+ rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
+ rt->mfc_parent = mfccp->mfcc_parent;
+ for (i = 0; i < numvifs; i++)
+ rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
+ /* initialize pkt counters per src-grp */
+ rt->mfc_pkt_cnt = 0;
+ rt->mfc_byte_cnt = 0;
+ rt->mfc_wrong_if = 0;
+ rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
+
+ rt->mfc_expire = 0; /* Don't clean this guy up */
+ nexpire[hash]--;
+
+ /* free packets queued at the end of this entry */
+ while (mb_rt->m_act) {
+ mb_ntry = mb_rt->m_act;
+ rte = mtod(mb_ntry, struct rtdetq *);
+/* #ifdef RSVP_ISI */
+ ip_mdq(rte->m, rte->ifp, rt, -1);
+/* #endif */
+ mb_rt->m_act = mb_ntry->m_act;
+ m_freem(rte->m);
+#ifdef UPCALL_TIMING
+ collate(&(rte->t));
+#endif /* UPCALL_TIMING */
+ m_free(mb_ntry);
+ }
+ }
+ }
+
+ /*
+ * It is possible that an entry is being inserted without an upcall
+ */
+ if (nstl == 0) {
+ if (mrtdebug & DEBUG_MFC)
+ log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x p %x\n",
+ hash, ntohl(mfccp->mfcc_origin.s_addr),
+ ntohl(mfccp->mfcc_mcastgrp.s_addr),
+ mfccp->mfcc_parent);
+
+ for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
+
+ rt = mtod(mb_rt, struct mfc *);
+ if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
+ (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
+
+ rt->mfc_origin = mfccp->mfcc_origin;
+ rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
+ rt->mfc_parent = mfccp->mfcc_parent;
+ for (i = 0; i < numvifs; i++)
+ rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
+ /* initialize pkt counters per src-grp */
+ rt->mfc_pkt_cnt = 0;
+ rt->mfc_byte_cnt = 0;
+ rt->mfc_wrong_if = 0;
+ rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
+ if (rt->mfc_expire)
+ nexpire[hash]--;
+ rt->mfc_expire = 0;
+ }
+ }
+ if (mb_rt == NULL) {
+ /* no upcall, so make a new entry */
+ MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
+ if (mb_rt == NULL) {
+ splx(s);
+ return ENOBUFS;
+ }
+
+ rt = mtod(mb_rt, struct mfc *);
+
+ /* insert new entry at head of hash chain */
+ rt->mfc_origin = mfccp->mfcc_origin;
+ rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
+ rt->mfc_parent = mfccp->mfcc_parent;
+ for (i = 0; i < numvifs; i++)
+ rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
+ /* initialize pkt counters per src-grp */
+ rt->mfc_pkt_cnt = 0;
+ rt->mfc_byte_cnt = 0;
+ rt->mfc_wrong_if = 0;
+ rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
+ rt->mfc_expire = 0;
+
+ /* link into table */
+ mb_rt->m_next = mfctable[hash];
+ mfctable[hash] = mb_rt;
+ mb_rt->m_act = NULL;
+ }
+ }
+ splx(s);
+ return 0;
+}
+
+#ifdef UPCALL_TIMING
+/*
+ * collect delay statistics on the upcalls
+ */
+static void collate(struct timeval *t)
+{
+ register u_long d;
+ register struct timeval tp;
+ register u_long delta;
+
+ GET_TIME(tp);
+
+ if (TV_LT(*t, tp))
+ {
+ TV_DELTA(tp, *t, delta);
+
+ d = delta >> 10;
+ if (d > 50)
+ d = 50;
+
+ ++upcall_data[d];
+ }
+}
+#endif /* UPCALL_TIMING */
+
+/*
+ * Delete an mfc entry
+ */
+static int
+del_mfc(struct mfcctl *mfccp)
+{
+ struct in_addr origin;
+ struct in_addr mcastgrp;
+ struct mfc *rt;
+ struct mbuf *mb_rt;
+ struct mbuf **nptr;
+ u_long hash;
+ int s;
+
+ origin = mfccp->mfcc_origin;
+ mcastgrp = mfccp->mfcc_mcastgrp;
+ hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
+
+ if (mrtdebug & DEBUG_MFC)
+ log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x\n",
+ ntohl(origin.s_addr), ntohl(mcastgrp.s_addr));
+
+ s = splnet();
+
+ nptr = &mfctable[hash];
+ while ((mb_rt = *nptr) != NULL) {
+ rt = mtod(mb_rt, struct mfc *);
+ if (origin.s_addr == rt->mfc_origin.s_addr &&
+ mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
+ mb_rt->m_act == NULL)
+ break;
+
+ nptr = &mb_rt->m_next;
+ }
+ if (mb_rt == NULL) {
+ splx(s);
+ return EADDRNOTAVAIL;
+ }
+
+ MFREE(mb_rt, *nptr);
+
+ splx(s);
+
+ return 0;
+}
+
+/*
+ * Send a message to mrouted on the multicast routing socket
+ */
+static int
+socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
+{
+ if (s) {
+ if (sbappendaddr(&s->so_rcv,
+ (struct sockaddr *)src,
+ mm, (struct mbuf *)0) != 0) {
+ sorwakeup(s);
+ return 0;
+ }
+ }
+ m_freem(mm);
+ return -1;
+}
+
+/*
+ * IP multicast forwarding function. This function assumes that the packet
+ * pointed to by "ip" has arrived on (or is about to be sent to) the interface
+ * pointed to by "ifp", and the packet is to be relayed to other networks
+ * that have members of the packet's destination IP multicast group.
+ *
+ * The packet is returned unscathed to the caller, unless it is
+ * erroneous, in which case a non-zero return value tells the caller to
+ * discard it.
+ */
+
+#define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */
+#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
+
+static int
+X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
+ struct ip_moptions *imo)
+{
+ register struct mfc *rt;
+ register u_char *ipoptions;
+ static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
+ static int srctun = 0;
+ register struct mbuf *mm;
+ int s;
+ vifi_t vifi;
+ struct vif *vifp;
+
+ if (mrtdebug & DEBUG_FORWARD)
+ log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x\n",
+ ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
+
+ if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
+ (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
+ /*
+ * Packet arrived via a physical interface or
+ * an encapsulated tunnel.
+ */
+ } else {
+ /*
+ * Packet arrived through a source-route tunnel.
+ * Source-route tunnels are no longer supported.
+ */
+ if ((srctun++ % 1000) == 0)
+ log(LOG_ERR, "ip_mforward: received source-routed packet from %x\n",
+ ntohl(ip->ip_src.s_addr));
+
+ return 1;
+ }
+
+ if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
+ if (ip->ip_ttl < 255)
+ ip->ip_ttl++; /* compensate for -1 in *_send routines */
+ if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
+ vifp = viftable + vifi;
+ printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s%d)\n",
+ ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi,
+ (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
+ vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
+ }
+ return (ip_mdq(m, ifp, NULL, vifi));
+ }
+ if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
+ printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n",
+ ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr));
+ if(!imo)
+ printf("In fact, no options were specified at all\n");
+ }
+
+ /*
+ * Don't forward a packet with time-to-live of zero or one,
+ * or a packet destined to a local-only group.
+ */
+ if (ip->ip_ttl <= 1 ||
+ ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
+ return 0;
+
+ /*
+ * Determine forwarding vifs from the forwarding cache table
+ */
+ s = splnet();
+ MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
+
+ /* Entry exists, so forward if necessary */
+ if (rt != NULL) {
+ splx(s);
+ return (ip_mdq(m, ifp, rt, -1));
+ } else {
+ /*
+ * If we don't have a route for the packet's origin,
+ * make a copy of the packet &
+ * send a message to the routing daemon
+ */
+
+ register struct mbuf *mb_rt;
+ register struct mbuf *mb_ntry;
+ register struct mbuf *mb0;
+ register struct rtdetq *rte;
+ register struct mbuf *rte_m;
+ register u_long hash;
+ register int npkts;
+ int hlen = ip->ip_hl << 2;
+#ifdef UPCALL_TIMING
+ struct timeval tp;
+
+ GET_TIME(tp);
+#endif
+
+ mrtstat.mrts_no_route++;
+ if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
+ log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",
+ ntohl(ip->ip_src.s_addr),
+ ntohl(ip->ip_dst.s_addr));
+
+ /*
+ * Allocate mbufs early so that we don't do extra work if we are
+ * just going to fail anyway. Make sure to pullup the header so
+ * that other people can't step on it.
+ */
+ MGET(mb_ntry, M_DONTWAIT, MT_DATA);
+ if (mb_ntry == NULL) {
+ splx(s);
+ return ENOBUFS;
+ }
+ mb0 = m_copy(m, 0, M_COPYALL);
+ if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
+ mb0 = m_pullup(mb0, hlen);
+ if (mb0 == NULL) {
+ m_free(mb_ntry);
+ splx(s);
+ return ENOBUFS;
+ }
+
+ /* is there an upcall waiting for this packet? */
+ hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
+ for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
+ rt = mtod(mb_rt, struct mfc *);
+ if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
+ (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
+ (mb_rt->m_act != NULL))
+ break;
+ }
+
+ if (mb_rt == NULL) {
+ int i;
+ struct igmpmsg *im;
+
+ /* no upcall, so make a new entry */
+ MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
+ if (mb_rt == NULL) {
+ m_free(mb_ntry);
+ m_freem(mb0);
+ splx(s);
+ return ENOBUFS;
+ }
+ /* Make a copy of the header to send to the user level process */
+ mm = m_copy(mb0, 0, hlen);
+ if (mm == NULL) {
+ m_free(mb_ntry);
+ m_freem(mb0);
+ m_free(mb_rt);
+ splx(s);
+ return ENOBUFS;
+ }
+
+ /*
+ * Send message to routing daemon to install
+ * a route into the kernel table
+ */
+ k_igmpsrc.sin_addr = ip->ip_src;
+
+ im = mtod(mm, struct igmpmsg *);
+ im->im_msgtype = IGMPMSG_NOCACHE;
+ im->im_mbz = 0;
+
+ mrtstat.mrts_upcalls++;
+
+ if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
+ log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
+ ++mrtstat.mrts_upq_sockfull;
+ m_free(mb_ntry);
+ m_freem(mb0);
+ m_free(mb_rt);
+ splx(s);
+ return ENOBUFS;
+ }
+
+ rt = mtod(mb_rt, struct mfc *);
+
+ /* insert new entry at head of hash chain */
+ rt->mfc_origin.s_addr = ip->ip_src.s_addr;
+ rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr;
+ rt->mfc_expire = UPCALL_EXPIRE;
+ nexpire[hash]++;
+ for (i = 0; i < numvifs; i++)
+ rt->mfc_ttls[i] = 0;
+ rt->mfc_parent = -1;
+
+ /* link into table */
+ mb_rt->m_next = mfctable[hash];
+ mfctable[hash] = mb_rt;
+ mb_rt->m_act = NULL;
+
+ rte_m = mb_rt;
+ } else {
+ /* determine if q has overflowed */
+ for (rte_m = mb_rt, npkts = 0; rte_m->m_act; rte_m = rte_m->m_act)
+ npkts++;
+
+ if (npkts > MAX_UPQ) {
+ mrtstat.mrts_upq_ovflw++;
+ m_free(mb_ntry);
+ m_freem(mb0);
+ splx(s);
+ return 0;
+ }
+ }
+
+ mb_ntry->m_act = NULL;
+ rte = mtod(mb_ntry, struct rtdetq *);
+
+ rte->m = mb0;
+ rte->ifp = ifp;
+#ifdef UPCALL_TIMING
+ rte->t = tp;
+#endif
+
+ /* Add this entry to the end of the queue */
+ rte_m->m_act = mb_ntry;
+
+ splx(s);
+
+ return 0;
+ }
+}
+
+#ifndef MROUTE_LKM
+int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
+ struct ip_moptions *) = X_ip_mforward;
+#endif
+
+/*
+ * Clean up the cache entry if upcall is not serviced
+ */
+static void
+expire_upcalls(void *unused)
+{
+ struct mbuf *mb_rt, *m, **nptr;
+ struct rtdetq *rte;
+ struct mfc *mfc;
+ int i;
+ int s;
+
+ s = splnet();
+ for (i = 0; i < MFCTBLSIZ; i++) {
+ if (nexpire[i] == 0)
+ continue;
+ nptr = &mfctable[i];
+ for (mb_rt = *nptr; mb_rt != NULL; mb_rt = *nptr) {
+ mfc = mtod(mb_rt, struct mfc *);
+
+ /*
+ * Skip real cache entries
+ * Make sure it wasn't marked to not expire (shouldn't happen)
+ * If it expires now
+ */
+ if (mb_rt->m_act != NULL &&
+ mfc->mfc_expire != 0 &&
+ --mfc->mfc_expire == 0) {
+ if (mrtdebug & DEBUG_EXPIRE)
+ log(LOG_DEBUG, "expire_upcalls: expiring (%x %x)\n",
+ ntohl(mfc->mfc_origin.s_addr),
+ ntohl(mfc->mfc_mcastgrp.s_addr));
+ /*
+ * drop all the queued packets and
+ * free each mbuf holding the pkt, incoming ifp, and timing info
+ */
+ while (mb_rt->m_act) {
+ m = mb_rt->m_act;
+ mb_rt->m_act = m->m_act;
+
+ rte = mtod(m, struct rtdetq *);
+ m_freem(rte->m);
+ m_free(m);
+ }
+ ++mrtstat.mrts_cache_cleanups;
+ nexpire[i]--;
+
+ MFREE(mb_rt, *nptr);
+ } else {
+ nptr = &mb_rt->m_next;
+ }
+ }
+ }
+ splx(s);
+ timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
+}
+
+/*
+ * Packet forwarding routine once entry in the cache is made
+ */
+static int
+ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt,
+ vifi_t xmt_vif)
+{
+ register struct ip *ip = mtod(m, struct ip *);
+ register vifi_t vifi;
+ register struct vif *vifp;
+ register int plen = ntohs(ip->ip_len);
+
+/*
+ * Macro to send packet on vif. Since RSVP packets don't get counted on
+ * input, they shouldn't get counted on output, so statistics keeping is
+ * separate.
+ */
+#define MC_SEND(ip,vifp,m) { \
+ if ((vifp)->v_flags & VIFF_TUNNEL) \
+ encap_send((ip), (vifp), (m)); \
+ else \
+ phyint_send((ip), (vifp), (m)); \
+}
+
+ /*
+ * If xmt_vif is not -1, send on only the requested vif.
+ *
+ * (since vifi_t is u_short, -1 becomes MAXUSHORT, which is > numvifs.)
+ */
+ if (xmt_vif < numvifs) {
+ MC_SEND(ip, viftable + xmt_vif, m);
+ return 1;
+ }
+
+ /*
+ * Don't forward if it didn't arrive from the parent vif for its origin.
+ */
+ vifi = rt->mfc_parent;
+ if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
+ /* came in the wrong interface */
+ if (mrtdebug & DEBUG_FORWARD)
+ log(LOG_DEBUG, "wrong if: ifp %x vifi %d vififp %x\n",
+ ifp, vifi, viftable[vifi].v_ifp);
+ ++mrtstat.mrts_wrong_if;
+ ++rt->mfc_wrong_if;
+ /*
+ * If we are doing PIM assert processing, and we are forwarding
+ * packets on this interface, and it is a broadcast medium
+ * interface (and not a tunnel), send a message to the routing daemon.
+ */
+ if (pim_assert && rt->mfc_ttls[vifi] &&
+ (ifp->if_flags & IFF_BROADCAST) &&
+ !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
+ struct sockaddr_in k_igmpsrc;
+ struct mbuf *mm;
+ struct igmpmsg *im;
+ int hlen = ip->ip_hl << 2;
+ struct timeval now;
+ register u_long delta;
+
+ GET_TIME(now);
+
+ TV_DELTA(rt->mfc_last_assert, now, delta);
+
+ if (delta > ASSERT_MSG_TIME) {
+ mm = m_copy(m, 0, hlen);
+ if (mm && (M_HASCL(mm) || mm->m_len < hlen))
+ mm = m_pullup(mm, hlen);
+ if (mm == NULL) {
+ return ENOBUFS;
+ }
+
+ rt->mfc_last_assert = now;
+
+ im = mtod(mm, struct igmpmsg *);
+ im->im_msgtype = IGMPMSG_WRONGVIF;
+ im->im_mbz = 0;
+ im->im_vif = vifi;
+
+ k_igmpsrc.sin_addr = im->im_src;
+
+ socket_send(ip_mrouter, mm, &k_igmpsrc);
+ }
+ }
+ return 0;
+ }
+
+ /* If I sourced this packet, it counts as output, else it was input. */
+ if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
+ viftable[vifi].v_pkt_out++;
+ viftable[vifi].v_bytes_out += plen;
+ } else {
+ viftable[vifi].v_pkt_in++;
+ viftable[vifi].v_bytes_in += plen;
+ }
+ rt->mfc_pkt_cnt++;
+ rt->mfc_byte_cnt += plen;
+
+ /*
+ * For each vif, decide if a copy of the packet should be forwarded.
+ * Forward if:
+ * - the ttl exceeds the vif's threshold
+ * - there are group members downstream on interface
+ */
+ for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
+ if ((rt->mfc_ttls[vifi] > 0) &&
+ (ip->ip_ttl > rt->mfc_ttls[vifi])) {
+ vifp->v_pkt_out++;
+ vifp->v_bytes_out += plen;
+ MC_SEND(ip, vifp, m);
+ }
+
+ return 0;
+}
+
+/*
+ * check whether a vif number is legal. This is used by ip_output, to
+ * export numvifs there.
+ */
+static int
+X_legal_vif_num(int vif)
+{
+ if (vif >= 0 && vif < numvifs)
+ return(1);
+ else
+ return(0);
+}
+
+#ifndef MROUTE_LKM
+int (*legal_vif_num)(int) = X_legal_vif_num;
+#endif
+
+/*
+ * Return the local address used by this vif
+ */
+static u_long
+X_ip_mcast_src(int vifi)
+{
+ if (vifi >= 0 && vifi < numvifs)
+ return viftable[vifi].v_lcl_addr.s_addr;
+ else
+ return INADDR_ANY;
+}
+
+#ifndef MROUTE_LKM
+u_long (*ip_mcast_src)(int) = X_ip_mcast_src;
+#endif
+
+static void
+phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
+{
+ register struct mbuf *mb_copy;
+ register int hlen = ip->ip_hl << 2;
+
+ /*
+ * Make a new reference to the packet; make sure that
+ * the IP header is actually copied, not just referenced,
+ * so that ip_output() only scribbles on the copy.
+ */
+ mb_copy = m_copy(m, 0, M_COPYALL);
+ if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
+ mb_copy = m_pullup(mb_copy, hlen);
+ if (mb_copy == NULL)
+ return;
+
+ if (vifp->v_rate_limit <= 0)
+ tbf_send_packet(vifp, mb_copy);
+ else
+ tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
+}
+
+static void
+encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
+{
+ register struct mbuf *mb_copy;
+ register struct ip *ip_copy;
+ register int i, len = ip->ip_len;
+
+ /*
+ * copy the old packet & pullup its IP header into the
+ * new mbuf so we can modify it. Try to fill the new
+ * mbuf since if we don't, the ethernet driver will.
+ */
+ MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER);
+ if (mb_copy == NULL)
+ return;
+ mb_copy->m_data += max_linkhdr;
+ mb_copy->m_len = sizeof(multicast_encap_iphdr);
+
+ if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
+ m_freem(mb_copy);
+ return;
+ }
+ i = MHLEN - M_LEADINGSPACE(mb_copy);
+ if (i > len)
+ i = len;
+ mb_copy = m_pullup(mb_copy, i);
+ if (mb_copy == NULL)
+ return;
+ mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
+
+ /*
+ * fill in the encapsulating IP header.
+ */
+ ip_copy = mtod(mb_copy, struct ip *);
+ *ip_copy = multicast_encap_iphdr;
+ ip_copy->ip_id = htons(ip_id++);
+ ip_copy->ip_len += len;
+ ip_copy->ip_src = vifp->v_lcl_addr;
+ ip_copy->ip_dst = vifp->v_rmt_addr;
+
+ /*
+ * turn the encapsulated IP header back into a valid one.
+ */
+ ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
+ --ip->ip_ttl;
+ HTONS(ip->ip_len);
+ HTONS(ip->ip_off);
+ ip->ip_sum = 0;
+ mb_copy->m_data += sizeof(multicast_encap_iphdr);
+ ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
+ mb_copy->m_data -= sizeof(multicast_encap_iphdr);
+
+ if (vifp->v_rate_limit <= 0)
+ tbf_send_packet(vifp, mb_copy);
+ else
+ tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
+}
+
+/*
+ * De-encapsulate a packet and feed it back through ip input (this
+ * routine is called whenever IP gets a packet with proto type
+ * ENCAP_PROTO and a local destination address).
+ */
+void
+#ifdef MROUTE_LKM
+X_ipip_input(struct mbuf *m, int iphlen)
+#else
+ipip_input(struct mbuf *m, int iphlen)
+#endif
+{
+ struct ifnet *ifp = m->m_pkthdr.rcvif;
+ register struct ip *ip = mtod(m, struct ip *);
+ register int hlen = ip->ip_hl << 2;
+ register int s;
+ register struct ifqueue *ifq;
+ register struct vif *vifp;
+
+ if (!have_encap_tunnel) {
+ rip_input(m, iphlen);
+ return;
+ }
+ /*
+ * dump the packet if it's not to a multicast destination or if
+ * we don't have an encapsulating tunnel with the source.
+ * Note: This code assumes that the remote site IP address
+ * uniquely identifies the tunnel (i.e., that this site has
+ * at most one tunnel with the remote site).
+ */
+ if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
+ ++mrtstat.mrts_bad_tunnel;
+ m_freem(m);
+ return;
+ }
+ if (ip->ip_src.s_addr != last_encap_src) {
+ register struct vif *vife;
+
+ vifp = viftable;
+ vife = vifp + numvifs;
+ last_encap_src = ip->ip_src.s_addr;
+ last_encap_vif = 0;
+ for ( ; vifp < vife; ++vifp)
+ if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
+ if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
+ == VIFF_TUNNEL)
+ last_encap_vif = vifp;
+ break;
+ }
+ }
+ if ((vifp = last_encap_vif) == 0) {
+ last_encap_src = 0;
+ mrtstat.mrts_cant_tunnel++; /*XXX*/
+ m_freem(m);
+ if (mrtdebug)
+ log(LOG_DEBUG, "ip_mforward: no tunnel with %x\n",
+ ntohl(ip->ip_src.s_addr));
+ return;
+ }
+ ifp = vifp->v_ifp;
+
+ if (hlen > IP_HDR_LEN)
+ ip_stripoptions(m, (struct mbuf *) 0);
+ m->m_data += IP_HDR_LEN;
+ m->m_len -= IP_HDR_LEN;
+ m->m_pkthdr.len -= IP_HDR_LEN;
+ m->m_pkthdr.rcvif = ifp;
+
+ ifq = &ipintrq;
+ s = splimp();
+ if (IF_QFULL(ifq)) {
+ IF_DROP(ifq);
+ m_freem(m);
+ } else {
+ IF_ENQUEUE(ifq, m);
+ /*
+ * normally we would need a "schednetisr(NETISR_IP)"
+ * here but we were called by ip_input and it is going
+ * to loop back & try to dequeue the packet we just
+ * queued as soon as we return so we avoid the
+ * unnecessary software interrupt.
+ */
+ }
+ splx(s);
+}
+
+/*
+ * Token bucket filter module
+ */
+
+static void
+tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, u_long p_len)
+{
+ register struct tbf *t = vifp->v_tbf;
+
+ if (p_len > MAX_BKT_SIZE) {
+ /* drop if packet is too large */
+ mrtstat.mrts_pkt2large++;
+ m_freem(m);
+ return;
+ }
+
+ tbf_update_tokens(vifp);
+
+ /* if there are enough tokens,
+ * and the queue is empty,
+ * send this packet out
+ */
+
+ if (t->tbf_q_len == 0) {
+ /* queue empty, send packet if enough tokens */
+ if (p_len <= t->tbf_n_tok) {
+ t->tbf_n_tok -= p_len;
+ tbf_send_packet(vifp, m);
+ } else {
+ /* queue packet and timeout till later */
+ tbf_queue(vifp, m);
+ timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
+ }
+ } else if (t->tbf_q_len < t->tbf_max_q_len) {
+ /* finite queue length, so queue pkts and process queue */
+ tbf_queue(vifp, m);
+ tbf_process_q(vifp);
+ } else {
+ /* queue is full; try to dequeue a lower-priority packet, then queue and process */
+ if (!tbf_dq_sel(vifp, ip)) {
+ mrtstat.mrts_q_overflow++;
+ m_freem(m);
+ return;
+ } else {
+ tbf_queue(vifp, m);
+ tbf_process_q(vifp);
+ }
+ }
+ return;
+}
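+
+/*
+ * Illustrative sketch (editor's addition, not part of the original file):
+ * the token-bucket decision above reduced to its essentials. Token
+ * arithmetic is in bytes; the rate is expressed in bytes per microsecond
+ * here purely for clarity -- the real code instead pre-scales
+ * v_rate_limit in add_vif() and divides in tbf_update_tokens().
+ */
+#if 0 /* example only */
+struct example_tbf {
+ u_long tokens; /* bytes that may be sent right now */
+ u_long max_burst; /* bucket depth, cf. MAX_BKT_SIZE */
+};
+
+static int
+example_tbf_admit(struct example_tbf *t, u_long elapsed_us,
+ u_long rate_bytes_per_us, u_long pkt_len)
+{
+ t->tokens += elapsed_us * rate_bytes_per_us;
+ if (t->tokens > t->max_burst)
+ t->tokens = t->max_burst; /* the bucket never overflows */
+ if (pkt_len > t->tokens)
+ return 0; /* not enough credit: caller queues (or drops) */
+ t->tokens -= pkt_len;
+ return 1; /* enough credit: send immediately */
+}
+#endif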
+
+/*
+ * adds a packet to the queue at the interface
+ */
+static void
+tbf_queue(struct vif *vifp, struct mbuf *m)
+{
+ register int s = splnet();
+ register struct tbf *t = vifp->v_tbf;
+
+ if (t->tbf_t == NULL) {
+ /* Queue was empty */
+ t->tbf_q = m;
+ } else {
+ /* Insert at tail */
+ t->tbf_t->m_act = m;
+ }
+
+ /* Set new tail pointer */
+ t->tbf_t = m;
+
+#ifdef DIAGNOSTIC
+ /* Make sure we didn't get fed a bogus mbuf */
+ if (m->m_act)
+ panic("tbf_queue: m_act");
+#endif
+ m->m_act = NULL;
+
+ t->tbf_q_len++;
+
+ splx(s);
+}
+
+
+/*
+ * processes the queue at the interface
+ */
+static void
+tbf_process_q(struct vif *vifp)
+{
+ register struct mbuf *m;
+ register int len;
+ register int s = splnet();
+ register struct tbf *t = vifp->v_tbf;
+
+ /* loop through the queue at the interface and send as many packets
+ * as possible
+ */
+ while (t->tbf_q_len > 0) {
+ m = t->tbf_q;
+
+ len = mtod(m, struct ip *)->ip_len;
+
+ /* determine if the packet can be sent */
+ if (len <= t->tbf_n_tok) {
+ /* if so,
+ * reduce no of tokens, dequeue the packet,
+ * send the packet.
+ */
+ t->tbf_n_tok -= len;
+
+ t->tbf_q = m->m_act;
+ if (--t->tbf_q_len == 0)
+ t->tbf_t = NULL;
+
+ m->m_act = NULL;
+ tbf_send_packet(vifp, m);
+
+ } else break;
+ }
+ splx(s);
+}
+
+static void
+tbf_reprocess_q(void *xvifp)
+{
+ register struct vif *vifp = xvifp;
+ if (ip_mrouter == NULL)
+ return;
+
+ tbf_update_tokens(vifp);
+
+ tbf_process_q(vifp);
+
+ if (vifp->v_tbf->tbf_q_len)
+ timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
+}
+
+/* function that will selectively discard a member of the queue
+ * based on the precedence value and the priority
+ */
+static int
+tbf_dq_sel(struct vif *vifp, struct ip *ip)
+{
+ register int s = splnet();
+ register u_int p;
+ register struct mbuf *m, *last;
+ register struct mbuf **np;
+ register struct tbf *t = vifp->v_tbf;
+
+ p = priority(vifp, ip);
+
+ np = &t->tbf_q;
+ last = NULL;
+ while ((m = *np) != NULL) {
+ if (p > priority(vifp, mtod(m, struct ip *))) {
+ *np = m->m_act;
+ /* If we're removing the last packet, fix the tail pointer */
+ if (m == t->tbf_t)
+ t->tbf_t = last;
+ m_freem(m);
+ /* it's impossible for the queue to be empty, but
+ * we check anyway. */
+ if (--t->tbf_q_len == 0)
+ t->tbf_t = NULL;
+ splx(s);
+ mrtstat.mrts_drop_sel++;
+ return(1);
+ }
+ np = &m->m_act;
+ last = m;
+ }
+ splx(s);
+ return(0);
+}
+
+static void
+tbf_send_packet(struct vif *vifp, struct mbuf *m)
+{
+ struct ip_moptions imo;
+ int error;
+ static struct route ro;
+ int s = splnet();
+
+ if (vifp->v_flags & VIFF_TUNNEL) {
+ /* If tunnel options */
+ ip_output(m, (struct mbuf *)0, &vifp->v_route,
+ IP_FORWARDING, (struct ip_moptions *)0);
+ } else {
+ imo.imo_multicast_ifp = vifp->v_ifp;
+ imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
+ imo.imo_multicast_loop = 1;
+ imo.imo_multicast_vif = -1;
+
+ /*
+ * Re-entrancy should not be a problem here, because
+ * the packets that we send out and are looped back at us
+ * should get rejected because they appear to come from
+ * the loopback interface, thus preventing looping.
+ */
+ error = ip_output(m, (struct mbuf *)0, &ro,
+ IP_FORWARDING, &imo);
+
+ if (mrtdebug & DEBUG_XMIT)
+ log(LOG_DEBUG, "phyint_send on vif %d err %d\n",
+ vifp - viftable, error);
+ }
+ splx(s);
+}
+
+/* determine the current time and then
+ * the elapsed time (between the last time and time now)
+ * in microseconds & update the no. of tokens in the bucket
+ */
+static void
+tbf_update_tokens(struct vif *vifp)
+{
+ struct timeval tp;
+ register u_long tm;
+ register int s = splnet();
+ register struct tbf *t = vifp->v_tbf;
+
+ GET_TIME(tp);
+
+ TV_DELTA(tp, t->tbf_last_pkt_t, tm);
+
+ /*
+ * This formula is actually
+ * "time in seconds" * "bytes/second".
+ *
+ * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
+ *
+ * The (1000/1024) was introduced in add_vif to optimize
+ * this divide into a shift.
+ */
+ t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8;
+ t->tbf_last_pkt_t = tp;
+
+ if (t->tbf_n_tok > MAX_BKT_SIZE)
+ t->tbf_n_tok = MAX_BKT_SIZE;
+
+ splx(s);
+}
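+
+/*
+ * Worked example (editor's addition): for a vif configured with
+ * vifc_rate_limit = 500 (kbits/sec), add_vif() stores
+ * v_rate_limit = 500 * 1024 / 1000 = 512. If 10 ms have elapsed
+ * (tm = 10000 us), the update above adds 10000 * 512 / 1024 / 8 = 625
+ * tokens, i.e. 625 bytes -- exactly 0.01 s * 500000 bits/s / 8.
+ */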
+
+static int
+priority(struct vif *vifp, struct ip *ip)
+{
+ register int prio;
+
+ /* temporary hack; may add general packet classifier some day */
+
+ /*
+ * The UDP port space is divided up into four priority ranges:
+ * [0, 16384) : unclassified - lowest priority
+ * [16384, 32768) : audio - highest priority
+ * [32768, 49152) : whiteboard - medium priority
+ * [49152, 65536) : video - low priority
+ */
+ if (ip->ip_p == IPPROTO_UDP) {
+ struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
+ switch (ntohs(udp->uh_dport) & 0xc000) {
+ case 0x4000:
+ prio = 70;
+ break;
+ case 0x8000:
+ prio = 60;
+ break;
+ case 0xc000:
+ prio = 55;
+ break;
+ default:
+ prio = 50;
+ break;
+ }
+ if (tbfdebug > 1)
+ log(LOG_DEBUG, "port %x prio%d\n", ntohs(udp->uh_dport), prio);
+ } else {
+ prio = 50;
+ }
+ return prio;
+}
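+
+/*
+ * Worked example (editor's addition): the mask 0xc000 keeps only the top
+ * two bits of the UDP destination port, so port 16384 (0x4000) maps to
+ * prio 70 (audio), 40000 (0x9c40 & 0xc000 = 0x8000) to 60 (whiteboard),
+ * 60000 (0xea60 & 0xc000 = 0xc000) to 55 (video), and a low port such as
+ * 5004 (0x138c & 0xc000 = 0x0000) falls back to the default prio 50.
+ */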
+
+/*
+ * End of token bucket filter modifications
+ */
+
+int
+ip_rsvp_vif_init(struct socket *so, struct mbuf *m)
+{
+ int i;
+ register int s;
+
+ if (rsvpdebug)
+ printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
+ so->so_type, so->so_proto->pr_protocol);
+
+ if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
+ return EOPNOTSUPP;
+
+ /* Check mbuf. */
+ if (m == NULL || m->m_len != sizeof(int)) {
+ return EINVAL;
+ }
+ i = *(mtod(m, int *));
+
+ if (rsvpdebug)
+ printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on);
+
+ s = splnet();
+
+ /* Check vif. */
+ if (!legal_vif_num(i)) {
+ splx(s);
+ return EADDRNOTAVAIL;
+ }
+
+ /* Check if socket is available. */
+ if (viftable[i].v_rsvpd != NULL) {
+ splx(s);
+ return EADDRINUSE;
+ }
+
+ viftable[i].v_rsvpd = so;
+ /* This may seem silly, but we need to be sure we don't over-increment
+ * the RSVP counter, in case something slips up.
+ */
+ if (!viftable[i].v_rsvp_on) {
+ viftable[i].v_rsvp_on = 1;
+ rsvp_on++;
+ }
+
+ splx(s);
+ return 0;
+}
+
+int
+ip_rsvp_vif_done(struct socket *so, struct mbuf *m)
+{
+ int i;
+ register int s;
+
+ if (rsvpdebug)
+ printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
+ so->so_type, so->so_proto->pr_protocol);
+
+ if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
+ return EOPNOTSUPP;
+
+ /* Check mbuf. */
+ if (m == NULL || m->m_len != sizeof(int)) {
+ return EINVAL;
+ }
+ i = *(mtod(m, int *));
+
+ s = splnet();
+
+ /* Check vif. */
+ if (!legal_vif_num(i)) {
+ splx(s);
+ return EADDRNOTAVAIL;
+ }
+
+ if (rsvpdebug)
+ printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n",
+ viftable[i].v_rsvpd, so);
+
+ viftable[i].v_rsvpd = NULL;
+ /* This may seem silly, but we need to be sure we don't over-decrement
+ * the RSVP counter, in case something slips up.
+ */
+ if (viftable[i].v_rsvp_on) {
+ viftable[i].v_rsvp_on = 0;
+ rsvp_on--;
+ }
+
+ splx(s);
+ return 0;
+}
+
+void
+ip_rsvp_force_done(struct socket *so)
+{
+ int vifi;
+ register int s;
+
+ /* Don't bother if it is not the right type of socket. */
+ if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
+ return;
+
+ s = splnet();
+
+ /* The socket may be attached to more than one vif...this
+ * is perfectly legal.
+ */
+ for (vifi = 0; vifi < numvifs; vifi++) {
+ if (viftable[vifi].v_rsvpd == so) {
+ viftable[vifi].v_rsvpd = NULL;
+ /* This may seem silly, but we need to be sure we don't
+ * over-decrement the RSVP counter, in case something slips up.
+ */
+ if (viftable[vifi].v_rsvp_on) {
+ viftable[vifi].v_rsvp_on = 0;
+ rsvp_on--;
+ }
+ }
+ }
+
+ splx(s);
+ return;
+}
+
+void
+rsvp_input(struct mbuf *m, int iphlen)
+{
+ int vifi;
+ register struct ip *ip = mtod(m, struct ip *);
+ static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET };
+ register int s;
+ struct ifnet *ifp;
+
+ if (rsvpdebug)
+ printf("rsvp_input: rsvp_on %d\n",rsvp_on);
+
+ /* Can still get packets with rsvp_on = 0 if there is a local member
+ * of the group to which the RSVP packet is addressed. But in this
+ * case we want to throw the packet away.
+ */
+ if (!rsvp_on) {
+ m_freem(m);
+ return;
+ }
+
+ /* If the old-style non-vif-associated socket is set, then use
+ * it and ignore the new ones.
+ */
+ if (ip_rsvpd != NULL) {
+ if (rsvpdebug)
+ printf("rsvp_input: Sending packet up old-style socket\n");
+ rip_input(m, iphlen);
+ return;
+ }
+
+ s = splnet();
+
+ if (rsvpdebug)
+ printf("rsvp_input: check vifs\n");
+
+#ifdef DIAGNOSTIC
+ if (!(m->m_flags & M_PKTHDR))
+ panic("rsvp_input no hdr");
+#endif
+
+ ifp = m->m_pkthdr.rcvif;
+ /* Find which vif the packet arrived on. */
+ for (vifi = 0; vifi < numvifs; vifi++) {
+ if (viftable[vifi].v_ifp == ifp)
+ break;
+ }
+
+ if (vifi == numvifs) {
+ /* Can't find vif packet arrived on. Drop packet. */
+ if (rsvpdebug)
+ printf("rsvp_input: Can't find vif for packet...dropping it.\n");
+ m_freem(m);
+ splx(s);
+ return;
+ }
+
+ if (rsvpdebug)
+ printf("rsvp_input: check socket\n");
+
+ if (viftable[vifi].v_rsvpd == NULL) {
+ /* drop packet, since there is no specific socket for this
+ * interface */
+ if (rsvpdebug)
+ printf("rsvp_input: No socket defined for vif %d\n",vifi);
+ m_freem(m);
+ splx(s);
+ return;
+ }
+ rsvp_src.sin_addr = ip->ip_src;
+
+ if (rsvpdebug && m)
+ printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n",
+ m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv)));
+
+ if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) {
+ if (rsvpdebug)
+ printf("rsvp_input: Failed to append to socket\n");
+ } else {
+ if (rsvpdebug)
+ printf("rsvp_input: send packet up\n");
+ }
+
+ splx(s);
+}
+
+#ifdef MROUTE_LKM
+#include <sys/conf.h>
+#include <sys/exec.h>
+#include <sys/sysent.h>
+#include <sys/lkm.h>
+
+MOD_MISC("ip_mroute_mod")
+
+static int
+ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
+{
+ int i;
+ struct lkm_misc *args = lkmtp->private.lkm_misc;
+ int err = 0;
+
+ switch(cmd) {
+ static int (*old_ip_mrouter_cmd)();
+ static int (*old_ip_mrouter_done)();
+ static int (*old_ip_mforward)();
+ static int (*old_mrt_ioctl)();
+ static void (*old_proto4_input)();
+ static int (*old_legal_vif_num)();
+ extern struct protosw inetsw[];
+
+ case LKM_E_LOAD:
+ if(lkmexists(lkmtp) || ip_mrtproto)
+ return(EEXIST);
+ old_ip_mrouter_cmd = ip_mrouter_cmd;
+ ip_mrouter_cmd = X_ip_mrouter_cmd;
+ old_ip_mrouter_done = ip_mrouter_done;
+ ip_mrouter_done = X_ip_mrouter_done;
+ old_ip_mforward = ip_mforward;
+ ip_mforward = X_ip_mforward;
+ old_mrt_ioctl = mrt_ioctl;
+ mrt_ioctl = X_mrt_ioctl;
+ old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input;
+ inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_ipip_input;
+ old_legal_vif_num = legal_vif_num;
+ legal_vif_num = X_legal_vif_num;
+ ip_mrtproto = IGMP_DVMRP;
+
+ printf("\nIP multicast routing loaded\n");
+ break;
+
+ case LKM_E_UNLOAD:
+ if (ip_mrouter)
+ return EINVAL;
+
+ ip_mrouter_cmd = old_ip_mrouter_cmd;
+ ip_mrouter_done = old_ip_mrouter_done;
+ ip_mforward = old_ip_mforward;
+ mrt_ioctl = old_mrt_ioctl;
+ inetsw[ip_protox[ENCAP_PROTO]].pr_input = old_proto4_input;
+ legal_vif_num = old_legal_vif_num;
+ ip_mrtproto = 0;
+ break;
+
+ default:
+ err = EINVAL;
+ break;
+ }
+
+ return(err);
+}
+
+int
+ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
+ DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
+ nosys);
+}
+
+#endif /* MROUTE_LKM */
+#endif /* MROUTING */
diff --git a/netinet/ip_mroute.h b/netinet/ip_mroute.h
new file mode 100644
index 0000000..9a28574
--- /dev/null
+++ b/netinet/ip_mroute.h
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 1989 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/netinet/ip_mroute.h,v 1.31 2007/02/08 23:05:08 bms Exp $
+ */
+
+
+#ifndef _NETINET_IP_MROUTE_H_
+#define _NETINET_IP_MROUTE_H_
+
+#include <netinet/in.h> /* struct in_addr */
+#include <net/route.h> /* struct route */
+
+/*
+ * Definitions for IP multicast forwarding.
+ *
+ * Written by David Waitzman, BBN Labs, August 1988.
+ * Modified by Steve Deering, Stanford, February 1989.
+ * Modified by Ajit Thyagarajan, PARC, August 1993.
+ * Modified by Ajit Thyagarajan, PARC, August 1994.
+ * Modified by Ahmed Helmy, SGI, June 1996.
+ * Modified by Pavlin Radoslavov, ICSI, October 2002.
+ *
+ * MROUTING Revision: 3.3.1.3
+ * and PIM-SMv2 and PIM-DM support, advanced API support,
+ * bandwidth metering and signaling.
+ */
+
+
+/*
+ * Multicast Routing set/getsockopt commands.
+ */
+#define MRT_INIT 100 /* initialize forwarder */
+#define MRT_DONE 101 /* shut down forwarder */
+#define MRT_ADD_VIF 102 /* create virtual interface */
+#define MRT_DEL_VIF 103 /* delete virtual interface */
+#define MRT_ADD_MFC 104 /* insert forwarding cache entry */
+#define MRT_DEL_MFC 105 /* delete forwarding cache entry */
+#define MRT_VERSION 106 /* get kernel version number */
+#define MRT_ASSERT 107 /* enable assert processing */
+#define MRT_PIM MRT_ASSERT /* enable PIM processing */
+#define MRT_API_SUPPORT 109 /* supported MRT API */
+#define MRT_API_CONFIG 110 /* config MRT API */
+#define MRT_ADD_BW_UPCALL 111 /* create bandwidth monitor */
+#define MRT_DEL_BW_UPCALL 112 /* delete bandwidth monitor */
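+
+/*
+ * Illustrative sketch of how a user-space multicast routing daemon (e.g.
+ * mrouted) typically issues these commands: they are sent as IPPROTO_IP
+ * level options on a raw IGMP socket and handled by rip_ctloutput(),
+ * which dispatches them to ip_mrouter_set()/ip_mrouter_get().
+ *
+ *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
+ *	int v = 1;
+ *	setsockopt(s, IPPROTO_IP, MRT_INIT, &v, sizeof(v));
+ *	...
+ *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
+ */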
+
+
+#define GET_TIME(t) microtime(&t)
+
+/*
+ * Types and macros for handling bitmaps with one bit per virtual interface.
+ */
+#define MAXVIFS 32
+typedef u_long vifbitmap_t;
+typedef u_short vifi_t; /* type of a vif index */
+#define ALL_VIFS (vifi_t)-1
+
+#define VIFM_SET(n, m) ((m) |= (1 << (n)))
+#define VIFM_CLR(n, m) ((m) &= ~(1 << (n)))
+#define VIFM_ISSET(n, m) ((m) & (1 << (n)))
+#define VIFM_CLRALL(m) ((m) = 0x00000000)
+#define VIFM_COPY(mfrom, mto) ((mto) = (mfrom))
+#define VIFM_SAME(m1, m2) ((m1) == (m2))
+
+
+/*
+ * Argument structure for MRT_ADD_VIF.
+ * (MRT_DEL_VIF takes a single vifi_t argument.)
+ */
+struct vifctl {
+ vifi_t vifc_vifi; /* the index of the vif to be added */
+ u_char vifc_flags; /* VIFF_ flags defined below */
+ u_char vifc_threshold; /* min ttl required to forward on vif */
+ u_int vifc_rate_limit; /* max rate */
+ struct in_addr vifc_lcl_addr; /* local interface address */
+ struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */
+};
+
+#define VIFF_TUNNEL 0x1 /* vif represents a tunnel end-point */
+#define VIFF_SRCRT 0x2 /* tunnel uses IP source routing */
+
+/*
+ * Argument structure for MRT_ADD_MFC and MRT_DEL_MFC
+ * (mfcc_tos to be added at a future point)
+ */
+struct mfcctl {
+ struct in_addr mfcc_origin; /* ip origin of mcasts */
+ struct in_addr mfcc_mcastgrp; /* multicast group associated*/
+ vifi_t mfcc_parent; /* incoming vif */
+ u_char mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */
+};
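+
+/*
+ * Illustrative sketch: the daemon installs a forwarding cache entry by
+ * filling in a struct mfcctl and passing it down with MRT_ADD_MFC
+ * ("s", "origin", "group", "parent_vif" and "out_vif" are placeholders):
+ *
+ *	struct mfcctl mc;
+ *	bzero(&mc, sizeof(mc));
+ *	mc.mfcc_origin = origin;
+ *	mc.mfcc_mcastgrp = group;
+ *	mc.mfcc_parent = parent_vif;
+ *	mc.mfcc_ttls[out_vif] = 1;
+ *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
+ */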
+
+/*
+ * The kernel's multicast routing statistics.
+ */
+struct mrtstat {
+ u_long mrts_mfc_lookups; /* # forw. cache hash table hits */
+ u_long mrts_mfc_misses; /* # forw. cache hash table misses */
+ u_long mrts_upcalls; /* # calls to mrouted */
+ u_long mrts_no_route; /* no route for packet's origin */
+ u_long mrts_bad_tunnel; /* malformed tunnel options */
+ u_long mrts_cant_tunnel; /* no room for tunnel options */
+ u_long mrts_wrong_if; /* arrived on wrong interface */
+ u_long mrts_upq_ovflw; /* upcall Q overflow */
+ u_long mrts_cache_cleanups; /* # entries with no upcalls */
+ u_long mrts_drop_sel; /* pkts dropped selectively */
+ u_long mrts_q_overflow; /* pkts dropped - Q overflow */
+ u_long mrts_pkt2large; /* pkts dropped - size > BKT SIZE */
+ u_long mrts_upq_sockfull; /* upcalls dropped - socket full */
+};
+
+/*
+ * Argument structure used by mrouted to get src-grp pkt counts
+ */
+struct sioc_sg_req {
+ struct in_addr src;
+ struct in_addr grp;
+ u_long pktcnt;
+ u_long bytecnt;
+ u_long wrong_if;
+};
+
+/*
+ * Argument structure used by mrouted to get vif pkt counts
+ */
+struct sioc_vif_req {
+ vifi_t vifi; /* vif number */
+ u_long icount; /* Input packet count on vif */
+ u_long ocount; /* Output packet count on vif */
+ u_long ibytes; /* Input byte count on vif */
+ u_long obytes; /* Output byte count on vif */
+};
+
+
+/*
+ * The kernel's virtual-interface structure.
+ */
+struct vif {
+ u_char v_flags; /* VIFF_ flags defined above */
+ u_char v_threshold; /* min ttl required to forward on vif*/
+ u_int v_rate_limit; /* max rate */
+ struct tbf *v_tbf; /* token bucket structure at intf. */
+ struct in_addr v_lcl_addr; /* local interface address */
+ struct in_addr v_rmt_addr; /* remote address (tunnels only) */
+ struct ifnet *v_ifp; /* pointer to interface */
+ u_long v_pkt_in; /* # pkts in on interface */
+ u_long v_pkt_out; /* # pkts out on interface */
+ u_long v_bytes_in; /* # bytes in on interface */
+ u_long v_bytes_out; /* # bytes out on interface */
+ struct route v_route; /* cached route if this is a tunnel */
+ u_int v_rsvp_on; /* RSVP listening on this vif */
+ struct socket *v_rsvpd; /* RSVP daemon socket */
+};
+
+/*
+ * The kernel's multicast forwarding cache entry structure
+ * (A field for the type of service (mfc_tos) is to be added
+ * at a future point)
+ */
+struct mfc {
+ struct in_addr mfc_origin; /* IP origin of mcasts */
+ struct in_addr mfc_mcastgrp; /* multicast group associated*/
+ vifi_t mfc_parent; /* incoming vif */
+ u_char mfc_ttls[MAXVIFS]; /* forwarding ttls on vifs */
+ u_long mfc_pkt_cnt; /* pkt count for src-grp */
+ u_long mfc_byte_cnt; /* byte count for src-grp */
+ u_long mfc_wrong_if; /* wrong if for src-grp */
+ int mfc_expire; /* time to clean entry up */
+ struct timeval mfc_last_assert; /* last time I sent an assert*/
+};
+
+/*
+ * Struct used to communicate from kernel to multicast router
+ * note the convenient similarity to an IP packet
+ */
+struct igmpmsg {
+ u_long unused1;
+ u_long unused2;
+ u_char im_msgtype; /* what type of message */
+#define IGMPMSG_NOCACHE 1
+#define IGMPMSG_WRONGVIF 2
+ u_char im_mbz; /* must be zero */
+ u_char im_vif; /* vif rec'd on */
+ u_char unused3;
+ struct in_addr im_src, im_dst;
+};
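+
+/*
+ * Illustrative sketch: the daemon reads these upcalls from its IGMP
+ * socket and tells them apart from genuine IGMP packets by the zeroed
+ * im_mbz field ("igmp_socket", "buf", "im" and "n" are placeholders):
+ *
+ *	n = recvfrom(igmp_socket, buf, sizeof(buf), 0, NULL, NULL);
+ *	im = (struct igmpmsg *)buf;
+ *	if (n >= sizeof(*im) && im->im_mbz == 0 &&
+ *	    im->im_msgtype == IGMPMSG_NOCACHE)
+ *		... resolve the route and install it with MRT_ADD_MFC ...
+ */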
+
+/*
+ * Argument structure used for pkt info. while upcall is made
+ */
+struct rtdetq {
+ struct mbuf *m; /* A copy of the packet */
+ struct ifnet *ifp; /* Interface pkt came in on */
+ vifi_t xmt_vif; /* Saved copy of imo_multicast_vif */
+#ifdef UPCALL_TIMING
+ struct timeval t; /* Timestamp */
+#endif /* UPCALL_TIMING */
+};
+
+#define MFCTBLSIZ 256
+#if (MFCTBLSIZ & (MFCTBLSIZ - 1)) == 0 /* from sys:route.h */
+#define MFCHASHMOD(h) ((h) & (MFCTBLSIZ - 1))
+#else
+#define MFCHASHMOD(h) ((h) % MFCTBLSIZ)
+#endif
+
+#define MAX_UPQ 4 /* max. no of pkts in upcall Q */
+
+/*
+ * Token Bucket filter code
+ */
+#define MAX_BKT_SIZE 10000 /* 10K bytes size */
+#define MAXQSIZE 10 /* max # of pkts in queue */
+
+/*
+ * the token bucket filter at each vif
+ */
+struct tbf
+{
+ struct timeval tbf_last_pkt_t; /* arr. time of last pkt */
+ u_long tbf_n_tok; /* no of tokens in bucket */
+ u_long tbf_q_len; /* length of queue at this vif */
+ u_long tbf_max_q_len; /* max. queue length */
+ struct mbuf *tbf_q; /* Packet queue */
+ struct mbuf *tbf_t; /* tail-insertion pointer */
+};
+
+#ifdef _KERNEL
+
+extern int (*ip_mrouter_set)(int, struct socket *, struct mbuf *);
+extern int (*ip_mrouter_get)(int, struct socket *, struct mbuf **);
+extern int (*ip_mrouter_done)(void);
+#ifdef MROUTING
+extern int (*mrt_ioctl)(int, caddr_t);
+#else
+extern int (*mrt_ioctl)(int, caddr_t, struct proc *);
+#endif
+
+#endif /* _KERNEL */
+
+#endif /* _NETINET_IP_MROUTE_H_ */
diff --git a/netinet/ip_output.c b/netinet/ip_output.c
new file mode 100644
index 0000000..5b1c497
--- /dev/null
+++ b/netinet/ip_output.c
@@ -0,0 +1,1336 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
+ * $FreeBSD: src/sys/netinet/ip_output.c,v 1.271 2007/03/23 09:43:36 bms Exp $
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define _IP_VHL
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <errno.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+#include <machine/in_cksum.h>
+
+#if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
+#undef COMPAT_IPFW
+#define COMPAT_IPFW 1
+#else
+#undef COMPAT_IPFW
+#endif
+
+u_short ip_id;
+
+static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
+static void ip_mloopback
+ (struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
+static int ip_getmoptions
+ (int, struct ip_moptions *, struct mbuf **);
+static int ip_optcopy(struct ip *, struct ip *);
+static int ip_pcbopts(struct mbuf **, struct mbuf *);
+static int ip_setmoptions
+ (int, struct ip_moptions **, struct mbuf *);
+
+extern struct protosw inetsw[];
+
+/*
+ * IP output. The packet in mbuf chain m contains a skeletal IP
+ * header (with len, off, ttl, proto, tos, src, dst).
+ * The mbuf chain containing the packet will be freed.
+ * The mbuf opt, if present, will not be freed.
+ */
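+/*
+ * Illustrative call: a transport protocol typically invokes this as
+ *
+ *	error = ip_output(m, inp->inp_options, &inp->inp_route,
+ *	    IP_ALLOWBROADCAST, inp->inp_moptions);
+ *
+ * compare rip_output() in netinet/raw_ip.c.
+ */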
+int
+ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
+ struct ip_moptions *imo)
+{
+ struct ip *ip, *mhip;
+ struct ifnet *ifp;
+ struct mbuf *m = m0;
+ int hlen = sizeof (struct ip);
+ int len = 0, off, error = 0;
+ struct sockaddr_in *dst;
+ struct in_ifaddr *ia;
+ int isbroadcast;
+
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ip_output no HDR");
+ if (!ro)
+ panic("ip_output no route, proto = %d",
+ mtod(m, struct ip *)->ip_p);
+#endif
+ if (opt) {
+ m = ip_insertoptions(m, opt, &len);
+ hlen = len;
+ }
+ ip = mtod(m, struct ip *);
+ /*
+ * Fill in IP header.
+ */
+ if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
+#ifdef _IP_VHL
+ ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
+#else
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = hlen >> 2;
+#endif
+ ip->ip_off &= IP_DF;
+ ip->ip_id = htons(ip_id++);
+ ipstat.ips_localout++;
+ } else {
+#ifdef _IP_VHL
+ hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+#else
+ hlen = ip->ip_hl << 2;
+#endif
+ }
+
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+ /*
+ * If there is a cached route,
+ * check that it is to the same destination
+ * and is still up. If not, free it and try again.
+ */
+ if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+ dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = (struct rtentry *)0;
+ }
+ if (ro->ro_rt == NULL) {
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = ip->ip_dst;
+ }
+ /*
+ * If routing to interface only,
+ * short circuit routing lookup.
+ */
+#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
+#define sintosa(sin) ((struct sockaddr *)(sin))
+ if (flags & IP_ROUTETOIF) {
+ if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
+ (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
+ ipstat.ips_noroute++;
+ error = ENETUNREACH;
+ goto bad;
+ }
+ ifp = ia->ia_ifp;
+ ip->ip_ttl = 1;
+ isbroadcast = in_broadcast(dst->sin_addr, ifp);
+ } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
+ imo != NULL && imo->imo_multicast_ifp != NULL) {
+ /*
+ * Bypass the normal routing lookup for multicast
+ * packets if the interface is specified.
+ */
+ ifp = imo->imo_multicast_ifp;
+ IFP_TO_IA(ifp, ia);
+ isbroadcast = 0; /* fool gcc */
+ } else {
+ /*
+ * If this is the case, we probably don't want to allocate
+ * a protocol-cloned route since we didn't get one from the
+ * ULP. This lets TCP do its thing, while not burdening
+ * forwarding or ICMP with the overhead of cloning a route.
+ * Of course, we still want to do any cloning requested by
+ * the link layer, as this is probably required in all cases
+ * for correct operation (as it is for ARP).
+ */
+ if (ro->ro_rt == 0)
+ rtalloc_ign(ro, RTF_PRCLONING);
+ if (ro->ro_rt == 0) {
+ ipstat.ips_noroute++;
+ error = EHOSTUNREACH;
+ goto bad;
+ }
+ ia = ifatoia(ro->ro_rt->rt_ifa);
+ ifp = ro->ro_rt->rt_ifp;
+ ro->ro_rt->rt_use++;
+ if (ro->ro_rt->rt_flags & RTF_GATEWAY)
+ dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+ if (ro->ro_rt->rt_flags & RTF_HOST)
+ isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
+ else
+ isbroadcast = in_broadcast(dst->sin_addr, ifp);
+ }
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ struct in_multi *inm;
+
+ m->m_flags |= M_MCAST;
+ /*
+ * IP destination address is multicast. Make sure "dst"
+ * still points to the address in "ro". (It may have been
+ * changed to point to a gateway address, above.)
+ */
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+ /*
+ * See if the caller provided any multicast options
+ */
+ if (imo != NULL) {
+ ip->ip_ttl = imo->imo_multicast_ttl;
+ if (imo->imo_multicast_ifp != NULL)
+ ifp = imo->imo_multicast_ifp;
+ if (imo->imo_multicast_vif != -1)
+ ip->ip_src.s_addr =
+ ip_mcast_src(imo->imo_multicast_vif);
+ } else
+ ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
+ /*
+ * Confirm that the outgoing interface supports multicast.
+ */
+ if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
+ if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+ ipstat.ips_noroute++;
+ error = ENETUNREACH;
+ goto bad;
+ }
+ }
+ /*
+ * If source address not specified yet, use address
+ * of outgoing interface.
+ */
+ if (ip->ip_src.s_addr == INADDR_ANY) {
+ register struct in_ifaddr *ia;
+
+ for (ia = in_ifaddr; ia; ia = ia->ia_next)
+ if (ia->ia_ifp == ifp) {
+ ip->ip_src = IA_SIN(ia)->sin_addr;
+ break;
+ }
+ }
+
+ IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
+ if (inm != NULL &&
+ (imo == NULL || imo->imo_multicast_loop)) {
+ /*
+ * If we belong to the destination multicast group
+ * on the outgoing interface, and the caller did not
+ * forbid loopback, loop back a copy.
+ */
+ ip_mloopback(ifp, m, dst, hlen);
+ }
+ else {
+ /*
+ * If we are acting as a multicast router, perform
+ * multicast forwarding as if the packet had just
+ * arrived on the interface to which we are about
+ * to send. The multicast forwarding function
+ * recursively calls this function, using the
+ * IP_FORWARDING flag to prevent infinite recursion.
+ *
+ * Multicasts that are looped back by ip_mloopback(),
+ * above, will be forwarded by the ip_input() routine,
+ * if necessary.
+ */
+ if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
+ /*
+ * Check if rsvp daemon is running. If not, don't
+ * set ip_moptions. This ensures that the packet
+ * is multicast and not just sent down one link
+ * as prescribed by rsvpd.
+ */
+ if (!rsvp_on)
+ imo = NULL;
+ if (ip_mforward(ip, ifp, m, imo) != 0) {
+ m_freem(m);
+ goto done;
+ }
+ }
+ }
+
+ /*
+ * Multicasts with a time-to-live of zero may be looped-
+ * back, above, but must not be transmitted on a network.
+ * Also, multicasts addressed to the loopback interface
+ * are not sent -- the above call to ip_mloopback() will
+ * loop back a copy if this host actually belongs to the
+ * destination group on the loopback interface.
+ */
+ if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
+ m_freem(m);
+ goto done;
+ }
+
+ goto sendit;
+ }
+#ifndef notdef
+ /*
+ * If source address not specified yet, use address
+ * of outgoing interface.
+ */
+ if (ip->ip_src.s_addr == INADDR_ANY)
+ ip->ip_src = IA_SIN(ia)->sin_addr;
+#endif
+ /*
+ * Verify that we have any chance at all of being able to queue
+ * the packet or packet fragments
+ */
+ if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
+ ifp->if_snd.ifq_maxlen) {
+ error = ENOBUFS;
+ goto bad;
+ }
+
+ /*
+ * Look for broadcast address and
+ * verify user is allowed to send
+ * such a packet.
+ */
+ if (isbroadcast) {
+ if ((ifp->if_flags & IFF_BROADCAST) == 0) {
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
+ if ((flags & IP_ALLOWBROADCAST) == 0) {
+ error = EACCES;
+ goto bad;
+ }
+ /* don't allow broadcast messages to be fragmented */
+ if (ip->ip_len > ifp->if_mtu) {
+ error = EMSGSIZE;
+ goto bad;
+ }
+ m->m_flags |= M_BCAST;
+ } else {
+ m->m_flags &= ~M_BCAST;
+ }
+
+sendit:
+ /*
+ * IpHack's section.
+ * - Xlate: translate packet's addr/port (NAT).
+ * - Firewall: deny/allow/etc.
+ * - Wrap: fake packet's addr/port <unimpl.>
+ * - Encapsulate: put it in another IP and send out. <unimp.>
+ */
+
+#ifdef COMPAT_IPFW
+ if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, ifp, IP_NAT_OUT)) {
+ error = EACCES;
+ goto done;
+ }
+
+ /*
+ * Check with the firewall...
+ */
+ if (ip_fw_chk_ptr) {
+#ifdef IPDIVERT
+ ip_divert_port = (*ip_fw_chk_ptr)(&ip,
+ hlen, ifp, ip_divert_ignore, &m);
+ ip_divert_ignore = 0;
+ if (ip_divert_port) { /* Divert packet */
+ (*inetsw[ip_protox[IPPROTO_DIVERT]].pr_input)(m, 0);
+ goto done;
+ }
+#else
+ /* If ipfw says divert, we have to just drop packet */
+ if ((*ip_fw_chk_ptr)(&ip, hlen, ifp, 0, &m)) {
+ m_freem(m);
+ goto done;
+ }
+#endif
+ if (!m) {
+ error = EACCES;
+ goto done;
+ }
+ }
+#endif /* COMPAT_IPFW */
+
+ /*
+ * If small enough for interface, or the interface will take
+ * care of the fragmentation for us, we can just send directly.
+ */
+ if ((u_short)ip->ip_len <= ifp->if_mtu) {
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ ip->ip_sum = 0;
+#ifdef _IP_VHL
+ if (ip->ip_vhl == IP_VHL_BORING) {
+#else
+		if ((ip->ip_hl == 5) && (ip->ip_v == IPVERSION)) {
+#endif
+ ip->ip_sum = in_cksum_hdr(ip);
+ } else {
+ ip->ip_sum = in_cksum(m, hlen);
+ }
+ error = (*ifp->if_output)(ifp, m,
+ (struct sockaddr *)dst, ro->ro_rt);
+ goto done;
+ }
+ /*
+ * Too large for interface; fragment if possible.
+ * Must be able to put at least 8 bytes per fragment.
+ */
+ if (ip->ip_off & IP_DF) {
+ error = EMSGSIZE;
+ /*
+ * This case can happen if the user changed the MTU
+ * of an interface after enabling IP on it. Because
+ * most netifs don't keep track of routes pointing to
+ * them, there is no way for one to update all its
+ * routes when the MTU is changed.
+ */
+ if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
+ && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
+ && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
+ ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
+ }
+ ipstat.ips_cantfrag++;
+ goto bad;
+ }
+ len = (ifp->if_mtu - hlen) &~ 7;
+ if (len < 8) {
+ error = EMSGSIZE;
+ goto bad;
+ }
+
+ {
+ int mhlen, firstlen = len;
+ struct mbuf **mnext = &m->m_nextpkt;
+
+ /*
+ * Loop through length of segment after first fragment,
+ * make new header and copy data of each part and link onto chain.
+ */
+ m0 = m;
+ mhlen = sizeof (struct ip);
+ for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == 0) {
+ error = ENOBUFS;
+ ipstat.ips_odropped++;
+ goto sendorfree;
+ }
+ m->m_flags |= (m0->m_flags & M_MCAST);
+ m->m_data += max_linkhdr;
+ mhip = mtod(m, struct ip *);
+ *mhip = *ip;
+ if (hlen > sizeof (struct ip)) {
+ mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
+#ifdef _IP_VHL
+ mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
+#else
+ mhip->ip_v = IPVERSION;
+ mhip->ip_hl = mhlen >> 2;
+#endif
+ }
+ m->m_len = mhlen;
+ mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
+ if (ip->ip_off & IP_MF)
+ mhip->ip_off |= IP_MF;
+ if (off + len >= (u_short)ip->ip_len)
+ len = (u_short)ip->ip_len - off;
+ else
+ mhip->ip_off |= IP_MF;
+ mhip->ip_len = htons((u_short)(len + mhlen));
+ m->m_next = m_copy(m0, off, len);
+ if (m->m_next == 0) {
+ (void) m_free(m);
+ error = ENOBUFS; /* ??? */
+ ipstat.ips_odropped++;
+ goto sendorfree;
+ }
+ m->m_pkthdr.len = mhlen + len;
+ m->m_pkthdr.rcvif = NULL;
+ mhip->ip_off = htons(mhip->ip_off);
+ mhip->ip_sum = 0;
+#ifdef _IP_VHL
+ if (mhip->ip_vhl == IP_VHL_BORING) {
+#else
+ if ((mhip->ip_hl == 5) && (mhip->ip_v == IPVERSION) ) {
+#endif
+ mhip->ip_sum = in_cksum_hdr(mhip);
+ } else {
+ mhip->ip_sum = in_cksum(m, mhlen);
+ }
+ *mnext = m;
+ mnext = &m->m_nextpkt;
+ ipstat.ips_ofragments++;
+ }
+ /*
+ * Update first fragment by trimming what's been copied out
+ * and updating header, then send each fragment (in order).
+ */
+ m = m0;
+ m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
+ m->m_pkthdr.len = hlen + firstlen;
+ ip->ip_len = htons((u_short)m->m_pkthdr.len);
+ ip->ip_off |= IP_MF;
+ ip->ip_off = htons(ip->ip_off);
+ ip->ip_sum = 0;
+#ifdef _IP_VHL
+ if (ip->ip_vhl == IP_VHL_BORING) {
+#else
+ if ((ip->ip_hl == 5) && (ip->ip_v == IPVERSION) ) {
+#endif
+ ip->ip_sum = in_cksum_hdr(ip);
+ } else {
+ ip->ip_sum = in_cksum(m, hlen);
+ }
+sendorfree:
+ for (m = m0; m; m = m0) {
+ m0 = m->m_nextpkt;
+ m->m_nextpkt = 0;
+ if (error == 0)
+ error = (*ifp->if_output)(ifp, m,
+ (struct sockaddr *)dst, ro->ro_rt);
+ else
+ m_freem(m);
+ }
+
+ if (error == 0)
+ ipstat.ips_fragmented++;
+ }
+done:
+ return (error);
+bad:
+ m_freem(m0);
+ goto done;
+}
+
+/*
+ * Insert IP options into preformed packet.
+ * Adjust IP destination as required for IP source routing,
+ * as indicated by a non-zero in_addr at the start of the options.
+ *
+ * XXX This routine assumes that the packet has no options in place.
+ */
+static struct mbuf *
+ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
+{
+ register struct ipoption *p = mtod(opt, struct ipoption *);
+ struct mbuf *n;
+ register struct ip *ip = mtod(m, struct ip *);
+ uint32_t optlen;
+
+ optlen = opt->m_len - sizeof(p->ipopt_dst);
+ if (optlen + ip->ip_len > IP_MAXPACKET)
+ return (m); /* XXX should fail */
+ if (p->ipopt_dst.s_addr)
+ ip->ip_dst = p->ipopt_dst;
+ if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
+ MGETHDR(n, M_DONTWAIT, MT_HEADER);
+ if (n == 0)
+ return (m);
+ n->m_pkthdr.len = m->m_pkthdr.len + optlen;
+ m->m_len -= sizeof(struct ip);
+ m->m_data += sizeof(struct ip);
+ n->m_next = m;
+ m = n;
+ m->m_len = optlen + sizeof(struct ip);
+ m->m_data += max_linkhdr;
+ (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
+ } else {
+ m->m_data -= optlen;
+ m->m_len += optlen;
+ m->m_pkthdr.len += optlen;
+ ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+ }
+ ip = mtod(m, struct ip *);
+ bcopy(p->ipopt_list, ip + 1, optlen);
+ *phlen = sizeof(struct ip) + optlen;
+#ifdef _IP_VHL
+ ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
+#else
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = *phlen >> 2;
+#endif
+ ip->ip_len += optlen;
+ return (m);
+}
+
+/*
+ * Copy options from ip to jp,
+ * omitting those not copied during fragmentation.
+ */
+static int
+ip_optcopy(struct ip *ip, struct ip *jp)
+{
+ register u_char *cp, *dp;
+ int opt, optlen, cnt;
+
+ cp = (u_char *)(ip + 1);
+ dp = (u_char *)(jp + 1);
+#ifdef _IP_VHL
+ cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
+#else
+ cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+#endif
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[0];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP) {
+ /* Preserve for IP mcast tunnel's LSRR alignment. */
+ *dp++ = IPOPT_NOP;
+ optlen = 1;
+ continue;
+ } else
+ optlen = cp[IPOPT_OLEN];
+ /* bogus lengths should have been caught by ip_dooptions */
+ if (optlen > cnt)
+ optlen = cnt;
+ if (IPOPT_COPIED(opt)) {
+ bcopy(cp, dp, optlen);
+ dp += optlen;
+ }
+ }
+ for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
+ *dp++ = IPOPT_EOL;
+ return (optlen);
+}
+
+/*
+ * IP socket option processing.
+ */
+int
+ip_ctloutput(int op, struct socket *so, int level, int optname,
+ struct mbuf **mp)
+{
+ struct inpcb *inp = sotoinpcb(so);
+ register struct mbuf *m = *mp;
+ register int optval = 0;
+ int error = 0;
+
+ if (level != IPPROTO_IP) {
+ error = EINVAL;
+ if (op == PRCO_SETOPT && *mp)
+ (void) m_free(*mp);
+ } else switch (op) {
+
+ case PRCO_SETOPT:
+ switch (optname) {
+ case IP_OPTIONS:
+#ifdef notyet
+ case IP_RETOPTS:
+ return (ip_pcbopts(optname, &inp->inp_options, m));
+#else
+ return (ip_pcbopts(&inp->inp_options, m));
+#endif
+
+ case IP_TOS:
+ case IP_TTL:
+ case IP_RECVOPTS:
+ case IP_RECVRETOPTS:
+ case IP_RECVDSTADDR:
+ case IP_RECVIF:
+ if (m == 0 || m->m_len != sizeof(int))
+ error = EINVAL;
+ else {
+ optval = *mtod(m, int *);
+ switch (optname) {
+
+ case IP_TOS:
+ inp->inp_ip_tos = optval;
+ break;
+
+ case IP_TTL:
+ inp->inp_ip_ttl = optval;
+ break;
+#define OPTSET(bit) \
+ if (optval) \
+ inp->inp_flags |= bit; \
+ else \
+ inp->inp_flags &= ~bit;
+
+ case IP_RECVOPTS:
+ OPTSET(INP_RECVOPTS);
+ break;
+
+ case IP_RECVRETOPTS:
+ OPTSET(INP_RECVRETOPTS);
+ break;
+
+ case IP_RECVDSTADDR:
+ OPTSET(INP_RECVDSTADDR);
+ break;
+
+ case IP_RECVIF:
+ OPTSET(INP_RECVIF);
+ break;
+ }
+ }
+ break;
+#undef OPTSET
+
+ case IP_MULTICAST_IF:
+ case IP_MULTICAST_VIF:
+ case IP_MULTICAST_TTL:
+ case IP_MULTICAST_LOOP:
+ case IP_ADD_MEMBERSHIP:
+ case IP_DROP_MEMBERSHIP:
+ error = ip_setmoptions(optname, &inp->inp_moptions, m);
+ break;
+
+ case IP_PORTRANGE:
+ if (m == 0 || m->m_len != sizeof(int))
+ error = EINVAL;
+ else {
+ optval = *mtod(m, int *);
+
+ switch (optval) {
+
+ case IP_PORTRANGE_DEFAULT:
+ inp->inp_flags &= ~(INP_LOWPORT);
+ inp->inp_flags &= ~(INP_HIGHPORT);
+ break;
+
+ case IP_PORTRANGE_HIGH:
+ inp->inp_flags &= ~(INP_LOWPORT);
+ inp->inp_flags |= INP_HIGHPORT;
+ break;
+
+ case IP_PORTRANGE_LOW:
+ inp->inp_flags &= ~(INP_HIGHPORT);
+ inp->inp_flags |= INP_LOWPORT;
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ }
+ break;
+
+ default:
+ error = ENOPROTOOPT;
+ break;
+ }
+ if (m)
+ (void)m_free(m);
+ break;
+
+ case PRCO_GETOPT:
+ switch (optname) {
+ case IP_OPTIONS:
+ case IP_RETOPTS:
+ *mp = m = m_get(M_WAIT, MT_SOOPTS);
+ if (inp->inp_options) {
+ m->m_len = inp->inp_options->m_len;
+ bcopy(mtod(inp->inp_options, void *),
+ mtod(m, void *), m->m_len);
+ } else
+ m->m_len = 0;
+ break;
+
+ case IP_TOS:
+ case IP_TTL:
+ case IP_RECVOPTS:
+ case IP_RECVRETOPTS:
+ case IP_RECVDSTADDR:
+ case IP_RECVIF:
+ *mp = m = m_get(M_WAIT, MT_SOOPTS);
+ m->m_len = sizeof(int);
+ switch (optname) {
+
+ case IP_TOS:
+ optval = inp->inp_ip_tos;
+ break;
+
+ case IP_TTL:
+ optval = inp->inp_ip_ttl;
+ break;
+
+#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
+
+ case IP_RECVOPTS:
+ optval = OPTBIT(INP_RECVOPTS);
+ break;
+
+ case IP_RECVRETOPTS:
+ optval = OPTBIT(INP_RECVRETOPTS);
+ break;
+
+ case IP_RECVDSTADDR:
+ optval = OPTBIT(INP_RECVDSTADDR);
+ break;
+
+ case IP_RECVIF:
+ optval = OPTBIT(INP_RECVIF);
+ break;
+ }
+ *mtod(m, int *) = optval;
+ break;
+
+ case IP_MULTICAST_IF:
+ case IP_MULTICAST_VIF:
+ case IP_MULTICAST_TTL:
+ case IP_MULTICAST_LOOP:
+ case IP_ADD_MEMBERSHIP:
+ case IP_DROP_MEMBERSHIP:
+ error = ip_getmoptions(optname, inp->inp_moptions, mp);
+ break;
+
+ case IP_PORTRANGE:
+ *mp = m = m_get(M_WAIT, MT_SOOPTS);
+ m->m_len = sizeof(int);
+
+ if (inp->inp_flags & INP_HIGHPORT)
+ optval = IP_PORTRANGE_HIGH;
+ else if (inp->inp_flags & INP_LOWPORT)
+ optval = IP_PORTRANGE_LOW;
+ else
+ optval = 0;
+
+ *mtod(m, int *) = optval;
+ break;
+
+ default:
+ error = ENOPROTOOPT;
+ break;
+ }
+ break;
+ }
+ return (error);
+}
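+
+/*
+ * Illustrative user-level counterpart: the options above are normally
+ * reached through setsockopt()/getsockopt() on an application socket
+ * ("s" is a placeholder), e.g.
+ *
+ *	int ttl = 64;
+ *	setsockopt(s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl));
+ *
+ * which ends up in the PRCO_SETOPT path of ip_ctloutput() above.
+ */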
+
+/*
+ * Set up IP options in pcb for insertion in output packets.
+ * Store in mbuf with pointer in pcbopt, adding pseudo-option
+ * with destination address if source routed.
+ */
+static int
+#ifdef notyet
+ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m)
+#else
+ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m)
+#endif
+{
+ register int cnt, optlen;
+ register u_char *cp;
+ u_char opt;
+
+ /* turn off any old options */
+ if (*pcbopt)
+ (void)m_free(*pcbopt);
+ *pcbopt = 0;
+ if (m == (struct mbuf *)0 || m->m_len == 0) {
+ /*
+ * Only turning off any previous options.
+ */
+ if (m)
+ (void)m_free(m);
+ return (0);
+ }
+
+#ifndef vax
+ if (m->m_len % sizeof(long))
+ goto bad;
+#endif
+ /*
+ * IP first-hop destination address will be stored before
+ * actual options; move other options back
+ * and clear it when none present.
+ */
+ if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
+ goto bad;
+ cnt = m->m_len;
+ m->m_len += sizeof(struct in_addr);
+ cp = mtod(m, u_char *) + sizeof(struct in_addr);
+ ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
+ bzero(mtod(m, caddr_t), sizeof(struct in_addr));
+
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ optlen = cp[IPOPT_OLEN];
+ if (optlen <= IPOPT_OLEN || optlen > cnt)
+ goto bad;
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ case IPOPT_LSRR:
+ case IPOPT_SSRR:
+ /*
+ * user process specifies route as:
+ * ->A->B->C->D
+ * D must be our final destination (but we can't
+ * check that since we may not have connected yet).
+ * A is first hop destination, which doesn't appear in
+ * actual IP option, but is stored before the options.
+ */
+ if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
+ goto bad;
+ m->m_len -= sizeof(struct in_addr);
+ cnt -= sizeof(struct in_addr);
+ optlen -= sizeof(struct in_addr);
+ cp[IPOPT_OLEN] = optlen;
+ /*
+ * Move first hop before start of options.
+ */
+ bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
+ sizeof(struct in_addr));
+ /*
+ * Then copy rest of options back
+ * to close up the deleted entry.
+ */
+ ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
+ sizeof(struct in_addr)),
+ (caddr_t)&cp[IPOPT_OFFSET+1],
+ (unsigned)cnt + sizeof(struct in_addr));
+ break;
+ }
+ }
+ if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
+ goto bad;
+ *pcbopt = m;
+ return (0);
+
+bad:
+ (void)m_free(m);
+ return (EINVAL);
+}
+
+/*
+ * Set the IP multicast options in response to user setsockopt().
+ */
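+/*
+ * Illustrative request: a group join arrives here as an IP_ADD_MEMBERSHIP
+ * option carrying a struct ip_mreq, e.g. from user space ("s" is a
+ * placeholder and 224.0.1.20 just an example group):
+ *
+ *	struct ip_mreq mr;
+ *	mr.imr_multiaddr.s_addr = inet_addr("224.0.1.20");
+ *	mr.imr_interface.s_addr = INADDR_ANY;
+ *	setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mr, sizeof(mr));
+ */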
+static int
+ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m)
+{
+ int error = 0;
+ u_char loop;
+ int i;
+ struct in_addr addr;
+ struct ip_mreq *mreq;
+ struct ifnet *ifp;
+ struct ip_moptions *imo = *imop;
+ struct route ro;
+ register struct sockaddr_in *dst;
+ int s;
+
+ if (imo == NULL) {
+ /*
+ * No multicast option buffer attached to the pcb;
+ * allocate one and initialize to default values.
+ */
+ imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
+ M_WAITOK);
+
+ if (imo == NULL)
+ return (ENOBUFS);
+ *imop = imo;
+ imo->imo_multicast_ifp = NULL;
+ imo->imo_multicast_vif = -1;
+ imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+ imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
+ imo->imo_num_memberships = 0;
+ }
+
+ switch (optname) {
+	/* store an index number for the vif to use for outgoing packets */
+ case IP_MULTICAST_VIF:
+ if (!legal_vif_num) {
+ error = EOPNOTSUPP;
+ break;
+ }
+ if (m == NULL || m->m_len != sizeof(int)) {
+ error = EINVAL;
+ break;
+ }
+ i = *(mtod(m, int *));
+ if (!legal_vif_num(i) && (i != -1)) {
+ error = EINVAL;
+ break;
+ }
+ imo->imo_multicast_vif = i;
+ break;
+
+ case IP_MULTICAST_IF:
+ /*
+ * Select the interface for outgoing multicast packets.
+ */
+ if (m == NULL || m->m_len != sizeof(struct in_addr)) {
+ error = EINVAL;
+ break;
+ }
+ addr = *(mtod(m, struct in_addr *));
+ /*
+ * INADDR_ANY is used to remove a previous selection.
+ * When no interface is selected, a default one is
+ * chosen every time a multicast packet is sent.
+ */
+ if (addr.s_addr == INADDR_ANY) {
+ imo->imo_multicast_ifp = NULL;
+ break;
+ }
+ /*
+ * The selected interface is identified by its local
+ * IP address. Find the interface and confirm that
+ * it supports multicasting.
+ */
+ s = splimp();
+ INADDR_TO_IFP(addr, ifp);
+ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
+ splx(s);
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ imo->imo_multicast_ifp = ifp;
+ splx(s);
+ break;
+
+ case IP_MULTICAST_TTL:
+ /*
+ * Set the IP time-to-live for outgoing multicast packets.
+ */
+ if (m == NULL || m->m_len != 1) {
+ error = EINVAL;
+ break;
+ }
+ imo->imo_multicast_ttl = *(mtod(m, u_char *));
+ break;
+
+ case IP_MULTICAST_LOOP:
+ /*
+ * Set the loopback flag for outgoing multicast packets.
+ * Must be zero or one.
+ */
+ if (m == NULL || m->m_len != 1 ||
+ (loop = *(mtod(m, u_char *))) > 1) {
+ error = EINVAL;
+ break;
+ }
+ imo->imo_multicast_loop = loop;
+ break;
+
+ case IP_ADD_MEMBERSHIP:
+ /*
+ * Add a multicast group membership.
+ * Group must be a valid IP multicast address.
+ */
+ if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
+ error = EINVAL;
+ break;
+ }
+ mreq = mtod(m, struct ip_mreq *);
+ if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
+ error = EINVAL;
+ break;
+ }
+ s = splimp();
+ /*
+ * If no interface address was provided, use the interface of
+ * the route to the given multicast address.
+ */
+ if (mreq->imr_interface.s_addr == INADDR_ANY) {
+ bzero((caddr_t)&ro, sizeof(ro));
+ dst = (struct sockaddr_in *)&ro.ro_dst;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_family = AF_INET;
+ dst->sin_addr = mreq->imr_multiaddr;
+ rtalloc(&ro);
+ if (ro.ro_rt == NULL) {
+ error = EADDRNOTAVAIL;
+ splx(s);
+ break;
+ }
+ ifp = ro.ro_rt->rt_ifp;
+ rtfree(ro.ro_rt);
+ }
+ else {
+ INADDR_TO_IFP(mreq->imr_interface, ifp);
+ }
+
+ /*
+ * See if we found an interface, and confirm that it
+ * supports multicast.
+ */
+ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
+ error = EADDRNOTAVAIL;
+ splx(s);
+ break;
+ }
+ /*
+ * See if the membership already exists or if all the
+ * membership slots are full.
+ */
+ for (i = 0; i < imo->imo_num_memberships; ++i) {
+ if (imo->imo_membership[i]->inm_ifp == ifp &&
+ imo->imo_membership[i]->inm_addr.s_addr
+ == mreq->imr_multiaddr.s_addr)
+ break;
+ }
+ if (i < imo->imo_num_memberships) {
+ error = EADDRINUSE;
+ splx(s);
+ break;
+ }
+ if (i == IP_MAX_MEMBERSHIPS) {
+ error = ETOOMANYREFS;
+ splx(s);
+ break;
+ }
+ /*
+ * Everything looks good; add a new record to the multicast
+ * address list for the given interface.
+ */
+ if ((imo->imo_membership[i] =
+ in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
+ error = ENOBUFS;
+ splx(s);
+ break;
+ }
+ ++imo->imo_num_memberships;
+ splx(s);
+ break;
+
+ case IP_DROP_MEMBERSHIP:
+ /*
+ * Drop a multicast group membership.
+ * Group must be a valid IP multicast address.
+ */
+ if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
+ error = EINVAL;
+ break;
+ }
+ mreq = mtod(m, struct ip_mreq *);
+ if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
+ error = EINVAL;
+ break;
+ }
+
+ s = splimp();
+ /*
+ * If an interface address was specified, get a pointer
+ * to its ifnet structure.
+ */
+ if (mreq->imr_interface.s_addr == INADDR_ANY)
+ ifp = NULL;
+ else {
+ INADDR_TO_IFP(mreq->imr_interface, ifp);
+ if (ifp == NULL) {
+ error = EADDRNOTAVAIL;
+ splx(s);
+ break;
+ }
+ }
+ /*
+ * Find the membership in the membership array.
+ */
+ for (i = 0; i < imo->imo_num_memberships; ++i) {
+ if ((ifp == NULL ||
+ imo->imo_membership[i]->inm_ifp == ifp) &&
+ imo->imo_membership[i]->inm_addr.s_addr ==
+ mreq->imr_multiaddr.s_addr)
+ break;
+ }
+ if (i == imo->imo_num_memberships) {
+ error = EADDRNOTAVAIL;
+ splx(s);
+ break;
+ }
+ /*
+ * Give up the multicast address record to which the
+ * membership points.
+ */
+ in_delmulti(imo->imo_membership[i]);
+ /*
+ * Remove the gap in the membership array.
+ */
+ for (++i; i < imo->imo_num_memberships; ++i)
+ imo->imo_membership[i-1] = imo->imo_membership[i];
+ --imo->imo_num_memberships;
+ splx(s);
+ break;
+
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ /*
+ * If all options have default values, no need to keep the mbuf.
+ */
+ if (imo->imo_multicast_ifp == NULL &&
+ imo->imo_multicast_vif == -1 &&
+ imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
+ imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
+ imo->imo_num_memberships == 0) {
+ free(*imop, M_IPMOPTS);
+ *imop = NULL;
+ }
+
+ return (error);
+}
+
+/*
+ * Return the IP multicast options in response to user getsockopt().
+ */
+static int
+ip_getmoptions(int optname, struct ip_moptions *imo,
+ struct mbuf **mp)
+{
+ u_char *ttl;
+ u_char *loop;
+ struct in_addr *addr;
+ struct in_ifaddr *ia;
+
+ *mp = m_get(M_WAIT, MT_SOOPTS);
+
+ switch (optname) {
+
+ case IP_MULTICAST_VIF:
+ if (imo != NULL)
+ *(mtod(*mp, int *)) = imo->imo_multicast_vif;
+ else
+ *(mtod(*mp, int *)) = -1;
+ (*mp)->m_len = sizeof(int);
+ return(0);
+
+ case IP_MULTICAST_IF:
+ addr = mtod(*mp, struct in_addr *);
+ (*mp)->m_len = sizeof(struct in_addr);
+ if (imo == NULL || imo->imo_multicast_ifp == NULL)
+ addr->s_addr = INADDR_ANY;
+ else {
+ IFP_TO_IA(imo->imo_multicast_ifp, ia);
+ addr->s_addr = (ia == NULL) ? INADDR_ANY
+ : IA_SIN(ia)->sin_addr.s_addr;
+ }
+ return (0);
+
+ case IP_MULTICAST_TTL:
+ ttl = mtod(*mp, u_char *);
+ (*mp)->m_len = 1;
+ *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
+ : imo->imo_multicast_ttl;
+ return (0);
+
+ case IP_MULTICAST_LOOP:
+ loop = mtod(*mp, u_char *);
+ (*mp)->m_len = 1;
+ *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
+ : imo->imo_multicast_loop;
+ return (0);
+
+ default:
+ return (EOPNOTSUPP);
+ }
+}
+
+/*
+ * Discard the IP multicast options.
+ */
+void
+ip_freemoptions(struct ip_moptions *imo)
+{
+ register int i;
+
+ if (imo != NULL) {
+ for (i = 0; i < imo->imo_num_memberships; ++i)
+ in_delmulti(imo->imo_membership[i]);
+ free(imo, M_IPMOPTS);
+ }
+}
+
+/*
+ * Routine called from ip_output() to loop back a copy of an IP multicast
+ * packet to the input queue of a specified interface. Note that this
+ * calls the output routine of the loopback "driver", but with an interface
+ * pointer that might NOT be a loopback interface -- evil, but easier than
+ * replicating that code here.
+ */
+static void
+ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
+ int hlen)
+{
+ register struct ip *ip;
+ struct mbuf *copym;
+
+ copym = m_copy(m, 0, M_COPYALL);
+ if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
+ copym = m_pullup(copym, hlen);
+ if (copym != NULL) {
+ /*
+ * We don't bother to fragment if the IP length is greater
+ * than the interface's MTU. Can this possibly matter?
+ */
+ ip = mtod(copym, struct ip *);
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ ip->ip_sum = 0;
+#ifdef _IP_VHL
+ if (ip->ip_vhl == IP_VHL_BORING) {
+ ip->ip_sum = in_cksum_hdr(ip);
+ } else {
+ ip->ip_sum = in_cksum(copym, hlen);
+ }
+#else
+ ip->ip_sum = in_cksum(copym, hlen);
+#endif
+ /*
+ * NB:
+ * It's not clear whether there are any lingering
+ * reentrancy problems in other areas which might
+ * be exposed by using ip_input directly (in
+ * particular, everything which modifies the packet
+ * in-place). Yet another option is using the
+ * protosw directly to deliver the looped back
+ * packet. For the moment, we'll err on the side
+ * of safety by continuing to abuse looutput().
+ */
+#ifdef notdef
+ copym->m_pkthdr.rcvif = ifp;
+ ip_input(copym);
+#else
+ (void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
+#endif
+ }
+}
diff --git a/netinet/ip_var.h b/netinet/ip_var.h
new file mode 100644
index 0000000..a6e0bc8
--- /dev/null
+++ b/netinet/ip_var.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_var.h 8.2 (Berkeley) 1/9/95
+ * $FreeBSD: src/sys/netinet/ip_var.h,v 1.94 2005/01/07 01:45:44 imp Exp $
+ */
+
+
+#ifndef _NETINET_IP_VAR_H_
+#define _NETINET_IP_VAR_H_
+
+#include <netinet/in.h> /* struct in_addr */
+
+/*
+ * Overlay for ip header used by other protocols (tcp, udp).
+ */
+struct ipovly {
+ caddr_t ih_next;
+ caddr_t ih_prev; /* for protocol sequence q's */
+ u_char ih_x1; /* (unused) */
+ u_char ih_pr; /* protocol */
+ u_short ih_len; /* protocol length */
+ struct in_addr ih_src; /* source internet address */
+ struct in_addr ih_dst; /* destination internet address */
+};
+
+/*
+ * Ip reassembly queue structure. Each fragment
+ * being reassembled is attached to one of these structures.
+ * They are timed out after ipq_ttl drops to 0, and may also
+ * be reclaimed if memory becomes tight.
+ */
+struct ipq {
+ struct ipq *next,*prev; /* to other reass headers */
+ u_char ipq_ttl; /* time for reass q to live */
+ u_char ipq_p; /* protocol of this fragment */
+ u_short ipq_id; /* sequence id for reassembly */
+ struct ipasfrag *ipq_next,*ipq_prev;
+ /* to ip headers of fragments */
+ struct in_addr ipq_src,ipq_dst;
+#ifdef IPDIVERT
+ u_short ipq_divert; /* divert protocol port */
+#endif
+};
+
+/*
+ * Ip header, when holding a fragment.
+ *
+ * Note: ipf_next must be at same offset as ipq_next above
+ */
+struct ipasfrag {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_char ip_hl:4,
+ ip_v:4;
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_char ip_v:4,
+ ip_hl:4;
+#endif
+ u_char ipf_mff; /* XXX overlays ip_tos: use low bit
+ * to avoid destroying tos;
+ * copied from (ip_off&IP_MF) */
+ u_short ip_len;
+ u_short ip_id;
+ u_short ip_off;
+ u_char ip_ttl;
+ u_char ip_p;
+ u_short ip_sum;
+ struct ipasfrag *ipf_next; /* next fragment */
+ struct ipasfrag *ipf_prev; /* previous fragment */
+};
+
+/*
+ * Structure stored in mbuf in inpcb.ip_options
+ * and passed to ip_output when ip options are in use.
+ * The actual length of the options (including ipopt_dst)
+ * is in m_len.
+ */
+#define MAX_IPOPTLEN 40
+
+struct ipoption {
+ struct in_addr ipopt_dst; /* first-hop dst if source routed */
+ char ipopt_list[MAX_IPOPTLEN]; /* options proper */
+};
+
+/*
+ * Structure attached to inpcb.ip_moptions and
+ * passed to ip_output when IP multicast options are in use.
+ */
+struct ip_moptions {
+ struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
+ u_char imo_multicast_ttl; /* TTL for outgoing multicasts */
+ u_char imo_multicast_loop; /* 1 => hear sends if a member */
+ u_short imo_num_memberships; /* no. memberships this socket */
+ struct in_multi *imo_membership[IP_MAX_MEMBERSHIPS];
+ u_long imo_multicast_vif; /* vif num outgoing multicasts */
+};
+
+struct ipstat {
+ u_long ips_total; /* total packets received */
+ u_long ips_badsum; /* checksum bad */
+ u_long ips_tooshort; /* packet too short */
+ u_long ips_toosmall; /* not enough data */
+ u_long ips_badhlen; /* ip header length < data size */
+ u_long ips_badlen; /* ip length < ip header length */
+ u_long ips_fragments; /* fragments received */
+ u_long ips_fragdropped; /* frags dropped (dups, out of space) */
+ u_long ips_fragtimeout; /* fragments timed out */
+ u_long ips_forward; /* packets forwarded */
+ u_long ips_cantforward; /* packets rcvd for unreachable dest */
+ u_long ips_redirectsent; /* packets forwarded on same net */
+ u_long ips_noproto; /* unknown or unsupported protocol */
+ u_long ips_delivered; /* datagrams delivered to upper level*/
+ u_long ips_localout; /* total ip packets generated here */
+ u_long ips_odropped; /* lost packets due to nobufs, etc. */
+ u_long ips_reassembled; /* total packets reassembled ok */
+ u_long ips_fragmented; /* datagrams successfully fragmented */
+ u_long ips_ofragments; /* output fragments created */
+ u_long ips_cantfrag; /* don't fragment flag was set, etc. */
+ u_long ips_badoptions; /* error in option processing */
+ u_long ips_noroute; /* packets discarded due to no route */
+ u_long ips_badvers; /* ip version != 4 */
+ u_long ips_rawout; /* total raw ip packets generated */
+ u_long ips_toolong; /* ip length > max ip packet size */
+};
+
+#ifdef _KERNEL
+/* flags passed to ip_output as last parameter */
+#define IP_FORWARDING 0x1 /* most of ip header exists */
+#define IP_RAWOUTPUT 0x2 /* raw ip header exists */
+#define IP_SENDONES 0x4 /* send all-ones broadcast */
+#define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */
+#define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */
+
+struct ip;
+struct inpcb;
+struct route;
+struct sockopt;
+struct mbuf;
+
+extern struct ipstat ipstat;
+extern u_short ip_id; /* ip packet ctr, for ids */
+extern int ip_defttl; /* default IP ttl */
+extern u_char ip_protox[];
+extern struct socket *ip_rsvpd; /* reservation protocol daemon */
+extern struct socket *ip_mrouter; /* multicast routing daemon */
+extern int (*legal_vif_num)(int);
+extern u_long (*ip_mcast_src)(int);
+extern int rsvp_on;
+
+int ip_ctloutput(int, struct socket *, int, int, struct mbuf **);
+void ip_drain(void);
+void ip_freemoptions(struct ip_moptions *);
+void ip_init(void);
+extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
+ struct ip_moptions *);
+int ip_output(struct mbuf *,
+ struct mbuf *, struct route *, int, struct ip_moptions *);
+void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
+ struct mbuf *);
+void ip_slowtimo(void);
+struct mbuf *
+ ip_srcroute(void);
+void ip_stripoptions(struct mbuf *, struct mbuf *);
+int rip_ctloutput(int, struct socket *, int, int, struct mbuf **);
+void rip_init(void);
+void rip_input(struct mbuf *, int);
+int rip_output(struct mbuf *, struct socket *, u_long);
+int rip_usrreq(struct socket *,
+ int, struct mbuf *, struct mbuf *, struct mbuf *);
+void ipip_input(struct mbuf *, int);
+void rsvp_input(struct mbuf *, int);
+int ip_rsvp_init(struct socket *);
+int ip_rsvp_done(void);
+int ip_rsvp_vif_init(struct socket *, struct mbuf *);
+int ip_rsvp_vif_done(struct socket *, struct mbuf *);
+void ip_rsvp_force_done(struct socket *);
+
+#ifdef IPDIVERT
+void div_init(void);
+void div_input(struct mbuf *, int);
+int div_usrreq(struct socket *,
+ int, struct mbuf *, struct mbuf *, struct mbuf *);
+extern u_short ip_divert_port;
+extern u_short ip_divert_ignore;
+#endif /* IPDIVERT */
+
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_IP_VAR_H_ */
diff --git a/netinet/raw_ip.c b/netinet/raw_ip.c
new file mode 100644
index 0000000..58b85a9
--- /dev/null
+++ b/netinet/raw_ip.c
@@ -0,0 +1,492 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
+ * $FreeBSD: src/sys/netinet/raw_ip.c,v 1.147 2005/01/07 01:45:45 imp Exp $
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_mac.h"
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <errno.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#define _IP_VHL
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_mroute.h>
+
+#include <netinet/ip_fw.h>
+
+#if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
+#undef COMPAT_IPFW
+#define COMPAT_IPFW 1
+#else
+#undef COMPAT_IPFW
+#endif
+
+static struct inpcbhead ripcb;
+static struct inpcbinfo ripcbinfo;
+
+/*
+ * Nominal space allocated to a raw ip socket.
+ */
+#define RIPSNDQ 8192
+#define RIPRCVQ 8192
+
+/*
+ * Raw interface to IP protocol.
+ */
+
+/*
+ * Initialize raw connection block q.
+ */
+void
+rip_init(void)
+{
+ LIST_INIT(&ripcb);
+ ripcbinfo.listhead = &ripcb;
+ /*
+ * XXX We don't use the hash list for raw IP, but it's easier
+ * to allocate a one entry hash list than it is to check all
+ * over the place for hashbase == NULL.
+ */
+ ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
+}
+
+static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET, 0, {0}, {0} };
+/*
+ * Setup generic address and protocol structures
+ * for raw_input routine, then pass them along with
+ * mbuf chain.
+ */
+void
+rip_input(struct mbuf *m, int iphlen)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ register struct inpcb *inp;
+ struct inpcb *last = 0;
+ struct mbuf *opts = 0;
+
+ ripsrc.sin_addr = ip->ip_src;
+ for (inp = ripcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
+ if (inp->inp_ip_p && inp->inp_ip_p != ip->ip_p)
+ continue;
+ if (inp->inp_laddr.s_addr &&
+ inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
+ continue;
+ if (inp->inp_faddr.s_addr &&
+ inp->inp_faddr.s_addr != ip->ip_src.s_addr)
+ continue;
+ if (last) {
+ struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
+ if (n) {
+ if (last->inp_flags & INP_CONTROLOPTS ||
+ last->inp_socket->so_options & SO_TIMESTAMP)
+ ip_savecontrol(last, &opts, ip, n);
+ if (sbappendaddr(&last->inp_socket->so_rcv,
+ (struct sockaddr *)&ripsrc, n,
+ opts) == 0) {
+ /* should notify about lost packet */
+ m_freem(n);
+ if (opts)
+ m_freem(opts);
+ } else
+ sorwakeup(last->inp_socket);
+ opts = 0;
+ }
+ }
+ last = inp;
+ }
+ if (last) {
+ if (last->inp_flags & INP_CONTROLOPTS ||
+ last->inp_socket->so_options & SO_TIMESTAMP)
+ ip_savecontrol(last, &opts, ip, m);
+ if (sbappendaddr(&last->inp_socket->so_rcv,
+ (struct sockaddr *)&ripsrc, m, opts) == 0) {
+ m_freem(m);
+ if (opts)
+ m_freem(opts);
+ } else
+ sorwakeup(last->inp_socket);
+ } else {
+ m_freem(m);
+ ipstat.ips_noproto++;
+ ipstat.ips_delivered--;
+ }
+}
+
+/*
+ * Generate IP header and pass packet to ip_output.
+ * Tack on options user may have setup with control call.
+ */
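+/*
+ * Illustrative note: whether the header is built here or supplied by the
+ * caller is selected with the IP_HDRINCL socket option ("s" is a
+ * placeholder), e.g. from user space:
+ *
+ *	int on = 1;
+ *	setsockopt(s, IPPROTO_IP, IP_HDRINCL, &on, sizeof(on));
+ *
+ * see the IP_HDRINCL case in rip_ctloutput() below.
+ */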
+int
+rip_output(struct mbuf *m, struct socket *so, u_long dst)
+{
+ struct ip *ip;
+ struct inpcb *inp = sotoinpcb(so);
+ int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
+ IP_ALLOWBROADCAST;
+
+ /*
+ * If the user handed us a complete IP packet, use it.
+ * Otherwise, allocate an mbuf for a header and fill it in.
+ */
+ if ((inp->inp_flags & INP_HDRINCL) == 0) {
+ if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
+ m_freem(m);
+ return(EMSGSIZE);
+ }
+ M_PREPEND(m, sizeof(struct ip), M_WAIT);
+ ip = mtod(m, struct ip *);
+ ip->ip_tos = 0;
+ ip->ip_off = 0;
+ ip->ip_p = inp->inp_ip_p;
+ ip->ip_len = m->m_pkthdr.len;
+ ip->ip_src = inp->inp_laddr;
+ ip->ip_dst.s_addr = dst;
+ ip->ip_ttl = MAXTTL;
+ } else {
+ if (m->m_pkthdr.len > IP_MAXPACKET) {
+ m_freem(m);
+ return(EMSGSIZE);
+ }
+ ip = mtod(m, struct ip *);
+ /* don't allow both user specified and setsockopt options,
+ and don't allow packet length sizes that will crash */
+#ifdef _IP_VHL
+ if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
+ && inp->inp_options)
+ || (ip->ip_len > m->m_pkthdr.len)
+ || (ip->ip_len < (IP_VHL_HL(ip->ip_vhl) << 2))) {
+#else
+ if (((ip->ip_hl != (sizeof (*ip) >> 2))
+ && inp->inp_options)
+ || (ip->ip_len > m->m_pkthdr.len)
+ || (ip->ip_len < (ip->ip_hl << 2))) {
+#endif
+ m_freem(m);
+ return EINVAL;
+ }
+ if (ip->ip_id == 0)
+ ip->ip_id = htons(ip_id++);
+ /* XXX prevent ip_output from overwriting header fields */
+ flags |= IP_RAWOUTPUT;
+ ipstat.ips_rawout++;
+ }
+ return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
+ inp->inp_moptions));
+}
+
+/*
+ * Raw IP socket option processing.
+ */
+int
+rip_ctloutput(int op, struct socket *so, int level, int optname,
+ struct mbuf **m)
+{
+ struct inpcb *inp = sotoinpcb(so);
+ int error;
+
+ if (level != IPPROTO_IP) {
+ if (op == PRCO_SETOPT && *m)
+ (void)m_free(*m);
+ return (EINVAL);
+ }
+
+ switch (optname) {
+
+ case IP_HDRINCL:
+ error = 0;
+ if (op == PRCO_SETOPT) {
+ if (m == 0 || *m == 0 || (*m)->m_len < sizeof (int))
+ error = EINVAL;
+ else if (*mtod(*m, int *))
+ inp->inp_flags |= INP_HDRINCL;
+ else
+ inp->inp_flags &= ~INP_HDRINCL;
+ if (*m)
+ (void)m_free(*m);
+ } else {
+ *m = m_get(M_WAIT, MT_SOOPTS);
+ (*m)->m_len = sizeof (int);
+ *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL;
+ }
+ return (error);
+
+#ifdef COMPAT_IPFW
+ case IP_FW_GET:
+ if (ip_fw_ctl_ptr == NULL || op == PRCO_SETOPT) {
+ if (*m) (void)m_free(*m);
+ return(EINVAL);
+ }
+ return (*ip_fw_ctl_ptr)(optname, m);
+
+ case IP_FW_ADD:
+ case IP_FW_DEL:
+ case IP_FW_FLUSH:
+ case IP_FW_ZERO:
+ if (ip_fw_ctl_ptr == NULL || op != PRCO_SETOPT) {
+ if (*m) (void)m_free(*m);
+ return(EINVAL);
+ }
+ return (*ip_fw_ctl_ptr)(optname, m);
+
+ case IP_NAT:
+ if (ip_nat_ctl_ptr == NULL) {
+ if (*m) (void)m_free(*m);
+ return(EINVAL);
+ }
+ return (*ip_nat_ctl_ptr)(op, m);
+
+#endif
+ case IP_RSVP_ON:
+ return ip_rsvp_init(so);
+ break;
+
+ case IP_RSVP_OFF:
+ return ip_rsvp_done();
+ break;
+
+ case IP_RSVP_VIF_ON:
+ return ip_rsvp_vif_init(so, *m);
+
+ case IP_RSVP_VIF_OFF:
+ return ip_rsvp_vif_done(so, *m);
+
+ case MRT_INIT:
+ case MRT_DONE:
+ case MRT_ADD_VIF:
+ case MRT_DEL_VIF:
+ case MRT_ADD_MFC:
+ case MRT_DEL_MFC:
+ case MRT_VERSION:
+ case MRT_ASSERT:
+ if (op == PRCO_SETOPT) {
+ error = ip_mrouter_set(optname, so, *m);
+ if (*m)
+ (void)m_free(*m);
+ } else if (op == PRCO_GETOPT) {
+ error = ip_mrouter_get(optname, so, m);
+ } else
+ error = EINVAL;
+ return (error);
+ }
+ return (ip_ctloutput(op, so, level, optname, m));
+}
+
+static u_long rip_sendspace = RIPSNDQ; /* XXX sysctl ? */
+static u_long rip_recvspace = RIPRCVQ; /* XXX sysctl ? */
+
+/*ARGSUSED*/
+int
+rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
+ struct mbuf *control)
+{
+ register int error = 0;
+ register struct inpcb *inp = sotoinpcb(so);
+ int s;
+
+ if (req == PRU_CONTROL)
+ return (in_control(so, (uintptr_t)m, (caddr_t)nam,
+ (struct ifnet *)control));
+
+ switch (req) {
+
+ case PRU_ATTACH:
+ if (inp)
+ panic("rip_attach");
+ if ((so->so_state & SS_PRIV) == 0) {
+ error = EACCES;
+ break;
+ }
+ s = splnet();
+ error = in_pcballoc(so, &ripcbinfo);
+ splx(s);
+ if (error)
+ break;
+ error = soreserve(so, rip_sendspace, rip_recvspace);
+ if (error)
+ break;
+ inp = (struct inpcb *)so->so_pcb;
+ inp->inp_ip_p = (uintptr_t)nam;
+ break;
+
+ case PRU_DISCONNECT:
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ error = ENOTCONN;
+ break;
+ }
+ /* FALLTHROUGH */
+ case PRU_ABORT:
+ soisdisconnected(so);
+ /* FALLTHROUGH */
+ case PRU_DETACH:
+ if (inp == 0)
+ panic("rip_detach");
+ if (so == ip_mrouter)
+ ip_mrouter_done();
+ ip_rsvp_force_done(so);
+ if (so == ip_rsvpd)
+ ip_rsvp_done();
+ in_pcbdetach(inp);
+ break;
+
+ case PRU_BIND:
+ {
+ struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
+
+ if (nam->m_len != sizeof(*addr)) {
+ error = EINVAL;
+ break;
+ }
+ if ((ifnet == 0) ||
+ ((addr->sin_family != AF_INET) &&
+ (addr->sin_family != AF_IMPLINK)) ||
+ (addr->sin_addr.s_addr &&
+ ifa_ifwithaddr((struct sockaddr *)addr) == 0)) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ inp->inp_laddr = addr->sin_addr;
+ break;
+ }
+ case PRU_CONNECT:
+ {
+ struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
+
+ if (nam->m_len != sizeof(*addr)) {
+ error = EINVAL;
+ break;
+ }
+ if (ifnet == 0) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ if ((addr->sin_family != AF_INET) &&
+ (addr->sin_family != AF_IMPLINK)) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ inp->inp_faddr = addr->sin_addr;
+ soisconnected(so);
+ break;
+ }
+
+ case PRU_CONNECT2:
+ error = EOPNOTSUPP;
+ break;
+
+ /*
+	 * Mark the connection as being incapable of further output.
+ */
+ case PRU_SHUTDOWN:
+ socantsendmore(so);
+ break;
+
+ /*
+ * Ship a packet out. The appropriate raw output
+ * routine handles any massaging necessary.
+ */
+ case PRU_SEND:
+ {
+ register u_long dst;
+
+ if (so->so_state & SS_ISCONNECTED) {
+ if (nam) {
+ error = EISCONN;
+ break;
+ }
+ dst = inp->inp_faddr.s_addr;
+ } else {
+ if (nam == NULL) {
+ error = ENOTCONN;
+ break;
+ }
+ dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+ }
+ error = rip_output(m, so, dst);
+ m = NULL;
+ break;
+ }
+
+ case PRU_SENSE:
+ /*
+ * stat: don't bother with a blocksize.
+ */
+ return (0);
+
+ /*
+ * Not supported.
+ */
+ case PRU_RCVOOB:
+ case PRU_RCVD:
+ case PRU_LISTEN:
+ case PRU_ACCEPT:
+ case PRU_SENDOOB:
+ error = EOPNOTSUPP;
+ break;
+
+ case PRU_SOCKADDR:
+ in_setsockaddr(inp, nam);
+ break;
+
+ case PRU_PEERADDR:
+ in_setpeeraddr(inp, nam);
+ break;
+
+ default:
+ panic("rip_usrreq");
+ }
+ if (m != NULL)
+ m_freem(m);
+ return (error);
+}
diff --git a/netinet/tcp_debug.c b/netinet/tcp_debug.c
new file mode 100644
index 0000000..a014158
--- /dev/null
+++ b/netinet/tcp_debug.c
@@ -0,0 +1,172 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_debug.c 8.1 (Berkeley) 6/10/93
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/cdefs.h>
+#include "opt_inet.h"
+#include "opt_tcpdebug.h"
+
+#ifdef TCPDEBUG
+/* load symbolic names */
+#define PRUREQUESTS
+#define TCPSTATES
+#define TCPTIMERS
+#define TANAMES
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+#ifdef TCPDEBUG
+static int tcpconsdebug = 0;
+#endif
+
+/*
+ * Global ring buffer of TCP debugging state. Each entry captures a snapshot
+ * of TCP connection state at any given moment.  tcp_debx addresses the
+ * next available slot. There is no explicit export of this data structure;
+ * it will be read via /dev/kmem by debugging tools.
+ */
+static struct tcp_debug tcp_debug[TCP_NDEBUG];
+static int tcp_debx;
+
+/*
+ * Save TCP state at a given moment; optionally, both tcpcb and TCP packet
+ * header state will be saved.
+ */
+void
+tcp_trace(short act, short ostate, struct tcpcb *tp, struct tcpiphdr *ti,
+ int req)
+{
+#ifdef TCPDEBUG
+ tcp_seq seq, ack;
+ int len, flags;
+#endif
+ struct tcp_debug *td;
+
+ td = &tcp_debug[tcp_debx++];
+ if (tcp_debx == TCP_NDEBUG)
+ tcp_debx = 0;
+ td->td_time = iptime();
+ td->td_act = act;
+ td->td_ostate = ostate;
+ td->td_tcb = (caddr_t)tp;
+ if (tp != NULL)
+ td->td_cb = *tp;
+ else
+ bzero((caddr_t)&td->td_cb, sizeof (*tp));
+ if (ti)
+ td->td_ti = *ti;
+ else
+ bzero((caddr_t)&td->td_ti, sizeof (*ti));
+ td->td_req = req;
+#ifdef TCPDEBUG
+ if (tcpconsdebug == 0)
+ return;
+ if (tp != NULL)
+ printf("%p %s:", tp, tcpstates[ostate]);
+ else
+ printf("???????? ");
+ printf("%s ", tanames[act]);
+ switch (act) {
+ case TA_INPUT:
+ case TA_OUTPUT:
+ case TA_DROP:
+ if (ti == 0)
+ break;
+ seq = ti->ti_seq;
+ ack = ti->ti_ack;
+ len = ti->ti_len;
+ if (act == TA_OUTPUT) {
+ seq = ntohl(seq);
+ ack = ntohl(ack);
+ len = ntohs((u_short)len);
+ }
+ if (act == TA_OUTPUT)
+ len -= sizeof (struct tcphdr);
+ if (len)
+ printf("[%x..%x)", seq, seq+len);
+ else
+ printf("%x", seq);
+ printf("@%x, urp=%x", ack, ti->ti_urp);
+ flags = ti->ti_flags;
+ if (flags) {
+ char *cp = "<";
+#define pf(f) { \
+ if (ti->ti_flags & TH_##f) { \
+ printf("%s%s", cp, #f); \
+ cp = ","; \
+ } \
+}
+ pf(SYN); pf(ACK); pf(FIN); pf(RST); pf(PUSH); pf(URG);
+ printf(">");
+ }
+ break;
+
+ case TA_USER:
+ printf("%s", prurequests[req&0xff]);
+ if ((req & 0xff) == PRU_SLOWTIMO)
+ printf("<%s>", tcptimers[req>>8]);
+ break;
+ }
+ if (tp != NULL)
+ printf(" -> %s", tcpstates[tp->t_state]);
+ /* print out internal state of tp !?! */
+ printf("\n");
+ if (tp == NULL)
+ return;
+ printf("\trcv_(nxt,wnd,up) (%x,%x,%x) snd_(una,nxt,max) (%x,%x,%x)\n",
+ tp->rcv_nxt, tp->rcv_wnd, tp->rcv_up, tp->snd_una, tp->snd_nxt,
+ tp->snd_max);
+ printf("\tsnd_(wl1,wl2,wnd) (%x,%x,%x)\n",
+ tp->snd_wl1, tp->snd_wl2, tp->snd_wnd);
+#endif /* TCPDEBUG */
+}
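+
+/*
+ * Illustrative sketch (an assumption, not from the BSD sources): a
+ * post-mortem tool that has read tcp_debug[] and tcp_debx out of kernel
+ * memory can replay the ring in chronological order, since tcp_debx names
+ * the next slot to be overwritten and therefore the oldest entry once the
+ * ring has wrapped.  print_entry() is a hypothetical helper.
+ *
+ *	int i, slot;
+ *	for (i = 0; i < TCP_NDEBUG; i++) {
+ *		slot = (tcp_debx + i) % TCP_NDEBUG;
+ *		if (tcp_debug[slot].td_time != 0)
+ *			print_entry(&tcp_debug[slot]);
+ *	}
+ */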
diff --git a/netinet/tcp_debug.h b/netinet/tcp_debug.h
new file mode 100644
index 0000000..55004f3
--- /dev/null
+++ b/netinet/tcp_debug.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_debug.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/netinet/tcp_debug.h,v 1.17 2009/02/13 15:14:43 luigi Exp $
+ */
+
+
+#ifndef _NETINET_TCP_DEBUG_H_
+#define _NETINET_TCP_DEBUG_H_
+
+#include <netinet/tcpip.h> /* struct tcpiphdr */
+#include <netinet/tcp_var.h> /* struct tcpcb */
+
+struct tcp_debug {
+ uint32_t td_time; /* network format */
+ short td_act;
+ short td_ostate;
+ caddr_t td_tcb;
+ struct tcpiphdr td_ti;
+ short td_req;
+ struct tcpcb td_cb;
+};
+
+#define TA_INPUT 0
+#define TA_OUTPUT 1
+#define TA_USER 2
+#define TA_RESPOND 3
+#define TA_DROP 4
+
+#ifdef TANAMES
+static const char *tanames[] =
+ { "input", "output", "user", "respond", "drop" };
+#endif
+
+#define TCP_NDEBUG 100
+
+#ifndef _KERNEL
+/* XXX common variables for broken applications. */
+struct tcp_debug tcp_debug[TCP_NDEBUG];
+int tcp_debx;
+#endif
+
+#endif /* !_NETINET_TCP_DEBUG_H_ */
diff --git a/netinet/tcp_fsm.h b/netinet/tcp_fsm.h
new file mode 100644
index 0000000..ceb053e
--- /dev/null
+++ b/netinet/tcp_fsm.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_fsm.h 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _NETINET_TCP_FSM_H_
+#define _NETINET_TCP_FSM_H_
+
+/*
+ * TCP FSM state definitions.
+ * Per RFC793, September, 1981.
+ */
+
+#define TCP_NSTATES 11
+
+#define TCPS_CLOSED 0 /* closed */
+#define TCPS_LISTEN 1 /* listening for connection */
+#define TCPS_SYN_SENT 2 /* active, have sent syn */
+#define	TCPS_SYN_RECEIVED	3	/* have sent and received syn */
+/* states < TCPS_ESTABLISHED are those where connections are not established */
+#define TCPS_ESTABLISHED 4 /* established */
+#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */
+/* states > TCPS_CLOSE_WAIT are those where user has closed */
+#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */
+#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */
+#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */
+/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
+#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */
+#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */
+
+#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED)
+#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED)
+#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT)
+
+#ifdef TCPOUTFLAGS
+/*
+ * Flags used when sending segments in tcp_output.
+ * Basic flags (TH_RST,TH_ACK,TH_SYN,TH_FIN) are totally
+ * determined by state, with the proviso that TH_FIN is sent only
+ * if all data queued for output is included in the segment.
+ */
+static u_char tcp_outflags[TCP_NSTATES] = {
+ TH_RST|TH_ACK, 0, TH_SYN, TH_SYN|TH_ACK,
+ TH_ACK, TH_ACK,
+ TH_FIN|TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK, TH_ACK, TH_ACK,
+};
+#endif
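+
+/*
+ * Illustrative sketch (an assumption, not quoted from tcp_output): the
+ * sender derives the base header flags for a segment straight from the
+ * table above, so e.g. SYN_SENT emits TH_SYN and LAST_ACK emits
+ * TH_FIN|TH_ACK.  all_queued_data_included is a hypothetical predicate
+ * standing in for the proviso described above the table.
+ *
+ *	u_char flags = tcp_outflags[tp->t_state];
+ *	if (!all_queued_data_included)
+ *		flags &= ~TH_FIN;
+ */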
+
+#ifdef KPROF
+int tcp_acounts[TCP_NSTATES][PRU_NREQ];
+#endif
+
+#ifdef TCPSTATES
+char *tcpstates[] = {
+ "CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD",
+ "ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING",
+ "LAST_ACK", "FIN_WAIT_2", "TIME_WAIT",
+};
+#endif
+
+#endif
diff --git a/netinet/tcp_input.c b/netinet/tcp_input.c
new file mode 100644
index 0000000..e62f1eb
--- /dev/null
+++ b/netinet/tcp_input.c
@@ -0,0 +1,2151 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opt_tcpdebug.h"
+
+#ifndef TUBA_INCLUDE
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <errno.h>
+#include <sys/syslog.h>
+
+#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+static struct tcpiphdr tcp_saveti;
+#endif
+
+static int tcprexmtthresh = 3;
+tcp_seq tcp_iss;
+tcp_cc tcp_ccgen;
+
+struct tcpstat tcpstat;
+SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats,
+ CTLFLAG_RD, &tcpstat , tcpstat, "");
+
+static int log_in_vain = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
+ &log_in_vain, 0, "");
+
+u_long tcp_now;
+struct inpcbhead tcb;
+struct inpcbinfo tcbinfo;
+
+static void tcp_dooptions(struct tcpcb *,
+ u_char *, int, struct tcpiphdr *, struct tcpopt *);
+static void tcp_pulloutofband(struct socket *,
+ struct tcpiphdr *, struct mbuf *);
+static int tcp_reass(struct tcpcb *, struct tcpiphdr *, struct mbuf *);
+static void tcp_xmit_timer(struct tcpcb *, int);
+
+#endif /* TUBA_INCLUDE */
+
+/*
+ * Insert segment ti into reassembly queue of tcp with
+ * control block tp. Return TH_FIN if reassembly now includes
+ * a segment with FIN. The macro form does the common case inline
+ * (segment is the next to be received on an established connection,
+ * and the queue is empty), avoiding linkage into and removal
+ * from the queue and repetition of various conversions.
+ * Set DELACK for segments received in order, but ack immediately
+ * when segments are out of order (so fast retransmit can work).
+ */
+#ifdef TCP_ACK_HACK
+#define TCP_REASS(tp, ti, m, so, flags) { \
+ if ((ti)->ti_seq == (tp)->rcv_nxt && \
+ (tp)->seg_next == (struct tcpiphdr *)(tp) && \
+ (tp)->t_state == TCPS_ESTABLISHED) { \
+ if (ti->ti_flags & TH_PUSH) \
+ tp->t_flags |= TF_ACKNOW; \
+ else \
+ tp->t_flags |= TF_DELACK; \
+ (tp)->rcv_nxt += (ti)->ti_len; \
+ flags = (ti)->ti_flags & TH_FIN; \
+ tcpstat.tcps_rcvpack++;\
+ tcpstat.tcps_rcvbyte += (ti)->ti_len;\
+ sbappend(&(so)->so_rcv, (m)); \
+ sorwakeup(so); \
+ } else { \
+ (flags) = tcp_reass((tp), (ti), (m)); \
+ tp->t_flags |= TF_ACKNOW; \
+ } \
+}
+#else
+#define TCP_REASS(tp, ti, m, so, flags) { \
+ if ((ti)->ti_seq == (tp)->rcv_nxt && \
+ (tp)->seg_next == (struct tcpiphdr *)(tp) && \
+ (tp)->t_state == TCPS_ESTABLISHED) { \
+ tp->t_flags |= TF_DELACK; \
+ (tp)->rcv_nxt += (ti)->ti_len; \
+ flags = (ti)->ti_flags & TH_FIN; \
+ tcpstat.tcps_rcvpack++;\
+ tcpstat.tcps_rcvbyte += (ti)->ti_len;\
+ sbappend(&(so)->so_rcv, (m)); \
+ sorwakeup(so); \
+ } else { \
+ (flags) = tcp_reass((tp), (ti), (m)); \
+ tp->t_flags |= TF_ACKNOW; \
+ } \
+}
+#endif
+#ifndef TUBA_INCLUDE
+
+static int
+tcp_reass(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m)
+{
+ register struct tcpiphdr *q;
+ struct socket *so = tp->t_inpcb->inp_socket;
+ int flags;
+ /*
+	 * Call with ti==0 after becoming established to
+ * force pre-ESTABLISHED data up to user socket.
+ */
+ if (ti == 0)
+ goto present;
+
+ /*
+ * Find a segment which begins after this one does.
+ */
+ for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
+ q = (struct tcpiphdr *)q->ti_next)
+ if (SEQ_GT(q->ti_seq, ti->ti_seq))
+ break;
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us.
+ */
+ if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
+ register int i;
+ q = (struct tcpiphdr *)q->ti_prev;
+ /* conversion to int (in i) handles seq wraparound */
+ i = q->ti_seq + q->ti_len - ti->ti_seq;
+ if (i > 0) {
+ if (i >= ti->ti_len) {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ m_freem(m);
+ /*
+ * Try to present any queued data
+ * at the left window edge to the user.
+ * This is needed after the 3-WHS
+ * completes.
+ */
+ goto present; /* ??? */
+ }
+ m_adj(m, i);
+ ti->ti_len -= i;
+ ti->ti_seq += i;
+ }
+ q = (struct tcpiphdr *)(q->ti_next);
+ }
+ tcpstat.tcps_rcvoopack++;
+ tcpstat.tcps_rcvoobyte += ti->ti_len;
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
+ STR32_UNALGN(ti,m);
+#else
+ REASS_MBUF(ti) = m; /* XXX */
+#endif
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ */
+ while (q != (struct tcpiphdr *)tp) {
+ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
+ if (i <= 0)
+ break;
+ if (i < q->ti_len) {
+ q->ti_seq += i;
+ q->ti_len -= i;
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
+ LD32_UNALGN(q,m);
+ m_adj(m, i);
+#else
+ m_adj(REASS_MBUF(q), i);
+#endif
+ break;
+ }
+ q = (struct tcpiphdr *)q->ti_next;
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
+ LD32_UNALGN((struct tcpiphdr *)q->ti_prev,m);
+#else
+ m = REASS_MBUF((struct tcpiphdr *)q->ti_prev);
+#endif
+ remque(q->ti_prev);
+ m_freem(m);
+ }
+
+ /*
+ * Stick new segment in its place.
+ */
+ insque(ti, q->ti_prev);
+
+present:
+ /*
+ * Present data to user, advancing rcv_nxt through
+ * completed sequence space.
+ */
+ if (!TCPS_HAVEESTABLISHED(tp->t_state))
+ return (0);
+ ti = tp->seg_next;
+ if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
+ return (0);
+ do {
+ tp->rcv_nxt += ti->ti_len;
+ flags = ti->ti_flags & TH_FIN;
+ remque(ti);
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
+ LD32_UNALGN(ti,m);
+#else
+ m = REASS_MBUF(ti);
+#endif
+ ti = (struct tcpiphdr *)ti->ti_next;
+ if (so->so_state & SS_CANTRCVMORE)
+ m_freem(m);
+ else
+ sbappend(&so->so_rcv, m);
+ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
+ sorwakeup(so);
+ return (flags);
+}
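+
+/*
+ * Worked example of the overlap arithmetic above (illustrative only).  The
+ * sequence-space comparisons use signed 32-bit subtraction (see SEQ_LT and
+ * SEQ_GT in tcp_seq.h), which keeps them correct across wraparound.
+ * Suppose the preceding queued segment q starts at seq 4294967290 with
+ * ti_len 10, so it ends at seq 4 (mod 2^32), and the new segment ti starts
+ * at seq 2:
+ *
+ *	i = q->ti_seq + q->ti_len - ti->ti_seq
+ *	  = 4294967290 + 10 - 2 = 2	(mod 2^32)
+ *
+ * i is positive but smaller than ti_len, so m_adj(m, 2) trims the two
+ * duplicated bytes and ti_seq advances to 4 before the segment is queued.
+ */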
+
+/*
+ * TCP input routine, follows pages 65-76 of the
+ * protocol specification dated September, 1981 very closely.
+ */
+void
+tcp_input(struct mbuf *m, int iphlen)
+{
+ register struct tcpiphdr *ti;
+ register struct inpcb *inp;
+ u_char *optp = NULL;
+ int optlen = 0;
+ int len, tlen, off;
+ register struct tcpcb *tp = 0;
+ register int tiflags;
+ struct socket *so = 0;
+ int todrop, acked, ourfinisacked, needoutput = 0;
+ struct in_addr laddr;
+ int dropsocket = 0;
+ int iss = 0;
+ u_long tiwin;
+ struct tcpopt to; /* options in this segment */
+ struct rmxp_tao *taop; /* pointer to our TAO cache entry */
+ struct rmxp_tao tao_noncached; /* in case there's no cached entry */
+#ifdef TCPDEBUG
+ short ostate = 0;
+#endif
+
+ bzero((char *)&to, sizeof(to));
+
+ tcpstat.tcps_rcvtotal++;
+ /*
+ * Get IP and TCP header together in first mbuf.
+ * Note: IP leaves IP header in first mbuf.
+ */
+ ti = mtod(m, struct tcpiphdr *);
+ if (iphlen > sizeof (struct ip))
+ ip_stripoptions(m, (struct mbuf *)0);
+ if (m->m_len < sizeof (struct tcpiphdr)) {
+ if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+ tcpstat.tcps_rcvshort++;
+ return;
+ }
+ ti = mtod(m, struct tcpiphdr *);
+ }
+
+ /*
+ * Checksum extended TCP header and data.
+ */
+ tlen = ((struct ip *)ti)->ip_len;
+ len = sizeof (struct ip) + tlen;
+ ti->ti_next = ti->ti_prev = 0;
+ ti->ti_x1 = 0;
+ ti->ti_len = (u_short)tlen;
+ HTONS(ti->ti_len);
+ ti->ti_sum = in_cksum(m, len);
+ if (ti->ti_sum) {
+ tcpstat.tcps_rcvbadsum++;
+ goto drop;
+ }
+#endif /* TUBA_INCLUDE */
+
+ /*
+ * Check that TCP offset makes sense,
+ * pull out TCP options and adjust length. XXX
+ */
+ off = ti->ti_off << 2;
+ if (off < sizeof (struct tcphdr) || off > tlen) {
+ tcpstat.tcps_rcvbadoff++;
+ goto drop;
+ }
+ tlen -= off;
+ ti->ti_len = tlen;
+ if (off > sizeof (struct tcphdr)) {
+ if (m->m_len < sizeof(struct ip) + off) {
+ if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
+ tcpstat.tcps_rcvshort++;
+ return;
+ }
+ ti = mtod(m, struct tcpiphdr *);
+ }
+ optlen = off - sizeof (struct tcphdr);
+ optp = mtod(m, u_char *) + sizeof (struct tcpiphdr);
+ }
+ tiflags = ti->ti_flags;
+
+ /*
+ * Convert TCP protocol specific fields to host format.
+ */
+ NTOHL(ti->ti_seq);
+ NTOHL(ti->ti_ack);
+ NTOHS(ti->ti_win);
+ NTOHS(ti->ti_urp);
+
+ /*
+ * Drop TCP, IP headers and TCP options.
+ */
+ m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+ m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+
+ /*
+ * Locate pcb for segment.
+ */
+findpcb:
+ inp = in_pcblookuphash(&tcbinfo, ti->ti_src, ti->ti_sport,
+ ti->ti_dst, ti->ti_dport, 1);
+
+ /*
+ * If the state is CLOSED (i.e., TCB does not exist) then
+ * all data in the incoming segment is discarded.
+ * If the TCB exists but is in CLOSED state, it is embryonic,
+ * but should either do a listen or a connect soon.
+ */
+ if (inp == NULL) {
+ if (log_in_vain && tiflags & TH_SYN) {
+ char buf0[INET_ADDRSTRLEN];
+ char buf1[INET_ADDRSTRLEN];
+
+ log(LOG_INFO, "Connection attempt to TCP %s:%d"
+ " from %s:%d\n",
+ inet_ntoa_r(ti->ti_dst, buf0), ntohs(ti->ti_dport),
+ inet_ntoa_r(ti->ti_src, buf1), ntohs(ti->ti_sport));
+ }
+ goto dropwithreset;
+ }
+ tp = intotcpcb(inp);
+ if (tp == 0)
+ goto dropwithreset;
+ if (tp->t_state == TCPS_CLOSED)
+ goto drop;
+
+ /* Unscale the window into a 32-bit value. */
+ if ((tiflags & TH_SYN) == 0)
+ tiwin = ti->ti_win << tp->snd_scale;
+ else
+ tiwin = ti->ti_win;
+
+ so = inp->inp_socket;
+ if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG) {
+ ostate = tp->t_state;
+ tcp_saveti = *ti;
+ }
+#endif
+ if (so->so_options & SO_ACCEPTCONN) {
+ register struct tcpcb *tp0 = tp;
+ struct socket *so2;
+ if ((tiflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
+ /*
+ * Note: dropwithreset makes sure we don't
+ * send a RST in response to a RST.
+ */
+ if (tiflags & TH_ACK) {
+ tcpstat.tcps_badsyn++;
+ goto dropwithreset;
+ }
+ goto drop;
+ }
+ so2 = sonewconn(so, 0);
+ if (so2 == 0) {
+ tcpstat.tcps_listendrop++;
+ so2 = sodropablereq(so);
+ if (so2) {
+ tcp_drop(sototcpcb(so2), ETIMEDOUT);
+ so2 = sonewconn(so, 0);
+ }
+ if (!so2)
+ goto drop;
+ }
+ so = so2;
+ /*
+ * This is ugly, but ....
+ *
+ * Mark socket as temporary until we're
+ * committed to keeping it. The code at
+ * ``drop'' and ``dropwithreset'' check the
+ * flag dropsocket to see if the temporary
+ * socket created here should be discarded.
+ * We mark the socket as discardable until
+ * we're committed to it below in TCPS_LISTEN.
+ */
+ dropsocket++;
+ inp = (struct inpcb *)so->so_pcb;
+ inp->inp_laddr = ti->ti_dst;
+ inp->inp_lport = ti->ti_dport;
+ in_pcbrehash(inp);
+#if BSD>=43
+ inp->inp_options = ip_srcroute();
+#endif
+ tp = intotcpcb(inp);
+ tp->t_state = TCPS_LISTEN;
+ tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);
+
+ /* Compute proper scaling value from buffer space */
+ while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+ TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
+ tp->request_r_scale++;
+ }
+ }
+
+ /*
+ * Segment received on connection.
+ * Reset idle time and keep-alive timer.
+ */
+ tp->t_idle = 0;
+ if (TCPS_HAVEESTABLISHED(tp->t_state))
+ tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+
+ /*
+ * Process options if not in LISTEN state,
+ * else do it below (after getting remote address).
+ */
+ if (tp->t_state != TCPS_LISTEN)
+ tcp_dooptions(tp, optp, optlen, ti, &to);
+
+ /*
+ * Header prediction: check for the two common cases
+ * of a uni-directional data xfer. If the packet has
+ * no control flags, is in-sequence, the window didn't
+ * change and we're not retransmitting, it's a
+ * candidate. If the length is zero and the ack moved
+ * forward, we're the sender side of the xfer. Just
+ * free the data acked & wake any higher level process
+ * that was blocked waiting for space. If the length
+ * is non-zero and the ack didn't move, we're the
+ * receiver side. If we're getting packets in-order
+ * (the reassembly queue is empty), add the data to
+ * the socket buffer and note that we need a delayed ack.
+ * Make sure that the hidden state-flags are also off.
+ * Since we check for TCPS_ESTABLISHED above, it can only
+ * be TH_NEEDSYN.
+ */
+ if (tp->t_state == TCPS_ESTABLISHED &&
+ (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+ ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
+ ((to.to_flags & TOF_TS) == 0 ||
+ TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
+ /*
+ * Using the CC option is compulsory if once started:
+ * the segment is OK if no T/TCP was negotiated or
+ * if the segment has a CC option equal to CCrecv
+ */
+ ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) ||
+ ((to.to_flags & TOF_CC) != 0 && to.to_cc == tp->cc_recv)) &&
+ ti->ti_seq == tp->rcv_nxt &&
+ tiwin && tiwin == tp->snd_wnd &&
+ tp->snd_nxt == tp->snd_max) {
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record the timestamp.
+ * NOTE that the test is modified according to the latest
+ * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+ */
+ if ((to.to_flags & TOF_TS) != 0 &&
+ SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) {
+ tp->ts_recent_age = tcp_now;
+ tp->ts_recent = to.to_tsval;
+ }
+
+ if (ti->ti_len == 0) {
+ if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
+ SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
+ tp->snd_cwnd >= tp->snd_wnd &&
+ tp->t_dupacks < tcprexmtthresh) {
+ /*
+ * this is a pure ack for outstanding data.
+ */
+ ++tcpstat.tcps_predack;
+ if ((to.to_flags & TOF_TS) != 0)
+ tcp_xmit_timer(tp,
+ tcp_now - to.to_tsecr + 1);
+ else if (tp->t_rtt &&
+ SEQ_GT(ti->ti_ack, tp->t_rtseq))
+ tcp_xmit_timer(tp, tp->t_rtt);
+ acked = ti->ti_ack - tp->snd_una;
+ tcpstat.tcps_rcvackpack++;
+ tcpstat.tcps_rcvackbyte += acked;
+ sbdrop(&so->so_snd, acked);
+ tp->snd_una = ti->ti_ack;
+ m_freem(m);
+
+ /*
+ * If all outstanding data are acked, stop
+ * retransmit timer, otherwise restart timer
+ * using current (possibly backed-off) value.
+ * If process is waiting for space,
+ * wakeup/selwakeup/signal. If data
+ * are ready to send, let tcp_output
+ * decide between more output or persist.
+ */
+ if (tp->snd_una == tp->snd_max)
+ tp->t_timer[TCPT_REXMT] = 0;
+ else if (tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+
+ if (so->so_snd.sb_flags & SB_NOTIFY)
+ sowwakeup(so);
+ if (so->so_snd.sb_cc)
+ (void) tcp_output(tp);
+ return;
+ }
+ } else if (ti->ti_ack == tp->snd_una &&
+ tp->seg_next == (struct tcpiphdr *)tp &&
+ ti->ti_len <= sbspace(&so->so_rcv)) {
+ /*
+ * this is a pure, in-sequence data packet
+ * with nothing on the reassembly queue and
+ * we have enough buffer space to take it.
+ */
+ ++tcpstat.tcps_preddat;
+ tp->rcv_nxt += ti->ti_len;
+ tcpstat.tcps_rcvpack++;
+ tcpstat.tcps_rcvbyte += ti->ti_len;
+ /*
+ * Add data to socket buffer.
+ */
+ sbappend(&so->so_rcv, m);
+ sorwakeup(so);
+#ifdef TCP_ACK_HACK
+ /*
+			 * If this is a short packet, then ACK now - with Nagle
+			 * congestion avoidance the sender won't send more until
+			 * it gets an ACK.
+ */
+ if (tiflags & TH_PUSH) {
+ tp->t_flags |= TF_ACKNOW;
+ tcp_output(tp);
+ } else {
+ tp->t_flags |= TF_DELACK;
+ }
+#else
+ tp->t_flags |= TF_DELACK;
+#endif
+ return;
+ }
+ }
+
+ /*
+ * Calculate amount of space in receive window,
+ * and then do TCP input processing.
+ * Receive window is amount of space in rcv queue,
+ * but not less than advertised window.
+ */
+ { int win;
+
+ win = sbspace(&so->so_rcv);
+ if (win < 0)
+ win = 0;
+ tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+ }
+
+ switch (tp->t_state) {
+
+ /*
+ * If the state is LISTEN then ignore segment if it contains an RST.
+ * If the segment contains an ACK then it is bad and send a RST.
+ * If it does not contain a SYN then it is not interesting; drop it.
+ * If it is from this socket, drop it, it must be forged.
+ * Don't bother responding if the destination was a broadcast.
+ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
+ * tp->iss, and send a segment:
+ * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
+ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
+ * Fill in remote peer address fields if not previously specified.
+ * Enter SYN_RECEIVED state, and process any other fields of this
+ * segment in this state.
+ */
+ case TCPS_LISTEN: {
+ struct mbuf *am;
+ register struct sockaddr_in *sin;
+
+ if (tiflags & TH_RST)
+ goto drop;
+ if (tiflags & TH_ACK)
+ goto dropwithreset;
+ if ((tiflags & TH_SYN) == 0)
+ goto drop;
+ if ((ti->ti_dport == ti->ti_sport) &&
+ (ti->ti_dst.s_addr == ti->ti_src.s_addr))
+ goto drop;
+ /*
+ * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
+ * in_broadcast() should never return true on a received
+ * packet with M_BCAST not set.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST) ||
+ IN_MULTICAST(ntohl(ti->ti_dst.s_addr)))
+ goto drop;
+ am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */
+ if (am == NULL)
+ goto drop;
+ am->m_len = sizeof (struct sockaddr_in);
+ sin = mtod(am, struct sockaddr_in *);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = ti->ti_src;
+ sin->sin_port = ti->ti_sport;
+ bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
+ laddr = inp->inp_laddr;
+ if (inp->inp_laddr.s_addr == INADDR_ANY)
+ inp->inp_laddr = ti->ti_dst;
+ if (in_pcbconnect(inp, am)) {
+ inp->inp_laddr = laddr;
+ (void) m_free(am);
+ goto drop;
+ }
+ (void) m_free(am);
+ tp->t_template = tcp_template(tp);
+ if (tp->t_template == 0) {
+ tp = tcp_drop(tp, ENOBUFS);
+ dropsocket = 0; /* socket is already gone */
+ goto drop;
+ }
+ if ((taop = tcp_gettaocache(inp)) == NULL) {
+ taop = &tao_noncached;
+ bzero(taop, sizeof(*taop));
+ }
+ tcp_dooptions(tp, optp, optlen, ti, &to);
+ if (iss)
+ tp->iss = iss;
+ else
+ tp->iss = tcp_iss;
+ tcp_iss += TCP_ISSINCR/4;
+ tp->irs = ti->ti_seq;
+ tcp_sendseqinit(tp);
+ tcp_rcvseqinit(tp);
+ /*
+ * Initialization of the tcpcb for transaction;
+ * set SND.WND = SEG.WND,
+ * initialize CCsend and CCrecv.
+ */
+ tp->snd_wnd = tiwin; /* initial send-window */
+ tp->cc_send = CC_INC(tcp_ccgen);
+ tp->cc_recv = to.to_cc;
+ /*
+ * Perform TAO test on incoming CC (SEG.CC) option, if any.
+ * - compare SEG.CC against cached CC from the same host,
+ * if any.
+		 *   - if SEG.CC > cached value, SYN must be new and is accepted
+ * immediately: save new CC in the cache, mark the socket
+ * connected, enter ESTABLISHED state, turn on flag to
+ * send a SYN in the next segment.
+ * A virtual advertised window is set in rcv_adv to
+ * initialize SWS prevention. Then enter normal segment
+ * processing: drop SYN, process data and FIN.
+ * - otherwise do a normal 3-way handshake.
+ */
+ if ((to.to_flags & TOF_CC) != 0) {
+ if (taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) {
+ taop->tao_cc = to.to_cc;
+ tp->t_state = TCPS_ESTABLISHED;
+
+ /*
+ * If there is a FIN, or if there is data and the
+ * connection is local, then delay SYN,ACK(SYN) in
+ * the hope of piggy-backing it on a response
+ * segment. Otherwise must send ACK now in case
+ * the other side is slow starting.
+ */
+ if ((tiflags & TH_FIN) || (ti->ti_len != 0 &&
+ in_localaddr(inp->inp_faddr)))
+ tp->t_flags |= (TF_DELACK | TF_NEEDSYN);
+ else
+ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
+
+ /*
+ * Limit the `virtual advertised window' to TCP_MAXWIN
+ * here. Even if we requested window scaling, it will
+ * become effective only later when our SYN is acked.
+ */
+ tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN);
+ tcpstat.tcps_connects++;
+ soisconnected(so);
+ tp->t_timer[TCPT_KEEP] = tcp_keepinit;
+ dropsocket = 0; /* committed to socket */
+ tcpstat.tcps_accepts++;
+ goto trimthenstep6;
+ }
+ /* else do standard 3-way handshake */
+ } else {
+ /*
+ * No CC option, but maybe CC.NEW:
+ * invalidate cached value.
+ */
+ taop->tao_cc = 0;
+ }
+ /*
+ * TAO test failed or there was no CC option,
+ * do a standard 3-way handshake.
+ */
+ tp->t_flags |= TF_ACKNOW;
+ tp->t_state = TCPS_SYN_RECEIVED;
+ tp->t_timer[TCPT_KEEP] = tcp_keepinit;
+ dropsocket = 0; /* committed to socket */
+ tcpstat.tcps_accepts++;
+ goto trimthenstep6;
+ }
+
+ /*
+ * If the state is SYN_RECEIVED:
+ * if seg contains SYN/ACK, send a RST.
+ * if seg contains an ACK, but not for our SYN/ACK, send a RST.
+ */
+ case TCPS_SYN_RECEIVED:
+ if (tiflags & TH_ACK) {
+ if (tiflags & TH_SYN) {
+ tcpstat.tcps_badsyn++;
+ goto dropwithreset;
+ }
+ if (SEQ_LEQ(ti->ti_ack, tp->snd_una) ||
+ SEQ_GT(ti->ti_ack, tp->snd_max))
+ goto dropwithreset;
+ }
+ break;
+
+ /*
+ * If the state is SYN_SENT:
+ * if seg contains an ACK, but not for our SYN, drop the input.
+ * if seg contains a RST, then drop the connection.
+ * if seg does not contain SYN, then drop it.
+ * Otherwise this is an acceptable SYN segment
+ * initialize tp->rcv_nxt and tp->irs
+ * if seg contains ack then advance tp->snd_una
+ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
+ * arrange for segment to be acked (eventually)
+ * continue processing rest of data/controls, beginning with URG
+ */
+ case TCPS_SYN_SENT:
+ if ((taop = tcp_gettaocache(inp)) == NULL) {
+ taop = &tao_noncached;
+ bzero(taop, sizeof(*taop));
+ }
+
+ if ((tiflags & TH_ACK) &&
+ (SEQ_LEQ(ti->ti_ack, tp->iss) ||
+ SEQ_GT(ti->ti_ack, tp->snd_max))) {
+ /*
+ * If we have a cached CCsent for the remote host,
+ * hence we haven't just crashed and restarted,
+ * do not send a RST. This may be a retransmission
+ * from the other side after our earlier ACK was lost.
+ * Our new SYN, when it arrives, will serve as the
+ * needed ACK.
+ */
+ if (taop->tao_ccsent != 0)
+ goto drop;
+ else
+ goto dropwithreset;
+ }
+ if (tiflags & TH_RST) {
+ if (tiflags & TH_ACK)
+ tp = tcp_drop(tp, ECONNREFUSED);
+ goto drop;
+ }
+ if ((tiflags & TH_SYN) == 0)
+ goto drop;
+ tp->snd_wnd = ti->ti_win; /* initial send window */
+ tp->cc_recv = to.to_cc; /* foreign CC */
+
+ tp->irs = ti->ti_seq;
+ tcp_rcvseqinit(tp);
+ if (tiflags & TH_ACK) {
+ /*
+ * Our SYN was acked. If segment contains CC.ECHO
+ * option, check it to make sure this segment really
+ * matches our SYN. If not, just drop it as old
+ * duplicate, but send an RST if we're still playing
+ * by the old rules. If no CC.ECHO option, make sure
+ * we don't get fooled into using T/TCP.
+ */
+ if (to.to_flags & TOF_CCECHO) {
+ if (tp->cc_send != to.to_ccecho) {
+ if (taop->tao_ccsent != 0)
+ goto drop;
+ else
+ goto dropwithreset;
+ }
+ } else
+ tp->t_flags &= ~TF_RCVD_CC;
+ tcpstat.tcps_connects++;
+ soisconnected(so);
+ /* Do window scaling on this connection? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->snd_scale = tp->requested_s_scale;
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ /* Segment is acceptable, update cache if undefined. */
+ if (taop->tao_ccsent == 0)
+ taop->tao_ccsent = to.to_ccecho;
+
+ tp->rcv_adv += tp->rcv_wnd;
+ tp->snd_una++; /* SYN is acked */
+ /*
+ * If there's data, delay ACK; if there's also a FIN
+ * ACKNOW will be turned on later.
+ */
+ if (ti->ti_len != 0)
+ tp->t_flags |= TF_DELACK;
+ else
+ tp->t_flags |= TF_ACKNOW;
+ /*
+ * Received <SYN,ACK> in SYN_SENT[*] state.
+ * Transitions:
+ * SYN_SENT --> ESTABLISHED
+ * SYN_SENT* --> FIN_WAIT_1
+ */
+ if (tp->t_flags & TF_NEEDFIN) {
+ tp->t_state = TCPS_FIN_WAIT_1;
+ tp->t_flags &= ~TF_NEEDFIN;
+ tiflags &= ~TH_SYN;
+ } else {
+ tp->t_state = TCPS_ESTABLISHED;
+ tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+ }
+ } else {
+ /*
+ * Received initial SYN in SYN-SENT[*] state => simul-
+ * taneous open. If segment contains CC option and there is
+ * a cached CC, apply TAO test; if it succeeds, connection is
+ * half-synchronized. Otherwise, do 3-way handshake:
+ * SYN-SENT -> SYN-RECEIVED
+ * SYN-SENT* -> SYN-RECEIVED*
+ * If there was no CC option, clear cached CC value.
+ */
+ tp->t_flags |= TF_ACKNOW;
+ tp->t_timer[TCPT_REXMT] = 0;
+ if (to.to_flags & TOF_CC) {
+ if (taop->tao_cc != 0 &&
+ CC_GT(to.to_cc, taop->tao_cc)) {
+ /*
+ * update cache and make transition:
+ * SYN-SENT -> ESTABLISHED*
+ * SYN-SENT* -> FIN-WAIT-1*
+ */
+ taop->tao_cc = to.to_cc;
+ if (tp->t_flags & TF_NEEDFIN) {
+ tp->t_state = TCPS_FIN_WAIT_1;
+ tp->t_flags &= ~TF_NEEDFIN;
+ } else {
+ tp->t_state = TCPS_ESTABLISHED;
+ tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+ }
+ tp->t_flags |= TF_NEEDSYN;
+ } else
+ tp->t_state = TCPS_SYN_RECEIVED;
+ } else {
+ /* CC.NEW or no option => invalidate cache */
+ taop->tao_cc = 0;
+ tp->t_state = TCPS_SYN_RECEIVED;
+ }
+ }
+
+trimthenstep6:
+ /*
+ * Advance ti->ti_seq to correspond to first data byte.
+ * If data, trim to stay within window,
+ * dropping FIN if necessary.
+ */
+ ti->ti_seq++;
+ if (ti->ti_len > tp->rcv_wnd) {
+ todrop = ti->ti_len - tp->rcv_wnd;
+ m_adj(m, -todrop);
+ ti->ti_len = tp->rcv_wnd;
+ tiflags &= ~TH_FIN;
+ tcpstat.tcps_rcvpackafterwin++;
+ tcpstat.tcps_rcvbyteafterwin += todrop;
+ }
+ tp->snd_wl1 = ti->ti_seq - 1;
+ tp->rcv_up = ti->ti_seq;
+ /*
+ * Client side of transaction: already sent SYN and data.
+ * If the remote host used T/TCP to validate the SYN,
+ * our data will be ACK'd; if so, enter normal data segment
+ * processing in the middle of step 5, ack processing.
+ * Otherwise, goto step 6.
+ */
+ if (tiflags & TH_ACK)
+ goto process_ACK;
+ goto step6;
+ /*
+ * If the state is LAST_ACK or CLOSING or TIME_WAIT:
+ * if segment contains a SYN and CC [not CC.NEW] option:
+ * if state == TIME_WAIT and connection duration > MSL,
+ * drop packet and send RST;
+ *
+ * if SEG.CC > CCrecv then is new SYN, and can implicitly
+ * ack the FIN (and data) in retransmission queue.
+ * Complete close and delete TCPCB. Then reprocess
+ * segment, hoping to find new TCPCB in LISTEN state;
+ *
+ * else must be old SYN; drop it.
+ * else do normal processing.
+ */
+ case TCPS_LAST_ACK:
+ case TCPS_CLOSING:
+ case TCPS_TIME_WAIT:
+ if ((tiflags & TH_SYN) &&
+ (to.to_flags & TOF_CC) && tp->cc_recv != 0) {
+ if (tp->t_state == TCPS_TIME_WAIT &&
+ tp->t_duration > TCPTV_MSL)
+ goto dropwithreset;
+ if (CC_GT(to.to_cc, tp->cc_recv)) {
+ tp = tcp_close(tp);
+ goto findpcb;
+ }
+ else
+ goto drop;
+ }
+ break; /* continue normal processing */
+ }
+
+ /*
+ * States other than LISTEN or SYN_SENT.
+ * First check timestamp, if present.
+ * Then check the connection count, if present.
+ * Then check that at least some bytes of segment are within
+ * receive window. If segment begins before rcv_nxt,
+ * drop leading data (and SYN); if nothing left, just ack.
+ *
+ * RFC 1323 PAWS: If we have a timestamp reply on this segment
+ * and it's less than ts_recent, drop it.
+ */
+ if ((to.to_flags & TOF_TS) != 0 && (tiflags & TH_RST) == 0 &&
+ tp->ts_recent && TSTMP_LT(to.to_tsval, tp->ts_recent)) {
+
+ /* Check to see if ts_recent is over 24 days old. */
+ if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
+ /*
+ * Invalidate ts_recent. If this segment updates
+ * ts_recent, the age will be reset later and ts_recent
+ * will get a valid value. If it does not, setting
+ * ts_recent to zero will at least satisfy the
+ * requirement that zero be placed in the timestamp
+ * echo reply when ts_recent isn't valid. The
+ * age isn't reset until we get a valid ts_recent
+ * because we don't want out-of-order segments to be
+ * dropped when ts_recent is old.
+ */
+ tp->ts_recent = 0;
+ } else {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ tcpstat.tcps_pawsdrop++;
+ goto dropafterack;
+ }
+ }
+
+ /*
+ * T/TCP mechanism
+ * If T/TCP was negotiated and the segment doesn't have CC,
+	 * or if its CC is wrong then drop the segment.
+ * RST segments do not have to comply with this.
+ */
+ if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) &&
+ ((to.to_flags & TOF_CC) == 0 || tp->cc_recv != to.to_cc) &&
+ (tiflags & TH_RST) == 0)
+ goto dropafterack;
+
+ todrop = tp->rcv_nxt - ti->ti_seq;
+ if (todrop > 0) {
+ if (tiflags & TH_SYN) {
+ tiflags &= ~TH_SYN;
+ ti->ti_seq++;
+ if (ti->ti_urp > 1)
+ ti->ti_urp--;
+ else
+ tiflags &= ~TH_URG;
+ todrop--;
+ }
+ /*
+ * Following if statement from Stevens, vol. 2, p. 960.
+ */
+ if (todrop > ti->ti_len
+ || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
+ /*
+ * Any valid FIN must be to the left of the window.
+ * At this point the FIN must be a duplicate or out
+ * of sequence; drop it.
+ */
+ tiflags &= ~TH_FIN;
+
+ /*
+ * Send an ACK to resynchronize and drop any data.
+ * But keep on processing for RST or ACK.
+ */
+ tp->t_flags |= TF_ACKNOW;
+ todrop = ti->ti_len;
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += todrop;
+ } else {
+ tcpstat.tcps_rcvpartduppack++;
+ tcpstat.tcps_rcvpartdupbyte += todrop;
+ }
+ m_adj(m, todrop);
+ ti->ti_seq += todrop;
+ ti->ti_len -= todrop;
+ if (ti->ti_urp > todrop)
+ ti->ti_urp -= todrop;
+ else {
+ tiflags &= ~TH_URG;
+ ti->ti_urp = 0;
+ }
+ }
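+
+	/*
+	 * Worked example of the trimming above (illustrative only): if
+	 * rcv_nxt is 1000 and a 512-byte segment arrives with ti_seq 900,
+	 * todrop is 100; those bytes were already delivered, so
+	 * m_adj(m, 100) discards them, ti_seq becomes 1000 and ti_len
+	 * becomes 412, leaving only new data for the window checks below.
+	 */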
+
+ /*
+ * If new data are received on a connection after the
+ * user processes are gone, then RST the other end.
+ */
+ if ((so->so_state & SS_NOFDREF) &&
+ tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
+ tp = tcp_close(tp);
+ tcpstat.tcps_rcvafterclose++;
+ goto dropwithreset;
+ }
+
+ /*
+ * If segment ends after window, drop trailing data
+ * (and PUSH and FIN); if nothing left, just ACK.
+ */
+ todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
+ if (todrop > 0) {
+ tcpstat.tcps_rcvpackafterwin++;
+ if (todrop >= ti->ti_len) {
+ tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
+ /*
+ * If a new connection request is received
+ * while in TIME_WAIT, drop the old connection
+ * and start over if the sequence numbers
+ * are above the previous ones.
+ */
+ if (tiflags & TH_SYN &&
+ tp->t_state == TCPS_TIME_WAIT &&
+ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
+ iss = tp->rcv_nxt + TCP_ISSINCR;
+ tp = tcp_close(tp);
+ goto findpcb;
+ }
+ /*
+ * If window is closed can only take segments at
+ * window edge, and have to drop data and PUSH from
+ * incoming segments. Continue processing, but
+ * remember to ack. Otherwise, drop segment
+ * and ack.
+ */
+ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
+ tp->t_flags |= TF_ACKNOW;
+ tcpstat.tcps_rcvwinprobe++;
+ } else
+ goto dropafterack;
+ } else
+ tcpstat.tcps_rcvbyteafterwin += todrop;
+ m_adj(m, -todrop);
+ ti->ti_len -= todrop;
+ tiflags &= ~(TH_PUSH|TH_FIN);
+ }
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record its timestamp.
+ * NOTE that the test is modified according to the latest
+ * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+ */
+ if ((to.to_flags & TOF_TS) != 0 &&
+ SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) {
+ tp->ts_recent_age = tcp_now;
+ tp->ts_recent = to.to_tsval;
+ }
+
+ /*
+ * If the RST bit is set examine the state:
+ * SYN_RECEIVED STATE:
+ * If passive open, return to LISTEN state.
+ * If active open, inform user that connection was refused.
+ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
+ * Inform user that connection was reset, and close tcb.
+ * CLOSING, LAST_ACK, TIME_WAIT STATES
+ * Close the tcb.
+ */
+ if (tiflags&TH_RST) switch (tp->t_state) {
+
+ case TCPS_SYN_RECEIVED:
+ so->so_error = ECONNREFUSED;
+ goto close;
+
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ so->so_error = ECONNRESET;
+ close:
+ tp->t_state = TCPS_CLOSED;
+ tcpstat.tcps_drops++;
+ tp = tcp_close(tp);
+ goto drop;
+
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ case TCPS_TIME_WAIT:
+ tp = tcp_close(tp);
+ goto drop;
+ }
+
+ /*
+ * If a SYN is in the window, then this is an
+ * error and we send an RST and drop the connection.
+ */
+ if (tiflags & TH_SYN) {
+ tp = tcp_drop(tp, ECONNRESET);
+ goto dropwithreset;
+ }
+
+ /*
+ * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN
+ * flag is on (half-synchronized state), then queue data for
+ * later processing; else drop segment and return.
+ */
+ if ((tiflags & TH_ACK) == 0) {
+ if (tp->t_state == TCPS_SYN_RECEIVED ||
+ (tp->t_flags & TF_NEEDSYN))
+ goto step6;
+ else
+ goto drop;
+ }
+
+ /*
+ * Ack processing.
+ */
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
+ * ESTABLISHED state and continue processing.
+ * The ACK was checked above.
+ */
+ case TCPS_SYN_RECEIVED:
+
+ tcpstat.tcps_connects++;
+ soisconnected(so);
+ /* Do window scaling? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->snd_scale = tp->requested_s_scale;
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ /*
+ * Upon successful completion of 3-way handshake,
+ * update cache.CC if it was undefined, pass any queued
+ * data to the user, and advance state appropriately.
+ */
+ if ((taop = tcp_gettaocache(inp)) != NULL &&
+ taop->tao_cc == 0)
+ taop->tao_cc = tp->cc_recv;
+
+ /*
+ * Make transitions:
+ * SYN-RECEIVED -> ESTABLISHED
+ * SYN-RECEIVED* -> FIN-WAIT-1
+ */
+ if (tp->t_flags & TF_NEEDFIN) {
+ tp->t_state = TCPS_FIN_WAIT_1;
+ tp->t_flags &= ~TF_NEEDFIN;
+ } else {
+ tp->t_state = TCPS_ESTABLISHED;
+ tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+ }
+ /*
+ * If segment contains data or ACK, will call tcp_reass()
+ * later; if not, do so now to pass queued data to user.
+ */
+ if (ti->ti_len == 0 && (tiflags & TH_FIN) == 0)
+ (void) tcp_reass(tp, (struct tcpiphdr *)0,
+ (struct mbuf *)0);
+ tp->snd_wl1 = ti->ti_seq - 1;
+ /* fall into ... */
+
+ /*
+ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+ * ACKs. If the ack is in the range
+ * tp->snd_una < ti->ti_ack <= tp->snd_max
+ * then advance tp->snd_una to ti->ti_ack and drop
+ * data from the retransmission queue. If this ACK reflects
+ * more up to date window information we update our window information.
+ */
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ case TCPS_TIME_WAIT:
+
+ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
+ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
+ tcpstat.tcps_rcvdupack++;
+ /*
+ * If we have outstanding data (other than
+ * a window probe), this is a completely
+ * duplicate ack (ie, window info didn't
+ * change), the ack is the biggest we've
+ * seen and we've seen exactly our rexmt
+			 * threshold of them, assume a packet
+ * has been dropped and retransmit it.
+ * Kludge snd_nxt & the congestion
+ * window so we send only this one
+ * packet.
+ *
+ * We know we're losing at the current
+ * window size so do congestion avoidance
+ * (set ssthresh to half the current window
+ * and pull our congestion window back to
+ * the new ssthresh).
+ *
+ * Dup acks mean that packets have left the
+ * network (they're now cached at the receiver)
+ * so bump cwnd by the amount in the receiver
+ * to keep a constant cwnd packets in the
+ * network.
+ */
+ if (tp->t_timer[TCPT_REXMT] == 0 ||
+ ti->ti_ack != tp->snd_una)
+ tp->t_dupacks = 0;
+ else if (++tp->t_dupacks == tcprexmtthresh) {
+ tcp_seq onxt = tp->snd_nxt;
+ u_int win =
+ min(tp->snd_wnd, tp->snd_cwnd) / 2 /
+ tp->t_maxseg;
+
+ if (win < 2)
+ win = 2;
+ tp->snd_ssthresh = win * tp->t_maxseg;
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->t_rtt = 0;
+ tp->snd_nxt = ti->ti_ack;
+ tp->snd_cwnd = tp->t_maxseg;
+ (void) tcp_output(tp);
+ tp->snd_cwnd = tp->snd_ssthresh +
+ tp->t_maxseg * tp->t_dupacks;
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ goto drop;
+ } else if (tp->t_dupacks > tcprexmtthresh) {
+ tp->snd_cwnd += tp->t_maxseg;
+ (void) tcp_output(tp);
+ goto drop;
+ }
+ } else
+ tp->t_dupacks = 0;
+ break;
+ }
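+
+		/*
+		 * Worked example of the fast-retransmit math above
+		 * (illustrative only): with t_maxseg = 512 and
+		 * min(snd_wnd, snd_cwnd) = 8192, the third duplicate ACK
+		 * computes win = 8192 / 2 / 512 = 8, so ssthresh becomes
+		 * 8 * 512 = 4096.  cwnd is pinned to one segment (512) for
+		 * the retransmission, then reinflated to 4096 + 3 * 512 =
+		 * 5632, and each further duplicate ACK adds another 512
+		 * until new data is ACKed.
+		 */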
+ /*
+ * If the congestion window was inflated to account
+ * for the other side's cached packets, retract it.
+ */
+ if (tp->t_dupacks >= tcprexmtthresh &&
+ tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->t_dupacks = 0;
+ if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
+ tcpstat.tcps_rcvacktoomuch++;
+ goto dropafterack;
+ }
+ /*
+ * If we reach this point, ACK is not a duplicate,
+ * i.e., it ACKs something we sent.
+ */
+ if (tp->t_flags & TF_NEEDSYN) {
+ /*
+ * T/TCP: Connection was half-synchronized, and our
+ * SYN has been ACK'd (so connection is now fully
+ * synchronized). Go to non-starred state,
+ * increment snd_una for ACK of SYN, and check if
+ * we can do window scaling.
+ */
+ tp->t_flags &= ~TF_NEEDSYN;
+ tp->snd_una++;
+ /* Do window scaling? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->snd_scale = tp->requested_s_scale;
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ }
+
+process_ACK:
+ acked = ti->ti_ack - tp->snd_una;
+ tcpstat.tcps_rcvackpack++;
+ tcpstat.tcps_rcvackbyte += acked;
+
+ /*
+ * If we have a timestamp reply, update smoothed
+ * round trip time. If no timestamp is present but
+ * transmit timer is running and timed sequence
+ * number was acked, update smoothed round trip time.
+ * Since we now have an rtt measurement, cancel the
+ * timer backoff (cf., Phil Karn's retransmit alg.).
+ * Recompute the initial retransmit timer.
+ */
+ if (to.to_flags & TOF_TS)
+ tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1);
+ else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
+ tcp_xmit_timer(tp,tp->t_rtt);
+
+ /*
+ * If all outstanding data is acked, stop retransmit
+ * timer and remember to restart (more output or persist).
+ * If there is more data to be acked, restart retransmit
+ * timer, using current (possibly backed-off) value.
+ */
+ if (ti->ti_ack == tp->snd_max) {
+ tp->t_timer[TCPT_REXMT] = 0;
+ needoutput = 1;
+ } else if (tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+
+ /*
+ * If no data (only SYN) was ACK'd,
+ * skip rest of ACK processing.
+ */
+ if (acked == 0)
+ goto step6;
+
+ /*
+ * When new data is acked, open the congestion window.
+ * If the window gives us less than ssthresh packets
+ * in flight, open exponentially (maxseg per packet).
+ * Otherwise open linearly: maxseg per window
+ * (maxseg^2 / cwnd per packet).
+ */
+ {
+ register u_int cw = tp->snd_cwnd;
+ register u_int incr = tp->t_maxseg;
+
+ if (cw > tp->snd_ssthresh)
+ incr = incr * incr / cw;
+ tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
+ }
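+	/*
+	 * Worked example of the window opening above (illustrative only):
+	 * with t_maxseg = 512, a cwnd of 2048 below ssthresh grows by a
+	 * full segment per ACK (slow start: 2048 -> 2560), while a cwnd of
+	 * 8192 above ssthresh grows by 512 * 512 / 8192 = 32 bytes per ACK,
+	 * i.e. roughly one segment per round trip, capped at
+	 * TCP_MAXWIN << snd_scale.
+	 */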
+ if (acked > so->so_snd.sb_cc) {
+ tp->snd_wnd -= so->so_snd.sb_cc;
+ sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
+ ourfinisacked = 1;
+ } else {
+ sbdrop(&so->so_snd, acked);
+ tp->snd_wnd -= acked;
+ ourfinisacked = 0;
+ }
+ if (so->so_snd.sb_flags & SB_NOTIFY)
+ sowwakeup(so);
+ tp->snd_una = ti->ti_ack;
+ if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+ tp->snd_nxt = tp->snd_una;
+
+ switch (tp->t_state) {
+
+ /*
+ * In FIN_WAIT_1 STATE in addition to the processing
+ * for the ESTABLISHED state if our FIN is now acknowledged
+ * then enter FIN_WAIT_2.
+ */
+ case TCPS_FIN_WAIT_1:
+ if (ourfinisacked) {
+ /*
+ * If we can't receive any more
+ * data, then closing user can proceed.
+ * Starting the timer is contrary to the
+ * specification, but if we don't get a FIN
+ * we'll hang forever.
+ */
+ if (so->so_state & SS_CANTRCVMORE) {
+ soisdisconnected(so);
+ tp->t_timer[TCPT_2MSL] = tcp_maxidle;
+ }
+ tp->t_state = TCPS_FIN_WAIT_2;
+ }
+ break;
+
+ /*
+ * In CLOSING STATE in addition to the processing for
+ * the ESTABLISHED state if the ACK acknowledges our FIN
+ * then enter the TIME-WAIT state, otherwise ignore
+ * the segment.
+ */
+ case TCPS_CLOSING:
+ if (ourfinisacked) {
+ tp->t_state = TCPS_TIME_WAIT;
+ tcp_canceltimers(tp);
+ /* Shorten TIME_WAIT [RFC-1644, p.28] */
+ if (tp->cc_recv != 0 &&
+ tp->t_duration < TCPTV_MSL)
+ tp->t_timer[TCPT_2MSL] =
+ tp->t_rxtcur * TCPTV_TWTRUNC;
+ else
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ soisdisconnected(so);
+ }
+ break;
+
+ /*
+ * In LAST_ACK, we may still be waiting for data to drain
+ * and/or to be acked, as well as for the ack of our FIN.
+ * If our FIN is now acknowledged, delete the TCB,
+ * enter the closed state and return.
+ */
+ case TCPS_LAST_ACK:
+ if (ourfinisacked) {
+ tp = tcp_close(tp);
+ goto drop;
+ }
+ break;
+
+ /*
+ * In TIME_WAIT state the only thing that should arrive
+ * is a retransmission of the remote FIN. Acknowledge
+ * it and restart the finack timer.
+ */
+ case TCPS_TIME_WAIT:
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ goto dropafterack;
+ }
+ }
+
+step6:
+ /*
+ * Update window information.
+ * Don't look at window if no ACK: TAC's send garbage on first SYN.
+ */
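+ /*
+ * snd_wl1/snd_wl2 record the seq/ack of the segment that last
+ * updated the window, so only segments carrying newer information
+ * (or a larger window for the same seq/ack) are accepted here.
+ */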
+ if ((tiflags & TH_ACK) &&
+ (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
+ (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
+ (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
+ /* keep track of pure window updates */
+ if (ti->ti_len == 0 &&
+ tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
+ tcpstat.tcps_rcvwinupd++;
+ tp->snd_wnd = tiwin;
+ tp->snd_wl1 = ti->ti_seq;
+ tp->snd_wl2 = ti->ti_ack;
+ if (tp->snd_wnd > tp->max_sndwnd)
+ tp->max_sndwnd = tp->snd_wnd;
+ needoutput = 1;
+ }
+
+ /*
+ * Process segments with URG.
+ */
+ if ((tiflags & TH_URG) && ti->ti_urp &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ /*
+ * This is a kludge, but if we receive and accept
+ * random urgent pointers, we'll crash in
+ * soreceive. It's hard to imagine someone
+ * actually wanting to send this much urgent data.
+ */
+ if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) {
+ ti->ti_urp = 0; /* XXX */
+ tiflags &= ~TH_URG; /* XXX */
+ goto dodata; /* XXX */
+ }
+ /*
+ * If this segment advances the known urgent pointer,
+ * then mark the data stream. This should not happen
+ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
+ * a FIN has been received from the remote side.
+ * In these states we ignore the URG.
+ *
+ * According to RFC961 (Assigned Protocols),
+ * the urgent pointer points to the last octet
+ * of urgent data. We continue, however,
+ * to consider it to indicate the first octet
+ * of data past the urgent section as the original
+ * spec states (in one of two places).
+ */
+ if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
+ tp->rcv_up = ti->ti_seq + ti->ti_urp;
+ so->so_oobmark = so->so_rcv.sb_cc +
+ (tp->rcv_up - tp->rcv_nxt) - 1;
+ if (so->so_oobmark == 0)
+ so->so_state |= SS_RCVATMARK;
+ sohasoutofband(so);
+ tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+ }
+ /*
+ * Remove out of band data so it doesn't get presented to the user.
+ * This can happen independent of advancing the URG pointer,
+ * but if two URG's are pending at once, some out-of-band
+ * data may creep in... ick.
+ */
+ if (ti->ti_urp <= (u_long)ti->ti_len
+#ifdef SO_OOBINLINE
+ && (so->so_options & SO_OOBINLINE) == 0
+#endif
+ )
+ tcp_pulloutofband(so, ti, m);
+ } else
+ /*
+ * If no out of band data is expected,
+ * pull receive urgent pointer along
+ * with the receive window.
+ */
+ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+ tp->rcv_up = tp->rcv_nxt;
+dodata: /* XXX */
+
+ /*
+ * Process the segment text, merging it into the TCP sequencing queue,
+ * and arranging for acknowledgment of receipt if necessary.
+ * This process logically involves adjusting tp->rcv_wnd as data
+ * is presented to the user (this happens in tcp_usrreq.c,
+ * case PRU_RCVD). If a FIN has already been received on this
+ * connection then we just ignore the text.
+ */
+ if ((ti->ti_len || (tiflags&TH_FIN)) &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ TCP_REASS(tp, ti, m, so, tiflags);
+ /*
+ * Note the amount of data that peer has sent into
+ * our window, in order to estimate the sender's
+ * buffer size.
+ */
+ len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+ } else {
+ m_freem(m);
+ tiflags &= ~TH_FIN;
+ }
+
+ /*
+ * If FIN is received ACK the FIN and let the user know
+ * that the connection is closing.
+ */
+ if (tiflags & TH_FIN) {
+ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ socantrcvmore(so);
+ /*
+ * If connection is half-synchronized
+ * (ie NEEDSYN flag on) then delay ACK,
+ * so it may be piggybacked when SYN is sent.
+ * Otherwise, since we received a FIN then no
+ * more input can be expected, send ACK now.
+ */
+ if (tp->t_flags & TF_NEEDSYN)
+ tp->t_flags |= TF_DELACK;
+ else
+ tp->t_flags |= TF_ACKNOW;
+ tp->rcv_nxt++;
+ }
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED and ESTABLISHED STATES
+ * enter the CLOSE_WAIT state.
+ */
+ case TCPS_SYN_RECEIVED:
+ case TCPS_ESTABLISHED:
+ tp->t_state = TCPS_CLOSE_WAIT;
+ break;
+
+ /*
+ * If still in FIN_WAIT_1 STATE FIN has not been acked so
+ * enter the CLOSING state.
+ */
+ case TCPS_FIN_WAIT_1:
+ tp->t_state = TCPS_CLOSING;
+ break;
+
+ /*
+ * In FIN_WAIT_2 state enter the TIME_WAIT state,
+ * starting the time-wait timer, turning off the other
+ * standard timers.
+ */
+ case TCPS_FIN_WAIT_2:
+ tp->t_state = TCPS_TIME_WAIT;
+ tcp_canceltimers(tp);
+ /* Shorten TIME_WAIT [RFC-1644, p.28] */
+ if (tp->cc_recv != 0 &&
+ tp->t_duration < TCPTV_MSL) {
+ tp->t_timer[TCPT_2MSL] =
+ tp->t_rxtcur * TCPTV_TWTRUNC;
+ /* For transaction client, force ACK now. */
+ tp->t_flags |= TF_ACKNOW;
+ }
+ else
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ soisdisconnected(so);
+ break;
+
+ /*
+ * In TIME_WAIT state restart the 2 MSL time_wait timer.
+ */
+ case TCPS_TIME_WAIT:
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ break;
+ }
+ }
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
+#endif
+
+ /*
+ * Return any desired output.
+ */
+ if (needoutput || (tp->t_flags & TF_ACKNOW))
+ (void) tcp_output(tp);
+ return;
+
+dropafterack:
+ /*
+ * Generate an ACK dropping incoming segment if it occupies
+ * sequence space, where the ACK reflects our state.
+ */
+ if (tiflags & TH_RST)
+ goto drop;
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
+#endif
+ m_freem(m);
+ tp->t_flags |= TF_ACKNOW;
+ (void) tcp_output(tp);
+ return;
+
+dropwithreset:
+ /*
+ * Generate a RST, dropping incoming segment.
+ * Make ACK acceptable to originator of segment.
+ * Don't bother to respond if destination was broadcast/multicast.
+ */
+ if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) ||
+ IN_MULTICAST(ntohl(ti->ti_dst.s_addr)))
+ goto drop;
+#ifdef TCPDEBUG
+ if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
+#endif
+ if (tiflags & TH_ACK)
+ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
+ else {
+ if (tiflags & TH_SYN)
+ ti->ti_len++;
+ tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
+ TH_RST|TH_ACK);
+ }
+ /* destroy temporarily created socket */
+ if (dropsocket)
+ (void) soabort(so);
+ return;
+
+drop:
+ /*
+ * Drop space held by incoming segment and return.
+ */
+#ifdef TCPDEBUG
+ if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
+#endif
+ m_freem(m);
+ /* destroy temporarily created socket */
+ if (dropsocket)
+ (void) soabort(so);
+ return;
+#ifndef TUBA_INCLUDE
+}
+
+static void
+tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcpiphdr *ti,
+ struct tcpopt *to)
+{
+ u_short mss = 0;
+ int opt, optlen;
+
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[0];
+ if (opt == TCPOPT_EOL)
+ break;
+ if (opt == TCPOPT_NOP)
+ optlen = 1;
+ else {
+ optlen = cp[1];
+ if (optlen <= 0)
+ break;
+ }
+ switch (opt) {
+
+ default:
+ continue;
+
+ case TCPOPT_MAXSEG:
+ if (optlen != TCPOLEN_MAXSEG)
+ continue;
+ if (!(ti->ti_flags & TH_SYN))
+ continue;
+ bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
+ NTOHS(mss);
+ break;
+
+ case TCPOPT_WINDOW:
+ if (optlen != TCPOLEN_WINDOW)
+ continue;
+ if (!(ti->ti_flags & TH_SYN))
+ continue;
+ tp->t_flags |= TF_RCVD_SCALE;
+ tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+ break;
+
+ case TCPOPT_TIMESTAMP:
+ if (optlen != TCPOLEN_TIMESTAMP)
+ continue;
+ to->to_flags |= TOF_TS;
+ bcopy((char *)cp + 2,
+ (char *)&to->to_tsval, sizeof(to->to_tsval));
+ NTOHL(to->to_tsval);
+ bcopy((char *)cp + 6,
+ (char *)&to->to_tsecr, sizeof(to->to_tsecr));
+ NTOHL(to->to_tsecr);
+
+ /*
+ * A timestamp received in a SYN makes
+ * it ok to send timestamp requests and replies.
+ */
+ if (ti->ti_flags & TH_SYN) {
+ tp->t_flags |= TF_RCVD_TSTMP;
+ tp->ts_recent = to->to_tsval;
+ tp->ts_recent_age = tcp_now;
+ }
+ break;
+ case TCPOPT_CC:
+ if (optlen != TCPOLEN_CC)
+ continue;
+ to->to_flags |= TOF_CC;
+ bcopy((char *)cp + 2,
+ (char *)&to->to_cc, sizeof(to->to_cc));
+ NTOHL(to->to_cc);
+ /*
+ * A CC or CC.new option received in a SYN makes
+ * it ok to send CC in subsequent segments.
+ */
+ if (ti->ti_flags & TH_SYN)
+ tp->t_flags |= TF_RCVD_CC;
+ break;
+ case TCPOPT_CCNEW:
+ if (optlen != TCPOLEN_CC)
+ continue;
+ if (!(ti->ti_flags & TH_SYN))
+ continue;
+ to->to_flags |= TOF_CCNEW;
+ bcopy((char *)cp + 2,
+ (char *)&to->to_cc, sizeof(to->to_cc));
+ NTOHL(to->to_cc);
+ /*
+ * A CC or CC.new option received in a SYN makes
+ * it ok to send CC in subsequent segments.
+ */
+ tp->t_flags |= TF_RCVD_CC;
+ break;
+ case TCPOPT_CCECHO:
+ if (optlen != TCPOLEN_CC)
+ continue;
+ if (!(ti->ti_flags & TH_SYN))
+ continue;
+ to->to_flags |= TOF_CCECHO;
+ bcopy((char *)cp + 2,
+ (char *)&to->to_ccecho, sizeof(to->to_ccecho));
+ NTOHL(to->to_ccecho);
+ break;
+ }
+ }
+ if (ti->ti_flags & TH_SYN)
+ tcp_mss(tp, mss); /* sets t_maxseg */
+}
+
+/*
+ * Pull out of band byte out of a segment so
+ * it doesn't appear in the user's data queue.
+ * It is still reflected in the segment length for
+ * sequencing purposes.
+ */
+static void
+tcp_pulloutofband(struct socket *so, struct tcpiphdr *ti, struct mbuf *m)
+{
+ int cnt = ti->ti_urp - 1;
+
+ while (cnt >= 0) {
+ if (m->m_len > cnt) {
+ char *cp = mtod(m, caddr_t) + cnt;
+ struct tcpcb *tp = sototcpcb(so);
+
+ tp->t_iobc = *cp;
+ tp->t_oobflags |= TCPOOB_HAVEDATA;
+ bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
+ m->m_len--;
+ return;
+ }
+ cnt -= m->m_len;
+ m = m->m_next;
+ if (m == 0)
+ break;
+ }
+ panic("tcp_pulloutofband");
+}
+
+/*
+ * Collect new round-trip time estimate
+ * and update averages and current timeout.
+ */
+static void
+tcp_xmit_timer(struct tcpcb *tp, int rtt)
+{
+ int delta;
+
+ tcpstat.tcps_rttupdated++;
+ tp->t_rttupdated++;
+ if (tp->t_srtt != 0) {
+ /*
+ * srtt is stored as fixed point with 5 bits after the
+ * binary point (i.e., scaled by 32). The following magic
+ * is equivalent to the smoothing algorithm in rfc793 with
+ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+ * point). Adjust rtt to origin 0.
+ */
+ delta = ((rtt - 1) << TCP_DELTA_SHIFT)
+ - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
+
+ if ((tp->t_srtt += delta) <= 0)
+ tp->t_srtt = 1;
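+ /*
+ * With the usual TCP_RTT_SHIFT of 5 and TCP_DELTA_SHIFT of 2, a
+ * measured rtt of 4 ticks against a smoothed srtt of 3 ticks
+ * (t_srtt = 96) gives delta = (3 << 2) - (96 >> 3) = 0, leaving
+ * the estimate unchanged.
+ */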
+
+ /*
+ * We accumulate a smoothed rtt variance (actually, a
+ * smoothed mean difference), then set the retransmit
+ * timer to smoothed rtt + 4 times the smoothed variance.
+ * rttvar is stored as fixed point with 4 bits after the
+ * binary point (scaled by 16). The following is
+ * equivalent to rfc793 smoothing with an alpha of .75
+ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces
+ * rfc793's wired-in beta.
+ */
+ if (delta < 0)
+ delta = -delta;
+ delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
+ if ((tp->t_rttvar += delta) <= 0)
+ tp->t_rttvar = 1;
+ } else {
+ /*
+ * No rtt measurement yet - use the unsmoothed rtt.
+ * Set the variance to half the rtt (so our first
+ * retransmit happens at 3*rtt).
+ */
+ tp->t_srtt = rtt << TCP_RTT_SHIFT;
+ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+ }
+ tp->t_rtt = 0;
+ tp->t_rxtshift = 0;
+
+ /*
+ * the retransmit should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias. When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer. The bias will give us exactly the
+ * 1.5 tick we need. But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks).
+ */
+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+ max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+
+ /*
+ * We received an ack for a packet that wasn't retransmitted;
+ * it is probably safe to discard any error indications we've
+ * received recently. This isn't quite right, but close enough
+ * for now (a route might have failed after we sent a segment,
+ * and the return path might not be symmetrical).
+ */
+ tp->t_softerror = 0;
+}
+
+/*
+ * Determine a reasonable value for maxseg size.
+ * If the route is known, check route for mtu.
+ * If none, use an mss that can be handled on the outgoing
+ * interface without forcing IP to fragment; if bigger than
+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
+ * to utilize large mbufs. If no route is found, route has no mtu,
+ * or the destination isn't local, use a default, hopefully conservative
+ * size (usually 512 or the default IP max size, but no more than the mtu
+ * of the interface), as we can't discover anything about intervening
+ * gateways or networks. We also initialize the congestion/slow start
+ * window to be a single segment if the destination isn't local.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ *
+ * Also take into account the space needed for options that we
+ * send regularly. Make maxseg shorter by that amount to assure
+ * that we can send maxseg amount of data even when the options
+ * are present. Store the upper limit of the length of options plus
+ * data in maxopd.
+ *
+ * NOTE that this routine is only called when we process an incoming
+ * segment, for outgoing segments only tcp_mssopt is called.
+ *
+ * In case of T/TCP, we call this routine during implicit connection
+ * setup as well (offer = -1), to initialize maxseg from the cached
+ * MSS of our peer.
+ */
+void
+tcp_mss(struct tcpcb *tp, int offer)
+{
+ register struct rtentry *rt;
+ struct ifnet *ifp;
+ register int rtt, mss;
+ u_long bufsize;
+ struct inpcb *inp;
+ struct socket *so;
+ struct rmxp_tao *taop;
+ int origoffer = offer;
+
+ inp = tp->t_inpcb;
+ if ((rt = tcp_rtlookup(inp)) == NULL) {
+ tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
+ return;
+ }
+ ifp = rt->rt_ifp;
+ so = inp->inp_socket;
+
+ taop = rmx_taop(rt->rt_rmx);
+ /*
+ * Offer == -1 means that we didn't receive SYN yet,
+ * use cached value in that case;
+ */
+ if (offer == -1)
+ offer = taop->tao_mssopt;
+ /*
+ * Offer == 0 means that there was no MSS on the SYN segment,
+ * in this case we use tcp_mssdflt.
+ */
+ if (offer == 0)
+ offer = tcp_mssdflt;
+ else
+ /*
+ * Sanity check: make sure that maxopd will be large
+ * enough to allow some data on segments even if all of
+ * the option space is used (40 bytes). Otherwise
+ * funny things may happen in tcp_output.
+ */
+ offer = max(offer, 64);
+ taop->tao_mssopt = offer;
+
+ /*
+ * While we're here, check if there's an initial rtt
+ * or rttvar. Convert from the route-table units
+ * to scaled multiples of the slow timeout timer.
+ */
+ if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+ /*
+ * XXX the lock bit for RTT indicates that the value
+ * is also a minimum value; this is subject to time.
+ */
+ if (rt->rt_rmx.rmx_locks & RTV_RTT)
+ tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
+ tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+ tcpstat.tcps_usedrtt++;
+ if (rt->rt_rmx.rmx_rttvar) {
+ tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
+ (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+ tcpstat.tcps_usedrttvar++;
+ } else {
+ /* default variation is +- 1 rtt */
+ tp->t_rttvar =
+ tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+ }
+ TCPT_RANGESET(tp->t_rxtcur,
+ ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+ tp->t_rttmin, TCPTV_REXMTMAX);
+ }
+ /*
+ * if there's an mtu associated with the route, use it
+ */
+ if (rt->rt_rmx.rmx_mtu)
+ mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
+ else {
+ mss = ifp->if_mtu - sizeof(struct tcpiphdr);
+ if (!in_localaddr(inp->inp_faddr))
+ mss = min(mss, tcp_mssdflt);
+ }
+ mss = min(mss, offer);
+ /*
+ * maxopd stores the maximum length of data AND options
+ * in a segment; maxseg is the amount of data in a normal
+ * segment. We need to store this value (maxopd) apart
+ * from maxseg, because now every segment carries options
+ * and thus we normally have somewhat less data in segments.
+ */
+ tp->t_maxopd = mss;
+
+ /*
+ * In case of T/TCP, origoffer==-1 indicates that no segments
+ * were received yet. In this case we just guess, otherwise
+ * we do the same as before T/TCP.
+ */
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+ (origoffer == -1 ||
+ (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
+ mss -= TCPOLEN_TSTAMP_APPA;
+ if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
+ (origoffer == -1 ||
+ (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
+ mss -= TCPOLEN_CC_APPA;
+
+#if (MCLBYTES & (MCLBYTES - 1)) == 0
+ if (mss > MCLBYTES)
+ mss &= ~(MCLBYTES-1);
+#else
+ if (mss > MCLBYTES)
+ mss = mss / MCLBYTES * MCLBYTES;
+#endif
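+ /*
+ * For instance, with 2048-byte mbuf clusters an offered MSS of 4312
+ * (FDDI) is trimmed to 4096 so each segment fills whole clusters.
+ */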
+ /*
+ * If there's a pipesize, change the socket buffer
+ * to that size. Make the socket buffers an integral
+ * number of mss units; if the mss is larger than
+ * the socket buffer, decrease the mss.
+ */
+#ifdef RTV_SPIPE
+ if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
+#endif
+ bufsize = so->so_snd.sb_hiwat;
+ if (bufsize < mss)
+ mss = bufsize;
+ else {
+ bufsize = roundup(bufsize, mss);
+ if (bufsize > sb_max)
+ bufsize = sb_max;
+ (void)sbreserve(&so->so_snd, bufsize);
+ }
+ tp->t_maxseg = mss;
+
+#ifdef RTV_RPIPE
+ if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
+#endif
+ bufsize = so->so_rcv.sb_hiwat;
+ if (bufsize > mss) {
+ bufsize = roundup(bufsize, mss);
+ if (bufsize > sb_max)
+ bufsize = sb_max;
+ (void)sbreserve(&so->so_rcv, bufsize);
+ }
+ /*
+ * Don't force slow-start on local network.
+ */
+ if (!in_localaddr(inp->inp_faddr))
+ tp->snd_cwnd = mss;
+
+ if (rt->rt_rmx.rmx_ssthresh) {
+ /*
+ * There's some sort of gateway or interface
+ * buffer limit on the path. Use this to set
+ * the slow start threshold, but set the
+ * threshold to no less than 2*mss.
+ */
+ tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
+ tcpstat.tcps_usedssthresh++;
+ }
+}
+
+/*
+ * Determine the MSS option to send on an outgoing SYN.
+ */
+int
+tcp_mssopt(struct tcpcb *tp)
+{
+ struct rtentry *rt;
+
+ rt = tcp_rtlookup(tp->t_inpcb);
+ if (rt == NULL)
+ return tcp_mssdflt;
+
+ return rt->rt_ifp->if_mtu - sizeof(struct tcpiphdr);
+}
+#endif /* TUBA_INCLUDE */
diff --git a/netinet/tcp_output.c b/netinet/tcp_output.c
new file mode 100644
index 0000000..0e50329
--- /dev/null
+++ b/netinet/tcp_output.c
@@ -0,0 +1,757 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <errno.h>
+
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#define TCPOUTFLAGS
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+
+#ifdef notyet
+extern struct mbuf *m_copypack();
+#endif
+
+
+/*
+ * Tcp output routine: figure out what should be sent and send it.
+ */
+int
+tcp_output(
+ register struct tcpcb *tp)
+{
+ register struct socket *so = tp->t_inpcb->inp_socket;
+ register long len, win;
+ int off, flags, error;
+ register struct mbuf *m;
+ register struct tcpiphdr *ti;
+ u_char opt[TCP_MAXOLEN];
+ unsigned optlen, hdrlen;
+ int idle, sendalot;
+ struct rmxp_tao *taop;
+ struct rmxp_tao tao_noncached;
+
+ /*
+ * Determine length of data that should be transmitted,
+ * and flags that will be used.
+ * If there is some data or critical controls (SYN, RST)
+ * to send, then transmit; otherwise, investigate further.
+ */
+ idle = (tp->snd_max == tp->snd_una);
+ if (idle && tp->t_idle >= tp->t_rxtcur)
+ /*
+ * We have been idle for "a while" and no acks are
+ * expected to clock out any data we send --
+ * slow start to get ack "clock" running again.
+ */
+ tp->snd_cwnd = tp->t_maxseg;
+again:
+ sendalot = 0;
+ off = tp->snd_nxt - tp->snd_una;
+ win = min(tp->snd_wnd, tp->snd_cwnd);
+
+ flags = tcp_outflags[tp->t_state];
+ /*
+ * Get standard flags, and add SYN or FIN if requested by 'hidden'
+ * state flags.
+ */
+ if (tp->t_flags & TF_NEEDFIN)
+ flags |= TH_FIN;
+ if (tp->t_flags & TF_NEEDSYN)
+ flags |= TH_SYN;
+
+ /*
+ * If in persist timeout with window of 0, send 1 byte.
+ * Otherwise, if window is small but nonzero
+ * and timer expired, we will send what we can
+ * and go to transmit state.
+ */
+ if (tp->t_force) {
+ if (win == 0) {
+ /*
+ * If we still have some data to send, then
+ * clear the FIN bit. Usually this would
+ * happen below when it realizes that we
+ * aren't sending all the data. However,
+ * if we have exactly 1 byte of unsent data,
+ * then it won't clear the FIN bit below,
+ * and if we are in persist state, we wind
+ * up sending the packet without recording
+ * that we sent the FIN bit.
+ *
+ * We can't just blindly clear the FIN bit,
+ * because if we don't have any more data
+ * to send then the probe will be the FIN
+ * itself.
+ */
+ if (off < so->so_snd.sb_cc)
+ flags &= ~TH_FIN;
+ win = 1;
+ } else {
+ tp->t_timer[TCPT_PERSIST] = 0;
+ tp->t_rxtshift = 0;
+ }
+ }
+
+ len = min(so->so_snd.sb_cc, win) - off;
+
+ if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
+ taop = &tao_noncached;
+ bzero(taop, sizeof(*taop));
+ }
+
+ /*
+ * Lop off SYN bit if it has already been sent. However, if this
+ * is SYN-SENT state and if segment contains data and if we don't
+ * know that foreign host supports TAO, suppress sending segment.
+ */
+ if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
+ flags &= ~TH_SYN;
+ off--, len++;
+ if (len > 0 && tp->t_state == TCPS_SYN_SENT &&
+ taop->tao_ccsent == 0)
+ return 0;
+ }
+
+ /*
+ * Be careful not to send data and/or FIN on SYN segments
+ * in cases when no CC option will be sent.
+ * This measure is needed to prevent interoperability problems
+ * with not fully conformant TCP implementations.
+ */
+ if ((flags & TH_SYN) &&
+ ((tp->t_flags & TF_NOOPT) || !(tp->t_flags & TF_REQ_CC) ||
+ ((flags & TH_ACK) && !(tp->t_flags & TF_RCVD_CC)))) {
+ len = 0;
+ flags &= ~TH_FIN;
+ }
+
+ if (len < 0) {
+ /*
+ * If FIN has been sent but not acked,
+ * but we haven't been called to retransmit,
+ * len will be -1. Otherwise, window shrank
+ * after we sent into it. If window shrank to 0,
+ * cancel pending retransmit, pull snd_nxt back
+ * to (closed) window, and set the persist timer
+ * if it isn't already going. If the window didn't
+ * close completely, just wait for an ACK.
+ */
+ len = 0;
+ if (win == 0) {
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->t_rxtshift = 0;
+ tp->snd_nxt = tp->snd_una;
+ if (tp->t_timer[TCPT_PERSIST] == 0)
+ tcp_setpersist(tp);
+ }
+ }
+ if (len > tp->t_maxseg) {
+ len = tp->t_maxseg;
+ sendalot = 1;
+ }
+ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
+ flags &= ~TH_FIN;
+
+ win = sbspace(&so->so_rcv);
+
+ /*
+ * Sender silly window avoidance. If the connection is idle
+ * and we can send all of the queued data, if we can send a
+ * full maximum-size segment, or if we are being forced to
+ * send, do it; otherwise don't bother.
+ * If peer's buffer is tiny, then send
+ * when window is at least half open.
+ * If retransmitting (possibly after persist timer forced us
+ * to send into a small window), then must resend.
+ */
+ if (len) {
+ if (len == tp->t_maxseg)
+ goto send;
+ if ((idle || tp->t_flags & TF_NODELAY) &&
+ (tp->t_flags & TF_NOPUSH) == 0 &&
+ len + off >= so->so_snd.sb_cc)
+ goto send;
+ if (tp->t_force)
+ goto send;
+ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
+ goto send;
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+ goto send;
+ }
+
+ /*
+ * Compare available window to amount of window
+ * known to peer (as advertised window less
+ * next expected input). If the difference is at least two
+ * max size segments, or at least 50% of the maximum possible
+ * window, then want to send a window update to peer.
+ */
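+ /*
+ * For example, with a 16 KB receive buffer and a 1460-byte maximum
+ * segment, an update is sent once the window can open by at least
+ * 2920 bytes (two segments) or by 8 KB (half the buffer).
+ */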
+ if (win > 0) {
+ /*
+ * "adv" is the amount we can increase the window,
+ * taking into account that we are limited by
+ * TCP_MAXWIN << tp->rcv_scale.
+ */
+ long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
+ (tp->rcv_adv - tp->rcv_nxt);
+
+ if (adv >= (long) (2 * tp->t_maxseg))
+ goto send;
+ if (2 * adv >= (long) so->so_rcv.sb_hiwat)
+ goto send;
+ }
+
+ /*
+ * Send if we owe peer an ACK.
+ */
+ if (tp->t_flags & TF_ACKNOW)
+ goto send;
+ if ((flags & TH_RST) ||
+ ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
+ goto send;
+ if (SEQ_GT(tp->snd_up, tp->snd_una))
+ goto send;
+ /*
+ * If our state indicates that FIN should be sent
+ * and we have not yet done so, or we're retransmitting the FIN,
+ * then we need to send.
+ */
+ if (flags & TH_FIN &&
+ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
+ goto send;
+
+ /*
+ * TCP window updates are not reliable, rather a polling protocol
+ * using ``persist'' packets is used to insure receipt of window
+ * updates. The three ``states'' for the output side are:
+ * idle not doing retransmits or persists
+ * persisting to move a small or zero window
+ * (re)transmitting and thereby not persisting
+ *
+ * tp->t_timer[TCPT_PERSIST]
+ * is set when we are in persist state.
+ * tp->t_force
+ * is set when we are called to send a persist packet.
+ * tp->t_timer[TCPT_REXMT]
+ * is set when we are retransmitting
+ * The output side is idle when both timers are zero.
+ *
+ * If send window is too small, there is data to transmit, and no
+ * retransmit or persist is pending, then go to persist state.
+ * If nothing happens soon, send when timer expires:
+ * if window is nonzero, transmit what we can,
+ * otherwise force out a byte.
+ */
+ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
+ tp->t_timer[TCPT_PERSIST] == 0) {
+ tp->t_rxtshift = 0;
+ tcp_setpersist(tp);
+ }
+
+ /*
+ * No reason to send a segment, just return.
+ */
+ return (0);
+
+send:
+ /*
+ * Before ESTABLISHED, force sending of initial options
+ * unless TCP set not to do any options.
+ * NOTE: we assume that the IP/TCP header plus TCP options
+ * always fit in a single mbuf, leaving room for a maximum
+ * link header, i.e.
+ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
+ */
+ optlen = 0;
+ hdrlen = sizeof (struct tcpiphdr);
+ if (flags & TH_SYN) {
+ tp->snd_nxt = tp->iss;
+ if ((tp->t_flags & TF_NOOPT) == 0) {
+ u_short mss;
+
+ opt[0] = TCPOPT_MAXSEG;
+ opt[1] = TCPOLEN_MAXSEG;
+ mss = htons((u_short) tcp_mssopt(tp));
+ (void)memcpy(opt + 2, &mss, sizeof(mss));
+ optlen = TCPOLEN_MAXSEG;
+
+ if ((tp->t_flags & TF_REQ_SCALE) &&
+ ((flags & TH_ACK) == 0 ||
+ (tp->t_flags & TF_RCVD_SCALE))) {
+ *((u_long *) (opt + optlen)) = htonl(
+ TCPOPT_NOP << 24 |
+ TCPOPT_WINDOW << 16 |
+ TCPOLEN_WINDOW << 8 |
+ tp->request_r_scale);
+ optlen += 4;
+ }
+ }
+ }
+
+ /*
+ * Send a timestamp and echo-reply if this is a SYN and our side
+ * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
+ * and our peer have sent timestamps in our SYN's.
+ */
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+ (flags & TH_RST) == 0 &&
+ ((flags & TH_ACK) == 0 ||
+ (tp->t_flags & TF_RCVD_TSTMP))) {
+ u_long *lp = (u_long *)(opt + optlen);
+
+ /* Form timestamp option as shown in appendix A of RFC 1323. */
+ *lp++ = htonl(TCPOPT_TSTAMP_HDR);
+ *lp++ = htonl(tcp_now);
+ *lp = htonl(tp->ts_recent);
+ optlen += TCPOLEN_TSTAMP_APPA;
+ }
+
+ /*
+ * Send `CC-family' options if our side wants to use them (TF_REQ_CC),
+ * options are allowed (!TF_NOOPT) and it's not a RST.
+ */
+ if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
+ (flags & TH_RST) == 0) {
+ switch (flags & (TH_SYN|TH_ACK)) {
+ /*
+ * This is a normal ACK, send CC if we received CC before
+ * from our peer.
+ */
+ case TH_ACK:
+ if (!(tp->t_flags & TF_RCVD_CC))
+ break;
+ /*FALLTHROUGH*/
+
+ /*
+ * We can only get here in T/TCP's SYN_SENT* state, when
+ * we're sending a non-SYN segment without waiting for
+ * the ACK of our SYN. A check above assures that we only
+ * do this if our peer understands T/TCP.
+ */
+ case 0:
+ opt[optlen++] = TCPOPT_NOP;
+ opt[optlen++] = TCPOPT_NOP;
+ opt[optlen++] = TCPOPT_CC;
+ opt[optlen++] = TCPOLEN_CC;
+ *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
+
+ optlen += 4;
+ break;
+
+ /*
+ * This is our initial SYN, check whether we have to use
+ * CC or CC.new.
+ */
+ case TH_SYN:
+ opt[optlen++] = TCPOPT_NOP;
+ opt[optlen++] = TCPOPT_NOP;
+ opt[optlen++] = tp->t_flags & TF_SENDCCNEW ?
+ TCPOPT_CCNEW : TCPOPT_CC;
+ opt[optlen++] = TCPOLEN_CC;
+ *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
+ optlen += 4;
+ break;
+
+ /*
+ * This is a SYN,ACK; send CC and CC.echo if we received
+ * CC from our peer.
+ */
+ case (TH_SYN|TH_ACK):
+ if (tp->t_flags & TF_RCVD_CC) {
+ opt[optlen++] = TCPOPT_NOP;
+ opt[optlen++] = TCPOPT_NOP;
+ opt[optlen++] = TCPOPT_CC;
+ opt[optlen++] = TCPOLEN_CC;
+ *(u_int32_t *)&opt[optlen] =
+ htonl(tp->cc_send);
+ optlen += 4;
+ opt[optlen++] = TCPOPT_NOP;
+ opt[optlen++] = TCPOPT_NOP;
+ opt[optlen++] = TCPOPT_CCECHO;
+ opt[optlen++] = TCPOLEN_CC;
+ *(u_int32_t *)&opt[optlen] =
+ htonl(tp->cc_recv);
+ optlen += 4;
+ }
+ break;
+ }
+ }
+
+ hdrlen += optlen;
+
+ /*
+ * Adjust data length if insertion of options will
+ * bump the packet length beyond the t_maxopd length.
+ * Clear the FIN bit because we cut off the tail of
+ * the segment.
+ */
+ if (len + optlen > tp->t_maxopd) {
+ /*
+ * If there is still more to send, don't close the connection.
+ */
+ flags &= ~TH_FIN;
+ len = tp->t_maxopd - optlen;
+ sendalot = 1;
+ }
+
+/*#ifdef DIAGNOSTIC*/
+ if (max_linkhdr + hdrlen > MHLEN)
+ panic("tcphdr too big");
+/*#endif*/
+
+ /*
+ * Grab a header mbuf, attaching a copy of data to
+ * be transmitted, and initialize the header from
+ * the template for sends on this connection.
+ */
+ if (len) {
+ if (tp->t_force && len == 1)
+ tcpstat.tcps_sndprobe++;
+ else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ tcpstat.tcps_sndrexmitpack++;
+ tcpstat.tcps_sndrexmitbyte += len;
+ } else {
+ tcpstat.tcps_sndpack++;
+ tcpstat.tcps_sndbyte += len;
+ }
+#ifdef notyet
+ if ((m = m_copypack(so->so_snd.sb_mb, off,
+ (int)len, max_linkhdr + hdrlen)) == 0) {
+ error = ENOBUFS;
+ goto out;
+ }
+ /*
+ * m_copypack left space for our hdr; use it.
+ */
+ m->m_len += hdrlen;
+ m->m_data -= hdrlen;
+#else
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto out;
+ }
+ m->m_data += max_linkhdr;
+ m->m_len = hdrlen;
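+ /*
+ * Small payloads are copied directly into the header mbuf;
+ * anything larger is referenced via an m_copy() chain instead.
+ */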
+ if (len <= MHLEN - hdrlen - max_linkhdr) {
+ m_copydata(so->so_snd.sb_mb, off, (int) len,
+ mtod(m, caddr_t) + hdrlen);
+ m->m_len += len;
+ } else {
+ m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
+ if (m->m_next == 0) {
+ (void) m_free(m);
+ error = ENOBUFS;
+ goto out;
+ }
+ }
+#endif
+ /*
+ * If we're sending everything we've got, set PUSH.
+ * (This will keep happy those implementations which only
+ * give data to the user when a buffer fills or
+ * a PUSH comes in.)
+ */
+ if (off + len == so->so_snd.sb_cc)
+ flags |= TH_PUSH;
+ } else {
+ if (tp->t_flags & TF_ACKNOW)
+ tcpstat.tcps_sndacks++;
+ else if (flags & (TH_SYN|TH_FIN|TH_RST))
+ tcpstat.tcps_sndctrl++;
+ else if (SEQ_GT(tp->snd_up, tp->snd_una))
+ tcpstat.tcps_sndurg++;
+ else
+ tcpstat.tcps_sndwinup++;
+
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto out;
+ }
+ m->m_data += max_linkhdr;
+ m->m_len = hdrlen;
+ }
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ ti = mtod(m, struct tcpiphdr *);
+ if (tp->t_template == 0)
+ panic("tcp_output");
+ (void)memcpy(ti, tp->t_template, sizeof (struct tcpiphdr));
+
+ /*
+ * Fill in fields, remembering maximum advertised
+ * window for use in delaying messages about window sizes.
+ * If resending a FIN, be sure not to use a new sequence number.
+ */
+ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
+ tp->snd_nxt == tp->snd_max)
+ tp->snd_nxt--;
+ /*
+ * If we are doing retransmissions, then snd_nxt will
+ * not reflect the first unsent octet. For ACK only
+ * packets, we do not want the sequence number of the
+ * retransmitted packet, we want the sequence number
+ * of the next unsent octet. So, if there is no data
+ * (and no SYN or FIN), use snd_max instead of snd_nxt
+ * when filling in ti_seq. But if we are in persist
+ * state, snd_max might reflect one byte beyond the
+ * right edge of the window, so use snd_nxt in that
+ * case, since we know we aren't doing a retransmission.
+ * (retransmit and persist are mutually exclusive...)
+ */
+ if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
+ ti->ti_seq = htonl(tp->snd_nxt);
+ else
+ ti->ti_seq = htonl(tp->snd_max);
+ ti->ti_ack = htonl(tp->rcv_nxt);
+ if (optlen) {
+ bcopy(opt, ti + 1, optlen);
+ ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
+ }
+ ti->ti_flags = flags;
+ /*
+ * Calculate receive window. Don't shrink window,
+ * but avoid silly window syndrome.
+ */
+ if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
+ win = 0;
+ if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
+ win = (long)(tp->rcv_adv - tp->rcv_nxt);
+ if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+ win = (long)TCP_MAXWIN << tp->rcv_scale;
+ ti->ti_win = htons((u_short) (win>>tp->rcv_scale));
+ if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
+ ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
+ ti->ti_flags |= TH_URG;
+ } else
+ /*
+ * If no urgent pointer to send, then we pull
+ * the urgent pointer to the left edge of the send window
+ * so that it doesn't drift into the send window on sequence
+ * number wraparound.
+ */
+ tp->snd_up = tp->snd_una; /* drag it along */
+
+ /*
+ * Put TCP length in extended header, and then
+ * checksum extended header and data.
+ */
+ if (len + optlen)
+ ti->ti_len = htons((u_short)(sizeof (struct tcphdr) +
+ optlen + len));
+ ti->ti_sum = in_cksum(m, (int)(hdrlen + len));
+
+ /*
+ * In transmit state, time the transmission and arrange for
+ * the retransmit. In persist state, just set snd_max.
+ */
+ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
+ tcp_seq startseq = tp->snd_nxt;
+
+ /*
+ * Advance snd_nxt over sequence space of this segment.
+ */
+ if (flags & (TH_SYN|TH_FIN)) {
+ if (flags & TH_SYN)
+ tp->snd_nxt++;
+ if (flags & TH_FIN) {
+ tp->snd_nxt++;
+ tp->t_flags |= TF_SENTFIN;
+ }
+ }
+ tp->snd_nxt += len;
+ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+ tp->snd_max = tp->snd_nxt;
+ /*
+ * Time this transmission if not a retransmission and
+ * not currently timing anything.
+ */
+ if (tp->t_rtt == 0) {
+ tp->t_rtt = 1;
+ tp->t_rtseq = startseq;
+ tcpstat.tcps_segstimed++;
+ }
+ }
+
+ /*
+ * Set retransmit timer if not currently set,
+ * and not doing an ack or a keep-alive probe.
+ * Initial value for retransmit timer is smoothed
+ * round-trip time + 2 * round-trip time variance.
+ * Initialize shift counter which is used for backoff
+ * of retransmit time.
+ */
+ if (tp->t_timer[TCPT_REXMT] == 0 &&
+ tp->snd_nxt != tp->snd_una) {
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+ if (tp->t_timer[TCPT_PERSIST]) {
+ tp->t_timer[TCPT_PERSIST] = 0;
+ tp->t_rxtshift = 0;
+ }
+ }
+ } else
+ if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
+ tp->snd_max = tp->snd_nxt + len;
+
+#ifdef TCPDEBUG
+ /*
+ * Trace.
+ */
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
+#endif
+
+ /*
+ * Fill in IP length and desired time to live and
+ * send to IP level. There should be a better way
+ * to handle ttl and tos; we could keep them in
+ * the template, but need a way to checksum without them.
+ */
+ m->m_pkthdr.len = hdrlen + len;
+#ifdef TUBA
+ if (tp->t_tuba_pcb)
+ error = tuba_output(m, tp);
+ else
+#endif
+ {
+#if 1
+ struct rtentry *rt;
+#endif
+ ((struct ip *)ti)->ip_len = m->m_pkthdr.len;
+ ((struct ip *)ti)->ip_ttl = tp->t_inpcb->inp_ip_ttl; /* XXX */
+ ((struct ip *)ti)->ip_tos = tp->t_inpcb->inp_ip_tos; /* XXX */
+#if 1
+ /*
+ * See if we should do MTU discovery. We do it only if the following
+ * are true:
+ * 1) we have a valid route to the destination
+ * 2) the MTU is not locked (if it is, then discovery has been
+ * disabled)
+ */
+ if ((rt = tp->t_inpcb->inp_route.ro_rt)
+ && rt->rt_flags & RTF_UP
+ && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
+ ((struct ip *)ti)->ip_off |= IP_DF;
+ }
+#endif
+ error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+ so->so_options & SO_DONTROUTE, 0);
+ }
+ if (error) {
+out:
+ if (error == ENOBUFS) {
+ tcp_quench(tp->t_inpcb, 0);
+ return (0);
+ }
+#if 1
+ if (error == EMSGSIZE) {
+ /*
+ * ip_output() will have already fixed the route
+ * for us. tcp_mtudisc() will, as its last action,
+ * initiate retransmission, so it is important to
+ * not do so here.
+ */
+ tcp_mtudisc(tp->t_inpcb, 0);
+ return 0;
+ }
+#endif
+ if ((error == EHOSTUNREACH || error == ENETDOWN)
+ && TCPS_HAVERCVDSYN(tp->t_state)) {
+ tp->t_softerror = error;
+ return (0);
+ }
+ return (error);
+ }
+ tcpstat.tcps_sndtotal++;
+
+ /*
+ * Data sent (as far as we can tell).
+ * If this advertises a larger window than any other segment,
+ * then remember the size of the advertised window.
+ * Any pending ACK has now been sent.
+ */
+ if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
+ tp->rcv_adv = tp->rcv_nxt + win;
+ tp->last_ack_sent = tp->rcv_nxt;
+ tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
+ if (sendalot)
+ goto again;
+ return (0);
+}
+
+void
+tcp_setpersist(
+ register struct tcpcb *tp)
+{
+ register int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
+
+ if (tp->t_timer[TCPT_REXMT])
+ panic("tcp_output REXMT");
+ /*
+ * Start/restart persistence timer.
+ */
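+ /*
+ * The timer is set from an rtt-derived base value scaled by the
+ * shared retransmit backoff table and clamped to [TCPTV_PERSMIN,
+ * TCPTV_PERSMAX]; t_rxtshift advances the backoff on each call
+ * until it reaches TCP_MAXRXTSHIFT.
+ */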
+ TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
+ t * tcp_backoff[tp->t_rxtshift],
+ TCPTV_PERSMIN, TCPTV_PERSMAX);
+ if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+ tp->t_rxtshift++;
+}
diff --git a/netinet/tcp_seq.h b/netinet/tcp_seq.h
new file mode 100644
index 0000000..c24b294
--- /dev/null
+++ b/netinet/tcp_seq.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 1982, 1986, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_seq.h 8.3 (Berkeley) 6/21/95
+ * $FreeBSD: src/sys/netinet/tcp_seq.h,v 1.26 2006/06/18 14:24:12 andre Exp $
+ */
+
+
+#ifndef _NETINET_TCP_SEQ_H_
+#define _NETINET_TCP_SEQ_H_
+/*
+ * TCP sequence numbers are 32 bit integers operated
+ * on with modular arithmetic. These macros can be
+ * used to compare such integers.
+ */
+#define SEQ_LT(a,b) ((int)((a)-(b)) < 0)
+#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
+#define SEQ_GT(a,b) ((int)((a)-(b)) > 0)
+#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0)
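+/*
+ * For example, SEQ_LT(0xfffffff0, 0x00000010) is true: the unsigned
+ * difference 0xffffffe0 becomes negative when viewed as a signed int,
+ * so a sequence number just below the wrap point correctly compares
+ * as less than one just above it.
+ */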
+
+/* for modulo comparisons of timestamps */
+#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
+#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
+
+/*
+ * TCP connection counts are 32 bit integers operated
+ * on with modular arithmetic. These macros can be
+ * used to compare such integers.
+ */
+#define CC_LT(a,b) ((int)((a)-(b)) < 0)
+#define CC_LEQ(a,b) ((int)((a)-(b)) <= 0)
+#define CC_GT(a,b) ((int)((a)-(b)) > 0)
+#define CC_GEQ(a,b) ((int)((a)-(b)) >= 0)
+
+/* Macro to increment a CC: skip 0 which has a special meaning */
+#define CC_INC(c) (++(c) == 0 ? ++(c) : (c))
+
+/*
+ * Macros to initialize tcp sequence numbers for
+ * send and receive from initial send and receive
+ * sequence numbers.
+ */
+#define tcp_rcvseqinit(tp) \
+ (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1
+
+#define tcp_sendseqinit(tp) \
+ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \
+ (tp)->iss
+
+#define TCP_PAWS_IDLE (uint32_t)(24L * 24L * 60L * 60L * PR_SLOWHZ)
+ /* timestamp wrap-around time */
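+ /* 24 days of idle time: 4,147,200 slow-timeout ticks with the usual PR_SLOWHZ of 2 */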
+
+#ifdef _KERNEL
+extern tcp_cc tcp_ccgen; /* global connection count */
+
+/*
+ * Increment for tcp_iss each second.
+ * This is designed to increment at the standard 250 KB/s,
+ * but with a random component averaging 128 KB.
+ * We also increment tcp_iss by a quarter of this amount
+ * each time we use the value for a new connection.
+ * If defined, the tcp_random18() macro should produce a
+ * number in the range [0-0x3ffff] that is hard to predict.
+ */
+#ifndef tcp_random18
+#define tcp_random18() (((uint32_t)random() >> 14) & 0x3ffffL)
+#endif
+#define TCP_ISSINCR (uint32_t)(122L*1024L + tcp_random18())
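+/*
+ * 122*1024 = 124928 plus a random term averaging about 128K gives
+ * the ~250 KB/s average increment described above.
+ */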
+
+extern tcp_seq tcp_iss; /* tcp initial send seq # */
+#else
+#define TCP_ISSINCR (250*1024) /* increment for tcp_iss each second */
+#endif /* _KERNEL */
+#endif /* _NETINET_TCP_SEQ_H_ */
diff --git a/netinet/tcp_subr.c b/netinet/tcp_subr.c
new file mode 100644
index 0000000..69012cc
--- /dev/null
+++ b/netinet/tcp_subr.c
@@ -0,0 +1,729 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
+ * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.226 2005/05/07 00:41:36 cperciva Exp $
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#define _IP_VHL
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+
+int tcp_mssdflt = TCP_MSS;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW,
+ &tcp_mssdflt , 0, "Default TCP Maximum Segment Size");
+
+static int tcp_do_rfc1323 = 1;
+#if !defined(__rtems__)
+static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt,
+ CTLFLAG_RW, &tcp_rttdflt , 0, "");
+
+SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323,
+ CTLFLAG_RW, &tcp_do_rfc1323 , 0, "");
+#endif
+
+static void tcp_notify(struct inpcb *, int);
+
+/*
+ * Target size of TCP PCB hash table. Will be rounded down to a prime
+ * number.
+ */
+#ifndef TCBHASHSIZE
+#define TCBHASHSIZE 128
+#endif
+
+/*
+ * Tcp initialization
+ */
+void
+tcp_init(void)
+{
+
+ tcp_iss = random(); /* wrong, but better than a constant */
+ tcp_ccgen = 1;
+ LIST_INIT(&tcb);
+ tcbinfo.listhead = &tcb;
+ tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask);
+ if (max_protohdr < sizeof(struct tcpiphdr))
+ max_protohdr = sizeof(struct tcpiphdr);
+ if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
+ panic("tcp_init");
+}
+
+/*
+ * Create template to be used to send tcp packets on a connection.
+ * Call after host entry created, allocates an mbuf and fills
+ * in a skeletal tcp/ip header, minimizing the amount of work
+ * necessary when the connection is used.
+ */
+struct tcpiphdr *
+tcp_template(struct tcpcb *tp)
+{
+ register struct inpcb *inp = tp->t_inpcb;
+ register struct mbuf *m;
+ register struct tcpiphdr *n;
+
+ if ((n = tp->t_template) == 0) {
+ m = m_get(M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ return (0);
+ m->m_len = sizeof (struct tcpiphdr);
+ n = mtod(m, struct tcpiphdr *);
+ }
+ n->ti_next = n->ti_prev = 0;
+ n->ti_x1 = 0;
+ n->ti_pr = IPPROTO_TCP;
+ n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
+ n->ti_src = inp->inp_laddr;
+ n->ti_dst = inp->inp_faddr;
+ n->ti_sport = inp->inp_lport;
+ n->ti_dport = inp->inp_fport;
+ n->ti_seq = 0;
+ n->ti_ack = 0;
+ n->ti_x2 = 0;
+ n->ti_off = 5;
+ n->ti_flags = 0;
+ n->ti_win = 0;
+ n->ti_sum = 0;
+ n->ti_urp = 0;
+ return (n);
+}
+
+/*
+ * Send a single message to the TCP at address specified by
+ * the given TCP/IP header. If m == 0, then we make a copy
+ * of the tcpiphdr at ti and send directly to the addressed host.
+ * This is used to force keep alive messages out using the TCP
+ * template for a connection tp->t_template. If flags are given
+ * then we send a message back to the TCP which originated the
+ * segment ti, and discard the mbuf containing it and any other
+ * attached mbufs.
+ *
+ * In any case the ack and sequence number of the transmitted
+ * segment are as specified by the parameters.
+ *
+ * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
+ */
+void
+tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m,
+ tcp_seq ack, tcp_seq seq, int flags)
+{
+ register int tlen;
+ int win = 0;
+ struct route *ro = 0;
+ struct route sro;
+
+ if (tp) {
+ win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
+ ro = &tp->t_inpcb->inp_route;
+ } else {
+ ro = &sro;
+ bzero(ro, sizeof *ro);
+ }
+ if (m == NULL) {
+ m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ return;
+#ifdef TCP_COMPAT_42
+ tlen = 1;
+#else
+ tlen = 0;
+#endif
+ m->m_data += max_linkhdr;
+ *mtod(m, struct tcpiphdr *) = *ti;
+ ti = mtod(m, struct tcpiphdr *);
+ flags = TH_ACK;
+ } else {
+ m_freem(m->m_next);
+ m->m_next = NULL;
+ m->m_data = (caddr_t)ti;
+ m->m_len = sizeof (struct tcpiphdr);
+ tlen = 0;
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
+ xchg(ti->ti_dport, ti->ti_sport, u_short);
+#undef xchg
+ }
+ ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
+ tlen += sizeof (struct tcpiphdr);
+ m->m_len = tlen;
+ m->m_pkthdr.len = tlen;
+ m->m_pkthdr.rcvif = (struct ifnet *) 0;
+ ti->ti_next = ti->ti_prev = 0;
+ ti->ti_x1 = 0;
+ ti->ti_seq = htonl(seq);
+ ti->ti_ack = htonl(ack);
+ ti->ti_x2 = 0;
+ ti->ti_off = sizeof (struct tcphdr) >> 2;
+ ti->ti_flags = flags;
+ if (tp)
+ ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
+ else
+ ti->ti_win = htons((u_short)win);
+ ti->ti_urp = 0;
+ ti->ti_sum = 0;
+ ti->ti_sum = in_cksum(m, tlen);
+ ((struct ip *)ti)->ip_len = tlen;
+ ((struct ip *)ti)->ip_ttl = ip_defttl;
+#ifdef TCPDEBUG
+ if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_OUTPUT, 0, tp, ti, 0);
+#endif
+ (void) ip_output(m, NULL, ro, 0, NULL);
+ if (ro == &sro && ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ }
+}
+
+/*
+ * Create a new TCP control block, making an
+ * empty reassembly queue and hooking it to the argument
+ * protocol control block.
+ */
+struct tcpcb *
+tcp_newtcpcb(struct inpcb *inp)
+{
+ struct tcpcb *tp;
+
+ tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT);
+ if (tp == NULL)
+ return ((struct tcpcb *)0);
+ bzero((char *) tp, sizeof(struct tcpcb));
+ tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp;
+ tp->t_maxseg = tp->t_maxopd = tcp_mssdflt;
+
+ if (tcp_do_rfc1323)
+ tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
+ tp->t_inpcb = inp;
+ /*
+ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
+ * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives
+ * reasonable initial retransmit time.
+ */
+ tp->t_srtt = TCPTV_SRTTBASE;
+ tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
+ tp->t_rttmin = TCPTV_MIN;
+ tp->t_rxtcur = TCPTV_RTOBASE;
+ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+ inp->inp_ip_ttl = ip_defttl;
+ inp->inp_ppcb = (caddr_t)tp;
+ return (tp);
+}
+
+/*
+ * Drop a TCP connection, reporting
+ * the specified error. If connection is synchronized,
+ * then send a RST to peer.
+ */
+struct tcpcb *
+tcp_drop(struct tcpcb *tp, int errnum)
+{
+ struct socket *so = tp->t_inpcb->inp_socket;
+
+ if (TCPS_HAVERCVDSYN(tp->t_state)) {
+ tp->t_state = TCPS_CLOSED;
+ (void) tcp_output(tp);
+ tcpstat.tcps_drops++;
+ } else
+ tcpstat.tcps_conndrops++;
+ if (errnum == ETIMEDOUT && tp->t_softerror)
+ errnum = tp->t_softerror;
+ so->so_error = errnum;
+ return (tcp_close(tp));
+}
+
+/*
+ * Close a TCP control block:
+ * discard all space held by the tcp
+ * discard internet protocol block
+ * wake up any sleepers
+ */
+struct tcpcb *
+tcp_close(struct tcpcb *tp)
+{
+ register struct tcpiphdr *t;
+ struct inpcb *inp = tp->t_inpcb;
+ struct socket *so = inp->inp_socket;
+ register struct mbuf *m;
+ register struct rtentry *rt;
+
+ /*
+ * If we got enough samples through the srtt filter,
+ * save the rtt and rttvar in the routing entry.
+ * 'Enough' is arbitrarily defined as 16 samples.
+ * 16 samples is enough for the srtt filter to converge
+ * to within 5% of the correct value; fewer samples and
+ * we could save a very bogus rtt.
+ *
+ * Don't update the default route's characteristics and don't
+ * update anything that the user "locked".
+ */
+ if (tp->t_rttupdated >= 16 &&
+ (rt = inp->inp_route.ro_rt) &&
+ ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
+ register u_long i = 0;
+
+ if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
+ i = tp->t_srtt *
+ (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+ if (rt->rt_rmx.rmx_rtt && i)
+ /*
+ * filter this update to half the old & half
+ * the new values, converting scale.
+ * See route.h and tcp_var.h for a
+ * description of the scaling constants.
+ */
+ rt->rt_rmx.rmx_rtt =
+ (rt->rt_rmx.rmx_rtt + i) / 2;
+ else
+ rt->rt_rmx.rmx_rtt = i;
+ tcpstat.tcps_cachedrtt++;
+ }
+ if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
+ i = tp->t_rttvar *
+ (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+ if (rt->rt_rmx.rmx_rttvar && i)
+ rt->rt_rmx.rmx_rttvar =
+ (rt->rt_rmx.rmx_rttvar + i) / 2;
+ else
+ rt->rt_rmx.rmx_rttvar = i;
+ tcpstat.tcps_cachedrttvar++;
+ }
+ /*
+ * update the pipelimit (ssthresh) if it has been updated
+ * already or if a pipesize was specified & the threshold
+ * got below half the pipesize. I.e., wait for bad news
+ * before we start updating, then update on both good
+ * and bad news.
+ */
+ if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
+ ((i = tp->snd_ssthresh) != 0) && rt->rt_rmx.rmx_ssthresh) ||
+ i < (rt->rt_rmx.rmx_sendpipe / 2)) {
+ /*
+ * convert the limit from user data bytes to
+ * packets then to packet data bytes.
+ */
+ i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
+ if (i < 2)
+ i = 2;
+ i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
+ if (rt->rt_rmx.rmx_ssthresh)
+ rt->rt_rmx.rmx_ssthresh =
+ (rt->rt_rmx.rmx_ssthresh + i) / 2;
+ else
+ rt->rt_rmx.rmx_ssthresh = i;
+ tcpstat.tcps_cachedssthresh++;
+ }
+ }
+ /* free the reassembly queue, if any */
+ t = tp->seg_next;
+ while (t != (struct tcpiphdr *)tp) {
+ t = (struct tcpiphdr *)t->ti_next;
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
+ LD32_UNALGN((struct tcpiphdr *)t->ti_prev,m);
+#else
+ m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
+#endif
+ remque(t->ti_prev);
+ m_freem(m);
+ }
+ if (tp->t_template)
+ (void) m_free(dtom(tp->t_template));
+ free(tp, M_PCB);
+ inp->inp_ppcb = 0;
+ soisdisconnected(so);
+ in_pcbdetach(inp);
+ tcpstat.tcps_closed++;
+ return ((struct tcpcb *)0);
+}
+
+void
+tcp_drain(void)
+{
+
+}
+
+/*
+ * Notify a tcp user of an asynchronous error;
+ * store error as soft error, but wake up user
+ * (for now, won't do anything until can select for soft error).
+ */
+static void
+tcp_notify(struct inpcb *inp, int error)
+{
+ struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
+ struct socket *so = inp->inp_socket;
+
+ /*
+ * Ignore some errors if we are hooked up.
+ * If connection hasn't completed, has retransmitted several times,
+ * and receives a second error, give up now. This is better
+ * than waiting a long time to establish a connection that
+ * can never complete.
+ */
+ if (tp->t_state == TCPS_ESTABLISHED &&
+ (error == EHOSTUNREACH || error == ENETUNREACH ||
+ error == EHOSTDOWN)) {
+ return;
+ } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
+ tp->t_softerror)
+ so->so_error = error;
+ else
+ tp->t_softerror = error;
+ soconnwakeup (so);
+ sorwakeup(so);
+ sowwakeup(so);
+}
+
+#ifdef __rtems__
+#define INP_INFO_RLOCK(a)
+#define INP_INFO_RUNLOCK(a)
+#define INP_LOCK(a)
+#define INP_UNLOCK(a)
+#endif
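+
+/*
+ * Editor's note (not part of the original sources): the INP_* macros are
+ * stubbed out above presumably because the legacy RTEMS stack is already
+ * serialized by its single network semaphore, so the per-PCB locking that
+ * FreeBSD performs in tcp_pcblist() is not needed in this port.
+ */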
+
+static int
+tcp_pcblist(SYSCTL_HANDLER_ARGS)
+{
+ int error, i, n, s;
+ struct inpcb *inp, **inp_list;
+ inp_gen_t gencnt;
+ struct xinpgen xig;
+
+ /*
+ * The process of preparing the TCB list is too time-consuming and
+ * resource-intensive to repeat twice on every request.
+ */
+ if (req->oldptr == NULL) {
+ n = tcbinfo.ipi_count;
+ req->oldidx = 2 * (sizeof xig)
+ + (n + n/8) * sizeof(struct xtcpcb);
+ return (0);
+ }
+
+ if (req->newptr != NULL)
+ return (EPERM);
+
+ /*
+ * OK, now we're committed to doing something.
+ */
+ s = splnet();
+ INP_INFO_RLOCK(&tcbinfo);
+ gencnt = tcbinfo.ipi_gencnt;
+ n = tcbinfo.ipi_count;
+ INP_INFO_RUNLOCK(&tcbinfo);
+ splx(s);
+
+ sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+ + n * sizeof(struct xtcpcb));
+
+ xig.xig_len = sizeof xig;
+ xig.xig_count = n;
+ xig.xig_gen = gencnt;
+/* xig.xig_sogen = so_gencnt; remove by ccj */
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ if (error)
+ return error;
+
+ /* ccj add exit if the count is 0 */
+ if (!n)
+ return error;
+
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == 0)
+ return ENOMEM;
+
+ s = splnet();
+ INP_INFO_RLOCK(&tcbinfo);
+ for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n;
+ inp = LIST_NEXT(inp, inp_list)) {
+ INP_LOCK(inp);
+ if (inp->inp_gencnt <= gencnt)
+#if 0
+ &&
+ cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
+#endif
+ inp_list[i++] = inp;
+ INP_UNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK(&tcbinfo);
+ splx(s);
+ n = i;
+
+ error = 0;
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_LOCK(inp);
+ if (inp->inp_gencnt <= gencnt) {
+ struct xtcpcb xt;
+ caddr_t inp_ppcb;
+ xt.xt_len = sizeof xt;
+ /* XXX should avoid extra copy */
+ bcopy(inp, &xt.xt_inp, sizeof *inp);
+ inp_ppcb = inp->inp_ppcb;
+ if (inp_ppcb != NULL)
+ bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
+ else
+ bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
+#if 0
+ if (inp->inp_socket)
+ sotoxsocket(inp->inp_socket, &xt.xt_socket);
+#endif
+ error = SYSCTL_OUT(req, &xt, sizeof xt);
+ }
+ INP_UNLOCK(inp);
+ }
+ if (!error) {
+ /*
+ * Give the user an updated idea of our state.
+ * If the generation differs from what we told
+ * her before, she knows that something happened
+ * while we were processing this request, and it
+ * might be necessary to retry.
+ */
+ s = splnet();
+ INP_INFO_RLOCK(&tcbinfo);
+ xig.xig_gen = tcbinfo.ipi_gencnt;
+#if 0
+ xig.xig_sogen = so_gencnt;
+#endif
+ xig.xig_count = tcbinfo.ipi_count;
+ INP_INFO_RUNLOCK(&tcbinfo);
+ splx(s);
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ }
+ free(inp_list, M_TEMP);
+ return error;
+}
+
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+ tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
+
+void
+tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+ struct ip *ip = vip;
+ struct tcphdr *th;
+ void (*notify)(struct inpcb *, int) = tcp_notify;
+
+ if (cmd == PRC_QUENCH)
+ notify = tcp_quench;
+#if 1
+ else if (cmd == PRC_MSGSIZE)
+ notify = tcp_mtudisc;
+#endif
+ else if (!PRC_IS_REDIRECT(cmd) &&
+ ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
+ return;
+ if (ip != NULL) {
+#ifdef _IP_VHL
+ th = (struct tcphdr *)((caddr_t)ip
+ + (IP_VHL_HL(ip->ip_vhl) << 2));
+#else
+ th = (struct tcphdr *)((caddr_t)ip
+ + (ip->ip_hl << 2));
+#endif
+ in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
+ cmd, notify);
+ } else
+ in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
+}
+
+/*
+ * When a source quench is received, close congestion window
+ * to one segment. We will gradually open it again as we proceed.
+ */
+void
+tcp_quench( struct inpcb *inp, int errnum)
+{
+ struct tcpcb *tp = intotcpcb(inp);
+
+ if (tp)
+ tp->snd_cwnd = tp->t_maxseg;
+}
+
+/*
+ * When `need fragmentation' ICMP is received, update our idea of the MSS
+ * based on the new value in the route. Also nudge TCP to send something,
+ * since we know the packet we just sent was dropped.
+ * This duplicates some code in the tcp_mss() function in tcp_input.c.
+ */
+void
+tcp_mtudisc(struct inpcb *inp, int errnum)
+{
+ struct tcpcb *tp = intotcpcb(inp);
+ struct rtentry *rt;
+ struct rmxp_tao *taop;
+ struct socket *so = inp->inp_socket;
+ int offered;
+ int mss;
+
+ if (tp) {
+ rt = tcp_rtlookup(inp);
+ if (!rt || !rt->rt_rmx.rmx_mtu) {
+ tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
+ return;
+ }
+ taop = rmx_taop(rt->rt_rmx);
+ offered = taop->tao_mssopt;
+ mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
+ if (offered)
+ mss = min(mss, offered);
+ /*
+ * XXX - The above conditional probably violates the TCP
+ * spec. The problem is that, since we don't know the
+ * other end's MSS, we are supposed to use a conservative
+ * default. But, if we do that, then MTU discovery will
+ * never actually take place, because the conservative
+ * default is much less than the MTUs typically seen
+ * on the Internet today. For the moment, we'll sweep
+ * this under the carpet.
+ *
+ * The conservative default might not actually be a problem
+ * if the only case this occurs is when sending an initial
+ * SYN with options and data to a host we've never talked
+ * to before. Then, they will reply with an MSS value which
+ * will get recorded and the new parameters should get
+ * recomputed. For Further Study.
+ */
+ if (tp->t_maxopd <= mss)
+ return;
+ tp->t_maxopd = mss;
+
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+ (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
+ mss -= TCPOLEN_TSTAMP_APPA;
+ if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
+ (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
+ mss -= TCPOLEN_CC_APPA;
+#if (MCLBYTES & (MCLBYTES - 1)) == 0
+ if (mss > MCLBYTES)
+ mss &= ~(MCLBYTES-1);
+#else
+ if (mss > MCLBYTES)
+ mss = mss / MCLBYTES * MCLBYTES;
+#endif
+ if (so->so_snd.sb_hiwat < mss)
+ mss = so->so_snd.sb_hiwat;
+
+ tp->t_maxseg = mss;
+
+ tcpstat.tcps_mturesent++;
+ tp->t_rtt = 0;
+ tp->snd_nxt = tp->snd_una;
+ tcp_output(tp);
+ }
+}
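+
+/*
+ * Editor's note (not part of the original sources): the cluster rounding
+ * in tcp_mtudisc() truncates an MSS larger than one mbuf cluster to a
+ * multiple of MCLBYTES so that full segments fill whole clusters.  For
+ * example, assuming MCLBYTES = 2048 and a hypothetical jumbo-frame MTU of
+ * 9000, mss = 9000 - 40 = 8960 and 8960 & ~(2048 - 1) = 8192, i.e. four
+ * clusters.
+ */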
+
+/*
+ * Look-up the routing entry to the peer of this inpcb. If no route
+ * is found and it cannot be allocated, then return NULL. This routine
+ * is called by TCP routines that access the rmx structure and by tcp_mss
+ * to get the interface MTU.
+ */
+struct rtentry *
+tcp_rtlookup(struct inpcb *inp)
+{
+ struct route *ro;
+ struct rtentry *rt;
+
+ ro = &inp->inp_route;
+ rt = ro->ro_rt;
+ if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
+ /* No route yet, so try to acquire one */
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ ro->ro_dst.sa_family = AF_INET;
+ ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+ ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+ inp->inp_faddr;
+ rtalloc(ro);
+ rt = ro->ro_rt;
+ }
+ }
+ return rt;
+}
+
+/*
+ * Return a pointer to the cached information about the remote host.
+ * The cached information is stored in the protocol specific part of
+ * the route metrics.
+ */
+struct rmxp_tao *
+tcp_gettaocache(struct inpcb *inp)
+{
+ struct rtentry *rt = tcp_rtlookup(inp);
+
+ /* Make sure this is a host route and is up. */
+ if (rt == NULL ||
+ (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
+ return NULL;
+
+ return rmx_taop(rt->rt_rmx);
+}
diff --git a/netinet/tcp_timer.c b/netinet/tcp_timer.c
new file mode 100644
index 0000000..a6df2b2
--- /dev/null
+++ b/netinet/tcp_timer.c
@@ -0,0 +1,385 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opt_tcpdebug.h"
+
+#ifndef TUBA_INCLUDE
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <errno.h>
+
+#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+
+int tcp_keepinit = TCPTV_KEEP_INIT;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit,
+ CTLFLAG_RW, &tcp_keepinit , 0, "");
+
+int tcp_keepidle = TCPTV_KEEP_IDLE;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle,
+ CTLFLAG_RW, &tcp_keepidle , 0, "");
+
+static int tcp_keepintvl = TCPTV_KEEPINTVL;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl,
+ CTLFLAG_RW, &tcp_keepintvl , 0, "");
+
+static int always_keepalive = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive,
+ CTLFLAG_RW, &always_keepalive , 0, "");
+
+static int tcp_keepcnt = TCPTV_KEEPCNT;
+ /* max idle probes */
+static int tcp_maxpersistidle = TCPTV_KEEP_IDLE;
+ /* max idle time in persist */
+int tcp_maxidle;
+#else /* TUBA_INCLUDE */
+
+static int tcp_maxpersistidle;
+#endif /* TUBA_INCLUDE */
+
+/*
+ * Fast timeout routine for processing delayed acks
+ */
+void
+tcp_fasttimo(void)
+{
+ register struct inpcb *inp;
+ register struct tcpcb *tp;
+ int s;
+
+ s = splnet();
+
+ for (inp = tcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
+ if ((tp = (struct tcpcb *)inp->inp_ppcb) &&
+ (tp->t_flags & TF_DELACK)) {
+ tp->t_flags &= ~TF_DELACK;
+ tp->t_flags |= TF_ACKNOW;
+ tcpstat.tcps_delack++;
+ (void) tcp_output(tp);
+ }
+ }
+ splx(s);
+}
+
+/*
+ * Tcp protocol timeout routine called every 500 ms.
+ * Updates the timers in all active tcb's and
+ * causes finite state machine actions if timers expire.
+ */
+void
+tcp_slowtimo(void)
+{
+ register struct inpcb *ip, *ipnxt;
+ register struct tcpcb *tp;
+ register int i;
+ int s;
+#ifdef TCPDEBUG
+ int ostate;
+#endif
+
+ s = splnet();
+
+ tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
+
+ ip = tcb.lh_first;
+ if (ip == NULL) {
+ splx(s);
+ return;
+ }
+ /*
+ * Search through tcb's and update active timers.
+ */
+ for (; ip != NULL; ip = ipnxt) {
+ ipnxt = ip->inp_list.le_next;
+ tp = intotcpcb(ip);
+ if (tp == 0 || tp->t_state == TCPS_LISTEN)
+ continue;
+ for (i = 0; i < TCPT_NTIMERS; i++) {
+ if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
+#ifdef TCPDEBUG
+ ostate = tp->t_state;
+#endif
+ tp = tcp_timers(tp, i);
+ if (tp == NULL)
+ goto tpgone;
+#ifdef TCPDEBUG
+ if (tp->t_inpcb->inp_socket->so_options
+ & SO_DEBUG)
+ tcp_trace(TA_USER, ostate, tp,
+ (struct tcpiphdr *)0,
+ PRU_SLOWTIMO);
+#endif
+ }
+ }
+ tp->t_idle++;
+ tp->t_duration++;
+ if (tp->t_rtt)
+ tp->t_rtt++;
+tpgone:
+ ;
+ }
+ tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */
+#ifdef TCP_COMPAT_42
+ if ((int)tcp_iss < 0)
+ tcp_iss = TCP_ISSINCR; /* XXX */
+#endif
+ tcp_now++; /* for timestamps */
+ splx(s);
+}
+#ifndef TUBA_INCLUDE
+
+/*
+ * Cancel all timers for TCP tp.
+ */
+void
+tcp_canceltimers(struct tcpcb *tp)
+{
+ register int i;
+
+ for (i = 0; i < TCPT_NTIMERS; i++)
+ tp->t_timer[i] = 0;
+}
+
+int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
+ { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+
+static int tcp_totbackoff = 511; /* sum of tcp_backoff[] */
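+/*
+ * Editor's note (not part of the original sources): checking the constant
+ * above against the table: 1+2+4+8+16+32 = 63, plus the seven trailing
+ * entries of 64 (7 * 64 = 448), gives 63 + 448 = 511.
+ */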
+
+/*
+ * TCP timer processing.
+ */
+struct tcpcb *
+tcp_timers(struct tcpcb *tp, int timer)
+{
+ register int rexmt;
+
+ switch (timer) {
+
+ /*
+ * 2 MSL timeout in shutdown went off. If we're closed but
+ * still waiting for peer to close and connection has been idle
+ * too long, or if 2MSL time is up from TIME_WAIT, delete connection
+ * control block. Otherwise, check again in a bit.
+ */
+ case TCPT_2MSL:
+ if (tp->t_state != TCPS_TIME_WAIT &&
+ tp->t_idle <= tcp_maxidle)
+ tp->t_timer[TCPT_2MSL] = tcp_keepintvl;
+ else
+ tp = tcp_close(tp);
+ break;
+
+ /*
+ * Retransmission timer went off. Message has not
+ * been acked within retransmit interval. Back off
+ * to a longer retransmit interval and retransmit one segment.
+ */
+ case TCPT_REXMT:
+ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+ tp->t_rxtshift = TCP_MAXRXTSHIFT;
+ tcpstat.tcps_timeoutdrop++;
+ tp = tcp_drop(tp, tp->t_softerror ?
+ tp->t_softerror : ETIMEDOUT);
+ break;
+ }
+ tcpstat.tcps_rexmttimeo++;
+ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
+ TCPT_RANGESET(tp->t_rxtcur, rexmt,
+ tp->t_rttmin, TCPTV_REXMTMAX);
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+ /*
+ * If losing, let the lower level know and try for
+ * a better route. Also, if we backed off this far,
+ * our srtt estimate is probably bogus. Clobber it
+ * so we'll take the next rtt measurement as our srtt;
+ * move the current srtt into rttvar to keep the current
+ * retransmit times until then.
+ */
+ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+ in_losing(tp->t_inpcb);
+ tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
+ tp->t_srtt = 0;
+ }
+ tp->snd_nxt = tp->snd_una;
+ /*
+ * Force a segment to be sent.
+ */
+ tp->t_flags |= TF_ACKNOW;
+ /*
+ * If timing a segment in this window, stop the timer.
+ */
+ tp->t_rtt = 0;
+ /*
+ * Close the congestion window down to one segment
+ * (we'll open it by one segment for each ack we get).
+ * Since we probably have a window's worth of unacked
+ * data accumulated, this "slow start" keeps us from
+ * dumping all that data as back-to-back packets (which
+ * might overwhelm an intermediate gateway).
+ *
+ * There are two phases to the opening: Initially we
+ * open by one mss on each ack. This makes the window
+ * size increase exponentially with time. If the
+ * window is larger than the path can handle, this
+ * exponential growth results in dropped packet(s)
+ * almost immediately. To get more time between
+ * drops but still "push" the network to take advantage
+ * of improving conditions, we switch from exponential
+ * to linear window opening at some threshold size.
+ * For a threshold, we use half the current window
+ * size, truncated to a multiple of the mss.
+ *
+ * (the minimum cwnd that will give us exponential
+ * growth is 2 mss. We don't allow the threshold
+ * to go below this.)
+ */
+ {
+ u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+ if (win < 2)
+ win = 2;
+ tp->snd_cwnd = tp->t_maxseg;
+ tp->snd_ssthresh = win * tp->t_maxseg;
+ tp->t_dupacks = 0;
+ }
+ (void) tcp_output(tp);
+ break;
+
+ /*
+ * Persistence timer into zero window.
+ * Force a byte to be output, if possible.
+ */
+ case TCPT_PERSIST:
+ tcpstat.tcps_persisttimeo++;
+ /*
+ * Hack: if the peer is dead/unreachable, we do not
+ * time out if the window is closed. After a full
+ * backoff, drop the connection if the idle time
+ * (no responses to probes) reaches the maximum
+ * backoff that we would use if retransmitting.
+ */
+ if (tp->t_rxtshift == TCP_MAXRXTSHIFT) {
+ u_long maxidle = TCP_REXMTVAL(tp);
+ if (maxidle < tp->t_rttmin)
+ maxidle = tp->t_rttmin;
+ maxidle *= tcp_totbackoff;
+ if (tp->t_idle >= tcp_maxpersistidle ||
+ tp->t_idle >= maxidle) {
+ tcpstat.tcps_persistdrop++;
+ tp = tcp_drop(tp, ETIMEDOUT);
+ break;
+ }
+ }
+ tcp_setpersist(tp);
+ tp->t_force = 1;
+ (void) tcp_output(tp);
+ tp->t_force = 0;
+ break;
+
+ /*
+ * Keep-alive timer went off; send something
+ * or drop connection if idle for too long.
+ */
+ case TCPT_KEEP:
+ tcpstat.tcps_keeptimeo++;
+ if (tp->t_state < TCPS_ESTABLISHED)
+ goto dropit;
+ if ((always_keepalive ||
+ tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
+ tp->t_state <= TCPS_CLOSING) {
+ if (tp->t_idle >= tcp_keepidle + tcp_maxidle)
+ goto dropit;
+ /*
+ * Send a packet designed to force a response
+ * if the peer is up and reachable:
+ * either an ACK if the connection is still alive,
+ * or an RST if the peer has closed the connection
+ * due to timeout or reboot.
+ * Using sequence number tp->snd_una-1
+ * causes the transmitted zero-length segment
+ * to lie outside the receive window;
+ * by the protocol spec, this requires the
+ * correspondent TCP to respond.
+ */
+ tcpstat.tcps_keepprobe++;
+#ifdef TCP_COMPAT_42
+ /*
+ * The keepalive packet must have nonzero length
+ * to get a 4.2 host to respond.
+ */
+ tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
+ tp->rcv_nxt - 1, tp->snd_una - 1, 0);
+#else
+ tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
+ tp->rcv_nxt, tp->snd_una - 1, 0);
+#endif
+ tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
+ } else
+ tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+ break;
+ dropit:
+ tcpstat.tcps_keepdrops++;
+ tp = tcp_drop(tp, ETIMEDOUT);
+ break;
+ }
+ return (tp);
+}
+#endif /* TUBA_INCLUDE */
diff --git a/netinet/tcp_timer.h b/netinet/tcp_timer.h
new file mode 100644
index 0000000..4bbbb71
--- /dev/null
+++ b/netinet/tcp_timer.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.26 2004/08/16 18:32:07 rwatson Exp $
+ */
+
+#ifndef _NETINET_TCP_TIMER_H_
+#define _NETINET_TCP_TIMER_H_
+
+/*
+ * Definitions of the TCP timers. These timers are counted
+ * down PR_SLOWHZ times a second.
+ */
+#define TCPT_NTIMERS 4
+
+#define TCPT_REXMT 0 /* retransmit */
+#define TCPT_PERSIST 1 /* retransmit persistence */
+#define TCPT_KEEP 2 /* keep alive */
+#define TCPT_2MSL 3 /* 2*msl quiet time timer */
+
+/*
+ * The TCPT_REXMT timer is used to force retransmissions.
+ * The TCP has the TCPT_REXMT timer set whenever segments
+ * have been sent for which ACKs are expected but not yet
+ * received. If an ACK is received which advances tp->snd_una,
+ * then the retransmit timer is cleared (if there are no more
+ * outstanding segments) or reset to the base value (if there
+ * are more ACKs expected). Whenever the retransmit timer goes off,
+ * we retransmit one unacknowledged segment, and do a backoff
+ * on the retransmit timer.
+ *
+ * The TCPT_PERSIST timer is used to keep window size information
+ * flowing even if the window goes shut. If all previous transmissions
+ * have been acknowledged (so that there are no retransmissions in progress),
+ * and the window is too small to bother sending anything, then we start
+ * the TCPT_PERSIST timer. When it expires, if the window is nonzero,
+ * we go to transmit state. Otherwise, at intervals send a single byte
+ * into the peer's window to force him to update our window information.
+ * We do this at most as often as TCPT_PERSMIN time intervals,
+ * but no more frequently than the current estimate of round-trip
+ * packet time. The TCPT_PERSIST timer is cleared whenever we receive
+ * a window update from the peer.
+ *
+ * The TCPT_KEEP timer is used to keep connections alive. If a
+ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time,
+ * but not yet established, then we drop the connection. Once the connection
+ * is established, if the connection is idle for TCPTV_KEEP_IDLE time
+ * (and keepalives have been enabled on the socket), we begin to probe
+ * the connection. We force the peer to send us a segment by sending:
+ * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
+ * This segment is (deliberately) outside the window, and should elicit
+ * an ack segment in response from the peer. If, despite the TCPT_KEEP
+ * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE
+ * amount of time probing, then we drop the connection.
+ */
+
+/*
+ * Time constants.
+ */
+#define TCPTV_MSL ( 30*PR_SLOWHZ) /* max seg lifetime (hah!) */
+#define TCPTV_SRTTBASE 0 /* base roundtrip time;
+ if 0, no idea yet */
+#define TCPTV_RTOBASE ( 3*PR_SLOWHZ) /* assumed RTO if no info */
+#define TCPTV_SRTTDFLT ( 3*PR_SLOWHZ) /* assumed RTT if no info */
+
+#define TCPTV_PERSMIN ( 5*PR_SLOWHZ) /* retransmit persistence */
+#define TCPTV_PERSMAX ( 60*PR_SLOWHZ) /* maximum persist interval */
+
+#define TCPTV_KEEP_INIT ( 75*PR_SLOWHZ) /* initial connect keep alive */
+#define TCPTV_KEEP_IDLE (120*60*PR_SLOWHZ) /* dflt time before probing */
+#define TCPTV_KEEPINTVL ( 75*PR_SLOWHZ) /* default probe interval */
+#define TCPTV_KEEPCNT 8 /* max probes before drop */
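+/*
+ * Editor's note (not part of the original sources): with the customary
+ * PR_SLOWHZ of 2 ticks per second, the defaults above give a two hour
+ * idle period before probing (120*60*2 = 14400 ticks), probes every 75
+ * seconds, and, since tcp_slowtimo() sets
+ * tcp_maxidle = tcp_keepcnt * tcp_keepintvl, a further 8 * 150 = 1200
+ * ticks (10 minutes) of unanswered probes before the connection is
+ * dropped.
+ */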
+
+#define TCPTV_MIN ( 1*PR_SLOWHZ) /* minimum allowable value */
+#define TCPTV_REXMTMAX ( 64*PR_SLOWHZ) /* max allowable REXMT value */
+
+#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */
+
+#define TCP_LINGERTIME 120 /* linger at most 2 minutes */
+
+#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */
+
+#ifdef TCPTIMERS
+static char *tcptimers[] =
+ { "REXMT", "PERSIST", "KEEP", "2MSL" };
+#endif
+
+/*
+ * Force a time value to be in a certain range.
+ */
+#define TCPT_RANGESET(tv, value, tvmin, tvmax) { \
+ (tv) = (value); \
+ if ((u_long)(tv) < (u_long)(tvmin)) \
+ (tv) = (tvmin); \
+ else if ((u_long)(tv) > (u_long)(tvmax)) \
+ (tv) = (tvmax); \
+}
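+
+/*
+ * Editor's note (not part of the original sources): TCPT_RANGESET() is a
+ * clamped assignment.  With illustrative bounds tvmin = 2 and tvmax = 128:
+ *
+ *   TCPT_RANGESET(tv, 3, 2, 128);    tv becomes 3
+ *   TCPT_RANGESET(tv, 1, 2, 128);    tv becomes 2   (clamped to tvmin)
+ *   TCPT_RANGESET(tv, 500, 2, 128);  tv becomes 128 (clamped to tvmax)
+ *
+ * tcp_timer.c uses it this way to keep the retransmit timer between
+ * tp->t_rttmin and TCPTV_REXMTMAX.
+ */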
+
+#ifdef _KERNEL
+extern int tcp_keepinit; /* time to establish connection */
+extern int tcp_keepidle; /* time before keepalive probes begin */
+extern int tcp_maxidle; /* time to drop after starting probes */
+extern int tcp_ttl; /* time to live for TCP segs */
+extern int tcp_backoff[];
+#endif
+
+#endif
diff --git a/netinet/tcp_usrreq.c b/netinet/tcp_usrreq.c
new file mode 100644
index 0000000..7f4c110
--- /dev/null
+++ b/netinet/tcp_usrreq.c
@@ -0,0 +1,845 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
+ * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.120 2005/05/01 14:01:38 rwatson Exp $
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+
+/*
+ * TCP protocol interface to socket abstraction.
+ */
+extern char *tcpstates[];
+
+static int tcp_attach(struct socket *);
+static int tcp_connect(struct tcpcb *, struct mbuf *);
+static struct tcpcb *
+ tcp_disconnect(struct tcpcb *);
+static struct tcpcb *
+ tcp_usrclosed(struct tcpcb *);
+
+#ifdef TCPDEBUG
+#define TCPDEBUG0 int ostate
+#define TCPDEBUG1() ostate = tp ? tp->t_state : 0
+#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \
+ tcp_trace(TA_USER, ostate, tp, 0, req)
+#else
+#define TCPDEBUG0
+#define TCPDEBUG1()
+#define TCPDEBUG2(req)
+#endif
+
+/*
+ * TCP attaches to socket via pru_attach(), reserving space,
+ * and an internet control block.
+ */
+static int
+tcp_usr_attach(struct socket *so, intptr_t proto)
+{
+ int s = splnet();
+ int error;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = 0;
+ TCPDEBUG0;
+
+ TCPDEBUG1();
+ if (inp) {
+ error = EISCONN;
+ goto out;
+ }
+
+ error = tcp_attach(so);
+ if (error)
+ goto out;
+
+ if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+ so->so_linger = TCP_LINGERTIME * hz;
+ tp = sototcpcb(so);
+out:
+ TCPDEBUG2(PRU_ATTACH);
+ splx(s);
+ return error;
+}
+
+/*
+ * pru_detach() detaches the TCP protocol from the socket.
+ * If the protocol state is non-embryonic, then can't
+ * do this directly: have to initiate a pru_disconnect(),
+ * which may finish later; embryonic TCB's can just
+ * be discarded here.
+ */
+static int
+tcp_usr_detach(struct socket *so)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+ TCPDEBUG0;
+
+ if (inp == 0) {
+ splx(s);
+ return EINVAL; /* XXX */
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ if (tp->t_state > TCPS_LISTEN)
+ tp = tcp_disconnect(tp);
+ else
+ tp = tcp_close(tp);
+
+ TCPDEBUG2(PRU_DETACH);
+ splx(s);
+ return error;
+}
+
+#define COMMON_START() TCPDEBUG0; \
+ do { \
+ if (inp == 0) { \
+ splx(s); \
+ return EINVAL; \
+ } \
+ tp = intotcpcb(inp); \
+ TCPDEBUG1(); \
+ } while(0)
+
+#define COMMON_END(req) out: TCPDEBUG2(req); splx(s); return error; goto out
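+
+/*
+ * Editor's note (not part of the original sources): the trailing
+ * "goto out" in COMMON_END() is unreachable (it follows the return);
+ * it is presumably there only to reference the "out" label so that
+ * handlers which never branch to it do not trigger an unused-label
+ * warning.
+ */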
+
+
+/*
+ * Give the socket an address.
+ */
+static int
+tcp_usr_bind(struct socket *so, struct mbuf *nam)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+ struct sockaddr_in *sinp;
+
+ COMMON_START();
+
+ /*
+ * Must check for multicast addresses and disallow binding
+ * to them.
+ */
+ sinp = mtod(nam, struct sockaddr_in *);
+ if (sinp->sin_family == AF_INET &&
+ IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ error = in_pcbbind(inp, nam);
+ if (error)
+ goto out;
+ COMMON_END(PRU_BIND);
+
+}
+
+/*
+ * Prepare to accept connections.
+ */
+static int
+tcp_usr_listen(struct socket *so)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+
+ COMMON_START();
+ if (inp->inp_lport == 0)
+ error = in_pcbbind(inp, NULL);
+ if (error == 0)
+ tp->t_state = TCPS_LISTEN;
+ COMMON_END(PRU_LISTEN);
+}
+
+/*
+ * Initiate connection to peer.
+ * Create a template for use in transmissions on this connection.
+ * Enter SYN_SENT state, and mark socket as connecting.
+ * Start keep-alive timer, and seed output sequence space.
+ * Send initial segment on connection.
+ */
+static int
+tcp_usr_connect(struct socket *so, struct mbuf *nam)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+ struct sockaddr_in *sinp;
+
+ COMMON_START();
+
+ /*
+ * Must disallow TCP ``connections'' to multicast addresses.
+ */
+ sinp = mtod(nam, struct sockaddr_in *);
+ if (sinp->sin_family == AF_INET
+ && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+
+ if ((error = tcp_connect(tp, nam)) != 0)
+ goto out;
+ error = tcp_output(tp);
+ COMMON_END(PRU_CONNECT);
+}
+
+/*
+ * Initiate disconnect from peer.
+ * If connection never passed embryonic stage, just drop;
+ * else if don't need to let data drain, then can just drop anyway,
+ * else have to begin TCP shutdown process: mark socket disconnecting,
+ * drain unread data, state switch to reflect user close, and
+ * send segment (e.g. FIN) to peer. Socket will be really disconnected
+ * when peer sends FIN and acks ours.
+ *
+ * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
+ */
+static int
+tcp_usr_disconnect(struct socket *so)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+
+ COMMON_START();
+ tp = tcp_disconnect(tp);
+ COMMON_END(PRU_DISCONNECT);
+}
+
+/*
+ * Accept a connection. Essentially all the work is
+ * done at higher levels; just return the address
+ * of the peer, storing through addr.
+ */
+static int
+tcp_usr_accept(struct socket *so, struct mbuf *nam)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+
+ COMMON_START();
+ in_setpeeraddr(inp, nam);
+ COMMON_END(PRU_ACCEPT);
+}
+
+/*
+ * Mark the connection as being incapable of further output.
+ */
+static int
+tcp_usr_shutdown(struct socket *so)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+
+ COMMON_START();
+ socantsendmore(so);
+ tp = tcp_usrclosed(tp);
+ if (tp)
+ error = tcp_output(tp);
+ COMMON_END(PRU_SHUTDOWN);
+}
+
+/*
+ * After a receive, possibly send window update to peer.
+ */
+static int
+tcp_usr_rcvd(struct socket *so, intptr_t flags)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+
+ COMMON_START();
+ tcp_output(tp);
+ COMMON_END(PRU_RCVD);
+}
+
+/*
+ * Do a send by putting data in output queue and updating urgent
+ * marker if URG set. Possibly send more data.
+ */
+static int
+tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct mbuf *nam,
+ struct mbuf *control)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+
+ COMMON_START();
+ if (control && control->m_len) {
+ m_freem(control); /* XXX shouldn't caller do this??? */
+ if (m)
+ m_freem(m);
+ error = EINVAL;
+ goto out;
+ }
+
+ if(!(flags & PRUS_OOB)) {
+ sbappend(&so->so_snd, m);
+ if (nam && tp->t_state < TCPS_SYN_SENT) {
+ /*
+ * Do implied connect if not yet connected,
+ * initialize window to default value, and
+ * initialize maxseg/maxopd using peer's cached
+ * MSS.
+ */
+ error = tcp_connect(tp, nam);
+ if (error)
+ goto out;
+ tp->snd_wnd = TTCP_CLIENT_SND_WND;
+ tcp_mss(tp, -1);
+ }
+
+ if (flags & PRUS_EOF) {
+ /*
+ * Close the send side of the connection after
+ * the data is sent.
+ */
+ socantsendmore(so);
+ tp = tcp_usrclosed(tp);
+ }
+ if (tp != NULL)
+ error = tcp_output(tp);
+ } else {
+ if (sbspace(&so->so_snd) < -512) {
+ m_freem(m);
+ error = ENOBUFS;
+ goto out;
+ }
+ /*
+ * According to RFC961 (Assigned Protocols),
+ * the urgent pointer points to the last octet
+ * of urgent data. We continue, however,
+ * to consider it to indicate the first octet
+ * of data past the urgent section.
+ * Otherwise, snd_up should be one lower.
+ */
+ sbappend(&so->so_snd, m);
+ if (nam && tp->t_state < TCPS_SYN_SENT) {
+ /*
+ * Do implied connect if not yet connected,
+ * initialize window to default value, and
+ * initialize maxseg/maxopd using peer's cached
+ * MSS.
+ */
+ error = tcp_connect(tp, nam);
+ if (error)
+ goto out;
+ tp->snd_wnd = TTCP_CLIENT_SND_WND;
+ tcp_mss(tp, -1);
+ }
+ tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
+ tp->t_force = 1;
+ error = tcp_output(tp);
+ tp->t_force = 0;
+ }
+ COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
+ ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
+}
+
+/*
+ * Abort the TCP.
+ */
+static int
+tcp_usr_abort(struct socket *so)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+
+ COMMON_START();
+ tp = tcp_drop(tp, ECONNABORTED);
+ COMMON_END(PRU_ABORT);
+}
+
+/*
+ * Fill in st_blksize for fstat() operations on a socket.
+ */
+static int
+tcp_usr_sense(struct socket *so, struct stat *sb)
+{
+ int s = splnet();
+
+ sb->st_blksize = so->so_snd.sb_hiwat;
+ splx(s);
+ return 0;
+}
+
+/*
+ * Receive out-of-band data.
+ */
+static int
+tcp_usr_rcvoob(struct socket *so, struct mbuf *m, intptr_t flags)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+
+ COMMON_START();
+ if ((so->so_oobmark == 0 &&
+ (so->so_state & SS_RCVATMARK) == 0) ||
+ so->so_options & SO_OOBINLINE ||
+ tp->t_oobflags & TCPOOB_HADDATA) {
+ error = EINVAL;
+ goto out;
+ }
+ if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
+ error = EWOULDBLOCK;
+ goto out;
+ }
+ m->m_len = 1;
+ *mtod(m, caddr_t) = tp->t_iobc;
+ if ((flags & MSG_PEEK) == 0)
+ tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+ COMMON_END(PRU_RCVOOB);
+}
+
+static int
+tcp_usr_sockaddr(struct socket *so, struct mbuf *nam)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+
+ COMMON_START();
+ in_setsockaddr(inp, nam);
+ COMMON_END(PRU_SOCKADDR);
+}
+
+static int
+tcp_usr_peeraddr(struct socket *so, struct mbuf *nam)
+{
+ int s = splnet();
+ int error = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+
+ COMMON_START();
+ in_setpeeraddr(inp, nam);
+ COMMON_END(PRU_PEERADDR);
+}
+
+/*
+ * XXX - this should just be a call to in_control, but we need to get
+ * the types worked out.
+ */
+static int
+tcp_usr_control(struct socket *so, intptr_t cmd, caddr_t arg, struct ifnet *ifp)
+{
+ return in_control(so, cmd, arg, ifp);
+}
+
+/* xxx - should be const */
+struct pr_usrreqs tcp_usrreqs = {
+ tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
+ tcp_usr_connect, pru_connect2_notsupp, tcp_usr_control, tcp_usr_detach,
+ tcp_usr_disconnect, tcp_usr_listen, tcp_usr_peeraddr, tcp_usr_rcvd,
+ tcp_usr_rcvoob, tcp_usr_send, tcp_usr_sense, tcp_usr_shutdown,
+ tcp_usr_sockaddr
+};
+
+/*
+ * Common subroutine to open a TCP connection to remote host specified
+ * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
+ * port number if needed. Call in_pcbladdr to do the routing and to choose
+ * a local host address (interface). If there is an existing incarnation
+ * of the same connection in TIME-WAIT state and if the remote host was
+ * sending CC options and if the connection duration was < MSL, then
+ * truncate the previous TIME-WAIT state and proceed.
+ * Initialize connection parameters and enter SYN-SENT state.
+ */
+static int
+tcp_connect(struct tcpcb *tp, struct mbuf *nam)
+{
+ struct inpcb *inp = tp->t_inpcb, *oinp;
+ struct socket *so = inp->inp_socket;
+ struct tcpcb *otp;
+ struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
+ struct sockaddr_in *ifaddr;
+ int error;
+ struct rmxp_tao *taop;
+ struct rmxp_tao tao_noncached;
+
+ if (inp->inp_lport == 0) {
+ error = in_pcbbind(inp, NULL);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Cannot simply call in_pcbconnect, because there might be an
+ * earlier incarnation of this same connection still in
+ * TIME_WAIT state, creating an ADDRINUSE error.
+ */
+ error = in_pcbladdr(inp, nam, &ifaddr);
+ if (error)
+ return error;
+ oinp = in_pcblookuphash(inp->inp_pcbinfo,
+ sin->sin_addr, sin->sin_port,
+ inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
+ : ifaddr->sin_addr,
+ inp->inp_lport, 0);
+ if (oinp) {
+ if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
+ otp->t_state == TCPS_TIME_WAIT &&
+ otp->t_duration < TCPTV_MSL &&
+ (otp->t_flags & TF_RCVD_CC))
+ otp = tcp_close(otp);
+ else
+ return EADDRINUSE;
+ }
+ if (inp->inp_laddr.s_addr == INADDR_ANY)
+ inp->inp_laddr = ifaddr->sin_addr;
+ inp->inp_faddr = sin->sin_addr;
+ inp->inp_fport = sin->sin_port;
+ in_pcbrehash(inp);
+
+ tp->t_template = tcp_template(tp);
+ if (tp->t_template == 0) {
+ in_pcbdisconnect(inp);
+ return ENOBUFS;
+ }
+
+ /* Compute window scaling to request. */
+ while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+ (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
+ tp->request_r_scale++;
+
+ soisconnecting(so);
+ tcpstat.tcps_connattempt++;
+ tp->t_state = TCPS_SYN_SENT;
+ tp->t_timer[TCPT_KEEP] = tcp_keepinit;
+ tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
+ tcp_sendseqinit(tp);
+
+ /*
+ * Generate a CC value for this connection and
+ * check whether CC or CCnew should be used.
+ */
+ if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
+ taop = &tao_noncached;
+ bzero(taop, sizeof(*taop));
+ }
+
+ tp->cc_send = CC_INC(tcp_ccgen);
+ if (taop->tao_ccsent != 0 &&
+ CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
+ taop->tao_ccsent = tp->cc_send;
+ } else {
+ taop->tao_ccsent = 0;
+ tp->t_flags |= TF_SENDCCNEW;
+ }
+
+ return 0;
+}
+
+int
+tcp_ctloutput(int op, struct socket *so, int level, int optname,
+ struct mbuf **mp)
+{
+ int error = 0, s;
+ struct inpcb *inp;
+ register struct tcpcb *tp;
+ register struct mbuf *m;
+ register int i;
+
+ s = splnet();
+ inp = sotoinpcb(so);
+ if (inp == NULL) {
+ splx(s);
+ if (op == PRCO_SETOPT && *mp)
+ (void) m_free(*mp);
+ return (ECONNRESET);
+ }
+ if (level != IPPROTO_TCP) {
+ error = ip_ctloutput(op, so, level, optname, mp);
+ splx(s);
+ return (error);
+ }
+ tp = intotcpcb(inp);
+
+ switch (op) {
+
+ case PRCO_SETOPT:
+ m = *mp;
+ switch (optname) {
+
+ case TCP_NODELAY:
+ if (m == NULL || m->m_len < sizeof (int))
+ error = EINVAL;
+ else if (*mtod(m, int *))
+ tp->t_flags |= TF_NODELAY;
+ else
+ tp->t_flags &= ~TF_NODELAY;
+ break;
+
+ case TCP_MAXSEG:
+ if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
+ tp->t_maxseg = i;
+ else
+ error = EINVAL;
+ break;
+
+ case TCP_NOOPT:
+ if (m == NULL || m->m_len < sizeof (int))
+ error = EINVAL;
+ else if (*mtod(m, int *))
+ tp->t_flags |= TF_NOOPT;
+ else
+ tp->t_flags &= ~TF_NOOPT;
+ break;
+
+ case TCP_NOPUSH:
+ if (m == NULL || m->m_len < sizeof (int))
+ error = EINVAL;
+ else if (*mtod(m, int *))
+ tp->t_flags |= TF_NOPUSH;
+ else
+ tp->t_flags &= ~TF_NOPUSH;
+ break;
+
+ default:
+ error = ENOPROTOOPT;
+ break;
+ }
+ if (m)
+ (void) m_free(m);
+ break;
+
+ case PRCO_GETOPT:
+ *mp = m = m_get(M_WAIT, MT_SOOPTS);
+ m->m_len = sizeof(int);
+
+ switch (optname) {
+ case TCP_NODELAY:
+ *mtod(m, int *) = tp->t_flags & TF_NODELAY;
+ break;
+ case TCP_MAXSEG:
+ *mtod(m, int *) = tp->t_maxseg;
+ break;
+ case TCP_NOOPT:
+ *mtod(m, int *) = tp->t_flags & TF_NOOPT;
+ break;
+ case TCP_NOPUSH:
+ *mtod(m, int *) = tp->t_flags & TF_NOPUSH;
+ break;
+ default:
+ error = ENOPROTOOPT;
+ break;
+ }
+ break;
+ }
+ splx(s);
+ return (error);
+}
+
+/*
+ * tcp_sendspace and tcp_recvspace are the default send and receive window
+ * sizes, respectively. These are obsolescent (this information should
+ * be set by the route).
+ */
+u_long tcp_sendspace = 1024*16;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace,
+ CTLFLAG_RW, &tcp_sendspace , 0, "");
+u_long tcp_recvspace = 1024*16;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace,
+ CTLFLAG_RW, &tcp_recvspace , 0, "");
+
+#if defined(__rtems__)
+void rtems_set_tcp_buffer_sizes(u_long sendspace, u_long recvspace)
+{
+ if ( sendspace != 0 )
+ tcp_sendspace = sendspace;
+ if ( recvspace != 0 )
+ tcp_recvspace = recvspace;
+}
+#endif
+
+/*
+ * Attach TCP protocol to socket, allocating
+ * internet protocol control block, tcp control block,
+ * buffer space, and entering LISTEN state if to accept connections.
+ */
+static int
+tcp_attach(struct socket *so)
+{
+ register struct tcpcb *tp;
+ struct inpcb *inp;
+ int error;
+
+ if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+ error = soreserve(so, tcp_sendspace, tcp_recvspace);
+ if (error)
+ return (error);
+ }
+ error = in_pcballoc(so, &tcbinfo);
+ if (error)
+ return (error);
+ inp = sotoinpcb(so);
+ tp = tcp_newtcpcb(inp);
+ if (tp == 0) {
+ int nofd = so->so_state & SS_NOFDREF; /* XXX */
+
+ so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
+ in_pcbdetach(inp);
+ so->so_state |= nofd;
+ return (ENOBUFS);
+ }
+ tp->t_state = TCPS_CLOSED;
+ return (0);
+}
+
+/*
+ * Initiate (or continue) disconnect.
+ * If embryonic state, just send reset (once).
+ * If in ``let data drain'' option and linger null, just drop.
+ * Otherwise (hard), mark socket disconnecting and drop
+ * current input data; switch states based on user close, and
+ * send segment to peer (with FIN).
+ */
+static struct tcpcb *
+tcp_disconnect(struct tcpcb *tp)
+{
+ struct socket *so = tp->t_inpcb->inp_socket;
+
+ if (tp->t_state < TCPS_ESTABLISHED)
+ tp = tcp_close(tp);
+ else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+ tp = tcp_drop(tp, 0);
+ else {
+ soisdisconnecting(so);
+ sbflush(&so->so_rcv);
+ tp = tcp_usrclosed(tp);
+ if (tp)
+ (void) tcp_output(tp);
+ }
+ return (tp);
+}
+
+/*
+ * User issued close, and wish to trail through shutdown states:
+ * if never received SYN, just forget it. If got a SYN from peer,
+ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
+ * If already got a FIN from peer, then almost done; go to LAST_ACK
+ * state. In all other cases, have already sent FIN to peer (e.g.
+ * after PRU_SHUTDOWN), and just have to play tedious game waiting
+ * for peer to send FIN or not respond to keep-alives, etc.
+ * We can let the user exit from the close as soon as the FIN is acked.
+ */
+static struct tcpcb *
+tcp_usrclosed(struct tcpcb *tp)
+{
+
+ switch (tp->t_state) {
+
+ case TCPS_CLOSED:
+ case TCPS_LISTEN:
+ tp->t_state = TCPS_CLOSED;
+ tp = tcp_close(tp);
+ break;
+
+ case TCPS_SYN_SENT:
+ case TCPS_SYN_RECEIVED:
+ tp->t_flags |= TF_NEEDFIN;
+ break;
+
+ case TCPS_ESTABLISHED:
+ tp->t_state = TCPS_FIN_WAIT_1;
+ break;
+
+ case TCPS_CLOSE_WAIT:
+ tp->t_state = TCPS_LAST_ACK;
+ break;
+ }
+ if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
+ soisdisconnected(tp->t_inpcb->inp_socket);
+ /* To prevent the connection hanging in FIN_WAIT_2 forever. */
+ if (tp->t_state == TCPS_FIN_WAIT_2)
+ tp->t_timer[TCPT_2MSL] = tcp_maxidle;
+ }
+ return (tp);
+}
diff --git a/netinet/tcp_var.h b/netinet/tcp_var.h
new file mode 100644
index 0000000..cd70970
--- /dev/null
+++ b/netinet/tcp_var.h
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 1982, 1986, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
+ * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.121 2005/04/21 20:11:01 ps Exp $
+ */
+
+#ifndef _NETINET_TCP_VAR_H_
+#define _NETINET_TCP_VAR_H_
+
+#include <netinet/tcp.h>
+
+/*
+ * Kernel variables for tcp.
+ */
+
+#ifdef __BSD_VISIBLE
+#include <netinet/tcp_timer.h> /* TCPT_NTIMERS */
+
+/*
+ * Tcp control block, one per tcp; fields:
+ */
+struct tcpcb {
+ struct tcpiphdr *seg_next; /* sequencing queue */
+ struct tcpiphdr *seg_prev;
+ int t_state; /* state of this connection */
+ u_int t_flags;
+#define TF_ACKNOW 0x000001 /* ack peer immediately */
+#define TF_DELACK 0x000002 /* ack, but try to delay it */
+#define TF_NODELAY 0x000004 /* don't delay packets to coalesce */
+#define TF_NOOPT 0x000008 /* don't use tcp options */
+#define TF_SENTFIN 0x000010 /* have sent FIN */
+#define TF_REQ_SCALE 0x000020 /* have/will request window scaling */
+#define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */
+#define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */
+#define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */
+#define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */
+#define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */
+#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */
+#define TF_NOPUSH 0x001000 /* don't push */
+#define TF_REQ_CC 0x002000 /* have/will request CC */
+#define TF_RCVD_CC 0x004000 /* a CC was received in SYN */
+#define TF_SENDCCNEW 0x008000 /* send CCnew instead of CC in SYN */
+#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */
+#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */
+#define TF_LASTIDLE 0x040000 /* connection was previously idle */
+#define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */
+#define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */
+#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */
+#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */
+ int t_force; /* 1 if forcing out a byte */
+ int t_timer[TCPT_NTIMERS]; /* tcp timers */
+ int t_rxtshift; /* log(2) of rexmt exp. backoff */
+ int t_rxtcur; /* current retransmit value */
+ int t_dupacks; /* consecutive dup acks recd */
+ u_int t_maxseg; /* maximum segment size */
+ u_int t_maxopd; /* mss plus options */
+ struct tcpiphdr *t_template; /* skeletal packet for transmit */
+ struct inpcb *t_inpcb; /* back pointer to internet pcb */
+/*
+ * The following fields are used as in the protocol specification.
+ * See RFC783, Dec. 1981, page 21.
+ */
+/* send sequence variables */
+ tcp_seq snd_una; /* send unacknowledged */
+ tcp_seq snd_max; /* highest sequence number sent;
+ * used to recognize retransmits
+ */
+ tcp_seq snd_nxt; /* send next */
+ tcp_seq snd_up; /* send urgent pointer */
+
+ tcp_seq snd_wl1; /* window update seg seq number */
+ tcp_seq snd_wl2; /* window update seg ack number */
+ tcp_seq iss; /* initial send sequence number */
+ tcp_seq irs; /* initial receive sequence number */
+
+ tcp_seq rcv_nxt; /* receive next */
+ tcp_seq rcv_adv; /* advertised window */
+ u_long rcv_wnd; /* receive window */
+ tcp_seq rcv_up; /* receive urgent pointer */
+
+ u_long snd_wnd; /* send window */
+/*
+ * Additional variables for this implementation.
+ */
+/* congestion control (for slow start, source quench, retransmit after loss) */
+ u_long snd_cwnd; /* congestion-controlled window */
+ u_long snd_ssthresh; /* snd_cwnd size threshold for
+ * slow start exponential to
+ * linear switch
+ */
+/*
+ * transmit timing stuff. See below for scale of srtt and rttvar.
+ * "Variance" is actually smoothed difference.
+ */
+ u_int t_idle; /* inactivity time */
+ int t_rtt; /* round trip time */
+ tcp_seq t_rtseq; /* sequence number being timed */
+ int t_srtt; /* smoothed round-trip time */
+ int t_rttvar; /* variance in round-trip time */
+ u_int t_rttmin; /* minimum rtt allowed */
+ u_long max_sndwnd; /* largest window peer has offered */
+
+ int t_softerror; /* possible error not yet reported */
+/* out-of-band data */
+ char t_oobflags; /* have some */
+ char t_iobc; /* input character */
+#define TCPOOB_HAVEDATA 0x01
+#define TCPOOB_HADDATA 0x02
+/* RFC 1323 variables */
+ u_char snd_scale; /* window scaling for send window */
+ u_char rcv_scale; /* window scaling for recv window */
+ u_char request_r_scale; /* pending window scaling */
+ u_char requested_s_scale;
+ u_long ts_recent; /* timestamp echo data */
+
+ u_long ts_recent_age; /* when last updated */
+ tcp_seq last_ack_sent;
+/* RFC 1644 variables */
+ tcp_cc cc_send; /* send connection count */
+ tcp_cc cc_recv; /* receive connection count */
+ u_long t_duration; /* connection duration */
+
+/* TUBA stuff */
+ caddr_t t_tuba_pcb; /* next level down pcb for TCP over z */
+/* More RTT stuff */
+ u_long t_rttupdated; /* number of times rtt sampled */
+};
+
+/*
+ * Structure to hold TCP options that are only used during segment
+ * processing (in tcp_input), but not held in the tcpcb.
+ * It's basically used to reduce the number of parameters
+ * to tcp_dooptions.
+ */
+struct tcpopt {
+ u_long to_flags; /* which options are present */
+#define TOF_TS 0x0001 /* timestamp */
+#define TOF_CC 0x0002 /* CC and CCnew are exclusive */
+#define TOF_CCNEW 0x0004
+#define TOF_CCECHO 0x0008
+#define TOF_MSS 0x0010
+#define TOF_SCALE 0x0020
+#define TOF_SIGNATURE 0x0040 /* signature option present */
+#define TOF_SIGLEN 0x0080 /* signature length valid (RFC2385) */
+#define TOF_SACK 0x0100 /* Peer sent SACK option */
+ u_int32_t to_tsval;
+ u_int32_t to_tsecr;
+ tcp_cc to_cc; /* holds CC or CCnew */
+ tcp_cc to_ccecho;
+};
+
+/*
+ * The TAO cache entry which is stored in the protocol family specific
+ * portion of the route metrics.
+ */
+struct rmxp_tao {
+ tcp_cc tao_cc; /* latest CC in valid SYN */
+ tcp_cc tao_ccsent; /* latest CC sent to peer */
+ u_short tao_mssopt; /* peer's cached MSS */
+};
+#define rmx_taop(r) ((struct rmxp_tao *)(r).rmx_filler)
+
+#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
+#define intotw(ip) ((struct tcptw *)(ip)->inp_ppcb)
+#define sototcpcb(so) (intotcpcb(sotoinpcb(so)))
+#endif /* __BSD_VISIBLE */
+
+/*
+ * The smoothed round-trip time and estimated variance
+ * are stored as fixed point numbers scaled by the values below.
+ * For convenience, these scales are also used in smoothing the average
+ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
+ * With these scales, srtt has 3 bits to the right of the binary point,
+ * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the
+ * binary point, and is smoothed with an ALPHA of 0.75.
+ */
+#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */
+#define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. */
+#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */
+#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */
+#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */
+
+/*
+ * The initial retransmission should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias. When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer. The bias will give us exactly the
+ * 1.5 tick we need. But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks).
+ * This version of the macro adapted from a paper by Lawrence
+ * Brakmo and Larry Peterson which outlines a problem caused
+ * by insufficient precision in the original implementation,
+ * which results in inappropriately large RTO values for very
+ * fast networks.
+ */
+#define TCP_REXMTVAL(tp) \
+ ((((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \
+ + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
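+
+/*
+ * Editor's note (not part of the original sources): unwinding the fixed
+ * point arithmetic above, with srtt scaled by TCP_RTT_SCALE (32) and
+ * rttvar scaled by TCP_RTTVAR_SCALE (16),
+ *
+ *   ((srtt >> 3) + rttvar) >> 2  =  srtt/32 + 4 * (rttvar/16)
+ *
+ * i.e. the intended "rtt + 4 * rttvar" in slow-timeout ticks.  For
+ * example, a smoothed rtt of one tick (t_srtt = 32) and a variance of
+ * one tick (t_rttvar = 16) give ((32 >> 3) + 16) >> 2 = 20 >> 2 = 5.
+ */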
+
+/* XXX
+ * We want to avoid doing m_pullup on incoming packets but that
+ * means avoiding dtom on the tcp reassembly code. That in turn means
+ * keeping an mbuf pointer in the reassembly queue (since we might
+ * have a cluster). As a quick hack, the source & destination
+ * port numbers (which are no longer needed once we've located the
+ * tcpcb) are overlayed with an mbuf pointer.
+ */
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
+#define STR32_UNALGN(ti,m) \
+ (ti)->ti_sport = (unsigned short)(((unsigned int) m & 0xffff0000) >> 16); \
+ (ti)->ti_dport = (unsigned short) ((unsigned int) m & 0x0000ffff);
+#define LD32_UNALGN(ti,m) \
+ m = (struct mbuf *)((((unsigned int) (ti)->ti_sport) << 16) | ( (unsigned int)(ti)->ti_dport));
+
+#else
+#define REASS_MBUF(ti) (*(struct mbuf **)&((ti)->ti_t))
+#endif
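+
+/*
+ * Editor's note (not part of the original sources): the STR32_UNALGN/
+ * LD32_UNALGN variants above split the pointer into two 16-bit halves
+ * because the overlaid port fields may be only 16-bit aligned and ARM and
+ * MIPS fault on unaligned 32-bit accesses; the split also assumes that
+ * pointers fit in 32 bits on those targets.
+ */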
+
+/*
+ * TCP statistics.
+ * Many of these should be kept per connection,
+ * but that's inconvenient at the moment.
+ */
+struct tcpstat {
+ u_long tcps_connattempt; /* connections initiated */
+ u_long tcps_accepts; /* connections accepted */
+ u_long tcps_connects; /* connections established */
+ u_long tcps_drops; /* connections dropped */
+ u_long tcps_conndrops; /* embryonic connections dropped */
+ u_long tcps_closed; /* conn. closed (includes drops) */
+ u_long tcps_segstimed; /* segs where we tried to get rtt */
+ u_long tcps_rttupdated; /* times we succeeded */
+ u_long tcps_delack; /* delayed acks sent */
+ u_long tcps_timeoutdrop; /* conn. dropped in rxmt timeout */
+ u_long tcps_rexmttimeo; /* retransmit timeouts */
+ u_long tcps_persisttimeo; /* persist timeouts */
+ u_long tcps_keeptimeo; /* keepalive timeouts */
+ u_long tcps_keepprobe; /* keepalive probes sent */
+ u_long tcps_keepdrops; /* connections dropped in keepalive */
+
+ u_long tcps_sndtotal; /* total packets sent */
+ u_long tcps_sndpack; /* data packets sent */
+ u_long tcps_sndbyte; /* data bytes sent */
+ u_long tcps_sndrexmitpack; /* data packets retransmitted */
+ u_long tcps_sndrexmitbyte; /* data bytes retransmitted */
+ u_long tcps_sndacks; /* ack-only packets sent */
+ u_long tcps_sndprobe; /* window probes sent */
+ u_long tcps_sndurg; /* packets sent with URG only */
+ u_long tcps_sndwinup; /* window update-only packets sent */
+ u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */
+
+ u_long tcps_rcvtotal; /* total packets received */
+ u_long tcps_rcvpack; /* packets received in sequence */
+ u_long tcps_rcvbyte; /* bytes received in sequence */
+ u_long tcps_rcvbadsum; /* packets received with cksum errs */
+ u_long tcps_rcvbadoff; /* packets received with bad offset */
+ u_long tcps_rcvshort; /* packets received too short */
+ u_long tcps_rcvduppack; /* duplicate-only packets received */
+ u_long tcps_rcvdupbyte; /* duplicate-only bytes received */
+ u_long tcps_rcvpartduppack; /* packets with some duplicate data */
+ u_long tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
+ u_long tcps_rcvoopack; /* out-of-order packets received */
+ u_long tcps_rcvoobyte; /* out-of-order bytes received */
+ u_long tcps_rcvpackafterwin; /* packets with data after window */
+ u_long tcps_rcvbyteafterwin; /* bytes rcvd after window */
+ u_long tcps_rcvafterclose; /* packets rcvd after "close" */
+ u_long tcps_rcvwinprobe; /* rcvd window probe packets */
+ u_long tcps_rcvdupack; /* rcvd duplicate acks */
+ u_long tcps_rcvacktoomuch; /* rcvd acks for unsent data */
+ u_long tcps_rcvackpack; /* rcvd ack packets */
+ u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */
+ u_long tcps_rcvwinupd; /* rcvd window update packets */
+ u_long tcps_pawsdrop; /* segments dropped due to PAWS */
+ u_long tcps_predack; /* times hdr predict ok for acks */
+ u_long tcps_preddat; /* times hdr predict ok for data pkts */
+ u_long tcps_pcbcachemiss;
+ u_long tcps_cachedrtt; /* times cached RTT in route updated */
+ u_long tcps_cachedrttvar; /* times cached rttvar updated */
+ u_long tcps_cachedssthresh; /* times cached ssthresh updated */
+ u_long tcps_usedrtt; /* times RTT initialized from route */
+ u_long tcps_usedrttvar; /* times RTTVAR initialized from rt */
+ u_long tcps_usedssthresh; /* times ssthresh initialized from rt*/
+ u_long tcps_persistdrop; /* timeout in persist state */
+ u_long tcps_badsyn; /* bogus SYN, e.g. premature ACK */
+ u_long tcps_mturesent; /* resends due to MTU discovery */
+ u_long tcps_listendrop; /* listen queue overflows */
+};
+
+/*
+ * TCB structure exported to user-land via sysctl(3).
+ * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been
+ * included, since not all of our clients include them.
+ */
+#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_)
+struct xtcpcb {
+ size_t xt_len;
+ struct inpcb xt_inp;
+ struct tcpcb xt_tp;
+#if 0
+ struct xsocket xt_socket;
+ u_quad_t xt_alignment_hack;
+#endif
+};
+#endif
+
+/*
+ * Names for TCP sysctl objects
+ */
+#define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */
+#define TCPCTL_DO_RFC1644 2 /* use RFC-1644 extensions */
+#define TCPCTL_MSSDFLT 3 /* MSS default */
+#define TCPCTL_STATS 4 /* statistics (read-only) */
+#define TCPCTL_RTTDFLT 5 /* default RTT estimate */
+#define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */
+#define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */
+#define TCPCTL_SENDSPACE 8 /* send buffer space */
+#define TCPCTL_RECVSPACE 9 /* receive buffer space */
+#define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */
+#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */
+#define TCPCTL_MAXID 12
+
+#define TCPCTL_NAMES { \
+ { 0, 0 }, \
+ { "rfc1323", CTLTYPE_INT }, \
+ { "rfc1644", CTLTYPE_INT }, \
+ { "mssdflt", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+ { "rttdflt", CTLTYPE_INT }, \
+ { "keepidle", CTLTYPE_INT }, \
+ { "keepintvl", CTLTYPE_INT }, \
+ { "sendspace", CTLTYPE_INT }, \
+ { "recvspace", CTLTYPE_INT }, \
+ { "keepinit", CTLTYPE_INT }, \
+}
+
+#ifdef _KERNEL
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_inet_tcp);
+#endif
+
+extern struct inpcbhead tcb; /* head of queue of active tcpcb's */
+extern struct inpcbinfo tcbinfo;
+extern struct tcpstat tcpstat; /* tcp statistics */
+extern int tcp_mssdflt; /* XXX */
+extern u_long tcp_now; /* for RFC 1323 timestamps */
+
+void tcp_canceltimers(struct tcpcb *);
+struct tcpcb *
+ tcp_close(struct tcpcb *);
+void tcp_ctlinput(int, struct sockaddr *, void *);
+int tcp_ctloutput(int, struct socket *, int, int, struct mbuf **);
+struct tcpcb *
+ tcp_drop(struct tcpcb *, int);
+void tcp_drain(void);
+void tcp_fasttimo(void);
+struct rmxp_tao *
+ tcp_gettaocache(struct inpcb *);
+void tcp_init(void);
+void tcp_input(struct mbuf *, int);
+void tcp_mss(struct tcpcb *, int);
+int tcp_mssopt(struct tcpcb *);
+void tcp_mtudisc(struct inpcb *, int);
+struct tcpcb *
+ tcp_newtcpcb(struct inpcb *);
+int tcp_output(struct tcpcb *);
+void tcp_quench(struct inpcb *, int);
+void tcp_respond(struct tcpcb *,
+ struct tcpiphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
+struct rtentry *
+ tcp_rtlookup(struct inpcb *);
+void tcp_setpersist(struct tcpcb *);
+void tcp_slowtimo(void);
+struct tcpiphdr *
+ tcp_template(struct tcpcb *);
+struct tcpcb *
+ tcp_timers(struct tcpcb *, int);
+void tcp_trace(short, short, struct tcpcb *, struct tcpiphdr *, int);
+
+extern struct pr_usrreqs tcp_usrreqs;
+extern u_long tcp_sendspace;
+extern u_long tcp_recvspace;
+
+#endif /* _KERNEL */
+
+#endif /* _NETINET_TCP_VAR_H_ */
diff --git a/netinet/tcpip.h b/netinet/tcpip.h
new file mode 100644
index 0000000..1f16bd0
--- /dev/null
+++ b/netinet/tcpip.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/netinet/tcpip.h,v 1.12.22.1.4.1 2010/06/14 02:09:06 kensmith Exp $
+ */
+
+
+#ifndef _NETINET_TCPIP_H_
+#define _NETINET_TCPIP_H_
+
+#ifdef __BSD_VISIBLE
+#include <netinet/tcp.h> /* struct tcphdr */
+#include <netinet/ip_var.h> /* struct ipovly */
+
+/*
+ * Tcp+ip header, after ip options removed.
+ */
+struct tcpiphdr {
+ struct ipovly ti_i; /* overlaid ip structure */
+ struct tcphdr ti_t; /* tcp header */
+};
+#define ti_next ti_i.ih_next
+#define ti_prev ti_i.ih_prev
+#define ti_x1 ti_i.ih_x1
+#define ti_pr ti_i.ih_pr
+#define ti_len ti_i.ih_len
+#define ti_src ti_i.ih_src
+#define ti_dst ti_i.ih_dst
+#define ti_sport ti_t.th_sport
+#define ti_dport ti_t.th_dport
+#define ti_seq ti_t.th_seq
+#define ti_ack ti_t.th_ack
+#define ti_x2 ti_t.th_x2
+#define ti_off ti_t.th_off
+#define ti_flags ti_t.th_flags
+#define ti_win ti_t.th_win
+#define ti_sum ti_t.th_sum
+#define ti_urp ti_t.th_urp
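+
+/*
+ * Illustrative use: given a struct tcpiphdr *ti, the macros above let the
+ * TCP code write e.g. ti->ti_seq or ti->ti_len instead of spelling out
+ * ti->ti_t.th_seq and ti->ti_i.ih_len.
+ */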
+#endif /* __BSD_VISIBLE */
+
+#endif
diff --git a/netinet/udp.h b/netinet/udp.h
new file mode 100644
index 0000000..98fc433
--- /dev/null
+++ b/netinet/udp.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)udp.h 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _NETINET_UDP_H_
+#define _NETINET_UDP_H_
+
+/*
+ * Udp protocol header.
+ * Per RFC 768, August, 1980.
+ */
+struct udphdr {
+ u_short uh_sport; /* source port */
+ u_short uh_dport; /* destination port */
+ u_short uh_ulen; /* udp length */
+ u_short uh_sum; /* udp checksum */
+};
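+
+/*
+ * Illustrative sketch (not prescribed by this header): a sender fills all
+ * fields in network byte order, e.g. for a hypothetical payload_len:
+ *
+ *	struct udphdr uh;
+ *	uh.uh_sport = htons(12345);
+ *	uh.uh_dport = htons(53);
+ *	uh.uh_ulen  = htons(sizeof(struct udphdr) + payload_len);
+ *	uh.uh_sum   = 0;	(checksum computed later over the pseudo-header)
+ */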
+
+#endif
diff --git a/netinet/udp_usrreq.c b/netinet/udp_usrreq.c
new file mode 100644
index 0000000..e4ae5c0
--- /dev/null
+++ b/netinet/udp_usrreq.c
@@ -0,0 +1,736 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
+ * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.170 2004/11/08 14:44:53 phk Exp $
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <rtems/rtems_netinet_in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
+/*
+ * UDP protocol implementation.
+ * Per RFC 768, August, 1980.
+ */
+#ifndef COMPAT_42
+static int udpcksum = 1;
+#else
+static int udpcksum = 0; /* XXX */
+#endif
+SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
+ &udpcksum, 0, "");
+
+static int log_in_vain = 0;
+SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
+ &log_in_vain, 0, "");
+
+struct inpcbhead udb; /* from udp_var.h */
+struct inpcbinfo udbinfo;
+
+#ifndef UDBHASHSIZE
+#define UDBHASHSIZE 64
+#endif
+
+struct udpstat udpstat; /* from udp_var.h */
+SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD,
+ &udpstat, udpstat, "");
+
+static struct sockaddr_in udp_in = { sizeof(udp_in), AF_INET, 0, {0}, {0} };
+
+static void udp_detach(struct inpcb *);
+static int udp_output(struct inpcb *, struct mbuf *, struct mbuf *,
+ struct mbuf *);
+static void udp_notify(struct inpcb *, int);
+
+void
+udp_init(void)
+{
+ LIST_INIT(&udb);
+ udbinfo.listhead = &udb;
+ udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
+}
+
+void
+udp_input(struct mbuf *m, int iphlen)
+{
+ register struct ip *ip;
+ register struct udphdr *uh;
+ register struct inpcb *inp;
+ struct mbuf *opts = 0;
+ int len;
+ struct ip save_ip;
+
+ udpstat.udps_ipackets++;
+
+ /*
+ * Strip IP options, if any; should skip this,
+ * make available to user, and use on returned packets,
+ * but we don't yet have a way to check the checksum
+ * with options still present.
+ */
+ if (iphlen > sizeof (struct ip)) {
+ ip_stripoptions(m, (struct mbuf *)0);
+ iphlen = sizeof(struct ip);
+ }
+
+ /*
+ * Get IP and UDP header together in first mbuf.
+ */
+ ip = mtod(m, struct ip *);
+ if (m->m_len < iphlen + sizeof(struct udphdr)) {
+ if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
+ udpstat.udps_hdrops++;
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ uh = (struct udphdr *)((caddr_t)ip + iphlen);
+
+ /*
+ * Make mbuf data length reflect UDP length.
+ * If not enough data to reflect UDP length, drop.
+ */
+ len = ntohs((u_short)uh->uh_ulen);
+ if (ip->ip_len != len) {
+ if (len > ip->ip_len || len < sizeof(struct udphdr)) {
+ udpstat.udps_badlen++;
+ goto bad;
+ }
+ m_adj(m, len - ip->ip_len);
+ /* ip->ip_len = len; */
+ }
+ /*
+ * Save a copy of the IP header in case we want to restore it
+ * for sending an ICMP error message in response.
+ */
+ save_ip = *ip;
+
+ /*
+ * Checksum extended UDP header and data.
+ */
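+ /*
+ * Note: the ipovly overlay below rebuilds the UDP pseudo-header in place;
+ * in_cksum() over it plus the UDP header and data returns zero exactly
+ * when the received checksum is valid, which is why a nonzero result is
+ * counted as udps_badsum.
+ */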
+ if (uh->uh_sum) {
+ ((struct ipovly *)ip)->ih_next = 0;
+ ((struct ipovly *)ip)->ih_prev = 0;
+ ((struct ipovly *)ip)->ih_x1 = 0;
+ ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
+ uh->uh_sum = in_cksum(m, len + sizeof (struct ip));
+ if (uh->uh_sum) {
+ udpstat.udps_badsum++;
+ m_freem(m);
+ return;
+ }
+ }
+
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
+ struct inpcb *last;
+ /*
+ * Deliver a multicast or broadcast datagram to *all* sockets
+ * for which the local and remote addresses and ports match
+ * those of the incoming datagram. This allows more than
+ * one process to receive multi/broadcasts on the same port.
+ * (This really ought to be done for unicast datagrams as
+ * well, but that would cause problems with existing
+ * applications that open both address-specific sockets and
+ * a wildcard socket listening to the same port -- they would
+ * end up receiving duplicates of every unicast datagram.
+ * Those applications open the multiple sockets to overcome an
+ * inadequacy of the UDP socket interface, but for backwards
+ * compatibility we avoid the problem here rather than
+ * fixing the interface. Maybe 4.5BSD will remedy this?)
+ */
+
+ /*
+ * Construct sockaddr format source address.
+ */
+ udp_in.sin_port = uh->uh_sport;
+ udp_in.sin_addr = ip->ip_src;
+ m->m_len -= sizeof (struct udpiphdr);
+ m->m_data += sizeof (struct udpiphdr);
+ /*
+ * Locate pcb(s) for datagram.
+ * (Algorithm copied from raw_intr().)
+ */
+ last = NULL;
+ for (inp = udb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
+ if (inp->inp_lport != uh->uh_dport)
+ continue;
+ if (inp->inp_laddr.s_addr != INADDR_ANY) {
+ if (inp->inp_laddr.s_addr !=
+ ip->ip_dst.s_addr)
+ continue;
+ }
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ if (inp->inp_faddr.s_addr !=
+ ip->ip_src.s_addr ||
+ inp->inp_fport != uh->uh_sport)
+ continue;
+ }
+
+ if (last != NULL) {
+ struct mbuf *n;
+
+ if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
+ if (last->inp_flags & INP_CONTROLOPTS
+ || last->inp_socket->so_options & SO_TIMESTAMP)
+ ip_savecontrol(last, &opts, ip, n);
+ if (sbappendaddr(&last->inp_socket->so_rcv,
+ (struct sockaddr *)&udp_in,
+ n, opts) == 0) {
+ m_freem(n);
+ if (opts)
+ m_freem(opts);
+ udpstat.udps_fullsock++;
+ } else
+ sorwakeup(last->inp_socket);
+ opts = 0;
+ }
+ }
+ last = inp;
+ /*
+ * Don't look for additional matches if this one does
+ * not have either the SO_REUSEPORT or SO_REUSEADDR
+ * socket options set. This heuristic avoids searching
+ * through all pcbs in the common case of a non-shared
+ * port. It assumes that an application will never
+ * clear these options after setting them.
+ */
+ if (((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0))
+ break;
+ }
+
+ if (last == NULL) {
+ /*
+ * No matching pcb found; discard datagram.
+ * (No need to send an ICMP Port Unreachable
+ * for a broadcast or multicast datagram.)
+ */
+ udpstat.udps_noportbcast++;
+ goto bad;
+ }
+ if (last->inp_flags & INP_CONTROLOPTS
+ || last->inp_socket->so_options & SO_TIMESTAMP)
+ ip_savecontrol(last, &opts, ip, m);
+ if (sbappendaddr(&last->inp_socket->so_rcv,
+ (struct sockaddr *)&udp_in,
+ m, opts) == 0) {
+ udpstat.udps_fullsock++;
+ goto bad;
+ }
+ sorwakeup(last->inp_socket);
+ return;
+ }
+ /*
+ * Locate pcb for datagram.
+ */
+ inp = in_pcblookuphash(&udbinfo, ip->ip_src, uh->uh_sport,
+ ip->ip_dst, uh->uh_dport, 1);
+ if (inp == NULL) {
+ if (log_in_vain) {
+ char buf0[INET_ADDRSTRLEN];
+ char buf1[INET_ADDRSTRLEN];
+
+ log(LOG_INFO, "Connection attempt to UDP %s:%d"
+ " from %s:%d\n",
+ inet_ntoa_r(ip->ip_dst, buf0), ntohs(uh->uh_dport),
+ inet_ntoa_r(ip->ip_src, buf1), ntohs(uh->uh_sport));
+ }
+ udpstat.udps_noport++;
+ if (m->m_flags & (M_BCAST | M_MCAST)) {
+ udpstat.udps_noportbcast++;
+ goto bad;
+ }
+ *ip = save_ip;
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
+ return;
+ }
+
+ /*
+ * Construct sockaddr format source address.
+ * Stuff source address and datagram in user buffer.
+ */
+ udp_in.sin_port = uh->uh_sport;
+ udp_in.sin_addr = ip->ip_src;
+ if (inp->inp_flags & INP_CONTROLOPTS
+ || inp->inp_socket->so_options & SO_TIMESTAMP)
+ ip_savecontrol(inp, &opts, ip, m);
+ iphlen += sizeof(struct udphdr);
+ m->m_len -= iphlen;
+ m->m_pkthdr.len -= iphlen;
+ m->m_data += iphlen;
+ if (sbappendaddr(&inp->inp_socket->so_rcv, (struct sockaddr *)&udp_in,
+ m, opts) == 0) {
+ udpstat.udps_fullsock++;
+ goto bad;
+ }
+ sorwakeup(inp->inp_socket);
+ return;
+bad:
+ m_freem(m);
+ if (opts)
+ m_freem(opts);
+}
+
+/*
+ * Notify a udp user of an asynchronous error;
+ * just wake up so that he can collect error status.
+ */
+static void
+udp_notify(struct inpcb *inp, int errnum)
+{
+ inp->inp_socket->so_error = errnum;
+ sorwakeup(inp->inp_socket);
+ sowwakeup(inp->inp_socket);
+}
+
+void
+udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+ register struct ip *ip = vip;
+ register struct udphdr *uh;
+
+ if (!PRC_IS_REDIRECT(cmd) &&
+ ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0))
+ return;
+ if (ip) {
+ uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+ in_pcbnotify(&udb, sa, uh->uh_dport, ip->ip_src, uh->uh_sport,
+ cmd, udp_notify);
+ } else
+ in_pcbnotify(&udb, sa, 0, zeroin_addr, 0, cmd, udp_notify);
+}
+
+static int
+udp_output(struct inpcb *inp, struct mbuf *m, struct mbuf *addr,
+ struct mbuf *control)
+{
+ register struct udpiphdr *ui;
+ register int len = m->m_pkthdr.len;
+ struct in_addr laddr;
+ int s = 0, error = 0;
+
+ laddr.s_addr = 0;
+ if (control)
+ m_freem(control); /* XXX */
+
+ if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
+ error = EMSGSIZE;
+ goto release;
+ }
+
+ if (addr) {
+ laddr = inp->inp_laddr;
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ error = EISCONN;
+ goto release;
+ }
+ /*
+ * Must block input while temporarily connected.
+ */
+ s = splnet();
+ error = in_pcbconnect(inp, addr);
+ if (error) {
+ splx(s);
+ goto release;
+ }
+ } else {
+ if (inp->inp_faddr.s_addr == INADDR_ANY) {
+ error = ENOTCONN;
+ goto release;
+ }
+ }
+ /*
+ * Calculate data length and get a mbuf
+ * for UDP and IP headers.
+ */
+ M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);
+ if (m == 0) {
+ error = ENOBUFS;
+ if (addr) {
+ in_pcbdisconnect(inp);
+ inp->inp_laddr = laddr;
+ splx(s);
+ }
+ goto release;
+ }
+
+ /*
+ * Fill in mbuf with extended UDP header
+ * and addresses and length put into network format.
+ */
+ ui = mtod(m, struct udpiphdr *);
+ ui->ui_next = ui->ui_prev = 0;
+ ui->ui_x1 = 0;
+ ui->ui_pr = IPPROTO_UDP;
+ ui->ui_len = htons((u_short)len + sizeof (struct udphdr));
+ ui->ui_src = inp->inp_laddr;
+ ui->ui_dst = inp->inp_faddr;
+ ui->ui_sport = inp->inp_lport;
+ ui->ui_dport = inp->inp_fport;
+ ui->ui_ulen = ui->ui_len;
+
+ /*
+ * Stuff checksum and output datagram.
+ */
+ ui->ui_sum = 0;
+ if (udpcksum) {
+ if ((ui->ui_sum = in_cksum(m, sizeof (struct udpiphdr) + len)) == 0)
+ ui->ui_sum = 0xffff;
+ }
+ ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
+ ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
+ ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
+ udpstat.udps_opackets++;
+ error = ip_output(m, inp->inp_options, &inp->inp_route,
+ inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST),
+ inp->inp_moptions);
+
+ if (addr) {
+ in_pcbdisconnect(inp);
+ inp->inp_laddr = laddr;
+ splx(s);
+ }
+ return (error);
+
+release:
+ m_freem(m);
+ return (error);
+}
+
+#ifdef __rtems__
+#define INP_INFO_RLOCK(a)
+#define INP_INFO_RUNLOCK(a)
+#define INP_LOCK(a)
+#define INP_UNLOCK(a)
+#endif
+
+static int
+udp_pcblist(SYSCTL_HANDLER_ARGS)
+{
+ int error, i, n, s;
+ struct inpcb *inp, **inp_list;
+ inp_gen_t gencnt;
+ struct xinpgen xig;
+
+ /*
+ * The process of preparing the PCB list is too time-consuming and
+ * resource-intensive to repeat twice on every request.
+ */
+ if (req->oldptr == 0) {
+ n = udbinfo.ipi_count;
+ req->oldidx = 2 * (sizeof xig)
+ + (n + n/8) * sizeof(struct xinpcb);
+ return 0;
+ }
+
+ if (req->newptr != 0)
+ return EPERM;
+
+ /*
+ * OK, now we're committed to doing something.
+ */
+ s = splnet();
+ gencnt = udbinfo.ipi_gencnt;
+ n = udbinfo.ipi_count;
+ splx(s);
+
+ sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+ + n * sizeof(struct xinpcb));
+
+ xig.xig_len = sizeof xig;
+ xig.xig_count = n;
+ xig.xig_gen = gencnt;
+#if 0
+ xig.xig_sogen = so_gencnt;
+#endif
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ if (error)
+ return error;
+
+ /* ccj add the exit if count is 0 */
+ if (!n)
+ return error;
+
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == 0)
+ return ENOMEM;
+
+ s = splnet();
+ INP_INFO_RLOCK(&udbinfo);
+ for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n;
+ inp = LIST_NEXT(inp, inp_list)) {
+ INP_LOCK(inp);
+ if (inp->inp_gencnt <= gencnt)
+#if 0
+ &&
+ cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
+#endif
+ inp_list[i++] = inp;
+ INP_UNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK(&udbinfo);
+ splx(s);
+ n = i;
+
+ error = 0;
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_LOCK(inp);
+ if (inp->inp_gencnt <= gencnt) {
+ struct xinpcb xi;
+ xi.xi_len = sizeof xi;
+ /* XXX should avoid extra copy */
+ bcopy(inp, &xi.xi_inp, sizeof *inp);
+#if 0
+ if (inp->inp_socket)
+ sotoxsocket(inp->inp_socket, &xi.xi_socket);
+#endif
+ error = SYSCTL_OUT(req, &xi, sizeof xi);
+ }
+ INP_UNLOCK(inp);
+ }
+ if (!error) {
+ /*
+ * Give the user an updated idea of our state.
+ * If the generation differs from what we told
+ * her before, she knows that something happened
+ * while we were processing this request, and it
+ * might be necessary to retry.
+ */
+ s = splnet();
+ INP_INFO_RLOCK(&udbinfo);
+ xig.xig_gen = udbinfo.ipi_gencnt;
+#if 0
+ xig.xig_sogen = so_gencnt;
+#endif
+ xig.xig_count = udbinfo.ipi_count;
+ INP_INFO_RUNLOCK(&udbinfo);
+ splx(s);
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ }
+ free(inp_list, M_TEMP);
+ return error;
+}
+
+SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+ udp_pcblist, "S,xinpcb", "List of active UDP sockets");
+
+static u_long udp_sendspace = 9216; /* really max datagram size */
+SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
+ &udp_sendspace, 0, "");
+
+static u_long udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in)); /* 40 1K datagrams */
+SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+ &udp_recvspace, 0, "");
+
+#if defined(__rtems__)
+void rtems_set_udp_buffer_sizes(u_long sendspace, u_long recvspace)
+{
+ if ( sendspace != 0 )
+ udp_sendspace = sendspace;
+ if ( recvspace != 0 )
+ udp_recvspace = recvspace;
+}
+#endif
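+
+/*
+ * Illustrative call (application code, not part of this file): the values
+ * are picked up when a socket is created (soreserve() at PRU_ATTACH), so an
+ * application would typically call this before opening any UDP sockets,
+ * e.g. rtems_set_udp_buffer_sizes(9216, 80 * 1024);  the sizes shown here
+ * are arbitrary examples.
+ */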
+
+/*ARGSUSED*/
+int
+udp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *addr,
+ struct mbuf *control)
+{
+ struct inpcb *inp = sotoinpcb(so);
+ int error = 0;
+ int s;
+
+ if (req == PRU_CONTROL)
+ return (in_control(so, (uintptr_t)m, (caddr_t)addr,
+ (struct ifnet *)control));
+ if (inp == NULL && req != PRU_ATTACH) {
+ error = EINVAL;
+ goto release;
+ }
+ /*
+ * Note: need to block udp_input while changing
+ * the udp pcb queue and/or pcb addresses.
+ */
+ switch (req) {
+
+ case PRU_ATTACH:
+ if (inp != NULL) {
+ error = EINVAL;
+ break;
+ }
+ s = splnet();
+ error = in_pcballoc(so, &udbinfo);
+ splx(s);
+ if (error)
+ break;
+ error = soreserve(so, udp_sendspace, udp_recvspace);
+ if (error)
+ break;
+ ((struct inpcb *) so->so_pcb)->inp_ip_ttl = ip_defttl;
+ break;
+
+ case PRU_DETACH:
+ udp_detach(inp);
+ break;
+
+ case PRU_BIND:
+ s = splnet();
+ error = in_pcbbind(inp, addr);
+ splx(s);
+ break;
+
+ case PRU_LISTEN:
+ error = EOPNOTSUPP;
+ break;
+
+ case PRU_CONNECT:
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ error = EISCONN;
+ break;
+ }
+ s = splnet();
+ error = in_pcbconnect(inp, addr);
+ splx(s);
+ if (error == 0)
+ soisconnected(so);
+ break;
+
+ case PRU_CONNECT2:
+ error = EOPNOTSUPP;
+ break;
+
+ case PRU_ACCEPT:
+ error = EOPNOTSUPP;
+ break;
+
+ case PRU_DISCONNECT:
+ if (inp->inp_faddr.s_addr == INADDR_ANY) {
+ error = ENOTCONN;
+ break;
+ }
+ s = splnet();
+ in_pcbdisconnect(inp);
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ splx(s);
+ so->so_state &= ~SS_ISCONNECTED; /* XXX */
+ break;
+
+ case PRU_SHUTDOWN:
+ socantsendmore(so);
+ break;
+
+ case PRU_SEND:
+ return (udp_output(inp, m, addr, control));
+
+ case PRU_ABORT:
+ soisdisconnected(so);
+ udp_detach(inp);
+ break;
+
+ case PRU_SOCKADDR:
+ in_setsockaddr(inp, addr);
+ break;
+
+ case PRU_PEERADDR:
+ in_setpeeraddr(inp, addr);
+ break;
+
+ case PRU_SENSE:
+ /*
+ * stat: don't bother with a blocksize.
+ */
+ return (0);
+
+ case PRU_SENDOOB:
+ case PRU_FASTTIMO:
+ case PRU_SLOWTIMO:
+ case PRU_PROTORCV:
+ case PRU_PROTOSEND:
+ error = EOPNOTSUPP;
+ break;
+
+ case PRU_RCVD:
+ case PRU_RCVOOB:
+ return (EOPNOTSUPP); /* do not free mbuf's */
+
+ default:
+ panic("udp_usrreq");
+ }
+
+release:
+ if (control) {
+ printf("udp control data unexpectedly retained\n");
+ m_freem(control);
+ }
+ if (m)
+ m_freem(m);
+ return (error);
+}
+
+static void
+udp_detach(struct inpcb *inp)
+{
+ int s = splnet();
+
+ in_pcbdetach(inp);
+ splx(s);
+}
diff --git a/netinet/udp_var.h b/netinet/udp_var.h
new file mode 100644
index 0000000..c06e12d
--- /dev/null
+++ b/netinet/udp_var.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)udp_var.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/netinet/udp_var.h,v 1.29 2005/01/07 01:45:45 imp Exp $
+ */
+
+
+#ifndef _NETINET_UDP_VAR_H_
+#define _NETINET_UDP_VAR_H_
+
+#include <netinet/ip_var.h> /* struct ipovly */
+#include <netinet/udp.h> /* struct udphdr */
+
+/*
+ * UDP kernel structures and variables.
+ */
+struct udpiphdr {
+ struct ipovly ui_i; /* overlaid ip structure */
+ struct udphdr ui_u; /* udp header */
+};
+#define ui_next ui_i.ih_next
+#define ui_prev ui_i.ih_prev
+#define ui_x1 ui_i.ih_x1
+#define ui_pr ui_i.ih_pr
+#define ui_len ui_i.ih_len
+#define ui_src ui_i.ih_src
+#define ui_dst ui_i.ih_dst
+#define ui_sport ui_u.uh_sport
+#define ui_dport ui_u.uh_dport
+#define ui_ulen ui_u.uh_ulen
+#define ui_sum ui_u.uh_sum
+
+struct udpstat {
+ /* input statistics: */
+ u_long udps_ipackets; /* total input packets */
+ u_long udps_hdrops; /* packet shorter than header */
+ u_long udps_badsum; /* checksum error */
+ u_long udps_badlen; /* data length larger than packet */
+ u_long udps_noport; /* no socket on port */
+ u_long udps_noportbcast; /* of above, arrived as broadcast */
+ u_long udps_fullsock; /* not delivered, input socket full */
+ u_long udpps_pcbcachemiss; /* input packets missing pcb cache */
+ u_long udpps_pcbhashmiss; /* input packets not for hashed pcb */
+ /* output statistics: */
+ u_long udps_opackets; /* total output packets */
+};
+
+/*
+ * Names for UDP sysctl objects
+ */
+#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */
+#define UDPCTL_STATS 2 /* statistics (read-only) */
+#define UDPCTL_MAXDGRAM 3 /* max datagram size */
+#define UDPCTL_RECVSPACE 4 /* default receive buffer space */
+#define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */
+#define UDPCTL_MAXID 6
+
+#define UDPCTL_NAMES { \
+ { 0, 0 }, \
+ { "checksum", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+ { "maxdgram", CTLTYPE_INT }, \
+ { "recvspace", CTLTYPE_INT }, \
+}
+
+#ifdef _KERNEL
+SYSCTL_DECL(_net_inet_udp);
+
+extern struct inpcbhead udb;
+extern struct inpcbinfo udbinfo;
+extern struct udpstat udpstat;
+
+void udp_ctlinput(int, struct sockaddr *, void *);
+void udp_init(void);
+void udp_input(struct mbuf *, int);
+int udp_usrreq(struct socket *,
+ int, struct mbuf *, struct mbuf *, struct mbuf *);
+#endif
+
+#endif