diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2013-11-06 16:20:21 +0100 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2013-11-11 10:08:08 +0100 |
commit | 66659ff1ad6831b0ea7425fa6ecd8a8687523658 (patch) | |
tree | 48e22b475fa8854128e0861a33fed6f78c8094b5 /freebsd/sys/contrib | |
parent | Define __GLOBL1() and __GLOBL() (diff) | |
download | rtems-libbsd-66659ff1ad6831b0ea7425fa6ecd8a8687523658.tar.bz2 |
Update to FreeBSD 9.2
Diffstat (limited to 'freebsd/sys/contrib')
26 files changed, 9204 insertions, 5810 deletions
diff --git a/freebsd/sys/contrib/altq/altq/altq_cbq.c b/freebsd/sys/contrib/altq/altq/altq_cbq.c index 9dde791c..6ebf6551 100644 --- a/freebsd/sys/contrib/altq/altq/altq_cbq.c +++ b/freebsd/sys/contrib/altq/altq/altq_cbq.c @@ -35,12 +35,10 @@ #if defined(__FreeBSD__) || defined(__NetBSD__) #include <rtems/bsd/local/opt_altq.h> -#if (__FreeBSD__ != 2) #include <rtems/bsd/local/opt_inet.h> #ifdef __FreeBSD__ #include <rtems/bsd/local/opt_inet6.h> #endif -#endif #endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ @@ -510,14 +508,8 @@ cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ -#if defined(__NetBSD__) || defined(__OpenBSD__)\ - || (defined(__FreeBSD__) && __FreeBSD_version >= 501113) printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); -#else - printf("altq: packet for %s%d does not have pkthdr\n", - ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit); -#endif m_freem(m); return (ENOBUFS); } @@ -1029,13 +1021,7 @@ cbqclose(dev, flag, fmt, p) while (cbq_list) { ifp = cbq_list->ifnp.ifq_->altq_ifp; -#if defined(__NetBSD__) || defined(__OpenBSD__)\ - || (defined(__FreeBSD__) && __FreeBSD_version >= 501113) sprintf(iface.cbq_ifacename, "%s", ifp->if_xname); -#else - sprintf(iface.cbq_ifacename, - "%s%d", ifp->if_name, ifp->if_unit); -#endif err = cbq_ifdetach(&iface); if (err != 0 && error == 0) error = err; diff --git a/freebsd/sys/contrib/altq/altq/altq_cdnr.c b/freebsd/sys/contrib/altq/altq/altq_cdnr.c index cc37e0b8..ce517318 100644 --- a/freebsd/sys/contrib/altq/altq/altq_cdnr.c +++ b/freebsd/sys/contrib/altq/altq/altq_cdnr.c @@ -1,7 +1,7 @@ #include <machine/rtems-bsd-kernel-space.h> /* $FreeBSD$ */ -/* $KAME: altq_cdnr.c,v 1.14 2003/09/05 22:40:36 itojun Exp $ */ +/* $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */ /* * Copyright (C) 1999-2002 @@ -31,12 +31,10 @@ #if defined(__FreeBSD__) || defined(__NetBSD__) #include <rtems/bsd/local/opt_altq.h> -#if (__FreeBSD__ != 2) #include <rtems/bsd/local/opt_inet.h> #ifdef __FreeBSD__ #include <rtems/bsd/local/opt_inet6.h> #endif -#endif #endif /* __FreeBSD__ || __NetBSD__ */ #include <rtems/bsd/sys/param.h> diff --git a/freebsd/sys/contrib/altq/altq/altq_hfsc.c b/freebsd/sys/contrib/altq/altq/altq_hfsc.c index 41f7c6db..64c9d17c 100644 --- a/freebsd/sys/contrib/altq/altq/altq_hfsc.c +++ b/freebsd/sys/contrib/altq/altq/altq_hfsc.c @@ -46,12 +46,10 @@ #if defined(__FreeBSD__) || defined(__NetBSD__) #include <rtems/bsd/local/opt_altq.h> -#if (__FreeBSD__ != 2) #include <rtems/bsd/local/opt_inet.h> #ifdef __FreeBSD__ #include <rtems/bsd/local/opt_inet6.h> #endif -#endif #endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */ @@ -700,14 +698,8 @@ hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ -#if defined(__NetBSD__) || defined(__OpenBSD__)\ - || (defined(__FreeBSD__) && __FreeBSD_version >= 501113) printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); -#else - printf("altq: packet for %s%d does not have pkthdr\n", - ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit); -#endif m_freem(m); return (ENOBUFS); } diff --git a/freebsd/sys/contrib/altq/altq/altq_priq.c b/freebsd/sys/contrib/altq/altq/altq_priq.c index e0e2522d..0cb47576 100644 --- a/freebsd/sys/contrib/altq/altq/altq_priq.c +++ b/freebsd/sys/contrib/altq/altq/altq_priq.c @@ -33,12 +33,10 @@ #if defined(__FreeBSD__) || defined(__NetBSD__) #include <rtems/bsd/local/opt_altq.h> -#if (__FreeBSD__ != 2) #include <rtems/bsd/local/opt_inet.h> #ifdef __FreeBSD__ #include <rtems/bsd/local/opt_inet6.h> #endif -#endif #endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */ @@ -471,14 +469,8 @@ priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ -#if defined(__NetBSD__) || defined(__OpenBSD__)\ - || (defined(__FreeBSD__) && __FreeBSD_version >= 501113) printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); -#else - printf("altq: packet for %s%d does not have pkthdr\n", - ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit); -#endif m_freem(m); return (ENOBUFS); } diff --git a/freebsd/sys/contrib/altq/altq/altq_red.c b/freebsd/sys/contrib/altq/altq/altq_red.c index 78b8e6fc..7d1ad735 100644 --- a/freebsd/sys/contrib/altq/altq/altq_red.c +++ b/freebsd/sys/contrib/altq/altq/altq_red.c @@ -64,12 +64,10 @@ #if defined(__FreeBSD__) || defined(__NetBSD__) #include <rtems/bsd/local/opt_altq.h> -#if (__FreeBSD__ != 2) #include <rtems/bsd/local/opt_inet.h> #ifdef __FreeBSD__ #include <rtems/bsd/local/opt_inet6.h> #endif -#endif #endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ @@ -518,11 +516,9 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) struct mbuf *m0; struct pf_mtag *at; void *hdr; - int af; at = pf_find_mtag(m); if (at != NULL) { - af = at->af; hdr = at->hdr; #ifdef ALTQ3_COMPAT } else if (pktattr != NULL) { @@ -532,9 +528,6 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) } else return (0); - if (af != AF_INET && af != AF_INET6) - return (0); - /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)hdr >= m0->m_data) && @@ -545,8 +538,8 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) return (0); } - switch (af) { - case AF_INET: + switch (((struct ip *)hdr)->ip_v) { + case IPVERSION: if (flags & REDF_ECN4) { struct ip *ip = hdr; u_int8_t otos; @@ -579,7 +572,7 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) } break; #ifdef INET6 - case AF_INET6: + case (IPV6_VERSION >> 4): if (flags & REDF_ECN6) { struct ip6_hdr *ip6 = hdr; u_int32_t flowlabel; diff --git a/freebsd/sys/contrib/altq/altq/altq_rio.c b/freebsd/sys/contrib/altq/altq/altq_rio.c index d1fe3f18..c5fb097d 100644 --- a/freebsd/sys/contrib/altq/altq/altq_rio.c +++ b/freebsd/sys/contrib/altq/altq/altq_rio.c @@ -63,12 +63,10 @@ #if defined(__FreeBSD__) || defined(__NetBSD__) #include <rtems/bsd/local/opt_altq.h> -#if (__FreeBSD__ != 2) #include <rtems/bsd/local/opt_inet.h> #ifdef __FreeBSD__ #include <rtems/bsd/local/opt_inet6.h> #endif -#endif #endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */ diff --git a/freebsd/sys/contrib/altq/altq/altq_rmclass.c b/freebsd/sys/contrib/altq/altq/altq_rmclass.c index 6550dda4..b385f78a 100644 --- a/freebsd/sys/contrib/altq/altq/altq_rmclass.c +++ b/freebsd/sys/contrib/altq/altq/altq_rmclass.c @@ -1,7 +1,7 @@ #include <machine/rtems-bsd-kernel-space.h> /* $FreeBSD$ */ -/* $KAME: altq_rmclass.c,v 1.18 2003/11/06 06:32:53 kjc Exp $ */ +/* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ */ /* * Copyright (c) 1991-1997 Regents of the University of California. @@ -43,12 +43,10 @@ #if defined(__FreeBSD__) || defined(__NetBSD__) #include <rtems/bsd/local/opt_altq.h> -#if (__FreeBSD__ != 2) #include <rtems/bsd/local/opt_inet.h> #ifdef __FreeBSD__ #include <rtems/bsd/local/opt_inet6.h> #endif -#endif #endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ diff --git a/freebsd/sys/contrib/altq/altq/altq_subr.c b/freebsd/sys/contrib/altq/altq/altq_subr.c index 3168da74..3ebd58d8 100644 --- a/freebsd/sys/contrib/altq/altq/altq_subr.c +++ b/freebsd/sys/contrib/altq/altq/altq_subr.c @@ -74,15 +74,12 @@ /* machine dependent clock related includes */ #ifdef __FreeBSD__ -#if __FreeBSD__ < 3 -#include <rtems/bsd/local/opt_cpu.h> /* for FreeBSD-2.2.8 to get i586_ctr_freq */ -#endif #include <sys/bus.h> #include <sys/cpu.h> #include <sys/eventhandler.h> #include <machine/clock.h> #endif -#if defined(__i386__) +#if defined(__amd64__) || defined(__i386__) #include <machine/cpufunc.h> /* for pentium tsc */ #include <machine/specialreg.h> /* for CPUID_TSC */ #ifdef __FreeBSD__ @@ -90,7 +87,7 @@ #elif defined(__NetBSD__) || defined(__OpenBSD__) #include <machine/cpu.h> /* for cpu_feature */ #endif -#endif /* __i386__ */ +#endif /* __amd64 || __i386__ */ /* * internal function prototypes @@ -451,7 +448,7 @@ static void tbr_timeout(arg) void *arg; { -#if defined(__FreeBSD__) +#ifdef __FreeBSD__ VNET_ITERATOR_DECL(vnet_iter); #endif struct ifnet *ifp; @@ -463,7 +460,7 @@ tbr_timeout(arg) #else s = splimp(); #endif -#if defined(__FreeBSD__) && (__FreeBSD_version >= 500000) +#ifdef __FreeBSD__ IFNET_RLOCK_NOSLEEP(); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { @@ -479,7 +476,7 @@ tbr_timeout(arg) ifp->if_start != NULL) (*ifp->if_start)(ifp); } -#if defined(__FreeBSD__) && (__FreeBSD_version >= 500000) +#ifdef __FreeBSD__ CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); @@ -490,20 +487,6 @@ tbr_timeout(arg) CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); else tbr_timer = 0; /* don't need tbr_timer anymore */ -#if defined(__alpha__) && !defined(ALTQ_NOPCC) - { - /* - * XXX read out the machine dependent clock once a second - * to detect counter wrap-around. - */ - static u_int cnt; - - if (++cnt >= hz) { - (void)read_machclk(); - cnt = 0; - } - } -#endif /* __alpha__ && !ALTQ_NOPCC */ } /* @@ -901,16 +884,9 @@ int machclk_usepcc; u_int32_t machclk_freq; u_int32_t machclk_per_tick; -#ifdef __alpha__ -#ifdef __FreeBSD__ -extern u_int32_t cycles_per_sec; /* alpha cpu clock frequency */ -#elif defined(__NetBSD__) || defined(__OpenBSD__) -extern u_int64_t cycles_per_usec; /* alpha cpu clock frequency */ -#endif -#endif /* __alpha__ */ #if defined(__i386__) && defined(__NetBSD__) extern u_int64_t cpu_tsc_freq; -#endif /* __alpha__ */ +#endif #ifndef __rtems__ #if (__FreeBSD_version >= 700035) @@ -945,7 +921,7 @@ init_machclk_setup(void) machclk_usepcc = 1; -#if (!defined(__i386__) && !defined(__alpha__)) || defined(ALTQ_NOPCC) +#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) machclk_usepcc = 0; #endif #if defined(__FreeBSD__) && defined(SMP) @@ -954,16 +930,14 @@ init_machclk_setup(void) #if defined(__NetBSD__) && defined(MULTIPROCESSOR) machclk_usepcc = 0; #endif -#ifdef __i386__ -#ifndef __rtems__ +#if defined(__amd64__) || defined(__i386__) /* check if TSC is available */ - if (machclk_usepcc == 1 && ((cpu_feature & CPUID_TSC) == 0 || - tsc_is_broken)) -#else /* __rtems__ */ - /* check if TSC is available */ - if (machclk_usepcc == 1 && ((cpu_feature & CPUID_TSC) == 0 || - !(x86_has_tsc()) )) -#endif /* __rtems__ */ +#ifdef __FreeBSD__ + if ((cpu_feature & CPUID_TSC) == 0 || + atomic_load_acq_64(&tsc_freq) == 0) +#else + if ((cpu_feature & CPUID_TSC) == 0) +#endif machclk_usepcc = 0; #endif } @@ -993,27 +967,15 @@ init_machclk(void) * if the clock frequency (of Pentium TSC or Alpha PCC) is * accessible, just use it. */ -#ifndef __rtems__ -#ifdef __i386__ +#if defined(__amd64__) || defined(__i386__) #ifdef __FreeBSD__ -#if (__FreeBSD_version > 300000) - machclk_freq = tsc_freq; -#else - machclk_freq = i586_ctr_freq; -#endif + machclk_freq = atomic_load_acq_64(&tsc_freq); #elif defined(__NetBSD__) machclk_freq = (u_int32_t)cpu_tsc_freq; #elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU)) machclk_freq = pentium_mhz * 1000000; #endif -#elif defined(__alpha__) -#ifdef __FreeBSD__ - machclk_freq = cycles_per_sec; -#elif defined(__NetBSD__) || defined(__OpenBSD__) - machclk_freq = (u_int32_t)(cycles_per_usec * 1000000); #endif -#endif /* __alpha__ */ -#endif /* __rtems__ */ /* * if we don't know the clock frequency, measure it. @@ -1059,25 +1021,8 @@ read_machclk(void) u_int64_t val; if (machclk_usepcc) { -#if defined(__i386__) +#if defined(__amd64__) || defined(__i386__) val = rdtsc(); -#elif defined(__alpha__) - static u_int32_t last_pcc, upper; - u_int32_t pcc; - - /* - * for alpha, make a 64bit counter value out of the 32bit - * alpha processor cycle counter. - * read_machclk must be called within a half of its - * wrap-around cycle (about 5 sec for 400MHz cpu) to properly - * detect a counter wrap-around. - * tbr_timeout calls read_machclk once a second. - */ - pcc = (u_int32_t)alpha_rpcc(); - if (pcc <= last_pcc) - upper++; - last_pcc = pcc; - val = ((u_int64_t)upper << 32) + pcc; #else panic("read_machclk"); #endif diff --git a/freebsd/sys/contrib/altq/altq/altq_var.h b/freebsd/sys/contrib/altq/altq/altq_var.h index 6f37f182..b956002a 100644 --- a/freebsd/sys/contrib/altq/altq/altq_var.h +++ b/freebsd/sys/contrib/altq/altq/altq_var.h @@ -141,11 +141,7 @@ extern u_int64_t read_machclk(void); * misc stuff for compatibility */ /* ioctl cmd type */ -#if defined(__FreeBSD__) && (__FreeBSD__ < 3) -typedef int ioctlcmd_t; -#else typedef u_long ioctlcmd_t; -#endif /* * queue macros: diff --git a/freebsd/sys/contrib/altq/altq/if_altq.h b/freebsd/sys/contrib/altq/altq/if_altq.h index 50462e9e..2d983e9a 100644 --- a/freebsd/sys/contrib/altq/altq/if_altq.h +++ b/freebsd/sys/contrib/altq/altq/if_altq.h @@ -1,5 +1,5 @@ /* $FreeBSD$ */ -/* $KAME: if_altq.h,v 1.11 2003/07/10 12:07:50 kjc Exp $ */ +/* $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $ */ /* * Copyright (C) 1997-2003 @@ -29,7 +29,7 @@ #ifndef _ALTQ_IF_ALTQ_H_ #define _ALTQ_IF_ALTQ_H_ -#if (defined(__FreeBSD__) && __FreeBSD_version >= 500000) +#ifdef __FreeBSD__ #include <rtems/bsd/sys/lock.h> /* XXX */ #include <sys/mutex.h> /* XXX */ #include <sys/event.h> /* XXX */ @@ -51,7 +51,7 @@ struct ifaltq { int ifq_len; int ifq_maxlen; int ifq_drops; -#if (defined(__FreeBSD__) && __FreeBSD_version >= 500000) +#ifdef __FreeBSD__ struct mtx ifq_mtx; #endif diff --git a/freebsd/sys/contrib/pf/net/if_pflog.c b/freebsd/sys/contrib/pf/net/if_pflog.c index 5efbf76d..51de5cd0 100644 --- a/freebsd/sys/contrib/pf/net/if_pflog.c +++ b/freebsd/sys/contrib/pf/net/if_pflog.c @@ -1,6 +1,6 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $OpenBSD: if_pflog.c,v 1.22 2006/12/15 09:31:20 otto Exp $ */ +/* $OpenBSD: if_pflog.c,v 1.26 2007/10/18 21:58:18 mpf Exp $ */ /* * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and @@ -84,28 +84,28 @@ __FBSDID("$FreeBSD$"); #include <net/route.h> #include <net/bpf.h> -#ifdef INET +#if defined(INET) || defined(INET6) #include <netinet/in.h> +#endif +#ifdef INET #include <netinet/in_var.h> #include <netinet/in_systm.h> #include <netinet/ip.h> #endif #ifdef INET6 -#ifndef INET -#include <netinet/in.h> -#endif +#include <netinet6/in6_var.h> #include <netinet6/nd6.h> #endif /* INET6 */ #include <net/pfvar.h> #include <net/if_pflog.h> -#ifdef INET #ifdef __FreeBSD__ +#ifdef INET #include <machine/in_cksum.h> -#endif -#endif +#endif /* INET */ +#endif /* __FreeBSD__ */ #define PFLOGMTU (32768 + MHLEN + MLEN) @@ -117,7 +117,11 @@ __FBSDID("$FreeBSD$"); void pflogattach(int); int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *, - struct route *); +#ifdef __FreeBSD__ + struct route *); +#else + struct rtentry *); +#endif int pflogioctl(struct ifnet *, u_long, caddr_t); void pflogstart(struct ifnet *); #ifdef __FreeBSD__ @@ -130,7 +134,7 @@ int pflog_clone_destroy(struct ifnet *); LIST_HEAD(, pflog_softc) pflogif_list; #ifdef __FreeBSD__ -IFC_SIMPLE_DECLARE(pflog, 1); +IFC_SIMPLE_DECLARE(pflog, 1); #else struct if_clone pflog_cloner = IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy); @@ -138,10 +142,6 @@ struct if_clone pflog_cloner = struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */ -#ifndef __FreeBSD__ -extern int ifqmaxlen; -#endif - void pflogattach(int npflog) { @@ -149,9 +149,6 @@ pflogattach(int npflog) LIST_INIT(&pflogif_list); for (i = 0; i < PFLOGIFS_MAX; i++) pflogifs[i] = NULL; -#ifndef __FreeBSD__ - (void) pflog_clone_create(&pflog_cloner, 0); -#endif if_clone_attach(&pflog_cloner); } @@ -170,9 +167,9 @@ pflog_clone_create(struct if_clone *ifc, int unit) if (unit >= PFLOGIFS_MAX) return (EINVAL); - if ((pflogif = malloc(sizeof(*pflogif), M_DEVBUF, M_NOWAIT)) == NULL) + if ((pflogif = malloc(sizeof(*pflogif), + M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) return (ENOMEM); - bzero(pflogif, sizeof(*pflogif)); pflogif->sc_unit = unit; #ifdef __FreeBSD__ @@ -211,6 +208,7 @@ pflog_clone_create(struct if_clone *ifc, int unit) s = splnet(); #ifdef __FreeBSD__ + /* XXX: Why pf(4) lock?! Better add a pflog lock?! */ PF_LOCK(); #endif LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list); @@ -291,7 +289,11 @@ pflogstart(struct ifnet *ifp) int pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, - struct route *ro) +#ifdef __FreeBSD__ + struct route *rt) +#else + struct rtentry *rt) +#endif { m_freem(m); return (0); @@ -302,9 +304,6 @@ int pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { switch (cmd) { - case SIOCSIFADDR: - case SIOCAIFADDR: - case SIOCSIFDSTADDR: case SIOCSIFFLAGS: #ifdef __FreeBSD__ if (ifp->if_flags & IFF_UP) @@ -319,7 +318,7 @@ pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) #endif break; default: - return (EINVAL); + return (ENOTTY); } return (0); @@ -335,7 +334,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, struct pfloghdr hdr; if (kif == NULL || m == NULL || rm == NULL || pd == NULL) - return (-1); + return ( 1); if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf) return (0); @@ -349,7 +348,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, if (am == NULL) { hdr.rulenr = htonl(rm->nr); - hdr.subrulenr = -1; + hdr.subrulenr = 1; } else { hdr.rulenr = htonl(am->nr); hdr.subrulenr = htonl(rm->nr); @@ -359,11 +358,11 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, } if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done) #ifdef __FreeBSD__ - /* + /* * XXX: This should not happen as we force an early lookup * via debug.pfugidhack */ - ; /* empty */ + ; /* empty */ #else pd->lookup.done = pf_socket_lookup(dir, pd); #endif @@ -432,7 +431,7 @@ static moduledata_t pflog_mod = { "pflog", pflog_modevent, 0 }; #define PFLOG_MODVER 1 -DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); +DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(pflog, PFLOG_MODVER); MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER); #endif /* __FreeBSD__ */ diff --git a/freebsd/sys/contrib/pf/net/if_pflog.h b/freebsd/sys/contrib/pf/net/if_pflog.h index a3c74d16..5f48f6c7 100644 --- a/freebsd/sys/contrib/pf/net/if_pflog.h +++ b/freebsd/sys/contrib/pf/net/if_pflog.h @@ -1,5 +1,4 @@ -/* $FreeBSD$ */ -/* $OpenBSD: if_pflog.h,v 1.14 2006/10/25 11:27:01 henning Exp $ */ +/* $OpenBSD: if_pflog.h,v 1.13 2006/10/23 12:46:09 henning Exp $ */ /* * Copyright 2001 Niels Provos <provos@citi.umich.edu> * All rights reserved. @@ -26,11 +25,10 @@ */ #ifndef _NET_IF_PFLOG_H_ -#define _NET_IF_PFLOG_H_ +#define _NET_IF_PFLOG_H_ #define PFLOGIFS_MAX 16 -#ifdef _KERNEL struct pflog_softc { #ifdef __FreeBSD__ struct ifnet *sc_ifp; /* the interface pointer */ @@ -40,9 +38,8 @@ struct pflog_softc { int sc_unit; LIST_ENTRY(pflog_softc) sc_list; }; -#endif /* _KERNEL */ -#define PFLOG_RULESET_NAME_SIZE 16 +#define PFLOG_RULESET_NAME_SIZE 16 struct pfloghdr { u_int8_t length; @@ -61,9 +58,9 @@ struct pfloghdr { u_int8_t pad[3]; }; -#define PFLOG_HDRLEN sizeof(struct pfloghdr) +#define PFLOG_HDRLEN sizeof(struct pfloghdr) /* minus pad, also used as a signature */ -#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad) +#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad) /* XXX remove later when old format logs are no longer needed */ struct old_pfloghdr { @@ -74,23 +71,24 @@ struct old_pfloghdr { u_short action; u_short dir; }; -#define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr) +#define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr) #ifdef _KERNEL - #ifdef __FreeBSD__ struct pf_rule; struct pf_ruleset; struct pfi_kif; struct pf_pdesc; +#if 0 typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, struct pf_pdesc *); extern pflog_packet_t *pflog_packet_ptr; -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) do { \ - if (pflog_packet_ptr != NULL) \ - pflog_packet_ptr(i,a,b,c,d,e,f,g,h); \ +#endif +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) do { \ + if (pflog_packet_ptr != NULL) \ + pflog_packet_ptr(i,a,b,c,d,e,f,g,h); \ } while (0) #else /* ! __FreeBSD__ */ #if NPFLOG > 0 @@ -98,6 +96,6 @@ extern pflog_packet_t *pflog_packet_ptr; #else #define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0) #endif /* NPFLOG > 0 */ -#endif /* __FreeBSD__ */ +#endif #endif /* _KERNEL */ #endif /* _NET_IF_PFLOG_H_ */ diff --git a/freebsd/sys/contrib/pf/net/if_pflow.h b/freebsd/sys/contrib/pf/net/if_pflow.h new file mode 100644 index 00000000..35ccbeb4 --- /dev/null +++ b/freebsd/sys/contrib/pf/net/if_pflow.h @@ -0,0 +1,126 @@ +/* $OpenBSD: if_pflow.h,v 1.5 2009/02/27 11:09:36 gollo Exp $ */ + +/* + * Copyright (c) 2008 Henning Brauer <henning@openbsd.org> + * Copyright (c) 2008 Joerg Goltermann <jg@osn.de> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef _NET_IF_PFLOW_H_ +#define _NET_IF_PFLOW_H_ + +#define PFLOW_ID_LEN sizeof(u_int64_t) + +#define PFLOW_MAXFLOWS 30 +#define PFLOW_VERSION 5 +#define PFLOW_ENGINE_TYPE 42 +#define PFLOW_ENGINE_ID 42 +#define PFLOW_MAXBYTES 0xffffffff +#define PFLOW_TIMEOUT 30 + +struct pflow_flow { + u_int32_t src_ip; + u_int32_t dest_ip; + u_int32_t nexthop_ip; + u_int16_t if_index_in; + u_int16_t if_index_out; + u_int32_t flow_packets; + u_int32_t flow_octets; + u_int32_t flow_start; + u_int32_t flow_finish; + u_int16_t src_port; + u_int16_t dest_port; + u_int8_t pad1; + u_int8_t tcp_flags; + u_int8_t protocol; + u_int8_t tos; + u_int16_t src_as; + u_int16_t dest_as; + u_int8_t src_mask; + u_int8_t dest_mask; + u_int16_t pad2; +} __packed; + +#ifdef _KERNEL + +extern int pflow_ok; + +struct pflow_softc { + struct ifnet sc_if; + struct ifnet *sc_pflow_ifp; + + unsigned int sc_count; + unsigned int sc_maxcount; + u_int64_t sc_gcounter; + struct ip_moptions sc_imo; +#ifdef __FreeBSD__ + struct callout sc_tmo; +#else + struct timeout sc_tmo; +#endif + struct in_addr sc_sender_ip; + u_int16_t sc_sender_port; + struct in_addr sc_receiver_ip; + u_int16_t sc_receiver_port; + struct mbuf *sc_mbuf; /* current cumulative mbuf */ + SLIST_ENTRY(pflow_softc) sc_next; +}; + +extern struct pflow_softc *pflowif; + +#endif /* _KERNEL */ + +struct pflow_header { + u_int16_t version; + u_int16_t count; + u_int32_t uptime_ms; + u_int32_t time_sec; + u_int32_t time_nanosec; + u_int32_t flow_sequence; + u_int8_t engine_type; + u_int8_t engine_id; + u_int8_t reserved1; + u_int8_t reserved2; +} __packed; + +#define PFLOW_HDRLEN sizeof(struct pflow_header) + +struct pflowstats { + u_int64_t pflow_flows; + u_int64_t pflow_packets; + u_int64_t pflow_onomem; + u_int64_t pflow_oerrors; +}; + +/* + * Configuration structure for SIOCSETPFLOW SIOCGETPFLOW + */ +struct pflowreq { + struct in_addr sender_ip; + struct in_addr receiver_ip; + u_int16_t receiver_port; + u_int16_t addrmask; +#define PFLOW_MASK_SRCIP 0x01 +#define PFLOW_MASK_DSTIP 0x02 +#define PFLOW_MASK_DSTPRT 0x04 +}; + +#ifdef _KERNEL +int export_pflow(struct pf_state *); +int pflow_sysctl(int *, u_int, void *, size_t *, void *, size_t); +#endif /* _KERNEL */ + +#endif /* _NET_IF_PFLOW_H_ */ diff --git a/freebsd/sys/contrib/pf/net/if_pfsync.c b/freebsd/sys/contrib/pf/net/if_pfsync.c index 24c80e3d..36cb1573 100644 --- a/freebsd/sys/contrib/pf/net/if_pfsync.c +++ b/freebsd/sys/contrib/pf/net/if_pfsync.c @@ -1,6 +1,6 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff @@ -28,27 +28,41 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Revisions picked from OpenBSD after revision 1.110 import: + * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates + * 1.120, 1.175 - use monotonic time_uptime + * 1.122 - reduce number of updates for non-TCP sessions + * 1.128 - cleanups + * 1.146 - bzero() mbuf before sparsely filling it with data + * 1.170 - SIOCSIFMTU checks + */ + #ifdef __FreeBSD__ #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_carp.h> -#include <rtems/bsd/local/opt_bpf.h> #include <rtems/bsd/local/opt_pf.h> #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#ifdef DEV_BPF -#define NBPFILTER DEV_BPF -#else -#define NBPFILTER 0 -#endif - -#ifdef DEV_PFSYNC -#define NPFSYNC DEV_PFSYNC -#else -#define NPFSYNC 0 -#endif +#define NBPFILTER 1 #ifdef DEV_CARP #define NCARP DEV_CARP @@ -58,7 +72,10 @@ __FBSDID("$FreeBSD$"); #endif /* __FreeBSD__ */ #include <rtems/bsd/sys/param.h> +#include <sys/kernel.h> #ifdef __FreeBSD__ +#include <sys/bus.h> +#include <sys/interrupt.h> #include <sys/priv.h> #endif #include <sys/proc.h> @@ -74,12 +91,15 @@ __FBSDID("$FreeBSD$"); #include <sys/taskqueue.h> #include <rtems/bsd/sys/lock.h> #include <sys/mutex.h> -#include <sys/sysctl.h> +#include <sys/protosw.h> #else #include <sys/ioctl.h> #include <sys/timeout.h> #endif -#include <sys/kernel.h> +#include <sys/sysctl.h> +#ifndef __FreeBSD__ +#include <sys/pool.h> +#endif #include <net/if.h> #ifdef __FreeBSD__ @@ -88,6 +108,11 @@ __FBSDID("$FreeBSD$"); #include <net/if_types.h> #include <net/route.h> #include <net/bpf.h> +#include <net/netisr.h> +#ifdef __FreeBSD__ +#include <net/vnet.h> +#endif + #include <netinet/in.h> #include <netinet/if_ether.h> #include <netinet/tcp.h> @@ -119,23 +144,188 @@ __FBSDID("$FreeBSD$"); #include "pfsync.h" #endif -#define PFSYNC_MINMTU \ - (sizeof(struct pfsync_header) + sizeof(struct pf_state)) +#define PFSYNC_MINPKT ( \ + sizeof(struct ip) + \ + sizeof(struct pfsync_header) + \ + sizeof(struct pfsync_subheader) + \ + sizeof(struct pfsync_eof)) + +struct pfsync_pkt { + struct ip *ip; + struct in_addr src; + u_int8_t flags; +}; + +int pfsync_input_hmac(struct mbuf *, int); + +int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, + struct pfsync_state_peer *); + +int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); + +int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); + +int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { + pfsync_in_clr, /* PFSYNC_ACT_CLR */ + pfsync_in_ins, /* PFSYNC_ACT_INS */ + pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ + pfsync_in_upd, /* PFSYNC_ACT_UPD */ + pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ + pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ + pfsync_in_del, /* PFSYNC_ACT_DEL */ + pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ + pfsync_in_error, /* PFSYNC_ACT_INS_F */ + pfsync_in_error, /* PFSYNC_ACT_DEL_F */ + pfsync_in_bus, /* PFSYNC_ACT_BUS */ + pfsync_in_tdb, /* PFSYNC_ACT_TDB */ + pfsync_in_eof /* PFSYNC_ACT_EOF */ +}; + +struct pfsync_q { + int (*write)(struct pf_state *, struct mbuf *, int); + size_t len; + u_int8_t action; +}; + +/* we have one of these for every PFSYNC_S_ */ +int pfsync_out_state(struct pf_state *, struct mbuf *, int); +int pfsync_out_iack(struct pf_state *, struct mbuf *, int); +int pfsync_out_upd_c(struct pf_state *, struct mbuf *, int); +int pfsync_out_del(struct pf_state *, struct mbuf *, int); + +struct pfsync_q pfsync_qs[] = { + { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, + { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, + { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, + { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, + { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } +}; + +void pfsync_q_ins(struct pf_state *, int); +void pfsync_q_del(struct pf_state *); + +struct pfsync_upd_req_item { + TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; + struct pfsync_upd_req ur_msg; +}; +TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item); + +struct pfsync_deferral { + TAILQ_ENTRY(pfsync_deferral) pd_entry; + struct pf_state *pd_st; + struct mbuf *pd_m; +#ifdef __FreeBSD__ + struct callout pd_tmo; +#else + struct timeout pd_tmo; +#endif +}; +TAILQ_HEAD(pfsync_deferrals, pfsync_deferral); + +#define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \ + sizeof(struct pfsync_deferral)) -#ifdef PFSYNCDEBUG -#define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0) -int pfsyncdebug; +#ifdef notyet +int pfsync_out_tdb(struct tdb *, struct mbuf *, int); +#endif + +struct pfsync_softc { +#ifdef __FreeBSD__ + struct ifnet *sc_ifp; #else -#define DPRINTF(x) + struct ifnet sc_if; #endif + struct ifnet *sc_sync_if; -struct pfsync_softc *pfsyncif = NULL; -struct pfsyncstats pfsyncstats; #ifdef __FreeBSD__ -SYSCTL_DECL(_net_inet_pfsync); -SYSCTL_STRUCT(_net_inet_pfsync, 0, stats, CTLFLAG_RW, - &pfsyncstats, pfsyncstats, + uma_zone_t sc_pool; +#else + struct pool sc_pool; +#endif + + struct ip_moptions sc_imo; + + struct in_addr sc_sync_peer; + u_int8_t sc_maxupdates; +#ifdef __FreeBSD__ + int pfsync_sync_ok; +#endif + + struct ip sc_template; + + struct pf_state_queue sc_qs[PFSYNC_S_COUNT]; + size_t sc_len; + + struct pfsync_upd_reqs sc_upd_req_list; + + struct pfsync_deferrals sc_deferrals; + u_int sc_deferred; + + void *sc_plus; + size_t sc_pluslen; + + u_int32_t sc_ureq_sent; + int sc_bulk_tries; +#ifdef __FreeBSD__ + struct callout sc_bulkfail_tmo; +#else + struct timeout sc_bulkfail_tmo; +#endif + + u_int32_t sc_ureq_received; + struct pf_state *sc_bulk_next; + struct pf_state *sc_bulk_last; +#ifdef __FreeBSD__ + struct callout sc_bulk_tmo; +#else + struct timeout sc_bulk_tmo; +#endif + + TAILQ_HEAD(, tdb) sc_tdb_q; + +#ifdef __FreeBSD__ + struct callout sc_tmo; +#else + struct timeout sc_tmo; +#endif +}; + +#ifdef __FreeBSD__ +static MALLOC_DEFINE(M_PFSYNC, "pfsync", "pfsync data"); +static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; +#define V_pfsyncif VNET(pfsyncif) +static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL; +#define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) +static VNET_DEFINE(struct pfsyncstats, pfsyncstats); +#define V_pfsyncstats VNET(pfsyncstats) + +static void pfsyncintr(void *); +static int pfsync_multicast_setup(struct pfsync_softc *); +static void pfsync_multicast_cleanup(struct pfsync_softc *); +static int pfsync_init(void); +static void pfsync_uninit(void); +static void pfsync_sendout1(int); + +#define schednetisr(NETISR_PFSYNC) swi_sched(V_pfsync_swi_cookie, 0) + +SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); +SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW, + &VNET_NAME(pfsyncstats), pfsyncstats, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); +#else +struct pfsync_softc *pfsyncif = NULL; +struct pfsyncstats pfsyncstats; +#define V_pfsyncstats pfsyncstats #endif void pfsyncattach(int); @@ -146,48 +336,54 @@ void pfsync_clone_destroy(struct ifnet *); int pfsync_clone_create(struct if_clone *, int); int pfsync_clone_destroy(struct ifnet *); #endif -void pfsync_setmtu(struct pfsync_softc *, int); int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, struct pf_state_peer *); -int pfsync_insert_net_state(struct pfsync_state *, u_int8_t); -#ifdef PFSYNC_TDB void pfsync_update_net_tdb(struct pfsync_tdb *); -#endif int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, +#ifdef __FreeBSD__ struct route *); +#else + struct rtentry *); +#endif int pfsyncioctl(struct ifnet *, u_long, caddr_t); void pfsyncstart(struct ifnet *); -struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); -int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); -int pfsync_sendout(struct pfsync_softc *); -#ifdef PFSYNC_TDB -int pfsync_tdb_sendout(struct pfsync_softc *); -#endif -int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); +struct mbuf *pfsync_if_dequeue(struct ifnet *); + +void pfsync_deferred(struct pf_state *, int); +void pfsync_undefer(struct pfsync_deferral *, int); +void pfsync_defer_tmo(void *); + +void pfsync_request_update(u_int32_t, u_int64_t); +void pfsync_update_state_req(struct pf_state *); + +void pfsync_drop(struct pfsync_softc *); +void pfsync_sendout(void); +void pfsync_send_plus(void *, size_t); void pfsync_timeout(void *); -#ifdef PFSYNC_TDB void pfsync_tdb_timeout(void *); -#endif -void pfsync_send_bus(struct pfsync_softc *, u_int8_t); + +void pfsync_bulk_start(void); +void pfsync_bulk_status(u_int8_t); void pfsync_bulk_update(void *); -void pfsync_bulkfail(void *); +void pfsync_bulk_fail(void *); #ifdef __FreeBSD__ -void pfsync_ifdetach(void *, struct ifnet *); -void pfsync_senddef(void *, int); - /* XXX: ugly */ #define betoh64 (unsigned long long)be64toh #define timeout_del callout_stop #endif -int pfsync_sync_ok; +#define PFSYNC_MAX_BULKTRIES 12 #ifndef __FreeBSD__ -extern int ifqmaxlen; +int pfsync_sync_ok; #endif #ifdef __FreeBSD__ +VNET_DEFINE(struct ifc_simple_data, pfsync_cloner_data); +VNET_DEFINE(struct if_clone, pfsync_cloner); +#define V_pfsync_cloner_data VNET(pfsync_cloner_data) +#define V_pfsync_cloner VNET(pfsync_cloner) IFC_SIMPLE_DECLARE(pfsync, 1); #else struct if_clone pfsync_cloner = @@ -199,7 +395,6 @@ pfsyncattach(int npfsync) { if_clone_attach(&pfsync_cloner); } - int #ifdef __FreeBSD__ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) @@ -207,95 +402,76 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) pfsync_clone_create(struct if_clone *ifc, int unit) #endif { + struct pfsync_softc *sc; struct ifnet *ifp; + int q; if (unit != 0) return (EINVAL); +#ifdef __FreeBSD__ + sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); + sc->pfsync_sync_ok = 1; +#else pfsync_sync_ok = 1; - if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT)) == NULL) - return (ENOMEM); - bzero(pfsyncif, sizeof(*pfsyncif)); -#ifdef __FreeBSD__ - if ((pfsyncif->sc_imo.imo_membership = (struct in_multi **)malloc( - (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_DEVBUF, - M_NOWAIT)) == NULL) { - free(pfsyncif, M_DEVBUF); - return (ENOSPC); - } - pfsyncif->sc_imo.imo_mfilters = NULL; - pfsyncif->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; - pfsyncif->sc_imo.imo_multicast_vif = -1; + sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO); +#endif - ifp = pfsyncif->sc_ifp = if_alloc(IFT_PFSYNC); - if (ifp == NULL) { - free(pfsyncif->sc_imo.imo_membership, M_DEVBUF); - free(pfsyncif, M_DEVBUF); - return (ENOSPC); - } - if_initname(ifp, ifc->ifc_name, unit); + for (q = 0; q < PFSYNC_S_COUNT; q++) + TAILQ_INIT(&sc->sc_qs[q]); - pfsyncif->sc_detachtag = EVENTHANDLER_REGISTER(ifnet_departure_event, - pfsync_ifdetach, pfsyncif, EVENTHANDLER_PRI_ANY); - if (pfsyncif->sc_detachtag == NULL) { - if_free(ifp); - free(pfsyncif->sc_imo.imo_membership, M_DEVBUF); - free(pfsyncif, M_DEVBUF); - return (ENOSPC); - } - - pfsyncif->sc_ifq.ifq_maxlen = ifqmaxlen; - mtx_init(&pfsyncif->sc_ifq.ifq_mtx, ifp->if_xname, - "pfsync send queue", MTX_DEF); - TASK_INIT(&pfsyncif->sc_send_task, 0, pfsync_senddef, pfsyncif); -#endif - pfsyncif->sc_mbuf = NULL; - pfsyncif->sc_mbuf_net = NULL; -#ifdef PFSYNC_TDB - pfsyncif->sc_mbuf_tdb = NULL; -#endif - pfsyncif->sc_statep.s = NULL; - pfsyncif->sc_statep_net.s = NULL; -#ifdef PFSYNC_TDB - pfsyncif->sc_statep_tdb.t = NULL; -#endif - pfsyncif->sc_maxupdates = 128; #ifdef __FreeBSD__ - pfsyncif->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); - pfsyncif->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); + sc->sc_pool = uma_zcreate("pfsync", PFSYNC_PLSIZE, NULL, NULL, NULL, + NULL, UMA_ALIGN_PTR, 0); #else - pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; - pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; + pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL); #endif - pfsyncif->sc_ureq_received = 0; - pfsyncif->sc_ureq_sent = 0; - pfsyncif->sc_bulk_send_next = NULL; - pfsyncif->sc_bulk_terminator = NULL; + TAILQ_INIT(&sc->sc_upd_req_list); + TAILQ_INIT(&sc->sc_deferrals); + sc->sc_deferred = 0; + + TAILQ_INIT(&sc->sc_tdb_q); + + sc->sc_len = PFSYNC_MINPKT; + sc->sc_maxupdates = 128; + #ifndef __FreeBSD__ - ifp = &pfsyncif->sc_if; + sc->sc_imo.imo_membership = (struct in_multi **)malloc( + (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, + M_WAITOK | M_ZERO); + sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; +#endif + +#ifdef __FreeBSD__ + ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); + if (ifp == NULL) { + uma_zdestroy(sc->sc_pool); + free(sc, M_PFSYNC); + return (ENOSPC); + } + if_initname(ifp, ifc->ifc_name, unit); +#else + ifp = &sc->sc_if; snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); #endif - ifp->if_softc = pfsyncif; + ifp->if_softc = sc; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_start = pfsyncstart; ifp->if_type = IFT_PFSYNC; ifp->if_snd.ifq_maxlen = ifqmaxlen; - ifp->if_hdrlen = PFSYNC_HDRLEN; - pfsync_setmtu(pfsyncif, ETHERMTU); + ifp->if_hdrlen = sizeof(struct pfsync_header); + ifp->if_mtu = ETHERMTU; #ifdef __FreeBSD__ - callout_init(&pfsyncif->sc_tmo, CALLOUT_MPSAFE); -#ifdef PFSYNC_TDB - callout_init(&pfsyncif->sc_tdb_tmo, CALLOUT_MPSAFE); -#endif - callout_init(&pfsyncif->sc_bulk_tmo, CALLOUT_MPSAFE); - callout_init(&pfsyncif->sc_bulkfail_tmo, CALLOUT_MPSAFE); + callout_init(&sc->sc_tmo, CALLOUT_MPSAFE); + callout_init_mtx(&sc->sc_bulk_tmo, &pf_task_mtx, 0); + callout_init(&sc->sc_bulkfail_tmo, CALLOUT_MPSAFE); #else - timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif); - timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif); - timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif); - timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif); + timeout_set(&sc->sc_tmo, pfsync_timeout, sc); + timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc); + timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc); #endif + if_attach(ifp); #ifndef __FreeBSD__ if_alloc_sadl(ifp); @@ -309,8 +485,14 @@ pfsync_clone_create(struct if_clone *ifc, int unit) #ifdef __FreeBSD__ bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); #else - bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); + bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); +#endif #endif + +#ifdef __FreeBSD__ + V_pfsyncif = sc; +#else + pfsyncif = sc; #endif return (0); @@ -323,60 +505,98 @@ int #endif pfsync_clone_destroy(struct ifnet *ifp) { + struct pfsync_softc *sc = ifp->if_softc; + #ifdef __FreeBSD__ - EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfsyncif->sc_detachtag); - callout_stop(&pfsyncif->sc_tmo); -#ifdef PFSYNC_TDB - callout_stop(&pfsyncif->sc_tdb_tmo); + PF_LOCK(); +#endif + timeout_del(&sc->sc_bulkfail_tmo); + timeout_del(&sc->sc_bulk_tmo); + timeout_del(&sc->sc_tmo); +#ifdef __FreeBSD__ + PF_UNLOCK(); +#endif +#if NCARP > 0 +#ifdef notyet +#ifdef __FreeBSD__ + if (!sc->pfsync_sync_ok) +#else + if (!pfsync_sync_ok) +#endif + carp_group_demote_adj(&sc->sc_if, -1); #endif - callout_stop(&pfsyncif->sc_bulk_tmo); - callout_stop(&pfsyncif->sc_bulkfail_tmo); - /* XXX: more? */ #endif - #if NBPFILTER > 0 bpfdetach(ifp); #endif if_detach(ifp); + + pfsync_drop(sc); + + while (sc->sc_deferred > 0) + pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); + +#ifdef __FreeBSD__ + UMA_DESTROY(sc->sc_pool); +#else + pool_destroy(&sc->sc_pool); +#endif #ifdef __FreeBSD__ if_free(ifp); - free(pfsyncif->sc_imo.imo_membership, M_DEVBUF); + if (sc->sc_imo.imo_membership) + pfsync_multicast_cleanup(sc); + free(sc, M_PFSYNC); +#else + free(sc->sc_imo.imo_membership, M_IPMOPTS); + free(sc, M_DEVBUF); #endif - free(pfsyncif, M_DEVBUF); + +#ifdef __FreeBSD__ + V_pfsyncif = NULL; +#else pfsyncif = NULL; +#endif + #ifndef __FreeBSD__ return (0); #endif } -/* - * Start output on the pfsync interface. - */ -void -pfsyncstart(struct ifnet *ifp) +struct mbuf * +pfsync_if_dequeue(struct ifnet *ifp) { struct mbuf *m; #ifndef __FreeBSD__ int s; #endif - for (;;) { #ifdef __FreeBSD__ - IF_LOCK(&ifp->if_snd); - _IF_DROP(&ifp->if_snd); - _IF_DEQUEUE(&ifp->if_snd, m); - IF_UNLOCK(&ifp->if_snd); + IF_LOCK(&ifp->if_snd); + _IF_DROP(&ifp->if_snd); + _IF_DEQUEUE(&ifp->if_snd, m); + IF_UNLOCK(&ifp->if_snd); #else - s = splnet(); - IF_DROP(&ifp->if_snd); - IF_DEQUEUE(&ifp->if_snd, m); - splx(s); + s = splnet(); + IF_DEQUEUE(&ifp->if_snd, m); + splx(s); #endif - if (m == NULL) - return; - else - m_freem(m); + return (m); +} + +/* + * Start output on the pfsync interface. + */ +void +pfsyncstart(struct ifnet *ifp) +{ + struct mbuf *m; + + while ((m = pfsync_if_dequeue(ifp)) != NULL) { +#ifndef __FreeBSD__ + IF_DROP(&ifp->if_snd); +#endif + m_freem(m); } } @@ -385,85 +605,198 @@ pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { - d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + d->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); +#else + d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); +#endif if (d->scrub == NULL) return (ENOMEM); - bzero(d->scrub, sizeof(*d->scrub)); } return (0); } +#ifndef __FreeBSD__ +void +pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) +{ + bzero(sp, sizeof(struct pfsync_state)); + + /* copy from state key */ + sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; + sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; + sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; + sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; + sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; + sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; + sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; + sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; + sp->proto = st->key[PF_SK_WIRE]->proto; + sp->af = st->key[PF_SK_WIRE]->af; + + /* copy from state */ + strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); + bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); + sp->creation = htonl(time_uptime - st->creation); + sp->expire = pf_state_expires(st); + if (sp->expire <= time_second) + sp->expire = htonl(0); + else + sp->expire = htonl(sp->expire - time_second); + + sp->direction = st->direction; + sp->log = st->log; + sp->timeout = st->timeout; + sp->state_flags = st->state_flags; + if (st->src_node) + sp->sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->nat_src_node) + sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + + bcopy(&st->id, &sp->id, sizeof(sp->id)); + sp->creatorid = st->creatorid; + pf_state_peer_hton(&st->src, &sp->src); + pf_state_peer_hton(&st->dst, &sp->dst); + + if (st->rule.ptr == NULL) + sp->rule = htonl(-1); + else + sp->rule = htonl(st->rule.ptr->nr); + if (st->anchor.ptr == NULL) + sp->anchor = htonl(-1); + else + sp->anchor = htonl(st->anchor.ptr->nr); + if (st->nat_rule.ptr == NULL) + sp->nat_rule = htonl(-1); + else + sp->nat_rule = htonl(st->nat_rule.ptr->nr); + + pf_state_counter_hton(st->packets[0], sp->packets[0]); + pf_state_counter_hton(st->packets[1], sp->packets[1]); + pf_state_counter_hton(st->bytes[0], sp->bytes[0]); + pf_state_counter_hton(st->bytes[1], sp->bytes[1]); + +} +#endif + int -pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) +pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) { struct pf_state *st = NULL; + struct pf_state_key *skw = NULL, *sks = NULL; struct pf_rule *r = NULL; struct pfi_kif *kif; + int pool_flags; + int error; + +#ifdef __FreeBSD__ + PF_LOCK_ASSERT(); + if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { - printf("pfsync_insert_net_state: invalid creator id:" +#endif + printf("pfsync_state_import: invalid creator id:" " %08x\n", ntohl(sp->creatorid)); return (EINVAL); } - kif = pfi_kif_get(sp->ifname); - if (kif == NULL) { + if ((kif = pfi_kif_get(sp->ifname)) == NULL) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert_net_state: " +#endif + printf("pfsync_state_import: " "unknown interface: %s\n", sp->ifname); - /* skip this state */ - return (0); + if (flags & PFSYNC_SI_IOCTL) + return (EINVAL); + return (0); /* skip this state */ } /* - * If the ruleset checksums match, it's safe to associate the state - * with the rule of that number. + * If the ruleset checksums match or the state is coming from the ioctl, + * it's safe to associate the state with the rule of that number. */ - if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag) + if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && + (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < + pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) r = pf_main_ruleset.rules[ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; else +#ifdef __FreeBSD__ + r = &V_pf_default_rule; +#else r = &pf_default_rule; +#endif - if (!r->max_states || r->states < r->max_states) - st = pool_get(&pf_state_pl, PR_NOWAIT); - if (st == NULL) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - return (ENOMEM); - } - bzero(st, sizeof(*st)); + if ((r->max_states && r->states_cur >= r->max_states)) + goto cleanup; - /* allocate memory for scrub info */ - if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || - pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - if (st->src.scrub) - pool_put(&pf_state_scrub_pl, st->src.scrub); - pool_put(&pf_state_pl, st); - return (ENOMEM); - } +#ifdef __FreeBSD__ + if (flags & PFSYNC_SI_IOCTL) + pool_flags = PR_WAITOK | PR_ZERO; + else + pool_flags = PR_NOWAIT | PR_ZERO; - st->rule.ptr = r; - /* XXX get pointers to nat_rule and anchor */ + if ((st = pool_get(&V_pf_state_pl, pool_flags)) == NULL) + goto cleanup; +#else + if (flags & PFSYNC_SI_IOCTL) + pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; + else + pool_flags = PR_LIMITFAIL | PR_ZERO; - /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ - r->states++; + if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) + goto cleanup; +#endif - /* fill in the rest of the state entry */ - pf_state_host_ntoh(&sp->lan, &st->lan); - pf_state_host_ntoh(&sp->gwy, &st->gwy); - pf_state_host_ntoh(&sp->ext, &st->ext); + if ((skw = pf_alloc_state_key(pool_flags)) == NULL) + goto cleanup; - pf_state_peer_ntoh(&sp->src, &st->src); - pf_state_peer_ntoh(&sp->dst, &st->dst); + if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], + &sp->key[PF_SK_STACK].addr[0], sp->af) || + PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], + &sp->key[PF_SK_STACK].addr[1], sp->af) || + sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || + sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { + if ((sks = pf_alloc_state_key(pool_flags)) == NULL) + goto cleanup; + } else + sks = skw; + + /* allocate memory for scrub info */ + if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || + pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) + goto cleanup; + + /* copy to state key(s) */ + skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; + skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; + skw->port[0] = sp->key[PF_SK_WIRE].port[0]; + skw->port[1] = sp->key[PF_SK_WIRE].port[1]; + skw->proto = sp->proto; + skw->af = sp->af; + if (sks != skw) { + sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; + sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; + sks->port[0] = sp->key[PF_SK_STACK].port[0]; + sks->port[1] = sp->key[PF_SK_STACK].port[1]; + sks->proto = sp->proto; + sks->af = sp->af; + } + /* copy to state */ bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); - st->creation = time_second - ntohl(sp->creation); - st->expire = ntohl(sp->expire) + time_second; + st->creation = time_uptime - ntohl(sp->creation); + st->expire = time_second; + if (sp->expire) { + /* XXX No adaptive scaling. */ + st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); + } - st->af = sp->af; - st->proto = sp->proto; + st->expire = ntohl(sp->expire) + time_second; st->direction = sp->direction; st->log = sp->log; st->timeout = sp->timeout; @@ -471,21 +804,74 @@ pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) bcopy(sp->id, &st->id, sizeof(st->id)); st->creatorid = sp->creatorid; - st->sync_flags = PFSTATE_FROMSYNC; + pf_state_peer_ntoh(&sp->src, &st->src); + pf_state_peer_ntoh(&sp->dst, &st->dst); + + st->rule.ptr = r; + st->nat_rule.ptr = NULL; + st->anchor.ptr = NULL; + st->rt_kif = NULL; + + st->pfsync_time = time_uptime; + st->sync_state = PFSYNC_S_NONE; - if (pf_insert_state(kif, st)) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); + /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ + r->states_cur++; + r->states_tot++; + + if (!ISSET(flags, PFSYNC_SI_IOCTL)) + SET(st->state_flags, PFSTATE_NOSYNC); + + if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ - r->states--; + r->states_cur--; + goto cleanup_state; + } + + if (!ISSET(flags, PFSYNC_SI_IOCTL)) { + CLR(st->state_flags, PFSTATE_NOSYNC); + if (ISSET(st->state_flags, PFSTATE_ACK)) { + pfsync_q_ins(st, PFSYNC_S_IACK); + schednetisr(NETISR_PFSYNC); + } + } + CLR(st->state_flags, PFSTATE_ACK); + + return (0); + +cleanup: + error = ENOMEM; + if (skw == sks) + sks = NULL; +#ifdef __FreeBSD__ + if (skw != NULL) + pool_put(&V_pf_state_key_pl, skw); + if (sks != NULL) + pool_put(&V_pf_state_key_pl, sks); +#else + if (skw != NULL) + pool_put(&pf_state_key_pl, skw); + if (sks != NULL) + pool_put(&pf_state_key_pl, sks); +#endif + +cleanup_state: /* pf_state_insert frees the state keys */ + if (st) { +#ifdef __FreeBSD__ + if (st->dst.scrub) + pool_put(&V_pf_state_scrub_pl, st->dst.scrub); + if (st->src.scrub) + pool_put(&V_pf_state_scrub_pl, st->src.scrub); + pool_put(&V_pf_state_pl, st); +#else if (st->dst.scrub) pool_put(&pf_state_scrub_pl, st->dst.scrub); if (st->src.scrub) pool_put(&pf_state_scrub_pl, st->src.scrub); pool_put(&pf_state_pl, st); - return (EINVAL); +#endif } - - return (0); + return (error); } void @@ -495,597 +881,873 @@ pfsync_input(struct mbuf *m, __unused int off) pfsync_input(struct mbuf *m, ...) #endif { - struct ip *ip = mtod(m, struct ip *); - struct pfsync_header *ph; +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else struct pfsync_softc *sc = pfsyncif; - struct pf_state *st; - struct pf_state_cmp key; - struct pfsync_state *sp; - struct pfsync_state_upd *up; - struct pfsync_state_del *dp; - struct pfsync_state_clr *cp; - struct pfsync_state_upd_req *rup; - struct pfsync_state_bus *bus; -#ifdef PFSYNC_TDB - struct pfsync_tdb *pt; #endif - struct in_addr src; - struct mbuf *mp; - int iplen, action, error, i, s, count, offp, sfail, stale = 0; - u_int8_t chksum_flag = 0; + struct pfsync_pkt pkt; + struct ip *ip = mtod(m, struct ip *); + struct pfsync_header *ph; + struct pfsync_subheader subh; + + int offset; + int rv; - pfsyncstats.pfsyncs_ipackets++; + V_pfsyncstats.pfsyncs_ipackets++; /* verify that we have a sync interface configured */ - if (!sc || !sc->sc_sync_ifp || !pf_status.running) +#ifdef __FreeBSD__ + if (!sc || !sc->sc_sync_if || !V_pf_status.running) +#else + if (!sc || !sc->sc_sync_if || !pf_status.running) +#endif goto done; /* verify that the packet came in on the right interface */ - if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { - pfsyncstats.pfsyncs_badif++; + if (sc->sc_sync_if != m->m_pkthdr.rcvif) { + V_pfsyncstats.pfsyncs_badif++; goto done; } - /* verify that the IP TTL is 255. */ +#ifdef __FreeBSD__ + sc->sc_ifp->if_ipackets++; + sc->sc_ifp->if_ibytes += m->m_pkthdr.len; +#else + sc->sc_if.if_ipackets++; + sc->sc_if.if_ibytes += m->m_pkthdr.len; +#endif + /* verify that the IP TTL is 255. */ if (ip->ip_ttl != PFSYNC_DFLTTL) { - pfsyncstats.pfsyncs_badttl++; + V_pfsyncstats.pfsyncs_badttl++; goto done; } - iplen = ip->ip_hl << 2; - - if (m->m_pkthdr.len < iplen + sizeof(*ph)) { - pfsyncstats.pfsyncs_hdrops++; + offset = ip->ip_hl << 2; + if (m->m_pkthdr.len < offset + sizeof(*ph)) { + V_pfsyncstats.pfsyncs_hdrops++; goto done; } - if (iplen + sizeof(*ph) > m->m_len) { - if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { - pfsyncstats.pfsyncs_hdrops++; - goto done; + if (offset + sizeof(*ph) > m->m_len) { + if (m_pullup(m, offset + sizeof(*ph)) == NULL) { + V_pfsyncstats.pfsyncs_hdrops++; + return; } ip = mtod(m, struct ip *); } - ph = (struct pfsync_header *)((char *)ip + iplen); + ph = (struct pfsync_header *)((char *)ip + offset); /* verify the version */ if (ph->version != PFSYNC_VERSION) { - pfsyncstats.pfsyncs_badver++; + V_pfsyncstats.pfsyncs_badver++; goto done; } - action = ph->action; - count = ph->count; - - /* make sure it's a valid action code */ - if (action >= PFSYNC_ACT_MAX) { - pfsyncstats.pfsyncs_badact++; +#if 0 + if (pfsync_input_hmac(m, offset) != 0) { + /* XXX stats */ goto done; } +#endif /* Cheaper to grab this now than having to mess with mbufs later */ - src = ip->ip_src; - - if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) - chksum_flag++; - - switch (action) { - case PFSYNC_ACT_CLR: { - struct pf_state *nexts; - struct pfi_kif *kif; - u_int32_t creatorid; - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - sizeof(*cp), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; + pkt.ip = ip; + pkt.src = ip->ip_src; + pkt.flags = 0; + +#ifdef __FreeBSD__ + if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) +#else + if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) +#endif + pkt.flags |= PFSYNC_SI_CKSUM; + + offset += sizeof(*ph); + for (;;) { + m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); + offset += sizeof(subh); + + if (subh.action >= PFSYNC_ACT_MAX) { + V_pfsyncstats.pfsyncs_badact++; + goto done; } - cp = (struct pfsync_state_clr *)(mp->m_data + offp); - creatorid = cp->creatorid; - s = splsoftnet(); + rv = (*pfsync_acts[subh.action])(&pkt, m, offset, + ntohs(subh.count)); + if (rv == -1) + return; + + offset += rv; + } + +done: + m_freem(m); +} + +int +pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_clr *clr; + struct mbuf *mp; + int len = sizeof(*clr) * count; + int i, offp; + + struct pf_state *st, *nexts; + struct pf_state_key *sk, *nextsk; + struct pf_state_item *si; + u_int32_t creatorid; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + clr = (struct pfsync_clr *)(mp->m_data + offp); + + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - if (cp->ifname[0] == '\0') { + for (i = 0; i < count; i++) { + creatorid = clr[i].creatorid; + + if (clr[i].ifname[0] == '\0') { +#ifdef __FreeBSD__ + for (st = RB_MIN(pf_state_tree_id, &V_tree_id); + st; st = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, st); +#else for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); +#endif if (st->creatorid == creatorid) { - st->sync_flags |= PFSTATE_FROMSYNC; + SET(st->state_flags, PFSTATE_NOSYNC); pf_unlink_state(st); } } } else { - if ((kif = pfi_kif_get(cp->ifname)) == NULL) { + if (pfi_kif_get(clr[i].ifname) == NULL) + continue; + + /* XXX correct? */ #ifdef __FreeBSD__ - PF_UNLOCK(); + for (sk = RB_MIN(pf_state_tree, &V_pf_statetbl); +#else + for (sk = RB_MIN(pf_state_tree, &pf_statetbl); #endif - splx(s); - return; - } - for (st = RB_MIN(pf_state_tree_lan_ext, - &kif->pfik_lan_ext); st; st = nexts) { - nexts = RB_NEXT(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, st); - if (st->creatorid == creatorid) { - st->sync_flags |= PFSTATE_FROMSYNC; - pf_unlink_state(st); + sk; sk = nextsk) { + nextsk = RB_NEXT(pf_state_tree, +#ifdef __FreeBSD__ + &V_pf_statetbl, sk); +#else + &pf_statetbl, sk); +#endif + TAILQ_FOREACH(si, &sk->states, entry) { + if (si->s->creatorid == creatorid) { + SET(si->s->state_flags, + PFSTATE_NOSYNC); + pf_unlink_state(si->s); + } } } } + } #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); + splx(s); - break; + return (len); +} + +int +pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct mbuf *mp; + struct pfsync_state *sa, *sp; + int len = sizeof(*sp) * count; + int i, offp; + + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); } - case PFSYNC_ACT_INS: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*sp), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } + sa = (struct pfsync_state *)(mp->m_data + offp); - s = splsoftnet(); + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); - i < count; i++, sp++) { - /* check for invalid values */ - if (sp->timeout >= PFTM_MAX || - sp->src.state > PF_TCPS_PROXY_DST || - sp->dst.state > PF_TCPS_PROXY_DST || - sp->direction > PF_OUT || - (sp->af != AF_INET && sp->af != AF_INET6)) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert: PFSYNC_ACT_INS: " - "invalid value\n"); - pfsyncstats.pfsyncs_badstate++; - continue; - } + for (i = 0; i < count; i++) { + sp = &sa[i]; - if ((error = pfsync_insert_net_state(sp, - chksum_flag))) { - if (error == ENOMEM) { + /* check for invalid values */ + if (sp->timeout >= PFTM_MAX || + sp->src.state > PF_TCPS_PROXY_DST || + sp->dst.state > PF_TCPS_PROXY_DST || + sp->direction > PF_OUT || + (sp->af != AF_INET && sp->af != AF_INET6)) { #ifdef __FreeBSD__ - PF_UNLOCK(); + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { #endif - splx(s); - goto done; - } - continue; + printf("pfsync_input: PFSYNC5_ACT_INS: " + "invalid value\n"); } + V_pfsyncstats.pfsyncs_badval++; + continue; } + + if (pfsync_state_import(sp, pkt->flags) == ENOMEM) { + /* drop out, but process the rest of the actions */ + break; + } + } #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); - break; - case PFSYNC_ACT_UPD: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*sp), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } + splx(s); + + return (len); +} - s = splsoftnet(); +int +pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_ins_ack *ia, *iaa; + struct pf_state_cmp id_key; + struct pf_state *st; + + struct mbuf *mp; + int len = count * sizeof(*ia); + int offp, i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); + + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); - i < count; i++, sp++) { - int flags = PFSYNC_FLAG_STALE; - - /* check for invalid values */ - if (sp->timeout >= PFTM_MAX || - sp->src.state > PF_TCPS_PROXY_DST || - sp->dst.state > PF_TCPS_PROXY_DST) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert: PFSYNC_ACT_UPD: " - "invalid value\n"); - pfsyncstats.pfsyncs_badstate++; - continue; - } + for (i = 0; i < count; i++) { + ia = &iaa[i]; - bcopy(sp->id, &key.id, sizeof(key.id)); - key.creatorid = sp->creatorid; + bcopy(&ia->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = ia->creatorid; - st = pf_find_state_byid(&key); - if (st == NULL) { - /* insert the update */ - if (pfsync_insert_net_state(sp, chksum_flag)) - pfsyncstats.pfsyncs_badstate++; - continue; - } - sfail = 0; - if (st->proto == IPPROTO_TCP) { - /* - * The state should never go backwards except - * for syn-proxy states. Neither should the - * sequence window slide backwards. - */ - if (st->src.state > sp->src.state && - (st->src.state < PF_TCPS_PROXY_SRC || - sp->src.state >= PF_TCPS_PROXY_SRC)) - sfail = 1; - else if (SEQ_GT(st->src.seqlo, - ntohl(sp->src.seqlo))) - sfail = 3; - else if (st->dst.state > sp->dst.state) { - /* There might still be useful - * information about the src state here, - * so import that part of the update, - * then "fail" so we send the updated - * state back to the peer who is missing - * our what we know. */ - pf_state_peer_ntoh(&sp->src, &st->src); - /* XXX do anything with timeouts? */ - sfail = 7; - flags = 0; - } else if (st->dst.state >= TCPS_SYN_SENT && - SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) - sfail = 4; - } else { - /* - * Non-TCP protocol state machine always go - * forwards - */ - if (st->src.state > sp->src.state) - sfail = 5; - else if (st->dst.state > sp->dst.state) - sfail = 6; - } - if (sfail) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: %s stale update " - "(%d) id: %016llx " - "creatorid: %08x\n", - (sfail < 7 ? "ignoring" - : "partial"), sfail, - betoh64(st->id), - ntohl(st->creatorid)); - pfsyncstats.pfsyncs_badstate++; - - if (!(sp->sync_flags & PFSTATE_STALE)) { - /* we have a better state, send it */ - if (sc->sc_mbuf != NULL && !stale) - pfsync_sendout(sc); - stale++; - if (!st->sync_flags) - pfsync_pack_state( - PFSYNC_ACT_UPD, st, flags); - } - continue; - } - pfsync_alloc_scrub_memory(&sp->dst, &st->dst); - pf_state_peer_ntoh(&sp->src, &st->src); - pf_state_peer_ntoh(&sp->dst, &st->dst); - st->expire = ntohl(sp->expire) + time_second; - st->timeout = sp->timeout; - } - if (stale && sc->sc_mbuf != NULL) - pfsync_sendout(sc); + st = pf_find_state_byid(&id_key); + if (st == NULL) + continue; + + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(st, 0); + } #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); - break; + splx(s); /* - * It's not strictly necessary for us to support the "uncompressed" - * delete action, but it's relatively simple and maintains consistency. + * XXX this is not yet implemented, but we know the size of the + * message so we can skip it. */ - case PFSYNC_ACT_DEL: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*sp), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } - s = splsoftnet(); + return (count * sizeof(struct pfsync_ins_ack)); +} + +int +pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, + struct pfsync_state_peer *dst) +{ + int sfail = 0; + + /* + * The state should never go backwards except + * for syn-proxy states. Neither should the + * sequence window slide backwards. + */ + if (st->src.state > src->state && + (st->src.state < PF_TCPS_PROXY_SRC || + src->state >= PF_TCPS_PROXY_SRC)) + sfail = 1; + else if (SEQ_GT(st->src.seqlo, ntohl(src->seqlo))) + sfail = 3; + else if (st->dst.state > dst->state) { + /* There might still be useful + * information about the src state here, + * so import that part of the update, + * then "fail" so we send the updated + * state back to the peer who is missing + * our what we know. */ + pf_state_peer_ntoh(src, &st->src); + /* XXX do anything with timeouts? */ + sfail = 7; + } else if (st->dst.state >= TCPS_SYN_SENT && + SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))) + sfail = 4; + + return (sfail); +} + +int +pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_state *sa, *sp; + struct pf_state_cmp id_key; + struct pf_state_key *sk; + struct pf_state *st; + int sfail; + + struct mbuf *mp; + int len = count * sizeof(*sp); + int offp, i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + sa = (struct pfsync_state *)(mp->m_data + offp); + + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); - i < count; i++, sp++) { - bcopy(sp->id, &key.id, sizeof(key.id)); - key.creatorid = sp->creatorid; + for (i = 0; i < count; i++) { + sp = &sa[i]; - st = pf_find_state_byid(&key); - if (st == NULL) { - pfsyncstats.pfsyncs_badstate++; - continue; + /* check for invalid values */ + if (sp->timeout >= PFTM_MAX || + sp->src.state > PF_TCPS_PROXY_DST || + sp->dst.state > PF_TCPS_PROXY_DST) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { +#endif + printf("pfsync_input: PFSYNC_ACT_UPD: " + "invalid value\n"); } - st->sync_flags |= PFSTATE_FROMSYNC; - pf_unlink_state(st); + V_pfsyncstats.pfsyncs_badval++; + continue; } + + bcopy(sp->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = sp->creatorid; + + st = pf_find_state_byid(&id_key); + if (st == NULL) { + /* insert the update */ + if (pfsync_state_import(sp, 0)) + V_pfsyncstats.pfsyncs_badstate++; + continue; + } + + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(st, 1); + + sk = st->key[PF_SK_WIRE]; /* XXX right one? */ + sfail = 0; + if (sk->proto == IPPROTO_TCP) + sfail = pfsync_upd_tcp(st, &sp->src, &sp->dst); + else { + /* + * Non-TCP protocol state machine always go + * forwards + */ + if (st->src.state > sp->src.state) + sfail = 5; + else if (st->dst.state > sp->dst.state) + sfail = 6; + } + + if (sfail) { #ifdef __FreeBSD__ - PF_UNLOCK(); + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { #endif - splx(s); - break; - case PFSYNC_ACT_UPD_C: { - int update_requested = 0; + printf("pfsync: %s stale update (%d)" + " id: %016llx creatorid: %08x\n", + (sfail < 7 ? "ignoring" : "partial"), + sfail, betoh64(st->id), + ntohl(st->creatorid)); + } + V_pfsyncstats.pfsyncs_stale++; - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*up), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; + pfsync_update_state(st); + schednetisr(NETISR_PFSYNC); + continue; } + pfsync_alloc_scrub_memory(&sp->dst, &st->dst); + pf_state_peer_ntoh(&sp->src, &st->src); + pf_state_peer_ntoh(&sp->dst, &st->dst); + st->expire = ntohl(sp->expire) + time_second; + st->timeout = sp->timeout; + st->pfsync_time = time_uptime; + } +#ifdef __FreeBSD__ + PF_UNLOCK(); +#endif + splx(s); + + return (len); +} - s = splsoftnet(); +int +pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_upd_c *ua, *up; + struct pf_state_key *sk; + struct pf_state_cmp id_key; + struct pf_state *st; + + int len = count * sizeof(*up); + int sfail; + + struct mbuf *mp; + int offp, i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + ua = (struct pfsync_upd_c *)(mp->m_data + offp); + + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); - i < count; i++, up++) { - /* check for invalid values */ - if (up->timeout >= PFTM_MAX || - up->src.state > PF_TCPS_PROXY_DST || - up->dst.state > PF_TCPS_PROXY_DST) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert: " - "PFSYNC_ACT_UPD_C: " - "invalid value\n"); - pfsyncstats.pfsyncs_badstate++; - continue; + for (i = 0; i < count; i++) { + up = &ua[i]; + + /* check for invalid values */ + if (up->timeout >= PFTM_MAX || + up->src.state > PF_TCPS_PROXY_DST || + up->dst.state > PF_TCPS_PROXY_DST) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { +#endif + printf("pfsync_input: " + "PFSYNC_ACT_UPD_C: " + "invalid value\n"); } + V_pfsyncstats.pfsyncs_badval++; + continue; + } - bcopy(up->id, &key.id, sizeof(key.id)); - key.creatorid = up->creatorid; + bcopy(&up->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = up->creatorid; - st = pf_find_state_byid(&key); - if (st == NULL) { - /* We don't have this state. Ask for it. */ - error = pfsync_request_update(up, &src); - if (error == ENOMEM) { + st = pf_find_state_byid(&id_key); + if (st == NULL) { + /* We don't have this state. Ask for it. */ + pfsync_request_update(id_key.creatorid, id_key.id); + continue; + } + + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(st, 1); + + sk = st->key[PF_SK_WIRE]; /* XXX right one? */ + sfail = 0; + if (sk->proto == IPPROTO_TCP) + sfail = pfsync_upd_tcp(st, &up->src, &up->dst); + else { + /* + * Non-TCP protocol state machine always go forwards + */ + if (st->src.state > up->src.state) + sfail = 5; + else if (st->dst.state > up->dst.state) + sfail = 6; + } + + if (sfail) { #ifdef __FreeBSD__ - PF_UNLOCK(); + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { #endif - splx(s); - goto done; - } - update_requested = 1; - pfsyncstats.pfsyncs_badstate++; - continue; + printf("pfsync: ignoring stale update " + "(%d) id: %016llx " + "creatorid: %08x\n", sfail, + betoh64(st->id), + ntohl(st->creatorid)); } - sfail = 0; - if (st->proto == IPPROTO_TCP) { - /* - * The state should never go backwards except - * for syn-proxy states. Neither should the - * sequence window slide backwards. - */ - if (st->src.state > up->src.state && - (st->src.state < PF_TCPS_PROXY_SRC || - up->src.state >= PF_TCPS_PROXY_SRC)) - sfail = 1; - else if (st->dst.state > up->dst.state) - sfail = 2; - else if (SEQ_GT(st->src.seqlo, - ntohl(up->src.seqlo))) - sfail = 3; - else if (st->dst.state >= TCPS_SYN_SENT && - SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) - sfail = 4; - } else { - /* - * Non-TCP protocol state machine always go - * forwards - */ - if (st->src.state > up->src.state) - sfail = 5; - else if (st->dst.state > up->dst.state) - sfail = 6; - } - if (sfail) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: ignoring stale update " - "(%d) id: %016llx " - "creatorid: %08x\n", sfail, - betoh64(st->id), - ntohl(st->creatorid)); - pfsyncstats.pfsyncs_badstate++; - - /* we have a better state, send it out */ - if ((!stale || update_requested) && - sc->sc_mbuf != NULL) { - pfsync_sendout(sc); - update_requested = 0; - } - stale++; - if (!st->sync_flags) - pfsync_pack_state(PFSYNC_ACT_UPD, st, - PFSYNC_FLAG_STALE); - continue; - } - pfsync_alloc_scrub_memory(&up->dst, &st->dst); - pf_state_peer_ntoh(&up->src, &st->src); - pf_state_peer_ntoh(&up->dst, &st->dst); - st->expire = ntohl(up->expire) + time_second; - st->timeout = up->timeout; + V_pfsyncstats.pfsyncs_stale++; + + pfsync_update_state(st); + schednetisr(NETISR_PFSYNC); + continue; } - if ((update_requested || stale) && sc->sc_mbuf) - pfsync_sendout(sc); + pfsync_alloc_scrub_memory(&up->dst, &st->dst); + pf_state_peer_ntoh(&up->src, &st->src); + pf_state_peer_ntoh(&up->dst, &st->dst); + st->expire = ntohl(up->expire) + time_second; + st->timeout = up->timeout; + st->pfsync_time = time_uptime; + } #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); - break; + splx(s); + + return (len); +} + +int +pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_upd_req *ur, *ura; + struct mbuf *mp; + int len = count * sizeof(*ur); + int i, offp; + + struct pf_state_cmp id_key; + struct pf_state *st; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); } - case PFSYNC_ACT_DEL_C: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*dp), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } + ura = (struct pfsync_upd_req *)(mp->m_data + offp); - s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); - i < count; i++, dp++) { - bcopy(dp->id, &key.id, sizeof(key.id)); - key.creatorid = dp->creatorid; + for (i = 0; i < count; i++) { + ur = &ura[i]; + + bcopy(&ur->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = ur->creatorid; - st = pf_find_state_byid(&key); + if (id_key.id == 0 && id_key.creatorid == 0) + pfsync_bulk_start(); + else { + st = pf_find_state_byid(&id_key); if (st == NULL) { - pfsyncstats.pfsyncs_badstate++; + V_pfsyncstats.pfsyncs_badstate++; continue; } - st->sync_flags |= PFSTATE_FROMSYNC; - pf_unlink_state(st); + if (ISSET(st->state_flags, PFSTATE_NOSYNC)) + continue; + + pfsync_update_state_req(st); } + } #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); - break; - case PFSYNC_ACT_INS_F: - case PFSYNC_ACT_DEL_F: - /* not implemented */ - break; - case PFSYNC_ACT_UREQ: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*rup), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } - s = splsoftnet(); + return (len); +} + +int +pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct mbuf *mp; + struct pfsync_state *sa, *sp; + struct pf_state_cmp id_key; + struct pf_state *st; + int len = count * sizeof(*sp); + int offp, i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + sa = (struct pfsync_state *)(mp->m_data + offp); + + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); - for (i = 0, - rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); - i < count; i++, rup++) { - bcopy(rup->id, &key.id, sizeof(key.id)); - key.creatorid = rup->creatorid; - - if (key.id == 0 && key.creatorid == 0) { - sc->sc_ureq_received = time_uptime; - if (sc->sc_bulk_send_next == NULL) - sc->sc_bulk_send_next = - TAILQ_FIRST(&state_list); - sc->sc_bulk_terminator = sc->sc_bulk_send_next; - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: received " - "bulk update request\n"); - pfsync_send_bus(sc, PFSYNC_BUS_START); -#ifdef __FreeBSD__ - callout_reset(&sc->sc_bulk_tmo, 1 * hz, - pfsync_bulk_update, pfsyncif); -#else - timeout_add(&sc->sc_bulk_tmo, 1 * hz); -#endif - } else { - st = pf_find_state_byid(&key); - if (st == NULL) { - pfsyncstats.pfsyncs_badstate++; - continue; - } - if (!st->sync_flags) - pfsync_pack_state(PFSYNC_ACT_UPD, - st, 0); - } + for (i = 0; i < count; i++) { + sp = &sa[i]; + + bcopy(sp->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = sp->creatorid; + + st = pf_find_state_byid(&id_key); + if (st == NULL) { + V_pfsyncstats.pfsyncs_badstate++; + continue; } - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); + SET(st->state_flags, PFSTATE_NOSYNC); + pf_unlink_state(st); + } #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); - break; - case PFSYNC_ACT_BUS: - /* If we're not waiting for a bulk update, who cares. */ - if (sc->sc_ureq_sent == 0) - break; + splx(s); - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - sizeof(*bus), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; + return (len); +} + +int +pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct mbuf *mp; + struct pfsync_del_c *sa, *sp; + struct pf_state_cmp id_key; + struct pf_state *st; + int len = count * sizeof(*sp); + int offp, i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + sa = (struct pfsync_del_c *)(mp->m_data + offp); + + s = splsoftnet(); +#ifdef __FreeBSD__ + PF_LOCK(); +#endif + for (i = 0; i < count; i++) { + sp = &sa[i]; + + bcopy(&sp->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = sp->creatorid; + + st = pf_find_state_byid(&id_key); + if (st == NULL) { + V_pfsyncstats.pfsyncs_badstate++; + continue; } - bus = (struct pfsync_state_bus *)(mp->m_data + offp); - switch (bus->status) { - case PFSYNC_BUS_START: + + SET(st->state_flags, PFSTATE_NOSYNC); + pf_unlink_state(st); + } #ifdef __FreeBSD__ - callout_reset(&sc->sc_bulkfail_tmo, - pf_pool_limits[PF_LIMIT_STATES].limit / - (PFSYNC_BULKPACKETS * sc->sc_maxcount), - pfsync_bulkfail, pfsyncif); + PF_UNLOCK(); +#endif + splx(s); + + return (len); +} + +int +pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; #else - timeout_add(&sc->sc_bulkfail_tmo, - pf_pool_limits[PF_LIMIT_STATES].limit / - (PFSYNC_BULKPACKETS * sc->sc_maxcount)); + struct pfsync_softc *sc = pfsyncif; #endif - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: received bulk " - "update start\n"); - break; - case PFSYNC_BUS_END: - if (time_uptime - ntohl(bus->endtime) >= - sc->sc_ureq_sent) { - /* that's it, we're happy */ - sc->sc_ureq_sent = 0; - sc->sc_bulk_tries = 0; - timeout_del(&sc->sc_bulkfail_tmo); + struct pfsync_bus *bus; + struct mbuf *mp; + int len = count * sizeof(*bus); + int offp; + + /* If we're not waiting for a bulk update, who cares. */ + if (sc->sc_ureq_sent == 0) + return (len); + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + bus = (struct pfsync_bus *)(mp->m_data + offp); + + switch (bus->status) { + case PFSYNC_BUS_START: +#ifdef __FreeBSD__ + callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + + V_pf_pool_limits[PF_LIMIT_STATES].limit / + ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / + sizeof(struct pfsync_state)), + pfsync_bulk_fail, V_pfsyncif); +#else + timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + + pf_pool_limits[PF_LIMIT_STATES].limit / + ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / + sizeof(struct pfsync_state))); +#endif +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) +#endif + printf("pfsync: received bulk update start\n"); + break; + + case PFSYNC_BUS_END: + if (time_uptime - ntohl(bus->endtime) >= + sc->sc_ureq_sent) { + /* that's it, we're happy */ + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; + timeout_del(&sc->sc_bulkfail_tmo); #if NCARP > 0 - if (!pfsync_sync_ok) +#ifdef notyet #ifdef __FreeBSD__ -#ifdef CARP_ADVANCED - carp_group_demote_adj(sc->sc_ifp, -1); + if (!sc->pfsync_sync_ok) +#else + if (!pfsync_sync_ok) +#endif + carp_group_demote_adj(&sc->sc_if, -1); +#endif #endif +#ifdef __FreeBSD__ + sc->pfsync_sync_ok = 1; #else - carp_group_demote_adj(&sc->sc_if, -1); + pfsync_sync_ok = 1; #endif +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) #endif - pfsync_sync_ok = 1; - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: received valid " - "bulk update end\n"); - } else { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: received invalid " - "bulk update end: bad timestamp\n"); - } - break; + printf("pfsync: received valid " + "bulk update end\n"); + } else { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) +#endif + printf("pfsync: received invalid " + "bulk update end: bad timestamp\n"); } break; -#ifdef PFSYNC_TDB - case PFSYNC_ACT_TDB_UPD: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*pt), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } - s = splsoftnet(); + } + + return (len); +} + +int +pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + int len = count * sizeof(struct pfsync_tdb); + +#if defined(IPSEC) + struct pfsync_tdb *tp; + struct mbuf *mp; + int offp; + int i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + tp = (struct pfsync_tdb *)(mp->m_data + offp); + + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp); - i < count; i++, pt++) - pfsync_update_net_tdb(pt); + for (i = 0; i < count; i++) + pfsync_update_net_tdb(&tp[i]); #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); - break; + splx(s); #endif + + return (len); +} + +#if defined(IPSEC) +/* Update an in-kernel tdb. Silently fail if no tdb is found. */ +void +pfsync_update_net_tdb(struct pfsync_tdb *pt) +{ + struct tdb *tdb; + int s; + + /* check for invalid values */ + if (ntohl(pt->spi) <= SPI_RESERVED_MAX || + (pt->dst.sa.sa_family != AF_INET && + pt->dst.sa.sa_family != AF_INET6)) + goto bad; + + s = spltdb(); + tdb = gettdb(pt->spi, &pt->dst, pt->sproto); + if (tdb) { + pt->rpl = ntohl(pt->rpl); + pt->cur_bytes = betoh64(pt->cur_bytes); + + /* Neither replay nor byte counter should ever decrease. */ + if (pt->rpl < tdb->tdb_rpl || + pt->cur_bytes < tdb->tdb_cur_bytes) { + splx(s); + goto bad; + } + + tdb->tdb_rpl = pt->rpl; + tdb->tdb_cur_bytes = pt->cur_bytes; } + splx(s); + return; -done: - if (m) - m_freem(m); +bad: +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) +#endif + printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " + "invalid value\n"); + V_pfsyncstats.pfsyncs_badstate++; + return; +} +#endif + + +int +pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + /* check if we are at the right place in the packet */ + if (offset != m->m_pkthdr.len - sizeof(struct pfsync_eof)) + V_pfsyncstats.pfsyncs_badact++; + + /* we're done. free and let the caller return */ + m_freem(m); + return (-1); +} + +int +pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + V_pfsyncstats.pfsyncs_badact++; + + m_freem(m); + return (-1); } int pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, - struct route *ro) +#ifdef __FreeBSD__ + struct route *rt) +#else + struct rtentry *rt) +#endif { m_freem(m); return (0); @@ -1103,12 +1765,15 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct ip_moptions *imo = &sc->sc_imo; struct pfsyncreq pfsyncr; struct ifnet *sifp; + struct ip *ip; int s, error; switch (cmd) { +#if 0 case SIOCSIFADDR: case SIOCAIFADDR: case SIOCSIFDSTADDR: +#endif case SIOCSIFFLAGS: #ifdef __FreeBSD__ if (ifp->if_flags & IFF_UP) @@ -1123,32 +1788,33 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) #endif break; case SIOCSIFMTU: - if (ifr->ifr_mtu < PFSYNC_MINMTU) + if (!sc->sc_sync_if || + ifr->ifr_mtu <= PFSYNC_MINPKT || + ifr->ifr_mtu > sc->sc_sync_if->if_mtu) return (EINVAL); - if (ifr->ifr_mtu > MCLBYTES) - ifr->ifr_mtu = MCLBYTES; - s = splnet(); + if (ifr->ifr_mtu < ifp->if_mtu) { + s = splnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - if (ifr->ifr_mtu < ifp->if_mtu) - pfsync_sendout(sc); - pfsync_setmtu(sc, ifr->ifr_mtu); + pfsync_sendout(); #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); + splx(s); + } + ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); - if (sc->sc_sync_ifp) + if (sc->sc_sync_if) { strlcpy(pfsyncr.pfsyncr_syncdev, - sc->sc_sync_ifp->if_xname, IFNAMSIZ); + sc->sc_sync_if->if_xname, IFNAMSIZ); + } pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; - if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) - return (error); - break; + return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); + case SIOCSETPFSYNC: #ifdef __FreeBSD__ if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) @@ -1184,22 +1850,18 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; if (pfsyncr.pfsyncr_syncdev[0] == 0) { - sc->sc_sync_ifp = NULL; - if (sc->sc_mbuf_net != NULL) { - /* Don't keep stale pfsync packets around. */ - s = splnet(); - m_freem(sc->sc_mbuf_net); - sc->sc_mbuf_net = NULL; - sc->sc_statep_net.s = NULL; - splx(s); - } + sc->sc_sync_if = NULL; #ifdef __FreeBSD__ PF_UNLOCK(); -#endif + if (imo->imo_membership) + pfsync_multicast_cleanup(sc); +#else if (imo->imo_num_memberships > 0) { - in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); + in_delmulti(imo->imo_membership[ + --imo->imo_num_memberships]); imo->imo_multicast_ifp = NULL; } +#endif break; } @@ -1208,116 +1870,117 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) #endif if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) return (EINVAL); + #ifdef __FreeBSD__ PF_LOCK(); #endif - s = splnet(); #ifdef __FreeBSD__ if (sifp->if_mtu < sc->sc_ifp->if_mtu || #else if (sifp->if_mtu < sc->sc_if.if_mtu || #endif - (sc->sc_sync_ifp != NULL && - sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || + (sc->sc_sync_if != NULL && + sifp->if_mtu < sc->sc_sync_if->if_mtu) || sifp->if_mtu < MCLBYTES - sizeof(struct ip)) - pfsync_sendout(sc); - sc->sc_sync_ifp = sifp; + pfsync_sendout(); + sc->sc_sync_if = sifp; #ifdef __FreeBSD__ - pfsync_setmtu(sc, sc->sc_ifp->if_mtu); + if (imo->imo_membership) { + PF_UNLOCK(); + pfsync_multicast_cleanup(sc); + PF_LOCK(); + } #else - pfsync_setmtu(sc, sc->sc_if.if_mtu); -#endif - if (imo->imo_num_memberships > 0) { -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif imo->imo_multicast_ifp = NULL; } +#endif - if (sc->sc_sync_ifp && #ifdef __FreeBSD__ + if (sc->sc_sync_if && sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { + PF_UNLOCK(); + error = pfsync_multicast_setup(sc); + if (error) + return (error); + PF_LOCK(); + } #else + if (sc->sc_sync_if && sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { -#endif struct in_addr addr; - if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { - sc->sc_sync_ifp = NULL; -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif + if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) { + sc->sc_sync_if = NULL; splx(s); return (EADDRNOTAVAIL); } -#ifdef __FreeBSD__ - addr.s_addr = htonl(INADDR_PFSYNC_GROUP); -#else addr.s_addr = INADDR_PFSYNC_GROUP; -#endif -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif if ((imo->imo_membership[0] = - in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) { - sc->sc_sync_ifp = NULL; + in_addmulti(&addr, sc->sc_sync_if)) == NULL) { + sc->sc_sync_if = NULL; splx(s); return (ENOBUFS); } -#ifdef __FreeBSD__ - PF_LOCK(); -#endif imo->imo_num_memberships++; - imo->imo_multicast_ifp = sc->sc_sync_ifp; + imo->imo_multicast_ifp = sc->sc_sync_if; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; } +#endif /* !__FreeBSD__ */ - if (sc->sc_sync_ifp || + ip = &sc->sc_template; + bzero(ip, sizeof(*ip)); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(sc->sc_template) >> 2; + ip->ip_tos = IPTOS_LOWDELAY; + /* len and id are set later */ #ifdef __FreeBSD__ - sc->sc_sendaddr.s_addr != htonl(INADDR_PFSYNC_GROUP)) { + ip->ip_off = IP_DF; #else - sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { + ip->ip_off = htons(IP_DF); #endif + ip->ip_ttl = PFSYNC_DFLTTL; + ip->ip_p = IPPROTO_PFSYNC; + ip->ip_src.s_addr = INADDR_ANY; + ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; + + if (sc->sc_sync_if) { /* Request a full state table update. */ sc->sc_ureq_sent = time_uptime; #if NCARP > 0 - if (pfsync_sync_ok) +#ifdef notyet #ifdef __FreeBSD__ -#ifdef CARP_ADVANCED - carp_group_demote_adj(sc->sc_ifp, 1); -#endif + if (sc->pfsync_sync_ok) #else + if (pfsync_sync_ok) +#endif carp_group_demote_adj(&sc->sc_if, 1); #endif #endif +#ifdef __FreeBSD__ + sc->pfsync_sync_ok = 0; +#else pfsync_sync_ok = 0; +#endif +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else if (pf_status.debug >= PF_DEBUG_MISC) +#endif printf("pfsync: requesting bulk update\n"); #ifdef __FreeBSD__ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, - pfsync_bulkfail, pfsyncif); + pfsync_bulk_fail, V_pfsyncif); #else - timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); -#endif - error = pfsync_request_update(NULL, NULL); - if (error == ENOMEM) { -#ifdef __FreeBSD__ - PF_UNLOCK(); + timeout_add_sec(&sc->sc_bulkfail_tmo, 5); #endif - splx(s); - return (ENOMEM); - } - pfsync_sendout(sc); + pfsync_request_update(0, 0); } #ifdef __FreeBSD__ PF_UNLOCK(); @@ -1333,34 +1996,165 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (0); } -void -pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) +int +pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset) +{ + struct pfsync_state *sp = (struct pfsync_state *)(m->m_data + offset); + + pfsync_state_export(sp, st); + + return (sizeof(*sp)); +} + +int +pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset) +{ + struct pfsync_ins_ack *iack = + (struct pfsync_ins_ack *)(m->m_data + offset); + + iack->id = st->id; + iack->creatorid = st->creatorid; + + return (sizeof(*iack)); +} + +int +pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset) { - int mtu; + struct pfsync_upd_c *up = (struct pfsync_upd_c *)(m->m_data + offset); + + bzero(up, sizeof(*up)); + up->id = st->id; + pf_state_peer_hton(&st->src, &up->src); + pf_state_peer_hton(&st->dst, &up->dst); + up->creatorid = st->creatorid; - if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) - mtu = sc->sc_sync_ifp->if_mtu; + up->expire = pf_state_expires(st); + if (up->expire <= time_second) + up->expire = htonl(0); else - mtu = mtu_req; + up->expire = htonl(up->expire - time_second); + up->timeout = st->timeout; + + return (sizeof(*up)); +} + +int +pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset) +{ + struct pfsync_del_c *dp = (struct pfsync_del_c *)(m->m_data + offset); + + dp->id = st->id; + dp->creatorid = st->creatorid; - sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / - sizeof(struct pfsync_state); - if (sc->sc_maxcount > 254) - sc->sc_maxcount = 254; + SET(st->state_flags, PFSTATE_NOSYNC); + + return (sizeof(*dp)); +} + +void +pfsync_drop(struct pfsync_softc *sc) +{ + struct pf_state *st; + struct pfsync_upd_req_item *ur; +#ifdef notyet + struct tdb *t; +#endif + int q; + + for (q = 0; q < PFSYNC_S_COUNT; q++) { + if (TAILQ_EMPTY(&sc->sc_qs[q])) + continue; + + TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { +#ifdef PFSYNC_DEBUG #ifdef __FreeBSD__ - sc->sc_ifp->if_mtu = sizeof(struct pfsync_header) + + KASSERT(st->sync_state == q, + ("%s: st->sync_state == q", + __FUNCTION__)); #else - sc->sc_if.if_mtu = sizeof(struct pfsync_header) + + KASSERT(st->sync_state == q); +#endif #endif - sc->sc_maxcount * sizeof(struct pfsync_state); + st->sync_state = PFSYNC_S_NONE; + } + TAILQ_INIT(&sc->sc_qs[q]); + } + + while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { + TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); + pool_put(&sc->sc_pool, ur); + } + + sc->sc_plus = NULL; + +#ifdef notyet + if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { + TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) + CLR(t->tdb_flags, TDBF_PFSYNC); + + TAILQ_INIT(&sc->sc_tdb_q); + } +#endif + + sc->sc_len = PFSYNC_MINPKT; } -struct mbuf * -pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) +#ifdef __FreeBSD__ +void pfsync_sendout() { - struct pfsync_header *h; + pfsync_sendout1(1); +} + +static void +pfsync_sendout1(int schedswi) +{ + struct pfsync_softc *sc = V_pfsyncif; +#else +void +pfsync_sendout(void) +{ + struct pfsync_softc *sc = pfsyncif; +#endif +#if NBPFILTER > 0 +#ifdef __FreeBSD__ + struct ifnet *ifp = sc->sc_ifp; +#else + struct ifnet *ifp = &sc->sc_if; +#endif +#endif struct mbuf *m; - int len; + struct ip *ip; + struct pfsync_header *ph; + struct pfsync_subheader *subh; + struct pf_state *st; + struct pfsync_upd_req_item *ur; +#ifdef notyet + struct tdb *t; +#endif +#ifdef __FreeBSD__ + size_t pktlen; +#endif + int offset; + int q, count = 0; + +#ifdef __FreeBSD__ + PF_LOCK_ASSERT(); +#else + splassert(IPL_NET); +#endif + + if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) + return; + +#if NBPFILTER > 0 + if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { +#else + if (sc->sc_sync_if == NULL) { +#endif + pfsync_drop(sc); + return; + } MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) { @@ -1369,932 +2163,1293 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) #else sc->sc_if.if_oerrors++; #endif - return (NULL); + V_pfsyncstats.pfsyncs_onomem++; + pfsync_drop(sc); + return; } - switch (action) { - case PFSYNC_ACT_CLR: - len = sizeof(struct pfsync_header) + - sizeof(struct pfsync_state_clr); - break; - case PFSYNC_ACT_UPD_C: - len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + - sizeof(struct pfsync_header); - break; - case PFSYNC_ACT_DEL_C: - len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + - sizeof(struct pfsync_header); - break; - case PFSYNC_ACT_UREQ: - len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + - sizeof(struct pfsync_header); - break; - case PFSYNC_ACT_BUS: - len = sizeof(struct pfsync_header) + - sizeof(struct pfsync_state_bus); - break; -#ifdef PFSYNC_TDB - case PFSYNC_ACT_TDB_UPD: - len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + - sizeof(struct pfsync_header); - break; +#ifdef __FreeBSD__ + pktlen = max_linkhdr + sc->sc_len; + if (pktlen > MHLEN) { + /* Find the right pool to allocate from. */ + /* XXX: This is ugly. */ + m_cljget(m, M_DONTWAIT, pktlen <= MCLBYTES ? MCLBYTES : +#if MJUMPAGESIZE != MCLBYTES + pktlen <= MJUMPAGESIZE ? MJUMPAGESIZE : #endif - default: - len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + - sizeof(struct pfsync_header); - break; - } - - if (len > MHLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { + pktlen <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES); +#else + if (max_linkhdr + sc->sc_len > MHLEN) { + MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len); +#endif + if (!ISSET(m->m_flags, M_EXT)) { m_free(m); #ifdef __FreeBSD__ sc->sc_ifp->if_oerrors++; #else sc->sc_if.if_oerrors++; #endif - return (NULL); + V_pfsyncstats.pfsyncs_onomem++; + pfsync_drop(sc); + return; } - m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1); - } else - MH_ALIGN(m, len); + } + m->m_data += max_linkhdr; + m->m_len = m->m_pkthdr.len = sc->sc_len; - m->m_pkthdr.rcvif = NULL; - m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); - h = mtod(m, struct pfsync_header *); - h->version = PFSYNC_VERSION; - h->af = 0; - h->count = 0; - h->action = action; -#ifndef PFSYNC_TDB - if (action != PFSYNC_ACT_TDB_UPD) -#endif - bcopy(&pf_status.pf_chksum, &h->pf_chksum, - PF_MD5_DIGEST_LENGTH); + /* build the ip header */ + ip = (struct ip *)m->m_data; + bcopy(&sc->sc_template, ip, sizeof(*ip)); + offset = sizeof(*ip); - *sp = (void *)((char *)h + PFSYNC_HDRLEN); -#ifdef PFSYNC_TDB - if (action == PFSYNC_ACT_TDB_UPD) #ifdef __FreeBSD__ - callout_reset(&sc->sc_tdb_tmo, hz, pfsync_tdb_timeout, - pfsyncif); + ip->ip_len = m->m_pkthdr.len; #else - timeout_add(&sc->sc_tdb_tmo, hz); -#endif - else + ip->ip_len = htons(m->m_pkthdr.len); #endif -#ifdef __FreeBSD__ - callout_reset(&sc->sc_tmo, hz, pfsync_timeout, pfsyncif); -#else - timeout_add(&sc->sc_tmo, hz); -#endif - return (m); -} + ip->ip_id = htons(ip_randomid()); -int -pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) -{ - struct ifnet *ifp = NULL; - struct pfsync_softc *sc = pfsyncif; - struct pfsync_header *h, *h_net; - struct pfsync_state *sp = NULL; - struct pfsync_state_upd *up = NULL; - struct pfsync_state_del *dp = NULL; - struct pf_rule *r; - u_long secs; - int s, ret = 0; - u_int8_t i = 255, newaction = 0; + /* build the pfsync header */ + ph = (struct pfsync_header *)(m->m_data + offset); + bzero(ph, sizeof(*ph)); + offset += sizeof(*ph); - if (sc == NULL) - return (0); + ph->version = PFSYNC_VERSION; + ph->len = htons(sc->sc_len - sizeof(*ip)); #ifdef __FreeBSD__ - ifp = sc->sc_ifp; + bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); #else - ifp = &sc->sc_if; + bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); #endif - /* - * If a packet falls in the forest and there's nobody around to - * hear, does it make a sound? - */ - if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && + /* walk the queues */ + for (q = 0; q < PFSYNC_S_COUNT; q++) { + if (TAILQ_EMPTY(&sc->sc_qs[q])) + continue; + + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); + + count = 0; + TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { +#ifdef PFSYNC_DEBUG #ifdef __FreeBSD__ - sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { + KASSERT(st->sync_state == q, + ("%s: st->sync_state == q", + __FUNCTION__)); #else - sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { + KASSERT(st->sync_state == q); #endif - /* Don't leave any stale pfsync packets hanging around. */ - if (sc->sc_mbuf != NULL) { - m_freem(sc->sc_mbuf); - sc->sc_mbuf = NULL; - sc->sc_statep.s = NULL; +#endif + + offset += pfsync_qs[q].write(st, m, offset); + st->sync_state = PFSYNC_S_NONE; + count++; } - return (0); + TAILQ_INIT(&sc->sc_qs[q]); + + bzero(subh, sizeof(*subh)); + subh->action = pfsync_qs[q].action; + subh->count = htons(count); } - if (action >= PFSYNC_ACT_MAX) - return (EINVAL); + if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); - s = splnet(); -#ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); -#endif - if (sc->sc_mbuf == NULL) { - if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, - (void *)&sc->sc_statep.s)) == NULL) { - splx(s); - return (ENOMEM); - } - h = mtod(sc->sc_mbuf, struct pfsync_header *); - } else { - h = mtod(sc->sc_mbuf, struct pfsync_header *); - if (h->action != action) { - pfsync_sendout(sc); - if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, - (void *)&sc->sc_statep.s)) == NULL) { - splx(s); - return (ENOMEM); - } - h = mtod(sc->sc_mbuf, struct pfsync_header *); - } else { - /* - * If it's an update, look in the packet to see if - * we already have an update for the state. - */ - if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { - struct pfsync_state *usp = - (void *)((char *)h + PFSYNC_HDRLEN); - - for (i = 0; i < h->count; i++) { - if (!memcmp(usp->id, &st->id, - PFSYNC_ID_LEN) && - usp->creatorid == st->creatorid) { - sp = usp; - sp->updates++; - break; - } - usp++; - } - } + count = 0; + while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { + TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); + + bcopy(&ur->ur_msg, m->m_data + offset, + sizeof(ur->ur_msg)); + offset += sizeof(ur->ur_msg); + + pool_put(&sc->sc_pool, ur); + + count++; } + + bzero(subh, sizeof(*subh)); + subh->action = PFSYNC_ACT_UPD_REQ; + subh->count = htons(count); } - secs = time_second; + /* has someone built a custom region for us to add? */ + if (sc->sc_plus != NULL) { + bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); + offset += sc->sc_pluslen; - st->pfsync_time = time_uptime; + sc->sc_plus = NULL; + } - if (sp == NULL) { - /* not a "duplicate" update */ - i = 255; - sp = sc->sc_statep.s++; - sc->sc_mbuf->m_pkthdr.len = - sc->sc_mbuf->m_len += sizeof(struct pfsync_state); - h->count++; - bzero(sp, sizeof(*sp)); - - bcopy(&st->id, sp->id, sizeof(sp->id)); - sp->creatorid = st->creatorid; - - strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname)); - pf_state_host_hton(&st->lan, &sp->lan); - pf_state_host_hton(&st->gwy, &sp->gwy); - pf_state_host_hton(&st->ext, &sp->ext); - - bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); - - sp->creation = htonl(secs - st->creation); - pf_state_counter_hton(st->packets[0], sp->packets[0]); - pf_state_counter_hton(st->packets[1], sp->packets[1]); - pf_state_counter_hton(st->bytes[0], sp->bytes[0]); - pf_state_counter_hton(st->bytes[1], sp->bytes[1]); - if ((r = st->rule.ptr) == NULL) - sp->rule = htonl(-1); - else - sp->rule = htonl(r->nr); - if ((r = st->anchor.ptr) == NULL) - sp->anchor = htonl(-1); - else - sp->anchor = htonl(r->nr); - sp->af = st->af; - sp->proto = st->proto; - sp->direction = st->direction; - sp->log = st->log; - sp->state_flags = st->state_flags; - sp->timeout = st->timeout; +#ifdef notyet + if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); + + count = 0; + TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) { + offset += pfsync_out_tdb(t, m, offset); + CLR(t->tdb_flags, TDBF_PFSYNC); - if (flags & PFSYNC_FLAG_STALE) - sp->sync_flags |= PFSTATE_STALE; + count++; + } + TAILQ_INIT(&sc->sc_tdb_q); + + bzero(subh, sizeof(*subh)); + subh->action = PFSYNC_ACT_TDB; + subh->count = htons(count); } +#endif - pf_state_peer_hton(&st->src, &sp->src); - pf_state_peer_hton(&st->dst, &sp->dst); + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); - if (st->expire <= secs) - sp->expire = htonl(0); - else - sp->expire = htonl(st->expire - secs); + bzero(subh, sizeof(*subh)); + subh->action = PFSYNC_ACT_EOF; + subh->count = htons(1); - /* do we need to build "compressed" actions for network transfer? */ - if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { - switch (action) { - case PFSYNC_ACT_UPD: - newaction = PFSYNC_ACT_UPD_C; - break; - case PFSYNC_ACT_DEL: - newaction = PFSYNC_ACT_DEL_C; - break; - default: - /* by default we just send the uncompressed states */ - break; - } + /* XXX write checksum in EOF here */ + + /* we're done, let's put it on the wire */ +#if NBPFILTER > 0 + if (ifp->if_bpf) { + m->m_data += sizeof(*ip); + m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); +#ifdef __FreeBSD__ + BPF_MTAP(ifp, m); +#else + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); +#endif + m->m_data -= sizeof(*ip); + m->m_len = m->m_pkthdr.len = sc->sc_len; } - if (newaction) { - if (sc->sc_mbuf_net == NULL) { - if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, - (void *)&sc->sc_statep_net.s)) == NULL) { - splx(s); - return (ENOMEM); - } - } - h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); - - switch (newaction) { - case PFSYNC_ACT_UPD_C: - if (i != 255) { - up = (void *)((char *)h_net + - PFSYNC_HDRLEN + (i * sizeof(*up))); - up->updates++; - } else { - h_net->count++; - sc->sc_mbuf_net->m_pkthdr.len = - sc->sc_mbuf_net->m_len += sizeof(*up); - up = sc->sc_statep_net.u++; - - bzero(up, sizeof(*up)); - bcopy(&st->id, up->id, sizeof(up->id)); - up->creatorid = st->creatorid; - } - up->timeout = st->timeout; - up->expire = sp->expire; - up->src = sp->src; - up->dst = sp->dst; - break; - case PFSYNC_ACT_DEL_C: - sc->sc_mbuf_net->m_pkthdr.len = - sc->sc_mbuf_net->m_len += sizeof(*dp); - dp = sc->sc_statep_net.d++; - h_net->count++; - - bzero(dp, sizeof(*dp)); - bcopy(&st->id, dp->id, sizeof(dp->id)); - dp->creatorid = st->creatorid; - break; - } + if (sc->sc_sync_if == NULL) { + sc->sc_len = PFSYNC_MINPKT; + m_freem(m); + return; } +#endif - if (h->count == sc->sc_maxcount || - (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) - ret = pfsync_sendout(sc); +#ifdef __FreeBSD__ + sc->sc_ifp->if_opackets++; + sc->sc_ifp->if_obytes += m->m_pkthdr.len; + sc->sc_len = PFSYNC_MINPKT; - splx(s); - return (ret); + if (!_IF_QFULL(&sc->sc_ifp->if_snd)) + _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); + else { + m_freem(m); + sc->sc_ifp->if_snd.ifq_drops++; + } + if (schedswi) + swi_sched(V_pfsync_swi_cookie, 0); +#else + sc->sc_if.if_opackets++; + sc->sc_if.if_obytes += m->m_pkthdr.len; + + if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0) + pfsyncstats.pfsyncs_opackets++; + else + pfsyncstats.pfsyncs_oerrors++; + + /* start again */ + sc->sc_len = PFSYNC_MINPKT; +#endif } -/* This must be called in splnet() */ -int -pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) +void +pfsync_insert_state(struct pf_state *st) { - struct ifnet *ifp = NULL; - struct pfsync_header *h; +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else struct pfsync_softc *sc = pfsyncif; - struct pfsync_state_upd_req *rup; - int ret = 0; - - if (sc == NULL) - return (0); +#endif #ifdef __FreeBSD__ - ifp = sc->sc_ifp; + PF_LOCK_ASSERT(); #else - ifp = &sc->sc_if; + splassert(IPL_SOFTNET); #endif - if (sc->sc_mbuf == NULL) { - if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, - (void *)&sc->sc_statep.s)) == NULL) - return (ENOMEM); - h = mtod(sc->sc_mbuf, struct pfsync_header *); - } else { - h = mtod(sc->sc_mbuf, struct pfsync_header *); - if (h->action != PFSYNC_ACT_UREQ) { - pfsync_sendout(sc); - if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, - (void *)&sc->sc_statep.s)) == NULL) - return (ENOMEM); - h = mtod(sc->sc_mbuf, struct pfsync_header *); - } - } - if (src != NULL) - sc->sc_sendaddr = *src; - sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); - h->count++; - rup = sc->sc_statep.r++; - bzero(rup, sizeof(*rup)); - if (up != NULL) { - bcopy(up->id, rup->id, sizeof(rup->id)); - rup->creatorid = up->creatorid; + if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) || + st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { + SET(st->state_flags, PFSTATE_NOSYNC); + return; } - if (h->count == sc->sc_maxcount) - ret = pfsync_sendout(sc); + if (sc == NULL || ISSET(st->state_flags, PFSTATE_NOSYNC)) + return; + +#ifdef PFSYNC_DEBUG +#ifdef __FreeBSD__ + KASSERT(st->sync_state == PFSYNC_S_NONE, + ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__)); +#else + KASSERT(st->sync_state == PFSYNC_S_NONE); +#endif +#endif + + if (sc->sc_len == PFSYNC_MINPKT) +#ifdef __FreeBSD__ + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, + V_pfsyncif); +#else + timeout_add_sec(&sc->sc_tmo, 1); +#endif + + pfsync_q_ins(st, PFSYNC_S_INS); - return (ret); + if (ISSET(st->state_flags, PFSTATE_ACK)) + schednetisr(NETISR_PFSYNC); + else + st->sync_updates = 0; } +int defer = 10; + int -pfsync_clear_states(u_int32_t creatorid, char *ifname) +pfsync_defer(struct pf_state *st, struct mbuf *m) { - struct ifnet *ifp = NULL; +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else struct pfsync_softc *sc = pfsyncif; - struct pfsync_state_clr *cp; - int s, ret; +#endif + struct pfsync_deferral *pd; - if (sc == NULL) +#ifdef __FreeBSD__ + PF_LOCK_ASSERT(); +#else + splassert(IPL_SOFTNET); +#endif + + if (sc->sc_deferred >= 128) + pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); + + pd = pool_get(&sc->sc_pool, M_NOWAIT); + if (pd == NULL) return (0); + sc->sc_deferred++; #ifdef __FreeBSD__ - ifp = sc->sc_ifp; + m->m_flags |= M_SKIP_FIREWALL; #else - ifp = &sc->sc_if; + m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; #endif + SET(st->state_flags, PFSTATE_ACK); + + pd->pd_st = st; + pd->pd_m = m; + + TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + callout_init(&pd->pd_tmo, CALLOUT_MPSAFE); + callout_reset(&pd->pd_tmo, defer, pfsync_defer_tmo, + pd); +#else + timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd); + timeout_add(&pd->pd_tmo, defer); #endif - s = splnet(); - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); - if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, - (void *)&sc->sc_statep.c)) == NULL) { - splx(s); - return (ENOMEM); - } - sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); - cp = sc->sc_statep.c; - cp->creatorid = creatorid; - if (ifname != NULL) - strlcpy(cp->ifname, ifname, IFNAMSIZ); - ret = (pfsync_sendout(sc)); - splx(s); - return (ret); + return (1); } void -pfsync_timeout(void *v) +pfsync_undefer(struct pfsync_deferral *pd, int drop) { - struct pfsync_softc *sc = v; +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif int s; - s = splnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK_ASSERT(); +#else + splassert(IPL_SOFTNET); #endif - pfsync_sendout(sc); + + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + + CLR(pd->pd_st->state_flags, PFSTATE_ACK); + timeout_del(&pd->pd_tmo); /* bah */ + if (drop) + m_freem(pd->pd_m); + else { + s = splnet(); #ifdef __FreeBSD__ - PF_UNLOCK(); + /* XXX: use pf_defered?! */ + PF_UNLOCK(); #endif - splx(s); + ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0, + (void *)NULL, (void *)NULL); +#ifdef __FreeBSD__ + PF_LOCK(); +#endif + splx(s); + } + + pool_put(&sc->sc_pool, pd); } -#ifdef PFSYNC_TDB void -pfsync_tdb_timeout(void *v) +pfsync_defer_tmo(void *arg) { - struct pfsync_softc *sc = v; +#if defined(__FreeBSD__) && defined(VIMAGE) + struct pfsync_deferral *pd = arg; +#endif int s; - s = splnet(); + s = splsoftnet(); #ifdef __FreeBSD__ + CURVNET_SET(pd->pd_m->m_pkthdr.rcvif->if_vnet); /* XXX */ PF_LOCK(); #endif - pfsync_tdb_sendout(sc); + pfsync_undefer(arg, 0); #ifdef __FreeBSD__ PF_UNLOCK(); + CURVNET_RESTORE(); #endif splx(s); } + +void +pfsync_deferred(struct pf_state *st, int drop) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; #endif + struct pfsync_deferral *pd; + + TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { + if (pd->pd_st == st) { + pfsync_undefer(pd, drop); + return; + } + } + + panic("pfsync_send_deferred: unable to find deferred state"); +} + +u_int pfsync_upds = 0; -/* This must be called in splnet() */ void -pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) +pfsync_update_state(struct pf_state *st) { - struct pfsync_state_bus *bus; +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + int sync = 0; #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + PF_LOCK_ASSERT(); +#else + splassert(IPL_SOFTNET); #endif - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); - if (pfsync_sync_ok && - (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, - (void *)&sc->sc_statep.b)) != NULL) { - sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); - bus = sc->sc_statep.b; - bus->creatorid = pf_status.hostid; - bus->status = status; - bus->endtime = htonl(time_uptime - sc->sc_ureq_received); - pfsync_sendout(sc); + if (sc == NULL) + return; + + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(st, 0); + if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->sync_state != PFSYNC_S_NONE) + pfsync_q_del(st); + return; + } + + if (sc->sc_len == PFSYNC_MINPKT) +#ifdef __FreeBSD__ + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, + V_pfsyncif); +#else + timeout_add_sec(&sc->sc_tmo, 1); +#endif + + switch (st->sync_state) { + case PFSYNC_S_UPD_C: + case PFSYNC_S_UPD: + case PFSYNC_S_INS: + /* we're already handling it */ + + if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { + st->sync_updates++; + if (st->sync_updates >= sc->sc_maxupdates) + sync = 1; + } + break; + + case PFSYNC_S_IACK: + pfsync_q_del(st); + case PFSYNC_S_NONE: + pfsync_q_ins(st, PFSYNC_S_UPD_C); + st->sync_updates = 0; + break; + + default: + panic("pfsync_update_state: unexpected sync state %d", + st->sync_state); + } + + if (sync || (time_uptime - st->pfsync_time) < 2) { + pfsync_upds++; + schednetisr(NETISR_PFSYNC); } } void -pfsync_bulk_update(void *v) +pfsync_request_update(u_int32_t creatorid, u_int64_t id) { - struct pfsync_softc *sc = v; - int s, i = 0; - struct pf_state *state; - - s = splnet(); #ifdef __FreeBSD__ - PF_LOCK(); + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; #endif - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); + struct pfsync_upd_req_item *item; + size_t nlen = sizeof(struct pfsync_upd_req); + int s; + + PF_LOCK_ASSERT(); /* - * Grab at most PFSYNC_BULKPACKETS worth of states which have not - * been sent since the latest request was made. + * this code does nothing to prevent multiple update requests for the + * same state being generated. */ - state = sc->sc_bulk_send_next; - if (state) - do { - /* send state update if syncable and not already sent */ - if (!state->sync_flags - && state->timeout < PFTM_MAX - && state->pfsync_time <= sc->sc_ureq_received) { - pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); - i++; - } - /* figure next state to send */ - state = TAILQ_NEXT(state, u.s.entry_list); - - /* wrap to start of list if we hit the end */ - if (!state) - state = TAILQ_FIRST(&state_list); - } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && - state != sc->sc_bulk_terminator); - - if (!state || state == sc->sc_bulk_terminator) { - /* we're done */ - pfsync_send_bus(sc, PFSYNC_BUS_END); - sc->sc_ureq_received = 0; - sc->sc_bulk_send_next = NULL; - sc->sc_bulk_terminator = NULL; - timeout_del(&sc->sc_bulk_tmo); - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: bulk update complete\n"); - } else { - /* look again for more in a bit */ + item = pool_get(&sc->sc_pool, PR_NOWAIT); + if (item == NULL) { + /* XXX stats */ + return; + } + + item->ur_msg.id = id; + item->ur_msg.creatorid = creatorid; + + if (TAILQ_EMPTY(&sc->sc_upd_req_list)) + nlen += sizeof(struct pfsync_subheader); + #ifdef __FreeBSD__ - callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, - pfsyncif); + if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { #else - timeout_add(&sc->sc_bulk_tmo, 1); + if (sc->sc_len + nlen > sc->sc_if.if_mtu) { #endif - sc->sc_bulk_send_next = state; + s = splnet(); + pfsync_sendout(); + splx(s); + + nlen = sizeof(struct pfsync_subheader) + + sizeof(struct pfsync_upd_req); } - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); - splx(s); + + TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); + sc->sc_len += nlen; + + schednetisr(NETISR_PFSYNC); +} + +void +pfsync_update_state_req(struct pf_state *st) +{ #ifdef __FreeBSD__ - PF_UNLOCK(); + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; #endif + + PF_LOCK_ASSERT(); + + if (sc == NULL) + panic("pfsync_update_state_req: nonexistant instance"); + + if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->sync_state != PFSYNC_S_NONE) + pfsync_q_del(st); + return; + } + + switch (st->sync_state) { + case PFSYNC_S_UPD_C: + case PFSYNC_S_IACK: + pfsync_q_del(st); + case PFSYNC_S_NONE: + pfsync_q_ins(st, PFSYNC_S_UPD); + schednetisr(NETISR_PFSYNC); + return; + + case PFSYNC_S_INS: + case PFSYNC_S_UPD: + case PFSYNC_S_DEL: + /* we're already handling it */ + return; + + default: + panic("pfsync_update_state_req: unexpected sync state %d", + st->sync_state); + } } void -pfsync_bulkfail(void *v) +pfsync_delete_state(struct pf_state *st) { - struct pfsync_softc *sc = v; - int s, error; - #ifdef __FreeBSD__ - PF_LOCK(); + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; #endif - if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { - /* Try again in a bit */ + #ifdef __FreeBSD__ - callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, - pfsyncif); + PF_LOCK_ASSERT(); #else - timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); + splassert(IPL_SOFTNET); #endif - s = splnet(); - error = pfsync_request_update(NULL, NULL); - if (error == ENOMEM) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: cannot allocate mbufs for " - "bulk update\n"); - } else - pfsync_sendout(sc); - splx(s); - } else { - /* Pretend like the transfer was ok */ - sc->sc_ureq_sent = 0; - sc->sc_bulk_tries = 0; -#if NCARP > 0 - if (!pfsync_sync_ok) + + if (sc == NULL) + return; + + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(st, 1); + if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->sync_state != PFSYNC_S_NONE) + pfsync_q_del(st); + return; + } + + if (sc->sc_len == PFSYNC_MINPKT) #ifdef __FreeBSD__ -#ifdef CARP_ADVANCED - carp_group_demote_adj(sc->sc_ifp, -1); -#endif + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, + V_pfsyncif); #else - carp_group_demote_adj(&sc->sc_if, -1); -#endif + timeout_add_sec(&sc->sc_tmo, 1); #endif - pfsync_sync_ok = 1; - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: failed to receive " - "bulk update status\n"); - timeout_del(&sc->sc_bulkfail_tmo); + + switch (st->sync_state) { + case PFSYNC_S_INS: + /* we never got to tell the world so just forget about it */ + pfsync_q_del(st); + return; + + case PFSYNC_S_UPD_C: + case PFSYNC_S_UPD: + case PFSYNC_S_IACK: + pfsync_q_del(st); + /* FALLTHROUGH to putting it on the del list */ + + case PFSYNC_S_NONE: + pfsync_q_ins(st, PFSYNC_S_DEL); + return; + + default: + panic("pfsync_delete_state: unexpected sync state %d", + st->sync_state); } +} + +void +pfsync_clear_states(u_int32_t creatorid, const char *ifname) +{ + struct { + struct pfsync_subheader subh; + struct pfsync_clr clr; + } __packed r; + #ifdef __FreeBSD__ - PF_UNLOCK(); + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; #endif + +#ifdef __FreeBSD__ + PF_LOCK_ASSERT(); +#else + splassert(IPL_SOFTNET); +#endif + + if (sc == NULL) + return; + + bzero(&r, sizeof(r)); + + r.subh.action = PFSYNC_ACT_CLR; + r.subh.count = htons(1); + + strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); + r.clr.creatorid = creatorid; + + pfsync_send_plus(&r, sizeof(r)); } -/* This must be called in splnet() */ -int -pfsync_sendout(struct pfsync_softc *sc) +void +pfsync_q_ins(struct pf_state *st, int q) { -#if NBPFILTER > 0 #ifdef __FreeBSD__ - struct ifnet *ifp = sc->sc_ifp; + struct pfsync_softc *sc = V_pfsyncif; #else - struct ifnet *ifp = &sc->sc_if; + struct pfsync_softc *sc = pfsyncif; #endif + size_t nlen = pfsync_qs[q].len; + int s; + + PF_LOCK_ASSERT(); + +#ifdef __FreeBSD__ + KASSERT(st->sync_state == PFSYNC_S_NONE, + ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__)); +#else + KASSERT(st->sync_state == PFSYNC_S_NONE); #endif - struct mbuf *m; +#if 1 || defined(PFSYNC_DEBUG) + if (sc->sc_len < PFSYNC_MINPKT) #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + panic("pfsync pkt len is too low %zu", sc->sc_len); +#else + panic("pfsync pkt len is too low %d", sc->sc_len); #endif - timeout_del(&sc->sc_tmo); +#endif + if (TAILQ_EMPTY(&sc->sc_qs[q])) + nlen += sizeof(struct pfsync_subheader); - if (sc->sc_mbuf == NULL) - return (0); - m = sc->sc_mbuf; - sc->sc_mbuf = NULL; - sc->sc_statep.s = NULL; +#ifdef __FreeBSD__ + if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { +#else + if (sc->sc_len + nlen > sc->sc_if.if_mtu) { +#endif + s = splnet(); + pfsync_sendout(); + splx(s); -#if NBPFILTER > 0 - if (ifp->if_bpf) + nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; + } + + sc->sc_len += nlen; + TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); + st->sync_state = q; +} + +void +pfsync_q_del(struct pf_state *st) +{ #ifdef __FreeBSD__ - BPF_MTAP(ifp, m); + struct pfsync_softc *sc = V_pfsyncif; #else - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); + struct pfsync_softc *sc = pfsyncif; #endif + int q = st->sync_state; + +#ifdef __FreeBSD__ + KASSERT(st->sync_state != PFSYNC_S_NONE, + ("%s: st->sync_state != PFSYNC_S_NONE", __FUNCTION__)); +#else + KASSERT(st->sync_state != PFSYNC_S_NONE); #endif - if (sc->sc_mbuf_net) { - m_freem(m); - m = sc->sc_mbuf_net; - sc->sc_mbuf_net = NULL; - sc->sc_statep_net.s = NULL; + sc->sc_len -= pfsync_qs[q].len; + TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); + st->sync_state = PFSYNC_S_NONE; + + if (TAILQ_EMPTY(&sc->sc_qs[q])) + sc->sc_len -= sizeof(struct pfsync_subheader); +} + +#ifdef notyet +void +pfsync_update_tdb(struct tdb *t, int output) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + size_t nlen = sizeof(struct pfsync_tdb); + int s; + + if (sc == NULL) + return; + + if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) { + if (TAILQ_EMPTY(&sc->sc_tdb_q)) + nlen += sizeof(struct pfsync_subheader); + + if (sc->sc_len + nlen > sc->sc_if.if_mtu) { + s = splnet(); + PF_LOCK(); + pfsync_sendout(); + PF_UNLOCK(); + splx(s); + + nlen = sizeof(struct pfsync_subheader) + + sizeof(struct pfsync_tdb); + } + + sc->sc_len += nlen; + TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry); + SET(t->tdb_flags, TDBF_PFSYNC); + t->tdb_updates = 0; + } else { + if (++t->tdb_updates >= sc->sc_maxupdates) + schednetisr(NETISR_PFSYNC); } - return pfsync_sendout_mbuf(sc, m); + if (output) + SET(t->tdb_flags, TDBF_PFSYNC_RPL); + else + CLR(t->tdb_flags, TDBF_PFSYNC_RPL); } -#ifdef PFSYNC_TDB -int -pfsync_tdb_sendout(struct pfsync_softc *sc) +void +pfsync_delete_tdb(struct tdb *t) { -#if NBPFILTER > 0 #ifdef __FreeBSD__ - struct ifnet *ifp = sc->sc_ifp; + struct pfsync_softc *sc = V_pfsyncif; #else - struct ifnet *ifp = &sc->sc_if; + struct pfsync_softc *sc = pfsyncif; #endif + + if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC)) + return; + + sc->sc_len -= sizeof(struct pfsync_tdb); + TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); + CLR(t->tdb_flags, TDBF_PFSYNC); + + if (TAILQ_EMPTY(&sc->sc_tdb_q)) + sc->sc_len -= sizeof(struct pfsync_subheader); +} + +int +pfsync_out_tdb(struct tdb *t, struct mbuf *m, int offset) +{ + struct pfsync_tdb *ut = (struct pfsync_tdb *)(m->m_data + offset); + + bzero(ut, sizeof(*ut)); + ut->spi = t->tdb_spi; + bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst)); + /* + * When a failover happens, the master's rpl is probably above + * what we see here (we may be up to a second late), so + * increase it a bit for outbound tdbs to manage most such + * situations. + * + * For now, just add an offset that is likely to be larger + * than the number of packets we can see in one second. The RFC + * just says the next packet must have a higher seq value. + * + * XXX What is a good algorithm for this? We could use + * a rate-determined increase, but to know it, we would have + * to extend struct tdb. + * XXX pt->rpl can wrap over MAXINT, but if so the real tdb + * will soon be replaced anyway. For now, just don't handle + * this edge case. + */ +#define RPL_INCR 16384 + ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ? + RPL_INCR : 0)); + ut->cur_bytes = htobe64(t->tdb_cur_bytes); + ut->sproto = t->tdb_sproto; + + return (sizeof(*ut)); +} #endif - struct mbuf *m; +void +pfsync_bulk_start(void) +{ #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; #endif - timeout_del(&sc->sc_tdb_tmo); - if (sc->sc_mbuf_tdb == NULL) - return (0); - m = sc->sc_mbuf_tdb; - sc->sc_mbuf_tdb = NULL; - sc->sc_statep_tdb.t = NULL; +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) +#endif + printf("pfsync: received bulk update request\n"); -#if NBPFILTER > 0 - if (ifp->if_bpf) #ifdef __FreeBSD__ - BPF_MTAP(ifp, m); + PF_LOCK_ASSERT(); + if (TAILQ_EMPTY(&V_state_list)) #else - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); + if (TAILQ_EMPTY(&state_list)) #endif + pfsync_bulk_status(PFSYNC_BUS_END); + else { + sc->sc_ureq_received = time_uptime; + if (sc->sc_bulk_next == NULL) +#ifdef __FreeBSD__ + sc->sc_bulk_next = TAILQ_FIRST(&V_state_list); +#else + sc->sc_bulk_next = TAILQ_FIRST(&state_list); #endif + sc->sc_bulk_last = sc->sc_bulk_next; - return pfsync_sendout_mbuf(sc, m); + pfsync_bulk_status(PFSYNC_BUS_START); + callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); + } } -#endif -int -pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) +void +pfsync_bulk_update(void *arg) { - struct sockaddr sa; - struct ip *ip; + struct pfsync_softc *sc = arg; + struct pf_state *st = sc->sc_bulk_next; + int i = 0; + int s; + + PF_LOCK_ASSERT(); + s = splsoftnet(); #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + CURVNET_SET(sc->sc_ifp->if_vnet); #endif - if (sc->sc_sync_ifp || + for (;;) { + if (st->sync_state == PFSYNC_S_NONE && + st->timeout < PFTM_MAX && + st->pfsync_time <= sc->sc_ureq_received) { + pfsync_update_state_req(st); + i++; + } + + st = TAILQ_NEXT(st, entry_list); + if (st == NULL) #ifdef __FreeBSD__ - sc->sc_sync_peer.s_addr != htonl(INADDR_PFSYNC_GROUP)) { + st = TAILQ_FIRST(&V_state_list); #else - sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { + st = TAILQ_FIRST(&state_list); #endif - M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); - if (m == NULL) { - pfsyncstats.pfsyncs_onomem++; - return (0); + + if (st == sc->sc_bulk_last) { + /* we're done */ + sc->sc_bulk_next = NULL; + sc->sc_bulk_last = NULL; + pfsync_bulk_status(PFSYNC_BUS_END); + break; } - ip = mtod(m, struct ip *); - ip->ip_v = IPVERSION; - ip->ip_hl = sizeof(*ip) >> 2; - ip->ip_tos = IPTOS_LOWDELAY; + #ifdef __FreeBSD__ - ip->ip_len = m->m_pkthdr.len; + if (i > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < #else - ip->ip_len = htons(m->m_pkthdr.len); + if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) < #endif - ip->ip_id = htons(ip_randomid()); + sizeof(struct pfsync_state)) { + /* we've filled a packet */ + sc->sc_bulk_next = st; #ifdef __FreeBSD__ - ip->ip_off = IP_DF; + callout_reset(&sc->sc_bulk_tmo, 1, + pfsync_bulk_update, sc); #else - ip->ip_off = htons(IP_DF); + timeout_add(&sc->sc_bulk_tmo, 1); #endif - ip->ip_ttl = PFSYNC_DFLTTL; - ip->ip_p = IPPROTO_PFSYNC; - ip->ip_sum = 0; + break; + } + } - bzero(&sa, sizeof(sa)); - ip->ip_src.s_addr = INADDR_ANY; +#ifdef __FreeBSD__ + CURVNET_RESTORE(); +#endif + splx(s); +} + +void +pfsync_bulk_status(u_int8_t status) +{ + struct { + struct pfsync_subheader subh; + struct pfsync_bus bus; + } __packed r; #ifdef __FreeBSD__ - if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP)) + struct pfsync_softc *sc = V_pfsyncif; #else - if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) + struct pfsync_softc *sc = pfsyncif; #endif - m->m_flags |= M_MCAST; - ip->ip_dst = sc->sc_sendaddr; - sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; - pfsyncstats.pfsyncs_opackets++; + PF_LOCK_ASSERT(); + + bzero(&r, sizeof(r)); + + r.subh.action = PFSYNC_ACT_BUS; + r.subh.count = htons(1); #ifdef __FreeBSD__ - if (!IF_HANDOFF(&sc->sc_ifq, m, NULL)) - pfsyncstats.pfsyncs_oerrors++; - taskqueue_enqueue(taskqueue_thread, &pfsyncif->sc_send_task); + r.bus.creatorid = V_pf_status.hostid; #else - if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) - pfsyncstats.pfsyncs_oerrors++; + r.bus.creatorid = pf_status.hostid; #endif - } else - m_freem(m); + r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); + r.bus.status = status; - return (0); + pfsync_send_plus(&r, sizeof(r)); } -#ifdef PFSYNC_TDB -/* Update an in-kernel tdb. Silently fail if no tdb is found. */ void -pfsync_update_net_tdb(struct pfsync_tdb *pt) +pfsync_bulk_fail(void *arg) { - struct tdb *tdb; - int s; + struct pfsync_softc *sc = arg; - /* check for invalid values */ - if (ntohl(pt->spi) <= SPI_RESERVED_MAX || - (pt->dst.sa.sa_family != AF_INET && - pt->dst.sa.sa_family != AF_INET6)) - goto bad; +#ifdef __FreeBSD__ + CURVNET_SET(sc->sc_ifp->if_vnet); +#endif - s = spltdb(); - tdb = gettdb(pt->spi, &pt->dst, pt->sproto); - if (tdb) { - pt->rpl = ntohl(pt->rpl); - pt->cur_bytes = betoh64(pt->cur_bytes); + if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { + /* Try again */ +#ifdef __FreeBSD__ + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, + pfsync_bulk_fail, V_pfsyncif); +#else + timeout_add_sec(&sc->sc_bulkfail_tmo, 5); +#endif + PF_LOCK(); + pfsync_request_update(0, 0); + PF_UNLOCK(); + } else { + /* Pretend like the transfer was ok */ + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; +#if NCARP > 0 +#ifdef notyet +#ifdef __FreeBSD__ + if (!sc->pfsync_sync_ok) +#else + if (!pfsync_sync_ok) +#endif + carp_group_demote_adj(&sc->sc_if, -1); +#endif +#endif +#ifdef __FreeBSD__ + sc->pfsync_sync_ok = 1; +#else + pfsync_sync_ok = 1; +#endif +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) +#endif + printf("pfsync: failed to receive bulk update\n"); + } - /* Neither replay nor byte counter should ever decrease. */ - if (pt->rpl < tdb->tdb_rpl || - pt->cur_bytes < tdb->tdb_cur_bytes) { - splx(s); - goto bad; - } +#ifdef __FreeBSD__ + CURVNET_RESTORE(); +#endif +} - tdb->tdb_rpl = pt->rpl; - tdb->tdb_cur_bytes = pt->cur_bytes; +void +pfsync_send_plus(void *plus, size_t pluslen) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + int s; + + PF_LOCK_ASSERT(); + +#ifdef __FreeBSD__ + if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) { +#else + if (sc->sc_len + pluslen > sc->sc_if.if_mtu) { +#endif + s = splnet(); + pfsync_sendout(); + splx(s); } + + sc->sc_plus = plus; + sc->sc_len += (sc->sc_pluslen = pluslen); + + s = splnet(); + pfsync_sendout(); splx(s); - return; +} - bad: - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " - "invalid value\n"); - pfsyncstats.pfsyncs_badstate++; - return; +int +pfsync_up(void) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + +#ifdef __FreeBSD__ + if (sc == NULL || !ISSET(sc->sc_ifp->if_flags, IFF_DRV_RUNNING)) +#else + if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) +#endif + return (0); + + return (1); } -/* One of our local tdbs have been updated, need to sync rpl with others */ int -pfsync_update_tdb(struct tdb *tdb, int output) +pfsync_state_in_use(struct pf_state *st) { - struct ifnet *ifp = NULL; +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else struct pfsync_softc *sc = pfsyncif; - struct pfsync_header *h; - struct pfsync_tdb *pt = NULL; - int s, i, ret; +#endif if (sc == NULL) return (0); + if (st->sync_state != PFSYNC_S_NONE || + st == sc->sc_bulk_next || + st == sc->sc_bulk_last) + return (1); + + return (0); +} + +u_int pfsync_ints; +u_int pfsync_tmos; + +void +pfsync_timeout(void *arg) +{ +#if defined(__FreeBSD__) && defined(VIMAGE) + struct pfsync_softc *sc = arg; +#endif + int s; + #ifdef __FreeBSD__ - ifp = sc->sc_ifp; -#else - ifp = &sc->sc_if; + CURVNET_SET(sc->sc_ifp->if_vnet); #endif - if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && + + pfsync_tmos++; + + s = splnet(); #ifdef __FreeBSD__ - sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { -#else - sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { + PF_LOCK(); #endif - /* Don't leave any stale pfsync packets hanging around. */ - if (sc->sc_mbuf_tdb != NULL) { - m_freem(sc->sc_mbuf_tdb); - sc->sc_mbuf_tdb = NULL; - sc->sc_statep_tdb.t = NULL; - } - return (0); - } + pfsync_sendout(); +#ifdef __FreeBSD__ + PF_UNLOCK(); +#endif + splx(s); #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + CURVNET_RESTORE(); #endif +} + +/* this is a softnet/netisr handler */ +void +#ifdef __FreeBSD__ +pfsyncintr(void *arg) +{ + struct pfsync_softc *sc = arg; + struct mbuf *m, *n; + + CURVNET_SET(sc->sc_ifp->if_vnet); + pfsync_ints++; + + PF_LOCK(); + if (sc->sc_len > PFSYNC_MINPKT) + pfsync_sendout1(0); + _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); + PF_UNLOCK(); + + for (; m != NULL; m = n) { + + n = m->m_nextpkt; + m->m_nextpkt = NULL; + if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) + == 0) + V_pfsyncstats.pfsyncs_opackets++; + else + V_pfsyncstats.pfsyncs_oerrors++; + } + CURVNET_RESTORE(); +} +#else +pfsyncintr(void) +{ + int s; + + pfsync_ints++; + s = splnet(); - if (sc->sc_mbuf_tdb == NULL) { - if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD, - (void *)&sc->sc_statep_tdb.t)) == NULL) { - splx(s); - return (ENOMEM); - } - h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); - } else { - h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); - if (h->action != PFSYNC_ACT_TDB_UPD) { - /* - * XXX will never happen as long as there's - * only one "TDB action". - */ - pfsync_tdb_sendout(sc); - sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, - PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t); - if (sc->sc_mbuf_tdb == NULL) { - splx(s); - return (ENOMEM); - } - h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); - } else if (sc->sc_maxupdates) { - /* - * If it's an update, look in the packet to see if - * we already have an update for the state. - */ - struct pfsync_tdb *u = - (void *)((char *)h + PFSYNC_HDRLEN); - - for (i = 0; !pt && i < h->count; i++) { - if (tdb->tdb_spi == u->spi && - tdb->tdb_sproto == u->sproto && - !bcmp(&tdb->tdb_dst, &u->dst, - SA_LEN(&u->dst.sa))) { - pt = u; - pt->updates++; - } - u++; - } - } + pfsync_sendout(); + splx(s); +} +#endif + +int +pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) +{ + +#ifdef notyet + /* All sysctl names at this level are terminal. */ + if (namelen != 1) + return (ENOTDIR); + + switch (name[0]) { + case PFSYNCCTL_STATS: + if (newp != NULL) + return (EPERM); + return (sysctl_struct(oldp, oldlenp, newp, newlen, + &V_pfsyncstats, sizeof(V_pfsyncstats))); } +#endif + return (ENOPROTOOPT); +} - if (pt == NULL) { - /* not a "duplicate" update */ - pt = sc->sc_statep_tdb.t++; - sc->sc_mbuf_tdb->m_pkthdr.len = - sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb); - h->count++; - bzero(pt, sizeof(*pt)); +#ifdef __FreeBSD__ +static int +pfsync_multicast_setup(struct pfsync_softc *sc) +{ + struct ip_moptions *imo = &sc->sc_imo; + int error; - pt->spi = tdb->tdb_spi; - memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst); - pt->sproto = tdb->tdb_sproto; + if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) { + sc->sc_sync_if = NULL; + return (EADDRNOTAVAIL); } - /* - * When a failover happens, the master's rpl is probably above - * what we see here (we may be up to a second late), so - * increase it a bit for outbound tdbs to manage most such - * situations. - * - * For now, just add an offset that is likely to be larger - * than the number of packets we can see in one second. The RFC - * just says the next packet must have a higher seq value. - * - * XXX What is a good algorithm for this? We could use - * a rate-determined increase, but to know it, we would have - * to extend struct tdb. - * XXX pt->rpl can wrap over MAXINT, but if so the real tdb - * will soon be replaced anyway. For now, just don't handle - * this edge case. - */ -#define RPL_INCR 16384 - pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0)); - pt->cur_bytes = htobe64(tdb->tdb_cur_bytes); + imo->imo_membership = (struct in_multi **)malloc( + (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC, + M_WAITOK | M_ZERO); + imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; + imo->imo_multicast_vif = -1; - if (h->count == sc->sc_maxcount || - (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates))) - ret = pfsync_tdb_sendout(sc); + if ((error = in_joingroup(sc->sc_sync_if, &sc->sc_sync_peer, NULL, + &imo->imo_membership[0])) != 0) { + free(imo->imo_membership, M_PFSYNC); + return (error); + } + imo->imo_num_memberships++; + imo->imo_multicast_ifp = sc->sc_sync_if; + imo->imo_multicast_ttl = PFSYNC_DFLTTL; + imo->imo_multicast_loop = 0; - splx(s); - return (ret); + return (0); } -#endif /* PFSYNC_TDB */ -#ifdef __FreeBSD__ -void -pfsync_ifdetach(void *arg, struct ifnet *ifp) +static void +pfsync_multicast_cleanup(struct pfsync_softc *sc) { - struct pfsync_softc *sc = (struct pfsync_softc *)arg; - struct ip_moptions *imo; + struct ip_moptions *imo = &sc->sc_imo; + + in_leavegroup(imo->imo_membership[0], NULL); + free(imo->imo_membership, M_PFSYNC); + imo->imo_membership = NULL; + imo->imo_multicast_ifp = NULL; +} - if (sc == NULL || sc->sc_sync_ifp != ifp) - return; /* not for us; unlocked read */ +#ifdef INET +extern struct domain inetdomain; +static struct protosw in_pfsync_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inetdomain, + .pr_protocol = IPPROTO_PFSYNC, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = pfsync_input, + .pr_output = (pr_output_t *)rip_output, + .pr_ctloutput = rip_ctloutput, + .pr_usrreqs = &rip_usrreqs +}; +#endif +static int +pfsync_init() +{ + VNET_ITERATOR_DECL(vnet_iter); + int error = 0; + + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + V_pfsync_cloner = pfsync_cloner; + V_pfsync_cloner_data = pfsync_cloner_data; + V_pfsync_cloner.ifc_data = &V_pfsync_cloner_data; + if_clone_attach(&V_pfsync_cloner); + error = swi_add(NULL, "pfsync", pfsyncintr, V_pfsyncif, + SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); + CURVNET_RESTORE(); + if (error) + goto fail_locked; + } + VNET_LIST_RUNLOCK(); +#ifdef INET + error = pf_proto_register(PF_INET, &in_pfsync_protosw); + if (error) + goto fail; + error = ipproto_register(IPPROTO_PFSYNC); + if (error) { + pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); + goto fail; + } +#endif PF_LOCK(); + pfsync_state_import_ptr = pfsync_state_import; + pfsync_up_ptr = pfsync_up; + pfsync_insert_state_ptr = pfsync_insert_state; + pfsync_update_state_ptr = pfsync_update_state; + pfsync_delete_state_ptr = pfsync_delete_state; + pfsync_clear_states_ptr = pfsync_clear_states; + pfsync_state_in_use_ptr = pfsync_state_in_use; + pfsync_defer_ptr = pfsync_defer; + PF_UNLOCK(); - /* Deal with a member interface going away from under us. */ - sc->sc_sync_ifp = NULL; - if (sc->sc_mbuf_net != NULL) { - m_freem(sc->sc_mbuf_net); - sc->sc_mbuf_net = NULL; - sc->sc_statep_net.s = NULL; - } - imo = &sc->sc_imo; - if (imo->imo_num_memberships > 0) { - KASSERT(imo->imo_num_memberships == 1, - ("%s: imo_num_memberships != 1", __func__)); - /* - * Our event handler is always called after protocol - * domains have been detached from the underlying ifnet. - * Do not call in_delmulti(); we held a single reference - * which the protocol domain has purged in in_purgemaddrs(). - */ - PF_UNLOCK(); - imo->imo_membership[--imo->imo_num_memberships] = NULL; - PF_LOCK(); - imo->imo_multicast_ifp = NULL; + return (0); + +fail: + VNET_LIST_RLOCK(); +fail_locked: + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + if (V_pfsync_swi_cookie) { + swi_remove(V_pfsync_swi_cookie); + if_clone_detach(&V_pfsync_cloner); + } + CURVNET_RESTORE(); } + VNET_LIST_RUNLOCK(); - PF_UNLOCK(); + return (error); } -void -pfsync_senddef(void *arg, __unused int pending) +static void +pfsync_uninit() { - struct pfsync_softc *sc = (struct pfsync_softc *)arg; - struct mbuf *m; + VNET_ITERATOR_DECL(vnet_iter); - for(;;) { - IF_DEQUEUE(&sc->sc_ifq, m); - if (m == NULL) - break; - /* Deal with a member interface going away from under us. */ - if (sc->sc_sync_ifp == NULL) { - pfsyncstats.pfsyncs_oerrors++; - m_freem(m); - continue; - } - if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) - pfsyncstats.pfsyncs_oerrors++; + PF_LOCK(); + pfsync_state_import_ptr = NULL; + pfsync_up_ptr = NULL; + pfsync_insert_state_ptr = NULL; + pfsync_update_state_ptr = NULL; + pfsync_delete_state_ptr = NULL; + pfsync_clear_states_ptr = NULL; + pfsync_state_in_use_ptr = NULL; + pfsync_defer_ptr = NULL; + PF_UNLOCK(); + + ipproto_unregister(IPPROTO_PFSYNC); + pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + swi_remove(V_pfsync_swi_cookie); + if_clone_detach(&V_pfsync_cloner); + CURVNET_RESTORE(); } + VNET_LIST_RUNLOCK(); } static int @@ -2304,17 +3459,23 @@ pfsync_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: - pfsyncattach(0); + error = pfsync_init(); + break; + case MOD_QUIESCE: + /* + * Module should not be unloaded due to race conditions. + */ + error = EPERM; break; case MOD_UNLOAD: - if_clone_detach(&pfsync_cloner); + pfsync_uninit(); break; default: error = EINVAL; break; } - return error; + return (error); } static moduledata_t pfsync_mod = { @@ -2325,7 +3486,7 @@ static moduledata_t pfsync_mod = { #define PFSYNC_MODVER 1 -DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); +DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); MODULE_VERSION(pfsync, PFSYNC_MODVER); -MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER); +MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); #endif /* __FreeBSD__ */ diff --git a/freebsd/sys/contrib/pf/net/if_pfsync.h b/freebsd/sys/contrib/pf/net/if_pfsync.h index f306610f..17259b78 100644 --- a/freebsd/sys/contrib/pf/net/if_pfsync.h +++ b/freebsd/sys/contrib/pf/net/if_pfsync.h @@ -1,5 +1,4 @@ -/* $FreeBSD$ */ -/* $OpenBSD: if_pfsync.h,v 1.30 2006/10/31 14:49:01 henning Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.35 2008/06/29 08:42:15 mcbride Exp $ */ /* * Copyright (c) 2001 Michael Shalayeff @@ -27,227 +26,217 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2008 David Gwynne <dlg@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + #ifndef _NET_IF_PFSYNC_H_ -#define _NET_IF_PFSYNC_H_ +#define _NET_IF_PFSYNC_H_ +#define PFSYNC_VERSION 5 +#define PFSYNC_DFLTTL 255 -#define PFSYNC_ID_LEN sizeof(u_int64_t) +#define PFSYNC_ACT_CLR 0 /* clear all states */ +#define PFSYNC_ACT_INS 1 /* insert state */ +#define PFSYNC_ACT_INS_ACK 2 /* ack of insterted state */ +#define PFSYNC_ACT_UPD 3 /* update state */ +#define PFSYNC_ACT_UPD_C 4 /* "compressed" update state */ +#define PFSYNC_ACT_UPD_REQ 5 /* request "uncompressed" state */ +#define PFSYNC_ACT_DEL 6 /* delete state */ +#define PFSYNC_ACT_DEL_C 7 /* "compressed" delete state */ +#define PFSYNC_ACT_INS_F 8 /* insert fragment */ +#define PFSYNC_ACT_DEL_F 9 /* delete fragments */ +#define PFSYNC_ACT_BUS 10 /* bulk update status */ +#define PFSYNC_ACT_TDB 11 /* TDB replay counter update */ +#define PFSYNC_ACT_EOF 12 /* end of frame */ +#define PFSYNC_ACT_MAX 13 + +#define PFSYNC_ACTIONS "CLR ST", \ + "INS ST", \ + "INS ST ACK", \ + "UPD ST", \ + "UPD ST COMP", \ + "UPD ST REQ", \ + "DEL ST", \ + "DEL ST COMP", \ + "INS FR", \ + "DEL FR", \ + "BULK UPD STAT", \ + "TDB UPD", \ + "EOF" + +#define PFSYNC_HMAC_LEN 20 -struct pfsync_state_scrub { - u_int16_t pfss_flags; - u_int8_t pfss_ttl; /* stashed TTL */ -#define PFSYNC_SCRUB_FLAG_VALID 0x01 - u_int8_t scrub_flag; - u_int32_t pfss_ts_mod; /* timestamp modulation */ -} __packed; +/* + * A pfsync frame is built from a header followed by several sections which + * are all prefixed with their own subheaders. Frames must be terminated with + * an EOF subheader. + * + * | ... | + * | IP header | + * +============================+ + * | pfsync_header | + * +----------------------------+ + * | pfsync_subheader | + * +----------------------------+ + * | first action fields | + * | ... | + * +----------------------------+ + * | pfsync_subheader | + * +----------------------------+ + * | second action fields | + * | ... | + * +----------------------------+ + * | EOF pfsync_subheader | + * +----------------------------+ + * | HMAC | + * +============================+ + */ -struct pfsync_state_host { - struct pf_addr addr; - u_int16_t port; - u_int16_t pad[3]; +/* + * Frame header + */ + +struct pfsync_header { + u_int8_t version; + u_int8_t _pad; + u_int16_t len; + u_int8_t pfcksum[PF_MD5_DIGEST_LENGTH]; } __packed; -struct pfsync_state_peer { - struct pfsync_state_scrub scrub; /* state is scrubbed */ - u_int32_t seqlo; /* Max sequence number sent */ - u_int32_t seqhi; /* Max the other end ACKd + win */ - u_int32_t seqdiff; /* Sequence number modulator */ - u_int16_t max_win; /* largest window (pre scaling) */ - u_int16_t mss; /* Maximum segment size option */ - u_int8_t state; /* active state level */ - u_int8_t wscale; /* window scaling factor */ - u_int8_t pad[6]; +/* + * Frame region subheader + */ + +struct pfsync_subheader { + u_int8_t action; + u_int8_t _pad; + u_int16_t count; } __packed; -struct pfsync_state { - u_int32_t id[2]; - char ifname[IFNAMSIZ]; - struct pfsync_state_host lan; - struct pfsync_state_host gwy; - struct pfsync_state_host ext; - struct pfsync_state_peer src; - struct pfsync_state_peer dst; - struct pf_addr rt_addr; - u_int32_t rule; - u_int32_t anchor; - u_int32_t nat_rule; - u_int32_t creation; - u_int32_t expire; - u_int32_t packets[2][2]; - u_int32_t bytes[2][2]; - u_int32_t creatorid; - sa_family_t af; - u_int8_t proto; - u_int8_t direction; - u_int8_t log; - u_int8_t state_flags; - u_int8_t timeout; - u_int8_t sync_flags; - u_int8_t updates; +/* + * CLR + */ + +struct pfsync_clr { + char ifname[IFNAMSIZ]; + u_int32_t creatorid; } __packed; -#define PFSYNC_FLAG_COMPRESS 0x01 -#define PFSYNC_FLAG_STALE 0x02 +/* + * INS, UPD, DEL + */ + +/* these use struct pfsync_state in pfvar.h */ -#ifdef PFSYNC_TDB -struct pfsync_tdb { - u_int32_t spi; - union sockaddr_union dst; - u_int32_t rpl; - u_int64_t cur_bytes; - u_int8_t sproto; - u_int8_t updates; - u_int8_t pad[2]; +/* + * INS_ACK + */ + +struct pfsync_ins_ack { + u_int64_t id; + u_int32_t creatorid; } __packed; -#endif -struct pfsync_state_upd { - u_int32_t id[2]; +/* + * UPD_C + */ + +struct pfsync_upd_c { + u_int64_t id; struct pfsync_state_peer src; struct pfsync_state_peer dst; - u_int32_t creatorid; - u_int32_t expire; - u_int8_t timeout; - u_int8_t updates; - u_int8_t pad[6]; + u_int32_t creatorid; + u_int32_t expire; + u_int8_t timeout; + u_int8_t _pad[3]; } __packed; -struct pfsync_state_del { - u_int32_t id[2]; - u_int32_t creatorid; - struct { - u_int8_t state; - } src; - struct { - u_int8_t state; - } dst; - u_int8_t pad[2]; -} __packed; +/* + * UPD_REQ + */ -struct pfsync_state_upd_req { - u_int32_t id[2]; - u_int32_t creatorid; - u_int32_t pad; +struct pfsync_upd_req { + u_int64_t id; + u_int32_t creatorid; } __packed; -struct pfsync_state_clr { - char ifname[IFNAMSIZ]; - u_int32_t creatorid; - u_int32_t pad; -} __packed; +/* + * DEL_C + */ -struct pfsync_state_bus { - u_int32_t creatorid; - u_int32_t endtime; - u_int8_t status; -#define PFSYNC_BUS_START 1 -#define PFSYNC_BUS_END 2 - u_int8_t pad[7]; +struct pfsync_del_c { + u_int64_t id; + u_int32_t creatorid; } __packed; -#ifdef _KERNEL - -union sc_statep { - struct pfsync_state *s; - struct pfsync_state_upd *u; - struct pfsync_state_del *d; - struct pfsync_state_clr *c; - struct pfsync_state_bus *b; - struct pfsync_state_upd_req *r; -}; +/* + * INS_F, DEL_F + */ -#ifdef PFSYNC_TDB -union sc_tdb_statep { - struct pfsync_tdb *t; -}; -#endif +/* not implemented (yet) */ -extern int pfsync_sync_ok; +/* + * BUS + */ -struct pfsync_softc { -#ifdef __FreeBSD__ - struct ifnet *sc_ifp; -#else - struct ifnet sc_if; -#endif - struct ifnet *sc_sync_ifp; +struct pfsync_bus { + u_int32_t creatorid; + u_int32_t endtime; + u_int8_t status; +#define PFSYNC_BUS_START 1 +#define PFSYNC_BUS_END 2 + u_int8_t _pad[3]; +} __packed; - struct ip_moptions sc_imo; -#ifdef __FreeBSD__ - struct callout sc_tmo; -#ifdef PFSYNC_TDB - struct callout sc_tdb_tmo; -#endif - struct callout sc_bulk_tmo; - struct callout sc_bulkfail_tmo; -#else - struct timeout sc_tmo; - struct timeout sc_tdb_tmo; - struct timeout sc_bulk_tmo; - struct timeout sc_bulkfail_tmo; -#endif - struct in_addr sc_sync_peer; - struct in_addr sc_sendaddr; - struct mbuf *sc_mbuf; /* current cumulative mbuf */ - struct mbuf *sc_mbuf_net; /* current cumulative mbuf */ -#ifdef PFSYNC_TDB - struct mbuf *sc_mbuf_tdb; /* dito for TDB updates */ -#endif -#ifdef __FreeBSD__ - struct ifqueue sc_ifq; - struct task sc_send_task; -#endif - union sc_statep sc_statep; - union sc_statep sc_statep_net; -#ifdef PFSYNC_TDB - union sc_tdb_statep sc_statep_tdb; -#endif - u_int32_t sc_ureq_received; - u_int32_t sc_ureq_sent; - struct pf_state *sc_bulk_send_next; - struct pf_state *sc_bulk_terminator; - int sc_bulk_tries; - int sc_maxcount; /* number of states in mtu */ - int sc_maxupdates; /* number of updates/state */ -#ifdef __FreeBSD__ - eventhandler_tag sc_detachtag; -#endif -}; +/* + * TDB + */ -extern struct pfsync_softc *pfsyncif; -#endif +struct pfsync_tdb { + u_int32_t spi; + union sockaddr_union dst; + u_int32_t rpl; + u_int64_t cur_bytes; + u_int8_t sproto; + u_int8_t updates; + u_int8_t _pad[2]; +} __packed; +/* + * EOF + */ -struct pfsync_header { - u_int8_t version; -#define PFSYNC_VERSION 3 - u_int8_t af; - u_int8_t action; -#define PFSYNC_ACT_CLR 0 /* clear all states */ -#define PFSYNC_ACT_INS 1 /* insert state */ -#define PFSYNC_ACT_UPD 2 /* update state */ -#define PFSYNC_ACT_DEL 3 /* delete state */ -#define PFSYNC_ACT_UPD_C 4 /* "compressed" state update */ -#define PFSYNC_ACT_DEL_C 5 /* "compressed" state delete */ -#define PFSYNC_ACT_INS_F 6 /* insert fragment */ -#define PFSYNC_ACT_DEL_F 7 /* delete fragments */ -#define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */ -#define PFSYNC_ACT_BUS 9 /* Bulk Update Status */ -#define PFSYNC_ACT_TDB_UPD 10 /* TDB replay counter update */ -#define PFSYNC_ACT_MAX 11 - u_int8_t count; - u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; +struct pfsync_eof { + u_int8_t hmac[PFSYNC_HMAC_LEN]; } __packed; -#define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */ -#define PFSYNC_MAX_BULKTRIES 12 -#define PFSYNC_HDRLEN sizeof(struct pfsync_header) -#define PFSYNC_ACTIONS \ - "CLR ST", "INS ST", "UPD ST", "DEL ST", \ - "UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \ - "UPD REQ", "BLK UPD STAT", "TDB UPD" +#define PFSYNC_HDRLEN sizeof(struct pfsync_header) + -#define PFSYNC_DFLTTL 255 + +/* + * Names for PFSYNC sysctl objects + */ +#define PFSYNCCTL_STATS 1 /* PFSYNC stats */ +#define PFSYNCCTL_MAXID 2 + +#define PFSYNCCTL_NAMES { \ + { 0, 0 }, \ + { "stats", CTLTYPE_STRUCT }, \ +} struct pfsyncstats { u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */ @@ -280,96 +269,56 @@ struct pfsyncreq { }; #ifdef __FreeBSD__ -#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) -#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) +#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) +#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) #endif -#define pf_state_peer_hton(s,d) do { \ - (d)->seqlo = htonl((s)->seqlo); \ - (d)->seqhi = htonl((s)->seqhi); \ - (d)->seqdiff = htonl((s)->seqdiff); \ - (d)->max_win = htons((s)->max_win); \ - (d)->mss = htons((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub) { \ - (d)->scrub.pfss_flags = \ - htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ - (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ - (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ - (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ - } \ -} while (0) - -#define pf_state_peer_ntoh(s,d) do { \ - (d)->seqlo = ntohl((s)->seqlo); \ - (d)->seqhi = ntohl((s)->seqhi); \ - (d)->seqdiff = ntohl((s)->seqdiff); \ - (d)->max_win = ntohs((s)->max_win); \ - (d)->mss = ntohs((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ - (d)->scrub != NULL) { \ - (d)->scrub->pfss_flags = \ - ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ - (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ - (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ - } \ -} while (0) - -#define pf_state_host_hton(s,d) do { \ - bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \ - (d)->port = (s)->port; \ -} while (0) - -#define pf_state_host_ntoh(s,d) do { \ - bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \ - (d)->port = (s)->port; \ -} while (0) - -#define pf_state_counter_hton(s,d) do { \ - d[0] = htonl((s>>32)&0xffffffff); \ - d[1] = htonl(s&0xffffffff); \ -} while (0) - -#define pf_state_counter_ntoh(s,d) do { \ - d = ntohl(s[0]); \ - d = d<<32; \ - d += ntohl(s[1]); \ -} while (0) - #ifdef _KERNEL + +/* + * this shows where a pf state is with respect to the syncing. + */ +#define PFSYNC_S_INS 0x00 +#define PFSYNC_S_IACK 0x01 +#define PFSYNC_S_UPD 0x02 +#define PFSYNC_S_UPD_C 0x03 +#define PFSYNC_S_DEL 0x04 +#define PFSYNC_S_COUNT 0x05 + +#define PFSYNC_S_DEFER 0xfe +#define PFSYNC_S_NONE 0xff + #ifdef __FreeBSD__ -void pfsync_input(struct mbuf *, __unused int); +void pfsync_input(struct mbuf *, __unused int); #else -void pfsync_input(struct mbuf *, ...); +void pfsync_input(struct mbuf *, ...); #endif -int pfsync_clear_states(u_int32_t, char *); -int pfsync_pack_state(u_int8_t, struct pf_state *, int); -#define pfsync_insert_state(st) do { \ - if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || \ - (st->proto == IPPROTO_PFSYNC)) \ - st->sync_flags |= PFSTATE_NOSYNC; \ - else if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_INS, (st), \ - PFSYNC_FLAG_COMPRESS); \ - st->sync_flags &= ~PFSTATE_FROMSYNC; \ -} while (0) -#define pfsync_update_state(st) do { \ - if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_UPD, (st), \ - PFSYNC_FLAG_COMPRESS); \ - st->sync_flags &= ~PFSTATE_FROMSYNC; \ -} while (0) -#define pfsync_delete_state(st) do { \ - if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_DEL, (st), \ - PFSYNC_FLAG_COMPRESS); \ -} while (0) -#ifdef PFSYNC_TDB -int pfsync_update_tdb(struct tdb *, int); +int pfsync_sysctl(int *, u_int, void *, size_t *, + void *, size_t); + +#define PFSYNC_SI_IOCTL 0x01 +#define PFSYNC_SI_CKSUM 0x02 +#define PFSYNC_SI_ACK 0x04 +int pfsync_state_import(struct pfsync_state *, u_int8_t); +#ifndef __FreeBSD__ +void pfsync_state_export(struct pfsync_state *, + struct pf_state *); #endif + +void pfsync_insert_state(struct pf_state *); +void pfsync_update_state(struct pf_state *); +void pfsync_delete_state(struct pf_state *); +void pfsync_clear_states(u_int32_t, const char *); + +#ifdef notyet +void pfsync_update_tdb(struct tdb *, int); +void pfsync_delete_tdb(struct tdb *); +#endif + +int pfsync_defer(struct pf_state *, struct mbuf *); + +int pfsync_up(void); +int pfsync_state_in_use(struct pf_state *); #endif #endif /* _NET_IF_PFSYNC_H_ */ diff --git a/freebsd/sys/contrib/pf/net/pf.c b/freebsd/sys/contrib/pf/net/pf.c index 70123329..edb4b2e9 100644 --- a/freebsd/sys/contrib/pf/net/pf.c +++ b/freebsd/sys/contrib/pf/net/pf.c @@ -1,11 +1,10 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */ -/* add: $OpenBSD: pf.c,v 1.559 2007/09/18 18:45:59 markus Exp $ */ +/* $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier - * Copyright (c) 2002,2003 Henning Brauer + * Copyright (c) 2002 - 2008 Henning Brauer * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,28 +49,19 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_bpf.h> #include <rtems/bsd/local/opt_pf.h> -#ifdef DEV_BPF -#define NBPFILTER DEV_BPF -#else -#define NBPFILTER 0 -#endif - -#ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG -#else -#define NPFLOG 0 -#endif +#define NPFSYNC 1 -#ifdef DEV_PFSYNC -#define NPFSYNC DEV_PFSYNC +#ifdef DEV_PFLOW +#define NPFLOW DEV_PFLOW #else -#define NPFSYNC 0 +#define NPFLOW 0 #endif #else #include "bpfilter.h" #include "pflog.h" #include "pfsync.h" +#include "pflow.h" #endif #include <rtems/bsd/sys/param.h> @@ -83,8 +73,10 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <rtems/bsd/sys/time.h> #ifdef __FreeBSD__ +#include <sys/random.h> #include <sys/sysctl.h> #include <sys/endian.h> +#define betoh64 be64toh #else #include <sys/pool.h> #endif @@ -97,11 +89,21 @@ __FBSDID("$FreeBSD$"); #include <sys/rwlock.h> #endif +#ifdef __FreeBSD__ +#include <sys/md5.h> +#else +#include <crypto/md5.h> +#endif + #include <net/if.h> #include <net/if_types.h> #include <net/bpf.h> #include <net/route.h> -#ifndef __FreeBSD__ +#ifdef __FreeBSD__ +#ifdef RADIX_MPATH +#include <net/radix_mpath.h> +#endif +#else #include <net/radix_mpath.h> #endif @@ -120,16 +122,18 @@ __FBSDID("$FreeBSD$"); #include <netinet/udp_var.h> #include <netinet/icmp_var.h> #include <netinet/if_ether.h> +#ifdef __FreeBSD__ +#include <netinet/ip_fw.h> +#include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */ +#endif #ifndef __FreeBSD__ #include <dev/rndvar.h> #endif #include <net/pfvar.h> #include <net/if_pflog.h> - -#if NPFSYNC > 0 +#include <net/if_pflow.h> #include <net/if_pfsync.h> -#endif /* NPFSYNC > 0 */ #ifdef INET6 #include <netinet/ip6.h> @@ -149,15 +153,61 @@ __FBSDID("$FreeBSD$"); #include <security/mac/mac_framework.h> extern int ip_optcopy(struct ip *, struct ip *); -extern int debug_pfugidhack; #endif -#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x +#ifdef __FreeBSD__ +#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x +#else +#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x +#endif /* * Global variables */ +/* state tables */ +#ifdef __FreeBSD__ +VNET_DEFINE(struct pf_state_tree, pf_statetbl); + +VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]); +VNET_DEFINE(struct pf_palist, pf_pabuf); +VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); +VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive); +VNET_DEFINE(struct pf_status, pf_status); + +VNET_DEFINE(u_int32_t, ticket_altqs_active); +VNET_DEFINE(u_int32_t, ticket_altqs_inactive); +VNET_DEFINE(int, altqs_inactive_open); +VNET_DEFINE(u_int32_t, ticket_pabuf); + +VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx); +#define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx) +VNET_DEFINE(u_char, pf_tcp_secret[16]); +#define V_pf_tcp_secret VNET(pf_tcp_secret) +VNET_DEFINE(int, pf_tcp_secret_init); +#define V_pf_tcp_secret_init VNET(pf_tcp_secret_init) +VNET_DEFINE(int, pf_tcp_iss_off); +#define V_pf_tcp_iss_off VNET(pf_tcp_iss_off) + +struct pf_anchor_stackframe { + struct pf_ruleset *rs; + struct pf_rule *r; + struct pf_anchor_node *parent; + struct pf_anchor *child; +}; +VNET_DEFINE(struct pf_anchor_stackframe, pf_anchor_stack[64]); +#define V_pf_anchor_stack VNET(pf_anchor_stack) + +VNET_DEFINE(uma_zone_t, pf_src_tree_pl); +VNET_DEFINE(uma_zone_t, pf_rule_pl); +VNET_DEFINE(uma_zone_t, pf_pooladdr_pl); +VNET_DEFINE(uma_zone_t, pf_state_pl); +VNET_DEFINE(uma_zone_t, pf_state_key_pl); +VNET_DEFINE(uma_zone_t, pf_state_item_pl); +VNET_DEFINE(uma_zone_t, pf_altq_pl); +#else +struct pf_state_tree pf_statetbl; + struct pf_altqqueue pf_altqs[2]; struct pf_palist pf_pabuf; struct pf_altqqueue *pf_altqs_active; @@ -169,6 +219,11 @@ u_int32_t ticket_altqs_inactive; int altqs_inactive_open; u_int32_t ticket_pabuf; +MD5_CTX pf_tcp_secret_ctx; +u_char pf_tcp_secret[16]; +int pf_tcp_secret_init; +int pf_tcp_iss_off; + struct pf_anchor_stackframe { struct pf_ruleset *rs; struct pf_rule *r; @@ -176,16 +231,11 @@ struct pf_anchor_stackframe { struct pf_anchor *child; } pf_anchor_stack[64]; -#ifdef __FreeBSD__ -uma_zone_t pf_src_tree_pl, pf_rule_pl; -uma_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl; -#else -struct pool pf_src_tree_pl, pf_rule_pl; -struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; +struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl; +struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; +struct pool pf_altq_pl; #endif -void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); - void pf_init_threshold(struct pf_threshold *, u_int32_t, u_int32_t); void pf_add_threshold(struct pf_threshold *); @@ -214,18 +264,12 @@ void pf_send_tcp(const struct pf_rule *, sa_family_t, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, u_int16_t, struct ether_header *, struct ifnet *); -void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, +static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); -struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, - int, int, struct pfi_kif *, - struct pf_addr *, u_int16_t, struct pf_addr *, - u_int16_t, int); -struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, - int, int, struct pfi_kif *, struct pf_src_node **, - struct pf_addr *, u_int16_t, - struct pf_addr *, u_int16_t, - struct pf_addr *, u_int16_t *); -int pf_test_tcp(struct pf_rule **, struct pf_state **, +void pf_detach_state(struct pf_state *); +void pf_state_key_detach(struct pf_state *, int); +u_int32_t pf_tcp_iss(struct pf_pdesc *); +int pf_test_rule(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, #ifdef __FreeBSD__ @@ -234,23 +278,14 @@ int pf_test_tcp(struct pf_rule **, struct pf_state **, #else struct pf_ruleset **, struct ifqueue *); #endif -int pf_test_udp(struct pf_rule **, struct pf_state **, - int, struct pfi_kif *, struct mbuf *, int, - void *, struct pf_pdesc *, struct pf_rule **, -#ifdef __FreeBSD__ - struct pf_ruleset **, struct ifqueue *, - struct inpcb *); -#else - struct pf_ruleset **, struct ifqueue *); -#endif -int pf_test_icmp(struct pf_rule **, struct pf_state **, - int, struct pfi_kif *, struct mbuf *, int, - void *, struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **, struct ifqueue *); -int pf_test_other(struct pf_rule **, struct pf_state **, - int, struct pfi_kif *, struct mbuf *, int, void *, - struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **, struct ifqueue *); +static __inline int pf_create_state(struct pf_rule *, struct pf_rule *, + struct pf_rule *, struct pf_pdesc *, + struct pf_src_node *, struct pf_state_key *, + struct pf_state_key *, struct pf_state_key *, + struct pf_state_key *, struct mbuf *, int, + u_int16_t, u_int16_t, int *, struct pfi_kif *, + struct pf_state **, int, u_int16_t, u_int16_t, + int); int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, @@ -259,7 +294,7 @@ int pf_tcp_track_full(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, u_short *, int *); -int pf_tcp_track_sloppy(struct pf_state_peer *, +int pf_tcp_track_sloppy(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pf_pdesc *, u_short *); int pf_test_state_tcp(struct pf_state **, int, @@ -272,30 +307,14 @@ int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); int pf_test_state_other(struct pf_state **, int, - struct pfi_kif *, struct pf_pdesc *); -int pf_match_tag(struct mbuf *, struct pf_rule *, - struct pf_mtag *, int *); -int pf_step_out_of_anchor(int *, struct pf_ruleset **, - int, struct pf_rule **, struct pf_rule **, - int *); -void pf_hash(struct pf_addr *, struct pf_addr *, - struct pf_poolhashkey *, sa_family_t); -int pf_map_addr(u_int8_t, struct pf_rule *, - struct pf_addr *, struct pf_addr *, - struct pf_addr *, struct pf_src_node **); -int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, - struct pf_addr *, struct pf_addr *, u_int16_t, - struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, - struct pf_src_node **); + struct pfi_kif *, struct mbuf *, struct pf_pdesc *); void pf_route(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, struct pf_pdesc *); void pf_route6(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, struct pf_pdesc *); -#ifdef __FreeBSD__ -/* XXX: import */ -#else +#ifndef __FreeBSD__ int pf_socket_lookup(int, struct pf_pdesc *); #endif u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, @@ -303,24 +322,37 @@ u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, sa_family_t); u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, - u_int16_t); + int, u_int16_t); void pf_set_rt_ifp(struct pf_state *, struct pf_addr *); int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); +#ifndef __FreeBSD__ +struct pf_divert *pf_get_divert(struct mbuf *); +#endif +void pf_print_state_parts(struct pf_state *, + struct pf_state_key *, struct pf_state_key *); int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); -struct pf_state *pf_find_state_recurse(struct pfi_kif *, - struct pf_state_cmp *, u_int8_t); +int pf_compare_state_keys(struct pf_state_key *, + struct pf_state_key *, struct pfi_kif *, u_int); +#ifdef __FreeBSD__ +struct pf_state *pf_find_state(struct pfi_kif *, + struct pf_state_key_cmp *, u_int, struct mbuf *, + struct pf_mtag *); +#else +struct pf_state *pf_find_state(struct pfi_kif *, + struct pf_state_key_cmp *, u_int, struct mbuf *); +#endif int pf_src_connlimit(struct pf_state **); int pf_check_congestion(struct ifqueue *); #ifdef __FreeBSD__ int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); -extern int pf_end_threads; +VNET_DECLARE(int, pf_end_threads); -struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; +VNET_DEFINE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]); #else extern struct pool pfr_ktable_pl; extern struct pool pfr_kentry_pl; @@ -334,80 +366,101 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { }; #endif -#define STATE_LOOKUP() \ +#ifdef __FreeBSD__ +#define PPACKET_LOOPED() \ + (pd->pf_mtag->flags & PF_PACKET_LOOPED) + +#define PACKET_LOOPED() \ + (pd.pf_mtag->flags & PF_PACKET_LOOPED) + +#define STATE_LOOKUP(i, k, d, s, m, pt) \ do { \ - if (direction == PF_IN) \ - *state = pf_find_state_recurse( \ - kif, &key, PF_EXT_GWY); \ - else \ - *state = pf_find_state_recurse( \ - kif, &key, PF_LAN_EXT); \ - if (*state == NULL || (*state)->timeout == PFTM_PURGE) \ + s = pf_find_state(i, k, d, m, pt); \ + if (s == NULL || (s)->timeout == PFTM_PURGE) \ return (PF_DROP); \ - if (direction == PF_OUT && \ - (((*state)->rule.ptr->rt == PF_ROUTETO && \ - (*state)->rule.ptr->direction == PF_OUT) || \ - ((*state)->rule.ptr->rt == PF_REPLYTO && \ - (*state)->rule.ptr->direction == PF_IN)) && \ - (*state)->rt_kif != NULL && \ - (*state)->rt_kif != kif) \ + if (PPACKET_LOOPED()) \ + return (PF_PASS); \ + if (d == PF_OUT && \ + (((s)->rule.ptr->rt == PF_ROUTETO && \ + (s)->rule.ptr->direction == PF_OUT) || \ + ((s)->rule.ptr->rt == PF_REPLYTO && \ + (s)->rule.ptr->direction == PF_IN)) && \ + (s)->rt_kif != NULL && \ + (s)->rt_kif != i) \ return (PF_PASS); \ } while (0) +#else +#define STATE_LOOKUP(i, k, d, s, m) \ + do { \ + s = pf_find_state(i, k, d, m); \ + if (s == NULL || (s)->timeout == PFTM_PURGE) \ + return (PF_DROP); \ + if (d == PF_OUT && \ + (((s)->rule.ptr->rt == PF_ROUTETO && \ + (s)->rule.ptr->direction == PF_OUT) || \ + ((s)->rule.ptr->rt == PF_REPLYTO && \ + (s)->rule.ptr->direction == PF_IN)) && \ + (s)->rt_kif != NULL && \ + (s)->rt_kif != i) \ + return (PF_PASS); \ + } while (0) +#endif -#define STATE_TRANSLATE(s) \ - (s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \ - ((s)->af == AF_INET6 && \ - ((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \ - (s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \ - (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \ - (s)->lan.port != (s)->gwy.port - -#define BOUND_IFACE(r, k) \ +#ifdef __FreeBSD__ +#define BOUND_IFACE(r, k) \ + ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all +#else +#define BOUND_IFACE(r, k) \ ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all +#endif -#define STATE_INC_COUNTERS(s) \ +#define STATE_INC_COUNTERS(s) \ do { \ - s->rule.ptr->states++; \ - if (s->anchor.ptr != NULL) \ - s->anchor.ptr->states++; \ - if (s->nat_rule.ptr != NULL) \ - s->nat_rule.ptr->states++; \ + s->rule.ptr->states_cur++; \ + s->rule.ptr->states_tot++; \ + if (s->anchor.ptr != NULL) { \ + s->anchor.ptr->states_cur++; \ + s->anchor.ptr->states_tot++; \ + } \ + if (s->nat_rule.ptr != NULL) { \ + s->nat_rule.ptr->states_cur++; \ + s->nat_rule.ptr->states_tot++; \ + } \ } while (0) -#define STATE_DEC_COUNTERS(s) \ +#define STATE_DEC_COUNTERS(s) \ do { \ if (s->nat_rule.ptr != NULL) \ - s->nat_rule.ptr->states--; \ + s->nat_rule.ptr->states_cur--; \ if (s->anchor.ptr != NULL) \ - s->anchor.ptr->states--; \ - s->rule.ptr->states--; \ + s->anchor.ptr->states_cur--; \ + s->rule.ptr->states_cur--; \ } while (0) +static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); +static __inline int pf_state_compare_key(struct pf_state_key *, + struct pf_state_key *); +static __inline int pf_state_compare_id(struct pf_state *, + struct pf_state *); + +#ifdef __FreeBSD__ +VNET_DEFINE(struct pf_src_tree, tree_src_tracking); + +VNET_DEFINE(struct pf_state_tree_id, tree_id); +VNET_DEFINE(struct pf_state_queue, state_list); +#else struct pf_src_tree tree_src_tracking; struct pf_state_tree_id tree_id; struct pf_state_queue state_list; - -#ifdef __FreeBSD__ -static int pf_src_compare(struct pf_src_node *, struct pf_src_node *); -static int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *); -static int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *); -static int pf_state_compare_id(struct pf_state *, struct pf_state *); #endif RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); -RB_GENERATE(pf_state_tree_lan_ext, pf_state, - u.s.entry_lan_ext, pf_state_compare_lan_ext); -RB_GENERATE(pf_state_tree_ext_gwy, pf_state, - u.s.entry_ext_gwy, pf_state_compare_ext_gwy); +RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); RB_GENERATE(pf_state_tree_id, pf_state, - u.s.entry_id, pf_state_compare_id); + entry_id, pf_state_compare_id); -#ifdef __FreeBSD__ -static int -#else static __inline int -#endif pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) { int diff; @@ -451,169 +504,6 @@ pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) return (0); } -#ifdef __FreeBSD__ -static int -#else -static __inline int -#endif -pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b) -{ - int diff; - - if ((diff = a->proto - b->proto) != 0) - return (diff); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#ifdef INET - case AF_INET: - if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) - return (1); - if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) - return (-1); - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3]) - return (1); - if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3]) - return (-1); - if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) - return (1); - if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) - return (-1); - if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2]) - return (1); - if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2]) - return (-1); - if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) - return (1); - if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) - return (-1); - if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1]) - return (1); - if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1]) - return (-1); - if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) - return (1); - if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) - return (-1); - if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) - return (1); - if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) - return (-1); - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - break; -#endif /* INET6 */ - } - - if ((diff = a->lan.port - b->lan.port) != 0) - return (diff); - if ((diff = a->ext.port - b->ext.port) != 0) - return (diff); - - return (0); -} - -#ifdef __FreeBSD__ -static int -#else -static __inline int -#endif -pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b) -{ - int diff; - - if ((diff = a->proto - b->proto) != 0) - return (diff); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#ifdef INET - case AF_INET: - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) - return (1); - if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) - return (-1); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) - return (1); - if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) - return (-1); - if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3]) - return (1); - if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3]) - return (-1); - if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) - return (1); - if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) - return (-1); - if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2]) - return (1); - if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2]) - return (-1); - if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) - return (1); - if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) - return (-1); - if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1]) - return (1); - if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1]) - return (-1); - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) - return (1); - if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) - return (-1); - break; -#endif /* INET6 */ - } - - if ((diff = a->ext.port - b->ext.port) != 0) - return (diff); - if ((diff = a->gwy.port - b->gwy.port) != 0) - return (diff); - - return (0); -} - -#ifdef __FreeBSD__ -static int -#else -static __inline int -#endif -pf_state_compare_id(struct pf_state *a, struct pf_state *b) -{ - if (a->id > b->id) - return (1); - if (a->id < b->id) - return (-1); - if (a->creatorid > b->creatorid) - return (1); - if (a->creatorid < b->creatorid) - return (-1); - - return (0); -} - #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) @@ -634,80 +524,6 @@ pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) } #endif /* INET6 */ -struct pf_state * -pf_find_state_byid(struct pf_state_cmp *key) -{ - pf_status.fcounters[FCNT_STATE_SEARCH]++; - return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); -} - -struct pf_state * -pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree) -{ - struct pf_state *s; - - pf_status.fcounters[FCNT_STATE_SEARCH]++; - - switch (tree) { - case PF_LAN_EXT: - if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext, - (struct pf_state *)key)) != NULL) - return (s); - if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext, - (struct pf_state *)key)) != NULL) - return (s); - return (NULL); - case PF_EXT_GWY: - if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, - (struct pf_state *)key)) != NULL) - return (s); - if ((s = RB_FIND(pf_state_tree_ext_gwy, &pfi_all->pfik_ext_gwy, - (struct pf_state *)key)) != NULL) - return (s); - return (NULL); - default: - panic("pf_find_state_recurse"); - } -} - -struct pf_state * -pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more) -{ - struct pf_state *s, *ss = NULL; - struct pfi_kif *kif; - - pf_status.fcounters[FCNT_STATE_SEARCH]++; - - switch (tree) { - case PF_LAN_EXT: - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { - s = RB_FIND(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, (struct pf_state *)key); - if (s == NULL) - continue; - if (more == NULL) - return (s); - ss = s; - (*more)++; - } - return (ss); - case PF_EXT_GWY: - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { - s = RB_FIND(pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy, (struct pf_state *)key); - if (s == NULL) - continue; - if (more == NULL) - return (s); - ss = s; - (*more)++; - } - return (ss); - default: - panic("pf_find_state_all"); - } -} - void pf_init_threshold(struct pf_threshold *threshold, u_int32_t limit, u_int32_t seconds) @@ -741,7 +557,6 @@ pf_check_threshold(struct pf_threshold *threshold) int pf_src_connlimit(struct pf_state **state) { - struct pf_state *s; int bad = 0; (*state)->src_node->conn++; @@ -751,13 +566,21 @@ pf_src_connlimit(struct pf_state **state) if ((*state)->rule.ptr->max_src_conn && (*state)->rule.ptr->max_src_conn < (*state)->src_node->conn) { +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_SRCCONN]++; +#else pf_status.lcounters[LCNT_SRCCONN]++; +#endif bad++; } if ((*state)->rule.ptr->max_src_conn_rate.limit && pf_check_threshold(&(*state)->src_node->conn_rate)) { +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_SRCCONNRATE]++; +#else pf_status.lcounters[LCNT_SRCCONNRATE]++; +#endif bad++; } @@ -768,16 +591,21 @@ pf_src_connlimit(struct pf_state **state) struct pfr_addr p; u_int32_t killed = 0; +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf_src_connlimit: blocking address "); pf_print_host(&(*state)->src_node->addr, 0, - (*state)->af); + (*state)->key[PF_SK_WIRE]->af); } bzero(&p, sizeof(p)); - p.pfra_af = (*state)->af; - switch ((*state)->af) { + p.pfra_af = (*state)->key[PF_SK_WIRE]->af; + switch ((*state)->key[PF_SK_WIRE]->af) { #ifdef INET case AF_INET: p.pfra_net = 32; @@ -797,34 +625,51 @@ pf_src_connlimit(struct pf_state **state) /* kill existing states if that's required. */ if ((*state)->rule.ptr->flush) { - pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + struct pf_state_key *sk; + struct pf_state *st; - RB_FOREACH(s, pf_state_tree_id, &tree_id) { +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + RB_FOREACH(st, pf_state_tree_id, &V_tree_id) { +#else + pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + RB_FOREACH(st, pf_state_tree_id, &tree_id) { +#endif + sk = st->key[PF_SK_WIRE]; /* * Kill states from this source. (Only those * from the same rule if PF_FLUSH_GLOBAL is not * set) */ - if (s->af == (*state)->af && + if (sk->af == + (*state)->key[PF_SK_WIRE]->af && (((*state)->direction == PF_OUT && PF_AEQ(&(*state)->src_node->addr, - &s->lan.addr, s->af)) || + &sk->addr[1], sk->af)) || ((*state)->direction == PF_IN && PF_AEQ(&(*state)->src_node->addr, - &s->ext.addr, s->af))) && + &sk->addr[0], sk->af))) && ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL || - (*state)->rule.ptr == s->rule.ptr)) { - s->timeout = PFTM_PURGE; - s->src.state = s->dst.state = + (*state)->rule.ptr == st->rule.ptr)) { + st->timeout = PFTM_PURGE; + st->src.state = st->dst.state = TCPS_CLOSED; killed++; } } +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else if (pf_status.debug >= PF_DEBUG_MISC) +#endif printf(", %u states killed", killed); } +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else if (pf_status.debug >= PF_DEBUG_MISC) +#endif printf("\n"); } @@ -848,18 +693,30 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, k.rule.ptr = rule; else k.rule.ptr = NULL; +#ifdef __FreeBSD__ + V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + *sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k); +#else pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); +#endif } if (*sn == NULL) { if (!rule->max_src_nodes || rule->src_nodes < rule->max_src_nodes) - (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + (*sn) = pool_get(&V_pf_src_tree_pl, PR_NOWAIT | PR_ZERO); +#else + (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO); +#endif else +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_SRCNODES]++; +#else pf_status.lcounters[LCNT_SRCNODES]++; +#endif if ((*sn) == NULL) return (-1); - bzero(*sn, sizeof(struct pf_src_node)); pf_init_threshold(&(*sn)->conn_rate, rule->max_src_conn_rate.limit, @@ -873,109 +730,590 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, (*sn)->rule.ptr = NULL; PF_ACPY(&(*sn)->addr, src, af); if (RB_INSERT(pf_src_tree, +#ifdef __FreeBSD__ + &V_tree_src_tracking, *sn) != NULL) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else &tree_src_tracking, *sn) != NULL) { if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: src_tree insert failed: "); pf_print_host(&(*sn)->addr, 0, af); printf("\n"); } +#ifdef __FreeBSD__ + pool_put(&V_pf_src_tree_pl, *sn); +#else pool_put(&pf_src_tree_pl, *sn); +#endif return (-1); } (*sn)->creation = time_second; (*sn)->ruletype = rule->action; if ((*sn)->rule.ptr != NULL) (*sn)->rule.ptr->src_nodes++; +#ifdef __FreeBSD__ + V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++; + V_pf_status.src_nodes++; +#else pf_status.scounters[SCNT_SRC_NODE_INSERT]++; pf_status.src_nodes++; +#endif } else { if (rule->max_src_states && (*sn)->states >= rule->max_src_states) { +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_SRCSTATES]++; +#else pf_status.lcounters[LCNT_SRCSTATES]++; +#endif return (-1); } } return (0); } +/* state table stuff */ + +static __inline int +pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) +{ + int diff; + + if ((diff = a->proto - b->proto) != 0) + return (diff); + if ((diff = a->af - b->af) != 0) + return (diff); + switch (a->af) { +#ifdef INET + case AF_INET: + if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) + return (1); + if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) + return (-1); + if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) + return (1); + if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (a->addr[0].addr32[3] > b->addr[0].addr32[3]) + return (1); + if (a->addr[0].addr32[3] < b->addr[0].addr32[3]) + return (-1); + if (a->addr[1].addr32[3] > b->addr[1].addr32[3]) + return (1); + if (a->addr[1].addr32[3] < b->addr[1].addr32[3]) + return (-1); + if (a->addr[0].addr32[2] > b->addr[0].addr32[2]) + return (1); + if (a->addr[0].addr32[2] < b->addr[0].addr32[2]) + return (-1); + if (a->addr[1].addr32[2] > b->addr[1].addr32[2]) + return (1); + if (a->addr[1].addr32[2] < b->addr[1].addr32[2]) + return (-1); + if (a->addr[0].addr32[1] > b->addr[0].addr32[1]) + return (1); + if (a->addr[0].addr32[1] < b->addr[0].addr32[1]) + return (-1); + if (a->addr[1].addr32[1] > b->addr[1].addr32[1]) + return (1); + if (a->addr[1].addr32[1] < b->addr[1].addr32[1]) + return (-1); + if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) + return (1); + if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) + return (-1); + if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) + return (1); + if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) + return (-1); + break; +#endif /* INET6 */ + } + + if ((diff = a->port[0] - b->port[0]) != 0) + return (diff); + if ((diff = a->port[1] - b->port[1]) != 0) + return (diff); + + return (0); +} + +static __inline int +pf_state_compare_id(struct pf_state *a, struct pf_state *b) +{ + if (a->id > b->id) + return (1); + if (a->id < b->id) + return (-1); + if (a->creatorid > b->creatorid) + return (1); + if (a->creatorid < b->creatorid) + return (-1); + + return (0); +} + int -pf_insert_state(struct pfi_kif *kif, struct pf_state *state) +pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) { - /* Thou MUST NOT insert multiple duplicate keys */ - state->u.s.kif = kif; - if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) { - if (pf_status.debug >= PF_DEBUG_MISC) { - printf("pf: state insert failed: tree_lan_ext"); - printf(" lan: "); - pf_print_host(&state->lan.addr, state->lan.port, - state->af); - printf(" gwy: "); - pf_print_host(&state->gwy.addr, state->gwy.port, - state->af); - printf(" ext: "); - pf_print_host(&state->ext.addr, state->ext.port, - state->af); - if (state->sync_flags & PFSTATE_FROMSYNC) - printf(" (from sync)"); - printf("\n"); - } + struct pf_state_item *si; + struct pf_state_key *cur; + struct pf_state *olds = NULL; + +#ifdef __FreeBSD__ + KASSERT(s->key[idx] == NULL, ("%s: key is null!", __FUNCTION__)); +#else + KASSERT(s->key[idx] == NULL); /* XXX handle this? */ +#endif + +#ifdef __FreeBSD__ + if ((cur = RB_INSERT(pf_state_tree, &V_pf_statetbl, sk)) != NULL) { +#else + if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { +#endif + /* key exists. check for same kif, if none, add to key */ + TAILQ_FOREACH(si, &cur->states, entry) + if (si->s->kif == s->kif && + si->s->direction == s->direction) { + if (sk->proto == IPPROTO_TCP && + si->s->src.state >= TCPS_FIN_WAIT_2 && + si->s->dst.state >= TCPS_FIN_WAIT_2) { + si->s->src.state = si->s->dst.state = + TCPS_CLOSED; + /* unlink late or sks can go away */ + olds = si->s; + } else { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { +#endif + printf("pf: %s key attach " + "failed on %s: ", + (idx == PF_SK_WIRE) ? + "wire" : "stack", + s->kif->pfik_name); + pf_print_state_parts(s, + (idx == PF_SK_WIRE) ? + sk : NULL, + (idx == PF_SK_STACK) ? + sk : NULL); + printf(", existing: "); + pf_print_state_parts(si->s, + (idx == PF_SK_WIRE) ? + sk : NULL, + (idx == PF_SK_STACK) ? + sk : NULL); + printf("\n"); + } +#ifdef __FreeBSD__ + pool_put(&V_pf_state_key_pl, sk); +#else + pool_put(&pf_state_key_pl, sk); +#endif + return (-1); /* collision! */ + } + } +#ifdef __FreeBSD__ + pool_put(&V_pf_state_key_pl, sk); +#else + pool_put(&pf_state_key_pl, sk); +#endif + s->key[idx] = cur; + } else + s->key[idx] = sk; + +#ifdef __FreeBSD__ + if ((si = pool_get(&V_pf_state_item_pl, PR_NOWAIT)) == NULL) { +#else + if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { +#endif + pf_state_key_detach(s, idx); return (-1); } + si->s = s; - if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) { - if (pf_status.debug >= PF_DEBUG_MISC) { - printf("pf: state insert failed: tree_ext_gwy"); - printf(" lan: "); - pf_print_host(&state->lan.addr, state->lan.port, - state->af); - printf(" gwy: "); - pf_print_host(&state->gwy.addr, state->gwy.port, - state->af); - printf(" ext: "); - pf_print_host(&state->ext.addr, state->ext.port, - state->af); - if (state->sync_flags & PFSTATE_FROMSYNC) - printf(" (from sync)"); - printf("\n"); + /* list is sorted, if-bound states before floating */ +#ifdef __FreeBSD__ + if (s->kif == V_pfi_all) +#else + if (s->kif == pfi_all) +#endif + TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); + else + TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); + + if (olds) + pf_unlink_state(olds); + + return (0); +} + +void +pf_detach_state(struct pf_state *s) +{ + if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) + s->key[PF_SK_WIRE] = NULL; + + if (s->key[PF_SK_STACK] != NULL) + pf_state_key_detach(s, PF_SK_STACK); + + if (s->key[PF_SK_WIRE] != NULL) + pf_state_key_detach(s, PF_SK_WIRE); +} + +void +pf_state_key_detach(struct pf_state *s, int idx) +{ + struct pf_state_item *si; + + si = TAILQ_FIRST(&s->key[idx]->states); + while (si && si->s != s) + si = TAILQ_NEXT(si, entry); + + if (si) { + TAILQ_REMOVE(&s->key[idx]->states, si, entry); +#ifdef __FreeBSD__ + pool_put(&V_pf_state_item_pl, si); +#else + pool_put(&pf_state_item_pl, si); +#endif + } + + if (TAILQ_EMPTY(&s->key[idx]->states)) { +#ifdef __FreeBSD__ + RB_REMOVE(pf_state_tree, &V_pf_statetbl, s->key[idx]); +#else + RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]); +#endif + if (s->key[idx]->reverse) + s->key[idx]->reverse->reverse = NULL; +#ifdef __FreeBSD__ + /* XXX: implement this */ +#else + if (s->key[idx]->inp) + s->key[idx]->inp->inp_pf_sk = NULL; +#endif +#ifdef __FreeBSD__ + pool_put(&V_pf_state_key_pl, s->key[idx]); +#else + pool_put(&pf_state_key_pl, s->key[idx]); +#endif + } + s->key[idx] = NULL; +} + +struct pf_state_key * +pf_alloc_state_key(int pool_flags) +{ + struct pf_state_key *sk; + +#ifdef __FreeBSD__ + if ((sk = pool_get(&V_pf_state_key_pl, pool_flags)) == NULL) +#else + if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) +#endif + return (NULL); + TAILQ_INIT(&sk->states); + + return (sk); +} + +int +pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr, + struct pf_state_key **skw, struct pf_state_key **sks, + struct pf_state_key **skp, struct pf_state_key **nkp, + struct pf_addr *saddr, struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport) +{ +#ifdef __FreeBSD__ + KASSERT((*skp == NULL && *nkp == NULL), + ("%s: skp == NULL && nkp == NULL", __FUNCTION__)); +#else + KASSERT((*skp == NULL && *nkp == NULL)); +#endif + + if ((*skp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) + return (ENOMEM); + + PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af); + PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af); + (*skp)->port[pd->sidx] = sport; + (*skp)->port[pd->didx] = dport; + (*skp)->proto = pd->proto; + (*skp)->af = pd->af; + + if (nr != NULL) { + if ((*nkp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) + return (ENOMEM); /* caller must handle cleanup */ + + /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */ + PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af); + PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af); + (*nkp)->port[0] = (*skp)->port[0]; + (*nkp)->port[1] = (*skp)->port[1]; + (*nkp)->proto = pd->proto; + (*nkp)->af = pd->af; + } else + *nkp = *skp; + + if (pd->dir == PF_IN) { + *skw = *skp; + *sks = *nkp; + } else { + *sks = *skp; + *skw = *nkp; + } + return (0); +} + + +int +pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, + struct pf_state_key *sks, struct pf_state *s) +{ +#ifndef __FreeBSD__ + splassert(IPL_SOFTNET); +#endif + + s->kif = kif; + + if (skw == sks) { + if (pf_state_key_attach(skw, s, PF_SK_WIRE)) + return (-1); + s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; + } else { + if (pf_state_key_attach(skw, s, PF_SK_WIRE)) { +#ifdef __FreeBSD__ + pool_put(&V_pf_state_key_pl, sks); +#else + pool_put(&pf_state_key_pl, sks); +#endif + return (-1); + } + if (pf_state_key_attach(sks, s, PF_SK_STACK)) { + pf_state_key_detach(s, PF_SK_WIRE); + return (-1); } - RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state); - return (-1); } - if (state->id == 0 && state->creatorid == 0) { - state->id = htobe64(pf_status.stateid++); - state->creatorid = pf_status.hostid; + if (s->id == 0 && s->creatorid == 0) { +#ifdef __FreeBSD__ + s->id = htobe64(V_pf_status.stateid++); + s->creatorid = V_pf_status.hostid; +#else + s->id = htobe64(pf_status.stateid++); + s->creatorid = pf_status.hostid; +#endif } - if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) { - if (pf_status.debug >= PF_DEBUG_MISC) { #ifdef __FreeBSD__ - printf("pf: state insert failed: " - "id: %016llx creatorid: %08x", - (long long)be64toh(state->id), - ntohl(state->creatorid)); + if (RB_INSERT(pf_state_tree_id, &V_tree_id, s) != NULL) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { #else + if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) { + if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: state insert failed: " "id: %016llx creatorid: %08x", - betoh64(state->id), ntohl(state->creatorid)); +#ifdef __FreeBSD__ + (unsigned long long)betoh64(s->id), ntohl(s->creatorid)); +#else + betoh64(s->id), ntohl(s->creatorid)); #endif - if (state->sync_flags & PFSTATE_FROMSYNC) - printf(" (from sync)"); printf("\n"); } - RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state); - RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state); + pf_detach_state(s); return (-1); } - TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list); +#ifdef __FreeBSD__ + TAILQ_INSERT_TAIL(&V_state_list, s, entry_list); + V_pf_status.fcounters[FCNT_STATE_INSERT]++; + V_pf_status.states++; +#else + TAILQ_INSERT_TAIL(&state_list, s, entry_list); pf_status.fcounters[FCNT_STATE_INSERT]++; pf_status.states++; +#endif pfi_kif_ref(kif, PFI_KIF_REF_STATE); -#if NPFSYNC - pfsync_insert_state(state); +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_insert_state_ptr != NULL) + pfsync_insert_state_ptr(s); +#else + pfsync_insert_state(s); +#endif #endif return (0); } +struct pf_state * +pf_find_state_byid(struct pf_state_cmp *key) +{ +#ifdef __FreeBSD__ + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; + + return (RB_FIND(pf_state_tree_id, &V_tree_id, (struct pf_state *)key)); +#else + pf_status.fcounters[FCNT_STATE_SEARCH]++; + + return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); +#endif +} + +/* XXX debug function, intended to be removed one day */ +int +pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b, + struct pfi_kif *kif, u_int dir) +{ + /* a (from hdr) and b (new) must be exact opposites of each other */ + if (a->af == b->af && a->proto == b->proto && + PF_AEQ(&a->addr[0], &b->addr[1], a->af) && + PF_AEQ(&a->addr[1], &b->addr[0], a->af) && + a->port[0] == b->port[1] && + a->port[1] == b->port[0]) + return (0); + else { + /* mismatch. must not happen. */ + printf("pf: state key linking mismatch! dir=%s, " + "if=%s, stored af=%u, a0: ", + dir == PF_OUT ? "OUT" : "IN", kif->pfik_name, a->af); + pf_print_host(&a->addr[0], a->port[0], a->af); + printf(", a1: "); + pf_print_host(&a->addr[1], a->port[1], a->af); + printf(", proto=%u", a->proto); + printf(", found af=%u, a0: ", b->af); + pf_print_host(&b->addr[0], b->port[0], b->af); + printf(", a1: "); + pf_print_host(&b->addr[1], b->port[1], b->af); + printf(", proto=%u", b->proto); + printf(".\n"); + return (-1); + } +} + +struct pf_state * +#ifdef __FreeBSD__ +pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, + struct mbuf *m, struct pf_mtag *pftag) +#else +pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, + struct mbuf *m) +#endif +{ + struct pf_state_key *sk; + struct pf_state_item *si; + +#ifdef __FreeBSD__ + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; +#else + pf_status.fcounters[FCNT_STATE_SEARCH]++; +#endif + +#ifdef __FreeBSD__ + if (dir == PF_OUT && pftag->statekey && + ((struct pf_state_key *)pftag->statekey)->reverse) + sk = ((struct pf_state_key *)pftag->statekey)->reverse; + else { +#ifdef __FreeBSD__ + if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl, +#else + if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, +#endif + (struct pf_state_key *)key)) == NULL) + return (NULL); + if (dir == PF_OUT && pftag->statekey && + pf_compare_state_keys(pftag->statekey, sk, + kif, dir) == 0) { + ((struct pf_state_key *) + pftag->statekey)->reverse = sk; + sk->reverse = pftag->statekey; + } + } +#else + if (dir == PF_OUT && m->m_pkthdr.pf.statekey && + ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) + sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse; + else { +#ifdef __FreeBSD__ + if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl, +#else + if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, +#endif + (struct pf_state_key *)key)) == NULL) + return (NULL); + if (dir == PF_OUT && m->m_pkthdr.pf.statekey && + pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk, + kif, dir) == 0) { + ((struct pf_state_key *) + m->m_pkthdr.pf.statekey)->reverse = sk; + sk->reverse = m->m_pkthdr.pf.statekey; + } + } +#endif + + if (dir == PF_OUT) +#ifdef __FreeBSD__ + pftag->statekey = NULL; +#else + m->m_pkthdr.pf.statekey = NULL; +#endif + + /* list is sorted, if-bound states before floating ones */ + TAILQ_FOREACH(si, &sk->states, entry) +#ifdef __FreeBSD__ + if ((si->s->kif == V_pfi_all || si->s->kif == kif) && +#else + if ((si->s->kif == pfi_all || si->s->kif == kif) && +#endif + sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : + si->s->key[PF_SK_STACK])) + return (si->s); + + return (NULL); +} + +struct pf_state * +pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) +{ + struct pf_state_key *sk; + struct pf_state_item *si, *ret = NULL; + +#ifdef __FreeBSD__ + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; +#else + pf_status.fcounters[FCNT_STATE_SEARCH]++; +#endif + +#ifdef __FreeBSD__ + sk = RB_FIND(pf_state_tree, &V_pf_statetbl, (struct pf_state_key *)key); +#else + sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); +#endif + if (sk != NULL) { + TAILQ_FOREACH(si, &sk->states, entry) + if (dir == PF_INOUT || + (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : + si->s->key[PF_SK_STACK]))) { + if (more == NULL) + return (si->s); + + if (ret) + (*more)++; + else + ret = si; + } + } + return (ret ? ret->s : NULL); +} + +/* END state table stuff */ + + void pf_purge_thread(void *v) { @@ -984,25 +1322,28 @@ pf_purge_thread(void *v) int locked; #endif + CURVNET_SET((struct vnet *)v); + for (;;) { tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); #ifdef __FreeBSD__ - sx_slock(&pf_consistency_lock); + sx_slock(&V_pf_consistency_lock); PF_LOCK(); locked = 0; - if (pf_end_threads) { + if (V_pf_end_threads) { PF_UNLOCK(); - sx_sunlock(&pf_consistency_lock); - sx_xlock(&pf_consistency_lock); + sx_sunlock(&V_pf_consistency_lock); + sx_xlock(&V_pf_consistency_lock); PF_LOCK(); - pf_purge_expired_states(pf_status.states, 1); + + pf_purge_expired_states(V_pf_status.states, 1); pf_purge_expired_fragments(); pf_purge_expired_src_nodes(1); - pf_end_threads++; + V_pf_end_threads++; - sx_xunlock(&pf_consistency_lock); + sx_xunlock(&V_pf_consistency_lock); PF_UNLOCK(); wakeup(pf_purge_thread); kproc_exit(0); @@ -1012,16 +1353,16 @@ pf_purge_thread(void *v) /* process a fraction of the state table every second */ #ifdef __FreeBSD__ - if(!pf_purge_expired_states(1 + (pf_status.states - / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) { + if (!pf_purge_expired_states(1 + (V_pf_status.states / + V_pf_default_rule.timeout[PFTM_INTERVAL]), 0)) { PF_UNLOCK(); - sx_sunlock(&pf_consistency_lock); - sx_xlock(&pf_consistency_lock); + sx_sunlock(&V_pf_consistency_lock); + sx_xlock(&V_pf_consistency_lock); PF_LOCK(); locked = 1; - pf_purge_expired_states(1 + (pf_status.states - / pf_default_rule.timeout[PFTM_INTERVAL]), 1); + pf_purge_expired_states(1 + (V_pf_status.states / + V_pf_default_rule.timeout[PFTM_INTERVAL]), 1); } #else pf_purge_expired_states(1 + (pf_status.states @@ -1029,16 +1370,13 @@ pf_purge_thread(void *v) #endif /* purge other expired types every PFTM_INTERVAL seconds */ +#ifdef __FreeBSD__ + if (++nloops >= V_pf_default_rule.timeout[PFTM_INTERVAL]) { +#else if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { +#endif pf_purge_expired_fragments(); - if (!pf_purge_expired_src_nodes(locked)) { - PF_UNLOCK(); - sx_sunlock(&pf_consistency_lock); - sx_xlock(&pf_consistency_lock); - PF_LOCK(); - locked = 1; - pf_purge_expired_src_nodes(1); - } + pf_purge_expired_src_nodes(0); nloops = 0; } @@ -1046,11 +1384,12 @@ pf_purge_thread(void *v) #ifdef __FreeBSD__ PF_UNLOCK(); if (locked) - sx_xunlock(&pf_consistency_lock); + sx_xunlock(&V_pf_consistency_lock); else - sx_sunlock(&pf_consistency_lock); + sx_sunlock(&V_pf_consistency_lock); #endif } + CURVNET_RESTORE(); } u_int32_t @@ -1066,7 +1405,7 @@ pf_state_expires(const struct pf_state *state) return (time_second); if (state->timeout == PFTM_UNTIL_PACKET) return (0); -#ifdef __FreeBSD__ +#ifdef __FreeBSD__ KASSERT(state->timeout != PFTM_UNLINKED, ("pf_state_expires: timeout == PFTM_UNLINKED")); KASSERT((state->timeout < PFTM_MAX), @@ -1077,15 +1416,25 @@ pf_state_expires(const struct pf_state *state) #endif timeout = state->rule.ptr->timeout[state->timeout]; if (!timeout) +#ifdef __FreeBSD__ + timeout = V_pf_default_rule.timeout[state->timeout]; +#else timeout = pf_default_rule.timeout[state->timeout]; +#endif start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; if (start) { end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; - states = state->rule.ptr->states; + states = state->rule.ptr->states_cur; } else { +#ifdef __FreeBSD__ + start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START]; + end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END]; + states = V_pf_status.states; +#else start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; states = pf_status.states; +#endif } if (end && states > start && start < end) { if (states < end) @@ -1105,46 +1454,61 @@ void pf_purge_expired_src_nodes(int waslocked) #endif { - struct pf_src_node *cur, *next; - int locked = waslocked; - - for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { - next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); + struct pf_src_node *cur, *next; + int locked = waslocked; - if (cur->states <= 0 && cur->expire <= time_second) { - if (! locked) { #ifdef __FreeBSD__ - if (!sx_try_upgrade(&pf_consistency_lock)) - return (0); + for (cur = RB_MIN(pf_src_tree, &V_tree_src_tracking); cur; cur = next) { + next = RB_NEXT(pf_src_tree, &V_tree_src_tracking, cur); #else - rw_enter_write(&pf_consistency_lock); + for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { + next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); #endif - next = RB_NEXT(pf_src_tree, - &tree_src_tracking, cur); - locked = 1; - } - if (cur->rule.ptr != NULL) { - cur->rule.ptr->src_nodes--; - if (cur->rule.ptr->states <= 0 && - cur->rule.ptr->max_src_nodes <= 0) - pf_rm_rule(NULL, cur->rule.ptr); - } - RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, cur); - } - } - if (locked && !waslocked) + if (cur->states <= 0 && cur->expire <= time_second) { + if (! locked) { #ifdef __FreeBSD__ - sx_downgrade(&pf_consistency_lock); + if (!sx_try_upgrade(&V_pf_consistency_lock)) + return (0); #else - rw_exit_write(&pf_consistency_lock); + rw_enter_write(&pf_consistency_lock); #endif + next = RB_NEXT(pf_src_tree, +#ifdef __FreeBSD__ + &V_tree_src_tracking, cur); +#else + &tree_src_tracking, cur); +#endif + locked = 1; + } + if (cur->rule.ptr != NULL) { + cur->rule.ptr->src_nodes--; + if (cur->rule.ptr->states_cur <= 0 && + cur->rule.ptr->max_src_nodes <= 0) + pf_rm_rule(NULL, cur->rule.ptr); + } +#ifdef __FreeBSD__ + RB_REMOVE(pf_src_tree, &V_tree_src_tracking, cur); + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + V_pf_status.src_nodes--; + pool_put(&V_pf_src_tree_pl, cur); +#else + RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, cur); +#endif + } + } + if (locked && !waslocked) #ifdef __FreeBSD__ + { + sx_downgrade(&V_pf_consistency_lock); + } return (1); +#else + rw_exit_write(&pf_consistency_lock); #endif } @@ -1154,15 +1518,17 @@ pf_src_tree_remove_state(struct pf_state *s) u_int32_t timeout; if (s->src_node != NULL) { - if (s->proto == IPPROTO_TCP) { - if (s->src.tcp_est) - --s->src_node->conn; - } + if (s->src.tcp_est) + --s->src_node->conn; if (--s->src_node->states <= 0) { timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = +#ifdef __FreeBSD__ + V_pf_default_rule.timeout[PFTM_SRC_NODE]; +#else pf_default_rule.timeout[PFTM_SRC_NODE]; +#endif s->src_node->expire = time_second + timeout; } } @@ -1171,7 +1537,11 @@ pf_src_tree_remove_state(struct pf_state *s) timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = +#ifdef __FreeBSD__ + V_pf_default_rule.timeout[PFTM_SRC_NODE]; +#else pf_default_rule.timeout[PFTM_SRC_NODE]; +#endif s->nat_src_node->expire = time_second + timeout; } } @@ -1186,29 +1556,49 @@ pf_unlink_state(struct pf_state *cur) if (cur->local_flags & PFSTATE_EXPIRING) return; cur->local_flags |= PFSTATE_EXPIRING; +#else + splassert(IPL_SOFTNET); #endif + if (cur->src.state == PF_TCPS_PROXY_DST) { + /* XXX wire key the right one? */ #ifdef __FreeBSD__ - pf_send_tcp(NULL, cur->rule.ptr, cur->af, + pf_send_tcp(NULL, cur->rule.ptr, cur->key[PF_SK_WIRE]->af, #else - pf_send_tcp(cur->rule.ptr, cur->af, + pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af, #endif - &cur->ext.addr, &cur->lan.addr, - cur->ext.port, cur->lan.port, + &cur->key[PF_SK_WIRE]->addr[1], + &cur->key[PF_SK_WIRE]->addr[0], + cur->key[PF_SK_WIRE]->port[1], + cur->key[PF_SK_WIRE]->port[0], cur->src.seqhi, cur->src.seqlo + 1, TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); } - RB_REMOVE(pf_state_tree_ext_gwy, - &cur->u.s.kif->pfik_ext_gwy, cur); - RB_REMOVE(pf_state_tree_lan_ext, - &cur->u.s.kif->pfik_lan_ext, cur); +#ifdef __FreeBSD__ + RB_REMOVE(pf_state_tree_id, &V_tree_id, cur); +#else RB_REMOVE(pf_state_tree_id, &tree_id, cur); -#if NPFSYNC - if (cur->creatorid == pf_status.hostid) - pfsync_delete_state(cur); +#endif +#if NPFLOW > 0 + if (cur->state_flags & PFSTATE_PFLOW) +#ifdef __FreeBSD__ + if (export_pflow_ptr != NULL) + export_pflow_ptr(cur); +#else + export_pflow(cur); +#endif +#endif +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_delete_state_ptr != NULL) + pfsync_delete_state_ptr(cur); +#else + pfsync_delete_state(cur); +#endif #endif cur->timeout = PFTM_UNLINKED; pf_src_tree_remove_state(cur); + pf_detach_state(cur); } /* callers should be at splsoftnet and hold the @@ -1216,10 +1606,17 @@ pf_unlink_state(struct pf_state *cur) void pf_free_state(struct pf_state *cur) { -#if NPFSYNC - if (pfsyncif != NULL && - (pfsyncif->sc_bulk_send_next == cur || - pfsyncif->sc_bulk_terminator == cur)) +#ifndef __FreeBSD__ + splassert(IPL_SOFTNET); +#endif + +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_state_in_use_ptr != NULL && + pfsync_state_in_use_ptr(cur)) +#else + if (pfsync_state_in_use(cur)) +#endif return; #endif #ifdef __FreeBSD__ @@ -1228,24 +1625,34 @@ pf_free_state(struct pf_state *cur) #else KASSERT(cur->timeout == PFTM_UNLINKED); #endif - if (--cur->rule.ptr->states <= 0 && + if (--cur->rule.ptr->states_cur <= 0 && cur->rule.ptr->src_nodes <= 0) pf_rm_rule(NULL, cur->rule.ptr); if (cur->nat_rule.ptr != NULL) - if (--cur->nat_rule.ptr->states <= 0 && + if (--cur->nat_rule.ptr->states_cur <= 0 && cur->nat_rule.ptr->src_nodes <= 0) pf_rm_rule(NULL, cur->nat_rule.ptr); if (cur->anchor.ptr != NULL) - if (--cur->anchor.ptr->states <= 0) + if (--cur->anchor.ptr->states_cur <= 0) pf_rm_rule(NULL, cur->anchor.ptr); pf_normalize_tcp_cleanup(cur); - pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE); - TAILQ_REMOVE(&state_list, cur, u.s.entry_list); + pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE); +#ifdef __FreeBSD__ + TAILQ_REMOVE(&V_state_list, cur, entry_list); +#else + TAILQ_REMOVE(&state_list, cur, entry_list); +#endif if (cur->tag) pf_tag_unref(cur->tag); +#ifdef __FreeBSD__ + pool_put(&V_pf_state_pl, cur); + V_pf_status.fcounters[FCNT_STATE_REMOVALS]++; + V_pf_status.states--; +#else pool_put(&pf_state_pl, cur); pf_status.fcounters[FCNT_STATE_REMOVALS]++; pf_status.states--; +#endif } #ifdef __FreeBSD__ @@ -1259,28 +1666,32 @@ pf_purge_expired_states(u_int32_t maxcheck) static struct pf_state *cur = NULL; struct pf_state *next; #ifdef __FreeBSD__ - int locked = waslocked; + int locked = waslocked; #else - int locked = 0; + int locked = 0; #endif while (maxcheck--) { /* wrap to start of list when we hit the end */ if (cur == NULL) { +#ifdef __FreeBSD__ + cur = TAILQ_FIRST(&V_state_list); +#else cur = TAILQ_FIRST(&state_list); +#endif if (cur == NULL) break; /* list empty */ } /* get next state, as cur may get deleted */ - next = TAILQ_NEXT(cur, u.s.entry_list); + next = TAILQ_NEXT(cur, entry_list); if (cur->timeout == PFTM_UNLINKED) { /* free unlinked state */ if (! locked) { #ifdef __FreeBSD__ - if (!sx_try_upgrade(&pf_consistency_lock)) - return (0); + if (!sx_try_upgrade(&V_pf_consistency_lock)) + return (0); #else rw_enter_write(&pf_consistency_lock); #endif @@ -1292,8 +1703,8 @@ pf_purge_expired_states(u_int32_t maxcheck) pf_unlink_state(cur); if (! locked) { #ifdef __FreeBSD__ - if (!sx_try_upgrade(&pf_consistency_lock)) - return (0); + if (!sx_try_upgrade(&V_pf_consistency_lock)) + return (0); #else rw_enter_write(&pf_consistency_lock); #endif @@ -1306,7 +1717,7 @@ pf_purge_expired_states(u_int32_t maxcheck) #ifdef __FreeBSD__ if (!waslocked && locked) - sx_downgrade(&pf_consistency_lock); + sx_downgrade(&V_pf_consistency_lock); return (1); #else @@ -1320,7 +1731,7 @@ pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) { if (aw->type != PF_ADDR_TABLE) return (0); - if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) + if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL) return (1); return (0); } @@ -1367,34 +1778,33 @@ pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) #ifdef INET6 case AF_INET6: { u_int16_t b; - u_int8_t i, curstart = 255, curend = 0, - maxstart = 0, maxend = 0; + u_int8_t i, curstart, curend, maxstart, maxend; + curstart = curend = maxstart = maxend = 255; for (i = 0; i < 8; i++) { if (!addr->addr16[i]) { if (curstart == 255) curstart = i; - else - curend = i; + curend = i; } else { - if (curstart) { - if ((curend - curstart) > - (maxend - maxstart)) { - maxstart = curstart; - maxend = curend; - curstart = 255; - } + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; } + curstart = curend = 255; } } + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; + } for (i = 0; i < 8; i++) { if (i >= maxstart && i <= maxend) { - if (maxend != 7) { - if (i == maxstart) - printf(":"); - } else { - if (i == maxend) - printf(":"); - } + if (i == 0) + printf(":"); + if (i == maxend) + printf(":"); } else { b = ntohs(addr->addr16[i]); printf("%x", b); @@ -1415,39 +1825,87 @@ pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) void pf_print_state(struct pf_state *s) { - switch (s->proto) { + pf_print_state_parts(s, NULL, NULL); +} + +void +pf_print_state_parts(struct pf_state *s, + struct pf_state_key *skwp, struct pf_state_key *sksp) +{ + struct pf_state_key *skw, *sks; + u_int8_t proto, dir; + + /* Do our best to fill these, but they're skipped if NULL */ + skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); + sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); + proto = skw ? skw->proto : (sks ? sks->proto : 0); + dir = s ? s->direction : 0; + + switch (proto) { + case IPPROTO_IPV4: + printf("IPv4"); + break; + case IPPROTO_IPV6: + printf("IPv6"); + break; case IPPROTO_TCP: - printf("TCP "); + printf("TCP"); break; case IPPROTO_UDP: - printf("UDP "); + printf("UDP"); break; case IPPROTO_ICMP: - printf("ICMP "); + printf("ICMP"); break; case IPPROTO_ICMPV6: - printf("ICMPV6 "); + printf("ICMPv6"); break; default: - printf("%u ", s->proto); + printf("%u", skw->proto); break; } - pf_print_host(&s->lan.addr, s->lan.port, s->af); - printf(" "); - pf_print_host(&s->gwy.addr, s->gwy.port, s->af); - printf(" "); - pf_print_host(&s->ext.addr, s->ext.port, s->af); - printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo, - s->src.seqhi, s->src.max_win, s->src.seqdiff); - if (s->src.wscale && s->dst.wscale) - printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); - printf("]"); - printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, - s->dst.seqhi, s->dst.max_win, s->dst.seqdiff); - if (s->src.wscale && s->dst.wscale) - printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); - printf("]"); - printf(" %u:%u", s->src.state, s->dst.state); + switch (dir) { + case PF_IN: + printf(" in"); + break; + case PF_OUT: + printf(" out"); + break; + } + if (skw) { + printf(" wire: "); + pf_print_host(&skw->addr[0], skw->port[0], skw->af); + printf(" "); + pf_print_host(&skw->addr[1], skw->port[1], skw->af); + } + if (sks) { + printf(" stack: "); + if (sks != skw) { + pf_print_host(&sks->addr[0], sks->port[0], sks->af); + printf(" "); + pf_print_host(&sks->addr[1], sks->port[1], sks->af); + } else + printf("-"); + } + if (s) { + if (proto == IPPROTO_TCP) { + printf(" [lo=%u high=%u win=%u modulator=%u", + s->src.seqlo, s->src.seqhi, + s->src.max_win, s->src.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", + s->src.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" [lo=%u high=%u win=%u modulator=%u", + s->dst.seqlo, s->dst.seqhi, + s->dst.max_win, s->dst.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", + s->dst.wscale & PF_WSCALE_MASK); + printf("]"); + } + printf(" %u:%u", s->src.state, s->dst.state); + } } void @@ -1530,6 +1988,7 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) return (1); switch (aw1->type) { case PF_ADDR_ADDRMASK: + case PF_ADDR_RANGE: if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) return (1); if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) @@ -1653,12 +2112,13 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, struct pf_addr oia, ooa; PF_ACPY(&oia, ia, af); - PF_ACPY(&ooa, oa, af); + if (oa) + PF_ACPY(&ooa, oa, af); /* Change inner protocol port, fix inner protocol checksum. */ if (ip != NULL) { u_int16_t oip = *ip; - u_int32_t opc = 0; /* make the compiler happy */ + u_int32_t opc; if (pc != NULL) opc = *pc; @@ -1702,31 +2162,33 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, break; #endif /* INET6 */ } - /* Change outer ip address, fix outer ip or icmpv6 checksum. */ - PF_ACPY(oa, na, af); - switch (af) { + /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */ + if (oa) { + PF_ACPY(oa, na, af); + switch (af) { #ifdef INET - case AF_INET: - *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, - ooa.addr16[0], oa->addr16[0], 0), - ooa.addr16[1], oa->addr16[1], 0); - break; + case AF_INET: + *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, + ooa.addr16[0], oa->addr16[0], 0), + ooa.addr16[1], oa->addr16[1], 0); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( - pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( - pf_cksum_fixup(pf_cksum_fixup(*ic, - ooa.addr16[0], oa->addr16[0], u), - ooa.addr16[1], oa->addr16[1], u), - ooa.addr16[2], oa->addr16[2], u), - ooa.addr16[3], oa->addr16[3], u), - ooa.addr16[4], oa->addr16[4], u), - ooa.addr16[5], oa->addr16[5], u), - ooa.addr16[6], oa->addr16[6], u), - ooa.addr16[7], oa->addr16[7], u); - break; + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + ooa.addr16[0], oa->addr16[0], u), + ooa.addr16[1], oa->addr16[1], u), + ooa.addr16[2], oa->addr16[2], u), + ooa.addr16[3], oa->addr16[3], u), + ooa.addr16[4], oa->addr16[4], u), + ooa.addr16[5], oa->addr16[5], u), + ooa.addr16[6], oa->addr16[6], u), + ooa.addr16[7], oa->addr16[7], u); + break; #endif /* INET6 */ + } } } @@ -1748,7 +2210,7 @@ pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, int copyback = 0, i, olen; struct sackblk sack; -#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) +#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) if (hlen < TCPOLEN_SACKLEN || !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) return 0; @@ -1817,9 +2279,9 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, #endif /* INET6 */ struct tcphdr *th; char *opt; - struct pf_mtag *pf_mtag; - #ifdef __FreeBSD__ + struct pf_mtag *pf_mtag; + KASSERT( #ifdef INET af == AF_INET @@ -1841,7 +2303,7 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, #ifdef INET6 h6 = NULL; #endif -#endif +#endif /* __FreeBSD__ */ /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); @@ -1867,42 +2329,46 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, return; #ifdef __FreeBSD__ #ifdef MAC - if (replyto) - mac_netinet_firewall_reply(replyto, m); - else - mac_netinet_firewall_send(m); -#else - (void)replyto; -#endif + mac_netinet_firewall_send(m); #endif if ((pf_mtag = pf_get_mtag(m)) == NULL) { m_freem(m); return; } +#endif if (tag) #ifdef __FreeBSD__ m->m_flags |= M_SKIP_FIREWALL; + pf_mtag->tag = rtag; #else - pf_mtag->flags |= PF_TAG_GENERATED; + m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; + m->m_pkthdr.pf.tag = rtag; #endif - pf_mtag->tag = rtag; - if (r != NULL && r->rtableid >= 0) #ifdef __FreeBSD__ { M_SETFIB(m, r->rtableid); -#endif pf_mtag->rtableid = r->rtableid; +#else + m->m_pkthdr.pf.rtableid = r->rtableid; +#endif #ifdef __FreeBSD__ } #endif + #ifdef ALTQ if (r != NULL && r->qid) { +#ifdef __FreeBSD__ pf_mtag->qid = r->qid; + /* add hints for ecn */ - pf_mtag->af = af; pf_mtag->hdr = mtod(m, struct ip *); +#else + m->m_pkthdr.pf.qid = r->qid; + /* add hints for ecn */ + m->m_pkthdr.pf.hdr = mtod(m, struct ip *); +#endif } #endif /* ALTQ */ m->m_data += max_linkhdr; @@ -1968,18 +2434,19 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, #ifdef __FreeBSD__ h->ip_off = V_path_mtu_discovery ? IP_DF : 0; h->ip_len = len; + h->ip_ttl = ttl ? ttl : V_ip_defttl; #else - h->ip_off = htons(ip_mtudisc ? IP_DF : 0); h->ip_len = htons(len); + h->ip_off = htons(ip_mtudisc ? IP_DF : 0); + h->ip_ttl = ttl ? ttl : ip_defttl; #endif - h->ip_ttl = ttl ? ttl : V_ip_defttl; h->ip_sum = 0; if (eh == NULL) { #ifdef __FreeBSD__ - PF_UNLOCK(); - ip_output(m, (void *)NULL, (void *)NULL, 0, - (void *)NULL, (void *)NULL); - PF_LOCK(); + PF_UNLOCK(); + ip_output(m, (void *)NULL, (void *)NULL, 0, + (void *)NULL, (void *)NULL); + PF_LOCK(); #else /* ! __FreeBSD__ */ ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL, (void *)NULL); @@ -2027,55 +2494,66 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); PF_LOCK(); #else - ip6_output(m, NULL, NULL, 0, NULL, NULL); + ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); #endif break; #endif /* INET6 */ } } -void +static void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) { - struct pf_mtag *pf_mtag; struct mbuf *m0; #ifdef __FreeBSD__ +#ifdef INET struct ip *ip; #endif + struct pf_mtag *pf_mtag; +#endif #ifdef __FreeBSD__ m0 = m_copypacket(m, M_DONTWAIT); if (m0 == NULL) return; #else - m0 = m_copy(m, 0, M_COPYALL); + if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL) + return; #endif + +#ifdef __FreeBSD__ if ((pf_mtag = pf_get_mtag(m0)) == NULL) return; -#ifdef __FreeBSD__ /* XXX: revisit */ m0->m_flags |= M_SKIP_FIREWALL; #else - pf_mtag->flags |= PF_TAG_GENERATED; + m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; #endif if (r->rtableid >= 0) #ifdef __FreeBSD__ { M_SETFIB(m0, r->rtableid); -#endif pf_mtag->rtableid = r->rtableid; +#else + m0->m_pkthdr.pf.rtableid = r->rtableid; +#endif #ifdef __FreeBSD__ } #endif #ifdef ALTQ if (r->qid) { +#ifdef __FreeBSD__ pf_mtag->qid = r->qid; /* add hints for ecn */ - pf_mtag->af = af; pf_mtag->hdr = mtod(m0, struct ip *); +#else + m0->m_pkthdr.pf.qid = r->qid; + /* add hints for ecn */ + m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *); +#endif } #endif /* ALTQ */ @@ -2155,6 +2633,44 @@ pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, } } +/* + * Return 1 if b <= a <= e, otherwise return 0. + */ +int +pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, + struct pf_addr *a, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + if ((a->addr32[0] < b->addr32[0]) || + (a->addr32[0] > e->addr32[0])) + return (0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + int i; + + /* check a >= b */ + for (i = 0; i < 4; ++i) + if (a->addr32[i] > b->addr32[i]) + break; + else if (a->addr32[i] < b->addr32[i]) + return (0); + /* check a <= e */ + for (i = 0; i < 4; ++i) + if (a->addr32[i] < e->addr32[i]) + break; + else if (a->addr32[i] > e->addr32[i]) + return (0); + break; + } +#endif /* INET6 */ + } + return (1); +} + int pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) { @@ -2206,88 +2722,80 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) return (pf_match(op, a1, a2, g)); } -#ifndef __FreeBSD__ -struct pf_mtag * -pf_find_mtag(struct mbuf *m) -{ - struct m_tag *mtag; - - if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) - return (NULL); - - return ((struct pf_mtag *)(mtag + 1)); -} - -struct pf_mtag * -pf_get_mtag(struct mbuf *m) -{ - struct m_tag *mtag; - - if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) { - mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag), - M_NOWAIT); - if (mtag == NULL) - return (NULL); - bzero(mtag + 1, sizeof(struct pf_mtag)); - m_tag_prepend(m, mtag); - } - - return ((struct pf_mtag *)(mtag + 1)); -} -#endif - int -pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag, - int *tag) +#ifdef __FreeBSD__ +pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, + struct pf_mtag *pf_mtag) +#else +pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) +#endif { if (*tag == -1) +#ifdef __FreeBSD__ *tag = pf_mtag->tag; +#else + *tag = m->m_pkthdr.pf.tag; +#endif return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } int -pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid) +#ifdef __FreeBSD__ +pf_tag_packet(struct mbuf *m, int tag, int rtableid, + struct pf_mtag *pf_mtag) +#else +pf_tag_packet(struct mbuf *m, int tag, int rtableid) +#endif { if (tag <= 0 && rtableid < 0) return (0); - if (pf_mtag == NULL) - if ((pf_mtag = pf_get_mtag(m)) == NULL) - return (1); if (tag > 0) +#ifdef __FreeBSD__ pf_mtag->tag = tag; +#else + m->m_pkthdr.pf.tag = tag; +#endif if (rtableid >= 0) #ifdef __FreeBSD__ { M_SETFIB(m, rtableid); -#endif - pf_mtag->rtableid = rtableid; -#ifdef __FreeBSD__ } +#else + m->m_pkthdr.pf.rtableid = rtableid; #endif return (0); } -static void +void pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, - struct pf_rule **r, struct pf_rule **a, int *match) + struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; (*r)->anchor->match = 0; if (match) *match = 0; +#ifdef __FreeBSD__ + if (*depth >= sizeof(V_pf_anchor_stack) / + sizeof(V_pf_anchor_stack[0])) { +#else if (*depth >= sizeof(pf_anchor_stack) / sizeof(pf_anchor_stack[0])) { +#endif printf("pf_step_into_anchor: stack overflow\n"); *r = TAILQ_NEXT(*r, entries); return; } else if (*depth == 0 && a != NULL) *a = *r; +#ifdef __FreeBSD__ + f = V_pf_anchor_stack + (*depth)++; +#else f = pf_anchor_stack + (*depth)++; +#endif f->rs = *rs; f->r = *r; if ((*r)->anchor_wildcard) { @@ -2316,7 +2824,11 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, do { if (*depth <= 0) break; +#ifdef __FreeBSD__ + f = V_pf_anchor_stack + *depth - 1; +#else f = pf_anchor_stack + *depth - 1; +#endif if (f->parent != NULL && f->child != NULL) { if (f->child->match || (match != NULL && *match)) { @@ -2337,7 +2849,7 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, if (*depth == 0 && a != NULL) *a = NULL; *rs = f->rs; - if (f->r->anchor->match || (match != NULL && *match)) + if (f->r->anchor->match || (match != NULL && *match)) quick = f->r->quick; *r = TAILQ_NEXT(f->r, entries); } while (*r == NULL); @@ -2402,567 +2914,6 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af) } #endif /* INET6 */ -#define mix(a,b,c) \ - do { \ - a -= b; a -= c; a ^= (c >> 13); \ - b -= c; b -= a; b ^= (a << 8); \ - c -= a; c -= b; c ^= (b >> 13); \ - a -= b; a -= c; a ^= (c >> 12); \ - b -= c; b -= a; b ^= (a << 16); \ - c -= a; c -= b; c ^= (b >> 5); \ - a -= b; a -= c; a ^= (c >> 3); \ - b -= c; b -= a; b ^= (a << 10); \ - c -= a; c -= b; c ^= (b >> 15); \ - } while (0) - -/* - * hash function based on bridge_hash in if_bridge.c - */ -void -pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, - struct pf_poolhashkey *key, sa_family_t af) -{ - u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; - - switch (af) { -#ifdef INET - case AF_INET: - a += inaddr->addr32[0]; - b += key->key32[1]; - mix(a, b, c); - hash->addr32[0] = c + key->key32[2]; - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - a += inaddr->addr32[0]; - b += inaddr->addr32[2]; - mix(a, b, c); - hash->addr32[0] = c; - a += inaddr->addr32[1]; - b += inaddr->addr32[3]; - c += key->key32[1]; - mix(a, b, c); - hash->addr32[1] = c; - a += inaddr->addr32[2]; - b += inaddr->addr32[1]; - c += key->key32[2]; - mix(a, b, c); - hash->addr32[2] = c; - a += inaddr->addr32[3]; - b += inaddr->addr32[0]; - c += key->key32[3]; - mix(a, b, c); - hash->addr32[3] = c; - break; -#endif /* INET6 */ - } -} - -int -pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, - struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) -{ - unsigned char hash[16]; - struct pf_pool *rpool = &r->rpool; - struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; - struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; - struct pf_pooladdr *acur = rpool->cur; - struct pf_src_node k; - - if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && - (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { - k.af = af; - PF_ACPY(&k.addr, saddr, af); - if (r->rule_flag & PFRULE_RULESRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) - k.rule.ptr = r; - else - k.rule.ptr = NULL; - pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; - *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); - if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { - PF_ACPY(naddr, &(*sn)->raddr, af); - if (pf_status.debug >= PF_DEBUG_MISC) { - printf("pf_map_addr: src tracking maps "); - pf_print_host(&k.addr, 0, af); - printf(" to "); - pf_print_host(naddr, 0, af); - printf("\n"); - } - return (0); - } - } - - if (rpool->cur->addr.type == PF_ADDR_NOROUTE) - return (1); - if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - switch (af) { -#ifdef INET - case AF_INET: - if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && - (rpool->opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN) - return (1); - raddr = &rpool->cur->addr.p.dyn->pfid_addr4; - rmask = &rpool->cur->addr.p.dyn->pfid_mask4; - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && - (rpool->opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN) - return (1); - raddr = &rpool->cur->addr.p.dyn->pfid_addr6; - rmask = &rpool->cur->addr.p.dyn->pfid_mask6; - break; -#endif /* INET6 */ - } - } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { - if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) - return (1); /* unsupported */ - } else { - raddr = &rpool->cur->addr.v.a.addr; - rmask = &rpool->cur->addr.v.a.mask; - } - - switch (rpool->opts & PF_POOL_TYPEMASK) { - case PF_POOL_NONE: - PF_ACPY(naddr, raddr, af); - break; - case PF_POOL_BITMASK: - PF_POOLMASK(naddr, raddr, rmask, saddr, af); - break; - case PF_POOL_RANDOM: - if (init_addr != NULL && PF_AZERO(init_addr, af)) { - switch (af) { -#ifdef INET - case AF_INET: - rpool->counter.addr32[0] = htonl(arc4random()); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (rmask->addr32[3] != 0xffffffff) - rpool->counter.addr32[3] = - htonl(arc4random()); - else - break; - if (rmask->addr32[2] != 0xffffffff) - rpool->counter.addr32[2] = - htonl(arc4random()); - else - break; - if (rmask->addr32[1] != 0xffffffff) - rpool->counter.addr32[1] = - htonl(arc4random()); - else - break; - if (rmask->addr32[0] != 0xffffffff) - rpool->counter.addr32[0] = - htonl(arc4random()); - break; -#endif /* INET6 */ - } - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); - PF_ACPY(init_addr, naddr, af); - - } else { - PF_AINC(&rpool->counter, af); - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); - } - break; - case PF_POOL_SRCHASH: - pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); - PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); - break; - case PF_POOL_ROUNDROBIN: - if (rpool->cur->addr.type == PF_ADDR_TABLE) { - if (!pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) - goto get_addr; - } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) - goto get_addr; - } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) - goto get_addr; - - try_next: - if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) - rpool->cur = TAILQ_FIRST(&rpool->list); - if (rpool->cur->addr.type == PF_ADDR_TABLE) { - rpool->tblidx = -1; - if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) { - /* table contains no address of type 'af' */ - if (rpool->cur != acur) - goto try_next; - return (1); - } - } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - rpool->tblidx = -1; - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) { - /* table contains no address of type 'af' */ - if (rpool->cur != acur) - goto try_next; - return (1); - } - } else { - raddr = &rpool->cur->addr.v.a.addr; - rmask = &rpool->cur->addr.v.a.mask; - PF_ACPY(&rpool->counter, raddr, af); - } - - get_addr: - PF_ACPY(naddr, &rpool->counter, af); - if (init_addr != NULL && PF_AZERO(init_addr, af)) - PF_ACPY(init_addr, naddr, af); - PF_AINC(&rpool->counter, af); - break; - } - if (*sn != NULL) - PF_ACPY(&(*sn)->raddr, naddr, af); - - if (pf_status.debug >= PF_DEBUG_MISC && - (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { - printf("pf_map_addr: selected address "); - pf_print_host(naddr, 0, af); - printf("\n"); - } - - return (0); -} - -int -pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, - struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, - struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, - struct pf_src_node **sn) -{ - struct pf_state_cmp key; - struct pf_addr init_addr; - u_int16_t cut; - - bzero(&init_addr, sizeof(init_addr)); - if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) - return (1); - - if (proto == IPPROTO_ICMP) { - low = 1; - high = 65535; - } - - do { - key.af = af; - key.proto = proto; - PF_ACPY(&key.ext.addr, daddr, key.af); - PF_ACPY(&key.gwy.addr, naddr, key.af); - key.ext.port = dport; - - /* - * port search; start random, step; - * similar 2 portloop in in_pcbbind - */ - if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || - proto == IPPROTO_ICMP)) { - key.gwy.port = dport; - if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) - return (0); - } else if (low == 0 && high == 0) { - key.gwy.port = *nport; - if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) - return (0); - } else if (low == high) { - key.gwy.port = htons(low); - if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) { - *nport = htons(low); - return (0); - } - } else { - u_int16_t tmp; - - if (low > high) { - tmp = low; - low = high; - high = tmp; - } - /* low < high */ - cut = htonl(arc4random()) % (1 + high - low) + low; - /* low <= cut <= high */ - for (tmp = cut; tmp <= high; ++(tmp)) { - key.gwy.port = htons(tmp); - if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == - NULL) { - *nport = htons(tmp); - return (0); - } - } - for (tmp = cut - 1; tmp >= low; --(tmp)) { - key.gwy.port = htons(tmp); - if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == - NULL) { - *nport = htons(tmp); - return (0); - } - } - } - - switch (r->rpool.opts & PF_POOL_TYPEMASK) { - case PF_POOL_RANDOM: - case PF_POOL_ROUNDROBIN: - if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) - return (1); - break; - case PF_POOL_NONE: - case PF_POOL_SRCHASH: - case PF_POOL_BITMASK: - default: - return (1); - } - } while (! PF_AEQ(&init_addr, naddr, af) ); - - return (1); /* none available */ -} - -struct pf_rule * -pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, - int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, - struct pf_addr *daddr, u_int16_t dport, int rs_num) -{ - struct pf_rule *r, *rm = NULL; - struct pf_ruleset *ruleset = NULL; - int tag = -1; - int rtableid = -1; - int asd = 0; - - r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); - while (r && rm == NULL) { - struct pf_rule_addr *src = NULL, *dst = NULL; - struct pf_addr_wrap *xdst = NULL; - - if (r->action == PF_BINAT && direction == PF_IN) { - src = &r->dst; - if (r->rpool.cur != NULL) - xdst = &r->rpool.cur->addr; - } else { - src = &r->src; - dst = &r->dst; - } - - r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) - r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != direction) - r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != pd->af) - r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != pd->proto) - r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, - src->neg, kif)) - r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : - PF_SKIP_DST_ADDR].ptr; - else if (src->port_op && !pf_match_port(src->port_op, - src->port[0], src->port[1], sport)) - r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : - PF_SKIP_DST_PORT].ptr; - else if (dst != NULL && - PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) - r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, - 0, NULL)) - r = TAILQ_NEXT(r, entries); - else if (dst != NULL && dst->port_op && - !pf_match_port(dst->port_op, dst->port[0], - dst->port[1], dport)) - r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != - IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, - off, pd->hdr.tcp), r->os_fingerprint))) - r = TAILQ_NEXT(r, entries); - else { - if (r->tag) - tag = r->tag; - if (r->rtableid >= 0) - rtableid = r->rtableid; - if (r->anchor == NULL) { - rm = r; - } else - pf_step_into_anchor(&asd, &ruleset, rs_num, - &r, NULL, NULL); - } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, - NULL, NULL); - } - if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) - return (NULL); - if (rm != NULL && (rm->action == PF_NONAT || - rm->action == PF_NORDR || rm->action == PF_NOBINAT)) - return (NULL); - return (rm); -} - -struct pf_rule * -pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, - struct pfi_kif *kif, struct pf_src_node **sn, - struct pf_addr *saddr, u_int16_t sport, - struct pf_addr *daddr, u_int16_t dport, - struct pf_addr *naddr, u_int16_t *nport) -{ - struct pf_rule *r = NULL; - - if (direction == PF_OUT) { - r = pf_match_translation(pd, m, off, direction, kif, saddr, - sport, daddr, dport, PF_RULESET_BINAT); - if (r == NULL) - r = pf_match_translation(pd, m, off, direction, kif, - saddr, sport, daddr, dport, PF_RULESET_NAT); - } else { - r = pf_match_translation(pd, m, off, direction, kif, saddr, - sport, daddr, dport, PF_RULESET_RDR); - if (r == NULL) - r = pf_match_translation(pd, m, off, direction, kif, - saddr, sport, daddr, dport, PF_RULESET_BINAT); - } - - if (r != NULL) { - switch (r->action) { - case PF_NONAT: - case PF_NOBINAT: - case PF_NORDR: - return (NULL); - case PF_NAT: - if (pf_get_sport(pd->af, pd->proto, r, saddr, - daddr, dport, naddr, nport, r->rpool.proxy_port[0], - r->rpool.proxy_port[1], sn)) { - DPFPRINTF(PF_DEBUG_MISC, - ("pf: NAT proxy port allocation " - "(%u-%u) failed\n", - r->rpool.proxy_port[0], - r->rpool.proxy_port[1])); - return (NULL); - } - break; - case PF_BINAT: - switch (direction) { - case PF_OUT: - if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ - switch (pd->af) { -#ifdef INET - case AF_INET: - if (r->rpool.cur->addr.p.dyn-> - pfid_acnt4 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->rpool.cur->addr.p.dyn-> - pfid_addr4, - &r->rpool.cur->addr.p.dyn-> - pfid_mask4, - saddr, AF_INET); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (r->rpool.cur->addr.p.dyn-> - pfid_acnt6 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->rpool.cur->addr.p.dyn-> - pfid_addr6, - &r->rpool.cur->addr.p.dyn-> - pfid_mask6, - saddr, AF_INET6); - break; -#endif /* INET6 */ - } - } else - PF_POOLMASK(naddr, - &r->rpool.cur->addr.v.a.addr, - &r->rpool.cur->addr.v.a.mask, - saddr, pd->af); - break; - case PF_IN: - if (r->src.addr.type == PF_ADDR_DYNIFTL) { - switch (pd->af) { -#ifdef INET - case AF_INET: - if (r->src.addr.p.dyn-> - pfid_acnt4 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->src.addr.p.dyn-> - pfid_addr4, - &r->src.addr.p.dyn-> - pfid_mask4, - daddr, AF_INET); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (r->src.addr.p.dyn-> - pfid_acnt6 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->src.addr.p.dyn-> - pfid_addr6, - &r->src.addr.p.dyn-> - pfid_mask6, - daddr, AF_INET6); - break; -#endif /* INET6 */ - } - } else - PF_POOLMASK(naddr, - &r->src.addr.v.a.addr, - &r->src.addr.v.a.mask, daddr, - pd->af); - break; - } - break; - case PF_RDR: { - if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) - return (NULL); - if ((r->rpool.opts & PF_POOL_TYPEMASK) == - PF_POOL_BITMASK) - PF_POOLMASK(naddr, naddr, - &r->rpool.cur->addr.v.a.mask, daddr, - pd->af); - - if (r->rpool.proxy_port[1]) { - u_int32_t tmp_nport; - - tmp_nport = ((ntohs(dport) - - ntohs(r->dst.port[0])) % - (r->rpool.proxy_port[1] - - r->rpool.proxy_port[0] + 1)) + - r->rpool.proxy_port[0]; - - /* wrap around if necessary */ - if (tmp_nport > 65535) - tmp_nport -= 65535; - *nport = htons((u_int16_t)tmp_nport); - } else if (r->rpool.proxy_port[0]) - *nport = htons(r->rpool.proxy_port[0]); - break; - } - default: - return (NULL); - } - } - - return (r); -} - int #ifdef __FreeBSD__ pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg) @@ -2983,7 +2934,8 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) return (-1); pd->lookup.uid = UID_MAX; pd->lookup.gid = GID_MAX; - pd->lookup.pid = NO_PID; /* XXX: revisit */ + pd->lookup.pid = NO_PID; + #ifdef __FreeBSD__ if (inp_arg != NULL) { INP_LOCK_ASSERT(inp_arg); @@ -2997,6 +2949,7 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) return (1); } #endif + switch (pd->proto) { case IPPROTO_TCP: if (pd->hdr.tcp == NULL) @@ -3039,21 +2992,24 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) #ifdef INET case AF_INET: #ifdef __FreeBSD__ - INP_INFO_RLOCK(pi); /* XXX LOR */ - inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4, - dport, 0, NULL); + /* + * XXXRW: would be nice if we had an mbuf here so that we + * could use in_pcblookup_mbuf(). + */ + inp = in_pcblookup(pi, saddr->v4, sport, daddr->v4, + dport, INPLOOKUP_RLOCKPCB, NULL); if (inp == NULL) { - inp = in_pcblookup_hash(pi, saddr->v4, sport, - daddr->v4, dport, INPLOOKUP_WILDCARD, NULL); - if(inp == NULL) { - INP_INFO_RUNLOCK(pi); + inp = in_pcblookup(pi, saddr->v4, sport, + daddr->v4, dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, NULL); + if (inp == NULL) return (-1); - } } #else inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport); if (inp == NULL) { - inp = in_pcblookup_listen(tb, daddr->v4, dport, 0); + inp = in_pcblookup_listen(tb, daddr->v4, dport, 0, + NULL); if (inp == NULL) return (-1); } @@ -3063,22 +3019,25 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) #ifdef INET6 case AF_INET6: #ifdef __FreeBSD__ - INP_INFO_RLOCK(pi); - inp = in6_pcblookup_hash(pi, &saddr->v6, sport, - &daddr->v6, dport, 0, NULL); + /* + * XXXRW: would be nice if we had an mbuf here so that we + * could use in6_pcblookup_mbuf(). + */ + inp = in6_pcblookup(pi, &saddr->v6, sport, + &daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL); if (inp == NULL) { - inp = in6_pcblookup_hash(pi, &saddr->v6, sport, - &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL); - if (inp == NULL) { - INP_INFO_RUNLOCK(pi); + inp = in6_pcblookup(pi, &saddr->v6, sport, + &daddr->v6, dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, NULL); + if (inp == NULL) return (-1); - } } #else inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, dport); if (inp == NULL) { - inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0); + inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0, + NULL); if (inp == NULL) return (-1); } @@ -3090,6 +3049,7 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) return (-1); } #ifdef __FreeBSD__ + INP_RLOCK_ASSERT(inp); #ifndef __rtems__ pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; @@ -3097,7 +3057,7 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) pd->lookup.uid = BSD_DEFAULT_UID; pd->lookup.gid = BSD_DEFAULT_GID; #endif /* __rtems__ */ - INP_INFO_RUNLOCK(pi); + INP_RUNLOCK(inp); #else pd->lookup.uid = inp->inp_socket->so_euid; pd->lookup.gid = inp->inp_socket->so_egid; @@ -3152,7 +3112,11 @@ pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; +#ifdef __FreeBSD__ u_int16_t mss = V_tcp_mssdflt; +#else + u_int16_t mss = tcp_mssdflt; +#endif hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) @@ -3185,7 +3149,7 @@ pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) } u_int16_t -pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) +pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) { #ifdef INET struct sockaddr_in *dst; @@ -3196,8 +3160,13 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) struct route_in6 ro6; #endif /* INET6 */ struct rtentry *rt = NULL; - int hlen = 0; /* make the compiler happy */ +#ifdef __FreeBSD__ + int hlen = 0; u_int16_t mss = V_tcp_mssdflt; +#else + int hlen; + u_int16_t mss = tcp_mssdflt; +#endif switch (af) { #ifdef INET @@ -3209,7 +3178,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; #ifdef __FreeBSD__ - in_rtalloc_ign(&ro, 0, RT_DEFAULT_FIB); + in_rtalloc_ign(&ro, 0, rtableid); #else /* ! __FreeBSD__ */ rtalloc_noclone(&ro, NO_CLONING); #endif @@ -3225,7 +3194,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; #ifdef __FreeBSD__ - in6_rtalloc_ign(&ro6, 0, RT_DEFAULT_FIB); + in6_rtalloc_ign(&ro6, 0, rtableid); #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro6, NO_CLONING); #endif @@ -3236,7 +3205,11 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) if (rt && rt->rt_ifp) { mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); +#ifdef __FreeBSD__ mss = max(V_tcp_mssdflt, mss); +#else + mss = max(tcp_mssdflt, mss); +#endif RTFREE(rt); } mss = min(mss, offer); @@ -3248,55 +3221,113 @@ void pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) { struct pf_rule *r = s->rule.ptr; + struct pf_src_node *sn = NULL; s->rt_kif = NULL; if (!r->rt || r->rt == PF_FASTROUTE) return; - switch (s->af) { + switch (s->key[PF_SK_WIRE]->af) { #ifdef INET case AF_INET: - pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, - &s->nat_src_node); + pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn); s->rt_kif = r->rpool.cur->kif; break; #endif /* INET */ #ifdef INET6 case AF_INET6: - pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, - &s->nat_src_node); + pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn); s->rt_kif = r->rpool.cur->kif; break; #endif /* INET6 */ } } +u_int32_t +pf_tcp_iss(struct pf_pdesc *pd) +{ + MD5_CTX ctx; + u_int32_t digest[4]; + +#ifdef __FreeBSD__ + if (V_pf_tcp_secret_init == 0) { + read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); + MD5Init(&V_pf_tcp_secret_ctx); + MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret, + sizeof(V_pf_tcp_secret)); + V_pf_tcp_secret_init = 1; + } + + ctx = V_pf_tcp_secret_ctx; +#else + if (pf_tcp_secret_init == 0) { + arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); + MD5Init(&pf_tcp_secret_ctx); + MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, + sizeof(pf_tcp_secret)); + pf_tcp_secret_init = 1; + } + + ctx = pf_tcp_secret_ctx; +#endif + + MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); + MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); + if (pd->af == AF_INET6) { + MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); + MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); + } else { + MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); + MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); + } + MD5Final((u_char *)digest, &ctx); +#ifdef __FreeBSD__ + V_pf_tcp_iss_off += 4096; +#define ISN_RANDOM_INCREMENT (4096 - 1) + return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) + + V_pf_tcp_iss_off); +#undef ISN_RANDOM_INCREMENT +#else + pf_tcp_iss_off += 4096; + return (digest[0] + tcp_iss + pf_tcp_iss_off); +#endif +} + int -pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, +pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, -#ifdef __FreeBSD__ struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, +#ifdef __FreeBSD__ struct ifqueue *ifq, struct inpcb *inp) #else - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq) #endif { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; - struct tcphdr *th = pd->hdr.tcp; - u_int16_t bport, nport = 0; sa_family_t af = pd->af; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; + struct tcphdr *th = pd->hdr.tcp; + struct pf_state_key *skw = NULL, *sks = NULL; + struct pf_state_key *sk = NULL, *nk = NULL; u_short reason; - int rewrite = 0; + int rewrite = 0, hdrlen = 0; int tag = -1, rtableid = -1; - u_int16_t mss = V_tcp_mssdflt; int asd = 0; int match = 0; + int state_icmp = 0; +#ifdef __FreeBSD__ + u_int16_t sport = 0, dport = 0; + u_int16_t bproto_sum = 0, bip_sum = 0; +#else + u_int16_t sport, dport; + u_int16_t bproto_sum = 0, bip_sum; +#endif + u_int8_t icmptype = 0, icmpcode = 0; - if (pf_check_congestion(ifq)) { + + if (direction == PF_IN && pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); return (PF_DROP); } @@ -3304,44 +3335,193 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, #ifdef __FreeBSD__ if (inp != NULL) pd->lookup.done = pf_socket_lookup(direction, pd, inp); - else if (debug_pfugidhack) { + else if (V_debug_pfugidhack) { PF_UNLOCK(); DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); - pd->lookup.done = pf_socket_lookup(direction, pd, inp); + pd->lookup.done = pf_socket_lookup(direction, pd, inp); PF_LOCK(); } #endif + switch (pd->proto) { + case IPPROTO_TCP: + sport = th->th_sport; + dport = th->th_dport; + hdrlen = sizeof(*th); + break; + case IPPROTO_UDP: + sport = pd->hdr.udp->uh_sport; + dport = pd->hdr.udp->uh_dport; + hdrlen = sizeof(*pd->hdr.udp); + break; +#ifdef INET + case IPPROTO_ICMP: + if (pd->af != AF_INET) + break; + sport = dport = pd->hdr.icmp->icmp_id; + hdrlen = sizeof(*pd->hdr.icmp); + icmptype = pd->hdr.icmp->icmp_type; + icmpcode = pd->hdr.icmp->icmp_code; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + if (af != AF_INET6) + break; + sport = dport = pd->hdr.icmp6->icmp6_id; + hdrlen = sizeof(*pd->hdr.icmp6); + icmptype = pd->hdr.icmp6->icmp6_type; + icmpcode = pd->hdr.icmp6->icmp6_code; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ + default: + sport = dport = hdrlen = 0; + break; + } + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - if (direction == PF_OUT) { - bport = nport = th->th_sport; - /* check outgoing packet for BINAT/NAT */ - if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, th->th_sport, daddr, th->th_dport, - &pd->naddr, &nport)) != NULL) { - PF_ACPY(&pd->baddr, saddr, af); - pf_change_ap(saddr, &th->th_sport, pd->ip_sum, - &th->th_sum, &pd->naddr, nport, 0, af); - rewrite++; - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; + /* check packet for BINAT/NAT/RDR */ + if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, + &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) { + if (nk == NULL || sk == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto cleanup; } - } else { - bport = nport = th->th_dport; - /* check incoming packet for BINAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, th->th_sport, daddr, th->th_dport, - &pd->naddr, &nport)) != NULL) { - PF_ACPY(&pd->baddr, daddr, af); - pf_change_ap(daddr, &th->th_dport, pd->ip_sum, - &th->th_sum, &pd->naddr, nport, 0, af); + + if (pd->ip_sum) + bip_sum = *pd->ip_sum; + + switch (pd->proto) { + case IPPROTO_TCP: + bproto_sum = th->th_sum; + pd->proto_sum = &th->th_sum; + + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || + nk->port[pd->sidx] != sport) { + pf_change_ap(saddr, &th->th_sport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, af); + pd->sport = &th->th_sport; + sport = th->th_sport; + } + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || + nk->port[pd->didx] != dport) { + pf_change_ap(daddr, &th->th_dport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, af); + dport = th->th_dport; + pd->dport = &th->th_dport; + } + rewrite++; + break; + case IPPROTO_UDP: + bproto_sum = pd->hdr.udp->uh_sum; + pd->proto_sum = &pd->hdr.udp->uh_sum; + + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || + nk->port[pd->sidx] != sport) { + pf_change_ap(saddr, &pd->hdr.udp->uh_sport, + pd->ip_sum, &pd->hdr.udp->uh_sum, + &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, af); + sport = pd->hdr.udp->uh_sport; + pd->sport = &pd->hdr.udp->uh_sport; + } + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || + nk->port[pd->didx] != dport) { + pf_change_ap(daddr, &pd->hdr.udp->uh_dport, + pd->ip_sum, &pd->hdr.udp->uh_sum, + &nk->addr[pd->didx], + nk->port[pd->didx], 1, af); + dport = pd->hdr.udp->uh_dport; + pd->dport = &pd->hdr.udp->uh_dport; + } + rewrite++; + break; +#ifdef INET + case IPPROTO_ICMP: + nk->port[0] = nk->port[1]; + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) + pf_change_a(&daddr->v4.s_addr, pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + + if (nk->port[1] != pd->hdr.icmp->icmp_id) { + pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, sport, + nk->port[1], 0); + pd->hdr.icmp->icmp_id = nk->port[1]; + pd->sport = &pd->hdr.icmp->icmp_id; + } + m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + nk->port[0] = nk->port[1]; + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6)) + pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->sidx], 0); + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) + pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->didx], 0); rewrite++; - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; + break; +#endif /* INET */ + default: + switch (af) { +#ifdef INET + case AF_INET: + if (PF_ANEQ(saddr, + &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(daddr, + &nk->addr[pd->didx], AF_INET)) + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (PF_ANEQ(saddr, + &nk->addr[pd->sidx], AF_INET6)) + PF_ACPY(saddr, &nk->addr[pd->sidx], af); + + if (PF_ANEQ(daddr, + &nk->addr[pd->didx], AF_INET6)) + PF_ACPY(saddr, &nk->addr[pd->didx], af); + break; +#endif /* INET */ + } + break; } + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; } while (r != NULL) { @@ -3352,26 +3532,36 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != IPPROTO_TCP) + else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, saddr, af, - r->src.neg, kif)) + r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; + /* tcp/udp only. port_op always 0 in other cases */ else if (r->src.port_op && !pf_match_port(r->src.port_op, - r->src.port[0], r->src.port[1], th->th_sport)) + r->src.port[0], r->src.port[1], sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, - r->dst.neg, NULL)) + r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; + /* tcp/udp only. port_op always 0 in other cases */ else if (r->dst.port_op && !pf_match_port(r->dst.port_op, - r->dst.port[0], r->dst.port[1], th->th_dport)) + r->dst.port[0], r->dst.port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; + /* icmp only. type always 0 in other cases */ + else if (r->type && r->type != icmptype + 1) + r = TAILQ_NEXT(r, entries); + /* icmp only. type always 0 in other cases */ + else if (r->code && r->code != icmpcode + 1) + r = TAILQ_NEXT(r, entries); else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); - else if ((r->flagset & th->th_flags) != r->flags) + else if (pd->proto == IPPROTO_TCP && + (r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); + /* tcp/udp only. uid.op always 0 in other cases */ else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = #ifdef __FreeBSD__ pf_socket_lookup(direction, pd, inp), 1)) && @@ -3381,6 +3571,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], pd->lookup.uid)) r = TAILQ_NEXT(r, entries); + /* tcp/udp only. gid.op always 0 in other cases */ else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = #ifdef __FreeBSD__ pf_socket_lookup(direction, pd, inp), 1)) && @@ -3390,12 +3581,23 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= arc4random()) + else if (r->prob && +#ifdef __FreeBSD__ + r->prob <= arc4random()) +#else + r->prob <= arc4random_uniform(UINT_MAX - 1) + 1) +#endif r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) +#ifdef __FreeBSD__ + else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) +#else + else if (r->match_tag && !pf_match_tag(m, r, &tag)) +#endif r = TAILQ_NEXT(r, entries); - else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( - pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) + else if (r->os_fingerprint != PF_OSFP_ANY && + (pd->proto != IPPROTO_TCP || !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), + r->os_fingerprint))) r = TAILQ_NEXT(r, entries); else { if (r->tag) @@ -3424,13 +3626,9 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log || (nr != NULL && nr->natpass && nr->log)) { + if (r->log || (nr != NULL && nr->log)) { if (rewrite) -#ifdef __FreeBSD__ - m_copyback(m, off, sizeof(*th), (caddr_t)th); -#else - m_copyback(m, off, sizeof(*th), th); -#endif + m_copyback(m, off, hdrlen, pd->hdr.any); PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, a, ruleset, pd); } @@ -3441,161 +3639,233 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, (r->rule_flag & PFRULE_RETURN))) { /* undo NAT changes, if they have taken place */ if (nr != NULL) { - if (direction == PF_OUT) { - pf_change_ap(saddr, &th->th_sport, pd->ip_sum, - &th->th_sum, &pd->baddr, bport, 0, af); - rewrite++; - } else { - pf_change_ap(daddr, &th->th_dport, pd->ip_sum, - &th->th_sum, &pd->baddr, bport, 0, af); - rewrite++; - } - } - if (((r->rule_flag & PFRULE_RETURNRST) || + PF_ACPY(saddr, &sk->addr[pd->sidx], af); + PF_ACPY(daddr, &sk->addr[pd->didx], af); + if (pd->sport) + *pd->sport = sk->port[pd->sidx]; + if (pd->dport) + *pd->dport = sk->port[pd->didx]; + if (pd->proto_sum) + *pd->proto_sum = bproto_sum; + if (pd->ip_sum) + *pd->ip_sum = bip_sum; + m_copyback(m, off, hdrlen, pd->hdr.any); + } + if (pd->proto == IPPROTO_TCP && + ((r->rule_flag & PFRULE_RETURNRST) || (r->rule_flag & PFRULE_RETURN)) && !(th->th_flags & TH_RST)) { - u_int32_t ack = ntohl(th->th_seq) + pd->p_len; + u_int32_t ack = ntohl(th->th_seq) + pd->p_len; + int len = 0; +#ifdef INET + struct ip *h4; +#endif +#ifdef INET6 + struct ip6_hdr *h6; +#endif - if (th->th_flags & TH_SYN) - ack++; - if (th->th_flags & TH_FIN) - ack++; + switch (af) { +#ifdef INET + case AF_INET: + h4 = mtod(m, struct ip *); + len = ntohs(h4->ip_len) - off; + break; +#endif +#ifdef INET6 + case AF_INET6: + h6 = mtod(m, struct ip6_hdr *); + len = ntohs(h6->ip6_plen) - (off - sizeof(*h6)); + break; +#endif + } + + if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af)) + REASON_SET(&reason, PFRES_PROTCKSUM); + else { + if (th->th_flags & TH_SYN) + ack++; + if (th->th_flags & TH_FIN) + ack++; #ifdef __FreeBSD__ - pf_send_tcp(m, r, af, pd->dst, + pf_send_tcp(m, r, af, pd->dst, #else - pf_send_tcp(r, af, pd->dst, + pf_send_tcp(r, af, pd->dst, #endif - pd->src, th->th_dport, th->th_sport, - ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); - } else if ((af == AF_INET) && r->return_icmp) + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, + r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); + } + } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && + r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, r->return_icmp & 255, af, r); - else if ((af == AF_INET6) && r->return_icmp6) + else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 && + r->return_icmp6) pf_send_icmp(m, r->return_icmp6 >> 8, r->return_icmp6 & 255, af, r); } if (r->action == PF_DROP) - return (PF_DROP); + goto cleanup; - if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { +#ifdef __FreeBSD__ + if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) { +#else + if (pf_tag_packet(m, tag, rtableid)) { +#endif REASON_SET(&reason, PFRES_MEMORY); - return (PF_DROP); + goto cleanup; } - if (r->keep_state || nr != NULL || - (pd->flags & PFDESC_TCP_NORM)) { - /* create new state */ - u_int16_t len; - struct pf_state *s = NULL; - struct pf_src_node *sn = NULL; + if (!state_icmp && (r->keep_state || nr != NULL || + (pd->flags & PFDESC_TCP_NORM))) { + int action; + action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m, + off, sport, dport, &rewrite, kif, sm, tag, bproto_sum, + bip_sum, hdrlen); + if (action != PF_PASS) + return (action); + } else { +#ifdef __FreeBSD__ + if (sk != NULL) + pool_put(&V_pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&V_pf_state_key_pl, nk); +#else + if (sk != NULL) + pool_put(&pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&pf_state_key_pl, nk); +#endif + } - len = pd->tot_len - off - (th->th_off << 2); + /* copy back packet headers if we performed NAT operations */ + if (rewrite) + m_copyback(m, off, hdrlen, pd->hdr.any); + +#if NPFSYNC > 0 + if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && +#ifdef __FreeBSD__ + direction == PF_OUT && pfsync_up_ptr != NULL && pfsync_up_ptr()) { +#else + direction == PF_OUT && pfsync_up()) { +#endif + /* + * We want the state created, but we dont + * want to send this in case a partner + * firewall has to know about it to allow + * replies through it. + */ +#ifdef __FreeBSD__ + if (pfsync_defer_ptr != NULL && + pfsync_defer_ptr(*sm, m)) +#else + if (pfsync_defer(*sm, m)) +#endif + return (PF_DEFER); + } +#endif + + return (PF_PASS); - /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) { - pf_status.lcounters[LCNT_STATES]++; - REASON_SET(&reason, PFRES_MAXSTATES); - goto cleanup; - } - /* src node for filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - /* src node for translation rule */ - if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - ((direction == PF_OUT && - pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - s = pool_get(&pf_state_pl, PR_NOWAIT); - if (s == NULL) { - REASON_SET(&reason, PFRES_MEMORY); cleanup: - if (sn != NULL && sn->states == 0 && sn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); - } - if (nsn != sn && nsn != NULL && nsn->states == 0 && - nsn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); - } - return (PF_DROP); - } - bzero(s, sizeof(*s)); - s->rule.ptr = r; - s->nat_rule.ptr = nr; - s->anchor.ptr = a; - STATE_INC_COUNTERS(s); - if (r->allow_opts) - s->state_flags |= PFSTATE_ALLOWOPTS; - if (r->rule_flag & PFRULE_STATESLOPPY) - s->state_flags |= PFSTATE_SLOPPY; - s->log = r->log & PF_LOG_ALL; - if (nr != NULL) - s->log |= nr->log & PF_LOG_ALL; - s->proto = IPPROTO_TCP; - s->direction = direction; - s->af = af; - if (direction == PF_OUT) { - PF_ACPY(&s->gwy.addr, saddr, af); - s->gwy.port = th->th_sport; /* sport */ - PF_ACPY(&s->ext.addr, daddr, af); - s->ext.port = th->th_dport; - if (nr != NULL) { - PF_ACPY(&s->lan.addr, &pd->baddr, af); - s->lan.port = bport; - } else { - PF_ACPY(&s->lan.addr, &s->gwy.addr, af); - s->lan.port = s->gwy.port; - } - } else { - PF_ACPY(&s->lan.addr, daddr, af); - s->lan.port = th->th_dport; - PF_ACPY(&s->ext.addr, saddr, af); - s->ext.port = th->th_sport; - if (nr != NULL) { - PF_ACPY(&s->gwy.addr, &pd->baddr, af); - s->gwy.port = bport; - } else { - PF_ACPY(&s->gwy.addr, &s->lan.addr, af); - s->gwy.port = s->lan.port; - } - } +#ifdef __FreeBSD__ + if (sk != NULL) + pool_put(&V_pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&V_pf_state_key_pl, nk); +#else + if (sk != NULL) + pool_put(&pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&pf_state_key_pl, nk); +#endif + return (PF_DROP); +} +static __inline int +pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, + struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw, + struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk, + struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, + struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, + u_int16_t bip_sum, int hdrlen) +{ + struct pf_state *s = NULL; + struct pf_src_node *sn = NULL; + struct tcphdr *th = pd->hdr.tcp; +#ifdef __FreeBSD__ + u_int16_t mss = V_tcp_mssdflt; +#else + u_int16_t mss = tcp_mssdflt; +#endif + u_short reason; + + /* check maximums */ + if (r->max_states && (r->states_cur >= r->max_states)) { +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_STATES]++; +#else + pf_status.lcounters[LCNT_STATES]++; +#endif + REASON_SET(&reason, PFRES_MAXSTATES); + return (PF_DROP); + } + /* src node for filter rule */ + if ((r->rule_flag & PFRULE_SRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto csfailed; + } + /* src node for translation rule */ + if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto csfailed; + } +#ifdef __FreeBSD__ + s = pool_get(&V_pf_state_pl, PR_NOWAIT | PR_ZERO); +#else + s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); +#endif + if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto csfailed; + } + s->rule.ptr = r; + s->nat_rule.ptr = nr; + s->anchor.ptr = a; + STATE_INC_COUNTERS(s); + if (r->allow_opts) + s->state_flags |= PFSTATE_ALLOWOPTS; + if (r->rule_flag & PFRULE_STATESLOPPY) + s->state_flags |= PFSTATE_SLOPPY; + if (r->rule_flag & PFRULE_PFLOW) + s->state_flags |= PFSTATE_PFLOW; + s->log = r->log & PF_LOG_ALL; + s->sync_state = PFSYNC_S_NONE; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; + switch (pd->proto) { + case IPPROTO_TCP: s->src.seqlo = ntohl(th->th_seq); - s->src.seqhi = s->src.seqlo + len + 1; + s->src.seqhi = s->src.seqlo + pd->p_len + 1; if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_MODULATE) { /* Generate sequence number modulator */ -#ifdef __FreeBSD__ - while ((s->src.seqdiff = - pf_new_isn(s) - s->src.seqlo) == 0) - ; -#else - while ((s->src.seqdiff = - tcp_rndiss_next() - s->src.seqlo) == 0) - ; -#endif + if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == + 0) + s->src.seqdiff = 1; pf_change_a(&th->th_seq, &th->th_sum, htonl(s->src.seqlo + s->src.seqdiff), 0); - rewrite = 1; + *rewrite = 1; } else s->src.seqdiff = 0; if (th->th_flags & TH_SYN) { s->src.seqhi++; - s->src.wscale = pf_get_wscale(m, off, th->th_off, af); + s->src.wscale = pf_get_wscale(m, off, + th->th_off, pd->af); } s->src.max_win = MAX(ntohs(th->th_win), 1); if (s->src.wscale & PF_WSCALE_MASK) { @@ -3611,994 +3881,174 @@ cleanup: s->dst.max_win = 1; s->src.state = TCPS_SYN_SENT; s->dst.state = TCPS_CLOSED; - s->creation = time_second; - s->expire = time_second; s->timeout = PFTM_TCP_FIRST_PACKET; - pf_set_rt_ifp(s, saddr); - if (sn != NULL) { - s->src_node = sn; - s->src_node->states++; - } - if (nsn != NULL) { - PF_ACPY(&nsn->raddr, &pd->naddr, af); - s->nat_src_node = nsn; - s->nat_src_node->states++; - } + break; + case IPPROTO_UDP: + s->src.state = PFUDPS_SINGLE; + s->dst.state = PFUDPS_NO_TRAFFIC; + s->timeout = PFTM_UDP_FIRST_PACKET; + break; + case IPPROTO_ICMP: +#ifdef INET6 + case IPPROTO_ICMPV6: +#endif + s->timeout = PFTM_ICMP_FIRST_PACKET; + break; + default: + s->src.state = PFOTHERS_SINGLE; + s->dst.state = PFOTHERS_NO_TRAFFIC; + s->timeout = PFTM_OTHER_FIRST_PACKET; + } + + s->creation = time_second; + s->expire = time_second; + + if (sn != NULL) { + s->src_node = sn; + s->src_node->states++; + } + if (nsn != NULL) { + /* XXX We only modify one side for now. */ + PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); + s->nat_src_node = nsn; + s->nat_src_node->states++; + } + if (pd->proto == IPPROTO_TCP) { if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, off, pd, th, &s->src, &s->dst)) { REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); +#ifdef __FreeBSD__ + pool_put(&V_pf_state_pl, s); +#else pool_put(&pf_state_pl, s); +#endif return (PF_DROP); } if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, - &s->src, &s->dst, &rewrite)) { + &s->src, &s->dst, rewrite)) { /* This really shouldn't happen!!! */ DPFPRINTF(PF_DEBUG_URGENT, ("pf_normalize_tcp_stateful failed on first pkt")); pf_normalize_tcp_cleanup(s); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } - if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - pf_normalize_tcp_cleanup(s); - REASON_SET(&reason, PFRES_STATEINS); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } else - *sm = s; - if (tag > 0) { - pf_tag_ref(tag); - s->tag = tag; - } - if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && - r->keep_state == PF_STATE_SYNPROXY) { - s->src.state = PF_TCPS_PROXY_SRC; - if (nr != NULL) { - if (direction == PF_OUT) { - pf_change_ap(saddr, &th->th_sport, - pd->ip_sum, &th->th_sum, &pd->baddr, - bport, 0, af); - } else { - pf_change_ap(daddr, &th->th_dport, - pd->ip_sum, &th->th_sum, &pd->baddr, - bport, 0, af); - } - } - s->src.seqhi = htonl(arc4random()); - /* Find mss option */ - mss = pf_get_mss(m, off, th->th_off, af); - mss = pf_calc_mss(saddr, af, mss); - mss = pf_calc_mss(daddr, af, mss); - s->src.mss = mss; #ifdef __FreeBSD__ - pf_send_tcp(NULL, r, af, daddr, saddr, th->th_dport, + pool_put(&V_pf_state_pl, s); #else - pf_send_tcp(r, af, daddr, saddr, th->th_dport, + pool_put(&pf_state_pl, s); #endif - th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); - REASON_SET(&reason, PFRES_SYNPROXY); - return (PF_SYNPROXY_DROP); + return (PF_DROP); } } + s->direction = pd->dir; - /* copy back packet headers if we performed NAT operations */ - if (rewrite) - m_copyback(m, off, sizeof(*th), (caddr_t)th); - - return (PF_PASS); -} + if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk, + pd->src, pd->dst, sport, dport)) + goto csfailed; -int -pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, - struct pfi_kif *kif, struct mbuf *m, int off, void *h, + if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) { + if (pd->proto == IPPROTO_TCP) + pf_normalize_tcp_cleanup(s); + REASON_SET(&reason, PFRES_STATEINS); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); #ifdef __FreeBSD__ - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, - struct ifqueue *ifq, struct inpcb *inp) + pool_put(&V_pf_state_pl, s); #else - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, - struct ifqueue *ifq) + pool_put(&pf_state_pl, s); #endif -{ - struct pf_rule *nr = NULL; - struct pf_addr *saddr = pd->src, *daddr = pd->dst; - struct udphdr *uh = pd->hdr.udp; - u_int16_t bport, nport = 0; - sa_family_t af = pd->af; - struct pf_rule *r, *a = NULL; - struct pf_ruleset *ruleset = NULL; - struct pf_src_node *nsn = NULL; - u_short reason; - int rewrite = 0; - int tag = -1, rtableid = -1; - int asd = 0; - int match = 0; - - if (pf_check_congestion(ifq)) { - REASON_SET(&reason, PFRES_CONGEST); return (PF_DROP); - } + } else + *sm = s; -#ifdef __FreeBSD__ - if (inp != NULL) - pd->lookup.done = pf_socket_lookup(direction, pd, inp); - else if (debug_pfugidhack) { - PF_UNLOCK(); - DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); - pd->lookup.done = pf_socket_lookup(direction, pd, inp); - PF_LOCK(); + pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */ + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; } + if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == + TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { + s->src.state = PF_TCPS_PROXY_SRC; + /* undo NAT changes, if they have taken place */ + if (nr != NULL) { + struct pf_state_key *skt = s->key[PF_SK_WIRE]; + if (pd->dir == PF_OUT) + skt = s->key[PF_SK_STACK]; + PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af); + PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af); + if (pd->sport) + *pd->sport = skt->port[pd->sidx]; + if (pd->dport) + *pd->dport = skt->port[pd->didx]; + if (pd->proto_sum) + *pd->proto_sum = bproto_sum; + if (pd->ip_sum) + *pd->ip_sum = bip_sum; + m_copyback(m, off, hdrlen, pd->hdr.any); + } + s->src.seqhi = htonl(arc4random()); + /* Find mss option */ + int rtid = M_GETFIB(m); + mss = pf_get_mss(m, off, th->th_off, pd->af); + mss = pf_calc_mss(pd->src, pd->af, rtid, mss); + mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); + s->src.mss = mss; +#ifdef __FreeBSD__ + pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport, +#else + pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, #endif - - r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - - if (direction == PF_OUT) { - bport = nport = uh->uh_sport; - /* check outgoing packet for BINAT/NAT */ - if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, uh->uh_sport, daddr, uh->uh_dport, - &pd->naddr, &nport)) != NULL) { - PF_ACPY(&pd->baddr, saddr, af); - pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum, - &uh->uh_sum, &pd->naddr, nport, 1, af); - rewrite++; - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } else { - bport = nport = uh->uh_dport; - /* check incoming packet for BINAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr, - &nport)) != NULL) { - PF_ACPY(&pd->baddr, daddr, af); - pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum, - &uh->uh_sum, &pd->naddr, nport, 1, af); - rewrite++; - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } + th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); + REASON_SET(&reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); } - while (r != NULL) { - r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) - r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != direction) - r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != af) - r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != IPPROTO_UDP) - r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, - r->src.neg, kif)) - r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (r->src.port_op && !pf_match_port(r->src.port_op, - r->src.port[0], r->src.port[1], uh->uh_sport)) - r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, - r->dst.neg, NULL)) - r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->dst.port_op && !pf_match_port(r->dst.port_op, - r->dst.port[0], r->dst.port[1], uh->uh_dport)) - r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos == pd->tos)) - r = TAILQ_NEXT(r, entries); - else if (r->rule_flag & PFRULE_FRAGMENT) - r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + return (PF_PASS); + +csfailed: #ifdef __FreeBSD__ - pf_socket_lookup(direction, pd, inp), 1)) && + if (sk != NULL) + pool_put(&V_pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&V_pf_state_key_pl, nk); #else - pf_socket_lookup(direction, pd), 1)) && + if (sk != NULL) + pool_put(&pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&pf_state_key_pl, nk); #endif - !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - pd->lookup.uid)) - r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + + if (sn != NULL && sn->states == 0 && sn->expire == 0) { #ifdef __FreeBSD__ - pf_socket_lookup(direction, pd, inp), 1)) && + RB_REMOVE(pf_src_tree, &V_tree_src_tracking, sn); + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + V_pf_status.src_nodes--; + pool_put(&V_pf_src_tree_pl, sn); #else - pf_socket_lookup(direction, pd), 1)) && + RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, sn); #endif - !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - pd->lookup.gid)) - r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= arc4random()) - r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->os_fingerprint != PF_OSFP_ANY) - r = TAILQ_NEXT(r, entries); - else { - if (r->tag) - tag = r->tag; - if (r->rtableid >= 0) - rtableid = r->rtableid; - if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; - if ((*rm)->quick) - break; - r = TAILQ_NEXT(r, entries); - } else - pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match); - } - if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match)) - break; } - r = *rm; - a = *am; - ruleset = *rsm; - - REASON_SET(&reason, PFRES_MATCH); - - if (r->log || (nr != NULL && nr->natpass && nr->log)) { - if (rewrite) + if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { #ifdef __FreeBSD__ - m_copyback(m, off, sizeof(*uh), (caddr_t)uh); + RB_REMOVE(pf_src_tree, &V_tree_src_tracking, nsn); + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + V_pf_status.src_nodes--; + pool_put(&V_pf_src_tree_pl, nsn); #else - m_copyback(m, off, sizeof(*uh), uh); + RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, nsn); #endif - PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, - a, ruleset, pd); - } - - if ((r->action == PF_DROP) && - ((r->rule_flag & PFRULE_RETURNICMP) || - (r->rule_flag & PFRULE_RETURN))) { - /* undo NAT changes, if they have taken place */ - if (nr != NULL) { - if (direction == PF_OUT) { - pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum, - &uh->uh_sum, &pd->baddr, bport, 1, af); - rewrite++; - } else { - pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum, - &uh->uh_sum, &pd->baddr, bport, 1, af); - rewrite++; - } - } - if ((af == AF_INET) && r->return_icmp) - pf_send_icmp(m, r->return_icmp >> 8, - r->return_icmp & 255, af, r); - else if ((af == AF_INET6) && r->return_icmp6) - pf_send_icmp(m, r->return_icmp6 >> 8, - r->return_icmp6 & 255, af, r); - } - - if (r->action == PF_DROP) - return (PF_DROP); - - if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { - REASON_SET(&reason, PFRES_MEMORY); - return (PF_DROP); - } - - if (r->keep_state || nr != NULL) { - /* create new state */ - struct pf_state *s = NULL; - struct pf_src_node *sn = NULL; - - /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) { - pf_status.lcounters[LCNT_STATES]++; - REASON_SET(&reason, PFRES_MAXSTATES); - goto cleanup; - } - /* src node for filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - /* src node for translation rule */ - if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - ((direction == PF_OUT && - pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - s = pool_get(&pf_state_pl, PR_NOWAIT); - if (s == NULL) { - REASON_SET(&reason, PFRES_MEMORY); -cleanup: - if (sn != NULL && sn->states == 0 && sn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); - } - if (nsn != sn && nsn != NULL && nsn->states == 0 && - nsn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); - } - return (PF_DROP); - } - bzero(s, sizeof(*s)); - s->rule.ptr = r; - s->nat_rule.ptr = nr; - s->anchor.ptr = a; - STATE_INC_COUNTERS(s); - if (r->allow_opts) - s->state_flags |= PFSTATE_ALLOWOPTS; - if (r->rule_flag & PFRULE_STATESLOPPY) - s->state_flags |= PFSTATE_SLOPPY; - s->log = r->log & PF_LOG_ALL; - if (nr != NULL) - s->log |= nr->log & PF_LOG_ALL; - s->proto = IPPROTO_UDP; - s->direction = direction; - s->af = af; - if (direction == PF_OUT) { - PF_ACPY(&s->gwy.addr, saddr, af); - s->gwy.port = uh->uh_sport; - PF_ACPY(&s->ext.addr, daddr, af); - s->ext.port = uh->uh_dport; - if (nr != NULL) { - PF_ACPY(&s->lan.addr, &pd->baddr, af); - s->lan.port = bport; - } else { - PF_ACPY(&s->lan.addr, &s->gwy.addr, af); - s->lan.port = s->gwy.port; - } - } else { - PF_ACPY(&s->lan.addr, daddr, af); - s->lan.port = uh->uh_dport; - PF_ACPY(&s->ext.addr, saddr, af); - s->ext.port = uh->uh_sport; - if (nr != NULL) { - PF_ACPY(&s->gwy.addr, &pd->baddr, af); - s->gwy.port = bport; - } else { - PF_ACPY(&s->gwy.addr, &s->lan.addr, af); - s->gwy.port = s->lan.port; - } - } - s->src.state = PFUDPS_SINGLE; - s->dst.state = PFUDPS_NO_TRAFFIC; - s->creation = time_second; - s->expire = time_second; - s->timeout = PFTM_UDP_FIRST_PACKET; - pf_set_rt_ifp(s, saddr); - if (sn != NULL) { - s->src_node = sn; - s->src_node->states++; - } - if (nsn != NULL) { - PF_ACPY(&nsn->raddr, &pd->naddr, af); - s->nat_src_node = nsn; - s->nat_src_node->states++; - } - if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_STATEINS); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } else - *sm = s; - if (tag > 0) { - pf_tag_ref(tag); - s->tag = tag; - } - } - - /* copy back packet headers if we performed NAT operations */ - if (rewrite) - m_copyback(m, off, sizeof(*uh), (caddr_t)uh); - - return (PF_PASS); -} - -int -pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, - struct pfi_kif *kif, struct mbuf *m, int off, void *h, - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, - struct ifqueue *ifq) -{ - struct pf_rule *nr = NULL; - struct pf_addr *saddr = pd->src, *daddr = pd->dst; - struct pf_rule *r, *a = NULL; - struct pf_ruleset *ruleset = NULL; - struct pf_src_node *nsn = NULL; - u_short reason; - u_int16_t icmpid = 0, bport, nport = 0; - sa_family_t af = pd->af; - u_int8_t icmptype = 0; /* make the compiler happy */ - u_int8_t icmpcode = 0; /* make the compiler happy */ - int state_icmp = 0; - int tag = -1, rtableid = -1; -#ifdef INET6 - int rewrite = 0; -#endif /* INET6 */ - int asd = 0; - int match = 0; - - if (pf_check_congestion(ifq)) { - REASON_SET(&reason, PFRES_CONGEST); - return (PF_DROP); - } - - switch (pd->proto) { -#ifdef INET - case IPPROTO_ICMP: - icmptype = pd->hdr.icmp->icmp_type; - icmpcode = pd->hdr.icmp->icmp_code; - icmpid = pd->hdr.icmp->icmp_id; - - if (icmptype == ICMP_UNREACH || - icmptype == ICMP_SOURCEQUENCH || - icmptype == ICMP_REDIRECT || - icmptype == ICMP_TIMXCEED || - icmptype == ICMP_PARAMPROB) - state_icmp++; - break; -#endif /* INET */ -#ifdef INET6 - case IPPROTO_ICMPV6: - icmptype = pd->hdr.icmp6->icmp6_type; - icmpcode = pd->hdr.icmp6->icmp6_code; - icmpid = pd->hdr.icmp6->icmp6_id; - - if (icmptype == ICMP6_DST_UNREACH || - icmptype == ICMP6_PACKET_TOO_BIG || - icmptype == ICMP6_TIME_EXCEEDED || - icmptype == ICMP6_PARAM_PROB) - state_icmp++; - break; -#endif /* INET6 */ - } - - r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - - if (direction == PF_OUT) { - bport = nport = icmpid; - /* check outgoing packet for BINAT/NAT */ - if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) != - NULL) { - PF_ACPY(&pd->baddr, saddr, af); - switch (af) { -#ifdef INET - case AF_INET: - pf_change_a(&saddr->v4.s_addr, pd->ip_sum, - pd->naddr.v4.s_addr, 0); - pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( - pd->hdr.icmp->icmp_cksum, icmpid, nport, 0); - pd->hdr.icmp->icmp_id = nport; - m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, - &pd->naddr, 0); - rewrite++; - break; -#endif /* INET6 */ - } - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } else { - bport = nport = icmpid; - /* check incoming packet for BINAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) != - NULL) { - PF_ACPY(&pd->baddr, daddr, af); - switch (af) { -#ifdef INET - case AF_INET: - pf_change_a(&daddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, - &pd->naddr, 0); - rewrite++; - break; -#endif /* INET6 */ - } - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } - - while (r != NULL) { - r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) - r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != direction) - r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != af) - r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != pd->proto) - r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, - r->src.neg, kif)) - r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, - r->dst.neg, NULL)) - r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->type && r->type != icmptype + 1) - r = TAILQ_NEXT(r, entries); - else if (r->code && r->code != icmpcode + 1) - r = TAILQ_NEXT(r, entries); - else if (r->tos && !(r->tos == pd->tos)) - r = TAILQ_NEXT(r, entries); - else if (r->rule_flag & PFRULE_FRAGMENT) - r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= arc4random()) - r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->os_fingerprint != PF_OSFP_ANY) - r = TAILQ_NEXT(r, entries); - else { - if (r->tag) - tag = r->tag; - if (r->rtableid >= 0) - rtableid = r->rtableid; - if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; - if ((*rm)->quick) - break; - r = TAILQ_NEXT(r, entries); - } else - pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match); - } - if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match)) - break; - } - r = *rm; - a = *am; - ruleset = *rsm; - - REASON_SET(&reason, PFRES_MATCH); - - if (r->log || (nr != NULL && nr->natpass && nr->log)) { -#ifdef INET6 - if (rewrite) - m_copyback(m, off, sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); -#endif /* INET6 */ - PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, - a, ruleset, pd); - } - - if (r->action != PF_PASS) - return (PF_DROP); - - if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { - REASON_SET(&reason, PFRES_MEMORY); - return (PF_DROP); - } - - if (!state_icmp && (r->keep_state || nr != NULL)) { - /* create new state */ - struct pf_state *s = NULL; - struct pf_src_node *sn = NULL; - - /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) { - pf_status.lcounters[LCNT_STATES]++; - REASON_SET(&reason, PFRES_MAXSTATES); - goto cleanup; - } - /* src node for filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - /* src node for translation rule */ - if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - ((direction == PF_OUT && - pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - s = pool_get(&pf_state_pl, PR_NOWAIT); - if (s == NULL) { - REASON_SET(&reason, PFRES_MEMORY); -cleanup: - if (sn != NULL && sn->states == 0 && sn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); - } - if (nsn != sn && nsn != NULL && nsn->states == 0 && - nsn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); - } - return (PF_DROP); - } - bzero(s, sizeof(*s)); - s->rule.ptr = r; - s->nat_rule.ptr = nr; - s->anchor.ptr = a; - STATE_INC_COUNTERS(s); - if (r->allow_opts) - s->state_flags |= PFSTATE_ALLOWOPTS; - if (r->rule_flag & PFRULE_STATESLOPPY) - s->state_flags |= PFSTATE_SLOPPY; - s->log = r->log & PF_LOG_ALL; - if (nr != NULL) - s->log |= nr->log & PF_LOG_ALL; - s->proto = pd->proto; - s->direction = direction; - s->af = af; - if (direction == PF_OUT) { - PF_ACPY(&s->gwy.addr, saddr, af); - s->gwy.port = nport; - PF_ACPY(&s->ext.addr, daddr, af); - s->ext.port = 0; - if (nr != NULL) { - PF_ACPY(&s->lan.addr, &pd->baddr, af); - s->lan.port = bport; - } else { - PF_ACPY(&s->lan.addr, &s->gwy.addr, af); - s->lan.port = s->gwy.port; - } - } else { - PF_ACPY(&s->lan.addr, daddr, af); - s->lan.port = nport; - PF_ACPY(&s->ext.addr, saddr, af); - s->ext.port = 0; - if (nr != NULL) { - PF_ACPY(&s->gwy.addr, &pd->baddr, af); - s->gwy.port = bport; - } else { - PF_ACPY(&s->gwy.addr, &s->lan.addr, af); - s->gwy.port = s->lan.port; - } - } - s->creation = time_second; - s->expire = time_second; - s->timeout = PFTM_ICMP_FIRST_PACKET; - pf_set_rt_ifp(s, saddr); - if (sn != NULL) { - s->src_node = sn; - s->src_node->states++; - } - if (nsn != NULL) { - PF_ACPY(&nsn->raddr, &pd->naddr, af); - s->nat_src_node = nsn; - s->nat_src_node->states++; - } - if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_STATEINS); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } else - *sm = s; - if (tag > 0) { - pf_tag_ref(tag); - s->tag = tag; - } - } - -#ifdef INET6 - /* copy back packet headers if we performed IPv6 NAT operations */ - if (rewrite) - m_copyback(m, off, sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); -#endif /* INET6 */ - - return (PF_PASS); -} - -int -pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, - struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, - struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq) -{ - struct pf_rule *nr = NULL; - struct pf_rule *r, *a = NULL; - struct pf_ruleset *ruleset = NULL; - struct pf_src_node *nsn = NULL; - struct pf_addr *saddr = pd->src, *daddr = pd->dst; - sa_family_t af = pd->af; - u_short reason; - int tag = -1, rtableid = -1; - int asd = 0; - int match = 0; - - if (pf_check_congestion(ifq)) { - REASON_SET(&reason, PFRES_CONGEST); - return (PF_DROP); - } - - r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - - if (direction == PF_OUT) { - /* check outgoing packet for BINAT/NAT */ - if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { - PF_ACPY(&pd->baddr, saddr, af); - switch (af) { -#ifdef INET - case AF_INET: - pf_change_a(&saddr->v4.s_addr, pd->ip_sum, - pd->naddr.v4.s_addr, 0); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - PF_ACPY(saddr, &pd->naddr, af); - break; -#endif /* INET6 */ - } - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } else { - /* check incoming packet for BINAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { - PF_ACPY(&pd->baddr, daddr, af); - switch (af) { -#ifdef INET - case AF_INET: - pf_change_a(&daddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - PF_ACPY(daddr, &pd->naddr, af); - break; -#endif /* INET6 */ - } - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } } - - while (r != NULL) { - r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) - r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != direction) - r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != af) - r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != pd->proto) - r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, - r->src.neg, kif)) - r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, - r->dst.neg, NULL)) - r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->tos && !(r->tos == pd->tos)) - r = TAILQ_NEXT(r, entries); - else if (r->rule_flag & PFRULE_FRAGMENT) - r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= arc4random()) - r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->os_fingerprint != PF_OSFP_ANY) - r = TAILQ_NEXT(r, entries); - else { - if (r->tag) - tag = r->tag; - if (r->rtableid >= 0) - rtableid = r->rtableid; - if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; - if ((*rm)->quick) - break; - r = TAILQ_NEXT(r, entries); - } else - pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match); - } - if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match)) - break; - } - r = *rm; - a = *am; - ruleset = *rsm; - - REASON_SET(&reason, PFRES_MATCH); - - if (r->log || (nr != NULL && nr->natpass && nr->log)) - PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, - a, ruleset, pd); - - if ((r->action == PF_DROP) && - ((r->rule_flag & PFRULE_RETURNICMP) || - (r->rule_flag & PFRULE_RETURN))) { - struct pf_addr *a = NULL; - - if (nr != NULL) { - if (direction == PF_OUT) - a = saddr; - else - a = daddr; - } - if (a != NULL) { - switch (af) { -#ifdef INET - case AF_INET: - pf_change_a(&a->v4.s_addr, pd->ip_sum, - pd->baddr.v4.s_addr, 0); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - PF_ACPY(a, &pd->baddr, af); - break; -#endif /* INET6 */ - } - } - if ((af == AF_INET) && r->return_icmp) - pf_send_icmp(m, r->return_icmp >> 8, - r->return_icmp & 255, af, r); - else if ((af == AF_INET6) && r->return_icmp6) - pf_send_icmp(m, r->return_icmp6 >> 8, - r->return_icmp6 & 255, af, r); - } - - if (r->action != PF_PASS) - return (PF_DROP); - - if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { - REASON_SET(&reason, PFRES_MEMORY); - return (PF_DROP); - } - - if (r->keep_state || nr != NULL) { - /* create new state */ - struct pf_state *s = NULL; - struct pf_src_node *sn = NULL; - - /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) { - pf_status.lcounters[LCNT_STATES]++; - REASON_SET(&reason, PFRES_MAXSTATES); - goto cleanup; - } - /* src node for filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - /* src node for translation rule */ - if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - ((direction == PF_OUT && - pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - s = pool_get(&pf_state_pl, PR_NOWAIT); - if (s == NULL) { - REASON_SET(&reason, PFRES_MEMORY); -cleanup: - if (sn != NULL && sn->states == 0 && sn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); - } - if (nsn != sn && nsn != NULL && nsn->states == 0 && - nsn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); - } - return (PF_DROP); - } - bzero(s, sizeof(*s)); - s->rule.ptr = r; - s->nat_rule.ptr = nr; - s->anchor.ptr = a; - STATE_INC_COUNTERS(s); - if (r->allow_opts) - s->state_flags |= PFSTATE_ALLOWOPTS; - if (r->rule_flag & PFRULE_STATESLOPPY) - s->state_flags |= PFSTATE_SLOPPY; - s->log = r->log & PF_LOG_ALL; - if (nr != NULL) - s->log |= nr->log & PF_LOG_ALL; - s->proto = pd->proto; - s->direction = direction; - s->af = af; - if (direction == PF_OUT) { - PF_ACPY(&s->gwy.addr, saddr, af); - PF_ACPY(&s->ext.addr, daddr, af); - if (nr != NULL) - PF_ACPY(&s->lan.addr, &pd->baddr, af); - else - PF_ACPY(&s->lan.addr, &s->gwy.addr, af); - } else { - PF_ACPY(&s->lan.addr, daddr, af); - PF_ACPY(&s->ext.addr, saddr, af); - if (nr != NULL) - PF_ACPY(&s->gwy.addr, &pd->baddr, af); - else - PF_ACPY(&s->gwy.addr, &s->lan.addr, af); - } - s->src.state = PFOTHERS_SINGLE; - s->dst.state = PFOTHERS_NO_TRAFFIC; - s->creation = time_second; - s->expire = time_second; - s->timeout = PFTM_OTHER_FIRST_PACKET; - pf_set_rt_ifp(s, saddr); - if (sn != NULL) { - s->src_node = sn; - s->src_node->states++; - } - if (nsn != NULL) { - PF_ACPY(&nsn->raddr, &pd->naddr, af); - s->nat_src_node = nsn; - s->nat_src_node->states++; - } - if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_STATEINS); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } else - *sm = s; - if (tag > 0) { - pf_tag_ref(tag); - s->tag = tag; - } - } - - return (PF_PASS); + return (PF_DROP); } int @@ -4626,10 +4076,10 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, - r->src.neg, kif)) + r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, - r->dst.neg, NULL)) + r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); @@ -4645,9 +4095,14 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, pd->proto == IPPROTO_ICMPV6) && (r->type || r->code)) r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= arc4random()) + else if (r->prob && r->prob <= + (arc4random() % (UINT_MAX - 1) + 1)) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) +#ifdef __FreeBSD__ + else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) +#else + else if (r->match_tag && !pf_match_tag(m, r, &tag)) +#endif r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { @@ -4679,7 +4134,11 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) { +#ifdef __FreeBSD__ + if (pf_tag_packet(m, tag, -1, pd->pf_mtag)) { +#else + if (pf_tag_packet(m, tag, -1)) { +#endif REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -4692,11 +4151,11 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason, int *copyback) { - struct tcphdr *th = pd->hdr.tcp; - u_int16_t win = ntohs(th->th_win); - u_int32_t ack, end, seq, orig_seq; - u_int8_t sws, dws; - int ackskew; + struct tcphdr *th = pd->hdr.tcp; + u_int16_t win = ntohs(th->th_win); + u_int32_t ack, end, seq, orig_seq; + u_int8_t sws, dws; + int ackskew; if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { sws = src->wscale & PF_WSCALE_MASK; @@ -4724,13 +4183,9 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, /* Deferred generation of sequence number modulator */ if (dst->seqdiff && !src->seqdiff) { -#ifdef __FreeBSD__ - while ((src->seqdiff = pf_new_isn(*state) - seq) == 0) - ; -#else - while ((src->seqdiff = tcp_rndiss_next() - seq) == 0) + /* use random iss for the TCP server */ + while ((src->seqdiff = arc4random() - seq) == 0) ; -#endif ack = ntohl(th->th_ack) - dst->seqdiff; pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + src->seqdiff), 0); @@ -4837,7 +4292,7 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, } -#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ +#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ if (SEQ_GEQ(src->seqhi, end) && /* Last octet inside other's window space */ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && @@ -4847,7 +4302,8 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || - (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) { + (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || + (pd->flags & PFDESC_IP_REAS) == 0)) { /* Require an exact/+1 sequence match on resets when possible */ if (dst->scrub || src->scrub) { @@ -4937,19 +4393,25 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, * and keep updating the state TTL. */ +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " - "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len, + "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, #ifdef __FreeBSD__ - ackskew, (unsigned long long)(*state)->packets[0], - (unsigned long long)(*state)->packets[1]); + pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], + (unsigned long long)(*state)->packets[1], #else - ackskew, (*state)->packets[0], - (*state)->packets[1]); + pd->p_len, ackskew, (*state)->packets[0], + (*state)->packets[1], #endif + pd->dir == PF_IN ? "in" : "out", + pd->dir == (*state)->direction ? "fwd" : "rev"); } if (dst->scrub || src->scrub) { @@ -4987,7 +4449,7 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, /* Send RST for state mismatches during handshake */ if (!(th->th_flags & TH_RST)) #ifdef __FreeBSD__ - pf_send_tcp(m, (*state)->rule.ptr, pd->af, + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, #else pf_send_tcp((*state)->rule.ptr, pd->af, #endif @@ -4999,16 +4461,16 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, src->seqlo = 0; src->seqhi = 1; src->max_win = 1; +#ifdef __FreeBSD__ + } else if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else } else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " -#ifdef notyet "pkts=%llu:%llu dir=%s,%s\n", -#else - "pkts=%llu:%llu%s\n", -#endif seq, orig_seq, ack, pd->p_len, ackskew, #ifdef __FreeBSD__ (unsigned long long)(*state)->packets[0], @@ -5016,12 +4478,8 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, #else (*state)->packets[0], (*state)->packets[1], #endif -#ifdef notyet - direction == PF_IN ? "in" : "out", - direction == (*state)->direction ? "fwd" : "rev"); -#else - ""); -#endif + pd->dir == PF_IN ? "in" : "out", + pd->dir == (*state)->direction ? "fwd" : "rev"); printf("pf: State failure on: %c %c %c %c | %c %c\n", SEQ_GEQ(src->seqhi, end) ? ' ' : '1', SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? @@ -5035,7 +4493,6 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, return (PF_DROP); } - /* Any packets which have gotten here are to be passed */ return (PF_PASS); } @@ -5110,32 +4567,36 @@ pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, return (PF_PASS); } - int pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { - struct pf_state_cmp key; + struct pf_state_key_cmp key; struct tcphdr *th = pd->hdr.tcp; int copyback = 0; struct pf_state_peer *src, *dst; + struct pf_state_key *sk; key.af = pd->af; key.proto = IPPROTO_TCP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = th->th_sport; - key.gwy.port = th->th_dport; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = th->th_sport; - key.ext.port = th->th_dport; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = th->th_sport; + key.port[1] = th->th_dport; + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = th->th_sport; + key.port[0] = th->th_dport; } - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif if (direction == (*state)->direction) { src = &(*state)->src; @@ -5145,6 +4606,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, dst = &(*state)->src; } + sk = (*state)->key[pd->didx]; + if ((*state)->src.state == PF_TCPS_PROXY_SRC) { if (direction != (*state)->direction) { REASON_SET(reason, PFRES_SYNPROXY); @@ -5179,15 +4642,6 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->src.state = PF_TCPS_PROXY_DST; } if ((*state)->src.state == PF_TCPS_PROXY_DST) { - struct pf_state_host *src, *dst; - - if (direction == PF_OUT) { - src = &(*state)->gwy; - dst = &(*state)->ext; - } else { - src = &(*state)->ext; - dst = &(*state)->lan; - } if (direction == (*state)->direction) { if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || @@ -5200,11 +4654,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->dst.seqhi = htonl(arc4random()); #ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, - &src->addr, #else - pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, + pf_send_tcp((*state)->rule.ptr, pd->af, #endif - &dst->addr, src->port, dst->port, + &sk->addr[pd->sidx], &sk->addr[pd->didx], + sk->port[pd->sidx], sk->port[pd->didx], (*state)->dst.seqhi, 0, TH_SYN, 0, (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); REASON_SET(reason, PFRES_SYNPROXY); @@ -5228,11 +4682,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->tag, NULL, NULL); #ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, - &src->addr, #else - pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, + pf_send_tcp((*state)->rule.ptr, pd->af, #endif - &dst->addr, src->port, dst->port, + &sk->addr[pd->sidx], &sk->addr[pd->didx], + sk->port[pd->sidx], sk->port[pd->didx], (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL, NULL); @@ -5255,7 +4709,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && dst->state >= TCPS_FIN_WAIT_2 && src->state >= TCPS_FIN_WAIT_2) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: state reuse "); pf_print_state(*state); pf_print_flags(th->th_flags); @@ -5278,21 +4736,31 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, } /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE(*state)) { - if (direction == PF_OUT) + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || + nk->port[pd->sidx] != th->th_sport) pf_change_ap(pd->src, &th->th_sport, pd->ip_sum, - &th->th_sum, &(*state)->gwy.addr, - (*state)->gwy.port, 0, pd->af); - else + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || + nk->port[pd->didx] != th->th_dport) pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum, - &th->th_sum, &(*state)->lan.addr, - (*state)->lan.port, 0, pd->af); - m_copyback(m, off, sizeof(*th), (caddr_t)th); - } else if (copyback) { - /* Copyback sequence modulation or stateful scrub changes */ - m_copyback(m, off, sizeof(*th), (caddr_t)th); + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, pd->af); + copyback = 1; } + /* Copyback sequence modulation or stateful scrub changes if needed */ + if (copyback) +#ifdef __FreeBSD__ + m_copyback(m, off, sizeof(*th), (caddr_t)th); +#else + m_copyback(m, off, sizeof(*th), th); +#endif + return (PF_PASS); } @@ -5301,24 +4769,28 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state_cmp key; + struct pf_state_key_cmp key; struct udphdr *uh = pd->hdr.udp; key.af = pd->af; key.proto = IPPROTO_UDP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = uh->uh_sport; - key.gwy.port = uh->uh_dport; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = uh->uh_sport; - key.ext.port = uh->uh_dport; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = uh->uh_sport; + key.port[1] = uh->uh_dport; + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = uh->uh_sport; + key.port[0] = uh->uh_dport; } - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif if (direction == (*state)->direction) { src = &(*state)->src; @@ -5342,16 +4814,25 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->timeout = PFTM_UDP_SINGLE; /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE(*state)) { - if (direction == PF_OUT) + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || + nk->port[pd->sidx] != uh->uh_sport) pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum, - &uh->uh_sum, &(*state)->gwy.addr, - (*state)->gwy.port, 1, pd->af); - else + &uh->uh_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || + nk->port[pd->didx] != uh->uh_dport) pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, - &uh->uh_sum, &(*state)->lan.addr, - (*state)->lan.port, 1, pd->af); + &uh->uh_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 1, pd->af); +#ifdef __FreeBSD__ m_copyback(m, off, sizeof(*uh), (caddr_t)uh); +#else + m_copyback(m, off, sizeof(*uh), uh); +#endif } return (PF_PASS); @@ -5361,12 +4842,15 @@ int pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { - struct pf_addr *saddr = pd->src, *daddr = pd->dst; - u_int16_t icmpid = 0; /* make the compiler happy */ - u_int16_t *icmpsum = NULL; /* make the compiler happy */ - u_int8_t icmptype = 0; /* make the compiler happy */ + struct pf_addr *saddr = pd->src, *daddr = pd->dst; +#ifdef __FreeBSD__ + u_int16_t icmpid = 0, *icmpsum; +#else + u_int16_t icmpid, *icmpsum; +#endif + u_int8_t icmptype; int state_icmp = 0; - struct pf_state_cmp key; + struct pf_state_key_cmp key; switch (pd->proto) { #ifdef INET @@ -5406,84 +4890,84 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, */ key.af = pd->af; key.proto = pd->proto; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = 0; - key.gwy.port = icmpid; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = icmpid; - key.ext.port = 0; + key.port[0] = key.port[1] = icmpid; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); } - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif (*state)->expire = time_second; (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE(*state)) { - if (direction == PF_OUT) { - switch (pd->af) { + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + switch (pd->af) { #ifdef INET - case AF_INET: + case AF_INET: + if (PF_ANEQ(pd->src, + &nk->addr[pd->sidx], AF_INET)) pf_change_a(&saddr->v4.s_addr, pd->ip_sum, - (*state)->gwy.addr.v4.s_addr, 0); - pd->hdr.icmp->icmp_cksum = - pf_cksum_fixup( - pd->hdr.icmp->icmp_cksum, icmpid, - (*state)->gwy.port, 0); - pd->hdr.icmp->icmp_id = - (*state)->gwy.port; - m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - pf_change_a6(saddr, - &pd->hdr.icmp6->icmp6_cksum, - &(*state)->gwy.addr, 0); - m_copyback(m, off, - sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); - break; -#endif /* INET6 */ - } - } else { - switch (pd->af) { -#ifdef INET - case AF_INET: + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], + AF_INET)) pf_change_a(&daddr->v4.s_addr, pd->ip_sum, - (*state)->lan.addr.v4.s_addr, 0); + nk->addr[pd->didx].v4.s_addr, 0); + + if (nk->port[0] != + pd->hdr.icmp->icmp_id) { pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( pd->hdr.icmp->icmp_cksum, icmpid, - (*state)->lan.port, 0); + nk->port[pd->sidx], 0); pd->hdr.icmp->icmp_id = - (*state)->lan.port; - m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); - break; + nk->port[pd->sidx]; + } + + m_copyback(m, off, ICMP_MINLEN, +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: + case AF_INET6: + if (PF_ANEQ(pd->src, + &nk->addr[pd->sidx], AF_INET6)) + pf_change_a6(saddr, + &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->sidx], 0); + + if (PF_ANEQ(pd->dst, + &nk->addr[pd->didx], AF_INET6)) pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, - &(*state)->lan.addr, 0); - m_copyback(m, off, - sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); - break; + &nk->addr[pd->didx], 0); + + m_copyback(m, off, + sizeof(struct icmp6_hdr), +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp6); + break; #endif /* INET6 */ - } } } - return (PF_PASS); } else { @@ -5493,6 +4977,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, */ struct pf_pdesc pd2; +#ifdef __FreeBSD__ + bzero(&pd2, sizeof pd2); +#endif #ifdef INET struct ip h2; #endif /* INET */ @@ -5500,10 +4987,18 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct ip6_hdr h2_6; int terminal = 0; #endif /* INET6 */ - int ipoff2 = 0; /* make the compiler happy */ - int off2 = 0; /* make the compiler happy */ +#ifdef __FreeBSD__ + int ipoff2 = 0; + int off2 = 0; +#else + int ipoff2; + int off2; +#endif pd2.af = pd->af; + /* Payload packet is from the opposite direction. */ + pd2.sidx = (direction == PF_IN) ? 1 : 0; + pd2.didx = (direction == PF_IN) ? 0 : 1; switch (pd->af) { #ifdef INET case AF_INET: @@ -5589,10 +5084,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } while (!terminal); break; #endif /* INET6 */ -#ifdef __FreeBSD__ - default: - panic("AF not supported: %d", pd->af); -#endif } switch (pd2.proto) { @@ -5618,19 +5109,16 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_TCP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = th.th_dport; - key.gwy.port = th.th_sport; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = th.th_dport; - key.ext.port = th.th_sport; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[pd2.sidx] = th.th_sport; + key.port[pd2.didx] = th.th_dport; - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif if (direction == (*state)->direction) { src = &(*state)->dst; @@ -5656,7 +5144,11 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, if (!((*state)->state_flags & PFSTATE_SLOPPY) && (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: BAD ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); pf_print_host(pd->src, 0, pd->af); @@ -5668,22 +5160,47 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); + } else { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { +#endif + printf("pf: OK ICMP %d:%d ", + icmptype, pd->hdr.icmp->icmp_code); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); + printf(" state: "); + pf_print_state(*state); + printf(" seq=%u\n", seq); + } } - if (STATE_TRANSLATE(*state)) { - if (direction == PF_IN) { + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != th.th_sport) pf_change_icmp(pd2.src, &th.th_sport, - daddr, &(*state)->lan.addr, - (*state)->lan.port, NULL, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != th.th_dport) pf_change_icmp(pd2.dst, &th.th_dport, - saddr, &(*state)->gwy.addr, - (*state)->gwy.port, NULL, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } copyback = 1; } @@ -5692,22 +5209,38 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), - (caddr_t)&h2); +#ifdef __FreeBSD__ + (caddr_t) +#endif + &h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), - (caddr_t)&h2_6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + &h2_6); break; #endif /* INET6 */ } +#ifdef __FreeBSD__ m_copyback(m, off2, 8, (caddr_t)&th); +#else + m_copyback(m, off2, 8, &th); +#endif } return (PF_PASS); @@ -5726,57 +5259,79 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_UDP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = uh.uh_dport; - key.gwy.port = uh.uh_sport; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = uh.uh_dport; - key.ext.port = uh.uh_sport; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[pd2.sidx] = uh.uh_sport; + key.port[pd2.didx] = uh.uh_dport; - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif - if (STATE_TRANSLATE(*state)) { - if (direction == PF_IN) { + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != uh.uh_sport) pf_change_icmp(pd2.src, &uh.uh_sport, - daddr, &(*state)->lan.addr, - (*state)->lan.port, &uh.uh_sum, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != uh.uh_dport) pf_change_icmp(pd2.dst, &uh.uh_dport, - saddr, &(*state)->gwy.addr, - (*state)->gwy.port, &uh.uh_sum, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], + nk->port[pd2.didx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); - } + switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); - m_copyback(m, ipoff2, sizeof(h2), - (caddr_t)&h2); +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp); +#ifdef __FreeBSD__ + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); +#else + m_copyback(m, ipoff2, sizeof(h2), &h2); +#endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), - (caddr_t)&h2_6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + &h2_6); break; #endif /* INET6 */ } - m_copyback(m, off2, sizeof(uh), - (caddr_t)&uh); +#ifdef __FreeBSD__ + m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); +#else + m_copyback(m, off2, sizeof(uh), &uh); +#endif } - return (PF_PASS); break; } @@ -5794,42 +5349,51 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_ICMP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = 0; - key.gwy.port = iih.icmp_id; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = iih.icmp_id; - key.ext.port = 0; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = iih.icmp_id; - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif - if (STATE_TRANSLATE(*state)) { - if (direction == PF_IN) { + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != iih.icmp_id) pf_change_icmp(pd2.src, &iih.icmp_id, - daddr, &(*state)->lan.addr, - (*state)->lan.port, NULL, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != iih.icmp_id) pf_change_icmp(pd2.dst, &iih.icmp_id, - saddr, &(*state)->gwy.addr, - (*state)->gwy.port, NULL, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); - } - m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); - m_copyback(m, ipoff2, sizeof(h2), - (caddr_t)&h2); - m_copyback(m, off2, ICMP_MINLEN, - (caddr_t)&iih); - } +#ifdef __FreeBSD__ + m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); + m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); +#else + m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), &h2); + m_copyback(m, off2, ICMP_MINLEN, &iih); +#endif + } return (PF_PASS); break; } @@ -5848,42 +5412,55 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_ICMPV6; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = 0; - key.gwy.port = iih.icmp6_id; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = iih.icmp6_id; - key.ext.port = 0; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = iih.icmp6_id; + +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif - STATE_LOOKUP(); + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; - if (STATE_TRANSLATE(*state)) { - if (direction == PF_IN) { + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != iih.icmp6_id) pf_change_icmp(pd2.src, &iih.icmp6_id, - daddr, &(*state)->lan.addr, - (*state)->lan.port, NULL, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != iih.icmp6_id) pf_change_icmp(pd2.dst, &iih.icmp6_id, - saddr, &(*state)->gwy.addr, - (*state)->gwy.port, NULL, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); - } + +#ifdef __FreeBSD__ m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t)pd->hdr.icmp6); - m_copyback(m, ipoff2, sizeof(h2_6), - (caddr_t)&h2_6); + m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); m_copyback(m, off2, sizeof(struct icmp6_hdr), (caddr_t)&iih); +#else + m_copyback(m, off, sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); + m_copyback(m, off2, sizeof(struct icmp6_hdr), + &iih); +#endif } - return (PF_PASS); break; } @@ -5891,55 +5468,68 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, default: { key.af = pd2.af; key.proto = pd2.proto; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = 0; - key.gwy.port = 0; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = 0; - key.ext.port = 0; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = 0; - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif - if (STATE_TRANSLATE(*state)) { - if (direction == PF_IN) { - pf_change_icmp(pd2.src, NULL, - daddr, &(*state)->lan.addr, - 0, NULL, + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af)) + pf_change_icmp(pd2.src, NULL, daddr, + &nk->addr[pd2.sidx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } else { - pf_change_icmp(pd2.dst, NULL, - saddr, &(*state)->gwy.addr, - 0, NULL, + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af)) + pf_change_icmp(pd2.src, NULL, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } + switch (pd2.af) { #ifdef INET case AF_INET: +#ifdef __FreeBSD__ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); - m_copyback(m, ipoff2, sizeof(h2), - (caddr_t)&h2); + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); +#else + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), &h2); +#endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), - (caddr_t)&h2_6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + &h2_6); break; #endif /* INET6 */ } } - return (PF_PASS); break; } @@ -5949,26 +5539,28 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, - struct pf_pdesc *pd) + struct mbuf *m, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state_cmp key; + struct pf_state_key_cmp key; key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = 0; - key.gwy.port = 0; + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = key.port[1] = 0; } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = 0; - key.ext.port = 0; + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = key.port[0] = 0; } - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif if (direction == (*state)->direction) { src = &(*state)->src; @@ -5992,39 +5584,48 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->timeout = PFTM_OTHER_SINGLE; /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE(*state)) { - if (direction == PF_OUT) - switch (pd->af) { + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + +#ifdef __FreeBSD__ + KASSERT(nk, ("%s: nk is null", __FUNCTION__)); + KASSERT(pd, ("%s: pd is null", __FUNCTION__)); + KASSERT(pd->src, ("%s: pd->src is null", __FUNCTION__)); + KASSERT(pd->dst, ("%s: pd->dst is null", __FUNCTION__)); +#else + KASSERT(nk); + KASSERT(pd); + KASSERT(pd->src); + KASSERT(pd->dst); +#endif + switch (pd->af) { #ifdef INET - case AF_INET: + case AF_INET: + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&pd->src->v4.s_addr, - pd->ip_sum, (*state)->gwy.addr.v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af); - break; -#endif /* INET6 */ - } - else - switch (pd->af) { -#ifdef INET - case AF_INET: + + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) pf_change_a(&pd->dst->v4.s_addr, - pd->ip_sum, (*state)->lan.addr.v4.s_addr, + pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af); - break; + case AF_INET6: + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) + PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) + PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); #endif /* INET6 */ - } + } } - return (PF_PASS); } @@ -6080,8 +5681,14 @@ pf_pull_hdr(struct mbuf *m, int off, void *p, int len, } int -pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) +pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, + int rtableid) { +#ifdef __FreeBSD__ +#ifdef RADIX_MPATH + struct radix_node_head *rnh; +#endif +#endif struct sockaddr_in *dst; int ret = 1; int check_mpath; @@ -6102,6 +5709,14 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) struct ifnet *ifp; check_mpath = 0; +#ifdef __FreeBSD__ +#ifdef RADIX_MPATH + /* XXX: stick to table 0 for now */ + rnh = rt_tables_get_rnh(0, af); + if (rnh != NULL && rn_mpath_capable(rnh)) + check_mpath = 1; +#endif +#endif bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: @@ -6109,18 +5724,24 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; -#ifndef __FreeBSD__ /* MULTIPATH_ROUTING */ +#ifndef __FreeBSD__ if (ipmultipath) check_mpath = 1; #endif break; #ifdef INET6 case AF_INET6: + /* + * Skip check for addresses with embedded interface scope, + * as they would always match anyway. + */ + if (IN6_IS_SCOPE_EMBED(&addr->v6)) + goto out; dst6 = (struct sockaddr_in6 *)&ro.ro_dst; dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; -#ifndef __FreeBSD__ /* MULTIPATH_ROUTING */ +#ifndef __FreeBSD__ if (ip6_multipath) check_mpath = 1; #endif @@ -6135,13 +5756,21 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) goto out; #ifdef __FreeBSD__ -/* XXX MRT not always INET */ /* stick with table 0 though */ - if (af == AF_INET) - in_rtalloc_ign((struct route *)&ro, 0, RT_DEFAULT_FIB); + switch (af) { #ifdef INET6 - else - in6_rtalloc_ign(&ro, 0, RT_DEFAULT_FIB); + case AF_INET6: + in6_rtalloc_ign(&ro, 0, rtableid); + break; #endif +#ifdef INET + case AF_INET: + in_rtalloc_ign((struct route *)&ro, 0, rtableid); + break; +#endif + default: + rtalloc_ign((struct route *)&ro, 0); /* No/default FIB. */ + break; + } #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); #endif @@ -6170,11 +5799,13 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) if (kif->pfik_ifp == ifp) ret = 1; -#ifdef __FreeBSD__ /* MULTIPATH_ROUTING */ - rn = NULL; -#else +#ifdef __FreeBSD__ +#ifdef RADIX_MPATH rn = rn_mpath_next(rn); #endif +#else + rn = rn_mpath_next(rn, 0); +#endif } while (check_mpath == 1 && rn != NULL && ret == 0); } else ret = 0; @@ -6185,7 +5816,8 @@ out: } int -pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) +pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, + int rtableid) { struct sockaddr_in *dst; #ifdef INET6 @@ -6217,12 +5849,21 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) } #ifdef __FreeBSD__ - if (af == AF_INET) - in_rtalloc_ign((struct route *)&ro, 0, RT_DEFAULT_FIB); + switch (af) { #ifdef INET6 - else - in6_rtalloc_ign(&ro, 0, RT_DEFAULT_FIB); + case AF_INET6: + in6_rtalloc_ign(&ro, 0, rtableid); + break; #endif +#ifdef INET + case AF_INET: + in_rtalloc_ign((struct route *)&ro, 0, rtableid); + break; +#endif + default: + rtalloc_ign((struct route *)&ro, 0); + break; + } #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); #endif @@ -6241,7 +5882,6 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) } #ifdef INET - void pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd) @@ -6266,7 +5906,11 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route: invalid parameters"); +#ifdef __FreeBSD__ if (pd->pf_mtag->routed++ > 3) { +#else + if ((*m)->m_pkthdr.pf.routed++ > 3) { +#endif m0 = *m; *m = NULL; goto bad; @@ -6301,9 +5945,17 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, dst->sin_addr = ip->ip_dst; if (r->rt == PF_FASTROUTE) { - in_rtalloc(ro, 0); +#ifdef __FreeBSD__ + in_rtalloc_ign(ro, 0, M_GETFIB(m0)); +#else + rtalloc(ro); +#endif if (ro->ro_rt == 0) { +#ifdef __FreeBSD__ KMOD_IPSTAT_INC(ips_noroute); +#else + ipstat.ips_noroute++; +#endif goto bad; } @@ -6369,7 +6021,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least) */ NTOHS(ip->ip_len); - NTOHS(ip->ip_off); /* XXX: needed? */ + NTOHS(ip->ip_off); /* XXX: needed? */ in_delayed_cksum(m0); HTONS(ip->ip_len); HTONS(ip->ip_off); @@ -6378,9 +6030,8 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, m0->m_pkthdr.csum_flags &= ifp->if_hwassist; if (ntohs(ip->ip_len) <= ifp->if_mtu || - (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || (ifp->if_hwassist & CSUM_FRAGMENT && - ((ip->ip_off & htons(IP_DF)) == 0))) { + ((ip->ip_off & htons(IP_DF)) == 0))) { /* * ip->ip_len = htons(ip->ip_len); * ip->ip_off = htons(ip->ip_off); @@ -6400,7 +6051,6 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, PF_LOCK(); goto done; } - #else /* Copied from ip_output. */ #ifdef IPSEC @@ -6421,25 +6071,28 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clr */ } } else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) { if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clr */ } } if (ntohs(ip->ip_len) <= ifp->if_mtu) { + ip->ip_sum = 0; if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && ifp->if_bridge == NULL) { m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; +#ifdef __FreeBSD__ KMOD_IPSTAT_INC(ips_outhwcsum); - } else { - ip->ip_sum = 0; +#else + ipstat.ips_outhwcsum++; +#endif + } else ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); - } /* Update relevant hardware checksum stats for TCP/UDP */ if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) KMOD_TCPSTAT_INC(tcps_outhwcsum); @@ -6449,12 +6102,17 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, goto done; } #endif + /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. */ - if (ip->ip_off & htons(IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { + if (ip->ip_off & htons(IP_DF)) { +#ifdef __FreeBSD__ KMOD_IPSTAT_INC(ips_cantfrag); +#else + ipstat.ips_cantfrag++; +#endif if (r->rt != PF_DUPTO) { #ifdef __FreeBSD__ /* icmp_error() expects host byte ordering */ @@ -6485,7 +6143,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, error = ip_fragment(m0, ifp, ifp->if_mtu); #endif if (error) { -#ifndef __FreeBSD__ /* ip_fragment does not do m_freem() on FreeBSD */ +#ifndef __FreeBSD__ /* ip_fragment does not do m_freem() on FreeBSD */ m0 = NULL; #endif goto bad; @@ -6511,7 +6169,11 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, } if (error == 0) +#ifdef __FreeBSD__ KMOD_IPSTAT_INC(ips_fragmented); +#else + ipstat.ips_fragmented++; +#endif done: if (r->rt != PF_DUPTO) @@ -6539,13 +6201,16 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; - int error = 0; if (m == NULL || *m == NULL || r == NULL || (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route6: invalid parameters"); +#ifdef __FreeBSD__ if (pd->pf_mtag->routed++ > 3) { +#else + if ((*m)->m_pkthdr.pf.routed++ > 3) { +#endif m0 = *m; *m = NULL; goto bad; @@ -6584,14 +6249,9 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, m0->m_flags |= M_SKIP_FIREWALL; PF_UNLOCK(); ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); - PF_LOCK(); #else - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m0, mtag); - pd->pf_mtag->flags |= PF_TAG_GENERATED; - ip6_output(m0, NULL, NULL, 0, NULL, NULL); + m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; + ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); #endif return; } @@ -6652,7 +6312,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, #ifdef __FreeBSD__ PF_UNLOCK(); #endif - error = nd6_output(ifp, ifp, m0, dst, NULL); + nd6_output(ifp, ifp, m0, dst, NULL); #ifdef __FreeBSD__ PF_LOCK(); #endif @@ -6663,7 +6323,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, PF_UNLOCK(); icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); PF_LOCK(); - } else + } else #else if (r->rt != PF_DUPTO) icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); @@ -6683,7 +6343,6 @@ bad: } #endif /* INET6 */ - #ifdef __FreeBSD__ /* * FreeBSD supports cksum offloads for the following drivers. @@ -6721,10 +6380,10 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { sum = m->m_pkthdr.csum_data; } else { - ip = mtod(m, struct ip *); + ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htonl((u_short)len + - m->m_pkthdr.csum_data + IPPROTO_TCP)); + ip->ip_dst.s_addr, htonl((u_short)len + + m->m_pkthdr.csum_data + IPPROTO_TCP)); } sum ^= 0xffff; ++hw_assist; @@ -6735,14 +6394,14 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { sum = m->m_pkthdr.csum_data; } else { - ip = mtod(m, struct ip *); + ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htonl((u_short)len + - m->m_pkthdr.csum_data + IPPROTO_UDP)); + ip->ip_dst.s_addr, htonl((u_short)len + + m->m_pkthdr.csum_data + IPPROTO_UDP)); } sum ^= 0xffff; ++hw_assist; - } + } break; case IPPROTO_ICMP: #ifdef INET6 @@ -6793,11 +6452,13 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a KMOD_UDPSTAT_INC(udps_badsum); break; } +#ifdef INET case IPPROTO_ICMP: { KMOD_ICMPSTAT_INC(icps_checksum); break; } +#endif #ifdef INET6 case IPPROTO_ICMPV6: { @@ -6817,6 +6478,7 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a return (0); } #else /* !__FreeBSD__ */ + /* * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag * off is the offset where the protocol header starts @@ -6893,9 +6555,11 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, case IPPROTO_UDP: KMOD_UDPSTAT_INC(udps_badsum); break; +#ifdef INET case IPPROTO_ICMP: KMOD_ICMPSTAT_INC(icps_checksum); break; +#endif #ifdef INET6 case IPPROTO_ICMPV6: KMOD_ICMP6STAT_INC(icp6s_checksum); @@ -6907,7 +6571,37 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, m->m_pkthdr.csum_flags |= flag_ok; return (0); } -#endif /* __FreeBSD__ */ +#endif + +#ifndef __FreeBSD__ +struct pf_divert * +pf_find_divert(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) + return (NULL); + + return ((struct pf_divert *)(mtag + 1)); +} + +struct pf_divert * +pf_get_divert(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { + mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), + M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_divert)); + m_tag_prepend(m, mtag); + } + + return ((struct pf_divert *)(mtag + 1)); +} +#endif #ifdef INET int @@ -6922,8 +6616,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0; - struct ip *h = NULL; /* make the compiler happy */ +#ifdef __FreeBSD__ + struct ip *h = NULL; + struct m_tag *ipfwtag; + struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; +#else + struct ip *h; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; +#endif struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; @@ -6931,44 +6631,32 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, #ifdef __FreeBSD__ PF_LOCK(); -#endif - if (!pf_status.running) -#ifdef __FreeBSD__ + if (!V_pf_status.running) { PF_UNLOCK(); -#endif return (PF_PASS); -#ifdef __FreeBSD__ } +#else + if (!pf_status.running) + return (PF_PASS); #endif memset(&pd, 0, sizeof(pd)); - if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { #ifdef __FreeBSD__ + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { PF_UNLOCK(); -#endif DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: pf_get_mtag returned NULL\n")); return (PF_DROP); } -#ifdef __FreeBSD__ - if (m->m_flags & M_SKIP_FIREWALL) { - PF_UNLOCK(); - return (PF_PASS); - } -#else - if (pd.pf_mtag->flags & PF_TAG_GENERATED) - return (PF_PASS); #endif - -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ -#else +#ifndef __FreeBSD__ if (ifp->if_type == IFT_CARP && ifp->if_carpdev) - ifp = ifp->if_carpdev; + kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; + else #endif + kif = (struct pfi_kif *)ifp->if_pf_kif; - kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { #ifdef __FreeBSD__ PF_UNLOCK(); @@ -6977,12 +6665,15 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } - if (kif->pfik_flags & PFI_IFLAG_SKIP) { + if (kif->pfik_flags & PFI_IFLAG_SKIP) #ifdef __FreeBSD__ + { PF_UNLOCK(); #endif return (PF_PASS); +#ifdef __FreeBSD__ } +#endif #ifdef __FreeBSD__ M_ASSERTPKTHDR(m); @@ -6991,7 +6682,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, if ((m->m_flags & M_PKTHDR) == 0) panic("non-M_PKTHDR is passed to pf_test"); #endif /* DIAGNOSTIC */ -#endif /* __FreeBSD__ */ +#endif if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; @@ -7000,12 +6691,36 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, goto done; } +#ifdef __FreeBSD__ + if (m->m_flags & M_SKIP_FIREWALL) { + PF_UNLOCK(); + return (PF_PASS); + } +#else + if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) + return (PF_PASS); +#endif + +#ifdef __FreeBSD__ + if (ip_divert_ptr != NULL && + ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { + struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); + if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { + pd.pf_mtag->flags |= PF_PACKET_LOOPED; + m_tag_delete(m, ipfwtag); + } + if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { + m->m_flags |= M_FASTFWD_OURS; + pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; + } + } else +#endif /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } - m = *m0; + m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip *); off = h->ip_hl << 2; @@ -7018,9 +6733,13 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, pd.src = (struct pf_addr *)&h->ip_src; pd.dst = (struct pf_addr *)&h->ip_dst; - PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET); + pd.sport = pd.dport = NULL; pd.ip_sum = &h->ip_sum; + pd.proto_sum = NULL; pd.proto = h->ip_p; + pd.dir = dir; + pd.sidx = (dir == PF_IN) ? 0 : 1; + pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET; pd.tos = h->ip_tos; pd.tot_len = ntohs(h->ip_len); @@ -7044,12 +6763,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, - ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) { - REASON_SET(&reason, PFRES_PROTCKSUM); - action = PF_DROP; - goto done; - } pd.p_len = pd.tot_len - off - (th.th_off << 2); if ((th.th_flags & TH_ACK) && pd.p_len == 0) pqid = 1; @@ -7059,18 +6772,23 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_tcp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_tcp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); #endif break; @@ -7085,12 +6803,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, - off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_PROTCKSUM); - goto done; - } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { @@ -7100,18 +6812,23 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_udp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_udp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); #endif break; @@ -7126,47 +6843,60 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, - ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_PROTCKSUM); - goto done; - } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_icmp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL); + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_icmp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); #endif break; } +#ifdef INET6 + case IPPROTO_ICMPV6: { + action = PF_DROP; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping IPv4 packet with ICMPv6 payload\n")); + goto done; + } +#endif + default: - action = pf_test_state_other(&s, dir, kif, &pd); + action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_other(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, NULL); + action = pf_test_rule(&r, &s, dir, kif, m, off, h, + &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_other(&r, &s, dir, kif, m, off, h, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); #endif break; @@ -7182,18 +6912,38 @@ done: ("pf: dropping packet with ip options\n")); } - if ((s && s->tag) || r->rtableid) - pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid); + if ((s && s->tag) || r->rtableid >= 0) +#ifdef __FreeBSD__ + pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag); +#else + pf_tag_packet(m, s ? s->tag : 0, r->rtableid); +#endif + + if (dir == PF_IN && s && s->key[PF_SK_STACK]) +#ifdef __FreeBSD__ + pd.pf_mtag->statekey = s->key[PF_SK_STACK]; +#else + m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; +#endif #ifdef ALTQ if (action == PF_PASS && r->qid) { +#ifdef __FreeBSD__ if (pqid || (pd.tos & IPTOS_LOWDELAY)) pd.pf_mtag->qid = r->pqid; else pd.pf_mtag->qid = r->qid; /* add hints for ecn */ - pd.pf_mtag->af = AF_INET; pd.pf_mtag->hdr = h; + +#else + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + m->m_pkthdr.pf.qid = r->pqid; + else + m->m_pkthdr.pf.qid = r->qid; + /* add hints for ecn */ + m->m_pkthdr.pf.hdr = h; +#endif } #endif /* ALTQ */ @@ -7207,7 +6957,56 @@ done: (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) - pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; +#ifdef __FreeBSD__ + m->m_flags |= M_SKIP_FIREWALL; +#else + m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; +#endif + +#ifdef __FreeBSD__ + if (action == PF_PASS && r->divert.port && + ip_divert_ptr != NULL && !PACKET_LOOPED()) { + + ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, + sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); + if (ipfwtag != NULL) { + ((struct ipfw_rule_ref *)(ipfwtag+1))->info = + ntohs(r->divert.port); + ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; + + m_tag_prepend(m, ipfwtag); + + PF_UNLOCK(); + + if (m->m_flags & M_FASTFWD_OURS) { + pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; + m->m_flags &= ~M_FASTFWD_OURS; + } + + ip_divert_ptr(*m0, + dir == PF_IN ? DIR_IN : DIR_OUT); + *m0 = NULL; + return (action); + } else { + /* XXX: ipfw has the same behaviour! */ + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate divert tag\n")); + } + } +#else + if (dir == PF_IN && action == PF_PASS && r->divert.port) { + struct pf_divert *divert; + + if ((divert = pf_get_divert(m))) { + m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; + divert->port = r->divert.port; + divert->addr.ipv4 = r->divert.addr.v4; + } + } +#endif if (log) { struct pf_rule *lr; @@ -7251,53 +7050,44 @@ done: } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; - if (nr != NULL) { - struct pf_addr *x; - /* - * XXX: we need to make sure that the addresses - * passed to pfr_update_stats() are the same than - * the addresses used during matching (pfr_match) - */ - if (r == &pf_default_rule) { - tr = nr; - x = (s == NULL || s->direction == dir) ? - &pd.baddr : &pd.naddr; - } else - x = (s == NULL || s->direction == dir) ? - &pd.naddr : &pd.baddr; - if (x == &pd.baddr || s == NULL) { - /* we need to change the address */ - if (dir == PF_OUT) - pd.src = x; - else - pd.dst = x; - } - } +#ifdef __FreeBSD__ + if (nr != NULL && r == &V_pf_default_rule) +#else + if (nr != NULL && r == &pf_default_rule) +#endif + tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || - s->direction == dir) ? pd.src : pd.dst, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->src.neg); + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL) ? pd.src : + &s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_OUT)], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || - s->direction == dir) ? pd.dst : pd.src, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->dst.neg); + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL) ? pd.dst : + &s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_IN)], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->dst.neg); } - - if (action == PF_SYNPROXY_DROP) { + switch (action) { + case PF_SYNPROXY_DROP: m_freem(*m0); + case PF_DEFER: *m0 = NULL; action = PF_PASS; - } else if (r->rt) + break; + default: /* pf_route can free the mbuf causing *m0 to become NULL */ - pf_route(m0, r, dir, ifp, s, &pd); - + if (r->rt) + pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); + break; + } #ifdef __FreeBSD__ PF_UNLOCK(); #endif - return (action); } #endif /* INET */ @@ -7315,8 +7105,13 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0, *n = NULL; +#ifdef __FreeBSD__ + struct ip6_hdr *h = NULL; + struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; +#else struct ip6_hdr *h; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; +#endif struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; @@ -7324,38 +7119,31 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, #ifdef __FreeBSD__ PF_LOCK(); -#endif - - if (!pf_status.running) -#ifdef __FreeBSD__ - { + if (!V_pf_status.running) { PF_UNLOCK(); -#endif return (PF_PASS); -#ifdef __FreeBSD__ } +#else + if (!pf_status.running) + return (PF_PASS); #endif memset(&pd, 0, sizeof(pd)); - if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { #ifdef __FreeBSD__ + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { PF_UNLOCK(); -#endif DPFPRINTF(PF_DEBUG_URGENT, - ("pf_test6: pf_get_mtag returned NULL\n")); + ("pf_test: pf_get_mtag returned NULL\n")); return (PF_DROP); } - if (pd.pf_mtag->flags & PF_TAG_GENERATED) - return (PF_PASS); - -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ -#else +#endif +#ifndef __FreeBSD__ if (ifp->if_type == IFT_CARP && ifp->if_carpdev) - ifp = ifp->if_carpdev; + kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; + else #endif + kif = (struct pfi_kif *)ifp->if_pf_kif; - kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { #ifdef __FreeBSD__ PF_UNLOCK(); @@ -7364,12 +7152,15 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } - if (kif->pfik_flags & PFI_IFLAG_SKIP) { + if (kif->pfik_flags & PFI_IFLAG_SKIP) #ifdef __FreeBSD__ + { PF_UNLOCK(); #endif return (PF_PASS); +#ifdef __FreeBSD__ } +#endif #ifdef __FreeBSD__ M_ASSERTPKTHDR(m); @@ -7380,10 +7171,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, #endif /* DIAGNOSTIC */ #endif -#ifdef __FreeBSD__ - h = NULL; /* make the compiler happy */ -#endif - if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); @@ -7391,12 +7178,23 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, goto done; } +#ifdef __FreeBSD__ + if (pd.pf_mtag->flags & PF_TAG_GENERATED) { + PF_UNLOCK(); +#else + if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) +#endif + return (PF_PASS); +#ifdef __FreeBSD__ + } +#endif + /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } - m = *m0; + m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip6_hdr *); #if 1 @@ -7413,8 +7211,12 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; - PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6); + pd.sport = pd.dport = NULL; pd.ip_sum = NULL; + pd.proto_sum = NULL; + pd.dir = dir; + pd.sidx = (dir == PF_IN) ? 0 : 1; + pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET6; pd.tos = 0; pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); @@ -7458,7 +7260,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = 1; goto done; } - /* fallthrough */ + /* FALLTHROUGH */ } case IPPROTO_AH: case IPPROTO_HOPOPTS: @@ -7503,13 +7305,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(n, off, - ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), - IPPROTO_TCP, AF_INET6)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_PROTCKSUM); - goto done; - } pd.p_len = pd.tot_len - off - (th.th_off << 2); action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); if (action == PF_DROP) @@ -7517,18 +7312,23 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_tcp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_tcp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); #endif break; @@ -7543,13 +7343,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n, - off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), - IPPROTO_UDP, AF_INET6)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_PROTCKSUM); - goto done; - } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { @@ -7559,23 +7352,35 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_udp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_udp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); #endif break; } + case IPPROTO_ICMP: { + action = PF_DROP; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping IPv6 packet with ICMPv4 payload\n")); + goto done; + } + case IPPROTO_ICMPV6: { struct icmp6_hdr ih; @@ -7585,54 +7390,62 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(n, off, - ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), - IPPROTO_ICMPV6, AF_INET6)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_PROTCKSUM); - goto done; - } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_icmp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL); + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_icmp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); #endif break; } default: - action = pf_test_state_other(&s, dir, kif, &pd); + action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_other(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, NULL); + action = pf_test_rule(&r, &s, dir, kif, m, off, h, + &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_other(&r, &s, dir, kif, m, off, h, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); #endif break; } done: + if (n != m) { + m_freem(n); + n = NULL; + } + /* handle dangerous IPv6 extension headers. */ if (action == PF_PASS && rh_cnt && !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { @@ -7643,18 +7456,37 @@ done: ("pf: dropping packet with dangerous v6 headers\n")); } - if ((s && s->tag) || r->rtableid) - pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid); + if ((s && s->tag) || r->rtableid >= 0) +#ifdef __FreeBSD__ + pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag); +#else + pf_tag_packet(m, s ? s->tag : 0, r->rtableid); +#endif + + if (dir == PF_IN && s && s->key[PF_SK_STACK]) +#ifdef __FreeBSD__ + pd.pf_mtag->statekey = s->key[PF_SK_STACK]; +#else + m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; +#endif #ifdef ALTQ if (action == PF_PASS && r->qid) { +#ifdef __FreeBSD__ if (pd.tos & IPTOS_LOWDELAY) pd.pf_mtag->qid = r->pqid; else pd.pf_mtag->qid = r->qid; /* add hints for ecn */ - pd.pf_mtag->af = AF_INET6; pd.pf_mtag->hdr = h; +#else + if (pd.tos & IPTOS_LOWDELAY) + m->m_pkthdr.pf.qid = r->pqid; + else + m->m_pkthdr.pf.qid = r->qid; + /* add hints for ecn */ + m->m_pkthdr.pf.hdr = h; +#endif } #endif /* ALTQ */ @@ -7663,7 +7495,27 @@ done: (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) - pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; +#ifdef __FreeBSD__ + m->m_flags |= M_SKIP_FIREWALL; +#else + m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; +#endif + +#ifdef __FreeBSD__ + /* XXX: Anybody working on it?! */ + if (r->divert.port) + printf("pf: divert(9) is not supported for IPv6\n"); +#else + if (dir == PF_IN && action == PF_PASS && r->divert.port) { + struct pf_divert *divert; + + if ((divert = pf_get_divert(m))) { + m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; + divert->port = r->divert.port; + divert->addr.ipv6 = r->divert.addr.v6; + } + } +#endif if (log) { struct pf_rule *lr; @@ -7707,48 +7559,39 @@ done: } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; - if (nr != NULL) { - struct pf_addr *x; - /* - * XXX: we need to make sure that the addresses - * passed to pfr_update_stats() are the same than - * the addresses used during matching (pfr_match) - */ - if (r == &pf_default_rule) { - tr = nr; - x = (s == NULL || s->direction == dir) ? - &pd.baddr : &pd.naddr; - } else { - x = (s == NULL || s->direction == dir) ? - &pd.naddr : &pd.baddr; - } - if (x == &pd.baddr || s == NULL) { - if (dir == PF_OUT) - pd.src = x; - else - pd.dst = x; - } - } +#ifdef __FreeBSD__ + if (nr != NULL && r == &V_pf_default_rule) +#else + if (nr != NULL && r == &pf_default_rule) +#endif + tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || - s->direction == dir) ? pd.src : pd.dst, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->src.neg); + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL) ? pd.src : + &s->key[(s->direction == PF_IN)]->addr[0], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || - s->direction == dir) ? pd.dst : pd.src, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->dst.neg); + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL) ? pd.dst : + &s->key[(s->direction == PF_IN)]->addr[1], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->dst.neg); } - - if (action == PF_SYNPROXY_DROP) { + switch (action) { + case PF_SYNPROXY_DROP: m_freem(*m0); + case PF_DEFER: *m0 = NULL; action = PF_PASS; - } else if (r->rt) + break; + default: /* pf_route6 can free the mbuf causing *m0 to become NULL */ - pf_route6(m0, r, dir, ifp, s, &pd); + if (r->rt) + pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); + break; + } #ifdef __FreeBSD__ PF_UNLOCK(); @@ -7770,3 +7613,20 @@ pf_check_congestion(struct ifqueue *ifq) return (0); #endif } + +/* + * must be called whenever any addressing information such as + * address, port, protocol has changed + */ +void +pf_pkt_addr_changed(struct mbuf *m) +{ +#ifdef __FreeBSD__ + struct pf_mtag *pf_tag; + + if ((pf_tag = pf_find_mtag(m)) != NULL) + pf_tag->statekey = NULL; +#else + m->m_pkthdr.pf.statekey = NULL; +#endif +} diff --git a/freebsd/sys/contrib/pf/net/pf_if.c b/freebsd/sys/contrib/pf/net/pf_if.c index 8ff3c614..3ac645f9 100644 --- a/freebsd/sys/contrib/pf/net/pf_if.c +++ b/freebsd/sys/contrib/pf/net/pf_if.c @@ -1,6 +1,6 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $OpenBSD: pf_if.c,v 1.46 2006/12/13 09:01:59 itojun Exp $ */ +/* $OpenBSD: pf_if.c,v 1.54 2008/06/14 16:55:28 mk Exp $ */ /* * Copyright 2005 Henning Brauer <henning@openbsd.org> @@ -56,6 +56,9 @@ __FBSDID("$FreeBSD$"); #include <sys/device.h> #endif #include <rtems/bsd/sys/time.h> +#ifndef __FreeBSD__ +#include <sys/pool.h> +#endif #include <net/if.h> #include <net/if_types.h> @@ -75,25 +78,35 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip6.h> #endif /* INET6 */ -struct pfi_kif *pfi_all = NULL; -struct pfi_statehead pfi_statehead; #ifdef __FreeBSD__ -uma_zone_t pfi_addr_pl; +VNET_DEFINE(struct pfi_kif *, pfi_all); +VNET_DEFINE(uma_zone_t, pfi_addr_pl); +VNET_DEFINE(struct pfi_ifhead, pfi_ifs); +#define V_pfi_ifs VNET(pfi_ifs) +VNET_DEFINE(long, pfi_update); +#define V_pfi_update VNET(pfi_update) +VNET_DEFINE(struct pfr_addr *, pfi_buffer); +#define V_pfi_buffer VNET(pfi_buffer) +VNET_DEFINE(int, pfi_buffer_cnt); +#define V_pfi_buffer_cnt VNET(pfi_buffer_cnt) +VNET_DEFINE(int, pfi_buffer_max); +#define V_pfi_buffer_max VNET(pfi_buffer_max) #else +struct pfi_kif *pfi_all = NULL; struct pool pfi_addr_pl; -#endif struct pfi_ifhead pfi_ifs; long pfi_update = 1; struct pfr_addr *pfi_buffer; int pfi_buffer_cnt; int pfi_buffer_max; +#endif #ifdef __FreeBSD__ -eventhandler_tag pfi_attach_cookie = NULL; -eventhandler_tag pfi_detach_cookie = NULL; -eventhandler_tag pfi_attach_group_cookie = NULL; -eventhandler_tag pfi_change_group_cookie = NULL; -eventhandler_tag pfi_detach_group_cookie = NULL; -eventhandler_tag pfi_ifaddr_event_cookie = NULL; +eventhandler_tag pfi_attach_cookie; +eventhandler_tag pfi_detach_cookie; +eventhandler_tag pfi_attach_group_cookie; +eventhandler_tag pfi_change_group_cookie; +eventhandler_tag pfi_detach_group_cookie; +eventhandler_tag pfi_ifaddr_event_cookie; #endif void pfi_kif_update(struct pfi_kif *); @@ -109,11 +122,10 @@ int pfi_unmask(void *); #ifdef __FreeBSD__ void pfi_attach_ifnet_event(void * __unused, struct ifnet *); void pfi_detach_ifnet_event(void * __unused, struct ifnet *); -void pfi_attach_group_event(void * __unused, struct ifg_group *); -void pfi_change_group_event(void * __unused, char *); -void pfi_detach_group_event(void * __unused, struct ifg_group *); +void pfi_attach_group_event(void *, struct ifg_group *); +void pfi_change_group_event(void *, char *); +void pfi_detach_group_event(void *, struct ifg_group *); void pfi_ifaddr_event(void * __unused, struct ifnet *); - #endif RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); @@ -125,22 +137,31 @@ RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); void pfi_initialize(void) { - +#ifdef __FreeBSD__ + if (V_pfi_all != NULL) /* already initialized */ +#else if (pfi_all != NULL) /* already initialized */ +#endif return; - TAILQ_INIT(&pfi_statehead); #ifndef __FreeBSD__ - pool_init(&pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, 0, 0, + pool_init(&V_pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, 0, 0, "pfiaddrpl", &pool_allocator_nointr); #endif +#ifdef __FreeBSD__ + V_pfi_buffer_max = 64; + V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer), + PFI_MTYPE, M_WAITOK); + + if ((V_pfi_all = pfi_kif_get(IFG_ALL)) == NULL) +#else pfi_buffer_max = 64; pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer), PFI_MTYPE, M_WAITOK); if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL) +#endif panic("pfi_kif_get for pfi_all failed"); - #ifdef __FreeBSD__ struct ifg_group *ifg; struct ifnet *ifp; @@ -157,11 +178,11 @@ pfi_initialize(void) pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event, pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); pfi_attach_group_cookie = EVENTHANDLER_REGISTER(group_attach_event, - pfi_attach_group_event, NULL, EVENTHANDLER_PRI_ANY); + pfi_attach_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_change_group_cookie = EVENTHANDLER_REGISTER(group_change_event, - pfi_change_group_event, NULL, EVENTHANDLER_PRI_ANY); + pfi_change_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_detach_group_cookie = EVENTHANDLER_REGISTER(group_detach_event, - pfi_detach_group_event, NULL, EVENTHANDLER_PRI_ANY); + pfi_detach_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event, pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); #endif @@ -182,18 +203,18 @@ pfi_cleanup(void) EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie); PF_LOCK(); - pfi_all = NULL; - while ((p = RB_MIN(pfi_ifhead, &pfi_ifs))) { + V_pfi_all = NULL; + while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) { if (p->pfik_rules || p->pfik_states) { printf("pfi_cleanup: dangling refs for %s\n", p->pfik_name); } - RB_REMOVE(pfi_ifhead, &pfi_ifs, p); + RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p); free(p, PFI_MTYPE); } - free(pfi_buffer, PFI_MTYPE); + free(V_pfi_buffer, PFI_MTYPE); } #endif @@ -205,18 +226,21 @@ pfi_kif_get(const char *kif_name) bzero(&s, sizeof(s)); strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name)); +#ifdef __FreeBSD__ + if ((kif = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s)) != NULL) +#else if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL) +#endif return (kif); /* create new one */ #ifdef __FreeBSD__ - if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL) #else - if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT)) == NULL) + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT|M_ZERO)) == NULL) #endif return (NULL); - bzero(kif, sizeof(*kif)); strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); #ifdef __FreeBSD__ /* @@ -232,7 +256,12 @@ pfi_kif_get(const char *kif_name) #endif TAILQ_INIT(&kif->pfik_dynaddrs); +#ifdef __FreeBSD__ + RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif); +#else RB_INSERT(pfi_ifhead, &pfi_ifs, kif); +#endif + return (kif); } @@ -244,8 +273,7 @@ pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what) kif->pfik_rules++; break; case PFI_KIF_REF_STATE: - if (!kif->pfik_states++) - TAILQ_INSERT_TAIL(&pfi_statehead, kif, pfik_w_states); + kif->pfik_states++; break; default: panic("pfi_kif_ref with unknown type"); @@ -273,20 +301,27 @@ pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what) printf("pfi_kif_unref: state refcount <= 0\n"); return; } - if (!--kif->pfik_states) - TAILQ_REMOVE(&pfi_statehead, kif, pfik_w_states); + kif->pfik_states--; break; default: panic("pfi_kif_unref with unknown type"); } +#ifdef __FreeBSD__ + if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all) +#else if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all) +#endif return; if (kif->pfik_rules || kif->pfik_states) return; +#ifdef __FreeBSD__ + RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif); +#else RB_REMOVE(pfi_ifhead, &pfi_ifs, kif); +#endif free(kif, PFI_MTYPE); } @@ -314,7 +349,11 @@ pfi_attach_ifnet(struct ifnet *ifp) pfi_initialize(); s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif if ((kif = pfi_kif_get(ifp->if_xname)) == NULL) panic("pfi_kif_get failed"); @@ -343,7 +382,11 @@ pfi_detach_ifnet(struct ifnet *ifp) return; s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif #ifndef __FreeBSD__ hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie); #endif @@ -363,7 +406,11 @@ pfi_attach_ifgroup(struct ifg_group *ifg) pfi_initialize(); s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL) panic("pfi_kif_get failed"); @@ -383,7 +430,11 @@ pfi_detach_ifgroup(struct ifg_group *ifg) return; s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif kif->pfik_group = NULL; ifg->ifg_pf_kif = NULL; @@ -398,7 +449,11 @@ pfi_group_change(const char *group) int s; s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif if ((kif = pfi_kif_get(group)) == NULL) panic("pfi_kif_get failed"); @@ -452,9 +507,13 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) if (aw->type != PF_ADDR_DYNIFTL) return (0); - if ((dyn = pool_get(&pfi_addr_pl, PR_NOWAIT)) == NULL) +#ifdef __FreeBSD__ + if ((dyn = pool_get(&V_pfi_addr_pl, PR_NOWAIT | PR_ZERO)) +#else + if ((dyn = pool_get(&pfi_addr_pl, PR_WAITOK | PR_LIMITFAIL | PR_ZERO)) +#endif + == NULL) return (1); - bzero(dyn, sizeof(*dyn)); s = splsoftnet(); if (!strcmp(aw->v.ifname, "self")) @@ -487,7 +546,7 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) goto _bad; } - if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname, 1)) == NULL) { rv = 1; goto _bad; } @@ -509,7 +568,11 @@ _bad: pf_remove_if_empty_ruleset(ruleset); if (dyn->pfid_kif != NULL) pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE); +#ifdef __FreeBSD__ + pool_put(&V_pfi_addr_pl, dyn); +#else pool_put(&pfi_addr_pl, dyn); +#endif splx(s); return (rv); } @@ -543,10 +606,18 @@ pfi_dynaddr_update(struct pfi_dynaddr *dyn) kif = dyn->pfid_kif; kt = dyn->pfid_kt; +#ifdef __FreeBSD__ + if (kt->pfrkt_larg != V_pfi_update) { +#else if (kt->pfrkt_larg != pfi_update) { +#endif /* this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); +#ifdef __FreeBSD__ + kt->pfrkt_larg = V_pfi_update; +#else kt->pfrkt_larg = pfi_update; +#endif } pfr_dynaddr_update(kt, dyn); } @@ -557,7 +628,11 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) int e, size2 = 0; struct ifg_member *ifgm; +#ifdef __FreeBSD__ + V_pfi_buffer_cnt = 0; +#else pfi_buffer_cnt = 0; +#endif if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); @@ -565,10 +640,17 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) pfi_instance_add(ifgm->ifgm_ifp, net, flags); +#ifdef __FreeBSD__ + if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2, + NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) + printf("pfi_table_update: cannot set %d new addresses " + "into table %s: %d\n", V_pfi_buffer_cnt, kt->pfrkt_name, e); +#else if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2, NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) printf("pfi_table_update: cannot set %d new addresses " "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e); +#endif } void @@ -589,9 +671,9 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) #ifdef __FreeBSD__ /* * XXX: For point-to-point interfaces, (ifname:0) and IPv4, - * jump over addresses without a proper route to work - * around a problem with ppp not fully removing the - * address used during IPCP. + * jump over addresses without a proper route to work + * around a problem with ppp not fully removing the + * address used during IPCP. */ if ((ifp->if_flags & IFF_POINTOPOINT) && !(ia->ifa_flags & IFA_ROUTE) && @@ -646,15 +728,24 @@ pfi_address_add(struct sockaddr *sa, int af, int net) struct pfr_addr *p; int i; +#ifdef __FreeBSD__ + if (V_pfi_buffer_cnt >= V_pfi_buffer_max) { + int new_max = V_pfi_buffer_max * 2; +#else if (pfi_buffer_cnt >= pfi_buffer_max) { int new_max = pfi_buffer_max * 2; +#endif if (new_max > PFI_BUFFER_MAX) { printf("pfi_address_add: address buffer full (%d/%d)\n", +#ifdef __FreeBSD__ + V_pfi_buffer_cnt, PFI_BUFFER_MAX); +#else pfi_buffer_cnt, PFI_BUFFER_MAX); +#endif return; } - p = malloc(new_max * sizeof(*pfi_buffer), PFI_MTYPE, + p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE, #ifdef __FreeBSD__ M_NOWAIT); #else @@ -662,18 +753,34 @@ pfi_address_add(struct sockaddr *sa, int af, int net) #endif if (p == NULL) { printf("pfi_address_add: no memory to grow buffer " +#ifdef __FreeBSD__ + "(%d/%d)\n", V_pfi_buffer_cnt, PFI_BUFFER_MAX); +#else "(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX); +#endif return; } - memcpy(p, pfi_buffer, pfi_buffer_max * sizeof(*pfi_buffer)); +#ifdef __FreeBSD__ + memcpy(V_pfi_buffer, p, V_pfi_buffer_cnt * sizeof(*V_pfi_buffer)); + /* no need to zero buffer */ + free(V_pfi_buffer, PFI_MTYPE); + V_pfi_buffer = p; + V_pfi_buffer_max = new_max; +#else + memcpy(pfi_buffer, p, pfi_buffer_cnt * sizeof(*pfi_buffer)); /* no need to zero buffer */ free(pfi_buffer, PFI_MTYPE); pfi_buffer = p; pfi_buffer_max = new_max; +#endif } if (af == AF_INET && net > 32) net = 128; +#ifdef __FreeBSD__ + p = V_pfi_buffer + V_pfi_buffer_cnt++; +#else p = pfi_buffer + pfi_buffer_cnt++; +#endif bzero(p, sizeof(*p)); p->pfra_af = af; p->pfra_net = net; @@ -706,7 +813,11 @@ pfi_dynaddr_remove(struct pf_addr_wrap *aw) aw->p.dyn->pfid_kif = NULL; pfr_detach_table(aw->p.dyn->pfid_kt); aw->p.dyn->pfid_kt = NULL; +#ifdef __FreeBSD__ + pool_put(&V_pfi_addr_pl, aw->p.dyn); +#else pool_put(&pfi_addr_pl, aw->p.dyn); +#endif aw->p.dyn = NULL; splx(s); } @@ -727,7 +838,11 @@ pfi_kifaddr_update(void *v) struct pfi_kif *kif = (struct pfi_kif *)v; s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif pfi_kif_update(kif); splx(s); } @@ -739,49 +854,61 @@ pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) } void -pfi_fill_oldstatus(struct pf_status *pfs) +pfi_update_status(const char *name, struct pf_status *pfs) { struct pfi_kif *p; - struct pfi_kif_cmp key; + struct pfi_kif_cmp key; + struct ifg_member p_member, *ifgm; + TAILQ_HEAD(, ifg_member) ifg_members; int i, j, k, s; - strlcpy(key.pfik_name, pfs->ifname, sizeof(key.pfik_name)); + strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); s = splsoftnet(); +#ifdef __FreeBSD__ + p = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key); +#else p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key); +#endif if (p == NULL) { splx(s); return; } - bzero(pfs->pcounters, sizeof(pfs->pcounters)); - bzero(pfs->bcounters, sizeof(pfs->bcounters)); - for (i = 0; i < 2; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) { - pfs->pcounters[i][j][k] = - p->pfik_packets[i][j][k]; - pfs->bcounters[i][j] += - p->pfik_bytes[i][j][k]; - } - splx(s); -} - -int -pfi_clr_istats(const char *name) -{ - struct pfi_kif *p; - int s; + if (p->pfik_group != NULL) { + bcopy(&p->pfik_group->ifg_members, &ifg_members, + sizeof(ifg_members)); + } else { + /* build a temporary list for p only */ + bzero(&p_member, sizeof(p_member)); + p_member.ifgm_ifp = p->pfik_ifp; + TAILQ_INIT(&ifg_members); + TAILQ_INSERT_TAIL(&ifg_members, &p_member, ifgm_next); + } + if (pfs) { + bzero(pfs->pcounters, sizeof(pfs->pcounters)); + bzero(pfs->bcounters, sizeof(pfs->bcounters)); + } + TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) { + if (ifgm->ifgm_ifp == NULL) + continue; + p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif; - s = splsoftnet(); - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p)) + /* just clear statistics */ + if (pfs == NULL) { + bzero(p->pfik_packets, sizeof(p->pfik_packets)); + bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); + p->pfik_tzero = time_second; continue; - bzero(p->pfik_packets, sizeof(p->pfik_packets)); - bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); - p->pfik_tzero = time_second; + } + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) { + pfs->pcounters[i][j][k] += + p->pfik_packets[i][j][k]; + pfs->bcounters[i][j] += + p->pfik_bytes[i][j][k]; + } } splx(s); - - return (0); } int @@ -794,8 +921,13 @@ pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) #endif s = splsoftnet(); +#ifdef __FreeBSD__ + for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) { + nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); +#else for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) { nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); +#endif if (pfi_skip_if(name, p)) continue; if (*size > n++) { @@ -812,7 +944,11 @@ pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) splx(s); return (EFAULT); } +#ifdef __FreeBSD__ + nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); +#else nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); +#endif pfi_kif_unref(p, PFI_KIF_REF_RULE); } } @@ -847,7 +983,11 @@ pfi_set_flags(const char *name, int flags) int s; s = splsoftnet(); +#ifdef __FreeBSD__ + RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { +#else RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { +#endif if (pfi_skip_if(name, p)) continue; p->pfik_flags |= flags; @@ -863,7 +1003,11 @@ pfi_clear_flags(const char *name, int flags) int s; s = splsoftnet(); +#ifdef __FreeBSD__ + RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { +#else RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { +#endif if (pfi_skip_if(name, p)) continue; p->pfik_flags &= ~flags; @@ -896,55 +1040,73 @@ pfi_unmask(void *addr) void pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) { + + CURVNET_SET(ifp->if_vnet); PF_LOCK(); pfi_attach_ifnet(ifp); #ifdef ALTQ pf_altq_ifnet_event(ifp, 0); #endif PF_UNLOCK(); + CURVNET_RESTORE(); } void pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) { + + CURVNET_SET(ifp->if_vnet); PF_LOCK(); pfi_detach_ifnet(ifp); #ifdef ALTQ pf_altq_ifnet_event(ifp, 1); #endif PF_UNLOCK(); + CURVNET_RESTORE(); } void -pfi_attach_group_event(void *arg __unused, struct ifg_group *ifg) +pfi_attach_group_event(void *arg , struct ifg_group *ifg) { + + CURVNET_SET((struct vnet *)arg); PF_LOCK(); pfi_attach_ifgroup(ifg); PF_UNLOCK(); + CURVNET_RESTORE(); } void -pfi_change_group_event(void *arg __unused, char *gname) +pfi_change_group_event(void *arg, char *gname) { + + CURVNET_SET((struct vnet *)arg); PF_LOCK(); pfi_group_change(gname); PF_UNLOCK(); + CURVNET_RESTORE(); } void -pfi_detach_group_event(void *arg __unused, struct ifg_group *ifg) +pfi_detach_group_event(void *arg, struct ifg_group *ifg) { + + CURVNET_SET((struct vnet *)arg); PF_LOCK(); pfi_detach_ifgroup(ifg); PF_UNLOCK(); + CURVNET_RESTORE(); } void pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp) { + + CURVNET_SET(ifp->if_vnet); PF_LOCK(); if (ifp && ifp->if_pf_kif) pfi_kifaddr_update(ifp->if_pf_kif); PF_UNLOCK(); + CURVNET_RESTORE(); } #endif /* __FreeBSD__ */ diff --git a/freebsd/sys/contrib/pf/net/pf_ioctl.c b/freebsd/sys/contrib/pf/net/pf_ioctl.c index 7479b510..9cfa9b32 100644 --- a/freebsd/sys/contrib/pf/net/pf_ioctl.c +++ b/freebsd/sys/contrib/pf/net/pf_ioctl.c @@ -1,6 +1,6 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $OpenBSD: pf_ioctl.c,v 1.175 2007/02/26 22:47:43 deraadt Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -46,29 +46,18 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_bpf.h> #include <rtems/bsd/local/opt_pf.h> -#ifdef DEV_BPF -#define NBPFILTER DEV_BPF -#else -#define NBPFILTER 0 -#endif +#define NPFSYNC 1 #ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG -#else -#define NPFLOG 0 -#endif - -#ifdef DEV_PFSYNC -#define NPFSYNC DEV_PFSYNC +#define NPFLOG DEV_PFLOG #else -#define NPFSYNC 0 +#define NPFLOG 0 #endif -#else -#include "bpfilter.h" -#include "pflog.h" +#else /* !__FreeBSD__ */ #include "pfsync.h" -#endif +#include "pflog.h" +#endif /* __FreeBSD__ */ #include <rtems/bsd/sys/param.h> #include <sys/systm.h> @@ -79,8 +68,9 @@ __FBSDID("$FreeBSD$"); #include <sys/socketvar.h> #include <sys/kernel.h> #include <rtems/bsd/sys/time.h> -#include <sys/malloc.h> #ifdef __FreeBSD__ +#include <sys/ucred.h> +#include <sys/jail.h> #include <sys/module.h> #include <sys/conf.h> #include <sys/proc.h> @@ -102,6 +92,7 @@ __FBSDID("$FreeBSD$"); #ifdef __FreeBSD__ #include <net/vnet.h> #endif +#include <net/route.h> #include <netinet/in.h> #include <netinet/in_var.h> @@ -118,11 +109,11 @@ __FBSDID("$FreeBSD$"); #endif #include <net/pfvar.h> -#if NPFSYNC > 0 #include <net/if_pfsync.h> -#endif /* NPFSYNC > 0 */ +#if NPFLOG > 0 #include <net/if_pflog.h> +#endif /* NPFLOG > 0 */ #ifdef INET6 #include <netinet/ip6.h> @@ -158,7 +149,7 @@ void pf_empty_pool(struct pf_palist *); #ifdef __FreeBSD__ int pfioctl(struct cdev *, u_long, caddr_t, int, struct thread *); #else -int pfioctl(struct cdev *, u_long, caddr_t, int, struct proc *); +int pfioctl(dev_t, u_long, caddr_t, int, struct proc *); #endif #ifdef ALTQ int pf_begin_altq(u_int32_t *); @@ -173,25 +164,43 @@ int pf_setup_pfsync_matching(struct pf_ruleset *); void pf_hash_rule(MD5_CTX *, struct pf_rule *); void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); int pf_commit_rules(u_int32_t, int, char *); +int pf_addr_setup(struct pf_ruleset *, + struct pf_addr_wrap *, sa_family_t); +void pf_addr_copyout(struct pf_addr_wrap *); + +#define TAGID_MAX 50000 -struct pf_rule pf_default_rule; #ifdef __FreeBSD__ -struct sx pf_consistency_lock; -SX_SYSINIT(pf_consistency_lock, &pf_consistency_lock, "pf_statetbl_lock"); -#else -struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER; +VNET_DEFINE(struct pf_rule, pf_default_rule); +VNET_DEFINE(struct sx, pf_consistency_lock); + +#ifdef ALTQ +static VNET_DEFINE(int, pf_altq_running); +#define V_pf_altq_running VNET(pf_altq_running) #endif + +TAILQ_HEAD(pf_tags, pf_tagname); + +#define V_pf_tags VNET(pf_tags) +VNET_DEFINE(struct pf_tags, pf_tags); +#define V_pf_qids VNET(pf_qids) +VNET_DEFINE(struct pf_tags, pf_qids); + +#else /* !__FreeBSD__ */ +struct pf_rule pf_default_rule; +struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER("pfcnslk"); #ifdef ALTQ static int pf_altq_running; #endif -#define TAGID_MAX 50000 TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids); +#endif /* __FreeBSD__ */ #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif + u_int16_t tagname2tag(struct pf_tags *, char *); void tag2tagname(struct pf_tags *, u_int16_t, char *); void tag_unref(struct pf_tags *, u_int16_t); @@ -199,12 +208,15 @@ int pf_rtlabel_add(struct pf_addr_wrap *); void pf_rtlabel_remove(struct pf_addr_wrap *); void pf_rtlabel_copyout(struct pf_addr_wrap *); +#ifdef __FreeBSD__ +#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x +#else #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x - +#endif #ifdef __FreeBSD__ -static struct cdev *pf_dev; - +struct cdev *pf_dev; + /* * XXX - These are new and need to be checked when moveing to a new version */ @@ -218,22 +230,24 @@ static void pf_clear_srcnodes(void); /* * Wrapper functions for pfil(9) hooks */ +#ifdef INET static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, struct inpcb *inp); + int dir, struct inpcb *inp); static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, struct inpcb *inp); + int dir, struct inpcb *inp); +#endif #ifdef INET6 static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, struct inpcb *inp); + int dir, struct inpcb *inp); static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, struct inpcb *inp); + int dir, struct inpcb *inp); #endif - -static int hook_pf(void); -static int dehook_pf(void); -static int shutdown_pf(void); -static int pf_load(void); -static int pf_unload(void); + +static int hook_pf(void); +static int dehook_pf(void); +static int shutdown_pf(void); +static int pf_load(void); +static int pf_unload(void); static struct cdevsw pf_cdevsw = { .d_ioctl = pfioctl, @@ -241,78 +255,99 @@ static struct cdevsw pf_cdevsw = { .d_version = D_VERSION, }; -static volatile int pf_pfil_hooked = 0; -int pf_end_threads = 0; -struct mtx pf_task_mtx; -pflog_packet_t *pflog_packet_ptr = NULL; +static volatile VNET_DEFINE(int, pf_pfil_hooked); +#define V_pf_pfil_hooked VNET(pf_pfil_hooked) +VNET_DEFINE(int, pf_end_threads); +struct mtx pf_task_mtx; + +/* pfsync */ +pfsync_state_import_t *pfsync_state_import_ptr = NULL; +pfsync_insert_state_t *pfsync_insert_state_ptr = NULL; +pfsync_update_state_t *pfsync_update_state_ptr = NULL; +pfsync_delete_state_t *pfsync_delete_state_ptr = NULL; +pfsync_clear_states_t *pfsync_clear_states_ptr = NULL; +pfsync_state_in_use_t *pfsync_state_in_use_ptr = NULL; +pfsync_defer_t *pfsync_defer_ptr = NULL; +pfsync_up_t *pfsync_up_ptr = NULL; +/* pflow */ +export_pflow_t *export_pflow_ptr = NULL; +/* pflog */ +pflog_packet_t *pflog_packet_ptr = NULL; + +VNET_DEFINE(int, debug_pfugidhack); +SYSCTL_VNET_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW, + &VNET_NAME(debug_pfugidhack), 0, + "Enable/disable pf user/group rules mpsafe hack"); -int debug_pfugidhack = 0; -SYSCTL_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW, &debug_pfugidhack, 0, - "Enable/disable pf user/group rules mpsafe hack"); - -void +static void init_pf_mutex(void) { + mtx_init(&pf_task_mtx, "pf task mtx", NULL, MTX_DEF); } -void +static void destroy_pf_mutex(void) { + mtx_destroy(&pf_task_mtx); } - void init_zone_var(void) { - pf_src_tree_pl = pf_rule_pl = NULL; - pf_state_pl = pf_altq_pl = pf_pooladdr_pl = NULL; - pf_frent_pl = pf_frag_pl = pf_cache_pl = pf_cent_pl = NULL; - pf_state_scrub_pl = NULL; - pfr_ktable_pl = pfr_kentry_pl = NULL; + V_pf_src_tree_pl = V_pf_rule_pl = NULL; + V_pf_state_pl = V_pf_state_key_pl = V_pf_state_item_pl = NULL; + V_pf_altq_pl = V_pf_pooladdr_pl = NULL; + V_pf_frent_pl = V_pf_frag_pl = V_pf_cache_pl = V_pf_cent_pl = NULL; + V_pf_state_scrub_pl = NULL; + V_pfr_ktable_pl = V_pfr_kentry_pl = V_pfr_kcounters_pl = NULL; } void cleanup_pf_zone(void) { - UMA_DESTROY(pf_src_tree_pl); - UMA_DESTROY(pf_rule_pl); - UMA_DESTROY(pf_state_pl); - UMA_DESTROY(pf_altq_pl); - UMA_DESTROY(pf_pooladdr_pl); - UMA_DESTROY(pf_frent_pl); - UMA_DESTROY(pf_frag_pl); - UMA_DESTROY(pf_cache_pl); - UMA_DESTROY(pf_cent_pl); - UMA_DESTROY(pfr_ktable_pl); - UMA_DESTROY(pfr_kentry_pl2); - UMA_DESTROY(pfr_kentry_pl); - UMA_DESTROY(pf_state_scrub_pl); - UMA_DESTROY(pfi_addr_pl); + UMA_DESTROY(V_pf_src_tree_pl); + UMA_DESTROY(V_pf_rule_pl); + UMA_DESTROY(V_pf_state_pl); + UMA_DESTROY(V_pf_state_key_pl); + UMA_DESTROY(V_pf_state_item_pl); + UMA_DESTROY(V_pf_altq_pl); + UMA_DESTROY(V_pf_pooladdr_pl); + UMA_DESTROY(V_pf_frent_pl); + UMA_DESTROY(V_pf_frag_pl); + UMA_DESTROY(V_pf_cache_pl); + UMA_DESTROY(V_pf_cent_pl); + UMA_DESTROY(V_pfr_ktable_pl); + UMA_DESTROY(V_pfr_kentry_pl); + UMA_DESTROY(V_pfr_kcounters_pl); + UMA_DESTROY(V_pf_state_scrub_pl); + UMA_DESTROY(V_pfi_addr_pl); } int pfattach(void) { - u_int32_t *my_timeout = pf_default_rule.timeout; + u_int32_t *my_timeout = V_pf_default_rule.timeout; int error = 1; do { - UMA_CREATE(pf_src_tree_pl,struct pf_src_node, "pfsrctrpl"); - UMA_CREATE(pf_rule_pl, struct pf_rule, "pfrulepl"); - UMA_CREATE(pf_state_pl, struct pf_state, "pfstatepl"); - UMA_CREATE(pf_altq_pl, struct pf_altq, "pfaltqpl"); - UMA_CREATE(pf_pooladdr_pl, struct pf_pooladdr, "pfpooladdrpl"); - UMA_CREATE(pfr_ktable_pl, struct pfr_ktable, "pfrktable"); - UMA_CREATE(pfr_kentry_pl, struct pfr_kentry, "pfrkentry"); - UMA_CREATE(pfr_kentry_pl2, struct pfr_kentry, "pfrkentry2"); - UMA_CREATE(pf_frent_pl, struct pf_frent, "pffrent"); - UMA_CREATE(pf_frag_pl, struct pf_fragment, "pffrag"); - UMA_CREATE(pf_cache_pl, struct pf_fragment, "pffrcache"); - UMA_CREATE(pf_cent_pl, struct pf_frcache, "pffrcent"); - UMA_CREATE(pf_state_scrub_pl, struct pf_state_scrub, + UMA_CREATE(V_pf_src_tree_pl, struct pf_src_node, "pfsrctrpl"); + UMA_CREATE(V_pf_rule_pl, struct pf_rule, "pfrulepl"); + UMA_CREATE(V_pf_state_pl, struct pf_state, "pfstatepl"); + UMA_CREATE(V_pf_state_key_pl, struct pf_state, "pfstatekeypl"); + UMA_CREATE(V_pf_state_item_pl, struct pf_state, "pfstateitempl"); + UMA_CREATE(V_pf_altq_pl, struct pf_altq, "pfaltqpl"); + UMA_CREATE(V_pf_pooladdr_pl, struct pf_pooladdr, "pfpooladdrpl"); + UMA_CREATE(V_pfr_ktable_pl, struct pfr_ktable, "pfrktable"); + UMA_CREATE(V_pfr_kentry_pl, struct pfr_kentry, "pfrkentry"); + UMA_CREATE(V_pfr_kcounters_pl, struct pfr_kcounters, "pfrkcounters"); + UMA_CREATE(V_pf_frent_pl, struct pf_frent, "pffrent"); + UMA_CREATE(V_pf_frag_pl, struct pf_fragment, "pffrag"); + UMA_CREATE(V_pf_cache_pl, struct pf_fragment, "pffrcache"); + UMA_CREATE(V_pf_cent_pl, struct pf_frcache, "pffrcent"); + UMA_CREATE(V_pf_state_scrub_pl, struct pf_state_scrub, "pfstatescrub"); - UMA_CREATE(pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl"); + UMA_CREATE(V_pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl"); error = 0; } while(0); if (error) { @@ -327,34 +362,35 @@ pfattach(void) return (error); } - pf_pool_limits[PF_LIMIT_STATES].pp = pf_state_pl; - pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; - pf_pool_limits[PF_LIMIT_SRC_NODES].pp = pf_src_tree_pl; - pf_pool_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; - pf_pool_limits[PF_LIMIT_FRAGS].pp = pf_frent_pl; - pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; - pf_pool_limits[PF_LIMIT_TABLES].pp = pfr_ktable_pl; - pf_pool_limits[PF_LIMIT_TABLES].limit = PFR_KTABLE_HIWAT; - pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].pp = pfr_kentry_pl; - pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT; - uma_zone_set_max(pf_pool_limits[PF_LIMIT_STATES].pp, - pf_pool_limits[PF_LIMIT_STATES].limit); - - RB_INIT(&tree_src_tracking); - RB_INIT(&pf_anchors); + V_pf_pool_limits[PF_LIMIT_STATES].pp = V_pf_state_pl; + V_pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; + V_pf_pool_limits[PF_LIMIT_SRC_NODES].pp = V_pf_src_tree_pl; + V_pf_pool_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; + V_pf_pool_limits[PF_LIMIT_FRAGS].pp = V_pf_frent_pl; + V_pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; + V_pf_pool_limits[PF_LIMIT_TABLES].pp = V_pfr_ktable_pl; + V_pf_pool_limits[PF_LIMIT_TABLES].limit = PFR_KTABLE_HIWAT; + V_pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].pp = V_pfr_kentry_pl; + V_pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT; + uma_zone_set_max(V_pf_pool_limits[PF_LIMIT_STATES].pp, + V_pf_pool_limits[PF_LIMIT_STATES].limit); + + RB_INIT(&V_tree_src_tracking); + RB_INIT(&V_pf_anchors); pf_init_ruleset(&pf_main_ruleset); - TAILQ_INIT(&pf_altqs[0]); - TAILQ_INIT(&pf_altqs[1]); - TAILQ_INIT(&pf_pabuf); - pf_altqs_active = &pf_altqs[0]; - pf_altqs_inactive = &pf_altqs[1]; - TAILQ_INIT(&state_list); + + TAILQ_INIT(&V_pf_altqs[0]); + TAILQ_INIT(&V_pf_altqs[1]); + TAILQ_INIT(&V_pf_pabuf); + V_pf_altqs_active = &V_pf_altqs[0]; + V_pf_altqs_inactive = &V_pf_altqs[1]; + TAILQ_INIT(&V_state_list); /* default rule should never be garbage collected */ - pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; - pf_default_rule.action = PF_PASS; - pf_default_rule.nr = -1; - pf_default_rule.rtableid = -1; + V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next; + V_pf_default_rule.action = PF_PASS; + V_pf_default_rule.nr = -1; + V_pf_default_rule.rtableid = -1; /* initialize default timeouts */ my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; @@ -379,20 +415,24 @@ pfattach(void) my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; pf_normalize_init(); - bzero(&pf_status, sizeof(pf_status)); - pf_status.debug = PF_DEBUG_URGENT; - pf_pfil_hooked = 0; + bzero(&V_pf_status, sizeof(V_pf_status)); + V_pf_status.debug = PF_DEBUG_URGENT; + + V_pf_pfil_hooked = 0; /* XXX do our best to avoid a conflict */ - pf_status.hostid = arc4random(); + V_pf_status.hostid = arc4random(); - if (kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pfpurge")) + if (kproc_create(pf_purge_thread, curvnet, NULL, 0, 0, "pfpurge")) return (ENXIO); + m_addr_chg_pf_p = pf_pkt_addr_changed; + return (error); } #else /* !__FreeBSD__ */ + void pfattach(int num) { @@ -404,6 +444,10 @@ pfattach(int num) "pfsrctrpl", NULL); pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl", NULL); + pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0, 0, 0, + "pfstatekeypl", NULL); + pool_init(&pf_state_item_pl, sizeof(struct pf_state_item), 0, 0, 0, + "pfstateitempl", NULL); pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl", &pool_allocator_nointr); pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0, @@ -415,7 +459,7 @@ pfattach(int num) pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp, pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0); - if (ctob(physmem) <= 100*1024*1024) + if (physmem <= atop(100*1024*1024)) pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT_SMALL; @@ -465,32 +509,32 @@ pfattach(int num) pf_status.hostid = arc4random(); /* require process context to purge states, so perform in a thread */ - kproc_create_deferred(pf_thread_create, NULL); + kthread_create_deferred(pf_thread_create, NULL); } void pf_thread_create(void *v) { - if (kproc_create(pf_purge_thread, NULL, NULL, "pfpurge")) + if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge")) panic("pfpurge thread"); } int -pfopen(struct cdev *dev, int flags, int fmt, struct proc *p) +pfopen(dev_t dev, int flags, int fmt, struct proc *p) { - if (dev2unit(dev) >= 1) + if (minor(dev) >= 1) return (ENXIO); return (0); } int -pfclose(struct cdev *dev, int flags, int fmt, struct proc *p) +pfclose(dev_t dev, int flags, int fmt, struct proc *p) { - if (dev2unit(dev) >= 1) + if (minor(dev) >= 1) return (ENXIO); return (0); } -#endif /* __FreeBSD__ */ +#endif struct pf_pool * pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, @@ -557,7 +601,11 @@ pf_empty_pool(struct pf_palist *poola) pf_tbladdr_remove(&empty_pool_pa->addr); pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE); TAILQ_REMOVE(poola, empty_pool_pa, entries); +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, empty_pool_pa); +#else pool_put(&pf_pooladdr_pl, empty_pool_pa); +#endif } } @@ -565,7 +613,7 @@ void pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) { if (rulequeue != NULL) { - if (rule->states <= 0) { + if (rule->states_cur <= 0) { /* * XXX - we need to remove the table *before* detaching * the rule to make sure the table code does not delete @@ -581,7 +629,7 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) rule->nr = -1; } - if (rule->states > 0 || rule->src_nodes > 0 || + if (rule->states_cur > 0 || rule->src_nodes > 0 || rule->entries.tqe_prev != NULL) return; pf_tag_unref(rule->tag); @@ -604,7 +652,11 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE); pf_anchor_remove(rule); pf_empty_pool(&rule->rpool.list); +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, rule); +#else pool_put(&pf_rule_pl, rule); +#endif } u_int16_t @@ -635,11 +687,9 @@ tagname2tag(struct pf_tags *head, char *tagname) return (0); /* allocate and fill new struct pf_tagname */ - tag = (struct pf_tagname *)malloc(sizeof(struct pf_tagname), - M_TEMP, M_NOWAIT); + tag = malloc(sizeof(*tag), M_TEMP, M_NOWAIT|M_ZERO); if (tag == NULL) return (0); - bzero(tag, sizeof(struct pf_tagname)); strlcpy(tag->name, tagname, sizeof(tag->name)); tag->tag = new_tagid; tag->ref++; @@ -687,13 +737,21 @@ tag_unref(struct pf_tags *head, u_int16_t tag) u_int16_t pf_tagname2tag(char *tagname) { +#ifdef __FreeBSD__ + return (tagname2tag(&V_pf_tags, tagname)); +#else return (tagname2tag(&pf_tags, tagname)); +#endif } void pf_tag2tagname(u_int16_t tagid, char *p) { +#ifdef __FreeBSD__ + tag2tagname(&V_pf_tags, tagid, p); +#else tag2tagname(&pf_tags, tagid, p); +#endif } void @@ -701,7 +759,11 @@ pf_tag_ref(u_int16_t tag) { struct pf_tagname *t; +#ifdef __FreeBSD__ + TAILQ_FOREACH(t, &V_pf_tags, entries) +#else TAILQ_FOREACH(t, &pf_tags, entries) +#endif if (t->tag == tag) break; if (t != NULL) @@ -711,7 +773,11 @@ pf_tag_ref(u_int16_t tag) void pf_tag_unref(u_int16_t tag) { +#ifdef __FreeBSD__ + tag_unref(&V_pf_tags, tag); +#else tag_unref(&pf_tags, tag); +#endif } int @@ -764,19 +830,31 @@ pf_rtlabel_copyout(struct pf_addr_wrap *a) u_int32_t pf_qname2qid(char *qname) { +#ifdef __FreeBSD__ + return ((u_int32_t)tagname2tag(&V_pf_qids, qname)); +#else return ((u_int32_t)tagname2tag(&pf_qids, qname)); +#endif } void pf_qid2qname(u_int32_t qid, char *p) { +#ifdef __FreeBSD__ + tag2tagname(&V_pf_qids, (u_int16_t)qid, p); +#else tag2tagname(&pf_qids, (u_int16_t)qid, p); +#endif } void pf_qid_unref(u_int32_t qid) { +#ifdef __FreeBSD__ + tag_unref(&V_pf_qids, (u_int16_t)qid); +#else tag_unref(&pf_qids, (u_int16_t)qid); +#endif } int @@ -786,24 +864,35 @@ pf_begin_altq(u_int32_t *ticket) int error = 0; /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(pf_altqs_inactive, altq, entries); #ifdef __FreeBSD__ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { #else + while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0) { #endif /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, altq); +#else pool_put(&pf_altq_pl, altq); +#endif } if (error) return (error); +#ifdef __FreeBSD__ + *ticket = ++V_ticket_altqs_inactive; + V_altqs_inactive_open = 1; +#else *ticket = ++ticket_altqs_inactive; altqs_inactive_open = 1; +#endif return (0); } @@ -813,24 +902,37 @@ pf_rollback_altq(u_int32_t ticket) struct pf_altq *altq; int error = 0; +#ifdef __FreeBSD__ + if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) + return (0); + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { +#else if (!altqs_inactive_open || ticket != ticket_altqs_inactive) return (0); /* Purge the old altq list */ while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(pf_altqs_inactive, altq, entries); -#ifdef __FreeBSD__ - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else if (altq->qname[0] == 0) { #endif /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, altq); +#else pool_put(&pf_altq_pl, altq); +#endif } +#ifdef __FreeBSD__ + V_altqs_inactive_open = 0; +#else altqs_inactive_open = 0; +#endif return (error); } @@ -841,27 +943,43 @@ pf_commit_altq(u_int32_t ticket) struct pf_altq *altq; int s, err, error = 0; +#ifdef __FreeBSD__ + if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) +#else if (!altqs_inactive_open || ticket != ticket_altqs_inactive) +#endif return (EBUSY); /* swap altqs, keep the old. */ s = splsoftnet(); +#ifdef __FreeBSD__ + old_altqs = V_pf_altqs_active; + V_pf_altqs_active = V_pf_altqs_inactive; + V_pf_altqs_inactive = old_altqs; + V_ticket_altqs_active = V_ticket_altqs_inactive; +#else old_altqs = pf_altqs_active; pf_altqs_active = pf_altqs_inactive; pf_altqs_inactive = old_altqs; ticket_altqs_active = ticket_altqs_inactive; +#endif /* Attach new disciplines */ - TAILQ_FOREACH(altq, pf_altqs_active, entries) { #ifdef __FreeBSD__ - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { #else + TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { #endif /* attach the discipline */ error = altq_pfattach(altq); +#ifdef __FreeBSD__ + if (error == 0 && V_pf_altq_running) +#else if (error == 0 && pf_altq_running) +#endif error = pf_enable_altq(altq); if (error != 0) { splx(s); @@ -871,16 +989,22 @@ pf_commit_altq(u_int32_t ticket) } /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(pf_altqs_inactive, altq, entries); #ifdef __FreeBSD__ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { #else + while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0) { #endif /* detach and destroy the discipline */ +#ifdef __FreeBSD__ + if (V_pf_altq_running) +#else if (pf_altq_running) +#endif error = pf_disable_altq(altq); err = altq_pfdetach(altq); if (err != 0 && error == 0) @@ -890,11 +1014,19 @@ pf_commit_altq(u_int32_t ticket) error = err; } else pf_qid_unref(altq->qid); +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, altq); +#else pool_put(&pf_altq_pl, altq); +#endif } splx(s); +#ifdef __FreeBSD__ + V_altqs_inactive_open = 0; +#else altqs_inactive_open = 0; +#endif return (error); } @@ -969,22 +1101,32 @@ pf_disable_altq(struct pf_altq *altq) void pf_altq_ifnet_event(struct ifnet *ifp, int remove) { - struct ifnet *ifp1; - struct pf_altq *a1, *a2, *a3; - u_int32_t ticket; - int error = 0; + struct ifnet *ifp1; + struct pf_altq *a1, *a2, *a3; + u_int32_t ticket; + int error = 0; /* Interrupt userland queue modifications */ +#ifdef __FreeBSD__ + if (V_altqs_inactive_open) + pf_rollback_altq(V_ticket_altqs_inactive); +#else if (altqs_inactive_open) pf_rollback_altq(ticket_altqs_inactive); +#endif /* Start new altq ruleset */ if (pf_begin_altq(&ticket)) return; /* Copy the current active set */ +#ifdef __FreeBSD__ + TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { + a2 = pool_get(&V_pf_altq_pl, PR_NOWAIT); +#else TAILQ_FOREACH(a1, pf_altqs_active, entries) { a2 = pool_get(&pf_altq_pl, PR_NOWAIT); +#endif if (a2 == NULL) { error = ENOMEM; break; @@ -994,11 +1136,19 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) if (a2->qname[0] != 0) { if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { error = EBUSY; +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, a2); +#else pool_put(&pf_altq_pl, a2); +#endif break; } a2->altq_disc = NULL; +#ifdef __FreeBSD__ + TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) { +#else TAILQ_FOREACH(a3, pf_altqs_inactive, entries) { +#endif if (strncmp(a3->ifname, a2->ifname, IFNAMSIZ) == 0 && a3->qname[0] == 0) { a2->altq_disc = a3->altq_disc; @@ -1016,23 +1166,35 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) error = altq_add(a2); PF_LOCK(); +#ifdef __FreeBSD__ + if (ticket != V_ticket_altqs_inactive) +#else if (ticket != ticket_altqs_inactive) +#endif error = EBUSY; if (error) { +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, a2); +#else pool_put(&pf_altq_pl, a2); +#endif break; } } +#ifdef __FreeBSD__ + TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); +#else TAILQ_INSERT_TAIL(pf_altqs_inactive, a2, entries); +#endif } if (error != 0) pf_rollback_altq(ticket); else pf_commit_altq(ticket); -} + } #endif #endif /* ALTQ */ @@ -1252,11 +1414,34 @@ pf_setup_pfsync_matching(struct pf_ruleset *rs) } MD5Final(digest, &ctx); +#ifdef __FreeBSD__ + memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum)); +#else memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum)); +#endif return (0); } int +pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr, + sa_family_t af) +{ + if (pfi_dynaddr_setup(addr, af) || + pf_tbladdr_setup(ruleset, addr)) + return (EINVAL); + + return (0); +} + +void +pf_addr_copyout(struct pf_addr_wrap *addr) +{ + pfi_dynaddr_copyout(addr); + pf_tbladdr_copyout(addr); + pf_rtlabel_copyout(addr); +} + +int #ifdef __FreeBSD__ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) #else @@ -1270,6 +1455,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #endif int error = 0; + CURVNET_SET(TD_TO_VNET(td)); + /* XXX keep in sync with switch() below */ #ifdef __FreeBSD__ if (securelevel_gt(td->td_ucred, 2)) @@ -1373,7 +1560,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } return (EACCES); case DIOCGETRULE: - if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR) + if (((struct pfioc_rule *)addr)->action == + PF_GET_CLR_CNTR) return (EACCES); break; default: @@ -1382,9 +1570,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (flags & FWRITE) #ifdef __FreeBSD__ - sx_xlock(&pf_consistency_lock); + sx_xlock(&V_pf_consistency_lock); else - sx_slock(&pf_consistency_lock); + sx_slock(&V_pf_consistency_lock); #else rw_enter_write(&pf_consistency_lock); else @@ -1399,7 +1587,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) switch (cmd) { case DIOCSTART: +#ifdef __FreeBSD__ + if (V_pf_status.running) +#else if (pf_status.running) +#endif error = EEXIST; else { #ifdef __FreeBSD__ @@ -1411,33 +1603,48 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) ("pf: pfil registeration fail\n")); break; } -#endif + V_pf_status.running = 1; + V_pf_status.since = time_second; + + if (V_pf_status.stateid == 0) { + V_pf_status.stateid = time_second; + V_pf_status.stateid = V_pf_status.stateid << 32; + } +#else pf_status.running = 1; pf_status.since = time_second; + if (pf_status.stateid == 0) { pf_status.stateid = time_second; pf_status.stateid = pf_status.stateid << 32; } +#endif DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); } break; case DIOCSTOP: - if (!pf_status.running) +#ifdef __FreeBSD__ + if (!V_pf_status.running) error = ENOENT; else { - pf_status.running = 0; -#ifdef __FreeBSD__ + V_pf_status.running = 0; PF_UNLOCK(); error = dehook_pf(); PF_LOCK(); if (error) { - pf_status.running = 1; + V_pf_status.running = 1; DPFPRINTF(PF_DEBUG_MISC, - ("pf: pfil unregisteration failed\n")); + ("pf: pfil unregisteration failed\n")); } -#endif + V_pf_status.since = time_second; +#else + if (!pf_status.running) + error = ENOENT; + else { + pf_status.running = 0; pf_status.since = time_second; +#endif DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); } break; @@ -1473,16 +1680,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } - if (pr->pool_ticket != ticket_pabuf) { #ifdef __FreeBSD__ + if (pr->pool_ticket != V_ticket_pabuf) { DPFPRINTF(PF_DEBUG_MISC, ("pool_ticket: %d != %d\n", pr->pool_ticket, - ticket_pabuf)); + V_ticket_pabuf)); +#else + if (pr->pool_ticket != ticket_pabuf) { #endif error = EBUSY; break; } - rule = pool_get(&pf_rule_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + rule = pool_get(&V_pf_rule_pl, PR_NOWAIT); +#else + rule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL); +#endif if (rule == NULL) { error = ENOMEM; break; @@ -1504,19 +1717,27 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rule->kif = NULL; TAILQ_INIT(&rule->rpool.list); /* initialize refcounting */ - rule->states = 0; + rule->states_cur = 0; rule->src_nodes = 0; rule->entries.tqe_prev = NULL; #ifndef INET if (rule->af == AF_INET) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, rule); +#else pool_put(&pf_rule_pl, rule); +#endif error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (rule->af == AF_INET6) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, rule); +#else pool_put(&pf_rule_pl, rule); +#endif error = EAFNOSUPPORT; break; } @@ -1530,7 +1751,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (rule->ifname[0]) { rule->kif = pfi_kif_get(rule->ifname); if (rule->kif == NULL) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, rule); +#else pool_put(&pf_rule_pl, rule); +#endif error = EINVAL; break; } @@ -1567,40 +1792,42 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (rule->rt && !rule->direction) error = EINVAL; #if NPFLOG > 0 -#ifdef __FreeBSD__ if (!rule->log) rule->logif = 0; -#endif if (rule->logif >= PFLOGIFS_MAX) error = EINVAL; #endif if (pf_rtlabel_add(&rule->src.addr) || pf_rtlabel_add(&rule->dst.addr)) error = EBUSY; - if (pfi_dynaddr_setup(&rule->src.addr, rule->af)) + if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) error = EINVAL; - if (pfi_dynaddr_setup(&rule->dst.addr, rule->af)) - error = EINVAL; - if (pf_tbladdr_setup(ruleset, &rule->src.addr)) - error = EINVAL; - if (pf_tbladdr_setup(ruleset, &rule->dst.addr)) + if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) error = EINVAL; if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) error = EINVAL; +#ifdef __FreeBSD__ + TAILQ_FOREACH(pa, &V_pf_pabuf, entries) +#else TAILQ_FOREACH(pa, &pf_pabuf, entries) +#endif if (pf_tbladdr_setup(ruleset, &pa->addr)) error = EINVAL; if (rule->overload_tblname[0]) { if ((rule->overload_tbl = pfr_attach_table(ruleset, - rule->overload_tblname)) == NULL) + rule->overload_tblname, 0)) == NULL) error = EINVAL; else rule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } +#ifdef __FreeBSD__ + pf_mv_pool(&V_pf_pabuf, &rule->rpool.list); +#else pf_mv_pool(&pf_pabuf, &rule->rpool.list); +#endif if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || (rule->action == PF_BINAT)) && rule->anchor == NULL) || (rule->rt > PF_FASTROUTE)) && @@ -1613,14 +1840,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #ifdef __FreeBSD__ - if (!debug_pfugidhack && (rule->uid.op || rule->gid.op || + if (!V_debug_pfugidhack && (rule->uid.op || rule->gid.op || rule->log & PF_LOG_SOCKET_LOOKUP)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: debug.pfugidhack enabled\n")); - debug_pfugidhack = 1; + V_debug_pfugidhack = 1; } #endif - rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); rule->evaluations = rule->packets[0] = rule->packets[1] = rule->bytes[0] = rule->bytes[1] = 0; @@ -1690,12 +1916,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } - pfi_dynaddr_copyout(&pr->rule.src.addr); - pfi_dynaddr_copyout(&pr->rule.dst.addr); - pf_tbladdr_copyout(&pr->rule.src.addr); - pf_tbladdr_copyout(&pr->rule.dst.addr); - pf_rtlabel_copyout(&pr->rule.src.addr); - pf_rtlabel_copyout(&pr->rule.dst.addr); + pf_addr_copyout(&pr->rule.src.addr); + pf_addr_copyout(&pr->rule.dst.addr); for (i = 0; i < PF_SKIP_COUNT; ++i) if (rule->skip[i].ptr == NULL) pr->rule.skip[i].nr = -1; @@ -1707,6 +1929,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rule->evaluations = 0; rule->packets[0] = rule->packets[1] = 0; rule->bytes[0] = rule->bytes[1] = 0; + rule->states_tot = 0; } break; } @@ -1720,7 +1943,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (!(pcr->action == PF_CHANGE_REMOVE || pcr->action == PF_CHANGE_GET_TICKET) && +#ifdef __FreeBSD__ + pcr->pool_ticket != V_ticket_pabuf) { +#else pcr->pool_ticket != ticket_pabuf) { +#endif error = EBUSY; break; } @@ -1757,7 +1984,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (pcr->action != PF_CHANGE_REMOVE) { - newrule = pool_get(&pf_rule_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + newrule = pool_get(&V_pf_rule_pl, PR_NOWAIT); +#else + newrule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL); +#endif if (newrule == NULL) { error = ENOMEM; break; @@ -1777,18 +2008,26 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #endif TAILQ_INIT(&newrule->rpool.list); /* initialize refcounting */ - newrule->states = 0; + newrule->states_cur = 0; newrule->entries.tqe_prev = NULL; #ifndef INET if (newrule->af == AF_INET) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, newrule); +#else pool_put(&pf_rule_pl, newrule); +#endif error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (newrule->af == AF_INET6) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, newrule); +#else pool_put(&pf_rule_pl, newrule); +#endif error = EAFNOSUPPORT; break; } @@ -1796,7 +2035,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (newrule->ifname[0]) { newrule->kif = pfi_kif_get(newrule->ifname); if (newrule->kif == NULL) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, newrule); +#else pool_put(&pf_rule_pl, newrule); +#endif error = EINVAL; break; } @@ -1836,34 +2079,32 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; if (newrule->rt && !newrule->direction) error = EINVAL; -#ifdef __FreeBSD__ #if NPFLOG > 0 if (!newrule->log) newrule->logif = 0; if (newrule->logif >= PFLOGIFS_MAX) error = EINVAL; #endif -#endif if (pf_rtlabel_add(&newrule->src.addr) || pf_rtlabel_add(&newrule->dst.addr)) error = EBUSY; - if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af)) - error = EINVAL; - if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af)) + if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af)) error = EINVAL; - if (pf_tbladdr_setup(ruleset, &newrule->src.addr)) - error = EINVAL; - if (pf_tbladdr_setup(ruleset, &newrule->dst.addr)) + if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af)) error = EINVAL; if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) error = EINVAL; +#ifdef __FreeBSD__ + TAILQ_FOREACH(pa, &V_pf_pabuf, entries) +#else TAILQ_FOREACH(pa, &pf_pabuf, entries) +#endif if (pf_tbladdr_setup(ruleset, &pa->addr)) error = EINVAL; if (newrule->overload_tblname[0]) { if ((newrule->overload_tbl = pfr_attach_table( - ruleset, newrule->overload_tblname)) == + ruleset, newrule->overload_tblname, 0)) == NULL) error = EINVAL; else @@ -1871,7 +2112,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) PFR_TFLAG_ACTIVE; } +#ifdef __FreeBSD__ + pf_mv_pool(&V_pf_pabuf, &newrule->rpool.list); +#else pf_mv_pool(&pf_pabuf, &newrule->rpool.list); +#endif if (((((newrule->action == PF_NAT) || (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || @@ -1886,12 +2131,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #ifdef __FreeBSD__ - if (!debug_pfugidhack && (newrule->uid.op || + if (!V_debug_pfugidhack && (newrule->uid.op || newrule->gid.op || newrule->log & PF_LOG_SOCKET_LOOKUP)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: debug.pfugidhack enabled\n")); - debug_pfugidhack = 1; + V_debug_pfugidhack = 1; } #endif @@ -1900,7 +2145,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) newrule->packets[0] = newrule->packets[1] = 0; newrule->bytes[0] = newrule->bytes[1] = 0; } +#ifdef __FreeBSD__ + pf_empty_pool(&V_pf_pabuf); +#else pf_empty_pool(&pf_pabuf); +#endif if (pcr->action == PF_CHANGE_ADD_HEAD) oldrule = TAILQ_FIRST( @@ -1953,166 +2202,164 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCCLRSTATES: { - struct pf_state *state, *nexts; + struct pf_state *s, *nexts; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; - int killed = 0; + u_int killed = 0; - for (state = RB_MIN(pf_state_tree_id, &tree_id); state; - state = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); +#ifdef __FreeBSD__ + for (s = RB_MIN(pf_state_tree_id, &V_tree_id); s; s = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, s); +#else + for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); +#endif if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, - state->u.s.kif->pfik_name)) { -#if NPFSYNC + s->kif->pfik_name)) { +#if NPFSYNC > 0 /* don't send out individual delete messages */ - state->sync_flags = PFSTATE_NOSYNC; + SET(s->state_flags, PFSTATE_NOSYNC); #endif - pf_unlink_state(state); + pf_unlink_state(s); killed++; } } - psk->psk_af = killed; -#if NPFSYNC + psk->psk_killed = killed; +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_clear_states_ptr != NULL) + pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname); +#else pfsync_clear_states(pf_status.hostid, psk->psk_ifname); #endif +#endif break; } case DIOCKILLSTATES: { - struct pf_state *state, *nexts; - struct pf_state_host *src, *dst; + struct pf_state *s, *nexts; + struct pf_state_key *sk; + struct pf_addr *srcaddr, *dstaddr; + u_int16_t srcport, dstport; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; - int killed = 0; + u_int killed = 0; - for (state = RB_MIN(pf_state_tree_id, &tree_id); state; - state = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); + if (psk->psk_pfcmp.id) { + if (psk->psk_pfcmp.creatorid == 0) +#ifdef __FreeBSD__ + psk->psk_pfcmp.creatorid = V_pf_status.hostid; +#else + psk->psk_pfcmp.creatorid = pf_status.hostid; +#endif + if ((s = pf_find_state_byid(&psk->psk_pfcmp))) { + pf_unlink_state(s); + psk->psk_killed = 1; + } + break; + } - if (state->direction == PF_OUT) { - src = &state->lan; - dst = &state->ext; +#ifdef __FreeBSD__ + for (s = RB_MIN(pf_state_tree_id, &V_tree_id); s; + s = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, s); +#else + for (s = RB_MIN(pf_state_tree_id, &tree_id); s; + s = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); +#endif + sk = s->key[PF_SK_WIRE]; + + if (s->direction == PF_OUT) { + srcaddr = &sk->addr[1]; + dstaddr = &sk->addr[0]; + srcport = sk->port[0]; + dstport = sk->port[0]; } else { - src = &state->ext; - dst = &state->lan; + srcaddr = &sk->addr[0]; + dstaddr = &sk->addr[1]; + srcport = sk->port[0]; + dstport = sk->port[0]; } - if ((!psk->psk_af || state->af == psk->psk_af) + if ((!psk->psk_af || sk->af == psk->psk_af) && (!psk->psk_proto || psk->psk_proto == - state->proto) && + sk->proto) && PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, - &src->addr, state->af) && + srcaddr, sk->af) && PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, - &dst->addr, state->af) && + dstaddr, sk->af) && (psk->psk_src.port_op == 0 || pf_match_port(psk->psk_src.port_op, psk->psk_src.port[0], psk->psk_src.port[1], - src->port)) && + srcport)) && (psk->psk_dst.port_op == 0 || pf_match_port(psk->psk_dst.port_op, psk->psk_dst.port[0], psk->psk_dst.port[1], - dst->port)) && + dstport)) && + (!psk->psk_label[0] || (s->rule.ptr->label[0] && + !strcmp(psk->psk_label, s->rule.ptr->label))) && (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, - state->u.s.kif->pfik_name))) { -#if NPFSYNC > 0 - /* send immediate delete of state */ - pfsync_delete_state(state); - state->sync_flags |= PFSTATE_NOSYNC; -#endif - pf_unlink_state(state); + s->kif->pfik_name))) { + pf_unlink_state(s); killed++; } } - psk->psk_af = killed; + psk->psk_killed = killed; break; } case DIOCADDSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; - struct pf_state *state; - struct pfi_kif *kif; + struct pfsync_state *sp = &ps->state; - if (ps->state.timeout >= PFTM_MAX && - ps->state.timeout != PFTM_UNTIL_PACKET) { + if (sp->timeout >= PFTM_MAX && + sp->timeout != PFTM_UNTIL_PACKET) { error = EINVAL; break; } - state = pool_get(&pf_state_pl, PR_NOWAIT); - if (state == NULL) { - error = ENOMEM; - break; - } - kif = pfi_kif_get(ps->state.u.ifname); - if (kif == NULL) { - pool_put(&pf_state_pl, state); - error = ENOENT; - break; - } - bcopy(&ps->state, state, sizeof(struct pf_state)); - bzero(&state->u, sizeof(state->u)); - state->rule.ptr = &pf_default_rule; - state->nat_rule.ptr = NULL; - state->anchor.ptr = NULL; - state->rt_kif = NULL; - state->creation = time_second; - state->pfsync_time = 0; - state->packets[0] = state->packets[1] = 0; - state->bytes[0] = state->bytes[1] = 0; - - if (pf_insert_state(kif, state)) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - pool_put(&pf_state_pl, state); - error = ENOMEM; - } +#ifdef __FreeBSD__ + if (pfsync_state_import_ptr != NULL) + error = pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL); +#else + error = pfsync_state_import(sp, PFSYNC_SI_IOCTL); +#endif break; } case DIOCGETSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; - struct pf_state *state; - u_int32_t nr; - int secs; + struct pf_state *s; + struct pf_state_cmp id_key; - nr = 0; - RB_FOREACH(state, pf_state_tree_id, &tree_id) { - if (nr >= ps->nr) - break; - nr++; - } - if (state == NULL) { - error = EBUSY; + bcopy(ps->state.id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = ps->state.creatorid; + + s = pf_find_state_byid(&id_key); + if (s == NULL) { + error = ENOENT; break; } - secs = time_second; - bcopy(state, &ps->state, sizeof(ps->state)); - strlcpy(ps->state.u.ifname, state->u.s.kif->pfik_name, - sizeof(ps->state.u.ifname)); - ps->state.rule.nr = state->rule.ptr->nr; - ps->state.nat_rule.nr = (state->nat_rule.ptr == NULL) ? - -1 : state->nat_rule.ptr->nr; - ps->state.anchor.nr = (state->anchor.ptr == NULL) ? - -1 : state->anchor.ptr->nr; - ps->state.creation = secs - ps->state.creation; - ps->state.expire = pf_state_expires(state); - if (ps->state.expire > secs) - ps->state.expire -= secs; - else - ps->state.expire = 0; + + pfsync_state_export(&ps->state, s); break; } case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; struct pf_state *state; - struct pf_state *p, *pstore; + struct pfsync_state *p, *pstore; u_int32_t nr = 0; - int space = ps->ps_len; - if (space == 0) { + if (ps->ps_len == 0) { +#ifdef __FreeBSD__ + nr = V_pf_status.states; +#else nr = pf_status.states; - ps->ps_len = sizeof(struct pf_state) * nr; +#endif + ps->ps_len = sizeof(struct pfsync_state) * nr; break; } @@ -2126,29 +2373,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) p = ps->ps_states; +#ifdef __FreeBSD__ + state = TAILQ_FIRST(&V_state_list); +#else state = TAILQ_FIRST(&state_list); +#endif while (state) { if (state->timeout != PFTM_UNLINKED) { - int secs = time_second; - if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) break; - - bcopy(state, pstore, sizeof(*pstore)); - strlcpy(pstore->u.ifname, - state->u.s.kif->pfik_name, - sizeof(pstore->u.ifname)); - pstore->rule.nr = state->rule.ptr->nr; - pstore->nat_rule.nr = (state->nat_rule.ptr == - NULL) ? -1 : state->nat_rule.ptr->nr; - pstore->anchor.nr = (state->anchor.ptr == - NULL) ? -1 : state->anchor.ptr->nr; - pstore->creation = secs - pstore->creation; - pstore->expire = pf_state_expires(state); - if (pstore->expire > secs) - pstore->expire -= secs; - else - pstore->expire = 0; + pfsync_state_export(pstore, state); #ifdef __FreeBSD__ PF_COPYOUT(pstore, p, sizeof(*p), error); #else @@ -2161,10 +2395,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) p++; nr++; } - state = TAILQ_NEXT(state, u.s.entry_list); + state = TAILQ_NEXT(state, entry_list); } - ps->ps_len = sizeof(struct pf_state) * nr; + ps->ps_len = sizeof(struct pfsync_state) * nr; free(pstore, M_TEMP); break; @@ -2172,8 +2406,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETSTATUS: { struct pf_status *s = (struct pf_status *)addr; +#ifdef __FreeBSD__ + bcopy(&V_pf_status, s, sizeof(struct pf_status)); +#else bcopy(&pf_status, s, sizeof(struct pf_status)); - pfi_fill_oldstatus(s); +#endif + pfi_update_status(s->ifname, s); break; } @@ -2181,35 +2419,51 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_if *pi = (struct pfioc_if *)addr; if (pi->ifname[0] == 0) { +#ifdef __FreeBSD__ + bzero(V_pf_status.ifname, IFNAMSIZ); +#else bzero(pf_status.ifname, IFNAMSIZ); +#endif break; } - if (ifunit(pi->ifname) == NULL) { - error = EINVAL; - break; - } +#ifdef __FreeBSD__ + strlcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ); +#else strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ); +#endif break; } case DIOCCLRSTATUS: { +#ifdef __FreeBSD__ + bzero(V_pf_status.counters, sizeof(V_pf_status.counters)); + bzero(V_pf_status.fcounters, sizeof(V_pf_status.fcounters)); + bzero(V_pf_status.scounters, sizeof(V_pf_status.scounters)); + V_pf_status.since = time_second; + if (*V_pf_status.ifname) + pfi_update_status(V_pf_status.ifname, NULL); +#else bzero(pf_status.counters, sizeof(pf_status.counters)); bzero(pf_status.fcounters, sizeof(pf_status.fcounters)); bzero(pf_status.scounters, sizeof(pf_status.scounters)); pf_status.since = time_second; if (*pf_status.ifname) - pfi_clr_istats(pf_status.ifname); + pfi_update_status(pf_status.ifname, NULL); +#endif break; } case DIOCNATLOOK: { struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; + struct pf_state_key *sk; struct pf_state *state; - struct pf_state_cmp key; + struct pf_state_key_cmp key; int m = 0, direction = pnl->direction; + int sidx, didx; - key.af = pnl->af; - key.proto = pnl->proto; + /* NATLOOK src and dst are reversed, so reverse sidx/didx */ + sidx = (direction == PF_IN) ? 1 : 0; + didx = (direction == PF_IN) ? 0 : 1; if (!pnl->proto || PF_AZERO(&pnl->saddr, pnl->af) || @@ -2219,43 +2473,23 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) (!pnl->dport || !pnl->sport))) error = EINVAL; else { - /* - * userland gives us source and dest of connection, - * reverse the lookup so we ask for what happens with - * the return traffic, enabling us to find it in the - * state tree. - */ - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af); - key.ext.port = pnl->dport; - PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af); - key.gwy.port = pnl->sport; - state = pf_find_state_all(&key, PF_EXT_GWY, &m); - } else { - PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af); - key.lan.port = pnl->dport; - PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af); - key.ext.port = pnl->sport; - state = pf_find_state_all(&key, PF_LAN_EXT, &m); - } + key.af = pnl->af; + key.proto = pnl->proto; + PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af); + key.port[sidx] = pnl->sport; + PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); + key.port[didx] = pnl->dport; + + state = pf_find_state_all(&key, direction, &m); + if (m > 1) error = E2BIG; /* more than one state */ else if (state != NULL) { - if (direction == PF_IN) { - PF_ACPY(&pnl->rsaddr, &state->lan.addr, - state->af); - pnl->rsport = state->lan.port; - PF_ACPY(&pnl->rdaddr, &pnl->daddr, - pnl->af); - pnl->rdport = pnl->dport; - } else { - PF_ACPY(&pnl->rdaddr, &state->gwy.addr, - state->af); - pnl->rdport = state->gwy.port; - PF_ACPY(&pnl->rsaddr, &pnl->saddr, - pnl->af); - pnl->rsport = pnl->sport; - } + sk = state->key[sidx]; + PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); + pnl->rsport = sk->port[sidx]; + PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af); + pnl->rdport = sk->port[didx]; } else error = ENOENT; } @@ -2271,10 +2505,18 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; goto fail; } +#ifdef __FreeBSD__ + old = V_pf_default_rule.timeout[pt->timeout]; +#else old = pf_default_rule.timeout[pt->timeout]; +#endif if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) pt->seconds = 1; +#ifdef __FreeBSD__ + V_pf_default_rule.timeout[pt->timeout] = pt->seconds; +#else pf_default_rule.timeout[pt->timeout] = pt->seconds; +#endif if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) wakeup(pf_purge_thread); pt->seconds = old; @@ -2288,7 +2530,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; goto fail; } +#ifdef __FreeBSD__ + pt->seconds = V_pf_default_rule.timeout[pt->timeout]; +#else pt->seconds = pf_default_rule.timeout[pt->timeout]; +#endif break; } @@ -2299,7 +2545,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; goto fail; } +#ifdef __FreeBSD__ + pl->limit = V_pf_pool_limits[pl->index].limit; +#else pl->limit = pf_pool_limits[pl->index].limit; +#endif break; } @@ -2308,29 +2558,40 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) int old_limit; if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || +#ifdef __FreeBSD__ + V_pf_pool_limits[pl->index].pp == NULL) { +#else pf_pool_limits[pl->index].pp == NULL) { +#endif error = EINVAL; goto fail; } #ifdef __FreeBSD__ - uma_zone_set_max(pf_pool_limits[pl->index].pp, pl->limit); + uma_zone_set_max(V_pf_pool_limits[pl->index].pp, pl->limit); + old_limit = V_pf_pool_limits[pl->index].limit; + V_pf_pool_limits[pl->index].limit = pl->limit; + pl->limit = old_limit; #else if (pool_sethardlimit(pf_pool_limits[pl->index].pp, pl->limit, NULL, 0) != 0) { error = EBUSY; goto fail; } -#endif old_limit = pf_pool_limits[pl->index].limit; pf_pool_limits[pl->index].limit = pl->limit; pl->limit = old_limit; +#endif break; } case DIOCSETDEBUG: { u_int32_t *level = (u_int32_t *)addr; +#ifdef __FreeBSD__ + V_pf_status.debug = *level; +#else pf_status.debug = *level; +#endif break; } @@ -2373,11 +2634,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_altq *altq; /* enable all altq interfaces on active list */ - TAILQ_FOREACH(altq, pf_altqs_active, entries) { #ifdef __FreeBSD__ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { #else + TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { #endif error = pf_enable_altq(altq); @@ -2386,7 +2648,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } if (error == 0) +#ifdef __FreeBSD__ + V_pf_altq_running = 1; +#else pf_altq_running = 1; +#endif DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); break; } @@ -2395,11 +2661,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_altq *altq; /* disable all altq interfaces on active list */ - TAILQ_FOREACH(altq, pf_altqs_active, entries) { #ifdef __FreeBSD__ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { #else + TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { #endif error = pf_disable_altq(altq); @@ -2408,7 +2675,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } if (error == 0) +#ifdef __FreeBSD__ + V_pf_altq_running = 0; +#else pf_altq_running = 0; +#endif DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); break; } @@ -2417,11 +2688,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq, *a; +#ifdef __FreeBSD__ + if (pa->ticket != V_ticket_altqs_inactive) { +#else if (pa->ticket != ticket_altqs_inactive) { +#endif error = EBUSY; break; } - altq = pool_get(&pf_altq_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + altq = pool_get(&V_pf_altq_pl, PR_NOWAIT); +#else + altq = pool_get(&pf_altq_pl, PR_WAITOK|PR_LIMITFAIL); +#endif if (altq == NULL) { error = ENOMEM; break; @@ -2438,11 +2717,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (altq->qname[0] != 0) { if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { error = EBUSY; +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, altq); +#else pool_put(&pf_altq_pl, altq); +#endif break; } altq->altq_disc = NULL; +#ifdef __FreeBSD__ + TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) { +#else TAILQ_FOREACH(a, pf_altqs_inactive, entries) { +#endif if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 && a->qname[0] == 0) { altq->altq_disc = a->altq_disc; @@ -2458,18 +2745,26 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; } else { PF_UNLOCK(); -#endif +#endif error = altq_add(altq); #ifdef __FreeBSD__ PF_LOCK(); } #endif if (error) { +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, altq); +#else pool_put(&pf_altq_pl, altq); +#endif break; } +#ifdef __FreeBSD__ + TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); +#else TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries); +#endif bcopy(altq, &pa->altq, sizeof(struct pf_altq)); break; } @@ -2479,9 +2774,15 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_altq *altq; pa->nr = 0; +#ifdef __FreeBSD__ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) + pa->nr++; + pa->ticket = V_ticket_altqs_active; +#else TAILQ_FOREACH(altq, pf_altqs_active, entries) pa->nr++; pa->ticket = ticket_altqs_active; +#endif break; } @@ -2490,12 +2791,20 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_altq *altq; u_int32_t nr; +#ifdef __FreeBSD__ + if (pa->ticket != V_ticket_altqs_active) { +#else if (pa->ticket != ticket_altqs_active) { +#endif error = EBUSY; break; } nr = 0; +#ifdef __FreeBSD__ + altq = TAILQ_FIRST(V_pf_altqs_active); +#else altq = TAILQ_FIRST(pf_altqs_active); +#endif while ((altq != NULL) && (nr < pa->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; @@ -2519,13 +2828,21 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) u_int32_t nr; int nbytes; +#ifdef __FreeBSD__ + if (pq->ticket != V_ticket_altqs_active) { +#else if (pq->ticket != ticket_altqs_active) { +#endif error = EBUSY; break; } nbytes = pq->nbytes; nr = 0; +#ifdef __FreeBSD__ + altq = TAILQ_FIRST(V_pf_altqs_active); +#else altq = TAILQ_FIRST(pf_altqs_active); +#endif while ((altq != NULL) && (nr < pq->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; @@ -2534,6 +2851,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } + #ifdef __FreeBSD__ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) { error = ENXIO; @@ -2556,15 +2874,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCBEGINADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; +#ifdef __FreeBSD__ + pf_empty_pool(&V_pf_pabuf); + pp->ticket = ++V_ticket_pabuf; +#else pf_empty_pool(&pf_pabuf); pp->ticket = ++ticket_pabuf; +#endif break; } case DIOCADDADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; +#ifdef __FreeBSD__ + if (pp->ticket != V_ticket_pabuf) { +#else if (pp->ticket != ticket_pabuf) { +#endif error = EBUSY; break; } @@ -2586,7 +2913,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } - pa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + pa = pool_get(&V_pf_pooladdr_pl, PR_NOWAIT); +#else + pa = pool_get(&pf_pooladdr_pl, PR_WAITOK|PR_LIMITFAIL); +#endif if (pa == NULL) { error = ENOMEM; break; @@ -2595,7 +2926,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pa->ifname[0]) { pa->kif = pfi_kif_get(pa->ifname); if (pa->kif == NULL) { +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, pa); +#else pool_put(&pf_pooladdr_pl, pa); +#endif error = EINVAL; break; } @@ -2604,11 +2939,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pfi_dynaddr_setup(&pa->addr, pp->af)) { pfi_dynaddr_remove(&pa->addr); pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE); +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, pa); +#else pool_put(&pf_pooladdr_pl, pa); +#endif error = EINVAL; break; } +#ifdef __FreeBSD__ + TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries); +#else TAILQ_INSERT_TAIL(&pf_pabuf, pa, entries); +#endif break; } @@ -2647,9 +2990,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); - pfi_dynaddr_copyout(&pp->addr.addr); - pf_tbladdr_copyout(&pp->addr.addr); - pf_rtlabel_copyout(&pp->addr.addr); + pf_addr_copyout(&pp->addr.addr); break; } @@ -2682,7 +3023,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } if (pca->action != PF_CHANGE_REMOVE) { - newpa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + newpa = pool_get(&V_pf_pooladdr_pl, + PR_NOWAIT); +#else + newpa = pool_get(&pf_pooladdr_pl, + PR_WAITOK|PR_LIMITFAIL); +#endif if (newpa == NULL) { error = ENOMEM; break; @@ -2690,14 +3037,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); #ifndef INET if (pca->af == AF_INET) { +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, newpa); +#else pool_put(&pf_pooladdr_pl, newpa); +#endif error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pca->af == AF_INET6) { +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, newpa); +#else pool_put(&pf_pooladdr_pl, newpa); +#endif error = EAFNOSUPPORT; break; } @@ -2705,7 +3060,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (newpa->ifname[0]) { newpa->kif = pfi_kif_get(newpa->ifname); if (newpa->kif == NULL) { +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, newpa); +#else pool_put(&pf_pooladdr_pl, newpa); +#endif error = EINVAL; break; } @@ -2716,7 +3075,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pf_tbladdr_setup(ruleset, &newpa->addr)) { pfi_dynaddr_remove(&newpa->addr); pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE); +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, newpa); +#else pool_put(&pf_pooladdr_pl, newpa); +#endif error = EINVAL; break; } @@ -2745,7 +3108,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pfi_dynaddr_remove(&oldpa->addr); pf_tbladdr_remove(&oldpa->addr); pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE); +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, oldpa); +#else pool_put(&pf_pooladdr_pl, oldpa); +#endif } else { if (oldpa == NULL) TAILQ_INSERT_TAIL(&pool->list, newpa, entries); @@ -2776,7 +3143,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pr->nr = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ +#ifdef __FreeBSD__ + RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) +#else RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) +#endif if (anchor->parent == NULL) pr->nr++; } else { @@ -2801,7 +3172,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pr->name[0] = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ +#ifdef __FreeBSD__ + RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) +#else RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) +#endif if (anchor->parent == NULL && nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); @@ -3046,17 +3421,15 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #ifdef __FreeBSD__ PF_UNLOCK(); #endif - ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), - M_TEMP, M_WAITOK); - table = (struct pfr_table *)malloc(sizeof(*table), - M_TEMP, M_WAITOK); + ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); + table = malloc(sizeof(*table), M_TEMP, M_WAITOK); #ifdef __FreeBSD__ PF_LOCK(); #endif for (i = 0; i < io->size; i++) { #ifdef __FreeBSD__ - PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); - if (error) { + PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); + if (error) { #else if (copyin(io->array+i, ioe, sizeof(*ioe))) { #endif @@ -3132,10 +3505,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #ifdef __FreeBSD__ PF_UNLOCK(); #endif - ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), - M_TEMP, M_WAITOK); - table = (struct pfr_table *)malloc(sizeof(*table), - M_TEMP, M_WAITOK); + ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); + table = malloc(sizeof(*table), M_TEMP, M_WAITOK); #ifdef __FreeBSD__ PF_LOCK(); #endif @@ -3207,10 +3578,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #ifdef __FreeBSD__ PF_UNLOCK(); #endif - ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), - M_TEMP, M_WAITOK); - table = (struct pfr_table *)malloc(sizeof(*table), - M_TEMP, M_WAITOK); + ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); + table = malloc(sizeof(*table), M_TEMP, M_WAITOK); #ifdef __FreeBSD__ PF_LOCK(); #endif @@ -3236,8 +3605,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; goto fail; } +#ifdef __FreeBSD__ + if (!V_altqs_inactive_open || ioe->ticket != + V_ticket_altqs_inactive) { +#else if (!altqs_inactive_open || ioe->ticket != ticket_altqs_inactive) { +#endif free(table, M_TEMP); free(ioe, M_TEMP); error = EBUSY; @@ -3248,7 +3622,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case PF_RULESET_TABLE: rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || !rs->topen || ioe->ticket != - rs->tticket) { + rs->tticket) { free(table, M_TEMP); free(ioe, M_TEMP); error = EBUSY; @@ -3332,7 +3706,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) int space = psn->psn_len; if (space == 0) { +#ifdef __FreeBSD__ + RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) +#else RB_FOREACH(n, pf_src_tree, &tree_src_tracking) +#endif nr++; psn->psn_len = sizeof(struct pf_src_node) * nr; break; @@ -3345,9 +3723,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #ifdef __FreeBSD__ PF_LOCK(); #endif - p = psn->psn_src_nodes; +#ifdef __FreeBSD__ + RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { +#else RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { +#endif int secs = time_second, diff; if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) @@ -3393,39 +3774,59 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_src_node *n; struct pf_state *state; +#ifdef __FreeBSD__ + RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { +#else RB_FOREACH(state, pf_state_tree_id, &tree_id) { +#endif state->src_node = NULL; state->nat_src_node = NULL; } +#ifdef __FreeBSD__ + RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { +#else RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { +#endif n->expire = 1; n->states = 0; } pf_purge_expired_src_nodes(1); +#ifdef __FreeBSD__ + V_pf_status.src_nodes = 0; +#else pf_status.src_nodes = 0; +#endif break; } case DIOCKILLSRCNODES: { struct pf_src_node *sn; struct pf_state *s; - struct pfioc_src_node_kill *psnk = \ - (struct pfioc_src_node_kill *) addr; - int killed = 0; + struct pfioc_src_node_kill *psnk = + (struct pfioc_src_node_kill *)addr; + u_int killed = 0; +#ifdef __FreeBSD__ + RB_FOREACH(sn, pf_src_tree, &V_tree_src_tracking) { +#else RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) { - if (PF_MATCHA(psnk->psnk_src.neg, \ - &psnk->psnk_src.addr.v.a.addr, \ - &psnk->psnk_src.addr.v.a.mask, \ - &sn->addr, sn->af) && - PF_MATCHA(psnk->psnk_dst.neg, \ - &psnk->psnk_dst.addr.v.a.addr, \ - &psnk->psnk_dst.addr.v.a.mask, \ - &sn->raddr, sn->af)) { +#endif + if (PF_MATCHA(psnk->psnk_src.neg, + &psnk->psnk_src.addr.v.a.addr, + &psnk->psnk_src.addr.v.a.mask, + &sn->addr, sn->af) && + PF_MATCHA(psnk->psnk_dst.neg, + &psnk->psnk_dst.addr.v.a.addr, + &psnk->psnk_dst.addr.v.a.mask, + &sn->raddr, sn->af)) { /* Handle state to src_node linkage */ if (sn->states != 0) { - RB_FOREACH(s, pf_state_tree_id, + RB_FOREACH(s, pf_state_tree_id, +#ifdef __FreeBSD__ + &V_tree_id) { +#else &tree_id) { +#endif if (s->src_node == sn) s->src_node = NULL; if (s->nat_src_node == sn) @@ -3441,17 +3842,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (killed > 0) pf_purge_expired_src_nodes(1); - psnk->psnk_af = killed; + psnk->psnk_killed = killed; break; } case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; +#ifdef __FreeBSD__ + if (*hostid == 0) + V_pf_status.hostid = arc4random(); + else + V_pf_status.hostid = *hostid; +#else if (*hostid == 0) pf_status.hostid = arc4random(); else pf_status.hostid = *hostid; +#endif break; } @@ -3494,43 +3902,110 @@ fail: PF_UNLOCK(); if (flags & FWRITE) - sx_xunlock(&pf_consistency_lock); + sx_xunlock(&V_pf_consistency_lock); else - sx_sunlock(&pf_consistency_lock); + sx_sunlock(&V_pf_consistency_lock); #else splx(s); - /* XXX: Lock order? */ if (flags & FWRITE) rw_exit_write(&pf_consistency_lock); else rw_exit_read(&pf_consistency_lock); #endif + + CURVNET_RESTORE(); + return (error); } #ifdef __FreeBSD__ +void +pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) +{ + bzero(sp, sizeof(struct pfsync_state)); + + /* copy from state key */ + sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; + sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; + sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; + sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; + sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; + sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; + sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; + sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; + sp->proto = st->key[PF_SK_WIRE]->proto; + sp->af = st->key[PF_SK_WIRE]->af; + + /* copy from state */ + strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); + bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); + sp->creation = htonl(time_second - st->creation); + sp->expire = pf_state_expires(st); + if (sp->expire <= time_second) + sp->expire = htonl(0); + else + sp->expire = htonl(sp->expire - time_second); + + sp->direction = st->direction; + sp->log = st->log; + sp->timeout = st->timeout; + sp->state_flags = st->state_flags; + if (st->src_node) + sp->sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->nat_src_node) + sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + + bcopy(&st->id, &sp->id, sizeof(sp->id)); + sp->creatorid = st->creatorid; + pf_state_peer_hton(&st->src, &sp->src); + pf_state_peer_hton(&st->dst, &sp->dst); + + if (st->rule.ptr == NULL) + sp->rule = htonl(-1); + else + sp->rule = htonl(st->rule.ptr->nr); + if (st->anchor.ptr == NULL) + sp->anchor = htonl(-1); + else + sp->anchor = htonl(st->anchor.ptr->nr); + if (st->nat_rule.ptr == NULL) + sp->nat_rule = htonl(-1); + else + sp->nat_rule = htonl(st->nat_rule.ptr->nr); + + pf_state_counter_hton(st->packets[0], sp->packets[0]); + pf_state_counter_hton(st->packets[1], sp->packets[1]); + pf_state_counter_hton(st->bytes[0], sp->bytes[0]); + pf_state_counter_hton(st->bytes[1], sp->bytes[1]); + +} + /* * XXX - Check for version missmatch!!! */ static void pf_clear_states(void) { - struct pf_state *state; - + struct pf_state *state; + +#ifdef __FreeBSD__ + RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { +#else RB_FOREACH(state, pf_state_tree_id, &tree_id) { +#endif state->timeout = PFTM_PURGE; #if NPFSYNC /* don't send out individual delete messages */ - state->sync_flags = PFSTATE_NOSYNC; + state->sync_state = PFSTATE_NOSYNC; #endif pf_unlink_state(state); } - + #if 0 /* NPFSYNC */ /* * XXX This is called on module unload, we do not want to sync that over? */ */ - pfsync_clear_states(pf_status.hostid, psk->psk_ifname); + pfsync_clear_states(V_pf_status.hostid, psk->psk_ifname); #endif } @@ -3554,11 +4029,19 @@ pf_clear_srcnodes(void) struct pf_src_node *n; struct pf_state *state; +#ifdef __FreeBSD__ + RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { +#else RB_FOREACH(state, pf_state_tree_id, &tree_id) { +#endif state->src_node = NULL; state->nat_src_node = NULL; } +#ifdef __FreeBSD__ + RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { +#else RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { +#endif n->expire = 1; n->states = 0; } @@ -3576,8 +4059,8 @@ shutdown_pf(void) int error = 0; u_int32_t t[5]; char nn = '\0'; - - pf_status.running = 0; + + V_pf_status.running = 0; do { if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) != 0) { @@ -3587,22 +4070,22 @@ shutdown_pf(void) if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } /* XXX: these should always succeed here */ @@ -3615,13 +4098,13 @@ shutdown_pf(void) if ((error = pf_clear_tables()) != 0) break; -#ifdef ALTQ + #ifdef ALTQ if ((error = pf_begin_altq(&t[0])) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n")); break; } pf_commit_altq(t[0]); -#endif + #endif pf_clear_states(); @@ -3631,9 +4114,10 @@ shutdown_pf(void) /* fingerprints and interfaces have thier own cleanup code */ } while(0); - return (error); + return (error); } +#ifdef INET static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct inpcb *inp) @@ -3652,10 +4136,12 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, if ((*m)->m_pkthdr.len >= (int)sizeof(struct ip)) { /* if m_pkthdr.len is less than ip header, pf will handle. */ h = mtod(*m, struct ip *); - HTONS(h->ip_len); - HTONS(h->ip_off); + HTONS(h->ip_len); + HTONS(h->ip_off); } + CURVNET_SET(ifp->if_vnet); chk = pf_test(PF_IN, ifp, m, NULL, inp); + CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; @@ -3692,10 +4178,12 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, if ((*m)->m_pkthdr.len >= (int)sizeof(*h)) { /* if m_pkthdr.len is less than ip header, pf will handle. */ h = mtod(*m, struct ip *); - HTONS(h->ip_len); - HTONS(h->ip_off); + HTONS(h->ip_len); + HTONS(h->ip_off); } + CURVNET_SET(ifp->if_vnet); chk = pf_test(PF_OUT, ifp, m, NULL, inp); + CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; @@ -3708,6 +4196,7 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, } return chk; } +#endif #ifdef INET6 static int @@ -3725,8 +4214,10 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, * order to support scoped addresses. In order to support stateful * filtering we have change this to lo0 as it is the case in IPv4. */ + CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, NULL, inp); + CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; @@ -3743,12 +4234,17 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, */ int chk; - /* We need a proper CSUM befor we start (s. OpenBSD ip_output) */ + /* We need a proper CSUM before we start (s. OpenBSD ip_output) */ if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { +#ifdef INET + /* XXX-BZ copy&paste error from r126261? */ in_delayed_cksum(*m); +#endif (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } + CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_OUT, ifp, m, NULL, inp); + CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; @@ -3760,51 +4256,60 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, static int hook_pf(void) { +#ifdef INET struct pfil_head *pfh_inet; +#endif #ifdef INET6 struct pfil_head *pfh_inet6; #endif - - PF_ASSERT(MA_NOTOWNED); - if (pf_pfil_hooked) + PF_UNLOCK_ASSERT(); + + if (V_pf_pfil_hooked) return (0); - + +#ifdef INET pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return (ESRCH); /* XXX */ pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); +#endif #ifdef INET6 pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); if (pfh_inet6 == NULL) { +#ifdef INET pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); +#endif return (ESRCH); /* XXX */ } pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); #endif - pf_pfil_hooked = 1; + V_pf_pfil_hooked = 1; return (0); } static int dehook_pf(void) { +#ifdef INET struct pfil_head *pfh_inet; +#endif #ifdef INET6 struct pfil_head *pfh_inet6; #endif - PF_ASSERT(MA_NOTOWNED); + PF_UNLOCK_ASSERT(); - if (pf_pfil_hooked == 0) + if (V_pf_pfil_hooked == 0) return (0); +#ifdef INET pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return (ESRCH); /* XXX */ @@ -3812,6 +4317,7 @@ dehook_pf(void) pfh_inet); pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); +#endif #ifdef INET6 pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); if (pfh_inet6 == NULL) @@ -3822,21 +4328,34 @@ dehook_pf(void) pfh_inet6); #endif - pf_pfil_hooked = 0; + V_pf_pfil_hooked = 0; return (0); } static int pf_load(void) { - init_zone_var(); + VNET_ITERATOR_DECL(vnet_iter); + + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + V_pf_pfil_hooked = 0; + V_pf_end_threads = 0; + V_debug_pfugidhack = 0; + TAILQ_INIT(&V_pf_tags); + TAILQ_INIT(&V_pf_qids); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); + init_pf_mutex(); pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME); - if (pfattach() < 0) { - destroy_dev(pf_dev); - destroy_pf_mutex(); + init_zone_var(); + sx_init(&V_pf_consistency_lock, "pf_statetbl_lock"); + if (pfattach() < 0) return (ENOMEM); - } + return (0); } @@ -3846,8 +4365,9 @@ pf_unload(void) int error = 0; PF_LOCK(); - pf_status.running = 0; + V_pf_status.running = 0; PF_UNLOCK(); + m_addr_chg_pf_p = NULL; error = dehook_pf(); if (error) { /* @@ -3860,8 +4380,8 @@ pf_unload(void) } PF_LOCK(); shutdown_pf(); - pf_end_threads = 1; - while (pf_end_threads < 2) { + V_pf_end_threads = 1; + while (V_pf_end_threads < 2) { wakeup_one(pf_purge_thread); msleep(pf_purge_thread, &pf_task_mtx, 0, "pftmo", hz); } @@ -3872,6 +4392,7 @@ pf_unload(void) PF_UNLOCK(); destroy_dev(pf_dev); destroy_pf_mutex(); + sx_destroy(&V_pf_consistency_lock); return error; } @@ -3884,7 +4405,12 @@ pf_modevent(module_t mod, int type, void *data) case MOD_LOAD: error = pf_load(); break; - + case MOD_QUIESCE: + /* + * Module should not be unloaded due to race conditions. + */ + error = EPERM; + break; case MOD_UNLOAD: error = pf_unload(); break; @@ -3894,13 +4420,13 @@ pf_modevent(module_t mod, int type, void *data) } return error; } - + static moduledata_t pf_mod = { "pf", pf_modevent, 0 }; -DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST); +DECLARE_MODULE(pf, pf_mod, SI_SUB_PSEUDO, SI_ORDER_FIRST); MODULE_VERSION(pf, PF_MODVER); -#endif /* __FreeBSD__ */ +#endif /* __FreeBSD__ */ diff --git a/freebsd/sys/contrib/pf/net/pf_lb.c b/freebsd/sys/contrib/pf/net/pf_lb.c new file mode 100644 index 00000000..0c2046c2 --- /dev/null +++ b/freebsd/sys/contrib/pf/net/pf_lb.c @@ -0,0 +1,795 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/* $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002 - 2008 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#ifdef __FreeBSD__ +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); +#endif + +#ifdef __FreeBSD__ +#include <rtems/bsd/local/opt_bpf.h> +#include <rtems/bsd/local/opt_pf.h> + +#ifdef DEV_BPF +#define NBPFILTER DEV_BPF +#else +#define NBPFILTER 0 +#endif + +#ifdef DEV_PFLOG +#define NPFLOG DEV_PFLOG +#else +#define NPFLOG 0 +#endif + +#ifdef DEV_PFSYNC +#define NPFSYNC DEV_PFSYNC +#else +#define NPFSYNC 0 +#endif + +#ifdef DEV_PFLOW +#define NPFLOW DEV_PFLOW +#else +#define NPFLOW 0 +#endif + +#else +#include "bpfilter.h" +#include "pflog.h" +#include "pfsync.h" +#include "pflow.h" +#endif + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/filio.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/time.h> +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#endif +#ifndef __FreeBSD__ +#include <sys/pool.h> +#endif +#include <sys/proc.h> +#ifdef __FreeBSD__ +#include <sys/kthread.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/sx.h> +#else +#include <sys/rwlock.h> +#endif + +#ifdef __FreeBSD__ +#include <sys/md5.h> +#else +#include <crypto/md5.h> +#endif + +#include <net/if.h> +#include <net/if_types.h> +#include <net/bpf.h> +#include <net/route.h> +#include <net/radix_mpath.h> + +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_seq.h> +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#include <netinet/in_pcb.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> +#include <netinet/udp_var.h> +#include <netinet/icmp_var.h> +#include <netinet/if_ether.h> + +#ifndef __FreeBSD__ +#include <dev/rndvar.h> +#endif +#include <net/pfvar.h> +#include <net/if_pflog.h> +#include <net/if_pflow.h> + +#if NPFSYNC > 0 +#include <net/if_pfsync.h> +#endif /* NPFSYNC > 0 */ + +#ifdef INET6 +#include <netinet/ip6.h> +#include <netinet/in_pcb.h> +#include <netinet/icmp6.h> +#include <netinet6/nd6.h> +#endif /* INET6 */ + + +#ifdef __FreeBSD__ +#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x +#else +#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x +#endif + +/* + * Global variables + */ + +void pf_hash(struct pf_addr *, struct pf_addr *, + struct pf_poolhashkey *, sa_family_t); +struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, + struct pf_addr *, u_int16_t, struct pf_addr *, + u_int16_t, int); +int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, + struct pf_addr *, struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, + struct pf_src_node **); + +#define mix(a,b,c) \ + do { \ + a -= b; a -= c; a ^= (c >> 13); \ + b -= c; b -= a; b ^= (a << 8); \ + c -= a; c -= b; c ^= (b >> 13); \ + a -= b; a -= c; a ^= (c >> 12); \ + b -= c; b -= a; b ^= (a << 16); \ + c -= a; c -= b; c ^= (b >> 5); \ + a -= b; a -= c; a ^= (c >> 3); \ + b -= c; b -= a; b ^= (a << 10); \ + c -= a; c -= b; c ^= (b >> 15); \ + } while (0) + +/* + * hash function based on bridge_hash in if_bridge.c + */ +void +pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, + struct pf_poolhashkey *key, sa_family_t af) +{ + u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; + + switch (af) { +#ifdef INET + case AF_INET: + a += inaddr->addr32[0]; + b += key->key32[1]; + mix(a, b, c); + hash->addr32[0] = c + key->key32[2]; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + a += inaddr->addr32[0]; + b += inaddr->addr32[2]; + mix(a, b, c); + hash->addr32[0] = c; + a += inaddr->addr32[1]; + b += inaddr->addr32[3]; + c += key->key32[1]; + mix(a, b, c); + hash->addr32[1] = c; + a += inaddr->addr32[2]; + b += inaddr->addr32[1]; + c += key->key32[2]; + mix(a, b, c); + hash->addr32[2] = c; + a += inaddr->addr32[3]; + b += inaddr->addr32[0]; + c += key->key32[3]; + mix(a, b, c); + hash->addr32[3] = c; + break; +#endif /* INET6 */ + } +} + +struct pf_rule * +pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, + int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, + struct pf_addr *daddr, u_int16_t dport, int rs_num) +{ + struct pf_rule *r, *rm = NULL; + struct pf_ruleset *ruleset = NULL; + int tag = -1; + int rtableid = -1; + int asd = 0; + + r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); + while (r && rm == NULL) { + struct pf_rule_addr *src = NULL, *dst = NULL; + struct pf_addr_wrap *xdst = NULL; + + if (r->action == PF_BINAT && direction == PF_IN) { + src = &r->dst; + if (r->rpool.cur != NULL) + xdst = &r->rpool.cur->addr; + } else { + src = &r->src; + dst = &r->dst; + } + + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != pd->af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, + src->neg, kif, M_GETFIB(m))) + r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : + PF_SKIP_DST_ADDR].ptr; + else if (src->port_op && !pf_match_port(src->port_op, + src->port[0], src->port[1], sport)) + r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : + PF_SKIP_DST_PORT].ptr; + else if (dst != NULL && + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL, + M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, + 0, NULL, M_GETFIB(m))) + r = TAILQ_NEXT(r, entries); + else if (dst != NULL && dst->port_op && + !pf_match_port(dst->port_op, dst->port[0], + dst->port[1], dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; +#ifdef __FreeBSD__ + else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) +#else + else if (r->match_tag && !pf_match_tag(m, r, &tag)) +#endif + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != + IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, + off, pd->hdr.tcp), r->os_fingerprint))) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; + if (r->anchor == NULL) { + rm = r; + } else + pf_step_into_anchor(&asd, &ruleset, rs_num, + &r, NULL, NULL); + } + if (r == NULL) + pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, + NULL, NULL); + } +#ifdef __FreeBSD__ + if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) +#else + if (pf_tag_packet(m, tag, rtableid)) +#endif + return (NULL); + if (rm != NULL && (rm->action == PF_NONAT || + rm->action == PF_NORDR || rm->action == PF_NOBINAT)) + return (NULL); + return (rm); +} + +int +pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, + struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, + struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, + struct pf_src_node **sn) +{ + struct pf_state_key_cmp key; + struct pf_addr init_addr; + u_int16_t cut; + + bzero(&init_addr, sizeof(init_addr)); + if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + return (1); + + if (proto == IPPROTO_ICMP) { + low = 1; + high = 65535; + } + + do { + key.af = af; + key.proto = proto; + PF_ACPY(&key.addr[1], daddr, key.af); + PF_ACPY(&key.addr[0], naddr, key.af); + key.port[1] = dport; + + /* + * port search; start random, step; + * similar 2 portloop in in_pcbbind + */ + if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || + proto == IPPROTO_ICMP)) { + key.port[0] = dport; + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) + return (0); + } else if (low == 0 && high == 0) { + key.port[0] = *nport; + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) + return (0); + } else if (low == high) { + key.port[0] = htons(low); + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { + *nport = htons(low); + return (0); + } + } else { + u_int16_t tmp; + + if (low > high) { + tmp = low; + low = high; + high = tmp; + } + /* low < high */ +#ifdef __FreeBSD__ + cut = htonl(arc4random()) % (1 + high - low) + low; +#else + cut = arc4random_uniform(1 + high - low) + low; +#endif + /* low <= cut <= high */ + for (tmp = cut; tmp <= high; ++(tmp)) { + key.port[0] = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == +#ifdef __FreeBSD__ + NULL) { +#else + NULL && !in_baddynamic(tmp, proto)) { +#endif + *nport = htons(tmp); + return (0); + } + } + for (tmp = cut - 1; tmp >= low; --(tmp)) { + key.port[0] = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == +#ifdef __FreeBSD__ + NULL) { +#else + NULL && !in_baddynamic(tmp, proto)) { +#endif + *nport = htons(tmp); + return (0); + } + } + } + + switch (r->rpool.opts & PF_POOL_TYPEMASK) { + case PF_POOL_RANDOM: + case PF_POOL_ROUNDROBIN: + if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + return (1); + break; + case PF_POOL_NONE: + case PF_POOL_SRCHASH: + case PF_POOL_BITMASK: + default: + return (1); + } + } while (! PF_AEQ(&init_addr, naddr, af) ); + return (1); /* none available */ +} + +int +pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, + struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) +{ + unsigned char hash[16]; + struct pf_pool *rpool = &r->rpool; + struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; + struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; + struct pf_pooladdr *acur = rpool->cur; + struct pf_src_node k; + + if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && + (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + k.af = af; + PF_ACPY(&k.addr, saddr, af); + if (r->rule_flag & PFRULE_RULESRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) + k.rule.ptr = r; + else + k.rule.ptr = NULL; +#ifdef __FreeBSD__ + V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + *sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k); +#else + pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); +#endif + if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { + PF_ACPY(naddr, &(*sn)->raddr, af); +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { +#endif + printf("pf_map_addr: src tracking maps "); + pf_print_host(&k.addr, 0, af); + printf(" to "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + return (0); + } + } + + if (rpool->cur->addr.type == PF_ADDR_NOROUTE) + return (1); + if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + switch (af) { +#ifdef INET + case AF_INET: + if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && + (rpool->opts & PF_POOL_TYPEMASK) != + PF_POOL_ROUNDROBIN) + return (1); + raddr = &rpool->cur->addr.p.dyn->pfid_addr4; + rmask = &rpool->cur->addr.p.dyn->pfid_mask4; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && + (rpool->opts & PF_POOL_TYPEMASK) != + PF_POOL_ROUNDROBIN) + return (1); + raddr = &rpool->cur->addr.p.dyn->pfid_addr6; + rmask = &rpool->cur->addr.p.dyn->pfid_mask6; + break; +#endif /* INET6 */ + } + } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) + return (1); /* unsupported */ + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + } + + switch (rpool->opts & PF_POOL_TYPEMASK) { + case PF_POOL_NONE: + PF_ACPY(naddr, raddr, af); + break; + case PF_POOL_BITMASK: + PF_POOLMASK(naddr, raddr, rmask, saddr, af); + break; + case PF_POOL_RANDOM: + if (init_addr != NULL && PF_AZERO(init_addr, af)) { + switch (af) { +#ifdef INET + case AF_INET: + rpool->counter.addr32[0] = htonl(arc4random()); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (rmask->addr32[3] != 0xffffffff) + rpool->counter.addr32[3] = + htonl(arc4random()); + else + break; + if (rmask->addr32[2] != 0xffffffff) + rpool->counter.addr32[2] = + htonl(arc4random()); + else + break; + if (rmask->addr32[1] != 0xffffffff) + rpool->counter.addr32[1] = + htonl(arc4random()); + else + break; + if (rmask->addr32[0] != 0xffffffff) + rpool->counter.addr32[0] = + htonl(arc4random()); + break; +#endif /* INET6 */ + } + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + PF_ACPY(init_addr, naddr, af); + + } else { + PF_AINC(&rpool->counter, af); + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + } + break; + case PF_POOL_SRCHASH: + pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); + PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); + break; + case PF_POOL_ROUNDROBIN: + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if (!pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) + goto get_addr; + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) + goto get_addr; + } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) + goto get_addr; + + try_next: + if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) + rpool->cur = TAILQ_FIRST(&rpool->list); + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) { + /* table contains no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) { + /* table contains no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + PF_ACPY(&rpool->counter, raddr, af); + } + + get_addr: + PF_ACPY(naddr, &rpool->counter, af); + if (init_addr != NULL && PF_AZERO(init_addr, af)) + PF_ACPY(init_addr, naddr, af); + PF_AINC(&rpool->counter, af); + break; + } + if (*sn != NULL) + PF_ACPY(&(*sn)->raddr, naddr, af); + +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC && +#else + if (pf_status.debug >= PF_DEBUG_MISC && +#endif + (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + printf("pf_map_addr: selected address "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + + return (0); +} + +struct pf_rule * +pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, + struct pfi_kif *kif, struct pf_src_node **sn, + struct pf_state_key **skw, struct pf_state_key **sks, + struct pf_state_key **skp, struct pf_state_key **nkp, + struct pf_addr *saddr, struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport) +{ + struct pf_rule *r = NULL; + + + if (direction == PF_OUT) { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sport, daddr, dport, PF_RULESET_BINAT); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sport, daddr, dport, PF_RULESET_NAT); + } else { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sport, daddr, dport, PF_RULESET_RDR); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sport, daddr, dport, PF_RULESET_BINAT); + } + + if (r != NULL) { + struct pf_addr *naddr; + u_int16_t *nport; + + if (pf_state_key_setup(pd, r, skw, sks, skp, nkp, + saddr, daddr, sport, dport)) + return r; + + /* XXX We only modify one side for now. */ + naddr = &(*nkp)->addr[1]; + nport = &(*nkp)->port[1]; + + switch (r->action) { + case PF_NONAT: + case PF_NOBINAT: + case PF_NORDR: + return (NULL); + case PF_NAT: + if (pf_get_sport(pd->af, pd->proto, r, saddr, + daddr, dport, naddr, nport, r->rpool.proxy_port[0], + r->rpool.proxy_port[1], sn)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: NAT proxy port allocation " + "(%u-%u) failed\n", + r->rpool.proxy_port[0], + r->rpool.proxy_port[1])); + return (NULL); + } + break; + case PF_BINAT: + switch (direction) { + case PF_OUT: + if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ + switch (pd->af) { +#ifdef INET + case AF_INET: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt4 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr4, + &r->rpool.cur->addr.p.dyn-> + pfid_mask4, + saddr, AF_INET); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt6 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr6, + &r->rpool.cur->addr.p.dyn-> + pfid_mask6, + saddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, + &r->rpool.cur->addr.v.a.addr, + &r->rpool.cur->addr.v.a.mask, + saddr, pd->af); + break; + case PF_IN: + if (r->src.addr.type == PF_ADDR_DYNIFTL) { + switch (pd->af) { +#ifdef INET + case AF_INET: + if (r->src.addr.p.dyn-> + pfid_acnt4 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->src.addr.p.dyn-> + pfid_addr4, + &r->src.addr.p.dyn-> + pfid_mask4, + daddr, AF_INET); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (r->src.addr.p.dyn-> + pfid_acnt6 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->src.addr.p.dyn-> + pfid_addr6, + &r->src.addr.p.dyn-> + pfid_mask6, + daddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, + &r->src.addr.v.a.addr, + &r->src.addr.v.a.mask, daddr, + pd->af); + break; + } + break; + case PF_RDR: { + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) + return (NULL); + if ((r->rpool.opts & PF_POOL_TYPEMASK) == + PF_POOL_BITMASK) + PF_POOLMASK(naddr, naddr, + &r->rpool.cur->addr.v.a.mask, daddr, + pd->af); + + if (r->rpool.proxy_port[1]) { + u_int32_t tmp_nport; + + tmp_nport = ((ntohs(dport) - + ntohs(r->dst.port[0])) % + (r->rpool.proxy_port[1] - + r->rpool.proxy_port[0] + 1)) + + r->rpool.proxy_port[0]; + + /* wrap around if necessary */ + if (tmp_nport > 65535) + tmp_nport -= 65535; + *nport = htons((u_int16_t)tmp_nport); + } else if (r->rpool.proxy_port[0]) + *nport = htons(r->rpool.proxy_port[0]); + break; + } + default: + return (NULL); + } + /* + * Translation was a NOP. + * Pretend there was no match. + */ + if (!bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) { +#ifdef __FreeBSD__ + pool_put(&V_pf_state_key_pl, *nkp); + pool_put(&V_pf_state_key_pl, *skp); +#else + pool_put(&pf_state_key_pl, *nkp); + pool_put(&pf_state_key_pl, *skp); +#endif + *skw = *sks = *nkp = *skp = NULL; + return (NULL); + } + } + + return (r); +} + diff --git a/freebsd/sys/contrib/pf/net/pf_mtag.h b/freebsd/sys/contrib/pf/net/pf_mtag.h index a0ebf7ef..141a8679 100644 --- a/freebsd/sys/contrib/pf/net/pf_mtag.h +++ b/freebsd/sys/contrib/pf/net/pf_mtag.h @@ -37,15 +37,17 @@ #define PF_TAG_GENERATED 0x01 #define PF_TAG_FRAGCACHE 0x02 #define PF_TAG_TRANSLATE_LOCALHOST 0x04 +#define PF_PACKET_LOOPED 0x08 +#define PF_FASTFWD_OURS_PRESENT 0x10 struct pf_mtag { void *hdr; /* saved hdr pos in mbuf, for ECN */ - u_int rtableid; /* alternate routing table id */ + void *statekey; /* pf stackside statekey */ u_int32_t qid; /* queue id */ + u_int rtableid; /* alternate routing table id */ u_int16_t tag; /* tag id */ u_int8_t flags; u_int8_t routed; - sa_family_t af; /* for ECN */ }; static __inline struct pf_mtag *pf_find_mtag(struct mbuf *); diff --git a/freebsd/sys/contrib/pf/net/pf_norm.c b/freebsd/sys/contrib/pf/net/pf_norm.c index a59c3fd8..3780fa82 100644 --- a/freebsd/sys/contrib/pf/net/pf_norm.c +++ b/freebsd/sys/contrib/pf/net/pf_norm.c @@ -1,6 +1,6 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $ */ /* * Copyright 2001 Niels Provos <provos@citi.umich.edu> @@ -36,9 +36,9 @@ __FBSDID("$FreeBSD$"); #ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG +#define NPFLOG DEV_PFLOG #else -#define NPFLOG 0 +#define NPFLOG 0 #endif #else #include "pflog.h" @@ -80,8 +80,6 @@ __FBSDID("$FreeBSD$"); #include <net/pfvar.h> #ifndef __FreeBSD__ -#include <inttypes.h> - struct pf_frent { LIST_ENTRY(pf_frent) fr_next; struct ip *fr_ip; @@ -120,17 +118,35 @@ struct pf_fragment { }; #endif +#ifdef __FreeBSD__ +TAILQ_HEAD(pf_fragqueue, pf_fragment); +TAILQ_HEAD(pf_cachequeue, pf_fragment); +VNET_DEFINE(struct pf_fragqueue, pf_fragqueue); +#define V_pf_fragqueue VNET(pf_fragqueue) +VNET_DEFINE(struct pf_cachequeue, pf_cachequeue); +#define V_pf_cachequeue VNET(pf_cachequeue) +#else TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; +#endif #ifndef __FreeBSD__ static __inline int pf_frag_compare(struct pf_fragment *, struct pf_fragment *); #else -static int pf_frag_compare(struct pf_fragment *, +static int pf_frag_compare(struct pf_fragment *, struct pf_fragment *); #endif + +#ifdef __FreeBSD__ +RB_HEAD(pf_frag_tree, pf_fragment); +VNET_DEFINE(struct pf_frag_tree, pf_frag_tree); +#define V_pf_frag_tree VNET(pf_frag_tree) +VNET_DEFINE(struct pf_frag_tree, pf_cache_tree); +#define V_pf_cache_tree VNET(pf_cache_tree) +#else RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; +#endif RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); @@ -145,24 +161,45 @@ struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, struct mbuf *pf_fragcache(struct mbuf **, struct ip*, struct pf_fragment **, int, int, int *); int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, - struct tcphdr *, int); - + struct tcphdr *, int, sa_family_t); +void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t, + u_int8_t); +#ifdef INET6 +void pf_scrub_ip6(struct mbuf **, u_int8_t); +#endif +#ifdef __FreeBSD__ +#define DPFPRINTF(x) do { \ + if (V_pf_status.debug >= PF_DEBUG_MISC) { \ + printf("%s: ", __func__); \ + printf x ; \ + } \ +} while(0) +#else #define DPFPRINTF(x) do { \ if (pf_status.debug >= PF_DEBUG_MISC) { \ printf("%s: ", __func__); \ printf x ; \ } \ } while(0) +#endif /* Globals */ #ifdef __FreeBSD__ -uma_zone_t pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; -uma_zone_t pf_state_scrub_pl; +VNET_DEFINE(uma_zone_t, pf_frent_pl); +VNET_DEFINE(uma_zone_t, pf_frag_pl); +VNET_DEFINE(uma_zone_t, pf_cache_pl); +VNET_DEFINE(uma_zone_t, pf_cent_pl); +VNET_DEFINE(uma_zone_t, pf_state_scrub_pl); + +VNET_DEFINE(int, pf_nfrents); +#define V_pf_nfrents VNET(pf_nfrents) +VNET_DEFINE(int, pf_ncache); +#define V_pf_ncache VNET(pf_ncache) #else struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; struct pool pf_state_scrub_pl; -#endif int pf_nfrents, pf_ncache; +#endif void pf_normalize_init(void) @@ -173,9 +210,9 @@ pf_normalize_init(void) * No high water mark support(It's hint not hard limit). * uma_zone_set_max(pf_frag_pl, PFFRAG_FRAG_HIWAT); */ - uma_zone_set_max(pf_frent_pl, PFFRAG_FRENT_HIWAT); - uma_zone_set_max(pf_cache_pl, PFFRAG_FRCACHE_HIWAT); - uma_zone_set_max(pf_cent_pl, PFFRAG_FRCENT_HIWAT); + uma_zone_set_max(V_pf_frent_pl, PFFRAG_FRENT_HIWAT); + uma_zone_set_max(V_pf_cache_pl, PFFRAG_FRCACHE_HIWAT); + uma_zone_set_max(V_pf_cent_pl, PFFRAG_FRCENT_HIWAT); #else pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", NULL); @@ -194,8 +231,13 @@ pf_normalize_init(void) pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0); #endif +#ifdef __FreeBSD__ + TAILQ_INIT(&V_pf_fragqueue); + TAILQ_INIT(&V_pf_cachequeue); +#else TAILQ_INIT(&pf_fragqueue); TAILQ_INIT(&pf_cachequeue); +#endif } #ifdef __FreeBSD__ @@ -226,14 +268,20 @@ void pf_purge_expired_fragments(void) { struct pf_fragment *frag; +#ifdef __FreeBSD__ + u_int32_t expire = time_second - + V_pf_default_rule.timeout[PFTM_FRAG]; +#else u_int32_t expire = time_second - pf_default_rule.timeout[PFTM_FRAG]; +#endif - while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { #ifdef __FreeBSD__ + while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) { KASSERT((BUFFER_FRAGMENTS(frag)), - ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__)); + ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__)); #else + while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { KASSERT(BUFFER_FRAGMENTS(frag)); #endif if (frag->fr_timeout > expire) @@ -243,11 +291,12 @@ pf_purge_expired_fragments(void) pf_free_fragment(frag); } - while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { #ifdef __FreeBSD__ + while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) { KASSERT((!BUFFER_FRAGMENTS(frag)), - ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__)); + ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__)); #else + while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { KASSERT(!BUFFER_FRAGMENTS(frag)); #endif if (frag->fr_timeout > expire) @@ -256,8 +305,8 @@ pf_purge_expired_fragments(void) DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); pf_free_fragment(frag); #ifdef __FreeBSD__ - KASSERT((TAILQ_EMPTY(&pf_cachequeue) || - TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag), + KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) || + TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag), ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s", __FUNCTION__)); #else @@ -277,22 +326,44 @@ pf_flush_fragments(void) struct pf_fragment *frag; int goal; +#ifdef __FreeBSD__ + goal = V_pf_nfrents * 9 / 10; + DPFPRINTF(("trying to free > %d frents\n", + V_pf_nfrents - goal)); + while (goal < V_pf_nfrents) { +#else goal = pf_nfrents * 9 / 10; DPFPRINTF(("trying to free > %d frents\n", pf_nfrents - goal)); while (goal < pf_nfrents) { +#endif +#ifdef __FreeBSD__ + frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue); +#else frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); +#endif if (frag == NULL) break; pf_free_fragment(frag); } +#ifdef __FreeBSD__ + goal = V_pf_ncache * 9 / 10; + DPFPRINTF(("trying to free > %d cache entries\n", + V_pf_ncache - goal)); + while (goal < V_pf_ncache) { +#else goal = pf_ncache * 9 / 10; DPFPRINTF(("trying to free > %d cache entries\n", pf_ncache - goal)); while (goal < pf_ncache) { +#endif +#ifdef __FreeBSD__ + frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue); +#else frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); +#endif if (frag == NULL) break; pf_free_fragment(frag); @@ -314,8 +385,13 @@ pf_free_fragment(struct pf_fragment *frag) LIST_REMOVE(frent, fr_next); m_freem(frent->fr_m); +#ifdef __FreeBSD__ + pool_put(&V_pf_frent_pl, frent); + V_pf_nfrents--; +#else pool_put(&pf_frent_pl, frent); pf_nfrents--; +#endif } } else { for (frcache = LIST_FIRST(&frag->fr_cache); frcache; @@ -327,15 +403,18 @@ pf_free_fragment(struct pf_fragment *frag) LIST_FIRST(&frag->fr_cache)->fr_off > frcache->fr_end), ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >" - " frcache->fr_end): %s", __FUNCTION__)); + " frcache->fr_end): %s", __FUNCTION__)); + + pool_put(&V_pf_cent_pl, frcache); + V_pf_ncache--; #else KASSERT(LIST_EMPTY(&frag->fr_cache) || LIST_FIRST(&frag->fr_cache)->fr_off > frcache->fr_end); -#endif pool_put(&pf_cent_pl, frcache); pf_ncache--; +#endif } } @@ -364,11 +443,21 @@ pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) /* XXX Are we sure we want to update the timeout? */ frag->fr_timeout = time_second; if (BUFFER_FRAGMENTS(frag)) { +#ifdef __FreeBSD__ + TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); +#else TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); +#endif } else { +#ifdef __FreeBSD__ + TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next); + TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next); +#else TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); +#endif } } @@ -381,13 +470,25 @@ void pf_remove_fragment(struct pf_fragment *frag) { if (BUFFER_FRAGMENTS(frag)) { +#ifdef __FreeBSD__ + RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag); + TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); + pool_put(&V_pf_frag_pl, frag); +#else RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); pool_put(&pf_frag_pl, frag); +#endif } else { +#ifdef __FreeBSD__ + RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag); + TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next); + pool_put(&V_pf_cache_pl, frag); +#else RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); pool_put(&pf_cache_pl, frag); +#endif } } @@ -418,10 +519,18 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, /* Create a new reassembly queue for this packet */ if (*frag == NULL) { +#ifdef __FreeBSD__ + *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT); +#else *frag = pool_get(&pf_frag_pl, PR_NOWAIT); +#endif if (*frag == NULL) { pf_flush_fragments(); +#ifdef __FreeBSD__ + *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT); +#else *frag = pool_get(&pf_frag_pl, PR_NOWAIT); +#endif if (*frag == NULL) goto drop_fragment; } @@ -435,8 +544,13 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, (*frag)->fr_timeout = time_second; LIST_INIT(&(*frag)->fr_queue); +#ifdef __FreeBSD__ + RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next); +#else RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); +#endif /* We do not have a previous fragment */ frep = NULL; @@ -501,8 +615,13 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, next = LIST_NEXT(frea, fr_next); m_freem(frea->fr_m); LIST_REMOVE(frea, fr_next); +#ifdef __FreeBSD__ + pool_put(&V_pf_frent_pl, frea); + V_pf_nfrents--; +#else pool_put(&pf_frent_pl, frea); pf_nfrents--; +#endif } insert: @@ -562,26 +681,36 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, m2 = m->m_next; m->m_next = NULL; m_cat(m, m2); +#ifdef __FreeBSD__ + pool_put(&V_pf_frent_pl, frent); + V_pf_nfrents--; +#else pool_put(&pf_frent_pl, frent); pf_nfrents--; +#endif for (frent = next; frent != NULL; frent = next) { next = LIST_NEXT(frent, fr_next); m2 = frent->fr_m; +#ifdef __FreeBSD__ + pool_put(&V_pf_frent_pl, frent); + V_pf_nfrents--; +#else pool_put(&pf_frent_pl, frent); pf_nfrents--; +#endif #ifdef __FreeBSD__ m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags; m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data; #endif m_cat(m, m2); } + #ifdef __FreeBSD__ while (m->m_pkthdr.csum_data & 0xffff0000) m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16); #endif - ip->ip_src = (*frag)->fr_src; ip->ip_dst = (*frag)->fr_dst; @@ -608,8 +737,13 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, drop_fragment: /* Oops - fail safe - drop packet */ +#ifdef __FreeBSD__ + pool_put(&V_pf_frent_pl, frent); + V_pf_nfrents--; +#else pool_put(&pf_frent_pl, frent); pf_nfrents--; +#endif m_freem(m); return (NULL); } @@ -634,22 +768,40 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, /* Create a new range queue for this packet */ if (*frag == NULL) { +#ifdef __FreeBSD__ + *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT); +#else *frag = pool_get(&pf_cache_pl, PR_NOWAIT); +#endif if (*frag == NULL) { pf_flush_fragments(); +#ifdef __FreeBSD__ + *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT); +#else *frag = pool_get(&pf_cache_pl, PR_NOWAIT); +#endif if (*frag == NULL) goto no_mem; } /* Get an entry for the queue */ +#ifdef __FreeBSD__ + cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); + if (cur == NULL) { + pool_put(&V_pf_cache_pl, *frag); +#else cur = pool_get(&pf_cent_pl, PR_NOWAIT); if (cur == NULL) { pool_put(&pf_cache_pl, *frag); +#endif *frag = NULL; goto no_mem; } +#ifdef __FreeBSD__ + V_pf_ncache++; +#else pf_ncache++; +#endif (*frag)->fr_flags = PFFRAG_NOBUFFER; (*frag)->fr_max = 0; @@ -664,8 +816,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, LIST_INIT(&(*frag)->fr_cache); LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); +#ifdef __FreeBSD__ + RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag); + TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next); +#else RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); +#endif DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); @@ -784,10 +941,18 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, h->ip_id, -precut, frp->fr_off, frp->fr_end, off, max)); +#ifdef __FreeBSD__ + cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); +#else cur = pool_get(&pf_cent_pl, PR_NOWAIT); +#endif if (cur == NULL) goto no_mem; +#ifdef __FreeBSD__ + V_pf_ncache++; +#else pf_ncache++; +#endif cur->fr_off = off; cur->fr_end = max; @@ -844,10 +1009,18 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, h->ip_id, -aftercut, off, max, fra->fr_off, fra->fr_end)); +#ifdef __FreeBSD__ + cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); +#else cur = pool_get(&pf_cent_pl, PR_NOWAIT); +#endif if (cur == NULL) goto no_mem; +#ifdef __FreeBSD__ + V_pf_ncache++; +#else pf_ncache++; +#endif cur->fr_off = off; cur->fr_end = max; @@ -865,8 +1038,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, max, fra->fr_off, fra->fr_end)); fra->fr_off = cur->fr_off; LIST_REMOVE(cur, fr_next); +#ifdef __FreeBSD__ + pool_put(&V_pf_cent_pl, cur); + V_pf_ncache--; +#else pool_put(&pf_cent_pl, cur); pf_ncache--; +#endif cur = NULL; } else if (frp && fra->fr_off <= frp->fr_end) { @@ -883,8 +1061,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, max, fra->fr_off, fra->fr_end)); fra->fr_off = frp->fr_off; LIST_REMOVE(frp, fr_next); +#ifdef __FreeBSD__ + pool_put(&V_pf_cent_pl, frp); + V_pf_ncache--; +#else pool_put(&pf_cent_pl, frp); pf_ncache--; +#endif frp = NULL; } @@ -951,6 +1134,7 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, return (NULL); } +#ifdef INET int pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, struct pf_pdesc *pd) @@ -966,6 +1150,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, u_int16_t max; int ip_len; int ip_off; + int tag = -1; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { @@ -980,12 +1165,18 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, (struct pf_addr *)&h->ip_src.s_addr, AF_INET, - r->src.neg, kif)) + r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, - r->dst.neg, NULL)) + r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; +#ifdef __FreeBSD__ + else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) +#else + else if (r->match_tag && !pf_match_tag(m, r, &tag)) +#endif + r = TAILQ_NEXT(r, entries); else break; } @@ -1044,7 +1235,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { /* Fully buffer all of the fragments */ +#ifdef __FreeBSD__ + frag = pf_find_fragment(h, &V_pf_frag_tree); +#else frag = pf_find_fragment(h, &pf_frag_tree); +#endif /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && @@ -1052,12 +1247,20 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto bad; /* Get an entry for the fragment queue */ +#ifdef __FreeBSD__ + frent = pool_get(&V_pf_frent_pl, PR_NOWAIT); +#else frent = pool_get(&pf_frent_pl, PR_NOWAIT); +#endif if (frent == NULL) { REASON_SET(reason, PFRES_MEMORY); return (PF_DROP); } +#ifdef __FreeBSD__ + V_pf_nfrents++; +#else pf_nfrents++; +#endif frent->fr_ip = h; frent->fr_m = m; @@ -1088,7 +1291,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, /* non-buffering fragment cache (drops or masks overlaps) */ int nomem = 0; +#ifdef __FreeBSD__ if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) { +#else + if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) { +#endif /* * Already passed the fragment cache in the * input direction. If we continued, it would @@ -1097,7 +1304,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto fragment_pass; } +#ifdef __FreeBSD__ + frag = pf_find_fragment(h, &V_pf_cache_tree); +#else frag = pf_find_fragment(h, &pf_cache_tree); +#endif /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && @@ -1128,7 +1339,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, } #endif if (dir == PF_IN) +#ifdef __FreeBSD__ pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; +#else + m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE; +#endif if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; @@ -1144,33 +1359,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); } - /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) { - u_int16_t ip_ttl = h->ip_ttl; - - h->ip_ttl = r->min_ttl; - h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); - } - - if (r->rule_flag & PFRULE_RANDOMID) { - u_int16_t ip_id = h->ip_id; - - h->ip_id = ip_randomid(); - h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); - } - if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) - pd->flags |= PFDESC_IP_REAS; - - return (PF_PASS); + /* not missing a return here */ fragment_pass: - /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) { - u_int16_t ip_ttl = h->ip_ttl; + pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos); - h->ip_ttl = r->min_ttl; - h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); - } if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) pd->flags |= PFDESC_IP_REAS; return (PF_PASS); @@ -1200,6 +1393,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, return (PF_DROP); } +#endif #ifdef INET6 int @@ -1236,11 +1430,11 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, #endif else if (PF_MISMATCHAW(&r->src.addr, (struct pf_addr *)&h->ip6_src, AF_INET6, - r->src.neg, kif)) + r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, (struct pf_addr *)&h->ip6_dst, AF_INET6, - r->dst.neg, NULL)) + r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; @@ -1339,9 +1533,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) goto shortpkt; - /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip6_hlim < r->min_ttl) - h->ip6_hlim = r->min_ttl; + pf_scrub_ip6(&m, r->min_ttl); return (PF_PASS); @@ -1403,13 +1595,13 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, - r->src.neg, kif)) + r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, - r->dst.neg, NULL)) + r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) @@ -1479,12 +1671,16 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, } /* Process options */ - if (r->max_mss && pf_normalize_tcpopt(r, m, th, off)) + if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af)) rewrite = 1; /* copy back packet headers if we sanitized */ if (rewrite) +#ifdef __FreeBSD__ m_copyback(m, off, sizeof(*th), (caddr_t)th); +#else + m_copyback(m, off, sizeof(*th), th); +#endif return (PF_PASS); @@ -1506,11 +1702,13 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, #ifdef __FreeBSD__ KASSERT((src->scrub == NULL), ("pf_normalize_tcp_init: src->scrub != NULL")); + + src->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT); #else KASSERT(src->scrub == NULL); -#endif src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); +#endif if (src->scrub == NULL) return (1); bzero(src->scrub, sizeof(*src->scrub)); @@ -1586,10 +1784,17 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, void pf_normalize_tcp_cleanup(struct pf_state *state) { +#ifdef __FreeBSD__ + if (state->src.scrub) + pool_put(&V_pf_state_scrub_pl, state->src.scrub); + if (state->dst.scrub) + pool_put(&V_pf_state_scrub_pl, state->dst.scrub); +#else if (state->src.scrub) pool_put(&pf_state_scrub_pl, state->src.scrub); if (state->dst.scrub) pool_put(&pf_state_scrub_pl, state->dst.scrub); +#endif /* Someday... flush the TCP segment reassembly descriptors. */ } @@ -1667,7 +1872,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, if (got_ts) { /* Huh? Multiple timestamps!? */ +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif DPFPRINTF(("multiple TS??")); pf_print_state(state); printf("\n"); @@ -1736,7 +1945,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || time_second - state->creation > TS_MAX_CONN)) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif DPFPRINTF(("src idled out of PAWS\n")); pf_print_state(state); printf("\n"); @@ -1746,7 +1959,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, } if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif DPFPRINTF(("dst idled out of PAWS\n")); pf_print_state(state); printf("\n"); @@ -1807,7 +2024,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * network conditions that re-order packets and * cause our view of them to decrease. For now the * only lowerbound we can safely determine is that - * the TS echo will never be less than the orginal + * the TS echo will never be less than the original * TS. XXX There is probably a better lowerbound. * Remove TS_MAX_CONN with better lowerbound check. * tescr >= other original TS @@ -1830,7 +2047,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * this packet. */ if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) +#ifdef __FreeBSD__ + ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF]; +#else ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; +#endif /* Calculate max ticks since the last timestamp */ @@ -1838,7 +2059,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, #define TS_MICROSECS 1000000 /* microseconds per second */ #ifdef __FreeBSD__ #ifndef timersub -#define timersub(tvp, uvp, vvp) \ +#define timersub(tvp, uvp, vvp) \ do { \ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ @@ -1895,7 +2116,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, "\n", dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); #endif +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif pf_print_state(state); pf_print_flags(th->th_flags); printf("\n"); @@ -1943,7 +2168,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * Hey! Someone tried to sneak a packet in. Or the * stack changed its RFC1323 behavior?!?! */ +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif DPFPRINTF(("Did not receive expected RFC1323 " "timestamp\n")); pf_print_state(state); @@ -1970,7 +2199,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, src->scrub->pfss_flags |= PFSS_DATA_TS; else { src->scrub->pfss_flags |= PFSS_DATA_NOTS; +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub && +#else if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && +#endif (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { /* Don't warn if other host rejected RFC1323 */ DPFPRINTF(("Broken RFC1323 stack did not " @@ -2018,17 +2251,25 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, int pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, - int off) + int off, sa_family_t af) { u_int16_t *mss; int thoff; int opt, cnt, optlen = 0; int rewrite = 0; - u_char *optp; +#ifdef __FreeBSD__ + u_char opts[TCP_MAXOLEN]; +#else + u_char opts[MAX_TCPOPTLEN]; +#endif + u_char *optp = opts; thoff = th->th_off << 2; cnt = thoff - sizeof(struct tcphdr); - optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr); + + if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt, + NULL, NULL, af)) + return (rewrite); for (; cnt > 0; cnt -= optlen, optp += optlen) { opt = optp[0]; @@ -2058,5 +2299,63 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, } } + if (rewrite) + m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts); + return (rewrite); } + +void +pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) +{ + struct mbuf *m = *m0; + struct ip *h = mtod(m, struct ip *); + + /* Clear IP_DF if no-df was requested */ + if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= htons(~IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (min_ttl && h->ip_ttl < min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + + h->ip_ttl = min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } + + /* Enforce tos */ + if (flags & PFRULE_SET_TOS) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)h; + h->ip_tos = tos; + nv = *(u_int16_t *)h; + + h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0); + } + + /* random-id, but not for fragments */ + if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) { + u_int16_t ip_id = h->ip_id; + + h->ip_id = ip_randomid(); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); + } +} + +#ifdef INET6 +void +pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl) +{ + struct mbuf *m = *m0; + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (min_ttl && h->ip6_hlim < min_ttl) + h->ip6_hlim = min_ttl; +} +#endif diff --git a/freebsd/sys/contrib/pf/net/pf_osfp.c b/freebsd/sys/contrib/pf/net/pf_osfp.c index 7ff79c00..9ff90ad0 100644 --- a/freebsd/sys/contrib/pf/net/pf_osfp.c +++ b/freebsd/sys/contrib/pf/net/pf_osfp.c @@ -1,6 +1,6 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $OpenBSD: pf_osfp.c,v 1.12 2006/12/13 18:14:10 itojun Exp $ */ +/* $OpenBSD: pf_osfp.c,v 1.14 2008/06/12 18:17:01 henning Exp $ */ /* * Copyright (c) 2003 Mike Frantzen <frantzen@w4g.org> @@ -27,7 +27,10 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <sys/socket.h> #ifdef _KERNEL -# include <sys/systm.h> +#include <sys/systm.h> +#ifndef __FreeBSD__ +#include <sys/pool.h> +#endif #endif /* _KERNEL */ #include <sys/mbuf.h> @@ -44,10 +47,17 @@ __FBSDID("$FreeBSD$"); #include <netinet6/in6_var.h> #endif + #ifdef _KERNEL -# define DPFPRINTF(format, x...) \ +#ifdef __FreeBSD__ +#define DPFPRINTF(format, x...) \ + if (V_pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +#else +#define DPFPRINTF(format, x...) \ if (pf_status.debug >= PF_DEBUG_NOISY) \ printf(format , ##x) +#endif #ifdef __FreeBSD__ typedef uma_zone_t pool_t; #else @@ -57,33 +67,43 @@ typedef struct pool pool_t; #else /* Userland equivalents so we can lend code to tcpdump et al. */ -# include <arpa/inet.h> -# include <errno.h> -# include <stdio.h> -# include <stdlib.h> -# include <string.h> -# include <netdb.h> -# define pool_t int -# define pool_get(pool, flags) malloc(*(pool)) -# define pool_put(pool, item) free(item) -# define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size) - -# ifdef __FreeBSD__ -# define NTOHS(x) (x) = ntohs((u_int16_t)(x)) -# endif - -# ifdef PFDEBUG -# include <sys/stdarg.h> -# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) -# else -# define DPFPRINTF(format, x...) ((void)0) -# endif /* PFDEBUG */ +#include <arpa/inet.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <netdb.h> +#define pool_t int +#define pool_get(pool, flags) malloc(*(pool)) +#define pool_put(pool, item) free(item) +#define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size) + +#ifdef __FreeBSD__ +#define NTOHS(x) (x) = ntohs((u_int16_t)(x)) +#endif + +#ifdef PFDEBUG +#include <sys/stdarg.h> +#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +#else +#define DPFPRINTF(format, x...) ((void)0) +#endif /* PFDEBUG */ #endif /* _KERNEL */ +#ifdef __FreeBSD__ +SLIST_HEAD(pf_osfp_list, pf_os_fingerprint); +VNET_DEFINE(struct pf_osfp_list, pf_osfp_list); +#define V_pf_osfp_list VNET(pf_osfp_list) +VNET_DEFINE(pool_t, pf_osfp_entry_pl); +#define pf_osfp_entry_pl VNET(pf_osfp_entry_pl) +VNET_DEFINE(pool_t, pf_osfp_pl); +#define pf_osfp_pl VNET(pf_osfp_pl) +#else SLIST_HEAD(pf_osfp_list, pf_os_fingerprint) pf_osfp_list; pool_t pf_osfp_entry_pl; pool_t pf_osfp_pl; +#endif struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, struct pf_os_fingerprint *, u_int8_t); @@ -274,7 +294,11 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st (fp.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "", fp.fp_wscale); +#ifdef __FreeBSD__ + if ((fpresult = pf_osfp_find(&V_pf_osfp_list, &fp, +#else if ((fpresult = pf_osfp_find(&pf_osfp_list, &fp, +#endif PF_OSFP_MAXTTL_OFFSET))) return (&fpresult->fp_oses); return (NULL); @@ -320,20 +344,23 @@ pf_osfp_initialize(void) { #if defined(__FreeBSD__) && defined(_KERNEL) int error = ENOMEM; - + do { pf_osfp_entry_pl = pf_osfp_pl = NULL; UMA_CREATE(pf_osfp_entry_pl, struct pf_osfp_entry, "pfospfen"); UMA_CREATE(pf_osfp_pl, struct pf_os_fingerprint, "pfosfp"); error = 0; } while(0); + + SLIST_INIT(&V_pf_osfp_list); #else pool_init(&pf_osfp_entry_pl, sizeof(struct pf_osfp_entry), 0, 0, 0, "pfosfpen", &pool_allocator_nointr); pool_init(&pf_osfp_pl, sizeof(struct pf_os_fingerprint), 0, 0, 0, "pfosfp", &pool_allocator_nointr); -#endif SLIST_INIT(&pf_osfp_list); +#endif + #ifdef __FreeBSD__ #ifdef _KERNEL return (error); @@ -347,6 +374,7 @@ pf_osfp_initialize(void) void pf_osfp_cleanup(void) { + UMA_DESTROY(pf_osfp_entry_pl); UMA_DESTROY(pf_osfp_pl); } @@ -359,8 +387,13 @@ pf_osfp_flush(void) struct pf_os_fingerprint *fp; struct pf_osfp_entry *entry; +#ifdef __FreeBSD__ + while ((fp = SLIST_FIRST(&V_pf_osfp_list))) { + SLIST_REMOVE_HEAD(&V_pf_osfp_list, fp_next); +#else while ((fp = SLIST_FIRST(&pf_osfp_list))) { SLIST_REMOVE_HEAD(&pf_osfp_list, fp_next); +#endif while ((entry = SLIST_FIRST(&fp->fp_oses))) { SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry); pool_put(&pf_osfp_entry_pl, entry); @@ -387,6 +420,7 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fpadd.fp_wscale = fpioc->fp_wscale; fpadd.fp_ttl = fpioc->fp_ttl; +#if 0 /* XXX RYAN wants to fix logging */ DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " "(TS=%s,M=%s%d,W=%s%d) %x\n", fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, @@ -410,17 +444,31 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "", fpadd.fp_wscale, fpioc->fp_os.fp_os); +#endif - +#ifdef __FreeBSD__ + if ((fp = pf_osfp_find_exact(&V_pf_osfp_list, &fpadd))) { +#else if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) { +#endif SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) return (EEXIST); } - if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) + if ((entry = pool_get(&pf_osfp_entry_pl, +#ifdef __FreeBSD__ + PR_NOWAIT)) == NULL) +#else + PR_WAITOK|PR_LIMITFAIL)) == NULL) +#endif return (ENOMEM); } else { - if ((fp = pool_get(&pf_osfp_pl, PR_NOWAIT)) == NULL) + if ((fp = pool_get(&pf_osfp_pl, +#ifdef __FreeBSD__ + PR_NOWAIT)) == NULL) +#else + PR_WAITOK|PR_LIMITFAIL)) == NULL) +#endif return (ENOMEM); memset(fp, 0, sizeof(*fp)); fp->fp_tcpopts = fpioc->fp_tcpopts; @@ -432,11 +480,20 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fp->fp_wscale = fpioc->fp_wscale; fp->fp_ttl = fpioc->fp_ttl; SLIST_INIT(&fp->fp_oses); - if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) { + if ((entry = pool_get(&pf_osfp_entry_pl, +#ifdef __FreeBSD__ + PR_NOWAIT)) == NULL) { +#else + PR_WAITOK|PR_LIMITFAIL)) == NULL) { +#endif pool_put(&pf_osfp_pl, fp); return (ENOMEM); } +#ifdef __FreeBSD__ + pf_osfp_insert(&V_pf_osfp_list, fp); +#else pf_osfp_insert(&pf_osfp_list, fp); +#endif } memcpy(entry, &fpioc->fp_os, sizeof(*entry)); @@ -462,7 +519,7 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, { struct pf_os_fingerprint *f; -#define MATCH_INT(_MOD, _DC, _field) \ +#define MATCH_INT(_MOD, _DC, _field) \ if ((f->fp_flags & _DC) == 0) { \ if ((f->fp_flags & _MOD) == 0) { \ if (f->_field != find->_field) \ @@ -490,10 +547,11 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, if (find->fp_mss == 0) continue; -/* Some "smart" NAT devices and DSL routers will tweak the MSS size and +/* + * Some "smart" NAT devices and DSL routers will tweak the MSS size and * will set it to whatever is suitable for the link type. */ -#define SMART_MSS 1460 +#define SMART_MSS 1460 if ((find->fp_wsize % find->fp_mss || find->fp_wsize / find->fp_mss != f->fp_wsize) && @@ -505,8 +563,8 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, if (find->fp_mss == 0) continue; -#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr)) -#define SMART_MTU (SMART_MSS + MTUOFF) +#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr)) +#define SMART_MTU (SMART_MSS + MTUOFF) if ((find->fp_wsize % (find->fp_mss + MTUOFF) || find->fp_wsize / (find->fp_mss + MTUOFF) != f->fp_wsize) && @@ -577,7 +635,11 @@ pf_osfp_get(struct pf_osfp_ioctl *fpioc) memset(fpioc, 0, sizeof(*fpioc)); +#ifdef __FreeBSD__ + SLIST_FOREACH(fp, &V_pf_osfp_list, fp_next) { +#else SLIST_FOREACH(fp, &pf_osfp_list, fp_next) { +#endif SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (i++ == num) { fpioc->fp_mss = fp->fp_mss; @@ -604,19 +666,27 @@ pf_osfp_validate(void) { struct pf_os_fingerprint *f, *f2, find; +#ifdef __FreeBSD__ + SLIST_FOREACH(f, &V_pf_osfp_list, fp_next) { +#else SLIST_FOREACH(f, &pf_osfp_list, fp_next) { +#endif memcpy(&find, f, sizeof(find)); /* We do a few MSS/th_win percolations to make things unique */ if (find.fp_mss == 0) find.fp_mss = 128; if (f->fp_flags & PF_OSFP_WSIZE_MSS) - find.fp_wsize *= find.fp_mss, 1; + find.fp_wsize *= find.fp_mss; else if (f->fp_flags & PF_OSFP_WSIZE_MTU) find.fp_wsize *= (find.fp_mss + 40); else if (f->fp_flags & PF_OSFP_WSIZE_MOD) find.fp_wsize *= 2; +#ifdef __FreeBSD__ + if (f != (f2 = pf_osfp_find(&V_pf_osfp_list, &find, 0))) { +#else if (f != (f2 = pf_osfp_find(&pf_osfp_list, &find, 0))) { +#endif if (f2) printf("Found \"%s %s %s\" instead of " "\"%s %s %s\"\n", diff --git a/freebsd/sys/contrib/pf/net/pf_ruleset.c b/freebsd/sys/contrib/pf/net/pf_ruleset.c index 5e018b48..c7ab6178 100644 --- a/freebsd/sys/contrib/pf/net/pf_ruleset.c +++ b/freebsd/sys/contrib/pf/net/pf_ruleset.c @@ -1,6 +1,6 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $OpenBSD: pf_ruleset.c,v 1.1 2006/10/27 13:56:51 mcbride Exp $ */ +/* $OpenBSD: pf_ruleset.c,v 1.2 2008/12/18 15:31:37 dhill Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -63,48 +63,55 @@ __FBSDID("$FreeBSD$"); #ifdef _KERNEL -# define DPFPRINTF(format, x...) \ - if (pf_status.debug >= PF_DEBUG_NOISY) \ +#ifdef __FreeBSD__ +#define DPFPRINTF(format, x...) \ + if (V_pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +#else +#define DPFPRINTF(format, x...) \ + if (pf_status.debug >= PF_DEBUG_NOISY) \ printf(format , ##x) +#endif #ifdef __FreeBSD__ -#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT) +#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO) #else -#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK) +#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO) #endif #define rs_free(x) free(x, M_TEMP) #else /* Userland equivalents so we can lend code to pfctl et al. */ -# include <arpa/inet.h> -# include <errno.h> -# include <stdio.h> -# include <stdlib.h> -# include <string.h> -# define rs_malloc(x) malloc(x) -# define rs_free(x) free(x) - -# ifdef PFDEBUG -# include <sys/stdarg.h> -# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) -# else -# define DPFPRINTF(format, x...) ((void)0) -# endif /* PFDEBUG */ +#include <arpa/inet.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#define rs_malloc(x) calloc(1, x) +#define rs_free(x) free(x) + +#ifdef PFDEBUG +#include <sys/stdarg.h> +#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +#else +#define DPFPRINTF(format, x...) ((void)0) +#endif /* PFDEBUG */ #endif /* _KERNEL */ +#if defined(__FreeBSD__) && !defined(_KERNEL) +#undef V_pf_anchors +#define V_pf_anchors pf_anchors + +#undef pf_main_ruleset +#define pf_main_ruleset pf_main_anchor.ruleset +#endif +#if defined(__FreeBSD__) && defined(_KERNEL) +VNET_DEFINE(struct pf_anchor_global, pf_anchors); +VNET_DEFINE(struct pf_anchor, pf_main_anchor); +#else struct pf_anchor_global pf_anchors; struct pf_anchor pf_main_anchor; - -#ifndef __FreeBSD__ -/* XXX: hum? */ -int pf_get_ruleset_number(u_int8_t); -void pf_init_ruleset(struct pf_ruleset *); -int pf_anchor_setup(struct pf_rule *, - const struct pf_ruleset *, const char *); -int pf_anchor_copyout(const struct pf_ruleset *, - const struct pf_rule *, struct pfioc_rule *); -void pf_anchor_remove(struct pf_rule *); #endif static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); @@ -170,9 +177,14 @@ pf_find_anchor(const char *path) struct pf_anchor *key, *found; key = (struct pf_anchor *)rs_malloc(sizeof(*key)); - memset(key, 0, sizeof(*key)); + if (key == NULL) + return (NULL); strlcpy(key->path, path, sizeof(key->path)); +#ifdef __FreeBSD__ + found = RB_FIND(pf_anchor_global, &V_pf_anchors, key); +#else found = RB_FIND(pf_anchor_global, &pf_anchors, key); +#endif rs_free(key); return (found); } @@ -212,7 +224,8 @@ pf_find_or_create_ruleset(const char *path) if (ruleset != NULL) return (ruleset); p = (char *)rs_malloc(MAXPATHLEN); - bzero(p, MAXPATHLEN); + if (p == NULL) + return (NULL); strlcpy(p, path, MAXPATHLEN); while (parent == NULL && (q = strrchr(p, '/')) != NULL) { *q = 0; @@ -244,7 +257,6 @@ pf_find_or_create_ruleset(const char *path) rs_free(p); return (NULL); } - memset(anchor, 0, sizeof(*anchor)); RB_INIT(&anchor->children); strlcpy(anchor->name, q, sizeof(anchor->name)); if (parent != NULL) { @@ -253,7 +265,11 @@ pf_find_or_create_ruleset(const char *path) strlcat(anchor->path, "/", sizeof(anchor->path)); } strlcat(anchor->path, anchor->name, sizeof(anchor->path)); +#ifdef __FreeBSD__ + if ((dup = RB_INSERT(pf_anchor_global, &V_pf_anchors, anchor)) != +#else if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != +#endif NULL) { printf("pf_find_or_create_ruleset: RB_INSERT1 " "'%s' '%s' collides with '%s' '%s'\n", @@ -270,7 +286,11 @@ pf_find_or_create_ruleset(const char *path) "RB_INSERT2 '%s' '%s' collides with " "'%s' '%s'\n", anchor->path, anchor->name, dup->path, dup->name); +#ifdef __FreeBSD__ + RB_REMOVE(pf_anchor_global, &V_pf_anchors, +#else RB_REMOVE(pf_anchor_global, &pf_anchors, +#endif anchor); rs_free(anchor); rs_free(p); @@ -306,7 +326,11 @@ pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || ruleset->rules[i].inactive.open) return; +#ifdef __FreeBSD__ + RB_REMOVE(pf_anchor_global, &V_pf_anchors, ruleset->anchor); +#else RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); +#endif if ((parent = ruleset->anchor->parent) != NULL) RB_REMOVE(pf_anchor_node, &parent->children, ruleset->anchor); @@ -330,7 +354,8 @@ pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, if (!name[0]) return (0); path = (char *)rs_malloc(MAXPATHLEN); - bzero(path, MAXPATHLEN); + if (path == NULL) + return (1); if (name[0] == '/') strlcpy(path, name + 1, MAXPATHLEN); else { @@ -388,7 +413,8 @@ pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, int i; a = (char *)rs_malloc(MAXPATHLEN); - bzero(a, MAXPATHLEN); + if (a == NULL) + return (1); if (rs->anchor == NULL) a[0] = 0; else diff --git a/freebsd/sys/contrib/pf/net/pf_table.c b/freebsd/sys/contrib/pf/net/pf_table.c index ee13e851..14e75dda 100644 --- a/freebsd/sys/contrib/pf/net/pf_table.c +++ b/freebsd/sys/contrib/pf/net/pf_table.c @@ -1,6 +1,6 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $OpenBSD: pf_table.c,v 1.68 2006/05/02 10:08:45 dhartmei Exp $ */ +/* $OpenBSD: pf_table.c,v 1.79 2008/10/08 06:24:50 mcbride Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -45,10 +45,10 @@ __FBSDID("$FreeBSD$"); #include <sys/socket.h> #include <sys/mbuf.h> #include <sys/kernel.h> -#include <rtems/bsd/sys/lock.h> -#include <sys/rwlock.h> #ifdef __FreeBSD__ #include <sys/malloc.h> +#else +#include <sys/pool.h> #endif #include <net/if.h> @@ -57,10 +57,9 @@ __FBSDID("$FreeBSD$"); #ifndef __FreeBSD__ #include <netinet/ip_ipsp.h> #endif - #include <net/pfvar.h> -#define ACCEPT_FLAGS(oklist) \ +#define ACCEPT_FLAGS(flags, oklist) \ do { \ if ((flags & ~(oklist)) & \ PFR_FLAG_ALLMASK) \ @@ -92,28 +91,26 @@ _copyout(const void *uaddr, void *kaddr, size_t len) return (r); } -#define COPYIN(from, to, size) \ +#define COPYIN(from, to, size, flags) \ ((flags & PFR_FLAG_USERIOCTL) ? \ _copyin((from), (to), (size)) : \ (bcopy((from), (to), (size)), 0)) -#define COPYOUT(from, to, size) \ +#define COPYOUT(from, to, size, flags) \ ((flags & PFR_FLAG_USERIOCTL) ? \ _copyout((from), (to), (size)) : \ (bcopy((from), (to), (size)), 0)) #else - -#define COPYIN(from, to, size) \ +#define COPYIN(from, to, size, flags) \ ((flags & PFR_FLAG_USERIOCTL) ? \ copyin((from), (to), (size)) : \ (bcopy((from), (to), (size)), 0)) -#define COPYOUT(from, to, size) \ +#define COPYOUT(from, to, size, flags) \ ((flags & PFR_FLAG_USERIOCTL) ? \ copyout((from), (to), (size)) : \ (bcopy((from), (to), (size)), 0)) - #endif #define FILLIN_SIN(sin, addr) \ @@ -130,26 +127,26 @@ _copyout(const void *uaddr, void *kaddr, size_t len) (sin6).sin6_addr = (addr); \ } while (0) -#define SWAP(type, a1, a2) \ +#define SWAP(type, a1, a2) \ do { \ type tmp = a1; \ a1 = a2; \ a2 = tmp; \ } while (0) -#define SUNION2PF(su, af) (((af)==AF_INET) ? \ +#define SUNION2PF(su, af) (((af)==AF_INET) ? \ (struct pf_addr *)&(su)->sin.sin_addr : \ (struct pf_addr *)&(su)->sin6.sin6_addr) #define AF_BITS(af) (((af)==AF_INET)?32:128) #define ADDR_NETWORK(ad) ((ad)->pfra_net < AF_BITS((ad)->pfra_af)) #define KENTRY_NETWORK(ke) ((ke)->pfrke_net < AF_BITS((ke)->pfrke_af)) -#define KENTRY_RNF_ROOT(ke) \ +#define KENTRY_RNF_ROOT(ke) \ ((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0) -#define NO_ADDRESSES (-1) -#define ENQUEUE_UNMARKED_ONLY (1) -#define INVERT_NEG_FLAG (1) +#define NO_ADDRESSES (-1) +#define ENQUEUE_UNMARKED_ONLY (1) +#define INVERT_NEG_FLAG (1) struct pfr_walktree { enum pfrw_op { @@ -171,28 +168,36 @@ struct pfr_walktree { int pfrw_free; int pfrw_flags; }; -#define pfrw_addr pfrw_1.pfrw1_addr -#define pfrw_astats pfrw_1.pfrw1_astats -#define pfrw_workq pfrw_1.pfrw1_workq -#define pfrw_kentry pfrw_1.pfrw1_kentry -#define pfrw_dyn pfrw_1.pfrw1_dyn -#define pfrw_cnt pfrw_free +#define pfrw_addr pfrw_1.pfrw1_addr +#define pfrw_astats pfrw_1.pfrw1_astats +#define pfrw_workq pfrw_1.pfrw1_workq +#define pfrw_kentry pfrw_1.pfrw1_kentry +#define pfrw_dyn pfrw_1.pfrw1_dyn +#define pfrw_cnt pfrw_free -#define senderr(e) do { rv = (e); goto _bad; } while (0) +#define senderr(e) do { rv = (e); goto _bad; } while (0) #ifdef __FreeBSD__ -uma_zone_t pfr_ktable_pl; -uma_zone_t pfr_kentry_pl; -uma_zone_t pfr_kentry_pl2; +VNET_DEFINE(uma_zone_t, pfr_ktable_pl); +VNET_DEFINE(uma_zone_t, pfr_kentry_pl); +VNET_DEFINE(uma_zone_t, pfr_kcounters_pl); +VNET_DEFINE(struct sockaddr_in, pfr_sin); +#define V_pfr_sin VNET(pfr_sin) +VNET_DEFINE(struct sockaddr_in6, pfr_sin6); +#define V_pfr_sin6 VNET(pfr_sin6) +VNET_DEFINE(union sockaddr_union, pfr_mask); +#define V_pfr_mask VNET(pfr_mask) +VNET_DEFINE(struct pf_addr, pfr_ffaddr); +#define V_pfr_ffaddr VNET(pfr_ffaddr) #else struct pool pfr_ktable_pl; struct pool pfr_kentry_pl; -struct pool pfr_kentry_pl2; -#endif +struct pool pfr_kcounters_pl; struct sockaddr_in pfr_sin; struct sockaddr_in6 pfr_sin6; union sockaddr_union pfr_mask; struct pf_addr pfr_ffaddr; +#endif void pfr_copyout_addr(struct pfr_addr *, struct pfr_kentry *ke); @@ -228,7 +233,7 @@ void pfr_setflags_ktable(struct pfr_ktable *, int); void pfr_clstats_ktables(struct pfr_ktableworkq *, long, int); void pfr_clstats_ktable(struct pfr_ktable *, long, int); -struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int); +struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int, int); void pfr_destroy_ktables(struct pfr_ktableworkq *, int); void pfr_destroy_ktable(struct pfr_ktable *, int); int pfr_ktable_compare(struct pfr_ktable *, @@ -253,12 +258,11 @@ pfr_initialize(void) { #ifndef __FreeBSD__ pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable), 0, 0, 0, - "pfrktable", &pool_allocator_oldnointr); + "pfrktable", NULL); pool_init(&pfr_kentry_pl, sizeof(struct pfr_kentry), 0, 0, 0, - "pfrkentry", &pool_allocator_oldnointr); - pool_init(&pfr_kentry_pl2, sizeof(struct pfr_kentry), 0, 0, 0, - "pfrkentry2", NULL); -#endif + "pfrkentry", NULL); + pool_init(&pfr_kcounters_pl, sizeof(struct pfr_kcounters), 0, 0, 0, + "pfrkcounters", NULL); pfr_sin.sin_len = sizeof(pfr_sin); pfr_sin.sin_family = AF_INET; @@ -266,6 +270,14 @@ pfr_initialize(void) pfr_sin6.sin6_family = AF_INET6; memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr)); +#else + V_pfr_sin.sin_len = sizeof(V_pfr_sin); + V_pfr_sin.sin_family = AF_INET; + V_pfr_sin6.sin6_len = sizeof(V_pfr_sin6); + V_pfr_sin6.sin6_family = AF_INET6; + + memset(&V_pfr_ffaddr, 0xff, sizeof(V_pfr_ffaddr)); +#endif } int @@ -275,7 +287,7 @@ pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) struct pfr_kentryworkq workq; int s; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -286,7 +298,6 @@ pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) pfr_enqueue_addrs(kt, &workq, ndel, 0); if (!(flags & PFR_FLAG_DUMMY)) { - s = 0; if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_remove_kentries(kt, &workq); @@ -309,10 +320,11 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq workq; struct pfr_kentry *p, *q; struct pfr_addr ad; - int i, rv, s = 0, xadd = 0; + int i, rv, s, xadd = 0; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -320,12 +332,13 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); - tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0, + !(flags & PFR_FLAG_USERIOCTL)); if (tmpkt == NULL) return (ENOMEM); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); @@ -342,7 +355,8 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, ad.pfra_fback = PFR_FB_NONE; } if (p == NULL && q == NULL) { - p = pfr_create_kentry(&ad, 0); + p = pfr_create_kentry(&ad, + !(flags & PFR_FLAG_USERIOCTL)); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { @@ -353,10 +367,9 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, xadd++; } } - if (flags & PFR_FLAG_FEEDBACK) { - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (flags & PFR_FLAG_FEEDBACK) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) senderr(EFAULT); - } } pfr_clean_node_mask(tmpkt, &workq); if (!(flags & PFR_FLAG_DUMMY)) { @@ -388,9 +401,10 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq workq; struct pfr_kentry *p; struct pfr_addr ad; - int i, rv, s = 0, xdel = 0, log = 1; + int i, rv, s, xdel = 0, log = 1; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -417,7 +431,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, } else { /* iterate over addresses to delete */ for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) return (EFAULT); if (pfr_validate_addr(&ad)) return (EINVAL); @@ -428,7 +442,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, } SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); @@ -450,7 +464,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, xdel++; } if (flags & PFR_FLAG_FEEDBACK) - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) senderr(EFAULT); } if (!(flags & PFR_FLAG_DUMMY)) { @@ -478,10 +492,11 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq addq, delq, changeq; struct pfr_kentry *p, *q; struct pfr_addr ad; - int i, rv, s = 0, xadd = 0, xdel = 0, xchange = 0; + int i, rv, s, xadd = 0, xdel = 0, xchange = 0; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); @@ -490,7 +505,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); - tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0, + !(flags & PFR_FLAG_USERIOCTL)); if (tmpkt == NULL) return (ENOMEM); pfr_mark_addrs(kt); @@ -498,7 +514,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, SLIST_INIT(&delq); SLIST_INIT(&changeq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); @@ -521,7 +537,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, ad.pfra_fback = PFR_FB_DUPLICATE; goto _skip; } - p = pfr_create_kentry(&ad, 0); + p = pfr_create_kentry(&ad, + !(flags & PFR_FLAG_USERIOCTL)); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { @@ -535,7 +552,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, } _skip: if (flags & PFR_FLAG_FEEDBACK) - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) senderr(EFAULT); } pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); @@ -548,7 +565,7 @@ _skip: SLIST_FOREACH(p, &delq, pfrke_workq) { pfr_copyout_addr(&ad, p); ad.pfra_fback = PFR_FB_DELETED; - if (COPYOUT(&ad, addr+size+i, sizeof(ad))) + if (COPYOUT(&ad, addr+size+i, sizeof(ad), flags)) senderr(EFAULT); i++; } @@ -592,7 +609,7 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_addr ad; int i, xmatch = 0; - ACCEPT_FLAGS(PFR_FLAG_REPLACE); + ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -600,7 +617,7 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) return (EFAULT); if (pfr_validate_addr(&ad)) return (EINVAL); @@ -613,7 +630,7 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, (p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH); if (p != NULL && !p->pfrke_not) xmatch++; - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) return (EFAULT); } if (nmatch != NULL) @@ -629,7 +646,7 @@ pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, struct pfr_walktree w; int rv; - ACCEPT_FLAGS(0); + ACCEPT_FLAGS(flags, 0); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -652,7 +669,7 @@ pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, #endif if (!rv) #ifdef __FreeBSD__ - rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, + rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); #else rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); @@ -676,10 +693,11 @@ pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, struct pfr_ktable *kt; struct pfr_walktree w; struct pfr_kentryworkq workq; - int rv, s = 0; + int rv, s; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC); /* XXX PFR_FLAG_CLSTATS disabled */ + /* XXX PFR_FLAG_CLSTATS disabled */ + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -735,9 +753,10 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq workq; struct pfr_kentry *p; struct pfr_addr ad; - int i, rv, s = 0, xzero = 0; + int i, rv, s, xzero = 0; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -745,7 +764,7 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); @@ -753,7 +772,7 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, if (flags & PFR_FLAG_FEEDBACK) { ad.pfra_fback = (p != NULL) ? PFR_FB_CLEARED : PFR_FB_NONE; - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) senderr(EFAULT); } if (p != NULL) { @@ -868,7 +887,11 @@ struct pfr_kentry * pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) { union sockaddr_union sa, mask; - struct radix_node_head *head = NULL; /* make the compiler happy */ +#ifdef __FreeBSD__ + struct radix_node_head *head = NULL; +#else + struct radix_node_head *head; +#endif struct pfr_kentry *ke; int s; @@ -884,7 +907,7 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net); s = splsoftnet(); /* rn_lookup makes use of globals */ #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + PF_LOCK_ASSERT(); #endif ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head); splx(s); @@ -905,13 +928,16 @@ pfr_create_kentry(struct pfr_addr *ad, int intr) { struct pfr_kentry *ke; +#ifdef __FreeBSD__ + ke = pool_get(&V_pfr_kentry_pl, PR_NOWAIT | PR_ZERO); +#else if (intr) - ke = pool_get(&pfr_kentry_pl2, PR_NOWAIT); + ke = pool_get(&pfr_kentry_pl, PR_NOWAIT | PR_ZERO); else - ke = pool_get(&pfr_kentry_pl, PR_NOWAIT); + ke = pool_get(&pfr_kentry_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL); +#endif if (ke == NULL) return (NULL); - bzero(ke, sizeof(*ke)); if (ad->pfra_af == AF_INET) FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr); @@ -920,7 +946,6 @@ pfr_create_kentry(struct pfr_addr *ad, int intr) ke->pfrke_af = ad->pfra_af; ke->pfrke_net = ad->pfra_net; ke->pfrke_not = ad->pfra_not; - ke->pfrke_intrpool = intr; return (ke); } @@ -938,10 +963,14 @@ pfr_destroy_kentries(struct pfr_kentryworkq *workq) void pfr_destroy_kentry(struct pfr_kentry *ke) { - if (ke->pfrke_intrpool) - pool_put(&pfr_kentry_pl2, ke); - else - pool_put(&pfr_kentry_pl, ke); + if (ke->pfrke_counters) +#ifdef __FreeBSD__ + pool_put(&V_pfr_kcounters_pl, ke->pfrke_counters); + pool_put(&V_pfr_kentry_pl, ke); +#else + pool_put(&pfr_kcounters_pl, ke->pfrke_counters); + pool_put(&pfr_kentry_pl, ke); +#endif } void @@ -1022,8 +1051,14 @@ pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange) s = splsoftnet(); if (negchange) p->pfrke_not = !p->pfrke_not; - bzero(p->pfrke_packets, sizeof(p->pfrke_packets)); - bzero(p->pfrke_bytes, sizeof(p->pfrke_bytes)); + if (p->pfrke_counters) { +#ifdef __FreeBSD__ + pool_put(&V_pfr_kcounters_pl, p->pfrke_counters); +#else + pool_put(&pfr_kcounters_pl, p->pfrke_counters); +#endif + p->pfrke_counters = NULL; + } splx(s); p->pfrke_tzero = tzero; } @@ -1036,10 +1071,10 @@ pfr_reset_feedback(struct pfr_addr *addr, int size, int flags) int i; for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) break; ad.pfra_fback = PFR_FB_NONE; - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) break; } } @@ -1074,7 +1109,11 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; - struct radix_node_head *head = NULL; /* make the compiler happy */ +#ifdef __FreeBSD__ + struct radix_node_head *head = NULL; +#else + struct radix_node_head *head; +#endif int s; bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); @@ -1085,13 +1124,21 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) s = splsoftnet(); #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + PF_LOCK_ASSERT(); #endif if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); +#ifdef __FreeBSD__ rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node); +#else + rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node, 0); +#endif } else +#ifdef __FreeBSD__ rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node); +#else + rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node, 0); +#endif splx(s); return (rn == NULL ? -1 : 0); @@ -1102,7 +1149,11 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; - struct radix_node_head *head = NULL; /* make the compiler happy */ +#ifdef __FreeBSD__ + struct radix_node_head *head = NULL; +#else + struct radix_node_head *head; +#endif int s; if (ke->pfrke_af == AF_INET) @@ -1112,7 +1163,7 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) s = splsoftnet(); #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + PF_LOCK_ASSERT(); #endif if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); @@ -1175,7 +1226,7 @@ pfr_walktree(struct radix_node *rn, void *arg) struct pfr_addr ad; pfr_copyout_addr(&ad, ke); - if (COPYOUT(&ad, w->pfrw_addr, sizeof(ad))) + if (copyout(&ad, w->pfrw_addr, sizeof(ad))) return (EFAULT); w->pfrw_addr++; } @@ -1187,14 +1238,20 @@ pfr_walktree(struct radix_node *rn, void *arg) pfr_copyout_addr(&as.pfras_a, ke); s = splsoftnet(); - bcopy(ke->pfrke_packets, as.pfras_packets, - sizeof(as.pfras_packets)); - bcopy(ke->pfrke_bytes, as.pfras_bytes, - sizeof(as.pfras_bytes)); + if (ke->pfrke_counters) { + bcopy(ke->pfrke_counters->pfrkc_packets, + as.pfras_packets, sizeof(as.pfras_packets)); + bcopy(ke->pfrke_counters->pfrkc_bytes, + as.pfras_bytes, sizeof(as.pfras_bytes)); + } else { + bzero(as.pfras_packets, sizeof(as.pfras_packets)); + bzero(as.pfras_bytes, sizeof(as.pfras_bytes)); + as.pfras_a.pfra_fback = PFR_FB_NOCOUNT; + } splx(s); as.pfras_tzero = ke->pfrke_tzero; - if (COPYOUT(&as, w->pfrw_astats, sizeof(as))) + if (COPYOUT(&as, w->pfrw_astats, sizeof(as), flags)) return (EFAULT); w->pfrw_astats++; } @@ -1211,19 +1268,35 @@ pfr_walktree(struct radix_node *rn, void *arg) if (ke->pfrke_af == AF_INET) { if (w->pfrw_dyn->pfid_acnt4++ > 0) break; +#ifdef __FreeBSD__ + pfr_prepare_network(&V_pfr_mask, AF_INET, ke->pfrke_net); +#else pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net); +#endif w->pfrw_dyn->pfid_addr4 = *SUNION2PF( &ke->pfrke_sa, AF_INET); w->pfrw_dyn->pfid_mask4 = *SUNION2PF( +#ifdef __FreeBSD__ + &V_pfr_mask, AF_INET); +#else &pfr_mask, AF_INET); +#endif } else if (ke->pfrke_af == AF_INET6){ if (w->pfrw_dyn->pfid_acnt6++ > 0) break; +#ifdef __FreeBSD__ + pfr_prepare_network(&V_pfr_mask, AF_INET6, ke->pfrke_net); +#else pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); +#endif w->pfrw_dyn->pfid_addr6 = *SUNION2PF( &ke->pfrke_sa, AF_INET6); w->pfrw_dyn->pfid_mask6 = *SUNION2PF( +#ifdef __FreeBSD__ + &V_pfr_mask, AF_INET6); +#else &pfr_mask, AF_INET6); +#endif } break; } @@ -1235,9 +1308,10 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p; - int s = 0, xdel = 0; + int s, xdel = 0; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ALLRSETS); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); if (pfr_table_count(filter, flags) < 0) @@ -1272,14 +1346,14 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) { struct pfr_ktableworkq addq, changeq; struct pfr_ktable *p, *q, *r, key; - int i, rv, s = 0, xadd = 0; + int i, rv, s, xadd = 0; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); SLIST_INIT(&addq); SLIST_INIT(&changeq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) senderr(EFAULT); if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK, flags & PFR_FLAG_USERIOCTL)) @@ -1287,7 +1361,8 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) key.pfrkt_flags |= PFR_TFLAG_ACTIVE; p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (p == NULL) { - p = pfr_create_ktable(&key.pfrkt_t, tzero, 1); + p = pfr_create_ktable(&key.pfrkt_t, tzero, 1, + !(flags & PFR_FLAG_USERIOCTL)); if (p == NULL) senderr(ENOMEM); SLIST_FOREACH(q, &addq, pfrkt_workq) { @@ -1313,7 +1388,8 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) } } key.pfrkt_flags = 0; - r = pfr_create_ktable(&key.pfrkt_t, 0, 1); + r = pfr_create_ktable(&key.pfrkt_t, 0, 1, + !(flags & PFR_FLAG_USERIOCTL)); if (r == NULL) senderr(ENOMEM); SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); @@ -1352,12 +1428,12 @@ pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p, *q, key; - int i, s = 0, xdel = 0; + int i, s, xdel = 0; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) return (EFAULT); if (pfr_validate_table(&key.pfrkt_t, 0, flags & PFR_FLAG_USERIOCTL)) @@ -1394,7 +1470,7 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, struct pfr_ktable *p; int n, nn; - ACCEPT_FLAGS(PFR_FLAG_ALLRSETS); + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); n = nn = pfr_table_count(filter, flags); @@ -1409,7 +1485,7 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, continue; if (n-- <= 0) continue; - if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl))) + if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl), flags)) return (EFAULT); } if (n) { @@ -1426,11 +1502,11 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, { struct pfr_ktable *p; struct pfr_ktableworkq workq; - int s = 0, n, nn; + int s, n, nn; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC|PFR_FLAG_ALLRSETS); - /* XXX PFR_FLAG_CLSTATS disabled */ + /* XXX PFR_FLAG_CLSTATS disabled */ + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); n = nn = pfr_table_count(filter, flags); @@ -1450,9 +1526,8 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, continue; if (!(flags & PFR_FLAG_ATOMIC)) s = splsoftnet(); - if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl))) { - if (!(flags & PFR_FLAG_ATOMIC)) - splx(s); + if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl), flags)) { + splx(s); return (EFAULT); } if (!(flags & PFR_FLAG_ATOMIC)) @@ -1477,13 +1552,14 @@ pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p, key; - int i, s = 0, xzero = 0; + int i, s, xzero = 0; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ADDRSTOO); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_ADDRSTOO); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) return (EFAULT); if (pfr_validate_table(&key.pfrkt_t, 0, 0)) return (EINVAL); @@ -1511,16 +1587,16 @@ pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, { struct pfr_ktableworkq workq; struct pfr_ktable *p, *q, key; - int i, s = 0, xchange = 0, xdel = 0; + int i, s, xchange = 0, xdel = 0; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); if ((setflag & ~PFR_TFLAG_USRMASK) || (clrflag & ~PFR_TFLAG_USRMASK) || (setflag & clrflag)) return (EINVAL); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) return (EFAULT); if (pfr_validate_table(&key.pfrkt_t, 0, flags & PFR_FLAG_USERIOCTL)) @@ -1567,7 +1643,7 @@ pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags) struct pf_ruleset *rs; int xdel = 0; - ACCEPT_FLAGS(PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); rs = pf_find_or_create_ruleset(trs->pfrt_anchor); if (rs == NULL) return (ENOMEM); @@ -1604,7 +1680,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pf_ruleset *rs; int i, rv, xadd = 0, xaddr = 0; - ACCEPT_FLAGS(PFR_FLAG_DUMMY|PFR_FLAG_ADDRSTOO); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO); if (size && !(flags & PFR_FLAG_ADDRSTOO)) return (EINVAL); if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK, @@ -1617,7 +1693,8 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, SLIST_INIT(&tableq); kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); if (kt == NULL) { - kt = pfr_create_ktable(tbl, 0, 1); + kt = pfr_create_ktable(tbl, 0, 1, + !(flags & PFR_FLAG_USERIOCTL)); if (kt == NULL) return (ENOMEM); SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq); @@ -1633,7 +1710,8 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, kt->pfrkt_root = rt; goto _skip; } - rt = pfr_create_ktable(&key.pfrkt_t, 0, 1); + rt = pfr_create_ktable(&key.pfrkt_t, 0, 1, + !(flags & PFR_FLAG_USERIOCTL)); if (rt == NULL) { pfr_destroy_ktables(&tableq, 0); return (ENOMEM); @@ -1643,14 +1721,14 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE)) xadd++; _skip: - shadow = pfr_create_ktable(tbl, 0, 0); + shadow = pfr_create_ktable(tbl, 0, 0, !(flags & PFR_FLAG_USERIOCTL)); if (shadow == NULL) { pfr_destroy_ktables(&tableq, 0); return (ENOMEM); } SLIST_INIT(&addrq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); @@ -1700,7 +1778,7 @@ pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) struct pf_ruleset *rs; int xdel = 0; - ACCEPT_FLAGS(PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (0); @@ -1730,10 +1808,10 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, struct pfr_ktable *p, *q; struct pfr_ktableworkq workq; struct pf_ruleset *rs; - int s = 0, xadd = 0, xchange = 0; + int s, xadd = 0, xchange = 0; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (EBUSY); @@ -1994,15 +2072,22 @@ pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse) } struct pfr_ktable * -pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) +pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset, + int intr) { struct pfr_ktable *kt; struct pf_ruleset *rs; - kt = pool_get(&pfr_ktable_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + kt = pool_get(&V_pfr_ktable_pl, PR_NOWAIT|PR_ZERO); +#else + if (intr) + kt = pool_get(&pfr_ktable_pl, PR_NOWAIT|PR_ZERO|PR_LIMITFAIL); + else + kt = pool_get(&pfr_ktable_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL); +#endif if (kt == NULL) return (NULL); - bzero(kt, sizeof(*kt)); kt->pfrkt_t = *tbl; if (attachruleset) { @@ -2069,7 +2154,11 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) kt->pfrkt_rs->tables--; pf_remove_if_empty_ruleset(kt->pfrkt_rs); } +#ifdef __FreeBSD__ + pool_put(&V_pfr_ktable_pl, kt); +#else pool_put(&pfr_ktable_pl, kt); +#endif } int @@ -2104,16 +2193,26 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) switch (af) { #ifdef INET case AF_INET: +#ifdef __FreeBSD__ + V_pfr_sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&V_pfr_sin, kt->pfrkt_ip4); +#else pfr_sin.sin_addr.s_addr = a->addr32[0]; ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); +#endif if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; #endif /* INET */ #ifdef INET6 case AF_INET6: +#ifdef __FreeBSD__ + bcopy(a, &V_pfr_sin6.sin6_addr, sizeof(V_pfr_sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&V_pfr_sin6, kt->pfrkt_ip6); +#else bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); +#endif if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; @@ -2141,16 +2240,26 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, switch (af) { #ifdef INET case AF_INET: +#ifdef __FreeBSD__ + V_pfr_sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&V_pfr_sin, kt->pfrkt_ip4); +#else pfr_sin.sin_addr.s_addr = a->addr32[0]; ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); +#endif if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; #endif /* INET */ #ifdef INET6 case AF_INET6: +#ifdef __FreeBSD__ + bcopy(a, &V_pfr_sin6.sin6_addr, sizeof(V_pfr_sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&V_pfr_sin6, kt->pfrkt_ip6); +#else bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); +#endif if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; @@ -2165,14 +2274,24 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, } kt->pfrkt_packets[dir_out][op_pass]++; kt->pfrkt_bytes[dir_out][op_pass] += len; - if (ke != NULL && op_pass != PFR_OP_XPASS) { - ke->pfrke_packets[dir_out][op_pass]++; - ke->pfrke_bytes[dir_out][op_pass] += len; + if (ke != NULL && op_pass != PFR_OP_XPASS && + (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) { + if (ke->pfrke_counters == NULL) +#ifdef __FreeBSD__ + ke->pfrke_counters = pool_get(&V_pfr_kcounters_pl, +#else + ke->pfrke_counters = pool_get(&pfr_kcounters_pl, +#endif + PR_NOWAIT | PR_ZERO); + if (ke->pfrke_counters != NULL) { + ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++; + ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len; + } } } struct pfr_ktable * -pfr_attach_table(struct pf_ruleset *rs, char *name) +pfr_attach_table(struct pf_ruleset *rs, char *name, int intr) { struct pfr_ktable *kt, *rt; struct pfr_table tbl; @@ -2184,14 +2303,14 @@ pfr_attach_table(struct pf_ruleset *rs, char *name) strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor)); kt = pfr_lookup_table(&tbl); if (kt == NULL) { - kt = pfr_create_ktable(&tbl, time_second, 1); + kt = pfr_create_ktable(&tbl, time_second, 1, intr); if (kt == NULL) return (NULL); if (ac != NULL) { bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); rt = pfr_lookup_table(&tbl); if (rt == NULL) { - rt = pfr_create_ktable(&tbl, 0, 1); + rt = pfr_create_ktable(&tbl, 0, 1, intr); if (rt == NULL) { pfr_destroy_ktable(kt, 0); return (NULL); @@ -2217,20 +2336,31 @@ pfr_detach_table(struct pfr_ktable *kt) pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED); } - int pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af) { +#ifdef __FreeBSD__ struct pfr_kentry *ke, *ke2 = NULL; struct pf_addr *addr = NULL; +#else + struct pfr_kentry *ke, *ke2; + struct pf_addr *addr; +#endif union sockaddr_union mask; int idx = -1, use_counter = 0; +#ifdef __FreeBSD__ + if (af == AF_INET) + addr = (struct pf_addr *)&V_pfr_sin.sin_addr; + else if (af == AF_INET6) + addr = (struct pf_addr *)&V_pfr_sin6.sin6_addr; +#else if (af == AF_INET) addr = (struct pf_addr *)&pfr_sin.sin_addr; else if (af == AF_INET6) addr = (struct pf_addr *)&pfr_sin6.sin6_addr; +#endif if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -2245,11 +2375,21 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, _next_block: ke = pfr_kentry_byidx(kt, idx, af); - if (ke == NULL) + if (ke == NULL) { + kt->pfrkt_nomatch++; return (1); + } +#ifdef __FreeBSD__ + pfr_prepare_network(&V_pfr_mask, af, ke->pfrke_net); +#else pfr_prepare_network(&pfr_mask, af, ke->pfrke_net); +#endif *raddr = SUNION2PF(&ke->pfrke_sa, af); +#ifdef __FreeBSD__ + *rmask = SUNION2PF(&V_pfr_mask, af); +#else *rmask = SUNION2PF(&pfr_mask, af); +#endif if (use_counter) { /* is supplied address within block? */ @@ -2269,27 +2409,42 @@ _next_block: /* this is a single IP address - no possible nested block */ PF_ACPY(counter, addr, af); *pidx = idx; + kt->pfrkt_match++; return (0); } for (;;) { /* we don't want to use a nested block */ +#ifdef __FreeBSD__ + if (af == AF_INET) + ke2 = (struct pfr_kentry *)rn_match(&V_pfr_sin, + kt->pfrkt_ip4); + else if (af == AF_INET6) + ke2 = (struct pfr_kentry *)rn_match(&V_pfr_sin6, + kt->pfrkt_ip6); +#else if (af == AF_INET) ke2 = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); else if (af == AF_INET6) ke2 = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); +#endif /* no need to check KENTRY_RNF_ROOT() here */ if (ke2 == ke) { /* lookup return the same block - perfect */ PF_ACPY(counter, addr, af); *pidx = idx; + kt->pfrkt_match++; return (0); } /* we need to increase the counter past the nested block */ pfr_prepare_network(&mask, AF_INET, ke2->pfrke_net); +#ifdef __FreeBSD__ + PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &V_pfr_ffaddr, af); +#else PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af); +#endif PF_AINC(addr, af); if (!PF_MATCHA(0, *raddr, *rmask, addr, af)) { /* ok, we reached the end of our main block */ diff --git a/freebsd/sys/contrib/pf/net/pfvar.h b/freebsd/sys/contrib/pf/net/pfvar.h index 0d711ffc..59177b5b 100644 --- a/freebsd/sys/contrib/pf/net/pfvar.h +++ b/freebsd/sys/contrib/pf/net/pfvar.h @@ -1,5 +1,4 @@ -/* $FreeBSD$ */ -/* $OpenBSD: pfvar.h,v 1.244 2007/02/23 21:31:51 deraadt Exp $ */ +/* $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -78,9 +77,8 @@ struct inpcb; #endif enum { PF_INOUT, PF_IN, PF_OUT }; -enum { PF_LAN_EXT, PF_EXT_GWY, PF_ID }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, - PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP }; + PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER }; enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT, @@ -90,6 +88,7 @@ enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; enum { PF_GET_NONE, PF_GET_CLR_CNTR }; +enum { PF_SK_WIRE, PF_SK_STACK, PF_SK_BOTH }; /* * Note about PFTM_*: real indices into pf_rule.timeout[] come before @@ -132,7 +131,8 @@ enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, - PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED }; + PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED, + PF_ADDR_RANGE }; #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_WSCALE_FLAG 0x80 @@ -212,87 +212,88 @@ struct pfi_dynaddr { */ #ifdef __FreeBSD__ -#define splsoftnet() splnet() +#define splsoftnet() splnet() #define HTONL(x) (x) = htonl((__uint32_t)(x)) #define HTONS(x) (x) = htons((__uint16_t)(x)) #define NTOHL(x) (x) = ntohl((__uint32_t)(x)) #define NTOHS(x) (x) = ntohs((__uint16_t)(x)) -#define PF_NAME "pf" +#define PF_NAME "pf" -#define PR_NOWAIT M_NOWAIT -#define pool_get(p, f) uma_zalloc(*(p), (f)) -#define pool_put(p, o) uma_zfree(*(p), (o)) +#define PR_NOWAIT M_NOWAIT +#define PR_WAITOK M_WAITOK +#define PR_ZERO M_ZERO +#define pool_get(p, f) uma_zalloc(*(p), (f)) +#define pool_put(p, o) uma_zfree(*(p), (o)) -#define UMA_CREATE(var, type, desc) \ - var = uma_zcreate(desc, sizeof(type), \ - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); \ - if (var == NULL) break -#define UMA_DESTROY(var) \ - if(var) uma_zdestroy(var) +#define UMA_CREATE(var, type, desc) \ + var = uma_zcreate(desc, sizeof(type), \ + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); \ + if (var == NULL) \ + break +#define UMA_DESTROY(var) \ + if (var) \ + uma_zdestroy(var) +#ifdef __FreeBSD__ extern struct mtx pf_task_mtx; -#define PF_ASSERT(h) mtx_assert(&pf_task_mtx, (h)) - -#define PF_LOCK() do { \ - PF_ASSERT(MA_NOTOWNED); \ - mtx_lock(&pf_task_mtx); \ -} while(0) -#define PF_UNLOCK() do { \ - PF_ASSERT(MA_OWNED); \ - mtx_unlock(&pf_task_mtx); \ -} while(0) - -#define PF_COPYIN(uaddr, kaddr, len, r) do { \ - PF_UNLOCK(); \ - r = copyin((uaddr), (kaddr), (len)); \ - PF_LOCK(); \ +#define PF_LOCK_ASSERT() mtx_assert(&pf_task_mtx, MA_OWNED) +#define PF_UNLOCK_ASSERT() mtx_assert(&pf_task_mtx, MA_NOTOWNED) +#define PF_LOCK() mtx_lock(&pf_task_mtx) +#define PF_UNLOCK() mtx_unlock(&pf_task_mtx) +#else +#define PF_LOCK_ASSERT() +#define PF_UNLOCK_ASSERT() +#define PF_LOCK() +#define PF_UNLOCK() +#endif /* __FreeBSD__ */ + +#define PF_COPYIN(uaddr, kaddr, len, r) do { \ + PF_UNLOCK(); \ + r = copyin((uaddr), (kaddr), (len)); \ + PF_LOCK(); \ } while(0) -#define PF_COPYOUT(kaddr, uaddr, len, r) do { \ - PF_UNLOCK(); \ - r = copyout((kaddr), (uaddr), (len)); \ - PF_LOCK(); \ +#define PF_COPYOUT(kaddr, uaddr, len, r) do { \ + PF_UNLOCK(); \ + r = copyout((kaddr), (uaddr), (len)); \ + PF_LOCK(); \ } while(0) -extern void init_pf_mutex(void); -extern void destroy_pf_mutex(void); - -#define PF_MODVER 1 -#define PFLOG_MODVER 1 -#define PFSYNC_MODVER 1 - -#define PFLOG_MINVER 1 -#define PFLOG_PREFVER PFLOG_MODVER -#define PFLOG_MAXVER 1 -#define PFSYNC_MINVER 1 -#define PFSYNC_PREFVER PFSYNC_MODVER -#define PFSYNC_MAXVER 1 -#endif /* __FreeBSD__ */ - +#define PF_MODVER 1 +#define PFLOG_MODVER 1 +#define PFSYNC_MODVER 1 + +#define PFLOG_MINVER 1 +#define PFLOG_PREFVER PFLOG_MODVER +#define PFLOG_MAXVER 1 +#define PFSYNC_MINVER 1 +#define PFSYNC_PREFVER PFSYNC_MODVER +#define PFSYNC_MAXVER 1 +#endif /* __FreeBSD__ */ #ifdef INET #ifndef INET6 -#define PF_INET_ONLY +#define PF_INET_ONLY #endif /* ! INET6 */ #endif /* INET */ #ifdef INET6 #ifndef INET -#define PF_INET6_ONLY +#define PF_INET6_ONLY #endif /* ! INET */ #endif /* INET6 */ #ifdef INET #ifdef INET6 -#define PF_INET_INET6 +#define PF_INET_INET6 #endif /* INET6 */ #endif /* INET */ #else -#define PF_INET_INET6 +#define PF_INET_INET6 #endif /* _KERNEL */ @@ -401,18 +402,25 @@ extern void destroy_pf_mutex(void); #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ -#define PF_MISMATCHAW(aw, x, af, neg, ifp) \ +/* + * XXX callers not FIB-aware in our version of pf yet. + * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio. + */ +#define PF_MISMATCHAW(aw, x, af, neg, ifp, rtid) \ ( \ (((aw)->type == PF_ADDR_NOROUTE && \ - pf_routable((x), (af), NULL)) || \ + pf_routable((x), (af), NULL, (rtid))) || \ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ - pf_routable((x), (af), (ifp))) || \ + pf_routable((x), (af), (ifp), (rtid))) || \ ((aw)->type == PF_ADDR_RTLABEL && \ - !pf_rtlabel_match((x), (af), (aw))) || \ + !pf_rtlabel_match((x), (af), (aw), (rtid))) || \ ((aw)->type == PF_ADDR_TABLE && \ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ ((aw)->type == PF_ADDR_DYNIFTL && \ - !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + ((aw)->type == PF_ADDR_RANGE && \ + !pf_match_addr_range(&(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))) || \ ((aw)->type == PF_ADDR_ADDRMASK && \ !PF_AZERO(&(aw)->v.a.mask, (af)) && \ !PF_MATCHA(0, &(aw)->v.a.addr, \ @@ -619,12 +627,13 @@ struct pf_rule { int rtableid; u_int32_t timeout[PFTM_MAX]; - u_int32_t states; + u_int32_t states_cur; + u_int32_t states_tot; u_int32_t max_states; u_int32_t src_nodes; u_int32_t max_src_nodes; u_int32_t max_src_states; - u_int32_t spare1; /* netgraph */ + u_int32_t spare1; /* netgraph */ u_int32_t max_src_conn; struct { u_int32_t limit; @@ -643,7 +652,7 @@ struct pf_rule { u_int16_t max_mss; u_int16_t tag; u_int16_t match_tag; - u_int16_t spare2; /* netgraph */ + u_int16_t spare2; /* netgraph */ struct pf_rule_uid uid; struct pf_rule_gid gid; @@ -673,12 +682,18 @@ struct pf_rule { u_int8_t rt; u_int8_t return_ttl; u_int8_t tos; + u_int8_t set_tos; u_int8_t anchor_relative; u_int8_t anchor_wildcard; #define PF_FLUSH 0x01 #define PF_FLUSH_GLOBAL 0x02 u_int8_t flush; + + struct { + struct pf_addr addr; + u_int16_t port; + } divert; }; /* rule flags */ @@ -697,10 +712,12 @@ struct pf_rule { #define PFRULE_FRAGDROP 0x0400 /* drop funny fragments */ #define PFRULE_RANDOMID 0x0800 #define PFRULE_REASSEMBLE_TCP 0x1000 +#define PFRULE_SET_TOS 0x2000 /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ #define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */ +#define PFRULE_PFLOW 0x00040000 #define PFSTATE_HIWAT 10000 /* default state table size */ #define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ @@ -758,83 +775,268 @@ struct pf_state_host { }; struct pf_state_peer { + struct pf_state_scrub *scrub; /* state is scrubbed */ u_int32_t seqlo; /* Max sequence number sent */ u_int32_t seqhi; /* Max the other end ACKd + win */ u_int32_t seqdiff; /* Sequence number modulator */ u_int16_t max_win; /* largest window (pre scaling) */ + u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ - u_int16_t mss; /* Maximum segment size option */ u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ - struct pf_state_scrub *scrub; /* state is scrubbed */ - u_int8_t pad[3]; + u_int8_t pad[1]; }; TAILQ_HEAD(pf_state_queue, pf_state); -/* keep synced with struct pf_state, used in RB_FIND */ -struct pf_state_cmp { - u_int64_t id; - u_int32_t creatorid; - struct pf_state_host lan; - struct pf_state_host gwy; - struct pf_state_host ext; +/* keep synced with struct pf_state_key, used in RB_FIND */ +struct pf_state_key_cmp { + struct pf_addr addr[2]; + u_int16_t port[2]; sa_family_t af; u_int8_t proto; - u_int8_t direction; - u_int8_t pad; + u_int8_t pad[2]; +}; + +struct pf_state_item { + TAILQ_ENTRY(pf_state_item) entry; + struct pf_state *s; +}; + +TAILQ_HEAD(pf_statelisthead, pf_state_item); + +struct pf_state_key { + struct pf_addr addr[2]; + u_int16_t port[2]; + sa_family_t af; + u_int8_t proto; + u_int8_t pad[2]; + + RB_ENTRY(pf_state_key) entry; + struct pf_statelisthead states; + struct pf_state_key *reverse; + struct inpcb *inp; +}; + +/* keep synced with struct pf_state, used in RB_FIND */ +struct pf_state_cmp { + u_int64_t id; + u_int32_t creatorid; + u_int8_t direction; + u_int8_t pad[3]; }; struct pf_state { - u_int64_t id; + u_int64_t id; + u_int32_t creatorid; + u_int8_t direction; +#ifdef __FreeBSD__ + u_int8_t pad[2]; + u_int8_t local_flags; +#define PFSTATE_EXPIRING 0x01 +#else + u_int8_t pad[3]; +#endif + + TAILQ_ENTRY(pf_state) sync_list; + TAILQ_ENTRY(pf_state) entry_list; + RB_ENTRY(pf_state) entry_id; + struct pf_state_peer src; + struct pf_state_peer dst; + union pf_rule_ptr rule; + union pf_rule_ptr anchor; + union pf_rule_ptr nat_rule; + struct pf_addr rt_addr; + struct pf_state_key *key[2]; /* addresses stack and wire */ + struct pfi_kif *kif; + struct pfi_kif *rt_kif; + struct pf_src_node *src_node; + struct pf_src_node *nat_src_node; + u_int64_t packets[2]; + u_int64_t bytes[2]; + u_int32_t creation; + u_int32_t expire; + u_int32_t pfsync_time; + u_int16_t tag; + u_int8_t log; + u_int8_t state_flags; +#define PFSTATE_ALLOWOPTS 0x01 +#define PFSTATE_SLOPPY 0x02 +#define PFSTATE_PFLOW 0x04 +#define PFSTATE_NOSYNC 0x08 +#define PFSTATE_ACK 0x10 + u_int8_t timeout; + u_int8_t sync_state; /* PFSYNC_S_x */ + + /* XXX */ + u_int8_t sync_updates; + u_int8_t _tail[3]; +}; + +/* + * Unified state structures for pulling states out of the kernel + * used by pfsync(4) and the pf(4) ioctl. + */ +struct pfsync_state_scrub { + u_int16_t pfss_flags; + u_int8_t pfss_ttl; /* stashed TTL */ +#define PFSYNC_SCRUB_FLAG_VALID 0x01 + u_int8_t scrub_flag; + u_int32_t pfss_ts_mod; /* timestamp modulation */ +} __packed; + +struct pfsync_state_peer { + struct pfsync_state_scrub scrub; /* state is scrubbed */ + u_int32_t seqlo; /* Max sequence number sent */ + u_int32_t seqhi; /* Max the other end ACKd + win */ + u_int32_t seqdiff; /* Sequence number modulator */ + u_int16_t max_win; /* largest window (pre scaling) */ + u_int16_t mss; /* Maximum segment size option */ + u_int8_t state; /* active state level */ + u_int8_t wscale; /* window scaling factor */ + u_int8_t pad[6]; +} __packed; + +struct pfsync_state_key { + struct pf_addr addr[2]; + u_int16_t port[2]; +}; + +struct pfsync_state { + u_int32_t id[2]; + char ifname[IFNAMSIZ]; + struct pfsync_state_key key[2]; + struct pfsync_state_peer src; + struct pfsync_state_peer dst; + struct pf_addr rt_addr; + u_int32_t rule; + u_int32_t anchor; + u_int32_t nat_rule; + u_int32_t creation; + u_int32_t expire; + u_int32_t packets[2][2]; + u_int32_t bytes[2][2]; u_int32_t creatorid; - struct pf_state_host lan; - struct pf_state_host gwy; - struct pf_state_host ext; sa_family_t af; u_int8_t proto; u_int8_t direction; #ifdef __FreeBSD__ u_int8_t local_flags; -#define PFSTATE_EXPIRING 0x01 -#else +#define PFSTATE_EXPIRING 0x01 u_int8_t pad; #endif u_int8_t log; u_int8_t state_flags; -#define PFSTATE_ALLOWOPTS 0x01 -#define PFSTATE_SLOPPY 0x02 u_int8_t timeout; u_int8_t sync_flags; -#define PFSTATE_NOSYNC 0x01 -#define PFSTATE_FROMSYNC 0x02 -#define PFSTATE_STALE 0x04 - union { - struct { - RB_ENTRY(pf_state) entry_lan_ext; - RB_ENTRY(pf_state) entry_ext_gwy; - RB_ENTRY(pf_state) entry_id; - TAILQ_ENTRY(pf_state) entry_list; - struct pfi_kif *kif; - } s; - char ifname[IFNAMSIZ]; - } u; - struct pf_state_peer src; - struct pf_state_peer dst; - union pf_rule_ptr rule; - union pf_rule_ptr anchor; - union pf_rule_ptr nat_rule; - struct pf_addr rt_addr; - struct pfi_kif *rt_kif; - struct pf_src_node *src_node; - struct pf_src_node *nat_src_node; - u_int64_t packets[2]; - u_int64_t bytes[2]; - u_int32_t creation; - u_int32_t expire; - u_int32_t pfsync_time; - u_int16_t tag; -}; + u_int8_t updates; +} __packed; + +#ifdef __FreeBSD__ +#ifdef _KERNEL +/* pfsync */ +typedef int pfsync_state_import_t(struct pfsync_state *, u_int8_t); +typedef void pfsync_insert_state_t(struct pf_state *); +typedef void pfsync_update_state_t(struct pf_state *); +typedef void pfsync_delete_state_t(struct pf_state *); +typedef void pfsync_clear_states_t(u_int32_t, const char *); +typedef int pfsync_state_in_use_t(struct pf_state *); +typedef int pfsync_defer_t(struct pf_state *, struct mbuf *); +typedef int pfsync_up_t(void); + +extern pfsync_state_import_t *pfsync_state_import_ptr; +extern pfsync_insert_state_t *pfsync_insert_state_ptr; +extern pfsync_update_state_t *pfsync_update_state_ptr; +extern pfsync_delete_state_t *pfsync_delete_state_ptr; +extern pfsync_clear_states_t *pfsync_clear_states_ptr; +extern pfsync_state_in_use_t *pfsync_state_in_use_ptr; +extern pfsync_defer_t *pfsync_defer_ptr; +extern pfsync_up_t *pfsync_up_ptr; + +void pfsync_state_export(struct pfsync_state *, + struct pf_state *); + +/* pflow */ +typedef int export_pflow_t(struct pf_state *); + +extern export_pflow_t *export_pflow_ptr; + +/* pflog */ +struct pf_ruleset; +struct pf_pdesc; +typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t, + u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, + struct pf_ruleset *, struct pf_pdesc *); + +extern pflog_packet_t *pflog_packet_ptr; + +/* pf uid hack */ +VNET_DECLARE(int, debug_pfugidhack); +#define V_debug_pfugidhack VNET(debug_pfugidhack) + +#define V_pf_end_threads VNET(pf_end_threads) +#endif + +/* Macros to set/clear/test flags. */ +#ifdef _KERNEL +#define SET(t, f) ((t) |= (f)) +#define CLR(t, f) ((t) &= ~(f)) +#define ISSET(t, f) ((t) & (f)) +#endif +#endif + +#define PFSYNC_FLAG_SRCNODE 0x04 +#define PFSYNC_FLAG_NATSRCNODE 0x08 + +/* for copies to/from network byte order */ +/* ioctl interface also uses network byte order */ +#define pf_state_peer_hton(s,d) do { \ + (d)->seqlo = htonl((s)->seqlo); \ + (d)->seqhi = htonl((s)->seqhi); \ + (d)->seqdiff = htonl((s)->seqdiff); \ + (d)->max_win = htons((s)->max_win); \ + (d)->mss = htons((s)->mss); \ + (d)->state = (s)->state; \ + (d)->wscale = (s)->wscale; \ + if ((s)->scrub) { \ + (d)->scrub.pfss_flags = \ + htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ + (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ + (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ + (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ + } \ +} while (0) + +#define pf_state_peer_ntoh(s,d) do { \ + (d)->seqlo = ntohl((s)->seqlo); \ + (d)->seqhi = ntohl((s)->seqhi); \ + (d)->seqdiff = ntohl((s)->seqdiff); \ + (d)->max_win = ntohs((s)->max_win); \ + (d)->mss = ntohs((s)->mss); \ + (d)->state = (s)->state; \ + (d)->wscale = (s)->wscale; \ + if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ + (d)->scrub != NULL) { \ + (d)->scrub->pfss_flags = \ + ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ + (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ + (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ + } \ +} while (0) + +#define pf_state_counter_hton(s,d) do { \ + d[0] = htonl((s>>32)&0xffffffff); \ + d[1] = htonl(s&0xffffffff); \ +} while (0) + +#define pf_state_counter_from_pfsync(s) \ + (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1])) + +#define pf_state_counter_ntoh(s,d) do { \ + d = ntohl(s[0]); \ + d = d<<32; \ + d += ntohl(s[1]); \ +} while (0) TAILQ_HEAD(pf_rulequeue, pf_rule); @@ -881,9 +1083,11 @@ RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); #define PFR_TFLAG_INACTIVE 0x00000008 #define PFR_TFLAG_REFERENCED 0x00000010 #define PFR_TFLAG_REFDANCHOR 0x00000020 -#define PFR_TFLAG_USRMASK 0x00000003 +#define PFR_TFLAG_COUNTERS 0x00000040 +/* Adjust masks below when adding flags. */ +#define PFR_TFLAG_USRMASK 0x00000043 #define PFR_TFLAG_SETMASK 0x0000003C -#define PFR_TFLAG_ALLMASK 0x0000003F +#define PFR_TFLAG_ALLMASK 0x0000007F struct pfr_table { char pfrt_anchor[MAXPATHLEN]; @@ -894,7 +1098,7 @@ struct pfr_table { enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, - PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_MAX }; + PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX }; struct pfr_addr { union { @@ -944,20 +1148,32 @@ union sockaddr_union { }; #endif /* _SOCKADDR_UNION_DEFINED */ +struct pfr_kcounters { + u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; +}; + SLIST_HEAD(pfr_kentryworkq, pfr_kentry); struct pfr_kentry { struct radix_node pfrke_node[2]; union sockaddr_union pfrke_sa; - u_int64_t pfrke_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; - u_int64_t pfrke_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; SLIST_ENTRY(pfr_kentry) pfrke_workq; + union { + + struct pfr_kcounters *pfrke_counters; +#if 0 + struct pfr_kroute *pfrke_route; +#endif + } u; long pfrke_tzero; u_int8_t pfrke_af; u_int8_t pfrke_net; u_int8_t pfrke_not; u_int8_t pfrke_mark; - u_int8_t pfrke_intrpool; }; +#define pfrke_counters u.pfrke_counters +#define pfrke_route u.pfrke_route + SLIST_HEAD(pfr_ktableworkq, pfr_ktable); RB_HEAD(pfr_ktablehead, pfr_ktable); @@ -986,17 +1202,25 @@ struct pfr_ktable { #define pfrkt_nomatch pfrkt_ts.pfrts_nomatch #define pfrkt_tzero pfrkt_ts.pfrts_tzero -RB_HEAD(pf_state_tree_lan_ext, pf_state); -RB_PROTOTYPE(pf_state_tree_lan_ext, pf_state, - u.s.entry_lan_ext, pf_state_compare_lan_ext); +RB_HEAD(pf_state_tree, pf_state_key); +RB_PROTOTYPE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); -RB_HEAD(pf_state_tree_ext_gwy, pf_state); -RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state, - u.s.entry_ext_gwy, pf_state_compare_ext_gwy); +RB_HEAD(pf_state_tree_ext_gwy, pf_state_key); +RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state_key, + entry_ext_gwy, pf_state_compare_ext_gwy); -TAILQ_HEAD(pfi_statehead, pfi_kif); RB_HEAD(pfi_ifhead, pfi_kif); +/* state tables */ +#ifdef __FreeBSD__ +#ifdef _KERNEL +VNET_DECLARE(struct pf_state_tree, pf_statetbl); +#define V_pf_statetbl VNET(pf_statetbl) +#endif +#else +extern struct pf_state_tree pf_statetbl; +#endif + /* keep synced with pfi_kif, used in RB_FIND */ struct pfi_kif_cmp { char pfik_name[IFNAMSIZ]; @@ -1009,12 +1233,7 @@ struct pfi_kif { u_int64_t pfik_bytes[2][2][2]; u_int32_t pfik_tzero; int pfik_flags; - struct pf_state_tree_lan_ext pfik_lan_ext; - struct pf_state_tree_ext_gwy pfik_ext_gwy; - TAILQ_ENTRY(pfi_kif) pfik_w_states; -#ifndef __FreeBSD__ void *pfik_ah_cookie; -#endif struct ifnet *pfik_ifp; struct ifg_group *pfik_group; int pfik_states; @@ -1029,9 +1248,6 @@ enum pfi_kif_refs { }; #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ -/* XXX: revisist */ -#define PFI_IFLAG_SETABLE_MASK 0x0100 /* setable via DIOC{SET,CLR}IFFLAG */ -#define PFI_IFLAG_PLACEHOLDER 0x8000 /* placeholder group/interface */ struct pf_pdesc { struct { @@ -1050,16 +1266,22 @@ struct pf_pdesc { #endif /* INET6 */ void *any; } hdr; - struct pf_addr baddr; /* address before translation */ - struct pf_addr naddr; /* address after translation */ + struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ - struct pf_addr *src; - struct pf_addr *dst; struct ether_header *eh; + struct pf_addr *src; /* src address */ + struct pf_addr *dst; /* dst address */ + u_int16_t *sport; + u_int16_t *dport; +#ifdef __FreeBSD__ struct pf_mtag *pf_mtag; - u_int16_t *ip_sum; +#endif + u_int32_t p_len; /* total length of payload */ + + u_int16_t *ip_sum; + u_int16_t *proto_sum; u_int16_t flags; /* Let SCRUB trigger behavior in * state code. Easier than tags */ #define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ @@ -1067,6 +1289,9 @@ struct pf_pdesc { sa_family_t af; u_int8_t proto; u_int8_t tos; + u_int8_t dir; /* direction */ + u_int8_t sidx; /* key index for source */ + u_int8_t didx; /* key index for destination */ }; /* flags for RDR options */ @@ -1175,6 +1400,15 @@ struct pf_pdesc { *(a) = (x); \ } while (0) +#ifdef __FreeBSD__ +#define REASON_SET(a, x) \ + do { \ + if ((a) != NULL) \ + *(a) = (x); \ + if (x < PFRES_MAX) \ + V_pf_status.counters[x]++; \ + } while (0) +#else #define REASON_SET(a, x) \ do { \ if ((a) != NULL) \ @@ -1182,6 +1416,7 @@ struct pf_pdesc { if (x < PFRES_MAX) \ pf_status.counters[x]++; \ } while (0) +#endif struct pf_status { u_int64_t counters[PFRES_MAX]; @@ -1265,27 +1500,6 @@ struct pf_altq { u_int32_t qid; /* return value */ }; -#ifndef __FreeBSD__ - -#define PF_TAG_GENERATED 0x01 -#define PF_TAG_FRAGCACHE 0x02 -#define PF_TAG_TRANSLATE_LOCALHOST 0x04 - -struct pf_mtag { - void *hdr; /* saved hdr pos in mbuf, for ECN */ - u_int rtableid; /* alternate routing table id */ - u_int32_t qid; /* queue id */ - u_int16_t tag; /* tag id */ - u_int8_t flags; - u_int8_t routed; - sa_family_t af; /* for ECN */ -}; -#endif - -struct pf_tag { - u_int16_t tag; /* tag id */ -}; - struct pf_tagname { TAILQ_ENTRY(pf_tagname) entries; char name[PF_TAG_NAME_SIZE]; @@ -1293,6 +1507,14 @@ struct pf_tagname { int ref; }; +struct pf_divert { + union { + struct in_addr ipv4; + struct in6_addr ipv6; + } addr; + u_int16_t port; +}; + #define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ #define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */ #define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ @@ -1343,31 +1565,32 @@ struct pfioc_natlook { }; struct pfioc_state { - u_int32_t nr; - struct pf_state state; + struct pfsync_state state; }; struct pfioc_src_node_kill { - /* XXX returns the number of src nodes killed in psnk_af */ sa_family_t psnk_af; struct pf_rule_addr psnk_src; struct pf_rule_addr psnk_dst; + u_int psnk_killed; }; struct pfioc_state_kill { - /* XXX returns the number of states killed in psk_af */ + struct pf_state_cmp psk_pfcmp; sa_family_t psk_af; int psk_proto; struct pf_rule_addr psk_src; struct pf_rule_addr psk_dst; char psk_ifname[IFNAMSIZ]; + char psk_label[PF_RULE_LABEL_SIZE]; + u_int psk_killed; }; struct pfioc_states { int ps_len; union { - caddr_t psu_buf; - struct pf_state *psu_states; + caddr_t psu_buf; + struct pfsync_state *psu_states; } ps_u; #define ps_buf ps_u.psu_buf #define ps_states ps_u.psu_states @@ -1518,55 +1741,97 @@ struct pfioc_iface { #define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table) #define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table) #define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table) -#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) +#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) #define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table) #define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table) #define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table) #define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table) #define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table) #define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table) -#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) +#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) #define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) #define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) -#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) -#define DIOCOSFPFLUSH _IO('D', 78) -#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) -#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) -#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) -#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) -#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) -#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) -#define DIOCCLRSRCNODES _IO('D', 85) -#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) -#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) -#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) -#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) -#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) +#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) +#define DIOCOSFPFLUSH _IO('D', 78) +#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) +#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) +#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) +#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) +#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) +#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) +#define DIOCCLRSRCNODES _IO('D', 85) +#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) +#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) +#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) +#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) +#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) #ifdef __FreeBSD__ struct pf_ifspeed { char ifname[IFNAMSIZ]; u_int32_t baudrate; }; -#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed) +#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed) #endif #ifdef _KERNEL RB_HEAD(pf_src_tree, pf_src_node); RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare); +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_src_tree, tree_src_tracking); +#define V_tree_src_tracking VNET(tree_src_tracking) +#else extern struct pf_src_tree tree_src_tracking; +#endif RB_HEAD(pf_state_tree_id, pf_state); RB_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_state_tree_id, tree_id); +#define V_tree_id VNET(tree_id) +VNET_DECLARE(struct pf_state_queue, state_list); +#define V_state_list VNET(state_list) +#else extern struct pf_state_tree_id tree_id; extern struct pf_state_queue state_list; +#endif TAILQ_HEAD(pf_poolqueue, pf_pool); +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_poolqueue, pf_pools[2]); +#define V_pf_pools VNET(pf_pools) +#else extern struct pf_poolqueue pf_pools[2]; +#endif TAILQ_HEAD(pf_altqqueue, pf_altq); +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]); +#define V_pf_altqs VNET(pf_altqs) +VNET_DECLARE(struct pf_palist, pf_pabuf); +#define V_pf_pabuf VNET(pf_pabuf) +#else extern struct pf_altqqueue pf_altqs[2]; extern struct pf_palist pf_pabuf; +#endif +#ifdef __FreeBSD__ +VNET_DECLARE(u_int32_t, ticket_altqs_active); +#define V_ticket_altqs_active VNET(ticket_altqs_active) +VNET_DECLARE(u_int32_t, ticket_altqs_inactive); +#define V_ticket_altqs_inactive VNET(ticket_altqs_inactive) +VNET_DECLARE(int, altqs_inactive_open); +#define V_altqs_inactive_open VNET(altqs_inactive_open) +VNET_DECLARE(u_int32_t, ticket_pabuf); +#define V_ticket_pabuf VNET(ticket_pabuf) +VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active); +#define V_pf_altqs_active VNET(pf_altqs_active) +VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive); +#define V_pf_altqs_inactive VNET(pf_altqs_inactive) +VNET_DECLARE(struct pf_poolqueue *, pf_pools_active); +#define V_pf_pools_active VNET(pf_pools_active) +VNET_DECLARE(struct pf_poolqueue *, pf_pools_inactive); +#define V_pf_pools_inactive VNET(pf_pools_inactive) +#else extern u_int32_t ticket_altqs_active; extern u_int32_t ticket_altqs_inactive; extern int altqs_inactive_open; @@ -1575,6 +1840,7 @@ extern struct pf_altqqueue *pf_altqs_active; extern struct pf_altqqueue *pf_altqs_inactive; extern struct pf_poolqueue *pf_pools_active; extern struct pf_poolqueue *pf_pools_inactive; +#endif extern int pf_tbladdr_setup(struct pf_ruleset *, struct pf_addr_wrap *); extern void pf_tbladdr_remove(struct pf_addr_wrap *); @@ -1582,49 +1848,84 @@ extern void pf_tbladdr_copyout(struct pf_addr_wrap *); extern void pf_calc_skip_steps(struct pf_rulequeue *); #ifdef __FreeBSD__ #ifdef ALTQ -extern void pf_altq_ifnet_event(struct ifnet *, int); +extern void pf_altq_ifnet_event(struct ifnet *, int); #endif -extern uma_zone_t pf_src_tree_pl, pf_rule_pl; -extern uma_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl; -extern uma_zone_t pfr_ktable_pl, pfr_kentry_pl, pfr_kentry_pl2; -extern uma_zone_t pf_cache_pl, pf_cent_pl; -extern uma_zone_t pf_state_scrub_pl; -extern uma_zone_t pfi_addr_pl; +VNET_DECLARE(uma_zone_t, pf_src_tree_pl); +#define V_pf_src_tree_pl VNET(pf_src_tree_pl) +VNET_DECLARE(uma_zone_t, pf_rule_pl); +#define V_pf_rule_pl VNET(pf_rule_pl) +VNET_DECLARE(uma_zone_t, pf_state_pl); +#define V_pf_state_pl VNET(pf_state_pl) +VNET_DECLARE(uma_zone_t, pf_state_key_pl); +#define V_pf_state_key_pl VNET(pf_state_key_pl) +VNET_DECLARE(uma_zone_t, pf_state_item_pl); +#define V_pf_state_item_pl VNET(pf_state_item_pl) +VNET_DECLARE(uma_zone_t, pf_altq_pl); +#define V_pf_altq_pl VNET(pf_altq_pl) +VNET_DECLARE(uma_zone_t, pf_pooladdr_pl); +#define V_pf_pooladdr_pl VNET(pf_pooladdr_pl) +VNET_DECLARE(uma_zone_t, pfr_ktable_pl); +#define V_pfr_ktable_pl VNET(pfr_ktable_pl) +VNET_DECLARE(uma_zone_t, pfr_kentry_pl); +#define V_pfr_kentry_pl VNET(pfr_kentry_pl) +VNET_DECLARE(uma_zone_t, pfr_kcounters_pl); +#define V_pfr_kcounters_pl VNET(pfr_kcounters_pl) +VNET_DECLARE(uma_zone_t, pf_cache_pl); +#define V_pf_cache_pl VNET(pf_cache_pl) +VNET_DECLARE(uma_zone_t, pf_cent_pl); +#define V_pf_cent_pl VNET(pf_cent_pl) +VNET_DECLARE(uma_zone_t, pf_state_scrub_pl); +#define V_pf_state_scrub_pl VNET(pf_state_scrub_pl) +VNET_DECLARE(uma_zone_t, pfi_addr_pl); +#define V_pfi_addr_pl VNET(pfi_addr_pl) #else extern struct pool pf_src_tree_pl, pf_rule_pl; -extern struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; +extern struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl, + pf_altq_pl, pf_pooladdr_pl; extern struct pool pf_state_scrub_pl; #endif extern void pf_purge_thread(void *); #ifdef __FreeBSD__ extern int pf_purge_expired_src_nodes(int); -extern int pf_purge_expired_states(u_int32_t, int); +extern int pf_purge_expired_states(u_int32_t , int); #else extern void pf_purge_expired_src_nodes(int); extern void pf_purge_expired_states(u_int32_t); #endif extern void pf_unlink_state(struct pf_state *); extern void pf_free_state(struct pf_state *); -extern int pf_insert_state(struct pfi_kif *, +extern int pf_state_insert(struct pfi_kif *, + struct pf_state_key *, + struct pf_state_key *, struct pf_state *); extern int pf_insert_src_node(struct pf_src_node **, struct pf_rule *, struct pf_addr *, sa_family_t); void pf_src_tree_remove_state(struct pf_state *); extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *); -extern struct pf_state *pf_find_state_all(struct pf_state_cmp *key, - u_int8_t tree, int *more); +extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, + u_int, int *); extern void pf_print_state(struct pf_state *); extern void pf_print_flags(u_int8_t); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); +#ifdef __FreeBSD__ +VNET_DECLARE(struct ifnet *, sync_ifp); +#define V_sync_ifp VNET(sync_ifp); +VNET_DECLARE(struct pf_rule, pf_default_rule); +#define V_pf_default_rule VNET(pf_default_rule) +#else extern struct ifnet *sync_ifp; extern struct pf_rule pf_default_rule; +#endif extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, u_int8_t); void pf_rm_rule(struct pf_rulequeue *, struct pf_rule *); +#ifndef __FreeBSD__ +struct pf_divert *pf_find_divert(struct mbuf *); +#endif #ifdef INET #ifdef __FreeBSD__ @@ -1656,8 +1957,11 @@ void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, struct pf_pdesc *); +void pf_send_deferred_syn(struct pf_state *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); +int pf_match_addr_range(struct pf_addr *, struct pf_addr *, + struct pf_addr *, sa_family_t); int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t); @@ -1679,13 +1983,18 @@ int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, u_int32_t pf_state_expires(const struct pf_state *); void pf_purge_expired_fragments(void); -int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *); -int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *); +int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *, + int); +int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *, + int); #ifdef __FreeBSD__ -int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *); +int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *); #else int pf_socket_lookup(int, struct pf_pdesc *); #endif +struct pf_state_key *pf_alloc_state_key(int); +void pf_pkt_addr_changed(struct mbuf *); +int pf_state_key_attach(struct pf_state_key *, struct pf_state *, int); void pfr_initialize(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, @@ -1694,7 +2003,7 @@ int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, struct pf_addr **, struct pf_addr **, sa_family_t); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * - pfr_attach_table(struct pf_ruleset *, char *); + pfr_attach_table(struct pf_ruleset *, char *, int); void pfr_detach_table(struct pfr_ktable *); int pfr_clr_tables(struct pfr_table *, int *, int); int pfr_add_tables(struct pfr_table *, int, int *, int); @@ -1723,8 +2032,12 @@ int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); -extern struct pfi_statehead pfi_statehead; +#ifdef __FreeBSD__ +VNET_DECLARE(struct pfi_kif *, pfi_all); +#define V_pfi_all VNET(pfi_all) +#else extern struct pfi_kif *pfi_all; +#endif void pfi_initialize(void); #ifdef __FreeBSD__ @@ -1744,30 +2057,44 @@ int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); void pfi_dynaddr_remove(struct pf_addr_wrap *); void pfi_dynaddr_copyout(struct pf_addr_wrap *); -void pfi_fill_oldstatus(struct pf_status *); -int pfi_clr_istats(const char *); +void pfi_update_status(const char *, struct pf_status *); int pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_set_flags(const char *, int); int pfi_clear_flags(const char *, int); +#ifdef __FreeBSD__ +int pf_match_tag(struct mbuf *, struct pf_rule *, int *, + struct pf_mtag *); +#else +int pf_match_tag(struct mbuf *, struct pf_rule *, int *); +#endif u_int16_t pf_tagname2tag(char *); void pf_tag2tagname(u_int16_t, char *); void pf_tag_ref(u_int16_t); void pf_tag_unref(u_int16_t); -int pf_tag_packet(struct mbuf *, struct pf_mtag *, int, int); +#ifdef __FreeBSD__ +int pf_tag_packet(struct mbuf *, int, int, struct pf_mtag *); +#else +int pf_tag_packet(struct mbuf *, int, int); +#endif u_int32_t pf_qname2qid(char *); void pf_qid2qname(u_int32_t, char *); void pf_qid_unref(u_int32_t); -#ifndef __FreeBSD__ -struct pf_mtag *pf_find_mtag(struct mbuf *); -struct pf_mtag *pf_get_mtag(struct mbuf *); -#endif +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_status, pf_status); +#define V_pf_status VNET(pf_status) +#else extern struct pf_status pf_status; +#endif #ifdef __FreeBSD__ -extern uma_zone_t pf_frent_pl, pf_frag_pl; -extern struct sx pf_consistency_lock; +VNET_DECLARE(uma_zone_t, pf_frent_pl); +#define V_pf_frent_pl VNET(pf_frent_pl) +VNET_DECLARE(uma_zone_t, pf_frag_pl); +#define V_pf_frag_pl VNET(pf_frag_pl) +VNET_DECLARE(struct sx, pf_consistency_lock); +#define V_pf_consistency_lock VNET(pf_consistency_lock) #else extern struct pool pf_frent_pl, pf_frag_pl; extern struct rwlock pf_consistency_lock; @@ -1777,7 +2104,12 @@ struct pf_pool_limit { void *pp; unsigned limit; }; +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]); +#define V_pf_pool_limits VNET(pf_pool_limits) +#else extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; +#endif #ifdef __FreeBSD__ struct pf_frent { @@ -1788,34 +2120,44 @@ struct pf_frent { struct pf_frcache { LIST_ENTRY(pf_frcache) fr_next; - uint16_t fr_off; - uint16_t fr_end; + uint16_t fr_off; + uint16_t fr_end; }; struct pf_fragment { RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; - struct in_addr fr_src; - struct in_addr fr_dst; - u_int8_t fr_p; /* protocol of this fragment */ - u_int8_t fr_flags; /* status flags */ - u_int16_t fr_id; /* fragment id for reassemble */ - u_int16_t fr_max; /* fragment data max */ - u_int32_t fr_timeout; -#define fr_queue fr_u.fru_queue -#define fr_cache fr_u.fru_cache + struct in_addr fr_src; + struct in_addr fr_dst; + u_int8_t fr_p; /* protocol of this fragment */ + u_int8_t fr_flags; /* status flags */ + u_int16_t fr_id; /* fragment id for reassemble */ + u_int16_t fr_max; /* fragment data max */ + u_int32_t fr_timeout; +#define fr_queue fr_u.fru_queue +#define fr_cache fr_u.fru_cache union { - LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ - LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ + LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ + LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ } fr_u; }; #endif /* (__FreeBSD__) */ #endif /* _KERNEL */ -extern struct pf_anchor_global pf_anchors; -extern struct pf_anchor pf_main_anchor; +#ifdef __FreeBSD__ +#ifdef _KERNEL +VNET_DECLARE(struct pf_anchor_global, pf_anchors); +#define V_pf_anchors VNET(pf_anchors) +VNET_DECLARE(struct pf_anchor, pf_main_anchor); +#define V_pf_main_anchor VNET(pf_main_anchor) +#define pf_main_ruleset V_pf_main_anchor.ruleset +#endif +#else +extern struct pf_anchor_global pf_anchors; +extern struct pf_anchor pf_main_anchor; #define pf_main_ruleset pf_main_anchor.ruleset +#endif /* these ruleset functions can be linked into userland programs (pfctl) */ int pf_get_ruleset_number(u_int8_t); @@ -1832,7 +2174,6 @@ struct pf_ruleset *pf_find_or_create_ruleset(const char *); void pf_rs_initialize(void); #ifndef __FreeBSD__ -/* ?!? */ #ifdef _KERNEL int pf_anchor_copyout(const struct pf_ruleset *, const struct pf_rule *, struct pfioc_rule *); @@ -1863,4 +2204,31 @@ int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); struct pf_os_fingerprint * pf_osfp_validate(void); +#ifdef _KERNEL +void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); + +void pf_step_into_anchor(int *, struct pf_ruleset **, int, + struct pf_rule **, struct pf_rule **, int *); +int pf_step_out_of_anchor(int *, struct pf_ruleset **, + int, struct pf_rule **, struct pf_rule **, + int *); + +int pf_map_addr(u_int8_t, struct pf_rule *, + struct pf_addr *, struct pf_addr *, + struct pf_addr *, struct pf_src_node **); +struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, struct pf_src_node **, + struct pf_state_key **, struct pf_state_key **, + struct pf_state_key **, struct pf_state_key **, + struct pf_addr *, struct pf_addr *, + u_int16_t, u_int16_t); + +int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *, + struct pf_state_key **, struct pf_state_key **, + struct pf_state_key **, struct pf_state_key **, + struct pf_addr *, struct pf_addr *, + u_int16_t, u_int16_t); +#endif /* _KERNEL */ + + #endif /* _NET_PFVAR_H_ */ diff --git a/freebsd/sys/contrib/pf/netinet/in4_cksum.c b/freebsd/sys/contrib/pf/netinet/in4_cksum.c index 9b4a5360..19cc8ac4 100644 --- a/freebsd/sys/contrib/pf/netinet/in4_cksum.c +++ b/freebsd/sys/contrib/pf/netinet/in4_cksum.c @@ -77,7 +77,7 @@ #include <machine/in_cksum.h> #define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) -#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} +#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; (void)ADDCARRY(sum);} int in4_cksum(struct mbuf *, u_int8_t, int, int); |