From a5ddb0ea69f21c16b7697a935d7a0c16bb3cffcf Mon Sep 17 00:00:00 2001 From: Sebastian Huber Date: Tue, 24 Sep 2019 11:05:03 +0200 Subject: Update to FreeBSD head 2019-09-24 Git mirror commit 6b0307a0a5184339393f555d5d424190d8a8277a. --- freebsd/sys/netpfil/ipfw/ip_fw_private.h | 109 ++--- freebsd/sys/netpfil/pf/if_pfsync.c | 48 ++- freebsd/sys/netpfil/pf/pf.c | 54 ++- freebsd/sys/netpfil/pf/pf_if.c | 33 +- freebsd/sys/netpfil/pf/pf_ioctl.c | 676 ++++++++++++++++++++----------- freebsd/sys/netpfil/pf/pf_norm.c | 45 +- freebsd/sys/netpfil/pf/pf_table.c | 231 ++++++++--- 7 files changed, 765 insertions(+), 431 deletions(-) (limited to 'freebsd/sys/netpfil') diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_private.h b/freebsd/sys/netpfil/ipfw/ip_fw_private.h index 7e966d0a..57fa7464 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_private.h +++ b/freebsd/sys/netpfil/ipfw/ip_fw_private.h @@ -61,6 +61,7 @@ enum { IP_FW_NGTEE, IP_FW_NAT, IP_FW_REASS, + IP_FW_NAT64, }; /* @@ -83,11 +84,20 @@ struct _ip6dn_args { * efficient to pass variables around and extend the interface. */ struct ip_fw_args { - struct mbuf *m; /* the mbuf chain */ - struct ifnet *oif; /* output interface */ - struct sockaddr_in *next_hop; /* forward address */ - struct sockaddr_in6 *next_hop6; /* ipv6 forward address */ - + uint32_t flags; +#define IPFW_ARGS_ETHER 0x00010000 /* valid ethernet header */ +#define IPFW_ARGS_NH4 0x00020000 /* IPv4 next hop in hopstore */ +#define IPFW_ARGS_NH6 0x00040000 /* IPv6 next hop in hopstore */ +#define IPFW_ARGS_NH4PTR 0x00080000 /* IPv4 next hop in next_hop */ +#define IPFW_ARGS_NH6PTR 0x00100000 /* IPv6 next hop in next_hop6 */ +#define IPFW_ARGS_REF 0x00200000 /* valid ipfw_rule_ref */ +#define IPFW_ARGS_IN 0x00400000 /* called on input */ +#define IPFW_ARGS_OUT 0x00800000 /* called on output */ +#define IPFW_ARGS_IP4 0x01000000 /* belongs to v4 ISR */ +#define IPFW_ARGS_IP6 0x02000000 /* belongs to v6 ISR */ +#define IPFW_ARGS_DROP 0x04000000 /* drop it (dummynet) */ +#define IPFW_ARGS_LENMASK 0x0000ffff /* length of data in *mem */ +#define IPFW_ARGS_LENGTH(f) ((f) & IPFW_ARGS_LENMASK) /* * On return, it points to the matching rule. * On entry, rule.slot > 0 means the info is valid and @@ -95,45 +105,36 @@ struct ip_fw_args { * If chain_id == chain->id && slot >0 then jump to that slot. * Otherwise, we locate the first rule >= rulenum:rule_id */ - struct ipfw_rule_ref rule; /* match/restart info */ - - struct ether_header *eh; /* for bridged packets */ - - struct ipfw_flow_id f_id; /* grabbed from IP header */ - //uint32_t cookie; /* a cookie depending on rule action */ - struct inpcb *inp; - - struct _ip6dn_args dummypar; /* dummynet->ip6_output */ - union { /* store here if cannot use a pointer */ - struct sockaddr_in hopstore; - struct sockaddr_in6 hopstore6; + struct ipfw_rule_ref rule; /* match/restart info */ + + struct ifnet *ifp; /* input/output interface */ + struct inpcb *inp; + union { + /* + * next_hop[6] pointers can be used to point to next hop + * stored in rule's opcode to avoid copying into hopstore. + * Also, it is expected that all 0x1-0x10 flags are mutually + * exclusive. 
+ */ + struct sockaddr_in *next_hop; + struct sockaddr_in6 *next_hop6; + /* ipfw next hop storage */ + struct sockaddr_in hopstore; + struct ip_fw_nh6 { + struct in6_addr sin6_addr; + uint32_t sin6_scope_id; + uint16_t sin6_port; + } hopstore6; }; + union { + struct mbuf *m; /* the mbuf chain */ + void *mem; /* or memory pointer */ + }; + struct ipfw_flow_id f_id; /* grabbed from IP header */ }; MALLOC_DECLARE(M_IPFW); -/* - * Hooks sometime need to know the direction of the packet - * (divert, dummynet, netgraph, ...) - * We use a generic definition here, with bit0-1 indicating the - * direction, bit 2 indicating layer2 or 3, bit 3-4 indicating the - * specific protocol - * indicating the protocol (if necessary) - */ -enum { - DIR_MASK = 0x3, - DIR_OUT = 0, - DIR_IN = 1, - DIR_FWD = 2, - DIR_DROP = 3, - PROTO_LAYER2 = 0x4, /* set for layer 2 */ - /* PROTO_DEFAULT = 0, */ - PROTO_IPV4 = 0x08, - PROTO_IPV6 = 0x10, - PROTO_IFB = 0x0c, /* layer2 + ifbridge */ - /* PROTO_OLDBDG = 0x14, unused, old bridge */ -}; - /* wrapper for freeing a packet, in case we need to do more work */ #ifndef FREE_PKT #if defined(__linux__) || defined(_WIN32) @@ -150,8 +151,8 @@ int ipfw_chk(struct ip_fw_args *args); struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *, u_int32_t, u_int32_t, int); -/* attach (arg = 1) or detach (arg = 0) hooks */ -int ipfw_attach_hooks(int); +int ipfw_attach_hooks(void); +void ipfw_detach_hooks(void); #ifdef NOTYET void ipfw_nat_destroy(void); #endif @@ -162,10 +163,11 @@ struct ip_fw_chain; void ipfw_bpf_init(int); void ipfw_bpf_uninit(int); +void ipfw_bpf_tap(u_char *, u_int); +void ipfw_bpf_mtap(struct mbuf *); void ipfw_bpf_mtap2(void *, u_int, struct mbuf *); void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, - struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif, - u_short offset, uint32_t tablearg, struct ip *ip); + struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip); VNET_DECLARE(u_int64_t, norule_counter); #define V_norule_counter VNET(norule_counter) VNET_DECLARE(int, verbose_limit); @@ -296,6 +298,8 @@ struct ip_fw_chain { void **srvstate; /* runtime service mappings */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; +#else + struct rmlock rwmtx; #endif int static_len; /* total len of static rules (v0) */ uint32_t gencnt; /* NAT generation count */ @@ -436,23 +440,25 @@ struct ipfw_ifc { #define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #else /* FreeBSD */ #define IPFW_LOCK_INIT(_chain) do { \ + rm_init_flags(&(_chain)->rwmtx, "IPFW static rules", RM_RECURSE); \ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ } while (0) #define IPFW_LOCK_DESTROY(_chain) do { \ + rm_destroy(&(_chain)->rwmtx); \ rw_destroy(&(_chain)->uh_lock); \ } while (0) -#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_RLOCKED) -#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_WLOCKED) +#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_RLOCKED) +#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_WLOCKED) #define IPFW_RLOCK_TRACKER struct rm_priotracker _tracker -#define IPFW_RLOCK(p) rm_rlock(&V_pfil_lock, &_tracker) -#define IPFW_RUNLOCK(p) rm_runlock(&V_pfil_lock, &_tracker) -#define IPFW_WLOCK(p) rm_wlock(&V_pfil_lock) -#define IPFW_WUNLOCK(p) rm_wunlock(&V_pfil_lock) -#define IPFW_PF_RLOCK(p) -#define IPFW_PF_RUNLOCK(p) +#define IPFW_RLOCK(p) rm_rlock(&(p)->rwmtx, &_tracker) +#define IPFW_RUNLOCK(p) rm_runlock(&(p)->rwmtx, &_tracker) +#define IPFW_WLOCK(p) rm_wlock(&(p)->rwmtx) 
+#define IPFW_WUNLOCK(p) rm_wunlock(&(p)->rwmtx) +#define IPFW_PF_RLOCK(p) IPFW_RLOCK(p) +#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #endif #define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED) @@ -659,6 +665,7 @@ struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize); void ipfw_free_rule(struct ip_fw *rule); int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt); int ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint16_t kidx); +ipfw_insn *ipfw_get_action(struct ip_fw *); typedef int (sopt_handler_f)(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); diff --git a/freebsd/sys/netpfil/pf/if_pfsync.c b/freebsd/sys/netpfil/pf/if_pfsync.c index 026d19a3..9d87cf67 100644 --- a/freebsd/sys/netpfil/pf/if_pfsync.c +++ b/freebsd/sys/netpfil/pf/if_pfsync.c @@ -266,7 +266,7 @@ static void pfsync_push(struct pfsync_bucket *); static void pfsync_push_all(struct pfsync_softc *); static void pfsyncintr(void *); static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, - void *); + struct in_mfilter *imf); static void pfsync_multicast_cleanup(struct pfsync_softc *); static void pfsync_pointers_init(void); static void pfsync_pointers_uninit(void); @@ -337,6 +337,7 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) pfsync_buckets = mp_ncpus * 2; sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); + sc->sc_flags |= PFSYNCF_OK; sc->sc_maxupdates = 128; ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); @@ -364,7 +365,7 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) M_PFSYNC, M_ZERO | M_WAITOK); for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; - mtx_init(&b->b_mtx, pfsyncname, NULL, MTX_DEF); + mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); b->b_id = c; b->b_sc = sc; @@ -431,8 +432,7 @@ pfsync_clone_destroy(struct ifnet *ifp) pfsync_drop(sc); if_free(ifp); - if (sc->sc_imo.imo_membership) - pfsync_multicast_cleanup(sc); + pfsync_multicast_cleanup(sc); mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_bulk_mtx); @@ -1374,10 +1374,9 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCSETPFSYNC: { - struct ip_moptions *imo = &sc->sc_imo; + struct in_mfilter *imf = NULL; struct ifnet *sifp; struct ip *ip; - void *mship = NULL; if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) return (error); @@ -1397,8 +1396,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) pfsyncr.pfsyncr_syncpeer.s_addr == 0 || pfsyncr.pfsyncr_syncpeer.s_addr == htonl(INADDR_PFSYNC_GROUP))) - mship = malloc((sizeof(struct in_multi *) * - IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); + imf = ip_mfilter_alloc(M_WAITOK, 0, 0); PFSYNC_LOCK(sc); if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) @@ -1420,8 +1418,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = NULL; - if (imo->imo_membership) - pfsync_multicast_cleanup(sc); + pfsync_multicast_cleanup(sc); PFSYNC_UNLOCK(sc); break; } @@ -1437,14 +1434,13 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); } - if (imo->imo_membership) - pfsync_multicast_cleanup(sc); + pfsync_multicast_cleanup(sc); if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { - error = pfsync_multicast_setup(sc, sifp, mship); + error = pfsync_multicast_setup(sc, sifp, imf); if (error) { if_rele(sifp); - free(mship, M_PFSYNC); + ip_mfilter_free(imf); PFSYNC_UNLOCK(sc); return (error); } @@ -2354,7 +2350,8 @@ 
pfsyncintr(void *arg) } static int -pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) +pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, + struct in_mfilter *imf) { struct ip_moptions *imo = &sc->sc_imo; int error; @@ -2362,16 +2359,14 @@ pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) if (!(ifp->if_flags & IFF_MULTICAST)) return (EADDRNOTAVAIL); - imo->imo_membership = (struct in_multi **)mship; - imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; imo->imo_multicast_vif = -1; if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, - &imo->imo_membership[0])) != 0) { - imo->imo_membership = NULL; + &imf->imf_inm)) != 0) return (error); - } - imo->imo_num_memberships++; + + ip_mfilter_init(&imo->imo_head); + ip_mfilter_insert(&imo->imo_head, imf); imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; @@ -2383,10 +2378,13 @@ static void pfsync_multicast_cleanup(struct pfsync_softc *sc) { struct ip_moptions *imo = &sc->sc_imo; + struct in_mfilter *imf; - in_leavegroup(imo->imo_membership[0], NULL); - free(imo->imo_membership, M_PFSYNC); - imo->imo_membership = NULL; + while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { + ip_mfilter_remove(&imo->imo_head, imf); + in_leavegroup(imf->imf_inm, NULL); + ip_mfilter_free(imf); + } imo->imo_multicast_ifp = NULL; } @@ -2405,7 +2403,7 @@ pfsync_detach_ifnet(struct ifnet *ifp) * is going away. We do need to ensure we don't try to do * cleanup later. */ - sc->sc_imo.imo_membership = NULL; + ip_mfilter_init(&sc->sc_imo.imo_head); sc->sc_imo.imo_multicast_ifp = NULL; sc->sc_sync_if = NULL; } diff --git a/freebsd/sys/netpfil/pf/pf.c b/freebsd/sys/netpfil/pf/pf.c index 9b4653e2..c0f6459b 100644 --- a/freebsd/sys/netpfil/pf/pf.c +++ b/freebsd/sys/netpfil/pf/pf.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -93,8 +94,6 @@ __FBSDID("$FreeBSD$"); #include #include -#include /* XXX: only for DIR_IN/DIR_OUT */ - #ifdef INET6 #include #include @@ -115,10 +114,12 @@ __FBSDID("$FreeBSD$"); */ /* state tables */ -VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]); +VNET_DEFINE(struct pf_altqqueue, pf_altqs[4]); VNET_DEFINE(struct pf_palist, pf_pabuf); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); +VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_active); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive); +VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_inactive); VNET_DEFINE(struct pf_kstatus, pf_status); VNET_DEFINE(u_int32_t, ticket_altqs_active); @@ -360,7 +361,7 @@ VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); counter_u64_add(s->rule.ptr->states_cur, -1); \ } while (0) -static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); +MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); VNET_DEFINE(struct pf_keyhash *, pf_keyhash); VNET_DEFINE(struct pf_idhash *, pf_idhash); VNET_DEFINE(struct pf_srchash *, pf_srchash); @@ -862,9 +863,13 @@ pf_initialize() /* ALTQ */ TAILQ_INIT(&V_pf_altqs[0]); TAILQ_INIT(&V_pf_altqs[1]); + TAILQ_INIT(&V_pf_altqs[2]); + TAILQ_INIT(&V_pf_altqs[3]); TAILQ_INIT(&V_pf_pabuf); V_pf_altqs_active = &V_pf_altqs[0]; - V_pf_altqs_inactive = &V_pf_altqs[1]; + V_pf_altq_ifs_active = &V_pf_altqs[1]; + V_pf_altqs_inactive = &V_pf_altqs[2]; + V_pf_altq_ifs_inactive = &V_pf_altqs[3]; /* Send & overload+flush queues. 
*/ STAILQ_INIT(&V_pf_sendqueue); @@ -1560,7 +1565,7 @@ pf_state_expires(const struct pf_state *state) if (!timeout) timeout = V_pf_default_rule.timeout[state->timeout]; start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; - if (start) { + if (start && state->rule.ptr != &V_pf_default_rule) { end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; states = counter_u64_fetch(state->rule.ptr->states_cur); } else { @@ -3210,7 +3215,7 @@ pf_tcp_iss(struct pf_pdesc *pd) u_int32_t digest[4]; if (V_pf_tcp_secret_init == 0) { - read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); + arc4random_buf(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); MD5Init(&V_pf_tcp_secret_ctx); MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); @@ -4602,7 +4607,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, { struct pf_addr *saddr = pd->src, *daddr = pd->dst; u_int16_t icmpid = 0, *icmpsum; - u_int8_t icmptype; + u_int8_t icmptype, icmpcode; int state_icmp = 0; struct pf_state_key_cmp key; @@ -4611,6 +4616,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET case IPPROTO_ICMP: icmptype = pd->hdr.icmp->icmp_type; + icmpcode = pd->hdr.icmp->icmp_code; icmpid = pd->hdr.icmp->icmp_id; icmpsum = &pd->hdr.icmp->icmp_cksum; @@ -4625,6 +4631,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET6 case IPPROTO_ICMPV6: icmptype = pd->hdr.icmp6->icmp6_type; + icmpcode = pd->hdr.icmp6->icmp6_code; icmpid = pd->hdr.icmp6->icmp6_id; icmpsum = &pd->hdr.icmp6->icmp6_cksum; @@ -4823,6 +4830,23 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #endif /* INET6 */ } + if (PF_ANEQ(pd->dst, pd2.src, pd->af)) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD ICMP %d:%d outer dst: ", + icmptype, icmpcode); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); + printf(" inner src: "); + pf_print_host(pd2.src, 0, pd2.af); + printf(" -> "); + pf_print_host(pd2.dst, 0, pd2.af); + printf("\n"); + } + REASON_SET(reason, PFRES_BADSTATE); + return (PF_DROP); + } + switch (pd2.proto) { case IPPROTO_TCP: { struct tcphdr th; @@ -4879,7 +4903,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD ICMP %d:%d ", - icmptype, pd->hdr.icmp->icmp_code); + icmptype, icmpcode); pf_print_host(pd->src, 0, pd->af); printf(" -> "); pf_print_host(pd->dst, 0, pd->af); @@ -4892,7 +4916,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } else { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: OK ICMP %d:%d ", - icmptype, pd->hdr.icmp->icmp_code); + icmptype, icmpcode); pf_print_host(pd->src, 0, pd->af); printf(" -> "); pf_print_host(pd->dst, 0, pd->af); @@ -5249,7 +5273,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, nk->addr[pd->didx].v4.s_addr, 0); - break; + break; #endif /* INET */ #ifdef INET6 case AF_INET6: @@ -6159,7 +6183,7 @@ done: pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && - (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) + IN_LOOPBACK(ntohl(pd.dst->v4.s_addr))) m->m_flags |= M_SKIP_FIREWALL; if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && @@ -6190,7 +6214,7 @@ done: m->m_flags &= 
~M_FASTFWD_OURS; } } - ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); + ip_divert_ptr(*m0, dir == PF_IN); *m0 = NULL; return (action); @@ -6339,9 +6363,8 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip6_hdr *); -#if 1 /* - * we do not support jumbogram yet. if we keep going, zero ip6_plen + * we do not support jumbogram. if we keep going, zero ip6_plen * will do something bad, so drop the packet for now. */ if (htons(h->ip6_plen) == 0) { @@ -6349,7 +6372,6 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb REASON_SET(&reason, PFRES_NORM); /*XXX*/ goto done; } -#endif pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; diff --git a/freebsd/sys/netpfil/pf/pf_if.c b/freebsd/sys/netpfil/pf/pf_if.c index 4314bbce..44b6f7a3 100644 --- a/freebsd/sys/netpfil/pf/pf_if.c +++ b/freebsd/sys/netpfil/pf/pf_if.c @@ -302,13 +302,15 @@ pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) return (1); if (rule_kif->pfik_group != NULL) { - IF_ADDR_RLOCK(packet_kif->pfik_ifp); + struct epoch_tracker et; + + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) if (p->ifgl_group == rule_kif->pfik_group) { - IF_ADDR_RUNLOCK(packet_kif->pfik_ifp); + NET_EPOCH_EXIT(et); return (1); } - IF_ADDR_RUNLOCK(packet_kif->pfik_ifp); + NET_EPOCH_EXIT(et); } @@ -475,11 +477,13 @@ pfi_kif_update(struct pfi_kif *kif) /* again for all groups kif is member of */ if (kif->pfik_ifp != NULL) { - IF_ADDR_RLOCK(kif->pfik_ifp); + struct epoch_tracker et; + + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) pfi_kif_update((struct pfi_kif *) ifgl->ifgl_group->ifg_pf_kif); - IF_ADDR_RUNLOCK(kif->pfik_ifp); + NET_EPOCH_EXIT(et); } } @@ -515,10 +519,12 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); else if (kif->pfik_group != NULL) { - IFNET_RLOCK_NOSLEEP(); + struct epoch_tracker et; + + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) pfi_instance_add(ifgm->ifgm_ifp, net, flags); - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); } if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2, @@ -530,11 +536,12 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) static void pfi_instance_add(struct ifnet *ifp, int net, int flags) { + struct epoch_tracker et; struct ifaddr *ia; int got4 = 0, got6 = 0; int net2, af; - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { if (ia->ifa_addr == NULL) continue; @@ -592,7 +599,7 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) else pfi_address_add(ia->ifa_addr, af, net2); } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); } static void @@ -760,15 +767,17 @@ pfi_skip_if(const char *filter, struct pfi_kif *p) if (filter[n-1] >= '0' && filter[n-1] <= '9') return (1); /* group names may not end in a digit */ if (p->pfik_ifp != NULL) { - IF_ADDR_RLOCK(p->pfik_ifp); + struct epoch_tracker et; + + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(i, &p->pfik_ifp->if_groups, ifgl_next) { if (!strncmp(i->ifgl_group->ifg_group, filter, IFNAMSIZ)) { - IF_ADDR_RUNLOCK(p->pfik_ifp); + NET_EPOCH_EXIT(et); return (0); /* iface is in group "filter" */ } } - IF_ADDR_RUNLOCK(p->pfik_ifp); + NET_EPOCH_EXIT(et); } return (1); } diff --git 
a/freebsd/sys/netpfil/pf/pf_ioctl.c b/freebsd/sys/netpfil/pf/pf_ioctl.c index eaac7abc..06b308b5 100644 --- a/freebsd/sys/netpfil/pf/pf_ioctl.c +++ b/freebsd/sys/netpfil/pf/pf_ioctl.c @@ -48,11 +48,14 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -131,18 +134,40 @@ VNET_DEFINE_STATIC(int, pf_altq_running); #define TAGID_MAX 50000 struct pf_tagname { - TAILQ_ENTRY(pf_tagname) entries; + TAILQ_ENTRY(pf_tagname) namehash_entries; + TAILQ_ENTRY(pf_tagname) taghash_entries; char name[PF_TAG_NAME_SIZE]; uint16_t tag; int ref; }; -TAILQ_HEAD(pf_tags, pf_tagname); -#define V_pf_tags VNET(pf_tags) -VNET_DEFINE(struct pf_tags, pf_tags); -#define V_pf_qids VNET(pf_qids) -VNET_DEFINE(struct pf_tags, pf_qids); -static MALLOC_DEFINE(M_PFTAG, "pf_tag", "pf(4) tag names"); +struct pf_tagset { + TAILQ_HEAD(, pf_tagname) *namehash; + TAILQ_HEAD(, pf_tagname) *taghash; + unsigned int mask; + uint32_t seed; + BITSET_DEFINE(, TAGID_MAX) avail; +}; + +VNET_DEFINE(struct pf_tagset, pf_tags); +#define V_pf_tags VNET(pf_tags) +static unsigned int pf_rule_tag_hashsize; +#define PF_RULE_TAG_HASH_SIZE_DEFAULT 128 +SYSCTL_UINT(_net_pf, OID_AUTO, rule_tag_hashsize, CTLFLAG_RDTUN, + &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT, + "Size of pf(4) rule tag hashtable"); + +#ifdef ALTQ +VNET_DEFINE(struct pf_tagset, pf_qids); +#define V_pf_qids VNET(pf_qids) +static unsigned int pf_queue_tag_hashsize; +#define PF_QUEUE_TAG_HASH_SIZE_DEFAULT 128 +SYSCTL_UINT(_net_pf, OID_AUTO, queue_tag_hashsize, CTLFLAG_RDTUN, + &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT, + "Size of pf(4) queue tag hashtable"); +#endif +VNET_DEFINE(uma_zone_t, pf_tag_z); +#define V_pf_tag_z VNET(pf_tag_z) static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db"); static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); @@ -150,9 +175,14 @@ static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif -static u_int16_t tagname2tag(struct pf_tags *, char *); +static void pf_init_tagset(struct pf_tagset *, unsigned int *, + unsigned int); +static void pf_cleanup_tagset(struct pf_tagset *); +static uint16_t tagname2hashindex(const struct pf_tagset *, const char *); +static uint16_t tag2hashindex(const struct pf_tagset *, uint16_t); +static u_int16_t tagname2tag(struct pf_tagset *, char *); static u_int16_t pf_tagname2tag(char *); -static void tag_unref(struct pf_tags *, u_int16_t); +static void tag_unref(struct pf_tagset *, u_int16_t); #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x @@ -171,16 +201,16 @@ static void pf_tbladdr_copyout(struct pf_addr_wrap *); * Wrapper functions for pfil(9) hooks */ #ifdef INET -static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); -static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); +static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); +static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); #endif #ifdef INET6 -static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); -static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); +static pfil_return_t 
pf_check6_in(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); +static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); #endif static int hook_pf(void); @@ -438,68 +468,141 @@ pf_free_rule(struct pf_rule *rule) free(rule, M_PFRULE); } +static void +pf_init_tagset(struct pf_tagset *ts, unsigned int *tunable_size, + unsigned int default_size) +{ + unsigned int i; + unsigned int hashsize; + + if (*tunable_size == 0 || !powerof2(*tunable_size)) + *tunable_size = default_size; + + hashsize = *tunable_size; + ts->namehash = mallocarray(hashsize, sizeof(*ts->namehash), M_PFHASH, + M_WAITOK); + ts->taghash = mallocarray(hashsize, sizeof(*ts->taghash), M_PFHASH, + M_WAITOK); + ts->mask = hashsize - 1; + ts->seed = arc4random(); + for (i = 0; i < hashsize; i++) { + TAILQ_INIT(&ts->namehash[i]); + TAILQ_INIT(&ts->taghash[i]); + } + BIT_FILL(TAGID_MAX, &ts->avail); +} + +static void +pf_cleanup_tagset(struct pf_tagset *ts) +{ + unsigned int i; + unsigned int hashsize; + struct pf_tagname *t, *tmp; + + /* + * Only need to clean up one of the hashes as each tag is hashed + * into each table. + */ + hashsize = ts->mask + 1; + for (i = 0; i < hashsize; i++) + TAILQ_FOREACH_SAFE(t, &ts->namehash[i], namehash_entries, tmp) + uma_zfree(V_pf_tag_z, t); + + free(ts->namehash, M_PFHASH); + free(ts->taghash, M_PFHASH); +} + +static uint16_t +tagname2hashindex(const struct pf_tagset *ts, const char *tagname) +{ + + return (murmur3_32_hash(tagname, strlen(tagname), ts->seed) & ts->mask); +} + +static uint16_t +tag2hashindex(const struct pf_tagset *ts, uint16_t tag) +{ + + return (tag & ts->mask); +} + static u_int16_t -tagname2tag(struct pf_tags *head, char *tagname) +tagname2tag(struct pf_tagset *ts, char *tagname) { - struct pf_tagname *tag, *p = NULL; - u_int16_t new_tagid = 1; + struct pf_tagname *tag; + u_int32_t index; + u_int16_t new_tagid; PF_RULES_WASSERT(); - TAILQ_FOREACH(tag, head, entries) + index = tagname2hashindex(ts, tagname); + TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries) if (strcmp(tagname, tag->name) == 0) { tag->ref++; return (tag->tag); } /* + * new entry + * * to avoid fragmentation, we do a linear search from the beginning - * and take the first free slot we find. if there is none or the list - * is empty, append a new entry at the end. + * and take the first free slot we find. */ - - /* new entry */ - if (!TAILQ_EMPTY(head)) - for (p = TAILQ_FIRST(head); p != NULL && - p->tag == new_tagid; p = TAILQ_NEXT(p, entries)) - new_tagid = p->tag + 1; - - if (new_tagid > TAGID_MAX) + new_tagid = BIT_FFS(TAGID_MAX, &ts->avail); + /* + * Tags are 1-based, with valid tags in the range [1..TAGID_MAX]. + * BIT_FFS() returns a 1-based bit number, with 0 indicating no bits + * set. It may also return a bit number greater than TAGID_MAX due + * to rounding of the number of bits in the vector up to a multiple + * of the vector word size at declaration/allocation time. + */ + if ((new_tagid == 0) || (new_tagid > TAGID_MAX)) return (0); + /* Mark the tag as in use. 
Bits are 0-based for BIT_CLR() */ + BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail); + /* allocate and fill new struct pf_tagname */ - tag = malloc(sizeof(*tag), M_PFTAG, M_NOWAIT|M_ZERO); + tag = uma_zalloc(V_pf_tag_z, M_NOWAIT); if (tag == NULL) return (0); strlcpy(tag->name, tagname, sizeof(tag->name)); tag->tag = new_tagid; - tag->ref++; + tag->ref = 1; - if (p != NULL) /* insert new entry before p */ - TAILQ_INSERT_BEFORE(p, tag, entries); - else /* either list empty or no free slot in between */ - TAILQ_INSERT_TAIL(head, tag, entries); + /* Insert into namehash */ + TAILQ_INSERT_TAIL(&ts->namehash[index], tag, namehash_entries); + /* Insert into taghash */ + index = tag2hashindex(ts, new_tagid); + TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries); + return (tag->tag); } static void -tag_unref(struct pf_tags *head, u_int16_t tag) +tag_unref(struct pf_tagset *ts, u_int16_t tag) { - struct pf_tagname *p, *next; - + struct pf_tagname *t; + uint16_t index; + PF_RULES_WASSERT(); - for (p = TAILQ_FIRST(head); p != NULL; p = next) { - next = TAILQ_NEXT(p, entries); - if (tag == p->tag) { - if (--p->ref == 0) { - TAILQ_REMOVE(head, p, entries); - free(p, M_PFTAG); + index = tag2hashindex(ts, tag); + TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries) + if (tag == t->tag) { + if (--t->ref == 0) { + TAILQ_REMOVE(&ts->taghash[index], t, + taghash_entries); + index = tagname2hashindex(ts, t->name); + TAILQ_REMOVE(&ts->namehash[index], t, + namehash_entries); + /* Bits are 0-based for BIT_SET() */ + BIT_SET(TAGID_MAX, tag - 1, &ts->avail); + uma_zfree(V_pf_tag_z, t); } break; } - } } static u_int16_t @@ -524,22 +627,25 @@ pf_qid_unref(u_int32_t qid) static int pf_begin_altq(u_int32_t *ticket) { - struct pf_altq *altq; + struct pf_altq *altq, *tmp; int error = 0; PF_RULES_WASSERT(); - /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* Purge the old altq lists */ + TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); - } else - pf_qid_unref(altq->qid); + } + free(altq, M_PFALTQ); + } + TAILQ_INIT(V_pf_altq_ifs_inactive); + TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { + pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } + TAILQ_INIT(V_pf_altqs_inactive); if (error) return (error); *ticket = ++V_ticket_altqs_inactive; @@ -550,24 +656,27 @@ pf_begin_altq(u_int32_t *ticket) static int pf_rollback_altq(u_int32_t ticket) { - struct pf_altq *altq; + struct pf_altq *altq, *tmp; int error = 0; PF_RULES_WASSERT(); if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) return (0); - /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* Purge the old altq lists */ + TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); - } else - pf_qid_unref(altq->qid); + } + free(altq, M_PFALTQ); + } + TAILQ_INIT(V_pf_altq_ifs_inactive); + TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { + pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } + 
TAILQ_INIT(V_pf_altqs_inactive); V_altqs_inactive_open = 0; return (error); } @@ -575,8 +684,8 @@ pf_rollback_altq(u_int32_t ticket) static int pf_commit_altq(u_int32_t ticket) { - struct pf_altqqueue *old_altqs; - struct pf_altq *altq; + struct pf_altqqueue *old_altqs, *old_altq_ifs; + struct pf_altq *altq, *tmp; int err, error = 0; PF_RULES_WASSERT(); @@ -586,14 +695,16 @@ pf_commit_altq(u_int32_t ticket) /* swap altqs, keep the old. */ old_altqs = V_pf_altqs_active; + old_altq_ifs = V_pf_altq_ifs_active; V_pf_altqs_active = V_pf_altqs_inactive; + V_pf_altq_ifs_active = V_pf_altq_ifs_inactive; V_pf_altqs_inactive = old_altqs; + V_pf_altq_ifs_inactive = old_altq_ifs; V_ticket_altqs_active = V_ticket_altqs_inactive; /* Attach new disciplines */ - TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* attach the discipline */ error = altq_pfattach(altq); if (error == 0 && V_pf_altq_running) @@ -603,11 +714,9 @@ pf_commit_altq(u_int32_t ticket) } } - /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* Purge the old altq lists */ + TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ if (V_pf_altq_running) error = pf_disable_altq(altq); @@ -617,10 +726,15 @@ pf_commit_altq(u_int32_t ticket) err = altq_remove(altq); if (err != 0 && error == 0) error = err; - } else - pf_qid_unref(altq->qid); + } + free(altq, M_PFALTQ); + } + TAILQ_INIT(V_pf_altq_ifs_inactive); + TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { + pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } + TAILQ_INIT(V_pf_altqs_inactive); V_altqs_inactive_open = 0; return (error); @@ -677,14 +791,46 @@ pf_disable_altq(struct pf_altq *altq) return (error); } +static int +pf_altq_ifnet_event_add(struct ifnet *ifp, int remove, u_int32_t ticket, + struct pf_altq *altq) +{ + struct ifnet *ifp1; + int error = 0; + + /* Deactivate the interface in question */ + altq->local_flags &= ~PFALTQ_FLAG_IF_REMOVED; + if ((ifp1 = ifunit(altq->ifname)) == NULL || + (remove && ifp1 == ifp)) { + altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; + } else { + error = altq_add(ifp1, altq); + + if (ticket != V_ticket_altqs_inactive) + error = EBUSY; + + if (error) + free(altq, M_PFALTQ); + } + + return (error); +} + void pf_altq_ifnet_event(struct ifnet *ifp, int remove) { - struct ifnet *ifp1; struct pf_altq *a1, *a2, *a3; u_int32_t ticket; int error = 0; + /* + * No need to re-evaluate the configuration for events on interfaces + * that do not support ALTQ, as it's not possible for such + * interfaces to be part of the configuration. 
+ */ + if (!ALTQ_IS_READY(&ifp->if_snd)) + return; + /* Interrupt userland queue modifications */ if (V_altqs_inactive_open) pf_rollback_altq(V_ticket_altqs_inactive); @@ -694,7 +840,7 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) return; /* Copy the current active set */ - TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { + TAILQ_FOREACH(a1, V_pf_altq_ifs_active, entries) { a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); if (a2 == NULL) { error = ENOMEM; @@ -702,41 +848,43 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) } bcopy(a1, a2, sizeof(struct pf_altq)); - if (a2->qname[0] != 0) { - if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { - error = EBUSY; - free(a2, M_PFALTQ); - break; - } - a2->altq_disc = NULL; - TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) { - if (strncmp(a3->ifname, a2->ifname, - IFNAMSIZ) == 0 && a3->qname[0] == 0) { - a2->altq_disc = a3->altq_disc; - break; - } - } - } - /* Deactivate the interface in question */ - a2->local_flags &= ~PFALTQ_FLAG_IF_REMOVED; - if ((ifp1 = ifunit(a2->ifname)) == NULL || - (remove && ifp1 == ifp)) { - a2->local_flags |= PFALTQ_FLAG_IF_REMOVED; - } else { - error = altq_add(a2); + error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2); + if (error) + break; - if (ticket != V_ticket_altqs_inactive) - error = EBUSY; + TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, a2, entries); + } + if (error) + goto out; + TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { + a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); + if (a2 == NULL) { + error = ENOMEM; + break; + } + bcopy(a1, a2, sizeof(struct pf_altq)); - if (error) { - free(a2, M_PFALTQ); + if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { + error = EBUSY; + free(a2, M_PFALTQ); + break; + } + a2->altq_disc = NULL; + TAILQ_FOREACH(a3, V_pf_altq_ifs_inactive, entries) { + if (strncmp(a3->ifname, a2->ifname, + IFNAMSIZ) == 0) { + a2->altq_disc = a3->altq_disc; break; } } + error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2); + if (error) + break; TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); } +out: if (error != 0) pf_rollback_altq(ticket); else @@ -1214,6 +1362,28 @@ pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size) return (0); } + +static struct pf_altq * +pf_altq_get_nth_active(u_int32_t n) +{ + struct pf_altq *altq; + u_int32_t nr; + + nr = 0; + TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { + if (nr == n) + return (altq); + nr++; + } + + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { + if (nr == n) + return (altq); + nr++; + } + + return (NULL); +} #endif /* ALTQ */ static int @@ -2011,7 +2181,7 @@ relock_DIOCKILLSTATES: break; } - p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK); + p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= pf_hashmask; i++) { @@ -2273,9 +2443,8 @@ DIOCGETSTATES_full: PF_RULES_WLOCK(); /* enable all altq interfaces on active list */ - TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { - if (altq->qname[0] == 0 && (altq->local_flags & - PFALTQ_FLAG_IF_REMOVED) == 0) { + TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { error = pf_enable_altq(altq); if (error != 0) break; @@ -2293,9 +2462,8 @@ DIOCGETSTATES_full: PF_RULES_WLOCK(); /* disable all altq interfaces on active list */ - TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { - if (altq->qname[0] == 0 && (altq->local_flags & - PFALTQ_FLAG_IF_REMOVED) == 0) { + TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { 
error = pf_disable_altq(altq); if (error != 0) break; @@ -2340,9 +2508,9 @@ DIOCGETSTATES_full: break; } altq->altq_disc = NULL; - TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) { + TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) { if (strncmp(a->ifname, altq->ifname, - IFNAMSIZ) == 0 && a->qname[0] == 0) { + IFNAMSIZ) == 0) { altq->altq_disc = a->altq_disc; break; } @@ -2352,7 +2520,7 @@ DIOCGETSTATES_full: if ((ifp = ifunit(altq->ifname)) == NULL) altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; else - error = altq_add(altq); + error = altq_add(ifp, altq); if (error) { PF_RULES_WUNLOCK(); @@ -2360,7 +2528,10 @@ DIOCGETSTATES_full: break; } - TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); + if (altq->qname[0] != 0) + TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); + else + TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, altq, entries); /* version error check done on import above */ pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd)); PF_RULES_WUNLOCK(); @@ -2374,6 +2545,8 @@ DIOCGETSTATES_full: PF_RULES_RLOCK(); pa->nr = 0; + TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) + pa->nr++; TAILQ_FOREACH(altq, V_pf_altqs_active, entries) pa->nr++; pa->ticket = V_ticket_altqs_active; @@ -2385,7 +2558,6 @@ DIOCGETSTATES_full: case DIOCGETALTQV1: { struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq; - u_int32_t nr; PF_RULES_RLOCK(); if (pa->ticket != V_ticket_altqs_active) { @@ -2393,12 +2565,7 @@ DIOCGETSTATES_full: error = EBUSY; break; } - nr = 0; - altq = TAILQ_FIRST(V_pf_altqs_active); - while ((altq != NULL) && (nr < pa->nr)) { - altq = TAILQ_NEXT(altq, entries); - nr++; - } + altq = pf_altq_get_nth_active(pa->nr); if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; @@ -2419,7 +2586,6 @@ DIOCGETSTATES_full: case DIOCGETQSTATSV1: { struct pfioc_qstats_v1 *pq = (struct pfioc_qstats_v1 *)addr; struct pf_altq *altq; - u_int32_t nr; int nbytes; u_int32_t version; @@ -2430,12 +2596,7 @@ DIOCGETSTATES_full: break; } nbytes = pq->nbytes; - nr = 0; - altq = TAILQ_FIRST(V_pf_altqs_active); - while ((altq != NULL) && (nr < pq->nr)) { - altq = TAILQ_NEXT(altq, entries); - nr++; - } + altq = pf_altq_get_nth_active(pq->nr); if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; @@ -2954,24 +3115,20 @@ DIOCCHANGEADDR_error: break; } - PF_RULES_WLOCK(); + PF_RULES_RLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); io->pfrio_size = min(io->pfrio_size, n); + PF_RULES_RUNLOCK(); totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_NOWAIT); - if (pfrts == NULL) { - error = ENOMEM; - PF_RULES_WUNLOCK(); - break; - } + M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_TEMP); - PF_RULES_WUNLOCK(); break; } + PF_RULES_WLOCK(); error = pfr_set_tflags(pfrts, io->pfrio_size, io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); @@ -3589,19 +3746,25 @@ DIOCCHANGEADDR_error: struct pf_src_node *n, *p, *pstore; uint32_t i, nr = 0; + for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) + nr++; + PF_HASHROW_UNLOCK(sh); + } + + psn->psn_len = min(psn->psn_len, + sizeof(struct pf_src_node) * nr); + if (psn->psn_len == 0) { - for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; - i++, sh++) { - PF_HASHROW_LOCK(sh); - LIST_FOREACH(n, &sh->nodes, entry) - nr++; - PF_HASHROW_UNLOCK(sh); - } psn->psn_len = sizeof(struct pf_src_node) * nr; 
break; } - p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK); + nr = 0; + + p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO); for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); @@ -3997,65 +4160,59 @@ shutdown_pf(void) /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have their own cleanup code */ - - /* Free counters last as we updated them during shutdown. */ - counter_u64_free(V_pf_default_rule.states_cur); - counter_u64_free(V_pf_default_rule.states_tot); - counter_u64_free(V_pf_default_rule.src_nodes); - - for (int i = 0; i < PFRES_MAX; i++) - counter_u64_free(V_pf_status.counters[i]); - for (int i = 0; i < LCNT_MAX; i++) - counter_u64_free(V_pf_status.lcounters[i]); - for (int i = 0; i < FCNT_MAX; i++) - counter_u64_free(V_pf_status.fcounters[i]); - for (int i = 0; i < SCNT_MAX; i++) - counter_u64_free(V_pf_status.scounters[i]); } while(0); return (error); } +static pfil_return_t +pf_check_return(int chk, struct mbuf **m) +{ + + switch (chk) { + case PF_PASS: + if (*m == NULL) + return (PFIL_CONSUMED); + else + return (PFIL_PASS); + break; + default: + if (*m != NULL) { + m_freem(*m); + *m = NULL; + } + return (PFIL_DROPPED); + } +} + #ifdef INET -static int -pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; chk = pf_test(PF_IN, flags, ifp, m, inp); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - if (chk != PF_PASS) - return (EACCES); - return (0); + return (pf_check_return(chk, m)); } -static int -pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; chk = pf_test(PF_OUT, flags, ifp, m, inp); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - if (chk != PF_PASS) - return (EACCES); - return (0); + return (pf_check_return(chk, m)); } #endif #ifdef INET6 -static int -pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; @@ -4067,67 +4224,89 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_IN, flags, (*m)->m_flags & M_LOOP ? 
V_loif : ifp, m, inp); CURVNET_RESTORE(); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - if (chk != PF_PASS) - return (EACCES); - return (0); + + return (pf_check_return(chk, m)); } -static int -pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_OUT, flags, ifp, m, inp); CURVNET_RESTORE(); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - if (chk != PF_PASS) - return (EACCES); - return (0); + + return (pf_check_return(chk, m)); } #endif /* INET6 */ -static int -hook_pf(void) -{ #ifdef INET - struct pfil_head *pfh_inet; +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook); +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook); +#define V_pf_ip4_in_hook VNET(pf_ip4_in_hook) +#define V_pf_ip4_out_hook VNET(pf_ip4_out_hook) #endif #ifdef INET6 - struct pfil_head *pfh_inet6; +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook); +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook); +#define V_pf_ip6_in_hook VNET(pf_ip6_in_hook) +#define V_pf_ip6_out_hook VNET(pf_ip6_out_hook) #endif +static int +hook_pf(void) +{ + struct pfil_hook_args pha; + struct pfil_link_args pla; + if (V_pf_pfil_hooked) return (0); + pha.pa_version = PFIL_VERSION; + pha.pa_modname = "pf"; + pha.pa_ruleset = NULL; + + pla.pa_version = PFIL_VERSION; + #ifdef INET - pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); - if (pfh_inet == NULL) - return (ESRCH); /* XXX */ - pfil_add_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); - pfil_add_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); + pha.pa_type = PFIL_TYPE_IP4; + pha.pa_func = pf_check_in; + pha.pa_flags = PFIL_IN; + pha.pa_rulname = "default-in"; + V_pf_ip4_in_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet_pfil_head; + pla.pa_hook = V_pf_ip4_in_hook; + (void)pfil_link(&pla); + pha.pa_func = pf_check_out; + pha.pa_flags = PFIL_OUT; + pha.pa_rulname = "default-out"; + V_pf_ip4_out_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet_pfil_head; + pla.pa_hook = V_pf_ip4_out_hook; + (void)pfil_link(&pla); #endif #ifdef INET6 - pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); - if (pfh_inet6 == NULL) { -#ifdef INET - pfil_remove_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet); - pfil_remove_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet); -#endif - return (ESRCH); /* XXX */ - } - pfil_add_hook_flags(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); - pfil_add_hook_flags(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); + pha.pa_type = PFIL_TYPE_IP6; + pha.pa_func = pf_check6_in; + pha.pa_flags = PFIL_IN; + pha.pa_rulname = "default-in6"; + V_pf_ip6_in_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet6_pfil_head; + pla.pa_hook = V_pf_ip6_in_hook; + (void)pfil_link(&pla); + pha.pa_func = pf_check6_out; + pha.pa_rulname = "default-out6"; + pha.pa_flags = PFIL_OUT; + V_pf_ip6_out_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet6_pfil_head; + pla.pa_hook = V_pf_ip6_out_hook; + (void)pfil_link(&pla); #endif V_pf_pfil_hooked = 1; @@ -4137,33 +4316,17 @@ hook_pf(void) static int dehook_pf(void) { -#ifdef INET - struct pfil_head *pfh_inet; -#endif -#ifdef 
INET6 - struct pfil_head *pfh_inet6; -#endif if (V_pf_pfil_hooked == 0) return (0); #ifdef INET - pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); - if (pfh_inet == NULL) - return (ESRCH); /* XXX */ - pfil_remove_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet); - pfil_remove_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet); + pfil_remove_hook(V_pf_ip4_in_hook); + pfil_remove_hook(V_pf_ip4_out_hook); #endif #ifdef INET6 - pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); - if (pfh_inet6 == NULL) - return (ESRCH); /* XXX */ - pfil_remove_hook_flags(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet6); - pfil_remove_hook_flags(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet6); + pfil_remove_hook(V_pf_ip6_in_hook); + pfil_remove_hook(V_pf_ip6_out_hook); #endif V_pf_pfil_hooked = 0; @@ -4173,8 +4336,15 @@ dehook_pf(void) static void pf_load_vnet(void) { - TAILQ_INIT(&V_pf_tags); - TAILQ_INIT(&V_pf_qids); + V_pf_tag_z = uma_zcreate("pf tags", sizeof(struct pf_tagname), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize, + PF_RULE_TAG_HASH_SIZE_DEFAULT); +#ifdef ALTQ + pf_init_tagset(&V_pf_qids, &pf_queue_tag_hashsize, + PF_QUEUE_TAG_HASH_SIZE_DEFAULT); +#endif pfattach_vnet(); V_pf_vnet_active = 1; @@ -4191,7 +4361,7 @@ pf_load(void) pf_mtag_initialize(); - pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME); + pf_dev = make_dev(&pf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, PF_NAME); if (pf_dev == NULL) return (ENOMEM); @@ -4241,6 +4411,26 @@ pf_unload_vnet(void) pf_cleanup(); if (IS_DEFAULT_VNET(curvnet)) pf_mtag_cleanup(); + + pf_cleanup_tagset(&V_pf_tags); +#ifdef ALTQ + pf_cleanup_tagset(&V_pf_qids); +#endif + uma_zdestroy(V_pf_tag_z); + + /* Free counters last as we updated them during shutdown. */ + counter_u64_free(V_pf_default_rule.states_cur); + counter_u64_free(V_pf_default_rule.states_tot); + counter_u64_free(V_pf_default_rule.src_nodes); + + for (int i = 0; i < PFRES_MAX; i++) + counter_u64_free(V_pf_status.counters[i]); + for (int i = 0; i < LCNT_MAX; i++) + counter_u64_free(V_pf_status.lcounters[i]); + for (int i = 0; i < FCNT_MAX; i++) + counter_u64_free(V_pf_status.fcounters[i]); + for (int i = 0; i < SCNT_MAX; i++) + counter_u64_free(V_pf_status.scounters[i]); } #endif /* __rtems__ */ diff --git a/freebsd/sys/netpfil/pf/pf_norm.c b/freebsd/sys/netpfil/pf/pf_norm.c index 9538e97c..eb25bbc8 100644 --- a/freebsd/sys/netpfil/pf/pf_norm.c +++ b/freebsd/sys/netpfil/pf/pf_norm.c @@ -838,11 +838,11 @@ pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr, } /* We have all the data. 
*/ + frent = TAILQ_FIRST(&frag->fr_queue); + KASSERT(frent != NULL, ("frent != NULL")); extoff = frent->fe_extoff; maxlen = frag->fr_maxlen; frag_id = frag->fr_id; - frent = TAILQ_FIRST(&frag->fr_queue); - KASSERT(frent != NULL, ("frent != NULL")); total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag); @@ -1141,9 +1141,8 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, int off; struct ip6_ext ext; struct ip6_opt opt; - struct ip6_opt_jumbo jumbo; struct ip6_frag frag; - u_int32_t jumbolen = 0, plen; + u_int32_t plen; int optend; int ooff; u_int8_t proto; @@ -1187,6 +1186,11 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) goto drop; + plen = ntohs(h->ip6_plen); + /* jumbo payload option not supported */ + if (plen == 0) + goto drop; + extoff = 0; off = sizeof(struct ip6_hdr); proto = h->ip6_nxt; @@ -1230,26 +1234,8 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, goto shortpkt; if (ooff + sizeof(opt) + opt.ip6o_len > optend) goto drop; - switch (opt.ip6o_type) { - case IP6OPT_JUMBO: - if (h->ip6_plen != 0) - goto drop; - if (!pf_pull_hdr(m, ooff, &jumbo, - sizeof(jumbo), NULL, NULL, - AF_INET6)) - goto shortpkt; - memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, - sizeof(jumbolen)); - jumbolen = ntohl(jumbolen); - if (jumbolen <= IPV6_MAXPACKET) - goto drop; - if (sizeof(struct ip6_hdr) + jumbolen != - m->m_pkthdr.len) - goto drop; - break; - default: - break; - } + if (opt.ip6o_type == IP6OPT_JUMBO) + goto drop; ooff += sizeof(opt) + opt.ip6o_len; } while (ooff < optend); @@ -1262,13 +1248,6 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, } } while (!terminal); - /* jumbo payload option must be present, or plen > 0 */ - if (ntohs(h->ip6_plen) == 0) - plen = jumbolen; - else - plen = ntohs(h->ip6_plen); - if (plen == 0) - goto drop; if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) goto shortpkt; @@ -1277,10 +1256,6 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, return (PF_PASS); fragment: - /* Jumbo payload packets cannot be fragmented. 
 	 */
-	plen = ntohs(h->ip6_plen);
-	if (plen == 0 || jumbolen)
-		goto drop;
 	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
 		goto shortpkt;
 
diff --git a/freebsd/sys/netpfil/pf/pf_table.c b/freebsd/sys/netpfil/pf/pf_table.c
index 3f15fb0e..96ed849c 100644
--- a/freebsd/sys/netpfil/pf/pf_table.c
+++ b/freebsd/sys/netpfil/pf/pf_table.c
@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 
+#define	DPFPRINTF(n, x)	if (V_pf_status.debug >= (n)) printf x
+
 #define	ACCEPT_FLAGS(flags, oklist)		\
 	do {					\
 		if ((flags & ~(oklist)) &	\
@@ -113,6 +115,7 @@ struct pfr_walktree {
 		struct pfi_dynaddr	*pfrw1_dyn;
 	}	 pfrw_1;
 	int	 pfrw_free;
+	int	 pfrw_flags;
 };
 #define	pfrw_addr	pfrw_1.pfrw1_addr
 #define	pfrw_astats	pfrw_1.pfrw1_astats
@@ -126,15 +129,16 @@ struct pfr_walktree {
 static MALLOC_DEFINE(M_PFTABLE, "pf_table", "pf(4) tables structures");
 VNET_DEFINE_STATIC(uma_zone_t, pfr_kentry_z);
 #define	V_pfr_kentry_z		VNET(pfr_kentry_z)
-VNET_DEFINE_STATIC(uma_zone_t, pfr_kcounters_z);
-#define	V_pfr_kcounters_z	VNET(pfr_kcounters_z)
 
 static struct pf_addr	 pfr_ffaddr = {
 	.addr32 = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }
 };
 
+static void		 pfr_copyout_astats(struct pfr_astats *,
+			    const struct pfr_kentry *,
+			    const struct pfr_walktree *);
 static void		 pfr_copyout_addr(struct pfr_addr *,
-			    struct pfr_kentry *ke);
+			    const struct pfr_kentry *ke);
 static int		 pfr_validate_addr(struct pfr_addr *);
 static void		 pfr_enqueue_addrs(struct pfr_ktable *,
 			    struct pfr_kentryworkq *, int *, int);
@@ -142,8 +146,12 @@ static void		 pfr_mark_addrs(struct pfr_ktable *);
 static struct pfr_kentry
 		*pfr_lookup_addr(struct pfr_ktable *,
 		    struct pfr_addr *, int);
+static bool		 pfr_create_kentry_counter(struct pfr_kcounters *,
+			    int, int);
 static struct pfr_kentry *pfr_create_kentry(struct pfr_addr *);
 static void		 pfr_destroy_kentries(struct pfr_kentryworkq *);
+static void		 pfr_destroy_kentry_counter(struct pfr_kcounters *,
+			    int, int);
 static void		 pfr_destroy_kentry(struct pfr_kentry *);
 static void		 pfr_insert_kentries(struct pfr_ktable *,
 			    struct pfr_kentryworkq *, long);
@@ -202,9 +210,6 @@ pfr_initialize(void)
 	V_pfr_kentry_z = uma_zcreate("pf table entries",
 	    sizeof(struct pfr_kentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 	    0);
-	V_pfr_kcounters_z = uma_zcreate("pf table counters",
-	    sizeof(struct pfr_kcounters), NULL, NULL, NULL, NULL,
-	    UMA_ALIGN_PTR, 0);
 	V_pf_limits[PF_LIMIT_TABLE_ENTRIES].zone = V_pfr_kentry_z;
 	V_pf_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT;
 }
@@ -214,7 +219,6 @@
 pfr_cleanup(void)
 {
 	uma_zdestroy(V_pfr_kentry_z);
-	uma_zdestroy(V_pfr_kcounters_z);
 }
 
 int
@@ -608,6 +612,13 @@ pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size,
 	w.pfrw_op = PFRW_GET_ASTATS;
 	w.pfrw_astats = addr;
 	w.pfrw_free = kt->pfrkt_cnt;
+	/*
+	 * Flags below are for backward compatibility. It was possible to have
+	 * a table without per-entry counters. Now they are always allocated,
+	 * we just discard data when reading it if table is not configured to
+	 * have counters.
+	 */
+	w.pfrw_flags = kt->pfrkt_flags;
 	rv = kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w);
 	if (!rv)
 		rv = kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh,
@@ -774,10 +785,30 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
 	return (ke);
 }
 
+static bool
+pfr_create_kentry_counter(struct pfr_kcounters *kc, int pfr_dir, int pfr_op)
+{
+	kc->pfrkc_packets[pfr_dir][pfr_op] = counter_u64_alloc(M_NOWAIT);
+	if (! kc->pfrkc_packets[pfr_dir][pfr_op])
+		return (false);
+
+	kc->pfrkc_bytes[pfr_dir][pfr_op] = counter_u64_alloc(M_NOWAIT);
+	if (! kc->pfrkc_bytes[pfr_dir][pfr_op]) {
+		/* Previous allocation will be freed through
+		 * pfr_destroy_kentry() */
+		return (false);
+	}
+
+	kc->pfrkc_tzero = 0;
+
+	return (true);
+}
+
 static struct pfr_kentry *
 pfr_create_kentry(struct pfr_addr *ad)
 {
 	struct pfr_kentry	*ke;
+	int pfr_dir, pfr_op;
 
 	ke = uma_zalloc(V_pfr_kentry_z, M_NOWAIT | M_ZERO);
 	if (ke == NULL)
@@ -790,6 +821,14 @@ pfr_create_kentry(struct pfr_addr *ad)
 	ke->pfrke_af = ad->pfra_af;
 	ke->pfrke_net = ad->pfra_net;
 	ke->pfrke_not = ad->pfra_not;
+	for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++)
+		for (pfr_op = 0; pfr_op < PFR_OP_ADDR_MAX; pfr_op ++) {
+			if (! pfr_create_kentry_counter(&ke->pfrke_counters,
+			    pfr_dir, pfr_op)) {
+				pfr_destroy_kentry(ke);
+				return (NULL);
+			}
+		}
 
 	return (ke);
 }
@@ -804,11 +843,23 @@ pfr_destroy_kentries(struct pfr_kentryworkq *workq)
 	}
 }
 
+static void
+pfr_destroy_kentry_counter(struct pfr_kcounters *kc, int pfr_dir, int pfr_op)
+{
+	counter_u64_free(kc->pfrkc_packets[pfr_dir][pfr_op]);
+	counter_u64_free(kc->pfrkc_bytes[pfr_dir][pfr_op]);
+}
+
 static void
 pfr_destroy_kentry(struct pfr_kentry *ke)
 {
-	if (ke->pfrke_counters)
-		uma_zfree(V_pfr_kcounters_z, ke->pfrke_counters);
+	int pfr_dir, pfr_op;
+
+	for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++)
+		for (pfr_op = 0; pfr_op < PFR_OP_ADDR_MAX; pfr_op ++)
+			pfr_destroy_kentry_counter(&ke->pfrke_counters,
+			    pfr_dir, pfr_op);
+
 	uma_zfree(V_pfr_kentry_z, ke);
 }
 
@@ -826,7 +877,7 @@ pfr_insert_kentries(struct pfr_ktable *kt,
 			    "(code=%d).\n", rv);
 			break;
 		}
-		p->pfrke_tzero = tzero;
+		p->pfrke_counters.pfrkc_tzero = tzero;
 		n++;
 	}
 	kt->pfrkt_cnt += n;
@@ -849,7 +900,7 @@ pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero)
 	if (rv)
 		return (rv);
 
-	p->pfrke_tzero = tzero;
+	p->pfrke_counters.pfrkc_tzero = tzero;
 	kt->pfrkt_cnt++;
 
 	return (0);
@@ -884,15 +935,20 @@ static void
 pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange)
 {
 	struct pfr_kentry	*p;
+	int			 pfr_dir, pfr_op;
 
 	SLIST_FOREACH(p, workq, pfrke_workq) {
 		if (negchange)
 			p->pfrke_not = !p->pfrke_not;
-		if (p->pfrke_counters) {
-			uma_zfree(V_pfr_kcounters_z, p->pfrke_counters);
-			p->pfrke_counters = NULL;
+		for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
+			for (pfr_op = 0; pfr_op < PFR_OP_ADDR_MAX; pfr_op ++) {
+				counter_u64_zero(p->pfrke_counters.
+				    pfrkc_packets[pfr_dir][pfr_op]);
+				counter_u64_zero(p->pfrke_counters.
+				    pfrkc_bytes[pfr_dir][pfr_op]);
+			}
 		}
-		p->pfrke_tzero = tzero;
+		p->pfrke_counters.pfrkc_tzero = tzero;
 	}
 }
 
@@ -981,7 +1037,7 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
 }
 
 static void
-pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke)
+pfr_copyout_addr(struct pfr_addr *ad, const struct pfr_kentry *ke)
 {
 	bzero(ad, sizeof(*ad));
 	if (ke == NULL)
@@ -995,6 +1051,33 @@ pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke)
 		ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr;
 }
 
+static void
+pfr_copyout_astats(struct pfr_astats *as, const struct pfr_kentry *ke,
+    const struct pfr_walktree *w)
+{
+	int dir, op;
+	const struct pfr_kcounters *kc = &ke->pfrke_counters;
+
+	pfr_copyout_addr(&as->pfras_a, ke);
+	as->pfras_tzero = kc->pfrkc_tzero;
+
+	if (! (w->pfrw_flags & PFR_TFLAG_COUNTERS)) {
+		bzero(as->pfras_packets, sizeof(as->pfras_packets));
+		bzero(as->pfras_bytes, sizeof(as->pfras_bytes));
+		as->pfras_a.pfra_fback = PFR_FB_NOCOUNT;
+		return;
+	}
+
+	for (dir = 0; dir < PFR_DIR_MAX; dir ++) {
+		for (op = 0; op < PFR_OP_ADDR_MAX; op ++) {
+			as->pfras_packets[dir][op] =
+			    counter_u64_fetch(kc->pfrkc_packets[dir][op]);
+			as->pfras_bytes[dir][op] =
+			    counter_u64_fetch(kc->pfrkc_bytes[dir][op]);
+		}
+	}
+}
+
 static int
 pfr_walktree(struct radix_node *rn, void *arg)
 {
@@ -1023,19 +1106,7 @@ pfr_walktree(struct radix_node *rn, void *arg)
 		if (w->pfrw_free-- > 0) {
 			struct pfr_astats as;
 
-			pfr_copyout_addr(&as.pfras_a, ke);
-
-			if (ke->pfrke_counters) {
-				bcopy(ke->pfrke_counters->pfrkc_packets,
-				    as.pfras_packets, sizeof(as.pfras_packets));
-				bcopy(ke->pfrke_counters->pfrkc_bytes,
-				    as.pfras_bytes, sizeof(as.pfras_bytes));
-			} else {
-				bzero(as.pfras_packets, sizeof(as.pfras_packets));
-				bzero(as.pfras_bytes, sizeof(as.pfras_bytes));
-				as.pfras_a.pfra_fback = PFR_FB_NOCOUNT;
-			}
-			as.pfras_tzero = ke->pfrke_tzero;
+			pfr_copyout_astats(&as, ke, w);
 
 			bcopy(&as, w->pfrw_astats, sizeof(as));
 			w->pfrw_astats++;
@@ -1260,6 +1331,7 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
 	struct pfr_ktableworkq	 workq;
 	int			 n, nn;
 	long			 tzero = time_second;
+	int			 pfr_dir, pfr_op;
 
 	/* XXX PFR_FLAG_CLSTATS disabled */
 	ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
@@ -1278,7 +1350,25 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
 			continue;
 		if (n-- <= 0)
 			continue;
-		bcopy(&p->pfrkt_ts, tbl++, sizeof(*tbl));
+		bcopy(&p->pfrkt_kts.pfrts_t, &tbl->pfrts_t,
+		    sizeof(struct pfr_table));
+		for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
+			for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
+				tbl->pfrts_packets[pfr_dir][pfr_op] =
+				    counter_u64_fetch(
+					p->pfrkt_packets[pfr_dir][pfr_op]);
+				tbl->pfrts_bytes[pfr_dir][pfr_op] =
+				    counter_u64_fetch(
+					p->pfrkt_bytes[pfr_dir][pfr_op]);
+			}
+		}
+		tbl->pfrts_match = counter_u64_fetch(p->pfrkt_match);
+		tbl->pfrts_nomatch = counter_u64_fetch(p->pfrkt_nomatch);
+		tbl->pfrts_tzero = p->pfrkt_tzero;
+		tbl->pfrts_cnt = p->pfrkt_cnt;
+		for (pfr_op = 0; pfr_op < PFR_REFCNT_MAX; pfr_op++)
+			tbl->pfrts_refcnt[pfr_op] = p->pfrkt_refcnt[pfr_op];
+		tbl++;
 		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
 	}
 	if (flags & PFR_FLAG_CLSTATS)
@@ -1612,7 +1702,7 @@ pfr_commit_ktable(struct pfr_ktable *kt, long tzero)
 				q->pfrke_mark = 1;
 				SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq);
 			} else {
-				p->pfrke_tzero = tzero;
+				p->pfrke_counters.pfrkc_tzero = tzero;
 				SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
 			}
 		}
@@ -1796,14 +1886,20 @@ static void
 pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse)
 {
 	struct pfr_kentryworkq	 addrq;
+	int			 pfr_dir, pfr_op;
 
 	if (recurse) {
 		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
 		pfr_clstats_kentries(&addrq, tzero, 0);
 	}
-	bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets));
-	bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes));
-	kt->pfrkt_match = kt->pfrkt_nomatch = 0;
+	for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
+		for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
+			counter_u64_zero(kt->pfrkt_packets[pfr_dir][pfr_op]);
+			counter_u64_zero(kt->pfrkt_bytes[pfr_dir][pfr_op]);
+		}
+	}
+	counter_u64_zero(kt->pfrkt_match);
+	counter_u64_zero(kt->pfrkt_nomatch);
 	kt->pfrkt_tzero = tzero;
 }
 
@@ -1812,6 +1908,7 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset)
 {
 	struct pfr_ktable	*kt;
 	struct pf_ruleset	*rs;
+	int			 pfr_dir, pfr_op;
 
 	PF_RULES_WASSERT();
 
@@ -1830,6 +1927,34 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset)
 		rs->tables++;
 	}
 
+	for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
+		for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
+			kt->pfrkt_packets[pfr_dir][pfr_op] =
+			    counter_u64_alloc(M_NOWAIT);
+			if (! kt->pfrkt_packets[pfr_dir][pfr_op]) {
+				pfr_destroy_ktable(kt, 0);
+				return (NULL);
+			}
+			kt->pfrkt_bytes[pfr_dir][pfr_op] =
+			    counter_u64_alloc(M_NOWAIT);
+			if (! kt->pfrkt_bytes[pfr_dir][pfr_op]) {
+				pfr_destroy_ktable(kt, 0);
+				return (NULL);
+			}
+		}
+	}
+	kt->pfrkt_match = counter_u64_alloc(M_NOWAIT);
+	if (! kt->pfrkt_match) {
+		pfr_destroy_ktable(kt, 0);
+		return (NULL);
+	}
+
+	kt->pfrkt_nomatch = counter_u64_alloc(M_NOWAIT);
+	if (! kt->pfrkt_nomatch) {
+		pfr_destroy_ktable(kt, 0);
+		return (NULL);
+	}
+
 	if (!rn_inithead((void **)&kt->pfrkt_ip4,
 	    offsetof(struct sockaddr_in, sin_addr) * 8) ||
 	    !rn_inithead((void **)&kt->pfrkt_ip6,
@@ -1857,6 +1982,7 @@ static void
 pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
 {
 	struct pfr_kentryworkq	 addrq;
+	int			 pfr_dir, pfr_op;
 
 	if (flushaddr) {
 		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
@@ -1873,6 +1999,15 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
 		kt->pfrkt_rs->tables--;
 		pf_remove_if_empty_ruleset(kt->pfrkt_rs);
 	}
+	for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
+		for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
+			counter_u64_free(kt->pfrkt_packets[pfr_dir][pfr_op]);
+			counter_u64_free(kt->pfrkt_bytes[pfr_dir][pfr_op]);
+		}
+	}
+	counter_u64_free(kt->pfrkt_match);
+	counter_u64_free(kt->pfrkt_nomatch);
+
 	free(kt, M_PFTABLE);
 }
 
@@ -1941,9 +2076,9 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
 	}
 	match = (ke && !ke->pfrke_not);
 	if (match)
-		kt->pfrkt_match++;
+		counter_u64_add(kt->pfrkt_match, 1);
 	else
-		kt->pfrkt_nomatch++;
+		counter_u64_add(kt->pfrkt_nomatch, 1);
 	return (match);
 }
 
@@ -1994,20 +2129,18 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
 	}
 	if ((ke == NULL || ke->pfrke_not) != notrule) {
 		if (op_pass != PFR_OP_PASS)
-			printf("pfr_update_stats: assertion failed.\n");
+			DPFPRINTF(PF_DEBUG_URGENT,
+			    ("pfr_update_stats: assertion failed.\n"));
 		op_pass = PFR_OP_XPASS;
 	}
-	kt->pfrkt_packets[dir_out][op_pass]++;
-	kt->pfrkt_bytes[dir_out][op_pass] += len;
+	counter_u64_add(kt->pfrkt_packets[dir_out][op_pass], 1);
+	counter_u64_add(kt->pfrkt_bytes[dir_out][op_pass], len);
 	if (ke != NULL && op_pass != PFR_OP_XPASS &&
 	    (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) {
-		if (ke->pfrke_counters == NULL)
-			ke->pfrke_counters = uma_zalloc(V_pfr_kcounters_z,
-			    M_NOWAIT | M_ZERO);
-		if (ke->pfrke_counters != NULL) {
-			ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++;
-			ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len;
-		}
+		counter_u64_add(ke->pfrke_counters.
+		    pfrkc_packets[dir_out][op_pass], 1);
+		counter_u64_add(ke->pfrke_counters.
+		    pfrkc_bytes[dir_out][op_pass], len);
 	}
 }
 
@@ -2097,7 +2230,7 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
 _next_block:
 	ke = pfr_kentry_byidx(kt, idx, af);
 	if (ke == NULL) {
-		kt->pfrkt_nomatch++;
+		counter_u64_add(kt->pfrkt_nomatch, 1);
 		return (1);
 	}
 	pfr_prepare_network(&umask, af, ke->pfrke_net);
@@ -2122,7 +2255,7 @@ _next_block:
 		/* this is a single IP address - no possible nested block */
 		PF_ACPY(counter, addr, af);
 		*pidx = idx;
-		kt->pfrkt_match++;
+		counter_u64_add(kt->pfrkt_match, 1);
 		return (0);
 	}
 	for (;;) {
@@ -2142,7 +2275,7 @@ _next_block:
 		/* lookup return the same block - perfect */
 		PF_ACPY(counter, addr, af);
 		*pidx = idx;
-		kt->pfrkt_match++;
+		counter_u64_add(kt->pfrkt_match, 1);
 		return (0);
 	}
 
--
cgit v1.2.3
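
Note: the pf_table.c hunks above all apply one pattern: every plain u_int64_t
statistic becomes a per-CPU counter(9) counter, so allocation sites must
tolerate M_NOWAIT failure, hot-path updates become counter_u64_add(), and
export paths fold the per-CPU values with counter_u64_fetch() before copying
out. The sketch below illustrates that pattern in isolation under stated
assumptions: counter_u64_alloc/add/fetch/free are the real <sys/counter.h>
kernel API, but demo_kcounters, DEMO_DIR_MAX, and DEMO_OP_MAX are hypothetical
stand-ins for pfr_kcounters, PFR_DIR_MAX, and PFR_OP_ADDR_MAX, not pf's actual
types. It is kernel-context code, not a definitive copy of the patch.

/* Minimal counter(9) usage sketch (kernel context only). */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/errno.h>
#include <sys/malloc.h>

#define	DEMO_DIR_MAX	2	/* stand-in for PFR_DIR_MAX */
#define	DEMO_OP_MAX	3	/* stand-in for PFR_OP_ADDR_MAX */

struct demo_kcounters {
	counter_u64_t	packets[DEMO_DIR_MAX][DEMO_OP_MAX];
};

/*
 * Allocate every counter up front, as pfr_create_kentry() now does.
 * counter_u64_alloc(M_NOWAIT) may fail, so the caller must unwind
 * via the fini path on ENOMEM.
 */
static int
demo_counters_init(struct demo_kcounters *kc)
{
	int dir, op;

	for (dir = 0; dir < DEMO_DIR_MAX; dir++)
		for (op = 0; op < DEMO_OP_MAX; op++) {
			kc->packets[dir][op] = counter_u64_alloc(M_NOWAIT);
			if (kc->packets[dir][op] == NULL)
				return (ENOMEM);
		}
	return (0);
}

/* Hot path: lockless per-CPU increment, as in pfr_update_stats(). */
static void
demo_counters_hit(struct demo_kcounters *kc, int dir, int op)
{
	counter_u64_add(kc->packets[dir][op], 1);
}

/* Export path: fold the per-CPU values, as in pfr_copyout_astats(). */
static uint64_t
demo_counters_read(const struct demo_kcounters *kc, int dir, int op)
{
	return (counter_u64_fetch(kc->packets[dir][op]));
}

/* Teardown mirrors pfr_destroy_kentry_counter(). */
static void
demo_counters_fini(struct demo_kcounters *kc)
{
	int dir, op;

	for (dir = 0; dir < DEMO_DIR_MAX; dir++)
		for (op = 0; op < DEMO_OP_MAX; op++)
			if (kc->packets[dir][op] != NULL)
				counter_u64_free(kc->packets[dir][op]);
}

The design point the patch trades on: counter_u64_add() touches only the
current CPU's slot, so the per-packet paths stay contention-free, while the
comparatively rare ioctl export paths pay the cost of summing across CPUs in
counter_u64_fetch().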