diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2019-01-21 09:45:46 +0100 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2019-01-21 10:00:40 +0100 |
commit | aec8db05b216a102e334d131b7a71f1514b04679 (patch) | |
tree | f9343da918ce3585a1ce469d53d4d54d79ca8cc6 /freebsd/sys/netpfil/pf/if_pfsync.c | |
parent | dpaa: Use if_transmit instead of legacy if_start (diff) | |
download | rtems-libbsd-aec8db05b216a102e334d131b7a71f1514b04679.tar.bz2 |
Update to FreeBSD stable/12 2019-01-16
Git mirror commit 4ff7d8141f665693dd8b17090d410604888f1e9a.
Update #3472.
Diffstat (limited to 'freebsd/sys/netpfil/pf/if_pfsync.c')
-rw-r--r-- | freebsd/sys/netpfil/pf/if_pfsync.c | 566 |
1 files changed, 330 insertions, 236 deletions
diff --git a/freebsd/sys/netpfil/pf/if_pfsync.c b/freebsd/sys/netpfil/pf/if_pfsync.c index 9612ac99..026d19a3 100644 --- a/freebsd/sys/netpfil/pf/if_pfsync.c +++ b/freebsd/sys/netpfil/pf/if_pfsync.c @@ -79,6 +79,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mutex.h> #include <sys/priv.h> #include <sys/protosw.h> +#include <sys/smp.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/sysctl.h> @@ -108,6 +109,8 @@ __FBSDID("$FreeBSD$"); sizeof(struct pfsync_header) + \ sizeof(struct pfsync_subheader) ) +struct pfsync_bucket; + struct pfsync_pkt { struct ip *ip; struct in_addr src; @@ -166,7 +169,7 @@ static struct pfsync_q pfsync_qs[] = { }; static void pfsync_q_ins(struct pf_state *, int, bool); -static void pfsync_q_del(struct pf_state *, bool); +static void pfsync_q_del(struct pf_state *, bool, struct pfsync_bucket *); static void pfsync_update_state(struct pf_state *); @@ -185,6 +188,28 @@ struct pfsync_deferral { struct mbuf *pd_m; }; +struct pfsync_sofct; + +struct pfsync_bucket +{ + int b_id; + struct pfsync_softc *b_sc; + struct mtx b_mtx; + struct callout b_tmo; + int b_flags; +#define PFSYNCF_BUCKET_PUSH 0x00000001 + + size_t b_len; + TAILQ_HEAD(, pf_state) b_qs[PFSYNC_S_COUNT]; + TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; + TAILQ_HEAD(, pfsync_deferral) b_deferrals; + u_int b_deferred; + void *b_plus; + size_t b_pluslen; + + struct ifaltq b_snd; +}; + struct pfsync_softc { /* Configuration */ struct ifnet *sc_ifp; @@ -194,20 +219,12 @@ struct pfsync_softc { uint32_t sc_flags; #define PFSYNCF_OK 0x00000001 #define PFSYNCF_DEFER 0x00000002 -#define PFSYNCF_PUSH 0x00000004 uint8_t sc_maxupdates; struct ip sc_template; - struct callout sc_tmo; struct mtx sc_mtx; /* Queued data */ - size_t sc_len; - TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; - TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; - TAILQ_HEAD(, pfsync_deferral) sc_deferrals; - u_int sc_deferred; - void *sc_plus; - size_t sc_pluslen; + struct pfsync_bucket *sc_buckets; /* Bulk update info */ struct mtx sc_bulk_mtx; @@ -225,6 +242,10 @@ struct pfsync_softc { #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) +#define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) +#define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) +#define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) + #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) @@ -241,7 +262,8 @@ VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; #define V_pfsync_carp_adj VNET(pfsync_carp_adj) static void pfsync_timeout(void *); -static void pfsync_push(struct pfsync_softc *); +static void pfsync_push(struct pfsync_bucket *); +static void pfsync_push_all(struct pfsync_softc *); static void pfsyncintr(void *); static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, void *); @@ -251,12 +273,16 @@ static void pfsync_pointers_uninit(void); static int pfsync_init(void); static void pfsync_uninit(void); +static unsigned long pfsync_buckets; + SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsyncstats), pfsyncstats, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); +SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, + &pfsync_buckets, 0, "Number of pfsync hash buckets"); static int pfsync_clone_create(struct if_clone *, int, caddr_t); static void pfsync_clone_destroy(struct ifnet *); @@ -272,10 +298,10 @@ static void pfsync_undefer_state(struct pf_state *, int); static void pfsync_defer_tmo(void *); static void pfsync_request_update(u_int32_t, u_int64_t); -static void pfsync_update_state_req(struct pf_state *); +static bool pfsync_update_state_req(struct pf_state *); static void pfsync_drop(struct pfsync_softc *); -static void pfsync_sendout(int); +static void pfsync_sendout(int, int); static void pfsync_send_plus(void *, size_t); static void pfsync_bulk_start(void); @@ -287,6 +313,9 @@ static void pfsync_detach_ifnet(struct ifnet *); #ifdef IPSEC static void pfsync_update_net_tdb(struct pfsync_tdb *); #endif +static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, + struct pf_state *); + #define PFSYNC_MAX_BULKTRIES 12 @@ -298,21 +327,16 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) { struct pfsync_softc *sc; struct ifnet *ifp; - int q; + struct pfsync_bucket *b; + int c, q; if (unit != 0) return (EINVAL); - sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); - sc->sc_flags |= PFSYNCF_OK; - - for (q = 0; q < PFSYNC_S_COUNT; q++) - TAILQ_INIT(&sc->sc_qs[q]); - - TAILQ_INIT(&sc->sc_upd_req_list); - TAILQ_INIT(&sc->sc_deferrals); + if (! pfsync_buckets) + pfsync_buckets = mp_ncpus * 2; - sc->sc_len = PFSYNC_MINPKT; + sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); sc->sc_maxupdates = 128; ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); @@ -325,12 +349,10 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_type = IFT_PFSYNC; - ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = sizeof(struct pfsync_header); ifp->if_mtu = ETHERMTU; mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); - callout_init(&sc->sc_tmo, 1); callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); @@ -338,6 +360,27 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); + sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), + M_PFSYNC, M_ZERO | M_WAITOK); + for (c = 0; c < pfsync_buckets; c++) { + b = &sc->sc_buckets[c]; + mtx_init(&b->b_mtx, pfsyncname, NULL, MTX_DEF); + + b->b_id = c; + b->b_sc = sc; + b->b_len = PFSYNC_MINPKT; + + for (q = 0; q < PFSYNC_S_COUNT; q++) + TAILQ_INIT(&b->b_qs[q]); + + TAILQ_INIT(&b->b_upd_req_list); + TAILQ_INIT(&b->b_deferrals); + + callout_init(&b->b_tmo, 1); + + b->b_snd.ifq_maxlen = ifqmaxlen; + } + V_pfsyncif = sc; return (0); @@ -347,29 +390,36 @@ static void pfsync_clone_destroy(struct ifnet *ifp) { struct pfsync_softc *sc = ifp->if_softc; + struct pfsync_bucket *b; + int c; - /* - * At this stage, everything should have already been - * cleared by pfsync_uninit(), and we have only to - * drain callouts. - */ - while (sc->sc_deferred > 0) { - struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); - - TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); - sc->sc_deferred--; - if (callout_stop(&pd->pd_tmo) > 0) { - pf_release_state(pd->pd_st); - m_freem(pd->pd_m); - free(pd, M_PFSYNC); - } else { - pd->pd_refs++; - callout_drain(&pd->pd_tmo); - free(pd, M_PFSYNC); + for (c = 0; c < pfsync_buckets; c++) { + b = &sc->sc_buckets[c]; + /* + * At this stage, everything should have already been + * cleared by pfsync_uninit(), and we have only to + * drain callouts. + */ + while (b->b_deferred > 0) { + struct pfsync_deferral *pd = + TAILQ_FIRST(&b->b_deferrals); + + TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); + b->b_deferred--; + if (callout_stop(&pd->pd_tmo) > 0) { + pf_release_state(pd->pd_st); + m_freem(pd->pd_m); + free(pd, M_PFSYNC); + } else { + pd->pd_refs++; + callout_drain(&pd->pd_tmo); + free(pd, M_PFSYNC); + } } + + callout_drain(&b->b_tmo); } - callout_drain(&sc->sc_tmo); callout_drain(&sc->sc_bulkfail_tmo); callout_drain(&sc->sc_bulk_tmo); @@ -385,6 +435,8 @@ pfsync_clone_destroy(struct ifnet *ifp) pfsync_multicast_cleanup(sc); mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_bulk_mtx); + + free(sc->sc_buckets, M_PFSYNC); free(sc, M_PFSYNC); V_pfsyncif = NULL; @@ -548,7 +600,7 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) st->state_flags &= ~PFSTATE_NOSYNC; if (st->state_flags & PFSTATE_ACK) { pfsync_q_ins(st, PFSYNC_S_IACK, true); - pfsync_push(sc); + pfsync_push_all(sc); } } st->state_flags &= ~PFSTATE_ACK; @@ -787,9 +839,7 @@ pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) continue; if (st->state_flags & PFSTATE_ACK) { - PFSYNC_LOCK(V_pfsyncif); pfsync_undefer_state(st, 0); - PFSYNC_UNLOCK(V_pfsyncif); } PF_STATE_UNLOCK(st); } @@ -878,9 +928,7 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } if (st->state_flags & PFSTATE_ACK) { - PFSYNC_LOCK(sc); pfsync_undefer_state(st, 1); - PFSYNC_UNLOCK(sc); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) @@ -914,9 +962,7 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) pfsync_update_state(st); PF_STATE_UNLOCK(st); - PFSYNC_LOCK(sc); - pfsync_push(sc); - PFSYNC_UNLOCK(sc); + pfsync_push_all(sc); continue; } PF_STATE_UNLOCK(st); @@ -962,16 +1008,14 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) st = pf_find_state_byid(up->id, up->creatorid); if (st == NULL) { /* We don't have this state. Ask for it. */ - PFSYNC_LOCK(sc); + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); pfsync_request_update(up->creatorid, up->id); - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); continue; } if (st->state_flags & PFSTATE_ACK) { - PFSYNC_LOCK(sc); pfsync_undefer_state(st, 1); - PFSYNC_UNLOCK(sc); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) @@ -1005,9 +1049,7 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) pfsync_update_state(st); PF_STATE_UNLOCK(st); - PFSYNC_LOCK(sc); - pfsync_push(sc); - PFSYNC_UNLOCK(sc); + pfsync_push_all(sc); continue; } PF_STATE_UNLOCK(st); @@ -1285,6 +1327,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct ifreq *ifr = (struct ifreq *)data; struct pfsyncreq pfsyncr; int error; + int c; switch (cmd) { case SIOCSIFFLAGS: @@ -1305,10 +1348,12 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ifr->ifr_mtu > sc->sc_sync_if->if_mtu) return (EINVAL); if (ifr->ifr_mtu < ifp->if_mtu) { - PFSYNC_LOCK(sc); - if (sc->sc_len > PFSYNC_MINPKT) - pfsync_sendout(1); - PFSYNC_UNLOCK(sc); + for (c = 0; c < pfsync_buckets; c++) { + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); + if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) + pfsync_sendout(1, c); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); + } } ifp->if_mtu = ifr->ifr_mtu; break; @@ -1381,12 +1426,16 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; } - if (sc->sc_len > PFSYNC_MINPKT && - (sifp->if_mtu < sc->sc_ifp->if_mtu || - (sc->sc_sync_if != NULL && - sifp->if_mtu < sc->sc_sync_if->if_mtu) || - sifp->if_mtu < MCLBYTES - sizeof(struct ip))) - pfsync_sendout(1); + for (c = 0; c < pfsync_buckets; c++) { + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); + if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && + (sifp->if_mtu < sc->sc_ifp->if_mtu || + (sc->sc_sync_if != NULL && + sifp->if_mtu < sc->sc_sync_if->if_mtu) || + sifp->if_mtu < MCLBYTES - sizeof(struct ip))) + pfsync_sendout(1, c); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); + } if (imo->imo_membership) pfsync_multicast_cleanup(sc); @@ -1423,8 +1472,10 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sc->sc_flags &= ~PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: requesting bulk update\n"); - pfsync_request_update(0, 0); PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); + pfsync_request_update(0, 0); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); PFSYNC_BLOCK(sc); sc->sc_ureq_sent = time_uptime; callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, @@ -1485,33 +1536,37 @@ pfsync_drop(struct pfsync_softc *sc) { struct pf_state *st, *next; struct pfsync_upd_req_item *ur; - int q; + struct pfsync_bucket *b; + int c, q; - for (q = 0; q < PFSYNC_S_COUNT; q++) { - if (TAILQ_EMPTY(&sc->sc_qs[q])) - continue; + for (c = 0; c < pfsync_buckets; c++) { + b = &sc->sc_buckets[c]; + for (q = 0; q < PFSYNC_S_COUNT; q++) { + if (TAILQ_EMPTY(&b->b_qs[q])) + continue; - TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { - KASSERT(st->sync_state == q, - ("%s: st->sync_state == q", - __func__)); - st->sync_state = PFSYNC_S_NONE; - pf_release_state(st); + TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { + KASSERT(st->sync_state == q, + ("%s: st->sync_state == q", + __func__)); + st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); + } + TAILQ_INIT(&b->b_qs[q]); } - TAILQ_INIT(&sc->sc_qs[q]); - } - while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { - TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); - free(ur, M_PFSYNC); - } + while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { + TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); + free(ur, M_PFSYNC); + } - sc->sc_plus = NULL; - sc->sc_len = PFSYNC_MINPKT; + b->b_len = PFSYNC_MINPKT; + b->b_plus = NULL; + } } static void -pfsync_sendout(int schedswi) +pfsync_sendout(int schedswi, int c) { struct pfsync_softc *sc = V_pfsyncif; struct ifnet *ifp = sc->sc_ifp; @@ -1521,27 +1576,28 @@ pfsync_sendout(int schedswi) struct pfsync_subheader *subh; struct pf_state *st, *st_next; struct pfsync_upd_req_item *ur; + struct pfsync_bucket *b = &sc->sc_buckets[c]; int offset; int q, count = 0; KASSERT(sc != NULL, ("%s: null sc", __func__)); - KASSERT(sc->sc_len > PFSYNC_MINPKT, - ("%s: sc_len %zu", __func__, sc->sc_len)); - PFSYNC_LOCK_ASSERT(sc); + KASSERT(b->b_len > PFSYNC_MINPKT, + ("%s: sc_len %zu", __func__, b->b_len)); + PFSYNC_BUCKET_LOCK_ASSERT(b); if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { pfsync_drop(sc); return; } - m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR); + m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); V_pfsyncstats.pfsyncs_onomem++; return; } m->m_data += max_linkhdr; - m->m_len = m->m_pkthdr.len = sc->sc_len; + m->m_len = m->m_pkthdr.len = b->b_len; /* build the ip header */ ip = (struct ip *)m->m_data; @@ -1557,19 +1613,19 @@ pfsync_sendout(int schedswi) offset += sizeof(*ph); ph->version = PFSYNC_VERSION; - ph->len = htons(sc->sc_len - sizeof(*ip)); + ph->len = htons(b->b_len - sizeof(*ip)); bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); /* walk the queues */ for (q = 0; q < PFSYNC_S_COUNT; q++) { - if (TAILQ_EMPTY(&sc->sc_qs[q])) + if (TAILQ_EMPTY(&b->b_qs[q])) continue; subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; - TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) { + TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", __func__)); @@ -1583,7 +1639,7 @@ pfsync_sendout(int schedswi) pf_release_state(st); count++; } - TAILQ_INIT(&sc->sc_qs[q]); + TAILQ_INIT(&b->b_qs[q]); bzero(subh, sizeof(*subh)); subh->action = pfsync_qs[q].action; @@ -1591,13 +1647,13 @@ pfsync_sendout(int schedswi) V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; } - if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { + if (!TAILQ_EMPTY(&b->b_upd_req_list)) { subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; - while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { - TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); + while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { + TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); bcopy(&ur->ur_msg, m->m_data + offset, sizeof(ur->ur_msg)); @@ -1613,11 +1669,11 @@ pfsync_sendout(int schedswi) } /* has someone built a custom region for us to add? */ - if (sc->sc_plus != NULL) { - bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); - offset += sc->sc_pluslen; + if (b->b_plus != NULL) { + bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); + offset += b->b_pluslen; - sc->sc_plus = NULL; + b->b_plus = NULL; } subh = (struct pfsync_subheader *)(m->m_data + offset); @@ -1631,24 +1687,24 @@ pfsync_sendout(int schedswi) /* we're done, let's put it on the wire */ if (ifp->if_bpf) { m->m_data += sizeof(*ip); - m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); + m->m_len = m->m_pkthdr.len = b->b_len - sizeof(*ip); BPF_MTAP(ifp, m); m->m_data -= sizeof(*ip); - m->m_len = m->m_pkthdr.len = sc->sc_len; + m->m_len = m->m_pkthdr.len = b->b_len; } if (sc->sc_sync_if == NULL) { - sc->sc_len = PFSYNC_MINPKT; + b->b_len = PFSYNC_MINPKT; m_freem(m); return; } if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); - sc->sc_len = PFSYNC_MINPKT; + b->b_len = PFSYNC_MINPKT; - if (!_IF_QFULL(&sc->sc_ifp->if_snd)) - _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); + if (!_IF_QFULL(&b->b_snd)) + _IF_ENQUEUE(&b->b_snd, m); else { m_freem(m); if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); @@ -1661,6 +1717,7 @@ static void pfsync_insert_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); if (st->state_flags & PFSTATE_NOSYNC) return; @@ -1674,12 +1731,12 @@ pfsync_insert_state(struct pf_state *st) KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); - PFSYNC_LOCK(sc); - if (sc->sc_len == PFSYNC_MINPKT) - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); + PFSYNC_BUCKET_LOCK(b); + if (b->b_len == PFSYNC_MINPKT) + callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); pfsync_q_ins(st, PFSYNC_S_INS, true); - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(b); st->sync_updates = 0; } @@ -1689,6 +1746,7 @@ pfsync_defer(struct pf_state *st, struct mbuf *m) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); if (m->m_flags & (M_BCAST|M_MCAST)) return (0); @@ -1701,13 +1759,13 @@ pfsync_defer(struct pf_state *st, struct mbuf *m) return (0); } - if (sc->sc_deferred >= 128) - pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); + if (b->b_deferred >= 128) + pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); if (pd == NULL) return (0); - sc->sc_deferred++; + b->b_deferred++; m->m_flags |= M_SKIP_FIREWALL; st->state_flags |= PFSTATE_ACK; @@ -1718,11 +1776,11 @@ pfsync_defer(struct pf_state *st, struct mbuf *m) pf_ref_state(st); pd->pd_m = m; - TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); - callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); + TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); + callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); - pfsync_push(sc); + pfsync_push(b); return (1); } @@ -1733,11 +1791,12 @@ pfsync_undefer(struct pfsync_deferral *pd, int drop) struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_state *st = pd->pd_st; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_LOCK_ASSERT(b); - TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); - sc->sc_deferred--; + TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); + b->b_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ free(pd, M_PFSYNC); pf_release_state(st); @@ -1745,8 +1804,8 @@ pfsync_undefer(struct pfsync_deferral *pd, int drop) if (drop) m_freem(m); else { - _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); - pfsync_push(sc); + _IF_ENQUEUE(&b->b_snd, m); + pfsync_push(b); } } @@ -1757,13 +1816,14 @@ pfsync_defer_tmo(void *arg) struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_state *st = pd->pd_st; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_LOCK_ASSERT(b); CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); - TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); - sc->sc_deferred--; + TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); + b->b_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ if (pd->pd_refs == 0) free(pd, M_PFSYNC); @@ -1781,40 +1841,52 @@ pfsync_undefer_state(struct pf_state *st, int drop) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_LOCK(b); - TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { + TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { if (pd->pd_st == st) { if (callout_stop(&pd->pd_tmo) > 0) pfsync_undefer(pd, drop); + + PFSYNC_BUCKET_UNLOCK(b); return; } } + PFSYNC_BUCKET_UNLOCK(b); panic("%s: unable to find deferred state", __func__); } +static struct pfsync_bucket* +pfsync_get_bucket(struct pfsync_softc *sc, struct pf_state *st) +{ + int c = PF_IDHASH(st) % pfsync_buckets; + return &sc->sc_buckets[c]; +} + static void pfsync_update_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; bool sync = false, ref = true; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PF_STATE_LOCK_ASSERT(st); - PFSYNC_LOCK(sc); + PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state(st, 0); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) - pfsync_q_del(st, true); - PFSYNC_UNLOCK(sc); + pfsync_q_del(st, true, b); + PFSYNC_BUCKET_UNLOCK(b); return; } - if (sc->sc_len == PFSYNC_MINPKT) - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); + if (b->b_len == PFSYNC_MINPKT) + callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); switch (st->sync_state) { case PFSYNC_S_UPD_C: @@ -1830,7 +1902,7 @@ pfsync_update_state(struct pf_state *st) break; case PFSYNC_S_IACK: - pfsync_q_del(st, false); + pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ @@ -1844,26 +1916,27 @@ pfsync_update_state(struct pf_state *st) } if (sync || (time_uptime - st->pfsync_time) < 2) - pfsync_push(sc); + pfsync_push(b); - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_request_update(u_int32_t creatorid, u_int64_t id) { struct pfsync_softc *sc = V_pfsyncif; + struct pfsync_bucket *b = &sc->sc_buckets[0]; struct pfsync_upd_req_item *item; size_t nlen = sizeof(struct pfsync_upd_req); - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_LOCK_ASSERT(b); /* * This code does a bit to prevent multiple update requests for the * same state being generated. It searches current subheader queue, * but it doesn't lookup into queue of already packed datagrams. */ - TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) + TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) if (item->ur_msg.id == id && item->ur_msg.creatorid == creatorid) return; @@ -1875,46 +1948,47 @@ pfsync_request_update(u_int32_t creatorid, u_int64_t id) item->ur_msg.id = id; item->ur_msg.creatorid = creatorid; - if (TAILQ_EMPTY(&sc->sc_upd_req_list)) + if (TAILQ_EMPTY(&b->b_upd_req_list)) nlen += sizeof(struct pfsync_subheader); - if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { - pfsync_sendout(1); + if (b->b_len + nlen > sc->sc_ifp->if_mtu) { + pfsync_sendout(1, 0); nlen = sizeof(struct pfsync_subheader) + sizeof(struct pfsync_upd_req); } - TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); - sc->sc_len += nlen; + TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); + b->b_len += nlen; } -static void +static bool pfsync_update_state_req(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; - bool ref = true; + bool ref = true, full = false; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PF_STATE_LOCK_ASSERT(st); - PFSYNC_LOCK(sc); + PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) - pfsync_q_del(st, true); - PFSYNC_UNLOCK(sc); - return; + pfsync_q_del(st, true, b); + PFSYNC_BUCKET_UNLOCK(b); + return (full); } switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_IACK: - pfsync_q_del(st, false); + pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD, ref); - pfsync_push(sc); + pfsync_push(b); break; case PFSYNC_S_INS: @@ -1927,38 +2001,44 @@ pfsync_update_state_req(struct pf_state *st) panic("%s: unexpected sync state %d", __func__, st->sync_state); } - PFSYNC_UNLOCK(sc); + if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state)) + full = true; + + PFSYNC_BUCKET_UNLOCK(b); + + return (full); } static void pfsync_delete_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); bool ref = true; - PFSYNC_LOCK(sc); + PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state(st, 1); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) - pfsync_q_del(st, true); - PFSYNC_UNLOCK(sc); + pfsync_q_del(st, true, b); + PFSYNC_BUCKET_UNLOCK(b); return; } - if (sc->sc_len == PFSYNC_MINPKT) - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); + if (b->b_len == PFSYNC_MINPKT) + callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); switch (st->sync_state) { case PFSYNC_S_INS: /* We never got to tell the world so just forget about it. */ - pfsync_q_del(st, true); + pfsync_q_del(st, true, b); break; case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_IACK: - pfsync_q_del(st, false); + pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ @@ -1970,13 +2050,12 @@ pfsync_delete_state(struct pf_state *st) panic("%s: unexpected sync state %d", __func__, st->sync_state); } - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_clear_states(u_int32_t creatorid, const char *ifname) { - struct pfsync_softc *sc = V_pfsyncif; struct { struct pfsync_subheader subh; struct pfsync_clr clr; @@ -1991,9 +2070,7 @@ pfsync_clear_states(u_int32_t creatorid, const char *ifname) strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); r.clr.creatorid = creatorid; - PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); - PFSYNC_UNLOCK(sc); } static void @@ -2001,48 +2078,48 @@ pfsync_q_ins(struct pf_state *st, int q, bool ref) { struct pfsync_softc *sc = V_pfsyncif; size_t nlen = pfsync_qs[q].len; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_LOCK_ASSERT(b); KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); - KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", - sc->sc_len)); + KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", + b->b_len)); - if (TAILQ_EMPTY(&sc->sc_qs[q])) + if (TAILQ_EMPTY(&b->b_qs[q])) nlen += sizeof(struct pfsync_subheader); - if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { - pfsync_sendout(1); + if (b->b_len + nlen > sc->sc_ifp->if_mtu) { + pfsync_sendout(1, b->b_id); nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; } - sc->sc_len += nlen; - TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); + b->b_len += nlen; + TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); st->sync_state = q; if (ref) pf_ref_state(st); } static void -pfsync_q_del(struct pf_state *st, bool unref) +pfsync_q_del(struct pf_state *st, bool unref, struct pfsync_bucket *b) { - struct pfsync_softc *sc = V_pfsyncif; int q = st->sync_state; - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_LOCK_ASSERT(b); KASSERT(st->sync_state != PFSYNC_S_NONE, ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); - sc->sc_len -= pfsync_qs[q].len; - TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); + b->b_len -= pfsync_qs[q].len; + TAILQ_REMOVE(&b->b_qs[q], st, sync_list); st->sync_state = PFSYNC_S_NONE; if (unref) pf_release_state(st); - if (TAILQ_EMPTY(&sc->sc_qs[q])) - sc->sc_len -= sizeof(struct pfsync_subheader); + if (TAILQ_EMPTY(&b->b_qs[q])) + b->b_len -= sizeof(struct pfsync_subheader); } static void @@ -2096,23 +2173,19 @@ pfsync_bulk_update(void *arg) } for (; s; s = LIST_NEXT(s, entry)) { - - if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < - sizeof(struct pfsync_state)) { - /* We've filled a packet. */ - sc->sc_bulk_hashid = i; - sc->sc_bulk_stateid = s->id; - sc->sc_bulk_creatorid = s->creatorid; - PF_HASHROW_UNLOCK(ih); - callout_reset(&sc->sc_bulk_tmo, 1, - pfsync_bulk_update, sc); - goto full; - } - if (s->sync_state == PFSYNC_S_NONE && s->timeout < PFTM_MAX && s->pfsync_time <= sc->sc_ureq_received) { - pfsync_update_state_req(s); + if (pfsync_update_state_req(s)) { + /* We've filled a packet. */ + sc->sc_bulk_hashid = i; + sc->sc_bulk_stateid = s->id; + sc->sc_bulk_creatorid = s->creatorid; + PF_HASHROW_UNLOCK(ih); + callout_reset(&sc->sc_bulk_tmo, 1, + pfsync_bulk_update, sc); + goto full; + } sent++; } } @@ -2121,7 +2194,6 @@ pfsync_bulk_update(void *arg) /* We're done. */ pfsync_bulk_status(PFSYNC_BUS_END); - full: CURVNET_RESTORE(); } @@ -2146,15 +2218,14 @@ pfsync_bulk_status(u_int8_t status) r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); r.bus.status = status; - PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); - PFSYNC_UNLOCK(sc); } static void pfsync_bulk_fail(void *arg) { struct pfsync_softc *sc = arg; + struct pfsync_bucket *b = &sc->sc_buckets[0]; CURVNET_SET(sc->sc_ifp->if_vnet); @@ -2164,9 +2235,9 @@ pfsync_bulk_fail(void *arg) /* Try again */ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, V_pfsyncif); - PFSYNC_LOCK(sc); + PFSYNC_BUCKET_LOCK(b); pfsync_request_update(0, 0); - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(b); } else { /* Pretend like the transfer was ok. */ sc->sc_ureq_sent = 0; @@ -2188,73 +2259,96 @@ static void pfsync_send_plus(void *plus, size_t pluslen) { struct pfsync_softc *sc = V_pfsyncif; + struct pfsync_bucket *b = &sc->sc_buckets[0]; - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_LOCK(b); - if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) - pfsync_sendout(1); + if (b->b_len + pluslen > sc->sc_ifp->if_mtu) + pfsync_sendout(1, b->b_id); - sc->sc_plus = plus; - sc->sc_len += (sc->sc_pluslen = pluslen); + b->b_plus = plus; + b->b_len += (b->b_pluslen = pluslen); - pfsync_sendout(1); + pfsync_sendout(1, b->b_id); + PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_timeout(void *arg) { - struct pfsync_softc *sc = arg; + struct pfsync_bucket *b = arg; - CURVNET_SET(sc->sc_ifp->if_vnet); - PFSYNC_LOCK(sc); - pfsync_push(sc); - PFSYNC_UNLOCK(sc); + CURVNET_SET(b->b_sc->sc_ifp->if_vnet); + PFSYNC_BUCKET_LOCK(b); + pfsync_push(b); + PFSYNC_BUCKET_UNLOCK(b); CURVNET_RESTORE(); } static void -pfsync_push(struct pfsync_softc *sc) +pfsync_push(struct pfsync_bucket *b) { - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_LOCK_ASSERT(b); - sc->sc_flags |= PFSYNCF_PUSH; + b->b_flags |= PFSYNCF_BUCKET_PUSH; swi_sched(V_pfsync_swi_cookie, 0); } static void +pfsync_push_all(struct pfsync_softc *sc) +{ + int c; + struct pfsync_bucket *b; + + for (c = 0; c < pfsync_buckets; c++) { + b = &sc->sc_buckets[c]; + + PFSYNC_BUCKET_LOCK(b); + pfsync_push(b); + PFSYNC_BUCKET_UNLOCK(b); + } +} + +static void pfsyncintr(void *arg) { struct pfsync_softc *sc = arg; + struct pfsync_bucket *b; struct mbuf *m, *n; + int c; CURVNET_SET(sc->sc_ifp->if_vnet); - PFSYNC_LOCK(sc); - if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { - pfsync_sendout(0); - sc->sc_flags &= ~PFSYNCF_PUSH; - } - _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); - PFSYNC_UNLOCK(sc); + for (c = 0; c < pfsync_buckets; c++) { + b = &sc->sc_buckets[c]; - for (; m != NULL; m = n) { + PFSYNC_BUCKET_LOCK(b); + if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { + pfsync_sendout(0, b->b_id); + b->b_flags &= ~PFSYNCF_BUCKET_PUSH; + } + _IF_DEQUEUE_ALL(&b->b_snd, m); + PFSYNC_BUCKET_UNLOCK(b); - n = m->m_nextpkt; - m->m_nextpkt = NULL; + for (; m != NULL; m = n) { - /* - * We distinguish between a deferral packet and our - * own pfsync packet based on M_SKIP_FIREWALL - * flag. This is XXX. - */ - if (m->m_flags & M_SKIP_FIREWALL) - ip_output(m, NULL, NULL, 0, NULL, NULL); - else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, - NULL) == 0) - V_pfsyncstats.pfsyncs_opackets++; - else - V_pfsyncstats.pfsyncs_oerrors++; + n = m->m_nextpkt; + m->m_nextpkt = NULL; + + /* + * We distinguish between a deferral packet and our + * own pfsync packet based on M_SKIP_FIREWALL + * flag. This is XXX. + */ + if (m->m_flags & M_SKIP_FIREWALL) + ip_output(m, NULL, NULL, 0, NULL, NULL); + else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, + NULL) == 0) + V_pfsyncstats.pfsyncs_opackets++; + else + V_pfsyncstats.pfsyncs_oerrors++; + } } CURVNET_RESTORE(); } |